xref: /llvm-project/llvm/lib/Support/APFloat.cpp (revision b1fe03f0840a2c488b1f07a669bfea3cc986ce3b)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30 
/// Forward METHOD_CALL to the union member that matches the semantics in use
/// and return its result from the enclosing function: U.IEEE when the
/// semantics use the IEEEFloat layout, U.Double when they use the
/// DoubleAPFloat layout. Any other layout reaches llvm_unreachable. The
/// do { ... } while (false) wrapper lets the expansion be used as a single
/// statement followed by a semicolon.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39 
40 using namespace llvm;
41 
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is _lhs * 4 + _rhs, so keys are only unique while every category
/// value is below 4. NOTE(review): fcCategory is declared outside this file;
/// confirm it still has at most four enumerators.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49 
50 /* Assumed in hexadecimal significand parsing, and conversion to
51    hexadecimal strings.  */
52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 
54 namespace llvm {
55 
// How the nonfinite values Inf and NaN are represented. Each format selects
// one of these through fltSemantics::nonFiniteBehavior.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN and Float6E2M3FN types,
  // which do not support Inf or NaN values. IEEEFloat::makeNaN() treats a
  // request for NaN in such a format as unreachable.
  FiniteOnly,
};
76 
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3 floating point type where NaN is
  // represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
101 
/* Represents floating point arithmetic semantics.

   This is an aggregate: the semantics tables in this file initialize it
   positionally, so the member order below is part of its contract.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf and NaN are represented, if at all.  Defaults to the IEEE 754
     behavior when an initializer omits it.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* Which bit pattern encodes NaN.  Defaults to the IEEE encoding when an
     initializer omits it.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};
130 
// One semantics table per supported format. Initializers follow the member
// order of fltSemantics:
//   {maxExponent, minExponent, precision, sizeInBits
//    [, nonFiniteBehavior [, nanEncoding]]}
// Omitted trailing members take the defaults declared in fltSemantics
// (IEEE754 non-finite behavior, IEEE NaN encoding).
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// Placeholder semantics returned by APFloatBase::Bogus(); every field is zero.
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
152 
153 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
154    64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
155    (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
156    Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
157    to each other, and two 11-bit exponents.
158 
159    Note: we need to make the value different from semBogus as otherwise
160    an unsafe optimization may collapse both values to a single address,
161    and we heavily rely on them having distinct addresses.             */
162 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
163 
/* These are legacy semantics for the fallback, inaccurate implementation of
165    IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
166    operation. It's equivalent to having an IEEE number with consecutive 106
167    bits of mantissa and 11 bits of exponent.
168 
169    It's not equivalent to IBM double-double. For example, a legit IBM
170    double-double, 1 + epsilon:
171 
172      1 + epsilon = 1 + (1 >> 1076)
173 
174    is not representable by a consecutive 106 bits of mantissa.
175 
176    Currently, these semantics are used in the following way:
177 
178      semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
179      (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
180      semPPCDoubleDoubleLegacy -> IEEE operations
181 
182    We use bitcastToAPInt() to get the bit representation (in APInt) of the
183    underlying IEEEdouble, then use the APInt constructor to construct the
184    legacy IEEE float.
185 
186    TODO: Implement all operations in semPPCDoubleDouble, and delete these
187    semantics.  */
188 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
189                                                           53 + 53, 128};
190 
191 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
192   switch (S) {
193   case S_IEEEhalf:
194     return IEEEhalf();
195   case S_BFloat:
196     return BFloat();
197   case S_IEEEsingle:
198     return IEEEsingle();
199   case S_IEEEdouble:
200     return IEEEdouble();
201   case S_IEEEquad:
202     return IEEEquad();
203   case S_PPCDoubleDouble:
204     return PPCDoubleDouble();
205   case S_Float8E5M2:
206     return Float8E5M2();
207   case S_Float8E5M2FNUZ:
208     return Float8E5M2FNUZ();
209   case S_Float8E4M3FN:
210     return Float8E4M3FN();
211   case S_Float8E4M3FNUZ:
212     return Float8E4M3FNUZ();
213   case S_Float8E4M3B11FNUZ:
214     return Float8E4M3B11FNUZ();
215   case S_FloatTF32:
216     return FloatTF32();
217   case S_Float6E3M2FN:
218     return Float6E3M2FN();
219   case S_Float6E2M3FN:
220     return Float6E2M3FN();
221   case S_x87DoubleExtended:
222     return x87DoubleExtended();
223   }
224   llvm_unreachable("Unrecognised floating semantics");
225 }
226 
227 APFloatBase::Semantics
228 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
229   if (&Sem == &llvm::APFloat::IEEEhalf())
230     return S_IEEEhalf;
231   else if (&Sem == &llvm::APFloat::BFloat())
232     return S_BFloat;
233   else if (&Sem == &llvm::APFloat::IEEEsingle())
234     return S_IEEEsingle;
235   else if (&Sem == &llvm::APFloat::IEEEdouble())
236     return S_IEEEdouble;
237   else if (&Sem == &llvm::APFloat::IEEEquad())
238     return S_IEEEquad;
239   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
240     return S_PPCDoubleDouble;
241   else if (&Sem == &llvm::APFloat::Float8E5M2())
242     return S_Float8E5M2;
243   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
244     return S_Float8E5M2FNUZ;
245   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
246     return S_Float8E4M3FN;
247   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
248     return S_Float8E4M3FNUZ;
249   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
250     return S_Float8E4M3B11FNUZ;
251   else if (&Sem == &llvm::APFloat::FloatTF32())
252     return S_FloatTF32;
253   else if (&Sem == &llvm::APFloat::Float6E3M2FN())
254     return S_Float6E3M2FN;
255   else if (&Sem == &llvm::APFloat::Float6E2M3FN())
256     return S_Float6E2M3FN;
257   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
258     return S_x87DoubleExtended;
259   else
260     llvm_unreachable("Unknown floating semantics");
261 }
262 
// Accessors for the file-local semantics tables. Each returns a reference to
// one fixed object, which is what allows SemanticsToEnum() to identify
// semantics by comparing addresses.
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
// Bogus semantics have no Semantics enumerator: neither EnumToSemantics() nor
// SemanticsToEnum() handles them.
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
285 
// Out-of-class definitions of the static constexpr rounding-mode members of
// APFloatBase. NOTE(review): presumably kept so that odr-uses still link in a
// pre-C++17 build, where such members are not implicitly inline; confirm
// before removing.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
291 
/* A tight upper bound on number of parts required to hold the value
   pow(5, power) is

     power * 815 / (351 * integerPartWidth) + 1

   However, whilst the result may require only this many parts,
   because we are multiplying two values to get it, the
   multiplication may require an extra part with the excess part
   being zero (consider the trivial case of 1 * 1, tcFullMultiply
   requires two parts to hold the single-part result).  So we add an
   extra one to guarantee enough space whilst multiplying.  */
// Largest maxExponent among the semantics tables in this file (semIEEEquad
// and semX87DoubleExtended).
const unsigned int maxExponent = 16383;
// Largest precision among the semantics tables in this file (semIEEEquad).
const unsigned int maxPrecision = 113;
// Largest power of five the bound is evaluated for.
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
// The bound described above, evaluated at maxPowerOfFiveExponent: the "+ 1"
// of the formula plus one extra part of multiplication headroom gives the 2.
const unsigned int maxPowerOfFiveParts =
    2 +
    ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
309 
// Read-only accessors exposing individual fltSemantics fields.

// Number of significand bits, including the integer bit.
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
// Largest E such that 2^E is representable.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
// Smallest E such that 2^E is a normalized number.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
// Number of bits actually used by the format.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
324 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
325                                                  bool isSigned) {
326   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
327   // at least one more bit than the MaxExponent to hold the max FP value.
328   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
329   // Extra sign bit needed.
330   if (isSigned)
331     ++MinBitWidth;
332   return MinBitWidth;
333 }
334 
335 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
336                                             const fltSemantics &Dst) {
337   // Exponent range must be larger.
338   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
339     return false;
340 
341   // If the mantissa is long enough, the result value could still be denormal
342   // with a larger exponent range.
343   //
344   // FIXME: This condition is probably not accurate but also shouldn't be a
345   // practical concern with existing types.
346   return Dst.precision >= Src.precision;
347 }
348 
// Number of bits actually used by the format. Returns the same field as
// semanticsSizeInBits().
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
352 
// Exponent value one below the smallest normal exponent. exponentNaN() also
// returns it for formats whose NaN uses the negative-zero encoding.
// NOTE(review): presumably the exponent assigned to zero values; confirm at
// the call sites.
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}

// Exponent value one above the largest finite exponent.
// NOTE(review): presumably the exponent assigned to infinities; confirm at
// the call sites.
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
362 
363 static constexpr APFloatBase::ExponentType
364 exponentNaN(const fltSemantics &semantics) {
365   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
366     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
367       return exponentZero(semantics);
368     return semantics.maxExponent;
369   }
370   return semantics.maxExponent + 1;
371 }
372 
373 /* A bunch of private, handy routines.  */
374 
// Wrap a message in a StringError carrying inconvertibleErrorCode(); used for
// every parse failure reported by the string-reading helpers below.
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
378 
379 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
380   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
381 }
382 
/* Map a character code to its decimal digit value.  Digits give 0U-9U; any
   other input gives a value >= 10U, because the unsigned subtraction wraps
   for characters below '0'.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
389 
390 /* Return the value of a decimal exponent of the form
391    [+-]ddddddd.
392 
393    If the exponent overflows, returns a large exponent with the
394    appropriate sign.  */
395 static Expected<int> readExponent(StringRef::iterator begin,
396                                   StringRef::iterator end) {
397   bool isNegative;
398   unsigned int absExponent;
399   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
400   StringRef::iterator p = begin;
401 
402   // Treat no exponent as 0 to match binutils
403   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
404     return 0;
405   }
406 
407   isNegative = (*p == '-');
408   if (*p == '-' || *p == '+') {
409     p++;
410     if (p == end)
411       return createError("Exponent has no digits");
412   }
413 
414   absExponent = decDigitValue(*p++);
415   if (absExponent >= 10U)
416     return createError("Invalid character in exponent");
417 
418   for (; p != end; ++p) {
419     unsigned int value;
420 
421     value = decDigitValue(*p);
422     if (value >= 10U)
423       return createError("Invalid character in exponent");
424 
425     absExponent = absExponent * 10U + value;
426     if (absExponent >= overlargeExponent) {
427       absExponent = overlargeExponent;
428       break;
429     }
430   }
431 
432   if (isNegative)
433     return -(int) absExponent;
434   else
435     return (int) absExponent;
436 }
437 
438 /* This is ugly and needs cleaning up, but I don't immediately see
439    how whilst remaining safe.  */
440 static Expected<int> totalExponent(StringRef::iterator p,
441                                    StringRef::iterator end,
442                                    int exponentAdjustment) {
443   int unsignedExponent;
444   bool negative, overflow;
445   int exponent = 0;
446 
447   if (p == end)
448     return createError("Exponent has no digits");
449 
450   negative = *p == '-';
451   if (*p == '-' || *p == '+') {
452     p++;
453     if (p == end)
454       return createError("Exponent has no digits");
455   }
456 
457   unsignedExponent = 0;
458   overflow = false;
459   for (; p != end; ++p) {
460     unsigned int value;
461 
462     value = decDigitValue(*p);
463     if (value >= 10U)
464       return createError("Invalid character in exponent");
465 
466     unsignedExponent = unsignedExponent * 10 + value;
467     if (unsignedExponent > 32767) {
468       overflow = true;
469       break;
470     }
471   }
472 
473   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
474     overflow = true;
475 
476   if (!overflow) {
477     exponent = unsignedExponent;
478     if (negative)
479       exponent = -exponent;
480     exponent += exponentAdjustment;
481     if (exponent > 32767 || exponent < -32768)
482       overflow = true;
483   }
484 
485   if (overflow)
486     exponent = negative ? -32768: 32767;
487 
488   return exponent;
489 }
490 
491 static Expected<StringRef::iterator>
492 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
493                            StringRef::iterator *dot) {
494   StringRef::iterator p = begin;
495   *dot = end;
496   while (p != end && *p == '0')
497     p++;
498 
499   if (p != end && *p == '.') {
500     *dot = p++;
501 
502     if (end - begin == 1)
503       return createError("Significand has no digits");
504 
505     while (p != end && *p == '0')
506       p++;
507   }
508 
509   return p;
510 }
511 
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* Position reached after skipping leading zeroes and any decimal point.  */
  const char *firstSigDigit;
  /* Position of the last digit left after dropping trailing zeroes.  */
  const char *lastSigDigit;
  /* Decimal exponent when the significant digits are read as an integer.  */
  int exponent;
  /* Decimal exponent when the point follows the first significant digit.  */
  int normalizedExponent;
};
531 
/* Parse the decimal significand, and any exponent that follows it, found in
   [begin, end) and describe it in *D (see decimalInfo).

   Returns Error::success() when the text was understood.  Otherwise returns
   an error for a second '.', for a character that is neither a digit, a '.'
   nor an exponent marker, for a significand with no digits before the
   exponent marker, or whatever error skipLeadingZeroesAndAnyDot() or
   readExponent() reports.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  /* dot == end means "no decimal point seen yet".  */
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Scan the digits, remembering the single permitted '.', and stop at the
     first character that is neither.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  /* Anything left over must be an exponent.  */
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    /* Nothing at all precedes the exponent marker.  */
    if (p == begin)
      return createError("Significand has no digits");
    /* The only character before the exponent marker is the dot.  */
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  The inner loop steps back over
       zeroes; the outer loop repeats it after stepping back onto a '.', so
       zeroes on both sides of the point are dropped.  */
    if (p != begin) {
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  When the point lies to
       the right of the last significant digit, (dot - p) - 1 dropped digit
       positions raise the exponent; when it lies to the left, (dot - p) is
       negative and lowers it by the number of fraction digits kept.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    /* Add the number of digits after the first significant one: the
       pointer distance, less one if the '.' lies between the two.  */
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
597 
598 /* Return the trailing fraction of a hexadecimal number.
599    DIGITVALUE is the first hex digit of the fraction, P points to
600    the next digit.  */
601 static Expected<lostFraction>
602 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
603                             unsigned int digitValue) {
604   unsigned int hexDigit;
605 
606   /* If the first trailing digit isn't 0 or 8 we can work out the
607      fraction immediately.  */
608   if (digitValue > 8)
609     return lfMoreThanHalf;
610   else if (digitValue < 8 && digitValue > 0)
611     return lfLessThanHalf;
612 
613   // Otherwise we need to find the first non-zero digit.
614   while (p != end && (*p == '0' || *p == '.'))
615     p++;
616 
617   if (p == end)
618     return createError("Invalid trailing hexadecimal fraction!");
619 
620   hexDigit = hexDigitValue(*p);
621 
622   /* If we ran off the end it is exactly zero or one-half, otherwise
623      a little more.  */
624   if (hexDigit == UINT_MAX)
625     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
626   else
627     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
628 }
629 
630 /* Return the fraction lost were a bignum truncated losing the least
631    significant BITS bits.  */
632 static lostFraction
633 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
634                               unsigned int partCount,
635                               unsigned int bits)
636 {
637   unsigned int lsb;
638 
639   lsb = APInt::tcLSB(parts, partCount);
640 
641   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
642   if (bits <= lsb)
643     return lfExactlyZero;
644   if (bits == lsb + 1)
645     return lfExactlyHalf;
646   if (bits <= partCount * APFloatBase::integerPartWidth &&
647       APInt::tcExtractBit(parts, bits - 1))
648     return lfMoreThanHalf;
649 
650   return lfLessThanHalf;
651 }
652 
653 /* Shift DST right BITS bits noting lost fraction.  */
654 static lostFraction
655 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
656 {
657   lostFraction lost_fraction;
658 
659   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
660 
661   APInt::tcShiftRight(dst, parts, bits);
662 
663   return lost_fraction;
664 }
665 
666 /* Combine the effect of two lost fractions.  */
667 static lostFraction
668 combineLostFractions(lostFraction moreSignificant,
669                      lostFraction lessSignificant)
670 {
671   if (lessSignificant != lfExactlyZero) {
672     if (moreSignificant == lfExactlyZero)
673       moreSignificant = lfLessThanHalf;
674     else if (moreSignificant == lfExactlyHalf)
675       moreSignificant = lfMoreThanHalf;
676   }
677 
678   return moreSignificant;
679 }
680 
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers that each differ from the value they approximate by at most
   HUERR1 and HUERR2 half-ulps.  The true error is strictly less than the
   returned value.  INEXACTMULTIPLY says whether the multiplication itself
   was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  const unsigned int inputError = HUerr1 + HUerr2;

  assert(HUerr1 < 2 || HUerr2 < 2 || (inputError < 8));

  /* Exact inputs: only the multiplication can contribute.  */
  if (inputError == 0)
    return inexactMultiply ? 2U : 0U;  /* <= inexactMultiply half-ulps.  */

  return (inexactMultiply ? 1U : 0U) + 2 * inputError;
}
698 
699 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
700    when the least significant BITS are truncated.  BITS cannot be
701    zero.  */
702 static APFloatBase::integerPart
703 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
704                  bool isNearest) {
705   unsigned int count, partBits;
706   APFloatBase::integerPart part, boundary;
707 
708   assert(bits != 0);
709 
710   bits--;
711   count = bits / APFloatBase::integerPartWidth;
712   partBits = bits % APFloatBase::integerPartWidth + 1;
713 
714   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
715 
716   if (isNearest)
717     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
718   else
719     boundary = 0;
720 
721   if (count == 0) {
722     if (part - boundary <= boundary - part)
723       return part - boundary;
724     else
725       return boundary - part;
726   }
727 
728   if (part == boundary) {
729     while (--count)
730       if (parts[count])
731         return ~(APFloatBase::integerPart) 0; /* A lot.  */
732 
733     return parts[0];
734   } else if (part == boundary - 1) {
735     while (--count)
736       if (~parts[count])
737         return ~(APFloatBase::integerPart) 0; /* A lot.  */
738 
739     return -parts[0];
740   }
741 
742   return ~(APFloatBase::integerPart) 0; /* A lot.  */
743 }
744 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER select a starting value from a small table;
   each remaining bit n selects pow(5, pow(2, n+3)), which is produced by
   repeated squaring and multiplied into the running product.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Storage for the successive squares, laid out back to back.  The first
     entry is pow(5, 8).  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* Number of parts in the square that pow5 currently points at.  */
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* p1 holds the running product and p2 is the buffer the next product is
     written to; they swap between dst and scratch after each multiply.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  /* Number of parts in the running product.  */
  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  The previous square
       sits in the partsCount parts immediately before pow5.  */
    if (n != 0) {
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      /* Drop the top part when the product did not need it.  */
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product is now in p2, using RESULT parts.  Swap the buffers so
         that p1 holds it again and p2 becomes scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step past the square just used, to where the next one is written.  */
    pow5 += partsCount;
  }

  /* The product may have ended up in the scratch buffer.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
800 
801 /* Zero at the end to avoid modular arithmetic when adding one; used
802    when rounding up during hexadecimal output.  */
803 static const char hexDigitsLower[] = "0123456789abcdef0";
804 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
805 static const char infinityL[] = "infinity";
806 static const char infinityU[] = "INFINITY";
807 static const char NaNL[] = "nan";
808 static const char NaNU[] = "NAN";
809 
810 /* Write out an integerPart in hexadecimal, starting with the most
811    significant nibble.  Write out exactly COUNT hexdigits, return
812    COUNT.  */
813 static unsigned int
814 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
815            const char *hexDigitChars)
816 {
817   unsigned int result = count;
818 
819   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
820 
821   part >>= (APFloatBase::integerPartWidth - 4 * count);
822   while (count--) {
823     dst[count] = hexDigitChars[part & 0xf];
824     part >>= 4;
825   }
826 
827   return result;
828 }
829 
/* Write N to DST as an unsigned decimal integer, without a terminator, and
   return the position just past the last digit written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char buff[40];
  char *const buffEnd = buff + sizeof(buff);
  char *first = buffEnd;

  /* Digits are produced least significant first, so fill the buffer from
     its end backwards.  Zero still produces one digit.  */
  do {
    *--first = static_cast<char>('0' + n % 10);
    n /= 10;
  } while (n != 0);

  while (first != buffEnd)
    *dst++ = *first++;

  return dst;
}
847 
848 /* Write out a signed decimal integer.  */
849 static char *
850 writeSignedDecimal (char *dst, int value)
851 {
852   if (value < 0) {
853     *dst++ = '-';
854     dst = writeUnsignedDecimal(dst, -(unsigned) value);
855   } else
856     dst = writeUnsignedDecimal(dst, value);
857 
858   return dst;
859 }
860 
861 namespace detail {
862 /* Constructors.  */
863 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
864   unsigned int count;
865 
866   semantics = ourSemantics;
867   count = partCount();
868   if (count > 1)
869     significand.parts = new integerPart[count];
870 }
871 
872 void IEEEFloat::freeSignificand() {
873   if (needsCleanup())
874     delete [] significand.parts;
875 }
876 
877 void IEEEFloat::assign(const IEEEFloat &rhs) {
878   assert(semantics == rhs.semantics);
879 
880   sign = rhs.sign;
881   category = rhs.category;
882   exponent = rhs.exponent;
883   if (isFiniteNonZero() || category == fcNaN)
884     copySignificand(rhs);
885 }
886 
887 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
888   assert(isFiniteNonZero() || category == fcNaN);
889   assert(rhs.partCount() >= partCount());
890 
891   APInt::tcAssign(significandParts(), rhs.significandParts(),
892                   partCount());
893 }
894 
/* Make this number a NaN.  SNaN requests a signalling NaN, Negative
   supplies the sign, and FILL, when non-null, supplies payload bits that
   are copied into the significand (bits at or above position
   precision - 1 are discarded).  Without a fill the payload starts out
   as zero.  Formats whose nonFiniteBehavior is NanOnly ignore SNaN and
   FILL and use a fixed significand pattern instead.  Must not be called
   for FiniteOnly formats.  */
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support NaN");

  category = fcNaN;
  sign = Negative;
  exponent = exponentNaN();

  integerPart *significand = significandParts();
  unsigned numParts = partCount();

  APInt fill_storage;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // NanOnly types do not distinguish signalling and quiet NaN, so the
    // signalling request is dropped and the SNaN branch below is never
    // taken for them.
    SNaN = false;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
      // The NaN shares its bit pattern with negative zero: sign set,
      // all-zero payload.
      sign = true;
      fill_storage = APInt::getZero(semantics->precision - 1);
    } else {
      fill_storage = APInt::getAllOnes(semantics->precision - 1);
    }
    // Override whatever payload the caller passed in.
    fill = &fill_storage;
  }

  // Set the significand bits to the fill.  Clear everything first when
  // there is no fill, or when the fill has fewer words than we have parts
  // and so would leave the upper parts untouched.
  if (!fill || fill->getNumWords() < numParts)
    APInt::tcSet(significand, 0, numParts);
  if (fill) {
    APInt::tcAssign(significand, fill->getRawData(),
                    std::min(fill->getNumWords(), numParts));

    // Zero out the excess bits of the significand: keep only the low
    // precision - 1 bits of the payload.
    unsigned bitsToPreserve = semantics->precision - 1;
    unsigned part = bitsToPreserve / 64;
    bitsToPreserve %= 64;
    significand[part] &= ((1ULL << bitsToPreserve) - 1);
    for (part++; part != numParts; ++part)
      significand[part] = 0;
  }

  // The quiet bit is the bit directly below the integer bit.
  unsigned QNaNBit = semantics->precision - 2;

  if (SNaN) {
    // We always have to clear the QNaN bit to make it an SNaN.
    APInt::tcClearBit(significand, QNaNBit);

    // If there are no bits set in the payload, we have to set
    // *something* to make it a NaN instead of an infinity;
    // conventionally, this is the next bit down from the QNaN bit.
    if (APInt::tcIsZero(significand, numParts))
      APInt::tcSetBit(significand, QNaNBit - 1);
  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // The only NaN is a quiet NaN, and it has no bits set in the significand.
    // Do nothing.
  } else {
    // We always have to set the QNaN bit to make it a QNaN.
    APInt::tcSetBit(significand, QNaNBit);
  }

  // For x87 extended precision, we want to make a NaN, not a
  // pseudo-NaN, so also set the bit at position precision - 1.  Maybe we
  // should expose the ability to make pseudo-NaNs?
  if (semantics == &semX87DoubleExtended)
    APInt::tcSetBit(significand, QNaNBit + 1);
}
964 
965 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
966   if (this != &rhs) {
967     if (semantics != rhs.semantics) {
968       freeSignificand();
969       initialize(rhs.semantics);
970     }
971     assign(rhs);
972   }
973 
974   return *this;
975 }
976 
977 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
978   freeSignificand();
979 
980   semantics = rhs.semantics;
981   significand = rhs.significand;
982   exponent = rhs.exponent;
983   category = rhs.category;
984   sign = rhs.sign;
985 
986   rhs.semantics = &semBogus;
987   return *this;
988 }
989 
990 bool IEEEFloat::isDenormal() const {
991   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
992          (APInt::tcExtractBit(significandParts(),
993                               semantics->precision - 1) == 0);
994 }
995 
996 bool IEEEFloat::isSmallest() const {
997   // The smallest number by magnitude in our format will be the smallest
998   // denormal, i.e. the floating point number with exponent being minimum
999   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1000   return isFiniteNonZero() && exponent == semantics->minExponent &&
1001     significandMSB() == 0;
1002 }
1003 
1004 bool IEEEFloat::isSmallestNormalized() const {
1005   return getCategory() == fcNormal && exponent == semantics->minExponent &&
1006          isSignificandAllZerosExceptMSB();
1007 }
1008 
1009 bool IEEEFloat::isSignificandAllOnes() const {
1010   // Test if the significand excluding the integral bit is all ones. This allows
1011   // us to test for binade boundaries.
1012   const integerPart *Parts = significandParts();
1013   const unsigned PartCount = partCountForBits(semantics->precision);
1014   for (unsigned i = 0; i < PartCount - 1; i++)
1015     if (~Parts[i])
1016       return false;
1017 
1018   // Set the unused high bits to all ones when we compare.
1019   const unsigned NumHighBits =
1020     PartCount*integerPartWidth - semantics->precision + 1;
1021   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1022          "Can not have more high bits to fill than integerPartWidth");
1023   const integerPart HighBitFill =
1024     ~integerPart(0) << (integerPartWidth - NumHighBits);
1025   if (~(Parts[PartCount - 1] | HighBitFill))
1026     return false;
1027 
1028   return true;
1029 }
1030 
1031 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1032   // Test if the significand excluding the integral bit is all ones except for
1033   // the least significant bit.
1034   const integerPart *Parts = significandParts();
1035 
1036   if (Parts[0] & 1)
1037     return false;
1038 
1039   const unsigned PartCount = partCountForBits(semantics->precision);
1040   for (unsigned i = 0; i < PartCount - 1; i++) {
1041     if (~Parts[i] & ~unsigned{!i})
1042       return false;
1043   }
1044 
1045   // Set the unused high bits to all ones when we compare.
1046   const unsigned NumHighBits =
1047       PartCount * integerPartWidth - semantics->precision + 1;
1048   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1049          "Can not have more high bits to fill than integerPartWidth");
1050   const integerPart HighBitFill = ~integerPart(0)
1051                                   << (integerPartWidth - NumHighBits);
1052   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1053     return false;
1054 
1055   return true;
1056 }
1057 
1058 bool IEEEFloat::isSignificandAllZeros() const {
1059   // Test if the significand excluding the integral bit is all zeros. This
1060   // allows us to test for binade boundaries.
1061   const integerPart *Parts = significandParts();
1062   const unsigned PartCount = partCountForBits(semantics->precision);
1063 
1064   for (unsigned i = 0; i < PartCount - 1; i++)
1065     if (Parts[i])
1066       return false;
1067 
1068   // Compute how many bits are used in the final word.
1069   const unsigned NumHighBits =
1070     PartCount*integerPartWidth - semantics->precision + 1;
1071   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1072          "clear than integerPartWidth");
1073   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1074 
1075   if (Parts[PartCount - 1] & HighBitMask)
1076     return false;
1077 
1078   return true;
1079 }
1080 
1081 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1082   const integerPart *Parts = significandParts();
1083   const unsigned PartCount = partCountForBits(semantics->precision);
1084 
1085   for (unsigned i = 0; i < PartCount - 1; i++) {
1086     if (Parts[i])
1087       return false;
1088   }
1089 
1090   const unsigned NumHighBits =
1091       PartCount * integerPartWidth - semantics->precision + 1;
1092   return Parts[PartCount - 1] == integerPart(1)
1093                                      << (integerPartWidth - NumHighBits);
1094 }
1095 
1096 bool IEEEFloat::isLargest() const {
1097   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1098       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1099     // The largest number by magnitude in our format will be the floating point
1100     // number with maximum exponent and with significand that is all ones except
1101     // the LSB.
1102     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1103            isSignificandAllOnesExceptLSB();
1104   } else {
1105     // The largest number by magnitude in our format will be the floating point
1106     // number with maximum exponent and with significand that is all ones.
1107     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1108            isSignificandAllOnes();
1109   }
1110 }
1111 
1112 bool IEEEFloat::isInteger() const {
1113   // This could be made more efficient; I'm going for obviously correct.
1114   if (!isFinite()) return false;
1115   IEEEFloat truncated = *this;
1116   truncated.roundToIntegral(rmTowardZero);
1117   return compare(truncated) == cmpEqual;
1118 }
1119 
1120 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1121   if (this == &rhs)
1122     return true;
1123   if (semantics != rhs.semantics ||
1124       category != rhs.category ||
1125       sign != rhs.sign)
1126     return false;
1127   if (category==fcZero || category==fcInfinity)
1128     return true;
1129 
1130   if (isFiniteNonZero() && exponent != rhs.exponent)
1131     return false;
1132 
1133   return std::equal(significandParts(), significandParts() + partCount(),
1134                     rhs.significandParts());
1135 }
1136 
1137 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1138   initialize(&ourSemantics);
1139   sign = 0;
1140   category = fcNormal;
1141   zeroSignificand();
1142   exponent = ourSemantics.precision - 1;
1143   significandParts()[0] = value;
1144   normalize(rmNearestTiesToEven, lfExactlyZero);
1145 }
1146 
// Construct a zero of the given semantics (makeZero is called with false).
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
  initialize(&ourSemantics);
  makeZero(false);
}
1151 
// Delegate to the previous constructor, because a later copy construction may
// actually inspect the category, which therefore must not be garbage.  The
// tag argument itself is unused.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
    : IEEEFloat(ourSemantics) {}
1156 
// Copy constructor: set up storage for rhs's semantics, then copy its value.
IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
  initialize(rhs.semantics);
  assign(rhs);
}
1161 
// Move constructor: start out bound to semBogus and let move assignment take
// over the contents of rhs.
IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
  *this = std::move(rhs);
}
1165 
// Destructor: release any heap-allocated significand parts.
IEEEFloat::~IEEEFloat() { freeSignificand(); }
1167 
1168 unsigned int IEEEFloat::partCount() const {
1169   return partCountForBits(semantics->precision + 1);
1170 }
1171 
// Read-only access to the significand parts; forwards to the non-const
// overload, which does not modify the object.
const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
  return const_cast<IEEEFloat *>(this)->significandParts();
}
1175 
1176 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1177   if (partCount() > 1)
1178     return significand.parts;
1179   else
1180     return &significand.part;
1181 }
1182 
1183 void IEEEFloat::zeroSignificand() {
1184   APInt::tcSet(significandParts(), 0, partCount());
1185 }
1186 
1187 /* Increment an fcNormal floating point number's significand.  */
1188 void IEEEFloat::incrementSignificand() {
1189   integerPart carry;
1190 
1191   carry = APInt::tcIncrement(significandParts(), partCount());
1192 
1193   /* Our callers should never cause us to overflow.  */
1194   assert(carry == 0);
1195   (void)carry;
1196 }
1197 
1198 /* Add the significand of the RHS.  Returns the carry flag.  */
1199 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1200   integerPart *parts;
1201 
1202   parts = significandParts();
1203 
1204   assert(semantics == rhs.semantics);
1205   assert(exponent == rhs.exponent);
1206 
1207   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1208 }
1209 
1210 /* Subtract the significand of the RHS with a borrow flag.  Returns
1211    the borrow flag.  */
1212 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1213                                                       integerPart borrow) {
1214   integerPart *parts;
1215 
1216   parts = significandParts();
1217 
1218   assert(semantics == rhs.semantics);
1219   assert(exponent == rhs.exponent);
1220 
1221   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1222                            partCount());
1223 }
1224 
/* Multiply our significand by the significand of RHS.  If ADDEND is
   non-zero, add it on to the full-precision result of the multiplication
   (this is how fused-multiply-add is implemented).  The exponent is
   updated, the low partCount() parts of the result are stored back into
   our significand, and the lost fraction is returned.  The result is not
   necessarily normalized; see the note near the end.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Small products use the on-stack scratch buffer; larger ones go to the
  // heap and are freed at the end of the function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  // NOTE(review): tcFullMultiply is handed partsCount parts per operand;
  // confirm the destination (newPartsCount parts, or scratch[4]) is always
  // large enough for what it writes.
  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // To reuse addOrSubtractSignificand, this object is temporarily made to
    // look like a float of extended precision whose significand is
    // fullSignificand; the original state is restored afterwards.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Point our significand at the full product, using whichever union
    // member matches the part count.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  In the single-part case the sum was computed in
       significand.part itself, so copy it back to fullSignificand first.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, one more than
  // the amount derived in this comment -- confirm which of the two is stale.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Store the low partsCount parts of the product as our new significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1359 
// Plain multiplication: forward to the two-argument overload with a zero
// addend of our own semantics, so that nothing is added to the product.
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, IEEEFloat(*semantics));
}
1363 
/* Divide our significand by the significand of RHS using bit-by-bit long
   division.  The quotient replaces our significand, the exponent is
   adjusted to match, and the lost fraction (derived from the remainder)
   is returned.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* One buffer holds both working copies: the dividend in the first
     partsCount parts and the divisor in the next partsCount parts.  Up to
     two parts each fit in the on-stack scratch array.  */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place, and
     clear our significand so quotient bits can be set into it.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor: shift its MSB up to bit precision - 1.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend in the same way.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: produce one quotient bit per iteration, from bit
     precision - 1 down to bit 0.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction.  DIVIDEND now holds the remainder shifted
     left by one, so comparing it with the divisor tells which side of one
     half the discarded part lies on.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1446 
1447 unsigned int IEEEFloat::significandMSB() const {
1448   return APInt::tcMSB(significandParts(), partCount());
1449 }
1450 
1451 unsigned int IEEEFloat::significandLSB() const {
1452   return APInt::tcLSB(significandParts(), partCount());
1453 }
1454 
1455 /* Note that a zero result is NOT normalized to fcZero.  */
1456 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1457   /* Our exponent should not overflow.  */
1458   assert((ExponentType) (exponent + bits) >= exponent);
1459 
1460   exponent += bits;
1461 
1462   return shiftRight(significandParts(), partCount(), bits);
1463 }
1464 
1465 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1466 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1467   assert(bits < semantics->precision);
1468 
1469   if (bits) {
1470     unsigned int partsCount = partCount();
1471 
1472     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1473     exponent -= bits;
1474 
1475     assert(!APInt::tcIsZero(significandParts(), partsCount));
1476   }
1477 }
1478 
1479 IEEEFloat::cmpResult
1480 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1481   int compare;
1482 
1483   assert(semantics == rhs.semantics);
1484   assert(isFiniteNonZero());
1485   assert(rhs.isFiniteNonZero());
1486 
1487   compare = exponent - rhs.exponent;
1488 
1489   /* If exponents are equal, do an unsigned bignum comparison of the
1490      significands.  */
1491   if (compare == 0)
1492     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1493                                partCount());
1494 
1495   if (compare > 0)
1496     return cmpGreaterThan;
1497   else if (compare < 0)
1498     return cmpLessThan;
1499   else
1500     return cmpEqual;
1501 }
1502 
1503 /* Set the least significant BITS bits of a bignum, clear the
1504    rest.  */
1505 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1506                                       unsigned bits) {
1507   unsigned i = 0;
1508   while (bits > APInt::APINT_BITS_PER_WORD) {
1509     dst[i++] = ~(APInt::WordType)0;
1510     bits -= APInt::APINT_BITS_PER_WORD;
1511   }
1512 
1513   if (bits)
1514     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1515 
1516   while (i < parts)
1517     dst[i++] = 0;
1518 }
1519 
1520 /* Handle overflow.  Sign is preserved.  We either become infinity or
1521    the largest finite number.  */
1522 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1523   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1524     /* Infinity?  */
1525     if (rounding_mode == rmNearestTiesToEven ||
1526         rounding_mode == rmNearestTiesToAway ||
1527         (rounding_mode == rmTowardPositive && !sign) ||
1528         (rounding_mode == rmTowardNegative && sign)) {
1529       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1530         makeNaN(false, sign);
1531       else
1532         category = fcInfinity;
1533       return static_cast<opStatus>(opOverflow | opInexact);
1534     }
1535   }
1536 
1537   /* Otherwise we become the largest finite number.  */
1538   category = fcNormal;
1539   exponent = semantics->maxExponent;
1540   tcSetLeastSignificantBits(significandParts(), partCount(),
1541                             semantics->precision);
1542   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1543       semantics->nanEncoding == fltNanEncoding::AllOnes)
1544     APInt::tcClearBit(significandParts(), 0);
1545 
1546   return opInexact;
1547 }
1548 
1549 /* Returns TRUE if, when truncating the current number, with BIT the
1550    new LSB, with the given lost fraction and rounding mode, the result
1551    would need to be rounded away from zero (i.e., by increasing the
1552    signficand).  This routine must work for fcZero of both signs, and
1553    fcNormal numbers.  */
1554 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1555                                   lostFraction lost_fraction,
1556                                   unsigned int bit) const {
1557   /* NaNs and infinities should not have lost fractions.  */
1558   assert(isFiniteNonZero() || category == fcZero);
1559 
1560   /* Current callers never pass this so we don't handle it.  */
1561   assert(lost_fraction != lfExactlyZero);
1562 
1563   switch (rounding_mode) {
1564   case rmNearestTiesToAway:
1565     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1566 
1567   case rmNearestTiesToEven:
1568     if (lost_fraction == lfMoreThanHalf)
1569       return true;
1570 
1571     /* Our zeroes don't have a significand to test.  */
1572     if (lost_fraction == lfExactlyHalf && category != fcZero)
1573       return APInt::tcExtractBit(significandParts(), bit);
1574 
1575     return false;
1576 
1577   case rmTowardZero:
1578     return false;
1579 
1580   case rmTowardPositive:
1581     return !sign;
1582 
1583   case rmTowardNegative:
1584     return sign;
1585 
1586   default:
1587     break;
1588   }
1589   llvm_unreachable("Invalid rounding mode found");
1590 }
1591 
/* Normalize a finite non-zero number and round it according to
   ROUNDING_MODE, given the fraction LOST_FRACTION already lost by the
   caller.  The significand is shifted so that its MSB sits at bit
   PRECISION - 1 where the exponent range allows it, overflow is handed
   to handleOverflow, and results that become zero are canonicalized to
   fcZero.  Returns the resulting status flags; values that are not
   finite non-zero are left untouched and yield opOK.  */
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones value is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  Formats whose NaN encoding is NegativeZero
       get their sign cleared here.  */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    /* An all-zero significand is about to become the smallest denormal,
       which lives at the minimum exponent.  */
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones value is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1718 
/* Handle the cases of an addition (or, when SUBTRACT is set, a
   subtraction) in which at least one operand is a zero, an infinity or a
   NaN.  The result is decided from the pair of operand categories and
   stored in *this; the status of the operation is returned.  The
   fcNormal/fcNormal pair is not handled here; see the last case.  */
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* Only the right-hand side is a NaN: it becomes the result, and then
     shares the signalling check with the cases below.  */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  /* The result is a NaN.  A signalling NaN is quieted and reported as an
     invalid operation; a signalling right-hand side is reported too.  */
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  /* The left-hand side is already the result.  */
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  /* The result is the right-hand infinity, with its sign flipped for a
     subtraction.  */
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  /* The result is the right-hand side, with its sign flipped for a
     subtraction.  */
  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  /* NOTE(review): nothing is changed for two normal numbers; opDivByZero
     is presumably a sentinel telling the caller to do the real arithmetic
     itself -- confirm against the caller.  */
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}
1774 
1775 /* Add or subtract two normal numbers.  */
1776 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1777                                                  bool subtract) {
1778   integerPart carry;
1779   lostFraction lost_fraction;
1780   int bits;
1781 
1782   /* Determine if the operation on the absolute values is effectively
1783      an addition or subtraction.  */
1784   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1785 
1786   /* Are we bigger exponent-wise than the RHS?  */
1787   bits = exponent - rhs.exponent;
1788 
1789   /* Subtraction is more subtle than one might naively expect.  */
1790   if (subtract) {
1791     IEEEFloat temp_rhs(rhs);
1792 
1793     if (bits == 0)
1794       lost_fraction = lfExactlyZero;
1795     else if (bits > 0) {
1796       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1797       shiftSignificandLeft(1);
1798     } else {
1799       lost_fraction = shiftSignificandRight(-bits - 1);
1800       temp_rhs.shiftSignificandLeft(1);
1801     }
1802 
1803     // Should we reverse the subtraction.
1804     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1805       carry = temp_rhs.subtractSignificand
1806         (*this, lost_fraction != lfExactlyZero);
1807       copySignificand(temp_rhs);
1808       sign = !sign;
1809     } else {
1810       carry = subtractSignificand
1811         (temp_rhs, lost_fraction != lfExactlyZero);
1812     }
1813 
1814     /* Invert the lost fraction - it was on the RHS and
1815        subtracted.  */
1816     if (lost_fraction == lfLessThanHalf)
1817       lost_fraction = lfMoreThanHalf;
1818     else if (lost_fraction == lfMoreThanHalf)
1819       lost_fraction = lfLessThanHalf;
1820 
1821     /* The code above is intended to ensure that no borrow is
1822        necessary.  */
1823     assert(!carry);
1824     (void)carry;
1825   } else {
1826     if (bits > 0) {
1827       IEEEFloat temp_rhs(rhs);
1828 
1829       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1830       carry = addSignificand(temp_rhs);
1831     } else {
1832       lost_fraction = shiftSignificandRight(-bits);
1833       carry = addSignificand(rhs);
1834     }
1835 
1836     /* We have a guard bit; generating a carry cannot happen.  */
1837     assert(!carry);
1838     (void)carry;
1839   }
1840 
1841   return lost_fraction;
1842 }
1843 
1844 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1845   switch (PackCategoriesIntoKey(category, rhs.category)) {
1846   default:
1847     llvm_unreachable(nullptr);
1848 
1849   case PackCategoriesIntoKey(fcZero, fcNaN):
1850   case PackCategoriesIntoKey(fcNormal, fcNaN):
1851   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1852     assign(rhs);
1853     sign = false;
1854     [[fallthrough]];
1855   case PackCategoriesIntoKey(fcNaN, fcZero):
1856   case PackCategoriesIntoKey(fcNaN, fcNormal):
1857   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1858   case PackCategoriesIntoKey(fcNaN, fcNaN):
1859     sign ^= rhs.sign; // restore the original sign
1860     if (isSignaling()) {
1861       makeQuiet();
1862       return opInvalidOp;
1863     }
1864     return rhs.isSignaling() ? opInvalidOp : opOK;
1865 
1866   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1867   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1868   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1869     category = fcInfinity;
1870     return opOK;
1871 
1872   case PackCategoriesIntoKey(fcZero, fcNormal):
1873   case PackCategoriesIntoKey(fcNormal, fcZero):
1874   case PackCategoriesIntoKey(fcZero, fcZero):
1875     category = fcZero;
1876     return opOK;
1877 
1878   case PackCategoriesIntoKey(fcZero, fcInfinity):
1879   case PackCategoriesIntoKey(fcInfinity, fcZero):
1880     makeNaN();
1881     return opInvalidOp;
1882 
1883   case PackCategoriesIntoKey(fcNormal, fcNormal):
1884     return opOK;
1885   }
1886 }
1887 
1888 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1889   switch (PackCategoriesIntoKey(category, rhs.category)) {
1890   default:
1891     llvm_unreachable(nullptr);
1892 
1893   case PackCategoriesIntoKey(fcZero, fcNaN):
1894   case PackCategoriesIntoKey(fcNormal, fcNaN):
1895   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1896     assign(rhs);
1897     sign = false;
1898     [[fallthrough]];
1899   case PackCategoriesIntoKey(fcNaN, fcZero):
1900   case PackCategoriesIntoKey(fcNaN, fcNormal):
1901   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1902   case PackCategoriesIntoKey(fcNaN, fcNaN):
1903     sign ^= rhs.sign; // restore the original sign
1904     if (isSignaling()) {
1905       makeQuiet();
1906       return opInvalidOp;
1907     }
1908     return rhs.isSignaling() ? opInvalidOp : opOK;
1909 
1910   case PackCategoriesIntoKey(fcInfinity, fcZero):
1911   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1912   case PackCategoriesIntoKey(fcZero, fcInfinity):
1913   case PackCategoriesIntoKey(fcZero, fcNormal):
1914     return opOK;
1915 
1916   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1917     category = fcZero;
1918     return opOK;
1919 
1920   case PackCategoriesIntoKey(fcNormal, fcZero):
1921     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1922       makeNaN(false, sign);
1923     else
1924       category = fcInfinity;
1925     return opDivByZero;
1926 
1927   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1928   case PackCategoriesIntoKey(fcZero, fcZero):
1929     makeNaN();
1930     return opInvalidOp;
1931 
1932   case PackCategoriesIntoKey(fcNormal, fcNormal):
1933     return opOK;
1934   }
1935 }
1936 
1937 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1938   switch (PackCategoriesIntoKey(category, rhs.category)) {
1939   default:
1940     llvm_unreachable(nullptr);
1941 
1942   case PackCategoriesIntoKey(fcZero, fcNaN):
1943   case PackCategoriesIntoKey(fcNormal, fcNaN):
1944   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1945     assign(rhs);
1946     [[fallthrough]];
1947   case PackCategoriesIntoKey(fcNaN, fcZero):
1948   case PackCategoriesIntoKey(fcNaN, fcNormal):
1949   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1950   case PackCategoriesIntoKey(fcNaN, fcNaN):
1951     if (isSignaling()) {
1952       makeQuiet();
1953       return opInvalidOp;
1954     }
1955     return rhs.isSignaling() ? opInvalidOp : opOK;
1956 
1957   case PackCategoriesIntoKey(fcZero, fcInfinity):
1958   case PackCategoriesIntoKey(fcZero, fcNormal):
1959   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1960     return opOK;
1961 
1962   case PackCategoriesIntoKey(fcNormal, fcZero):
1963   case PackCategoriesIntoKey(fcInfinity, fcZero):
1964   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1965   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1966   case PackCategoriesIntoKey(fcZero, fcZero):
1967     makeNaN();
1968     return opInvalidOp;
1969 
1970   case PackCategoriesIntoKey(fcNormal, fcNormal):
1971     return opOK;
1972   }
1973 }
1974 
1975 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1976   switch (PackCategoriesIntoKey(category, rhs.category)) {
1977   default:
1978     llvm_unreachable(nullptr);
1979 
1980   case PackCategoriesIntoKey(fcZero, fcNaN):
1981   case PackCategoriesIntoKey(fcNormal, fcNaN):
1982   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1983     assign(rhs);
1984     [[fallthrough]];
1985   case PackCategoriesIntoKey(fcNaN, fcZero):
1986   case PackCategoriesIntoKey(fcNaN, fcNormal):
1987   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1988   case PackCategoriesIntoKey(fcNaN, fcNaN):
1989     if (isSignaling()) {
1990       makeQuiet();
1991       return opInvalidOp;
1992     }
1993     return rhs.isSignaling() ? opInvalidOp : opOK;
1994 
1995   case PackCategoriesIntoKey(fcZero, fcInfinity):
1996   case PackCategoriesIntoKey(fcZero, fcNormal):
1997   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1998     return opOK;
1999 
2000   case PackCategoriesIntoKey(fcNormal, fcZero):
2001   case PackCategoriesIntoKey(fcInfinity, fcZero):
2002   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2003   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2004   case PackCategoriesIntoKey(fcZero, fcZero):
2005     makeNaN();
2006     return opInvalidOp;
2007 
2008   case PackCategoriesIntoKey(fcNormal, fcNormal):
2009     return opDivByZero; // fake status, indicating this is not a special case
2010   }
2011 }
2012 
2013 /* Change sign.  */
2014 void IEEEFloat::changeSign() {
2015   // With NaN-as-negative-zero, neither NaN or negative zero can change
2016   // their signs.
2017   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2018       (isZero() || isNaN()))
2019     return;
2020   /* Look mummy, this one's easy.  */
2021   sign = !sign;
2022 }
2023 
2024 /* Normalized addition or subtraction.  */
2025 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2026                                              roundingMode rounding_mode,
2027                                              bool subtract) {
2028   opStatus fs;
2029 
2030   fs = addOrSubtractSpecials(rhs, subtract);
2031 
2032   /* This return code means it was not a simple case.  */
2033   if (fs == opDivByZero) {
2034     lostFraction lost_fraction;
2035 
2036     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2037     fs = normalize(rounding_mode, lost_fraction);
2038 
2039     /* Can only be zero if we lost no fraction.  */
2040     assert(category != fcZero || lost_fraction == lfExactlyZero);
2041   }
2042 
2043   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2044      positive zero unless rounding to minus infinity, except that
2045      adding two like-signed zeroes gives that zero.  */
2046   if (category == fcZero) {
2047     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2048       sign = (rounding_mode == rmTowardNegative);
2049     // NaN-in-negative-zero means zeros need to be normalized to +0.
2050     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2051       sign = false;
2052   }
2053 
2054   return fs;
2055 }
2056 
2057 /* Normalized addition.  */
2058 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2059                                    roundingMode rounding_mode) {
2060   return addOrSubtract(rhs, rounding_mode, false);
2061 }
2062 
2063 /* Normalized subtraction.  */
2064 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2065                                         roundingMode rounding_mode) {
2066   return addOrSubtract(rhs, rounding_mode, true);
2067 }
2068 
2069 /* Normalized multiply.  */
2070 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2071                                         roundingMode rounding_mode) {
2072   opStatus fs;
2073 
2074   sign ^= rhs.sign;
2075   fs = multiplySpecials(rhs);
2076 
2077   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2078     sign = false;
2079   if (isFiniteNonZero()) {
2080     lostFraction lost_fraction = multiplySignificand(rhs);
2081     fs = normalize(rounding_mode, lost_fraction);
2082     if (lost_fraction != lfExactlyZero)
2083       fs = (opStatus) (fs | opInexact);
2084   }
2085 
2086   return fs;
2087 }
2088 
2089 /* Normalized divide.  */
2090 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2091                                       roundingMode rounding_mode) {
2092   opStatus fs;
2093 
2094   sign ^= rhs.sign;
2095   fs = divideSpecials(rhs);
2096 
2097   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2098     sign = false;
2099   if (isFiniteNonZero()) {
2100     lostFraction lost_fraction = divideSignificand(rhs);
2101     fs = normalize(rounding_mode, lost_fraction);
2102     if (lost_fraction != lfExactlyZero)
2103       fs = (opStatus) (fs | opInexact);
2104   }
2105 
2106   return fs;
2107 }
2108 
2109 /* Normalized remainder.  */
2110 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2111   opStatus fs;
2112   unsigned int origSign = sign;
2113 
2114   // First handle the special cases.
2115   fs = remainderSpecials(rhs);
2116   if (fs != opDivByZero)
2117     return fs;
2118 
2119   fs = opOK;
2120 
2121   // Make sure the current value is less than twice the denom. If the addition
2122   // did not succeed (an overflow has happened), which means that the finite
2123   // value we currently posses must be less than twice the denom (as we are
2124   // using the same semantics).
2125   IEEEFloat P2 = rhs;
2126   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2127     fs = mod(P2);
2128     assert(fs == opOK);
2129   }
2130 
2131   // Lets work with absolute numbers.
2132   IEEEFloat P = rhs;
2133   P.sign = false;
2134   sign = false;
2135 
2136   //
2137   // To calculate the remainder we use the following scheme.
2138   //
2139   // The remainder is defained as follows:
2140   //
2141   // remainder = numer - rquot * denom = x - r * p
2142   //
2143   // Where r is the result of: x/p, rounded toward the nearest integral value
2144   // (with halfway cases rounded toward the even number).
2145   //
2146   // Currently, (after x mod 2p):
2147   // r is the number of 2p's present inside x, which is inherently, an even
2148   // number of p's.
2149   //
2150   // We may split the remaining calculation into 4 options:
2151   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2152   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2153   //   are done as well.
2154   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2155   //   to subtract 1p at least once.
2156   // - if x >= p then we must subtract p at least once, as x must be a
2157   //   remainder.
2158   //
2159   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2160   //
2161   // We can now split the remaining calculation to the following 3 options:
2162   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2163   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2164   //   must round up to the next even number. so we must subtract p once more.
2165   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2166   //   integral, and subtract p once more.
2167   //
2168 
2169   // Extend the semantics to prevent an overflow/underflow or inexact result.
2170   bool losesInfo;
2171   fltSemantics extendedSemantics = *semantics;
2172   extendedSemantics.maxExponent++;
2173   extendedSemantics.minExponent--;
2174   extendedSemantics.precision += 2;
2175 
2176   IEEEFloat VEx = *this;
2177   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2178   assert(fs == opOK && !losesInfo);
2179   IEEEFloat PEx = P;
2180   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2181   assert(fs == opOK && !losesInfo);
2182 
2183   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2184   // any fraction.
2185   fs = VEx.add(VEx, rmNearestTiesToEven);
2186   assert(fs == opOK);
2187 
2188   if (VEx.compare(PEx) == cmpGreaterThan) {
2189     fs = subtract(P, rmNearestTiesToEven);
2190     assert(fs == opOK);
2191 
2192     // Make VEx = this.add(this), but because we have different semantics, we do
2193     // not want to `convert` again, so we just subtract PEx twice (which equals
2194     // to the desired value).
2195     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2196     assert(fs == opOK);
2197     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2198     assert(fs == opOK);
2199 
2200     cmpResult result = VEx.compare(PEx);
2201     if (result == cmpGreaterThan || result == cmpEqual) {
2202       fs = subtract(P, rmNearestTiesToEven);
2203       assert(fs == opOK);
2204     }
2205   }
2206 
2207   if (isZero()) {
2208     sign = origSign;    // IEEE754 requires this
2209     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2210       // But some 8-bit floats only have positive 0.
2211       sign = false;
2212   }
2213 
2214   else
2215     sign ^= origSign;
2216   return fs;
2217 }
2218 
2219 /* Normalized llvm frem (C fmod). */
2220 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2221   opStatus fs;
2222   fs = modSpecials(rhs);
2223   unsigned int origSign = sign;
2224 
2225   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2226          compareAbsoluteValue(rhs) != cmpLessThan) {
2227     int Exp = ilogb(*this) - ilogb(rhs);
2228     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2229     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2230     // check for it.
2231     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2232       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2233     V.sign = sign;
2234 
2235     fs = subtract(V, rmNearestTiesToEven);
2236     assert(fs==opOK);
2237   }
2238   if (isZero()) {
2239     sign = origSign; // fmod requires this
2240     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2241       sign = false;
2242   }
2243   return fs;
2244 }
2245 
2246 /* Normalized fused-multiply-add.  */
2247 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2248                                                 const IEEEFloat &addend,
2249                                                 roundingMode rounding_mode) {
2250   opStatus fs;
2251 
2252   /* Post-multiplication sign, before addition.  */
2253   sign ^= multiplicand.sign;
2254 
2255   /* If and only if all arguments are normal do we need to do an
2256      extended-precision calculation.  */
2257   if (isFiniteNonZero() &&
2258       multiplicand.isFiniteNonZero() &&
2259       addend.isFinite()) {
2260     lostFraction lost_fraction;
2261 
2262     lost_fraction = multiplySignificand(multiplicand, addend);
2263     fs = normalize(rounding_mode, lost_fraction);
2264     if (lost_fraction != lfExactlyZero)
2265       fs = (opStatus) (fs | opInexact);
2266 
2267     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2268        positive zero unless rounding to minus infinity, except that
2269        adding two like-signed zeroes gives that zero.  */
2270     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2271       sign = (rounding_mode == rmTowardNegative);
2272       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2273         sign = false;
2274     }
2275   } else {
2276     fs = multiplySpecials(multiplicand);
2277 
2278     /* FS can only be opOK or opInvalidOp.  There is no more work
2279        to do in the latter case.  The IEEE-754R standard says it is
2280        implementation-defined in this case whether, if ADDEND is a
2281        quiet NaN, we raise invalid op; this implementation does so.
2282 
2283        If we need to do the addition we can do so with normal
2284        precision.  */
2285     if (fs == opOK)
2286       fs = addOrSubtract(addend, rounding_mode, false);
2287   }
2288 
2289   return fs;
2290 }
2291 
2292 /* Rounding-mode correct round to integral value.  */
2293 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2294   opStatus fs;
2295 
2296   if (isInfinity())
2297     // [IEEE Std 754-2008 6.1]:
2298     // The behavior of infinity in floating-point arithmetic is derived from the
2299     // limiting cases of real arithmetic with operands of arbitrarily
2300     // large magnitude, when such a limit exists.
2301     // ...
2302     // Operations on infinite operands are usually exact and therefore signal no
2303     // exceptions ...
2304     return opOK;
2305 
2306   if (isNaN()) {
2307     if (isSignaling()) {
2308       // [IEEE Std 754-2008 6.2]:
2309       // Under default exception handling, any operation signaling an invalid
2310       // operation exception and for which a floating-point result is to be
2311       // delivered shall deliver a quiet NaN.
2312       makeQuiet();
2313       // [IEEE Std 754-2008 6.2]:
2314       // Signaling NaNs shall be reserved operands that, under default exception
2315       // handling, signal the invalid operation exception(see 7.2) for every
2316       // general-computational and signaling-computational operation except for
2317       // the conversions described in 5.12.
2318       return opInvalidOp;
2319     } else {
2320       // [IEEE Std 754-2008 6.2]:
2321       // For an operation with quiet NaN inputs, other than maximum and minimum
2322       // operations, if a floating-point result is to be delivered the result
2323       // shall be a quiet NaN which should be one of the input NaNs.
2324       // ...
2325       // Every general-computational and quiet-computational operation involving
2326       // one or more input NaNs, none of them signaling, shall signal no
2327       // exception, except fusedMultiplyAdd might signal the invalid operation
2328       // exception(see 7.2).
2329       return opOK;
2330     }
2331   }
2332 
2333   if (isZero()) {
2334     // [IEEE Std 754-2008 6.3]:
2335     // ... the sign of the result of conversions, the quantize operation, the
2336     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2337     // the sign of the first or only operand.
2338     return opOK;
2339   }
2340 
2341   // If the exponent is large enough, we know that this value is already
2342   // integral, and the arithmetic below would potentially cause it to saturate
2343   // to +/-Inf.  Bail out early instead.
2344   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2345     return opOK;
2346 
2347   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2348   // precision of our format, and then subtract it back off again.  The choice
2349   // of rounding modes for the addition/subtraction determines the rounding mode
2350   // for our integral rounding as well.
2351   // NOTE: When the input value is negative, we do subtraction followed by
2352   // addition instead.
2353   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2354   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2355   IEEEFloat MagicConstant(*semantics);
2356   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2357                                       rmNearestTiesToEven);
2358   assert(fs == opOK);
2359   MagicConstant.sign = sign;
2360 
2361   // Preserve the input sign so that we can handle the case of zero result
2362   // correctly.
2363   bool inputSign = isNegative();
2364 
2365   fs = add(MagicConstant, rounding_mode);
2366 
2367   // Current value and 'MagicConstant' are both integers, so the result of the
2368   // subtraction is always exact according to Sterbenz' lemma.
2369   subtract(MagicConstant, rounding_mode);
2370 
2371   // Restore the input sign.
2372   if (inputSign != isNegative())
2373     changeSign();
2374 
2375   return fs;
2376 }
2377 
2378 
2379 /* Comparison requires normalized numbers.  */
2380 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2381   cmpResult result;
2382 
2383   assert(semantics == rhs.semantics);
2384 
2385   switch (PackCategoriesIntoKey(category, rhs.category)) {
2386   default:
2387     llvm_unreachable(nullptr);
2388 
2389   case PackCategoriesIntoKey(fcNaN, fcZero):
2390   case PackCategoriesIntoKey(fcNaN, fcNormal):
2391   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2392   case PackCategoriesIntoKey(fcNaN, fcNaN):
2393   case PackCategoriesIntoKey(fcZero, fcNaN):
2394   case PackCategoriesIntoKey(fcNormal, fcNaN):
2395   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2396     return cmpUnordered;
2397 
2398   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2399   case PackCategoriesIntoKey(fcInfinity, fcZero):
2400   case PackCategoriesIntoKey(fcNormal, fcZero):
2401     if (sign)
2402       return cmpLessThan;
2403     else
2404       return cmpGreaterThan;
2405 
2406   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2407   case PackCategoriesIntoKey(fcZero, fcInfinity):
2408   case PackCategoriesIntoKey(fcZero, fcNormal):
2409     if (rhs.sign)
2410       return cmpGreaterThan;
2411     else
2412       return cmpLessThan;
2413 
2414   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2415     if (sign == rhs.sign)
2416       return cmpEqual;
2417     else if (sign)
2418       return cmpLessThan;
2419     else
2420       return cmpGreaterThan;
2421 
2422   case PackCategoriesIntoKey(fcZero, fcZero):
2423     return cmpEqual;
2424 
2425   case PackCategoriesIntoKey(fcNormal, fcNormal):
2426     break;
2427   }
2428 
2429   /* Two normal numbers.  Do they have the same sign?  */
2430   if (sign != rhs.sign) {
2431     if (sign)
2432       result = cmpLessThan;
2433     else
2434       result = cmpGreaterThan;
2435   } else {
2436     /* Compare absolute values; invert result if negative.  */
2437     result = compareAbsoluteValue(rhs);
2438 
2439     if (sign) {
2440       if (result == cmpLessThan)
2441         result = cmpGreaterThan;
2442       else if (result == cmpGreaterThan)
2443         result = cmpLessThan;
2444     }
2445   }
2446 
2447   return result;
2448 }
2449 
2450 /// IEEEFloat::convert - convert a value of one floating point type to another.
2451 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2452 /// records whether the transformation lost information, i.e. whether
2453 /// converting the result back to the original type will produce the
2454 /// original value (this is almost the same as return value==fsOK, but there
2455 /// are edge cases where this is not so).
2456 
2457 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2458                                        roundingMode rounding_mode,
2459                                        bool *losesInfo) {
2460   lostFraction lostFraction;
2461   unsigned int newPartCount, oldPartCount;
2462   opStatus fs;
2463   int shift;
2464   const fltSemantics &fromSemantics = *semantics;
2465   bool is_signaling = isSignaling();
2466 
2467   lostFraction = lfExactlyZero;
2468   newPartCount = partCountForBits(toSemantics.precision + 1);
2469   oldPartCount = partCount();
2470   shift = toSemantics.precision - fromSemantics.precision;
2471 
2472   bool X86SpecialNan = false;
2473   if (&fromSemantics == &semX87DoubleExtended &&
2474       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2475       (!(*significandParts() & 0x8000000000000000ULL) ||
2476        !(*significandParts() & 0x4000000000000000ULL))) {
2477     // x86 has some unusual NaNs which cannot be represented in any other
2478     // format; note them here.
2479     X86SpecialNan = true;
2480   }
2481 
2482   // If this is a truncation of a denormal number, and the target semantics
2483   // has larger exponent range than the source semantics (this can happen
2484   // when truncating from PowerPC double-double to double format), the
2485   // right shift could lose result mantissa bits.  Adjust exponent instead
2486   // of performing excessive shift.
2487   // Also do a similar trick in case shifting denormal would produce zero
2488   // significand as this case isn't handled correctly by normalize.
2489   if (shift < 0 && isFiniteNonZero()) {
2490     int omsb = significandMSB() + 1;
2491     int exponentChange = omsb - fromSemantics.precision;
2492     if (exponent + exponentChange < toSemantics.minExponent)
2493       exponentChange = toSemantics.minExponent - exponent;
2494     if (exponentChange < shift)
2495       exponentChange = shift;
2496     if (exponentChange < 0) {
2497       shift -= exponentChange;
2498       exponent += exponentChange;
2499     } else if (omsb <= -shift) {
2500       exponentChange = omsb + shift - 1; // leave at least one bit set
2501       shift -= exponentChange;
2502       exponent += exponentChange;
2503     }
2504   }
2505 
2506   // If this is a truncation, perform the shift before we narrow the storage.
2507   if (shift < 0 && (isFiniteNonZero() ||
2508                     (category == fcNaN && semantics->nonFiniteBehavior !=
2509                                               fltNonfiniteBehavior::NanOnly)))
2510     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2511 
2512   // Fix the storage so it can hold to new value.
2513   if (newPartCount > oldPartCount) {
2514     // The new type requires more storage; make it available.
2515     integerPart *newParts;
2516     newParts = new integerPart[newPartCount];
2517     APInt::tcSet(newParts, 0, newPartCount);
2518     if (isFiniteNonZero() || category==fcNaN)
2519       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2520     freeSignificand();
2521     significand.parts = newParts;
2522   } else if (newPartCount == 1 && oldPartCount != 1) {
2523     // Switch to built-in storage for a single part.
2524     integerPart newPart = 0;
2525     if (isFiniteNonZero() || category==fcNaN)
2526       newPart = significandParts()[0];
2527     freeSignificand();
2528     significand.part = newPart;
2529   }
2530 
2531   // Now that we have the right storage, switch the semantics.
2532   semantics = &toSemantics;
2533 
2534   // If this is an extension, perform the shift now that the storage is
2535   // available.
2536   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2537     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2538 
2539   if (isFiniteNonZero()) {
2540     fs = normalize(rounding_mode, lostFraction);
2541     *losesInfo = (fs != opOK);
2542   } else if (category == fcNaN) {
2543     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2544       *losesInfo =
2545           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2546       makeNaN(false, sign);
2547       return is_signaling ? opInvalidOp : opOK;
2548     }
2549 
2550     // If NaN is negative zero, we need to create a new NaN to avoid converting
2551     // NaN to -Inf.
2552     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2553         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2554       makeNaN(false, false);
2555 
2556     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2557 
2558     // For x87 extended precision, we want to make a NaN, not a special NaN if
2559     // the input wasn't special either.
2560     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2561       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2562 
2563     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2564     // This also guarantees that a sNaN does not become Inf on a truncation
2565     // that loses all payload bits.
2566     if (is_signaling) {
2567       makeQuiet();
2568       fs = opInvalidOp;
2569     } else {
2570       fs = opOK;
2571     }
2572   } else if (category == fcInfinity &&
2573              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2574     makeNaN(false, sign);
2575     *losesInfo = true;
2576     fs = opInexact;
2577   } else if (category == fcZero &&
2578              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2579     // Negative zero loses info, but positive zero doesn't.
2580     *losesInfo =
2581         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2582     fs = *losesInfo ? opInexact : opOK;
2583     // NaN is negative zero means -0 -> +0, which can lose information
2584     sign = false;
2585   } else {
2586     *losesInfo = false;
2587     fs = opOK;
2588   }
2589 
2590   return fs;
2591 }
2592 
2593 /* Convert a floating point number to an integer according to the
2594    rounding mode.  If the rounded integer value is out of range this
2595    returns an invalid operation exception and the contents of the
2596    destination parts are unspecified.  If the rounded value is in
2597    range but the floating point number is not the exact integer, the C
2598    standard doesn't require an inexact exception to be raised.  IEEE
2599    854 does require it so we do that.
2600 
2601    Note that for conversions to integer type the C standard requires
2602    round-to-zero to always be used.  */
2603 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2604     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2605     roundingMode rounding_mode, bool *isExact) const {
2606   lostFraction lost_fraction;
2607   const integerPart *src;
2608   unsigned int dstPartsCount, truncatedBits;
2609 
2610   *isExact = false;
2611 
2612   /* Handle the three special cases first.  */
2613   if (category == fcInfinity || category == fcNaN)
2614     return opInvalidOp;
2615 
2616   dstPartsCount = partCountForBits(width);
2617   assert(dstPartsCount <= parts.size() && "Integer too big");
2618 
2619   if (category == fcZero) {
2620     APInt::tcSet(parts.data(), 0, dstPartsCount);
2621     // Negative zero can't be represented as an int.
2622     *isExact = !sign;
2623     return opOK;
2624   }
2625 
2626   src = significandParts();
2627 
2628   /* Step 1: place our absolute value, with any fraction truncated, in
2629      the destination.  */
2630   if (exponent < 0) {
2631     /* Our absolute value is less than one; truncate everything.  */
2632     APInt::tcSet(parts.data(), 0, dstPartsCount);
2633     /* For exponent -1 the integer bit represents .5, look at that.
2634        For smaller exponents leftmost truncated bit is 0. */
2635     truncatedBits = semantics->precision -1U - exponent;
2636   } else {
2637     /* We want the most significant (exponent + 1) bits; the rest are
2638        truncated.  */
2639     unsigned int bits = exponent + 1U;
2640 
2641     /* Hopelessly large in magnitude?  */
2642     if (bits > width)
2643       return opInvalidOp;
2644 
2645     if (bits < semantics->precision) {
2646       /* We truncate (semantics->precision - bits) bits.  */
2647       truncatedBits = semantics->precision - bits;
2648       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2649     } else {
2650       /* We want at least as many bits as are available.  */
2651       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2652                        0);
2653       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2654                          bits - semantics->precision);
2655       truncatedBits = 0;
2656     }
2657   }
2658 
2659   /* Step 2: work out any lost fraction, and increment the absolute
2660      value if we would round away from zero.  */
2661   if (truncatedBits) {
2662     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2663                                                   truncatedBits);
2664     if (lost_fraction != lfExactlyZero &&
2665         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2666       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2667         return opInvalidOp;     /* Overflow.  */
2668     }
2669   } else {
2670     lost_fraction = lfExactlyZero;
2671   }
2672 
2673   /* Step 3: check if we fit in the destination.  */
2674   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2675 
2676   if (sign) {
2677     if (!isSigned) {
2678       /* Negative numbers cannot be represented as unsigned.  */
2679       if (omsb != 0)
2680         return opInvalidOp;
2681     } else {
2682       /* It takes omsb bits to represent the unsigned integer value.
2683          We lose a bit for the sign, but care is needed as the
2684          maximally negative integer is a special case.  */
2685       if (omsb == width &&
2686           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2687         return opInvalidOp;
2688 
2689       /* This case can happen because of rounding.  */
2690       if (omsb > width)
2691         return opInvalidOp;
2692     }
2693 
2694     APInt::tcNegate (parts.data(), dstPartsCount);
2695   } else {
2696     if (omsb >= width + !isSigned)
2697       return opInvalidOp;
2698   }
2699 
2700   if (lost_fraction == lfExactlyZero) {
2701     *isExact = true;
2702     return opOK;
2703   } else
2704     return opInexact;
2705 }
2706 
2707 /* Same as convertToSignExtendedInteger, except we provide
2708    deterministic values in case of an invalid operation exception,
2709    namely zero for NaNs and the minimal or maximal value respectively
2710    for underflow or overflow.
2711    The *isExact output tells whether the result is exact, in the sense
2712    that converting it back to the original floating point type produces
2713    the original value.  This is almost equivalent to result==opOK,
2714    except for negative zeroes.
2715 */
2716 IEEEFloat::opStatus
2717 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2718                             unsigned int width, bool isSigned,
2719                             roundingMode rounding_mode, bool *isExact) const {
2720   opStatus fs;
2721 
2722   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2723                                     isExact);
2724 
2725   if (fs == opInvalidOp) {
2726     unsigned int bits, dstPartsCount;
2727 
2728     dstPartsCount = partCountForBits(width);
2729     assert(dstPartsCount <= parts.size() && "Integer too big");
2730 
2731     if (category == fcNaN)
2732       bits = 0;
2733     else if (sign)
2734       bits = isSigned;
2735     else
2736       bits = width - isSigned;
2737 
2738     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2739     if (sign && isSigned)
2740       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2741   }
2742 
2743   return fs;
2744 }
2745 
2746 /* Convert an unsigned integer SRC to a floating point number,
2747    rounding according to ROUNDING_MODE.  The sign of the floating
2748    point number is not modified.  */
2749 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2750     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2751   unsigned int omsb, precision, dstCount;
2752   integerPart *dst;
2753   lostFraction lost_fraction;
2754 
2755   category = fcNormal;
2756   omsb = APInt::tcMSB(src, srcCount) + 1;
2757   dst = significandParts();
2758   dstCount = partCount();
2759   precision = semantics->precision;
2760 
2761   /* We want the most significant PRECISION bits of SRC.  There may not
2762      be that many; extract what we can.  */
2763   if (precision <= omsb) {
2764     exponent = omsb - 1;
2765     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2766                                                   omsb - precision);
2767     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2768   } else {
2769     exponent = precision - 1;
2770     lost_fraction = lfExactlyZero;
2771     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2772   }
2773 
2774   return normalize(rounding_mode, lost_fraction);
2775 }
2776 
2777 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2778                                                 roundingMode rounding_mode) {
2779   unsigned int partCount = Val.getNumWords();
2780   APInt api = Val;
2781 
2782   sign = false;
2783   if (isSigned && api.isNegative()) {
2784     sign = true;
2785     api = -api;
2786   }
2787 
2788   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2789 }
2790 
2791 /* Convert a two's complement integer SRC to a floating point number,
2792    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2793    integer is signed, in which case it must be sign-extended.  */
2794 IEEEFloat::opStatus
2795 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2796                                           unsigned int srcCount, bool isSigned,
2797                                           roundingMode rounding_mode) {
2798   opStatus status;
2799 
2800   if (isSigned &&
2801       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2802     integerPart *copy;
2803 
2804     /* If we're signed and negative negate a copy.  */
2805     sign = true;
2806     copy = new integerPart[srcCount];
2807     APInt::tcAssign(copy, src, srcCount);
2808     APInt::tcNegate(copy, srcCount);
2809     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2810     delete [] copy;
2811   } else {
2812     sign = false;
2813     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2814   }
2815 
2816   return status;
2817 }
2818 
2819 /* FIXME: should this just take a const APInt reference?  */
2820 IEEEFloat::opStatus
2821 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2822                                           unsigned int width, bool isSigned,
2823                                           roundingMode rounding_mode) {
2824   unsigned int partCount = partCountForBits(width);
2825   APInt api = APInt(width, ArrayRef(parts, partCount));
2826 
2827   sign = false;
2828   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2829     sign = true;
2830     api = -api;
2831   }
2832 
2833   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2834 }
2835 
2836 Expected<IEEEFloat::opStatus>
2837 IEEEFloat::convertFromHexadecimalString(StringRef s,
2838                                         roundingMode rounding_mode) {
2839   lostFraction lost_fraction = lfExactlyZero;
2840 
2841   category = fcNormal;
2842   zeroSignificand();
2843   exponent = 0;
2844 
2845   integerPart *significand = significandParts();
2846   unsigned partsCount = partCount();
2847   unsigned bitPos = partsCount * integerPartWidth;
2848   bool computedTrailingFraction = false;
2849 
2850   // Skip leading zeroes and any (hexa)decimal point.
2851   StringRef::iterator begin = s.begin();
2852   StringRef::iterator end = s.end();
2853   StringRef::iterator dot;
2854   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2855   if (!PtrOrErr)
2856     return PtrOrErr.takeError();
2857   StringRef::iterator p = *PtrOrErr;
2858   StringRef::iterator firstSignificantDigit = p;
2859 
2860   while (p != end) {
2861     integerPart hex_value;
2862 
2863     if (*p == '.') {
2864       if (dot != end)
2865         return createError("String contains multiple dots");
2866       dot = p++;
2867       continue;
2868     }
2869 
2870     hex_value = hexDigitValue(*p);
2871     if (hex_value == UINT_MAX)
2872       break;
2873 
2874     p++;
2875 
2876     // Store the number while we have space.
2877     if (bitPos) {
2878       bitPos -= 4;
2879       hex_value <<= bitPos % integerPartWidth;
2880       significand[bitPos / integerPartWidth] |= hex_value;
2881     } else if (!computedTrailingFraction) {
2882       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2883       if (!FractOrErr)
2884         return FractOrErr.takeError();
2885       lost_fraction = *FractOrErr;
2886       computedTrailingFraction = true;
2887     }
2888   }
2889 
2890   /* Hex floats require an exponent but not a hexadecimal point.  */
2891   if (p == end)
2892     return createError("Hex strings require an exponent");
2893   if (*p != 'p' && *p != 'P')
2894     return createError("Invalid character in significand");
2895   if (p == begin)
2896     return createError("Significand has no digits");
2897   if (dot != end && p - begin == 1)
2898     return createError("Significand has no digits");
2899 
2900   /* Ignore the exponent if we are zero.  */
2901   if (p != firstSignificantDigit) {
2902     int expAdjustment;
2903 
2904     /* Implicit hexadecimal point?  */
2905     if (dot == end)
2906       dot = p;
2907 
2908     /* Calculate the exponent adjustment implicit in the number of
2909        significant digits.  */
2910     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2911     if (expAdjustment < 0)
2912       expAdjustment++;
2913     expAdjustment = expAdjustment * 4 - 1;
2914 
2915     /* Adjust for writing the significand starting at the most
2916        significant nibble.  */
2917     expAdjustment += semantics->precision;
2918     expAdjustment -= partsCount * integerPartWidth;
2919 
2920     /* Adjust for the given exponent.  */
2921     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2922     if (!ExpOrErr)
2923       return ExpOrErr.takeError();
2924     exponent = *ExpOrErr;
2925   }
2926 
2927   return normalize(rounding_mode, lost_fraction);
2928 }
2929 
/* Set this value to the integer held in the SIGPARTCOUNT parts at
   DECSIGPARTS scaled by 10^EXP, rounded according to ROUNDING_MODE.

   The product (or quotient) with 5^|EXP| is computed in a temporary
   format that is wider than the destination.  If the accumulated error
   bound shows that truncating the wide result cannot change the final
   rounding, the result is extracted and normalized; otherwise the
   working width is doubled and the computation is repeated.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* Scratch semantics for the wide intermediate values; its precision is
     filled in on every iteration of the loop below.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  /* Initial working width: the destination precision plus a few guard
     bits, rounded up to whole parts.  */
  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  /* Retry with twice the working width until correct rounding is
     guaranteed; the only exit is the return inside the loop.  */
  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    /* Load both operands into the wide format, remembering whether either
       conversion was inexact.  */
    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    /* Error bound and distance to the nearest rounding boundary; going by
       the names, both are measured in half-ulps.  */
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        /* Never ask for more excess bits than the wide value holds.  */
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      /* Recompute the lost fraction from the bits that were cut off so
         that normalize can round in the caller's rounding mode.  */
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}
3014 
3015 Expected<IEEEFloat::opStatus>
3016 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3017   decimalInfo D;
3018   opStatus fs;
3019 
3020   /* Scan the text.  */
3021   StringRef::iterator p = str.begin();
3022   if (Error Err = interpretDecimal(p, str.end(), &D))
3023     return std::move(Err);
3024 
3025   /* Handle the quick cases.  First the case of no significant digits,
3026      i.e. zero, and then exponents that are obviously too large or too
3027      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
3028      definitely overflows if
3029 
3030            (exp - 1) * L >= maxExponent
3031 
3032      and definitely underflows to zero where
3033 
3034            (exp + 1) * L <= minExponent - precision
3035 
3036      With integer arithmetic the tightest bounds for L are
3037 
3038            93/28 < L < 196/59            [ numerator <= 256 ]
3039            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
3040   */
3041 
3042   // Test if we have a zero number allowing for strings with no null terminators
3043   // and zero decimals with non-zero exponents.
3044   //
3045   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3046   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3047   // be at most one dot. On the other hand, if we have a zero with a non-zero
3048   // exponent, then we know that D.firstSigDigit will be non-numeric.
3049   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3050     category = fcZero;
3051     fs = opOK;
3052     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3053       sign = false;
3054 
3055     /* Check whether the normalized exponent is high enough to overflow
3056        max during the log-rebasing in the max-exponent check below. */
3057   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3058     fs = handleOverflow(rounding_mode);
3059 
3060   /* If it wasn't, then it also wasn't high enough to overflow max
3061      during the log-rebasing in the min-exponent check.  Check that it
3062      won't overflow min in either check, then perform the min-exponent
3063      check. */
3064   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3065              (D.normalizedExponent + 1) * 28738 <=
3066                8651 * (semantics->minExponent - (int) semantics->precision)) {
3067     /* Underflow to zero and round.  */
3068     category = fcNormal;
3069     zeroSignificand();
3070     fs = normalize(rounding_mode, lfLessThanHalf);
3071 
3072   /* We can finally safely perform the max-exponent check. */
3073   } else if ((D.normalizedExponent - 1) * 42039
3074              >= 12655 * semantics->maxExponent) {
3075     /* Overflow and round.  */
3076     fs = handleOverflow(rounding_mode);
3077   } else {
3078     integerPart *decSignificand;
3079     unsigned int partCount;
3080 
3081     /* A tight upper bound on number of bits required to hold an
3082        N-digit decimal integer is N * 196 / 59.  Allocate enough space
3083        to hold the full significand, and an extra part required by
3084        tcMultiplyPart.  */
3085     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3086     partCount = partCountForBits(1 + 196 * partCount / 59);
3087     decSignificand = new integerPart[partCount + 1];
3088     partCount = 0;
3089 
3090     /* Convert to binary efficiently - we do almost all multiplication
3091        in an integerPart.  When this would overflow do we do a single
3092        bignum multiplication, and then revert again to multiplication
3093        in an integerPart.  */
3094     do {
3095       integerPart decValue, val, multiplier;
3096 
3097       val = 0;
3098       multiplier = 1;
3099 
3100       do {
3101         if (*p == '.') {
3102           p++;
3103           if (p == str.end()) {
3104             break;
3105           }
3106         }
3107         decValue = decDigitValue(*p++);
3108         if (decValue >= 10U) {
3109           delete[] decSignificand;
3110           return createError("Invalid character in significand");
3111         }
3112         multiplier *= 10;
3113         val = val * 10 + decValue;
3114         /* The maximum number that can be multiplied by ten with any
3115            digit added without overflowing an integerPart.  */
3116       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3117 
3118       /* Multiply out the current part.  */
3119       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3120                             partCount, partCount + 1, false);
3121 
3122       /* If we used another part (likely but not guaranteed), increase
3123          the count.  */
3124       if (decSignificand[partCount])
3125         partCount++;
3126     } while (p <= D.lastSigDigit);
3127 
3128     category = fcNormal;
3129     fs = roundSignificandWithExponent(decSignificand, partCount,
3130                                       D.exponent, rounding_mode);
3131 
3132     delete [] decSignificand;
3133   }
3134 
3135   return fs;
3136 }
3137 
3138 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3139   const size_t MIN_NAME_SIZE = 3;
3140 
3141   if (str.size() < MIN_NAME_SIZE)
3142     return false;
3143 
3144   if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3145     makeInf(false);
3146     return true;
3147   }
3148 
3149   bool IsNegative = str.front() == '-';
3150   if (IsNegative) {
3151     str = str.drop_front();
3152     if (str.size() < MIN_NAME_SIZE)
3153       return false;
3154 
3155     if (str == "inf" || str == "INFINITY" || str == "Inf") {
3156       makeInf(true);
3157       return true;
3158     }
3159   }
3160 
3161   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3162   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3163   if (IsSignaling) {
3164     str = str.drop_front();
3165     if (str.size() < MIN_NAME_SIZE)
3166       return false;
3167   }
3168 
3169   if (str.starts_with("nan") || str.starts_with("NaN")) {
3170     str = str.drop_front(3);
3171 
3172     // A NaN without payload.
3173     if (str.empty()) {
3174       makeNaN(IsSignaling, IsNegative);
3175       return true;
3176     }
3177 
3178     // Allow the payload to be inside parentheses.
3179     if (str.front() == '(') {
3180       // Parentheses should be balanced (and not empty).
3181       if (str.size() <= 2 || str.back() != ')')
3182         return false;
3183 
3184       str = str.slice(1, str.size() - 1);
3185     }
3186 
3187     // Determine the payload number's radix.
3188     unsigned Radix = 10;
3189     if (str[0] == '0') {
3190       if (str.size() > 1 && tolower(str[1]) == 'x') {
3191         str = str.drop_front(2);
3192         Radix = 16;
3193       } else
3194         Radix = 8;
3195     }
3196 
3197     // Parse the payload and make the NaN.
3198     APInt Payload;
3199     if (!str.getAsInteger(Radix, Payload)) {
3200       makeNaN(IsSignaling, IsNegative, &Payload);
3201       return true;
3202     }
3203   }
3204 
3205   return false;
3206 }
3207 
3208 Expected<IEEEFloat::opStatus>
3209 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3210   if (str.empty())
3211     return createError("Invalid string length");
3212 
3213   // Handle special cases.
3214   if (convertFromStringSpecials(str))
3215     return opOK;
3216 
3217   /* Handle a leading minus sign.  */
3218   StringRef::iterator p = str.begin();
3219   size_t slen = str.size();
3220   sign = *p == '-' ? 1 : 0;
3221   if (*p == '-' || *p == '+') {
3222     p++;
3223     slen--;
3224     if (!slen)
3225       return createError("String has no digits");
3226   }
3227 
3228   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3229     if (slen == 2)
3230       return createError("Invalid string");
3231     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3232                                         rounding_mode);
3233   }
3234 
3235   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3236 }
3237 
3238 /* Write out a hexadecimal representation of the floating point value
3239    to DST, which must be of sufficient size, in the C99 form
3240    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3241    excluding the terminating NUL.
3242 
3243    If UPPERCASE, the output is in upper case, otherwise in lower case.
3244 
3245    HEXDIGITS digits appear altogether, rounding the value if
3246    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3247    number precisely is used instead.  If nothing would appear after
3248    the decimal point it is suppressed.
3249 
3250    The decimal exponent is always printed and has at least one digit.
3251    Zero values display an exponent of zero.  Infinities and NaNs
3252    appear as "infinity" or "nan" respectively.
3253 
3254    The above rules are as specified by C99.  There is ambiguity about
3255    what the leading hexadecimal digit should be.  This implementation
3256    uses whatever is necessary so that the exponent is displayed as
3257    stored.  This implies the exponent will fall within the IEEE format
3258    range, and the leading hexadecimal digit will be 0 (for denormals),
3259    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3260    any other digits zero).
3261 */
3262 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3263                                            bool upperCase,
3264                                            roundingMode rounding_mode) const {
3265   char *p;
3266 
3267   p = dst;
3268   if (sign)
3269     *dst++ = '-';
3270 
3271   switch (category) {
3272   case fcInfinity:
3273     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3274     dst += sizeof infinityL - 1;
3275     break;
3276 
3277   case fcNaN:
3278     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3279     dst += sizeof NaNU - 1;
3280     break;
3281 
3282   case fcZero:
3283     *dst++ = '0';
3284     *dst++ = upperCase ? 'X': 'x';
3285     *dst++ = '0';
3286     if (hexDigits > 1) {
3287       *dst++ = '.';
3288       memset (dst, '0', hexDigits - 1);
3289       dst += hexDigits - 1;
3290     }
3291     *dst++ = upperCase ? 'P': 'p';
3292     *dst++ = '0';
3293     break;
3294 
3295   case fcNormal:
3296     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3297     break;
3298   }
3299 
3300   *dst = 0;
3301 
3302   return static_cast<unsigned int>(dst - p);
3303 }
3304 
/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.

   Writes "0x" (or "0X"), the digits with an optional hexadecimal point,
   'p' (or 'P') and the exponent starting at DST, and returns whatever
   writeSignedDecimal returns after the exponent has been written.  No
   sign and no terminating NUL are written here.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  /* Left shift that brings the top of those valueBits up to the top of
     an integerPart.  */
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerPart-sized chunks needed to cover valueBits.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  /* Emit digits chunk by chunk, most significant first, until either
     the requested digits or the chunks run out.  */
  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Fill the low end of "part" with the top bits of the next lower
       significand part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Increment the written digits from the least significant end,
       carrying leftwards for as long as a digit wraps around to '0'.
       Note that hexDigitChars has a trailing '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}
3410 
3411 hash_code hash_value(const IEEEFloat &Arg) {
3412   if (!Arg.isFiniteNonZero())
3413     return hash_combine((uint8_t)Arg.category,
3414                         // NaN has no sign, fix it at zero.
3415                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3416                         Arg.semantics->precision);
3417 
3418   // Normal floats need their exponent and significand hashed.
3419   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3420                       Arg.semantics->precision, Arg.exponent,
3421                       hash_combine_range(
3422                         Arg.significandParts(),
3423                         Arg.significandParts() + Arg.partCount()));
3424 }
3425 
3426 // Conversion from APFloat to/from host float/double.  It may eventually be
3427 // possible to eliminate these and have everybody deal with APFloats, but that
3428 // will take a while.  This approach will not easily extend to long double.
3429 // Current implementation requires integerPartWidth==64, which is correct at
3430 // the moment but could be made more general.
3431 
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3434 
3435 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3436   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3437   assert(partCount()==2);
3438 
3439   uint64_t myexponent, mysignificand;
3440 
3441   if (isFiniteNonZero()) {
3442     myexponent = exponent+16383; //bias
3443     mysignificand = significandParts()[0];
3444     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3445       myexponent = 0;   // denormal
3446   } else if (category==fcZero) {
3447     myexponent = 0;
3448     mysignificand = 0;
3449   } else if (category==fcInfinity) {
3450     myexponent = 0x7fff;
3451     mysignificand = 0x8000000000000000ULL;
3452   } else {
3453     assert(category == fcNaN && "Unknown category");
3454     myexponent = 0x7fff;
3455     mysignificand = significandParts()[0];
3456   }
3457 
3458   uint64_t words[2];
3459   words[0] = mysignificand;
3460   words[1] =  ((uint64_t)(sign & 1) << 15) |
3461               (myexponent & 0x7fffLL);
3462   return APInt(80, words);
3463 }
3464 
3465 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3466   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3467   assert(partCount()==2);
3468 
3469   uint64_t words[2];
3470   opStatus fs;
3471   bool losesInfo;
3472 
3473   // Convert number to double.  To avoid spurious underflows, we re-
3474   // normalize against the "double" minExponent first, and only *then*
3475   // truncate the mantissa.  The result of that second conversion
3476   // may be inexact, but should never underflow.
3477   // Declare fltSemantics before APFloat that uses it (and
3478   // saves pointer to it) to ensure correct destruction order.
3479   fltSemantics extendedSemantics = *semantics;
3480   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3481   IEEEFloat extended(*this);
3482   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3483   assert(fs == opOK && !losesInfo);
3484   (void)fs;
3485 
3486   IEEEFloat u(extended);
3487   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3488   assert(fs == opOK || fs == opInexact);
3489   (void)fs;
3490   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3491 
3492   // If conversion was exact or resulted in a special case, we're done;
3493   // just set the second double to zero.  Otherwise, re-convert back to
3494   // the extended format and compute the difference.  This now should
3495   // convert exactly to double.
3496   if (u.isFiniteNonZero() && losesInfo) {
3497     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3498     assert(fs == opOK && !losesInfo);
3499     (void)fs;
3500 
3501     IEEEFloat v(extended);
3502     v.subtract(u, rmNearestTiesToEven);
3503     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3504     assert(fs == opOK && !losesInfo);
3505     (void)fs;
3506     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3507   } else {
3508     words[1] = 0;
3509   }
3510 
3511   return APInt(128, words);
3512 }
3513 
3514 template <const fltSemantics &S>
3515 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3516   assert(semantics == &S);
3517 
3518   constexpr int bias = -(S.minExponent - 1);
3519   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3520   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3521   constexpr integerPart integer_bit =
3522       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3523   constexpr uint64_t significand_mask = integer_bit - 1;
3524   constexpr unsigned int exponent_bits =
3525       S.sizeInBits - 1 - trailing_significand_bits;
3526   static_assert(exponent_bits < 64);
3527   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3528 
3529   uint64_t myexponent;
3530   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3531       mysignificand;
3532 
3533   if (isFiniteNonZero()) {
3534     myexponent = exponent + bias;
3535     std::copy_n(significandParts(), mysignificand.size(),
3536                 mysignificand.begin());
3537     if (myexponent == 1 &&
3538         !(significandParts()[integer_bit_part] & integer_bit))
3539       myexponent = 0; // denormal
3540   } else if (category == fcZero) {
3541     myexponent = ::exponentZero(S) + bias;
3542     mysignificand.fill(0);
3543   } else if (category == fcInfinity) {
3544     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3545         S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3546       llvm_unreachable("semantics don't support inf!");
3547     myexponent = ::exponentInf(S) + bias;
3548     mysignificand.fill(0);
3549   } else {
3550     assert(category == fcNaN && "Unknown category!");
3551     if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3552       llvm_unreachable("semantics don't support NaN!");
3553     myexponent = ::exponentNaN(S) + bias;
3554     std::copy_n(significandParts(), mysignificand.size(),
3555                 mysignificand.begin());
3556   }
3557   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3558   auto words_iter =
3559       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3560   if constexpr (significand_mask != 0) {
3561     // Clear the integer bit.
3562     words[mysignificand.size() - 1] &= significand_mask;
3563   }
3564   std::fill(words_iter, words.end(), uint64_t{0});
3565   constexpr size_t last_word = words.size() - 1;
3566   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3567                           << ((S.sizeInBits - 1) % 64);
3568   words[last_word] |= shifted_sign;
3569   uint64_t shifted_exponent = (myexponent & exponent_mask)
3570                               << (trailing_significand_bits % 64);
3571   words[last_word] |= shifted_exponent;
3572   if constexpr (last_word == 0) {
3573     return APInt(S.sizeInBits, words[0]);
3574   }
3575   return APInt(S.sizeInBits, words);
3576 }
3577 
/// Forwards to convertIEEEFloatToAPInt instantiated for semIEEEquad, after
/// asserting that the significand is stored in two parts.
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}
3582 
/// Forwards to convertIEEEFloatToAPInt instantiated for semIEEEdouble, after
/// asserting that the significand is stored in a single part.
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}
3587 
/// Forwards to convertIEEEFloatToAPInt instantiated for semIEEEsingle, after
/// asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}
3592 
/// Forwards to convertIEEEFloatToAPInt instantiated for semBFloat, after
/// asserting that the significand is stored in a single part.
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}
3597 
/// Forwards to convertIEEEFloatToAPInt instantiated for semIEEEhalf, after
/// asserting that the significand is stored in a single part.
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}
3602 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat8E5M2, after
/// asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}
3607 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat8E5M2FNUZ,
/// after asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}
3612 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat8E4M3FN,
/// after asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}
3617 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat8E4M3FNUZ,
/// after asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}
3622 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat8E4M3B11FNUZ,
/// after asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}
3627 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloatTF32, after
/// asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}
3632 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat6E3M2FN,
/// after asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}
3637 
/// Forwards to convertIEEEFloatToAPInt instantiated for semFloat6E2M3FN,
/// after asserting that the significand is stored in a single part.
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}
3642 
3643 // This function creates an APInt that is just a bit map of the floating
3644 // point constant as it would appear in memory.  It is not a conversion,
3645 // and treating the result as a normal integer is unlikely to be useful.
3646 
3647 APInt IEEEFloat::bitcastToAPInt() const {
3648   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3649     return convertHalfAPFloatToAPInt();
3650 
3651   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3652     return convertBFloatAPFloatToAPInt();
3653 
3654   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3655     return convertFloatAPFloatToAPInt();
3656 
3657   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3658     return convertDoubleAPFloatToAPInt();
3659 
3660   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3661     return convertQuadrupleAPFloatToAPInt();
3662 
3663   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3664     return convertPPCDoubleDoubleAPFloatToAPInt();
3665 
3666   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3667     return convertFloat8E5M2APFloatToAPInt();
3668 
3669   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3670     return convertFloat8E5M2FNUZAPFloatToAPInt();
3671 
3672   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3673     return convertFloat8E4M3FNAPFloatToAPInt();
3674 
3675   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3676     return convertFloat8E4M3FNUZAPFloatToAPInt();
3677 
3678   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3679     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3680 
3681   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3682     return convertFloatTF32APFloatToAPInt();
3683 
3684   if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3685     return convertFloat6E3M2FNAPFloatToAPInt();
3686 
3687   if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3688     return convertFloat6E2M3FNAPFloatToAPInt();
3689 
3690   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3691          "unknown format!");
3692   return convertF80LongDoubleAPFloatToAPInt();
3693 }
3694 
3695 float IEEEFloat::convertToFloat() const {
3696   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3697          "Float semantics are not IEEEsingle");
3698   APInt api = bitcastToAPInt();
3699   return api.bitsToFloat();
3700 }
3701 
3702 double IEEEFloat::convertToDouble() const {
3703   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3704          "Float semantics are not IEEEdouble");
3705   APInt api = bitcastToAPInt();
3706   return api.bitsToDouble();
3707 }
3708 
#ifdef HAS_IEE754_FLOAT128
/// Returns this value as a float128 by reinterpreting the result of
/// bitcastToAPInt().  Asserts that the semantics are IEEE quad.  Only
/// compiled when HAS_IEE754_FLOAT128 is defined.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3717 
3718 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3719 /// does not support these bit patterns:
3720 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3721 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3722 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3723 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3724 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3725 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3726   uint64_t i1 = api.getRawData()[0];
3727   uint64_t i2 = api.getRawData()[1];
3728   uint64_t myexponent = (i2 & 0x7fff);
3729   uint64_t mysignificand = i1;
3730   uint8_t myintegerbit = mysignificand >> 63;
3731 
3732   initialize(&semX87DoubleExtended);
3733   assert(partCount()==2);
3734 
3735   sign = static_cast<unsigned int>(i2>>15);
3736   if (myexponent == 0 && mysignificand == 0) {
3737     makeZero(sign);
3738   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3739     makeInf(sign);
3740   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3741              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3742     category = fcNaN;
3743     exponent = exponentNaN();
3744     significandParts()[0] = mysignificand;
3745     significandParts()[1] = 0;
3746   } else {
3747     category = fcNormal;
3748     exponent = myexponent - 16383;
3749     significandParts()[0] = mysignificand;
3750     significandParts()[1] = 0;
3751     if (myexponent==0)          // denormal
3752       exponent = -16382;
3753   }
3754 }
3755 
3756 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3757   uint64_t i1 = api.getRawData()[0];
3758   uint64_t i2 = api.getRawData()[1];
3759   opStatus fs;
3760   bool losesInfo;
3761 
3762   // Get the first double and convert to our format.
3763   initFromDoubleAPInt(APInt(64, i1));
3764   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3765   assert(fs == opOK && !losesInfo);
3766   (void)fs;
3767 
3768   // Unless we have a special case, add in second double.
3769   if (isFiniteNonZero()) {
3770     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3771     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3772     assert(fs == opOK && !losesInfo);
3773     (void)fs;
3774 
3775     add(v, rmNearestTiesToEven);
3776   }
3777 }
3778 
3779 template <const fltSemantics &S>
3780 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3781   assert(api.getBitWidth() == S.sizeInBits);
3782   constexpr integerPart integer_bit = integerPart{1}
3783                                       << ((S.precision - 1) % integerPartWidth);
3784   constexpr uint64_t significand_mask = integer_bit - 1;
3785   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3786   constexpr unsigned int stored_significand_parts =
3787       partCountForBits(trailing_significand_bits);
3788   constexpr unsigned int exponent_bits =
3789       S.sizeInBits - 1 - trailing_significand_bits;
3790   static_assert(exponent_bits < 64);
3791   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3792   constexpr int bias = -(S.minExponent - 1);
3793 
3794   // Copy the bits of the significand. We need to clear out the exponent and
3795   // sign bit in the last word.
3796   std::array<integerPart, stored_significand_parts> mysignificand;
3797   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3798   if constexpr (significand_mask != 0) {
3799     mysignificand[mysignificand.size() - 1] &= significand_mask;
3800   }
3801 
3802   // We assume the last word holds the sign bit, the exponent, and potentially
3803   // some of the trailing significand field.
3804   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3805   uint64_t myexponent =
3806       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3807 
3808   initialize(&S);
3809   assert(partCount() == mysignificand.size());
3810 
3811   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3812 
3813   bool all_zero_significand =
3814       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3815 
3816   bool is_zero = myexponent == 0 && all_zero_significand;
3817 
3818   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3819     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3820       makeInf(sign);
3821       return;
3822     }
3823   }
3824 
3825   bool is_nan = false;
3826 
3827   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3828     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3829   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3830     bool all_ones_significand =
3831         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3832                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3833         (!significand_mask ||
3834          mysignificand[mysignificand.size() - 1] == significand_mask);
3835     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3836   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3837     is_nan = is_zero && sign;
3838   }
3839 
3840   if (is_nan) {
3841     category = fcNaN;
3842     exponent = ::exponentNaN(S);
3843     std::copy_n(mysignificand.begin(), mysignificand.size(),
3844                 significandParts());
3845     return;
3846   }
3847 
3848   if (is_zero) {
3849     makeZero(sign);
3850     return;
3851   }
3852 
3853   category = fcNormal;
3854   exponent = myexponent - bias;
3855   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3856   if (myexponent == 0) // denormal
3857     exponent = S.minExponent;
3858   else
3859     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3860 }
3861 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semIEEEquad.
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}
3865 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semIEEEdouble.
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}
3869 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semIEEEsingle.
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}
3873 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semBFloat.
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}
3877 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semIEEEhalf.
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}
3881 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat8E5M2.
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}
3885 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat8E5M2FNUZ.
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}
3889 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat8E4M3FN.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}
3893 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat8E4M3FNUZ.
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}
3897 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat8E4M3B11FNUZ.
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}
3901 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloatTF32.
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}
3905 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat6E3M2FN.
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E3M2FN>(api);
}
3909 
/// Initializes this object from \p api by forwarding to initFromIEEEAPInt
/// instantiated for semFloat6E2M3FN.
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E2M3FN>(api);
}
3913 
3914 /// Treat api as containing the bits of a floating point number.
3915 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3916   assert(api.getBitWidth() == Sem->sizeInBits);
3917   if (Sem == &semIEEEhalf)
3918     return initFromHalfAPInt(api);
3919   if (Sem == &semBFloat)
3920     return initFromBFloatAPInt(api);
3921   if (Sem == &semIEEEsingle)
3922     return initFromFloatAPInt(api);
3923   if (Sem == &semIEEEdouble)
3924     return initFromDoubleAPInt(api);
3925   if (Sem == &semX87DoubleExtended)
3926     return initFromF80LongDoubleAPInt(api);
3927   if (Sem == &semIEEEquad)
3928     return initFromQuadrupleAPInt(api);
3929   if (Sem == &semPPCDoubleDoubleLegacy)
3930     return initFromPPCDoubleDoubleAPInt(api);
3931   if (Sem == &semFloat8E5M2)
3932     return initFromFloat8E5M2APInt(api);
3933   if (Sem == &semFloat8E5M2FNUZ)
3934     return initFromFloat8E5M2FNUZAPInt(api);
3935   if (Sem == &semFloat8E4M3FN)
3936     return initFromFloat8E4M3FNAPInt(api);
3937   if (Sem == &semFloat8E4M3FNUZ)
3938     return initFromFloat8E4M3FNUZAPInt(api);
3939   if (Sem == &semFloat8E4M3B11FNUZ)
3940     return initFromFloat8E4M3B11FNUZAPInt(api);
3941   if (Sem == &semFloatTF32)
3942     return initFromFloatTF32APInt(api);
3943   if (Sem == &semFloat6E3M2FN)
3944     return initFromFloat6E3M2FNAPInt(api);
3945   if (Sem == &semFloat6E2M3FN)
3946     return initFromFloat6E2M3FNAPInt(api);
3947 
3948   llvm_unreachable(nullptr);
3949 }
3950 
3951 /// Make this number the largest magnitude normal number in the given
3952 /// semantics.
3953 void IEEEFloat::makeLargest(bool Negative) {
3954   // We want (in interchange format):
3955   //   sign = {Negative}
3956   //   exponent = 1..10
3957   //   significand = 1..1
3958   category = fcNormal;
3959   sign = Negative;
3960   exponent = semantics->maxExponent;
3961 
3962   // Use memset to set all but the highest integerPart to all ones.
3963   integerPart *significand = significandParts();
3964   unsigned PartCount = partCount();
3965   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3966 
3967   // Set the high integerPart especially setting all unused top bits for
3968   // internal consistency.
3969   const unsigned NumUnusedHighBits =
3970     PartCount*integerPartWidth - semantics->precision;
3971   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3972                                    ? (~integerPart(0) >> NumUnusedHighBits)
3973                                    : 0;
3974 
3975   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3976       semantics->nanEncoding == fltNanEncoding::AllOnes)
3977     significand[0] &= ~integerPart(1);
3978 }
3979 
3980 /// Make this number the smallest magnitude denormal number in the given
3981 /// semantics.
3982 void IEEEFloat::makeSmallest(bool Negative) {
3983   // We want (in interchange format):
3984   //   sign = {Negative}
3985   //   exponent = 0..0
3986   //   significand = 0..01
3987   category = fcNormal;
3988   sign = Negative;
3989   exponent = semantics->minExponent;
3990   APInt::tcSet(significandParts(), 1, partCount());
3991 }
3992 
3993 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3994   // We want (in interchange format):
3995   //   sign = {Negative}
3996   //   exponent = 0..0
3997   //   significand = 10..0
3998 
3999   category = fcNormal;
4000   zeroSignificand();
4001   sign = Negative;
4002   exponent = semantics->minExponent;
4003   APInt::tcSetBit(significandParts(), semantics->precision - 1);
4004 }
4005 
4006 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4007   initFromAPInt(&Sem, API);
4008 }
4009 
4010 IEEEFloat::IEEEFloat(float f) {
4011   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4012 }
4013 
4014 IEEEFloat::IEEEFloat(double d) {
4015   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4016 }
4017 
4018 namespace {
4019   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4020     Buffer.append(Str.begin(), Str.end());
4021   }
4022 
4023   /// Removes data from the given significand until it is no more
4024   /// precise than is required for the desired precision.
4025   void AdjustToPrecision(APInt &significand,
4026                          int &exp, unsigned FormatPrecision) {
4027     unsigned bits = significand.getActiveBits();
4028 
4029     // 196/59 is a very slight overestimate of lg_2(10).
4030     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4031 
4032     if (bits <= bitsRequired) return;
4033 
4034     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4035     if (!tensRemovable) return;
4036 
4037     exp += tensRemovable;
4038 
4039     APInt divisor(significand.getBitWidth(), 1);
4040     APInt powten(significand.getBitWidth(), 10);
4041     while (true) {
4042       if (tensRemovable & 1)
4043         divisor *= powten;
4044       tensRemovable >>= 1;
4045       if (!tensRemovable) break;
4046       powten *= powten;
4047     }
4048 
4049     significand = significand.udiv(divisor);
4050 
4051     // Truncate the significand down to its active bit count.
4052     significand = significand.trunc(significand.getActiveBits());
4053   }
4054 
4055 
4056   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4057                          int &exp, unsigned FormatPrecision) {
4058     unsigned N = buffer.size();
4059     if (N <= FormatPrecision) return;
4060 
4061     // The most significant figures are the last ones in the buffer.
4062     unsigned FirstSignificant = N - FormatPrecision;
4063 
4064     // Round.
4065     // FIXME: this probably shouldn't use 'round half up'.
4066 
4067     // Rounding down is just a truncation, except we also want to drop
4068     // trailing zeros from the new result.
4069     if (buffer[FirstSignificant - 1] < '5') {
4070       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4071         FirstSignificant++;
4072 
4073       exp += FirstSignificant;
4074       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4075       return;
4076     }
4077 
4078     // Rounding up requires a decimal add-with-carry.  If we continue
4079     // the carry, the newly-introduced zeros will just be truncated.
4080     for (unsigned I = FirstSignificant; I != N; ++I) {
4081       if (buffer[I] == '9') {
4082         FirstSignificant++;
4083       } else {
4084         buffer[I]++;
4085         break;
4086       }
4087     }
4088 
4089     // If we carried through, we have exactly one digit of precision.
4090     if (FirstSignificant == N) {
4091       exp += FirstSignificant;
4092       buffer.clear();
4093       buffer.push_back('1');
4094       return;
4095     }
4096 
4097     exp += FirstSignificant;
4098     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4099   }
4100 } // namespace
4101 
4102 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4103                          unsigned FormatMaxPadding, bool TruncateZero) const {
4104   switch (category) {
4105   case fcInfinity:
4106     if (isNegative())
4107       return append(Str, "-Inf");
4108     else
4109       return append(Str, "+Inf");
4110 
4111   case fcNaN: return append(Str, "NaN");
4112 
4113   case fcZero:
4114     if (isNegative())
4115       Str.push_back('-');
4116 
4117     if (!FormatMaxPadding) {
4118       if (TruncateZero)
4119         append(Str, "0.0E+0");
4120       else {
4121         append(Str, "0.0");
4122         if (FormatPrecision > 1)
4123           Str.append(FormatPrecision - 1, '0');
4124         append(Str, "e+00");
4125       }
4126     } else
4127       Str.push_back('0');
4128     return;
4129 
4130   case fcNormal:
4131     break;
4132   }
4133 
4134   if (isNegative())
4135     Str.push_back('-');
4136 
4137   // Decompose the number into an APInt and an exponent.
4138   int exp = exponent - ((int) semantics->precision - 1);
4139   APInt significand(
4140       semantics->precision,
4141       ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4142 
4143   // Set FormatPrecision if zero.  We want to do this before we
4144   // truncate trailing zeros, as those are part of the precision.
4145   if (!FormatPrecision) {
4146     // We use enough digits so the number can be round-tripped back to an
4147     // APFloat. The formula comes from "How to Print Floating-Point Numbers
4148     // Accurately" by Steele and White.
4149     // FIXME: Using a formula based purely on the precision is conservative;
4150     // we can print fewer digits depending on the actual value being printed.
4151 
4152     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4153     FormatPrecision = 2 + semantics->precision * 59 / 196;
4154   }
4155 
4156   // Ignore trailing binary zeros.
4157   int trailingZeros = significand.countr_zero();
4158   exp += trailingZeros;
4159   significand.lshrInPlace(trailingZeros);
4160 
4161   // Change the exponent from 2^e to 10^e.
4162   if (exp == 0) {
4163     // Nothing to do.
4164   } else if (exp > 0) {
4165     // Just shift left.
4166     significand = significand.zext(semantics->precision + exp);
4167     significand <<= exp;
4168     exp = 0;
4169   } else { /* exp < 0 */
4170     int texp = -exp;
4171 
4172     // We transform this using the identity:
4173     //   (N)(2^-e) == (N)(5^e)(10^-e)
4174     // This means we have to multiply N (the significand) by 5^e.
4175     // To avoid overflow, we have to operate on numbers large
4176     // enough to store N * 5^e:
4177     //   log2(N * 5^e) == log2(N) + e * log2(5)
4178     //                 <= semantics->precision + e * 137 / 59
4179     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4180 
4181     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4182 
4183     // Multiply significand by 5^e.
4184     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4185     significand = significand.zext(precision);
4186     APInt five_to_the_i(precision, 5);
4187     while (true) {
4188       if (texp & 1) significand *= five_to_the_i;
4189 
4190       texp >>= 1;
4191       if (!texp) break;
4192       five_to_the_i *= five_to_the_i;
4193     }
4194   }
4195 
4196   AdjustToPrecision(significand, exp, FormatPrecision);
4197 
4198   SmallVector<char, 256> buffer;
4199 
4200   // Fill the buffer.
4201   unsigned precision = significand.getBitWidth();
4202   if (precision < 4) {
4203     // We need enough precision to store the value 10.
4204     precision = 4;
4205     significand = significand.zext(precision);
4206   }
4207   APInt ten(precision, 10);
4208   APInt digit(precision, 0);
4209 
4210   bool inTrail = true;
4211   while (significand != 0) {
4212     // digit <- significand % 10
4213     // significand <- significand / 10
4214     APInt::udivrem(significand, ten, significand, digit);
4215 
4216     unsigned d = digit.getZExtValue();
4217 
4218     // Drop trailing zeros.
4219     if (inTrail && !d) exp++;
4220     else {
4221       buffer.push_back((char) ('0' + d));
4222       inTrail = false;
4223     }
4224   }
4225 
4226   assert(!buffer.empty() && "no characters in buffer!");
4227 
4228   // Drop down to FormatPrecision.
4229   // TODO: don't do more precise calculations above than are required.
4230   AdjustToPrecision(buffer, exp, FormatPrecision);
4231 
4232   unsigned NDigits = buffer.size();
4233 
4234   // Check whether we should use scientific notation.
4235   bool FormatScientific;
4236   if (!FormatMaxPadding)
4237     FormatScientific = true;
4238   else {
4239     if (exp >= 0) {
4240       // 765e3 --> 765000
4241       //              ^^^
4242       // But we shouldn't make the number look more precise than it is.
4243       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4244                           NDigits + (unsigned) exp > FormatPrecision);
4245     } else {
4246       // Power of the most significant digit.
4247       int MSD = exp + (int) (NDigits - 1);
4248       if (MSD >= 0) {
4249         // 765e-2 == 7.65
4250         FormatScientific = false;
4251       } else {
4252         // 765e-5 == 0.00765
4253         //           ^ ^^
4254         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4255       }
4256     }
4257   }
4258 
4259   // Scientific formatting is pretty straightforward.
4260   if (FormatScientific) {
4261     exp += (NDigits - 1);
4262 
4263     Str.push_back(buffer[NDigits-1]);
4264     Str.push_back('.');
4265     if (NDigits == 1 && TruncateZero)
4266       Str.push_back('0');
4267     else
4268       for (unsigned I = 1; I != NDigits; ++I)
4269         Str.push_back(buffer[NDigits-1-I]);
4270     // Fill with zeros up to FormatPrecision.
4271     if (!TruncateZero && FormatPrecision > NDigits - 1)
4272       Str.append(FormatPrecision - NDigits + 1, '0');
4273     // For !TruncateZero we use lower 'e'.
4274     Str.push_back(TruncateZero ? 'E' : 'e');
4275 
4276     Str.push_back(exp >= 0 ? '+' : '-');
4277     if (exp < 0) exp = -exp;
4278     SmallVector<char, 6> expbuf;
4279     do {
4280       expbuf.push_back((char) ('0' + (exp % 10)));
4281       exp /= 10;
4282     } while (exp);
4283     // Exponent always at least two digits if we do not truncate zeros.
4284     if (!TruncateZero && expbuf.size() < 2)
4285       expbuf.push_back('0');
4286     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4287       Str.push_back(expbuf[E-1-I]);
4288     return;
4289   }
4290 
4291   // Non-scientific, positive exponents.
4292   if (exp >= 0) {
4293     for (unsigned I = 0; I != NDigits; ++I)
4294       Str.push_back(buffer[NDigits-1-I]);
4295     for (unsigned I = 0; I != (unsigned) exp; ++I)
4296       Str.push_back('0');
4297     return;
4298   }
4299 
4300   // Non-scientific, negative exponents.
4301 
4302   // The number of digits to the left of the decimal point.
4303   int NWholeDigits = exp + (int) NDigits;
4304 
4305   unsigned I = 0;
4306   if (NWholeDigits > 0) {
4307     for (; I != (unsigned) NWholeDigits; ++I)
4308       Str.push_back(buffer[NDigits-I-1]);
4309     Str.push_back('.');
4310   } else {
4311     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4312 
4313     Str.push_back('0');
4314     Str.push_back('.');
4315     for (unsigned Z = 1; Z != NZeros; ++Z)
4316       Str.push_back('0');
4317   }
4318 
4319   for (; I != NDigits; ++I)
4320     Str.push_back(buffer[NDigits-I-1]);
4321 }
4322 
4323 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4324   // Special floats and denormals have no exact inverse.
4325   if (!isFiniteNonZero())
4326     return false;
4327 
4328   // Check that the number is a power of two by making sure that only the
4329   // integer bit is set in the significand.
4330   if (significandLSB() != semantics->precision - 1)
4331     return false;
4332 
4333   // Get the inverse.
4334   IEEEFloat reciprocal(*semantics, 1ULL);
4335   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4336     return false;
4337 
4338   // Avoid multiplication with a denormal, it is not safe on all platforms and
4339   // may be slower than a normal division.
4340   if (reciprocal.isDenormal())
4341     return false;
4342 
4343   assert(reciprocal.isFiniteNonZero() &&
4344          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4345 
4346   if (inv)
4347     *inv = APFloat(reciprocal, *semantics);
4348 
4349   return true;
4350 }
4351 
4352 int IEEEFloat::getExactLog2Abs() const {
4353   if (!isFinite() || isZero())
4354     return INT_MIN;
4355 
4356   const integerPart *Parts = significandParts();
4357   const int PartCount = partCountForBits(semantics->precision);
4358 
4359   int PopCount = 0;
4360   for (int i = 0; i < PartCount; ++i) {
4361     PopCount += llvm::popcount(Parts[i]);
4362     if (PopCount > 1)
4363       return INT_MIN;
4364   }
4365 
4366   if (exponent != semantics->minExponent)
4367     return exponent;
4368 
4369   int CountrParts = 0;
4370   for (int i = 0; i < PartCount;
4371        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4372     if (Parts[i] != 0) {
4373       return exponent - semantics->precision + CountrParts +
4374              llvm::countr_zero(Parts[i]) + 1;
4375     }
4376   }
4377 
4378   llvm_unreachable("didn't find the set bit");
4379 }
4380 
4381 bool IEEEFloat::isSignaling() const {
4382   if (!isNaN())
4383     return false;
4384   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4385       semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4386     return false;
4387 
4388   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4389   // first bit of the trailing significand being 0.
4390   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4391 }
4392 
4393 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4394 ///
4395 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4396 /// appropriate sign switching before/after the computation.
4397 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4398   // If we are performing nextDown, swap sign so we have -x.
4399   if (nextDown)
4400     changeSign();
4401 
4402   // Compute nextUp(x)
4403   opStatus result = opOK;
4404 
4405   // Handle each float category separately.
4406   switch (category) {
4407   case fcInfinity:
4408     // nextUp(+inf) = +inf
4409     if (!isNegative())
4410       break;
4411     // nextUp(-inf) = -getLargest()
4412     makeLargest(true);
4413     break;
4414   case fcNaN:
4415     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4416     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4417     //                     change the payload.
4418     if (isSignaling()) {
4419       result = opInvalidOp;
4420       // For consistency, propagate the sign of the sNaN to the qNaN.
4421       makeNaN(false, isNegative(), nullptr);
4422     }
4423     break;
4424   case fcZero:
4425     // nextUp(pm 0) = +getSmallest()
4426     makeSmallest(false);
4427     break;
4428   case fcNormal:
4429     // nextUp(-getSmallest()) = -0
4430     if (isSmallest() && isNegative()) {
4431       APInt::tcSet(significandParts(), 0, partCount());
4432       category = fcZero;
4433       exponent = 0;
4434       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4435         sign = false;
4436       break;
4437     }
4438 
4439     if (isLargest() && !isNegative()) {
4440       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4441         // nextUp(getLargest()) == NAN
4442         makeNaN();
4443         break;
4444       } else if (semantics->nonFiniteBehavior ==
4445                  fltNonfiniteBehavior::FiniteOnly) {
4446         // nextUp(getLargest()) == getLargest()
4447         break;
4448       } else {
4449         // nextUp(getLargest()) == INFINITY
4450         APInt::tcSet(significandParts(), 0, partCount());
4451         category = fcInfinity;
4452         exponent = semantics->maxExponent + 1;
4453         break;
4454       }
4455     }
4456 
4457     // nextUp(normal) == normal + inc.
4458     if (isNegative()) {
4459       // If we are negative, we need to decrement the significand.
4460 
4461       // We only cross a binade boundary that requires adjusting the exponent
4462       // if:
4463       //   1. exponent != semantics->minExponent. This implies we are not in the
4464       //   smallest binade or are dealing with denormals.
4465       //   2. Our significand excluding the integral bit is all zeros.
4466       bool WillCrossBinadeBoundary =
4467         exponent != semantics->minExponent && isSignificandAllZeros();
4468 
4469       // Decrement the significand.
4470       //
4471       // We always do this since:
4472       //   1. If we are dealing with a non-binade decrement, by definition we
4473       //   just decrement the significand.
4474       //   2. If we are dealing with a normal -> normal binade decrement, since
4475       //   we have an explicit integral bit the fact that all bits but the
4476       //   integral bit are zero implies that subtracting one will yield a
4477       //   significand with 0 integral bit and 1 in all other spots. Thus we
4478       //   must just adjust the exponent and set the integral bit to 1.
4479       //   3. If we are dealing with a normal -> denormal binade decrement,
4480       //   since we set the integral bit to 0 when we represent denormals, we
4481       //   just decrement the significand.
4482       integerPart *Parts = significandParts();
4483       APInt::tcDecrement(Parts, partCount());
4484 
4485       if (WillCrossBinadeBoundary) {
4486         // Our result is a normal number. Do the following:
4487         // 1. Set the integral bit to 1.
4488         // 2. Decrement the exponent.
4489         APInt::tcSetBit(Parts, semantics->precision - 1);
4490         exponent--;
4491       }
4492     } else {
4493       // If we are positive, we need to increment the significand.
4494 
4495       // We only cross a binade boundary that requires adjusting the exponent if
4496       // the input is not a denormal and all of said input's significand bits
4497       // are set. If all of said conditions are true: clear the significand, set
4498       // the integral bit to 1, and increment the exponent. If we have a
4499       // denormal always increment since moving denormals and the numbers in the
4500       // smallest normal binade have the same exponent in our representation.
4501       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4502 
4503       if (WillCrossBinadeBoundary) {
4504         integerPart *Parts = significandParts();
4505         APInt::tcSet(Parts, 0, partCount());
4506         APInt::tcSetBit(Parts, semantics->precision - 1);
4507         assert(exponent != semantics->maxExponent &&
4508                "We can not increment an exponent beyond the maxExponent allowed"
4509                " by the given floating point semantics.");
4510         exponent++;
4511       } else {
4512         incrementSignificand();
4513       }
4514     }
4515     break;
4516   }
4517 
4518   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4519   if (nextDown)
4520     changeSign();
4521 
4522   return result;
4523 }
4524 
4525 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4526   return ::exponentNaN(*semantics);
4527 }
4528 
4529 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4530   return ::exponentInf(*semantics);
4531 }
4532 
4533 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4534   return ::exponentZero(*semantics);
4535 }
4536 
4537 void IEEEFloat::makeInf(bool Negative) {
4538   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4539     llvm_unreachable("This floating point format does not support Inf");
4540 
4541   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4542     // There is no Inf, so make NaN instead.
4543     makeNaN(false, Negative);
4544     return;
4545   }
4546   category = fcInfinity;
4547   sign = Negative;
4548   exponent = exponentInf();
4549   APInt::tcSet(significandParts(), 0, partCount());
4550 }
4551 
4552 void IEEEFloat::makeZero(bool Negative) {
4553   category = fcZero;
4554   sign = Negative;
4555   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4556     // Merge negative zero to positive because 0b10000...000 is used for NaN
4557     sign = false;
4558   }
4559   exponent = exponentZero();
4560   APInt::tcSet(significandParts(), 0, partCount());
4561 }
4562 
4563 void IEEEFloat::makeQuiet() {
4564   assert(isNaN());
4565   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4566     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4567 }
4568 
4569 int ilogb(const IEEEFloat &Arg) {
4570   if (Arg.isNaN())
4571     return IEEEFloat::IEK_NaN;
4572   if (Arg.isZero())
4573     return IEEEFloat::IEK_Zero;
4574   if (Arg.isInfinity())
4575     return IEEEFloat::IEK_Inf;
4576   if (!Arg.isDenormal())
4577     return Arg.exponent;
4578 
4579   IEEEFloat Normalized(Arg);
4580   int SignificandBits = Arg.getSemantics().precision - 1;
4581 
4582   Normalized.exponent += SignificandBits;
4583   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4584   return Normalized.exponent - SignificandBits;
4585 }
4586 
4587 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4588   auto MaxExp = X.getSemantics().maxExponent;
4589   auto MinExp = X.getSemantics().minExponent;
4590 
4591   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4592   // overflow; clamp it to a safe range before adding, but ensure that the range
4593   // is large enough that the clamp does not change the result. The range we
4594   // need to support is the difference between the largest possible exponent and
4595   // the normalized exponent of half the smallest denormal.
4596 
4597   int SignificandBits = X.getSemantics().precision - 1;
4598   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4599 
4600   // Clamp to one past the range ends to let normalize handle overlflow.
4601   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4602   X.normalize(RoundingMode, lfExactlyZero);
4603   if (X.isNaN())
4604     X.makeQuiet();
4605   return X;
4606 }
4607 
4608 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4609   Exp = ilogb(Val);
4610 
4611   // Quiet signalling nans.
4612   if (Exp == IEEEFloat::IEK_NaN) {
4613     IEEEFloat Quiet(Val);
4614     Quiet.makeQuiet();
4615     return Quiet;
4616   }
4617 
4618   if (Exp == IEEEFloat::IEK_Inf)
4619     return Val;
4620 
4621   // 1 is added because frexp is defined to return a normalized fraction in
4622   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4623   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4624   return scalbn(Val, -Exp, RM);
4625 }
4626 
4627 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4628     : Semantics(&S),
4629       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4630   assert(Semantics == &semPPCDoubleDouble);
4631 }
4632 
4633 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4634     : Semantics(&S),
4635       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4636                             APFloat(semIEEEdouble, uninitialized)}) {
4637   assert(Semantics == &semPPCDoubleDouble);
4638 }
4639 
4640 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4641     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4642                                            APFloat(semIEEEdouble)}) {
4643   assert(Semantics == &semPPCDoubleDouble);
4644 }
4645 
4646 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4647     : Semantics(&S),
4648       Floats(new APFloat[2]{
4649           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4650           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4651   assert(Semantics == &semPPCDoubleDouble);
4652 }
4653 
4654 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4655                              APFloat &&Second)
4656     : Semantics(&S),
4657       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4658   assert(Semantics == &semPPCDoubleDouble);
4659   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4660   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4661 }
4662 
4663 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4664     : Semantics(RHS.Semantics),
4665       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4666                                          APFloat(RHS.Floats[1])}
4667                         : nullptr) {
4668   assert(Semantics == &semPPCDoubleDouble);
4669 }
4670 
4671 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4672     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4673   RHS.Semantics = &semBogus;
4674   assert(Semantics == &semPPCDoubleDouble);
4675 }
4676 
4677 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4678   if (Semantics == RHS.Semantics && RHS.Floats) {
4679     Floats[0] = RHS.Floats[0];
4680     Floats[1] = RHS.Floats[1];
4681   } else if (this != &RHS) {
4682     this->~DoubleAPFloat();
4683     new (this) DoubleAPFloat(RHS);
4684   }
4685   return *this;
4686 }
4687 
4688 // Implement addition, subtraction, multiplication and division based on:
4689 // "Software for Doubled-Precision Floating-Point Computations",
4690 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4691 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4692                                          const APFloat &c, const APFloat &cc,
4693                                          roundingMode RM) {
4694   int Status = opOK;
4695   APFloat z = a;
4696   Status |= z.add(c, RM);
4697   if (!z.isFinite()) {
4698     if (!z.isInfinity()) {
4699       Floats[0] = std::move(z);
4700       Floats[1].makeZero(/* Neg = */ false);
4701       return (opStatus)Status;
4702     }
4703     Status = opOK;
4704     auto AComparedToC = a.compareAbsoluteValue(c);
4705     z = cc;
4706     Status |= z.add(aa, RM);
4707     if (AComparedToC == APFloat::cmpGreaterThan) {
4708       // z = cc + aa + c + a;
4709       Status |= z.add(c, RM);
4710       Status |= z.add(a, RM);
4711     } else {
4712       // z = cc + aa + a + c;
4713       Status |= z.add(a, RM);
4714       Status |= z.add(c, RM);
4715     }
4716     if (!z.isFinite()) {
4717       Floats[0] = std::move(z);
4718       Floats[1].makeZero(/* Neg = */ false);
4719       return (opStatus)Status;
4720     }
4721     Floats[0] = z;
4722     APFloat zz = aa;
4723     Status |= zz.add(cc, RM);
4724     if (AComparedToC == APFloat::cmpGreaterThan) {
4725       // Floats[1] = a - z + c + zz;
4726       Floats[1] = a;
4727       Status |= Floats[1].subtract(z, RM);
4728       Status |= Floats[1].add(c, RM);
4729       Status |= Floats[1].add(zz, RM);
4730     } else {
4731       // Floats[1] = c - z + a + zz;
4732       Floats[1] = c;
4733       Status |= Floats[1].subtract(z, RM);
4734       Status |= Floats[1].add(a, RM);
4735       Status |= Floats[1].add(zz, RM);
4736     }
4737   } else {
4738     // q = a - z;
4739     APFloat q = a;
4740     Status |= q.subtract(z, RM);
4741 
4742     // zz = q + c + (a - (q + z)) + aa + cc;
4743     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4744     auto zz = q;
4745     Status |= zz.add(c, RM);
4746     Status |= q.add(z, RM);
4747     Status |= q.subtract(a, RM);
4748     q.changeSign();
4749     Status |= zz.add(q, RM);
4750     Status |= zz.add(aa, RM);
4751     Status |= zz.add(cc, RM);
4752     if (zz.isZero() && !zz.isNegative()) {
4753       Floats[0] = std::move(z);
4754       Floats[1].makeZero(/* Neg = */ false);
4755       return opOK;
4756     }
4757     Floats[0] = z;
4758     Status |= Floats[0].add(zz, RM);
4759     if (!Floats[0].isFinite()) {
4760       Floats[1].makeZero(/* Neg = */ false);
4761       return (opStatus)Status;
4762     }
4763     Floats[1] = std::move(z);
4764     Status |= Floats[1].subtract(Floats[0], RM);
4765     Status |= Floats[1].add(zz, RM);
4766   }
4767   return (opStatus)Status;
4768 }
4769 
4770 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4771                                                 const DoubleAPFloat &RHS,
4772                                                 DoubleAPFloat &Out,
4773                                                 roundingMode RM) {
4774   if (LHS.getCategory() == fcNaN) {
4775     Out = LHS;
4776     return opOK;
4777   }
4778   if (RHS.getCategory() == fcNaN) {
4779     Out = RHS;
4780     return opOK;
4781   }
4782   if (LHS.getCategory() == fcZero) {
4783     Out = RHS;
4784     return opOK;
4785   }
4786   if (RHS.getCategory() == fcZero) {
4787     Out = LHS;
4788     return opOK;
4789   }
4790   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4791       LHS.isNegative() != RHS.isNegative()) {
4792     Out.makeNaN(false, Out.isNegative(), nullptr);
4793     return opInvalidOp;
4794   }
4795   if (LHS.getCategory() == fcInfinity) {
4796     Out = LHS;
4797     return opOK;
4798   }
4799   if (RHS.getCategory() == fcInfinity) {
4800     Out = RHS;
4801     return opOK;
4802   }
4803   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4804 
4805   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4806       CC(RHS.Floats[1]);
4807   assert(&A.getSemantics() == &semIEEEdouble);
4808   assert(&AA.getSemantics() == &semIEEEdouble);
4809   assert(&C.getSemantics() == &semIEEEdouble);
4810   assert(&CC.getSemantics() == &semIEEEdouble);
4811   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4812   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4813   return Out.addImpl(A, AA, C, CC, RM);
4814 }
4815 
4816 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4817                                      roundingMode RM) {
4818   return addWithSpecial(*this, RHS, *this, RM);
4819 }
4820 
4821 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4822                                           roundingMode RM) {
4823   changeSign();
4824   auto Ret = add(RHS, RM);
4825   changeSign();
4826   return Ret;
4827 }
4828 
4829 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4830                                           APFloat::roundingMode RM) {
4831   const auto &LHS = *this;
4832   auto &Out = *this;
4833   /* Interesting observation: For special categories, finding the lowest
4834      common ancestor of the following layered graph gives the correct
4835      return category:
4836 
4837         NaN
4838        /   \
4839      Zero  Inf
4840        \   /
4841        Normal
4842 
4843      e.g. NaN * NaN = NaN
4844           Zero * Inf = NaN
4845           Normal * Zero = Zero
4846           Normal * Inf = Inf
4847   */
4848   if (LHS.getCategory() == fcNaN) {
4849     Out = LHS;
4850     return opOK;
4851   }
4852   if (RHS.getCategory() == fcNaN) {
4853     Out = RHS;
4854     return opOK;
4855   }
4856   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4857       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4858     Out.makeNaN(false, false, nullptr);
4859     return opOK;
4860   }
4861   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4862     Out = LHS;
4863     return opOK;
4864   }
4865   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4866     Out = RHS;
4867     return opOK;
4868   }
4869   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4870          "Special cases not handled exhaustively");
4871 
4872   int Status = opOK;
4873   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4874   // t = a * c
4875   APFloat T = A;
4876   Status |= T.multiply(C, RM);
4877   if (!T.isFiniteNonZero()) {
4878     Floats[0] = T;
4879     Floats[1].makeZero(/* Neg = */ false);
4880     return (opStatus)Status;
4881   }
4882 
4883   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4884   APFloat Tau = A;
4885   T.changeSign();
4886   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4887   T.changeSign();
4888   {
4889     // v = a * d
4890     APFloat V = A;
4891     Status |= V.multiply(D, RM);
4892     // w = b * c
4893     APFloat W = B;
4894     Status |= W.multiply(C, RM);
4895     Status |= V.add(W, RM);
4896     // tau += v + w
4897     Status |= Tau.add(V, RM);
4898   }
4899   // u = t + tau
4900   APFloat U = T;
4901   Status |= U.add(Tau, RM);
4902 
4903   Floats[0] = U;
4904   if (!U.isFinite()) {
4905     Floats[1].makeZero(/* Neg = */ false);
4906   } else {
4907     // Floats[1] = (t - u) + tau
4908     Status |= T.subtract(U, RM);
4909     Status |= T.add(Tau, RM);
4910     Floats[1] = T;
4911   }
4912   return (opStatus)Status;
4913 }
4914 
4915 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4916                                         APFloat::roundingMode RM) {
4917   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4918   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4919   auto Ret =
4920       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4921   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4922   return Ret;
4923 }
4924 
4925 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4926   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4927   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4928   auto Ret =
4929       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4930   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4931   return Ret;
4932 }
4933 
4934 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4935   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4936   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4937   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4938   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4939   return Ret;
4940 }
4941 
4942 APFloat::opStatus
4943 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4944                                 const DoubleAPFloat &Addend,
4945                                 APFloat::roundingMode RM) {
4946   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4947   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4948   auto Ret = Tmp.fusedMultiplyAdd(
4949       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4950       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4951   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4952   return Ret;
4953 }
4954 
4955 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4956   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4957   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4958   auto Ret = Tmp.roundToIntegral(RM);
4959   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4960   return Ret;
4961 }
4962 
4963 void DoubleAPFloat::changeSign() {
4964   Floats[0].changeSign();
4965   Floats[1].changeSign();
4966 }
4967 
4968 APFloat::cmpResult
4969 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4970   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4971   if (Result != cmpEqual)
4972     return Result;
4973   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4974   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4975     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4976     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4977     if (Against && !RHSAgainst)
4978       return cmpLessThan;
4979     if (!Against && RHSAgainst)
4980       return cmpGreaterThan;
4981     if (!Against && !RHSAgainst)
4982       return Result;
4983     if (Against && RHSAgainst)
4984       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4985   }
4986   return Result;
4987 }
4988 
4989 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4990   return Floats[0].getCategory();
4991 }
4992 
4993 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4994 
4995 void DoubleAPFloat::makeInf(bool Neg) {
4996   Floats[0].makeInf(Neg);
4997   Floats[1].makeZero(/* Neg = */ false);
4998 }
4999 
5000 void DoubleAPFloat::makeZero(bool Neg) {
5001   Floats[0].makeZero(Neg);
5002   Floats[1].makeZero(/* Neg = */ false);
5003 }
5004 
5005 void DoubleAPFloat::makeLargest(bool Neg) {
5006   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5007   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5008   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5009   if (Neg)
5010     changeSign();
5011 }
5012 
5013 void DoubleAPFloat::makeSmallest(bool Neg) {
5014   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5015   Floats[0].makeSmallest(Neg);
5016   Floats[1].makeZero(/* Neg = */ false);
5017 }
5018 
5019 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5020   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5021   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5022   if (Neg)
5023     Floats[0].changeSign();
5024   Floats[1].makeZero(/* Neg = */ false);
5025 }
5026 
5027 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5028   Floats[0].makeNaN(SNaN, Neg, fill);
5029   Floats[1].makeZero(/* Neg = */ false);
5030 }
5031 
5032 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5033   auto Result = Floats[0].compare(RHS.Floats[0]);
5034   // |Float[0]| > |Float[1]|
5035   if (Result == APFloat::cmpEqual)
5036     return Floats[1].compare(RHS.Floats[1]);
5037   return Result;
5038 }
5039 
5040 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5041   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5042          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5043 }
5044 
5045 hash_code hash_value(const DoubleAPFloat &Arg) {
5046   if (Arg.Floats)
5047     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5048   return hash_combine(Arg.Semantics);
5049 }
5050 
5051 APInt DoubleAPFloat::bitcastToAPInt() const {
5052   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5053   uint64_t Data[] = {
5054       Floats[0].bitcastToAPInt().getRawData()[0],
5055       Floats[1].bitcastToAPInt().getRawData()[0],
5056   };
5057   return APInt(128, 2, Data);
5058 }
5059 
5060 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5061                                                              roundingMode RM) {
5062   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5063   APFloat Tmp(semPPCDoubleDoubleLegacy);
5064   auto Ret = Tmp.convertFromString(S, RM);
5065   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5066   return Ret;
5067 }
5068 
5069 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5070   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5071   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5072   auto Ret = Tmp.next(nextDown);
5073   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5074   return Ret;
5075 }
5076 
5077 APFloat::opStatus
5078 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5079                                 unsigned int Width, bool IsSigned,
5080                                 roundingMode RM, bool *IsExact) const {
5081   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5082   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5083       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5084 }
5085 
5086 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5087                                                   bool IsSigned,
5088                                                   roundingMode RM) {
5089   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5090   APFloat Tmp(semPPCDoubleDoubleLegacy);
5091   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5092   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5093   return Ret;
5094 }
5095 
5096 APFloat::opStatus
5097 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5098                                               unsigned int InputSize,
5099                                               bool IsSigned, roundingMode RM) {
5100   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5101   APFloat Tmp(semPPCDoubleDoubleLegacy);
5102   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5103   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5104   return Ret;
5105 }
5106 
5107 APFloat::opStatus
5108 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5109                                               unsigned int InputSize,
5110                                               bool IsSigned, roundingMode RM) {
5111   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5112   APFloat Tmp(semPPCDoubleDoubleLegacy);
5113   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5114   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5115   return Ret;
5116 }
5117 
5118 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5119                                                unsigned int HexDigits,
5120                                                bool UpperCase,
5121                                                roundingMode RM) const {
5122   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5123   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5124       .convertToHexString(DST, HexDigits, UpperCase, RM);
5125 }
5126 
5127 bool DoubleAPFloat::isDenormal() const {
5128   return getCategory() == fcNormal &&
5129          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5130           // (double)(Hi + Lo) == Hi defines a normal number.
5131           Floats[0] != Floats[0] + Floats[1]);
5132 }
5133 
5134 bool DoubleAPFloat::isSmallest() const {
5135   if (getCategory() != fcNormal)
5136     return false;
5137   DoubleAPFloat Tmp(*this);
5138   Tmp.makeSmallest(this->isNegative());
5139   return Tmp.compare(*this) == cmpEqual;
5140 }
5141 
5142 bool DoubleAPFloat::isSmallestNormalized() const {
5143   if (getCategory() != fcNormal)
5144     return false;
5145 
5146   DoubleAPFloat Tmp(*this);
5147   Tmp.makeSmallestNormalized(this->isNegative());
5148   return Tmp.compare(*this) == cmpEqual;
5149 }
5150 
5151 bool DoubleAPFloat::isLargest() const {
5152   if (getCategory() != fcNormal)
5153     return false;
5154   DoubleAPFloat Tmp(*this);
5155   Tmp.makeLargest(this->isNegative());
5156   return Tmp.compare(*this) == cmpEqual;
5157 }
5158 
5159 bool DoubleAPFloat::isInteger() const {
5160   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5161   return Floats[0].isInteger() && Floats[1].isInteger();
5162 }
5163 
5164 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5165                              unsigned FormatPrecision,
5166                              unsigned FormatMaxPadding,
5167                              bool TruncateZero) const {
5168   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5169   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5170       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5171 }
5172 
5173 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5174   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5175   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5176   if (!inv)
5177     return Tmp.getExactInverse(nullptr);
5178   APFloat Inv(semPPCDoubleDoubleLegacy);
5179   auto Ret = Tmp.getExactInverse(&Inv);
5180   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5181   return Ret;
5182 }
5183 
5184 int DoubleAPFloat::getExactLog2() const {
5185   // TODO: Implement me
5186   return INT_MIN;
5187 }
5188 
5189 int DoubleAPFloat::getExactLog2Abs() const {
5190   // TODO: Implement me
5191   return INT_MIN;
5192 }
5193 
5194 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5195                      APFloat::roundingMode RM) {
5196   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5197   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5198                        scalbn(Arg.Floats[1], Exp, RM));
5199 }
5200 
5201 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5202                     APFloat::roundingMode RM) {
5203   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5204   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5205   APFloat Second = Arg.Floats[1];
5206   if (Arg.getCategory() == APFloat::fcNormal)
5207     Second = scalbn(Second, -Exp, RM);
5208   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5209 }
5210 
5211 } // namespace detail
5212 
5213 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5214   if (usesLayout<IEEEFloat>(Semantics)) {
5215     new (&IEEE) IEEEFloat(std::move(F));
5216     return;
5217   }
5218   if (usesLayout<DoubleAPFloat>(Semantics)) {
5219     const fltSemantics& S = F.getSemantics();
5220     new (&Double)
5221         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5222                       APFloat(semIEEEdouble));
5223     return;
5224   }
5225   llvm_unreachable("Unexpected semantics");
5226 }
5227 
5228 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5229                                                        roundingMode RM) {
5230   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5231 }
5232 
5233 hash_code hash_value(const APFloat &Arg) {
5234   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5235     return hash_value(Arg.U.IEEE);
5236   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5237     return hash_value(Arg.U.Double);
5238   llvm_unreachable("Unexpected semantics");
5239 }
5240 
5241 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5242     : APFloat(Semantics) {
5243   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5244   assert(StatusOrErr && "Invalid floating point representation");
5245   consumeError(StatusOrErr.takeError());
5246 }
5247 
5248 FPClassTest APFloat::classify() const {
5249   if (isZero())
5250     return isNegative() ? fcNegZero : fcPosZero;
5251   if (isNormal())
5252     return isNegative() ? fcNegNormal : fcPosNormal;
5253   if (isDenormal())
5254     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5255   if (isInfinity())
5256     return isNegative() ? fcNegInf : fcPosInf;
5257   assert(isNaN() && "Other class of FP constant");
5258   return isSignaling() ? fcSNan : fcQNan;
5259 }
5260 
5261 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5262                                    roundingMode RM, bool *losesInfo) {
5263   if (&getSemantics() == &ToSemantics) {
5264     *losesInfo = false;
5265     return opOK;
5266   }
5267   if (usesLayout<IEEEFloat>(getSemantics()) &&
5268       usesLayout<IEEEFloat>(ToSemantics))
5269     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5270   if (usesLayout<IEEEFloat>(getSemantics()) &&
5271       usesLayout<DoubleAPFloat>(ToSemantics)) {
5272     assert(&ToSemantics == &semPPCDoubleDouble);
5273     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5274     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5275     return Ret;
5276   }
5277   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5278       usesLayout<IEEEFloat>(ToSemantics)) {
5279     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5280     *this = APFloat(std::move(getIEEE()), ToSemantics);
5281     return Ret;
5282   }
5283   llvm_unreachable("Unexpected semantics");
5284 }
5285 
5286 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5287   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5288 }
5289 
5290 void APFloat::print(raw_ostream &OS) const {
5291   SmallVector<char, 16> Buffer;
5292   toString(Buffer);
5293   OS << Buffer << "\n";
5294 }
5295 
5296 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5297 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5298 #endif
5299 
5300 void APFloat::Profile(FoldingSetNodeID &NID) const {
5301   NID.Add(bitcastToAPInt());
5302 }
5303 
5304 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5305    an APSInt, whose initial bit-width and signed-ness are used to determine the
5306    precision of the conversion.
5307  */
5308 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5309                                             roundingMode rounding_mode,
5310                                             bool *isExact) const {
5311   unsigned bitWidth = result.getBitWidth();
5312   SmallVector<uint64_t, 4> parts(result.getNumWords());
5313   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5314                                      rounding_mode, isExact);
5315   // Keeps the original signed-ness.
5316   result = APInt(bitWidth, parts);
5317   return status;
5318 }
5319 
5320 double APFloat::convertToDouble() const {
5321   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5322     return getIEEE().convertToDouble();
5323   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5324          "Float semantics is not representable by IEEEdouble");
5325   APFloat Temp = *this;
5326   bool LosesInfo;
5327   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5328   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5329   (void)St;
5330   return Temp.getIEEE().convertToDouble();
5331 }
5332 
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128. Values already in IEEE quad
/// semantics are converted directly; any other semantics must be
/// representable by IEEE quad (asserted) and are converted on a copy first,
/// with the conversion asserted to be exact.
float128 APFloat::convertToQuad() const {
  const fltSemantics &Sem = getSemantics();
  if (&Sem == &semIEEEquad)
    return getIEEE().convertToQuad();

  assert(Sem.isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Converted(*this);
  bool LosesInfo;
  const opStatus Status =
      Converted.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)Status; // Only read by the assertion above.
  return Converted.getIEEE().convertToQuad();
}
#endif
5347 
5348 float APFloat::convertToFloat() const {
5349   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5350     return getIEEE().convertToFloat();
5351   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5352          "Float semantics is not representable by IEEEsingle");
5353   APFloat Temp = *this;
5354   bool LosesInfo;
5355   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5356   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5357   (void)St;
5358   return Temp.getIEEE().convertToFloat();
5359 }
5360 
5361 } // namespace llvm
5362 
5363 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5364