xref: /llvm-project/llvm/lib/Support/APFloat.cpp (revision 99f527d2807b5a14dc7ee64d15405f09e95ee9f2)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30 
// Forwards METHOD_CALL to the member of the union U whose layout matches
// getSemantics() -- U.IEEE for the IEEEFloat layout, U.Double for the
// DoubleAPFloat layout -- and returns the result from the enclosing function.
// Any other layout is treated as unreachable.  The expansion is a single
// statement that needs a trailing semicolon, and it relies on U,
// getSemantics() and usesLayout<>() being in scope at the point of use.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39 
40 using namespace llvm;
41 
/// Folds a pair of fcCategory values into one integer key, so that a single
/// switch statement can enumerate every (lhs, rhs) category combination of a
/// binary operation.  The factor of 4 keeps the keys distinct as long as both
/// category values lie in [0, 3].
///
/// TODO: Turn this into a static inline constexpr function once that is
/// permitted in this codebase.
#define PackCategoriesIntoKey(_lhs, _rhs) (4 * (_lhs) + (_rhs))
49 
50 /* Assumed in hexadecimal significand parsing, and conversion to
51    hexadecimal strings.  */
52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 
54 namespace llvm {
55 
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ); the semantics
  // table for Float8E8M0FNU in this file selects it as well. There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};
76 
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
101 
102 /* Represents floating point arithmetic semantics.  */
103 struct fltSemantics {
104   /* The largest E such that 2^E is representable; this matches the
105      definition of IEEE 754.  */
106   APFloatBase::ExponentType maxExponent;
107 
108   /* The smallest E such that 2^E is a normalized number; this
109      matches the definition of IEEE 754.  */
110   APFloatBase::ExponentType minExponent;
111 
112   /* Number of bits in the significand.  This includes the integer
113      bit.  */
114   unsigned int precision;
115 
116   /* Number of bits actually used in the semantics. */
117   unsigned int sizeInBits;
118 
119   fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
120 
121   fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
122 
123   /* Whether this semantics has an encoding for Zero */
124   bool hasZero = true;
125 
126   /* Whether this semantics can represent signed values */
127   bool hasSignedRepr = true;
128 
129   // Returns true if any number described by this semantics can be precisely
130   // represented by the specified semantics. Does not take into account
131   // the value of fltNonfiniteBehavior.
132   bool isRepresentableBy(const fltSemantics &S) const {
133     return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
134            precision <= S.precision;
135   }
136 };
137 
// Semantics tables for the supported formats.  The initializers are
// positional and follow the member order of fltSemantics:
//
//   {maxExponent, minExponent, precision, sizeInBits,
//    nonFiniteBehavior, nanEncoding, hasZero, hasSignedRepr}
//
// Trailing members that are omitted take the defaults declared in
// fltSemantics (IEEE754, IEEE, true, true).
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
// The only table here that clears both hasZero and hasSignedRepr.
static constexpr fltSemantics semFloat8E8M0FNU = {
    127,   -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
    false, false};

static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// All-zero placeholder semantics; this is what APFloatBase::Bogus() returns.
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
167 
/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   The exponent and precision fields below are placeholders rather than a
   description of the format; only sizeInBits (128) is meaningful.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.             */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
178 
/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.  */
static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                          53 + 53, 128};
205 
206 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
207   switch (S) {
208   case S_IEEEhalf:
209     return IEEEhalf();
210   case S_BFloat:
211     return BFloat();
212   case S_IEEEsingle:
213     return IEEEsingle();
214   case S_IEEEdouble:
215     return IEEEdouble();
216   case S_IEEEquad:
217     return IEEEquad();
218   case S_PPCDoubleDouble:
219     return PPCDoubleDouble();
220   case S_Float8E5M2:
221     return Float8E5M2();
222   case S_Float8E5M2FNUZ:
223     return Float8E5M2FNUZ();
224   case S_Float8E4M3:
225     return Float8E4M3();
226   case S_Float8E4M3FN:
227     return Float8E4M3FN();
228   case S_Float8E4M3FNUZ:
229     return Float8E4M3FNUZ();
230   case S_Float8E4M3B11FNUZ:
231     return Float8E4M3B11FNUZ();
232   case S_Float8E3M4:
233     return Float8E3M4();
234   case S_FloatTF32:
235     return FloatTF32();
236   case S_Float8E8M0FNU:
237     return Float8E8M0FNU();
238   case S_Float6E3M2FN:
239     return Float6E3M2FN();
240   case S_Float6E2M3FN:
241     return Float6E2M3FN();
242   case S_Float4E2M1FN:
243     return Float4E2M1FN();
244   case S_x87DoubleExtended:
245     return x87DoubleExtended();
246   }
247   llvm_unreachable("Unrecognised floating semantics");
248 }
249 
250 APFloatBase::Semantics
251 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
252   if (&Sem == &llvm::APFloat::IEEEhalf())
253     return S_IEEEhalf;
254   else if (&Sem == &llvm::APFloat::BFloat())
255     return S_BFloat;
256   else if (&Sem == &llvm::APFloat::IEEEsingle())
257     return S_IEEEsingle;
258   else if (&Sem == &llvm::APFloat::IEEEdouble())
259     return S_IEEEdouble;
260   else if (&Sem == &llvm::APFloat::IEEEquad())
261     return S_IEEEquad;
262   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
263     return S_PPCDoubleDouble;
264   else if (&Sem == &llvm::APFloat::Float8E5M2())
265     return S_Float8E5M2;
266   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
267     return S_Float8E5M2FNUZ;
268   else if (&Sem == &llvm::APFloat::Float8E4M3())
269     return S_Float8E4M3;
270   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
271     return S_Float8E4M3FN;
272   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
273     return S_Float8E4M3FNUZ;
274   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
275     return S_Float8E4M3B11FNUZ;
276   else if (&Sem == &llvm::APFloat::Float8E3M4())
277     return S_Float8E3M4;
278   else if (&Sem == &llvm::APFloat::FloatTF32())
279     return S_FloatTF32;
280   else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
281     return S_Float8E8M0FNU;
282   else if (&Sem == &llvm::APFloat::Float6E3M2FN())
283     return S_Float6E3M2FN;
284   else if (&Sem == &llvm::APFloat::Float6E2M3FN())
285     return S_Float6E2M3FN;
286   else if (&Sem == &llvm::APFloat::Float4E2M1FN())
287     return S_Float4E2M1FN;
288   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
289     return S_x87DoubleExtended;
290   else
291     llvm_unreachable("Unknown floating semantics");
292 }
293 
// Accessors for the file-local semantics tables.  Each one returns a
// reference to the corresponding static constexpr fltSemantics object, so
// repeated calls yield the same address; SemanticsToEnum relies on that when
// it compares addresses.
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
// Bogus has no Semantics enumerator: neither EnumToSemantics nor
// SemanticsToEnum handles it.
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
320 
// Namespace-scope definitions of APFloatBase's static constexpr rounding-mode
// members.
// NOTE(review): presumably kept so the members can be ODR-used under
// pre-C++17 rules; confirm against the supported language level before
// removing them.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
326 
/* A tight upper bound on number of parts required to hold the value
   pow(5, power) is

     power * 815 / (351 * integerPartWidth) + 1

   However, whilst the result may require only this many parts,
   because we are multiplying two values to get it, the
   multiplication may require an extra part with the excess part
   being zero (consider the trivial case of 1 * 1, tcFullMultiply
   requires two parts to hold the single-part result).  So we add an
   extra one to guarantee enough space whilst multiplying.  */
// Largest exponent and precision these bounds are sized for; the values
// equal the maxExponent and precision of semIEEEquad.
const unsigned int maxExponent = 16383;
const unsigned int maxPrecision = 113;
// Largest power of five the sizing below accounts for.
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
// The bound from the comment above with the extra part added, hence "2 +"
// rather than "+ 1".
const unsigned int maxPowerOfFiveParts =
    2 +
    ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
344 
/// Returns the precision field of \p semantics (significand bits, including
/// the integer bit).
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
/// Returns the maxExponent field of \p semantics.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
/// Returns the minExponent field of \p semantics.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
/// Returns the sizeInBits field of \p semantics.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
359 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
360                                                  bool isSigned) {
361   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
362   // at least one more bit than the MaxExponent to hold the max FP value.
363   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
364   // Extra sign bit needed.
365   if (isSigned)
366     ++MinBitWidth;
367   return MinBitWidth;
368 }
369 
370 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
371                                             const fltSemantics &Dst) {
372   // Exponent range must be larger.
373   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
374     return false;
375 
376   // If the mantissa is long enough, the result value could still be denormal
377   // with a larger exponent range.
378   //
379   // FIXME: This condition is probably not accurate but also shouldn't be a
380   // practical concern with existing types.
381   return Dst.precision >= Src.precision;
382 }
383 
/// Returns the sizeInBits field of \p Sem; this reads the same field as
/// semanticsSizeInBits().
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
387 
/// Exponent value associated with zero in \p semantics: one below the
/// smallest normal exponent.
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}
392 
/// Exponent value associated with infinity in \p semantics: one above the
/// largest finite exponent.
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
397 
398 static constexpr APFloatBase::ExponentType
399 exponentNaN(const fltSemantics &semantics) {
400   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
401     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
402       return exponentZero(semantics);
403     return semantics.maxExponent;
404   }
405   return semantics.maxExponent + 1;
406 }
407 
408 /* A bunch of private, handy routines.  */
409 
/// Wraps the message \p Err in a StringError carrying
/// inconvertibleErrorCode() and returns it as an Error.
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
413 
414 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
415   return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
416                           APFloatBase::integerPartWidth);
417 }
418 
/* Maps a decimal digit character to 0U-9U.  Any other input yields a
   value >= 10U (the subtraction is unsigned and wraps), which callers
   use as the "not a digit" test.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
425 
426 /* Return the value of a decimal exponent of the form
427    [+-]ddddddd.
428 
429    If the exponent overflows, returns a large exponent with the
430    appropriate sign.  */
431 static Expected<int> readExponent(StringRef::iterator begin,
432                                   StringRef::iterator end) {
433   bool isNegative;
434   unsigned int absExponent;
435   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
436   StringRef::iterator p = begin;
437 
438   // Treat no exponent as 0 to match binutils
439   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
440     return 0;
441   }
442 
443   isNegative = (*p == '-');
444   if (*p == '-' || *p == '+') {
445     p++;
446     if (p == end)
447       return createError("Exponent has no digits");
448   }
449 
450   absExponent = decDigitValue(*p++);
451   if (absExponent >= 10U)
452     return createError("Invalid character in exponent");
453 
454   for (; p != end; ++p) {
455     unsigned int value;
456 
457     value = decDigitValue(*p);
458     if (value >= 10U)
459       return createError("Invalid character in exponent");
460 
461     absExponent = absExponent * 10U + value;
462     if (absExponent >= overlargeExponent) {
463       absExponent = overlargeExponent;
464       break;
465     }
466   }
467 
468   if (isNegative)
469     return -(int) absExponent;
470   else
471     return (int) absExponent;
472 }
473 
474 /* This is ugly and needs cleaning up, but I don't immediately see
475    how whilst remaining safe.  */
476 static Expected<int> totalExponent(StringRef::iterator p,
477                                    StringRef::iterator end,
478                                    int exponentAdjustment) {
479   int unsignedExponent;
480   bool negative, overflow;
481   int exponent = 0;
482 
483   if (p == end)
484     return createError("Exponent has no digits");
485 
486   negative = *p == '-';
487   if (*p == '-' || *p == '+') {
488     p++;
489     if (p == end)
490       return createError("Exponent has no digits");
491   }
492 
493   unsignedExponent = 0;
494   overflow = false;
495   for (; p != end; ++p) {
496     unsigned int value;
497 
498     value = decDigitValue(*p);
499     if (value >= 10U)
500       return createError("Invalid character in exponent");
501 
502     unsignedExponent = unsignedExponent * 10 + value;
503     if (unsignedExponent > 32767) {
504       overflow = true;
505       break;
506     }
507   }
508 
509   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
510     overflow = true;
511 
512   if (!overflow) {
513     exponent = unsignedExponent;
514     if (negative)
515       exponent = -exponent;
516     exponent += exponentAdjustment;
517     if (exponent > 32767 || exponent < -32768)
518       overflow = true;
519   }
520 
521   if (overflow)
522     exponent = negative ? -32768: 32767;
523 
524   return exponent;
525 }
526 
527 static Expected<StringRef::iterator>
528 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
529                            StringRef::iterator *dot) {
530   StringRef::iterator p = begin;
531   *dot = end;
532   while (p != end && *p == '0')
533     p++;
534 
535   if (p != end && *p == '.') {
536     *dot = p++;
537 
538     if (end - begin == 1)
539       return createError("Significand has no digits");
540 
541     while (p != end && *p == '0')
542       p++;
543   }
544 
545   return p;
546 }
547 
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  // Position reached after skipping leading zeroes and any dot.
  const char *firstSigDigit;
  // Position of the last digit kept once trailing zeroes are dropped.
  const char *lastSigDigit;
  // Decimal exponent when the significant digits are read as an integer.
  int exponent;
  // Decimal exponent when the point follows the first significant digit.
  int normalizedExponent;
};
567 
/// Scans the decimal number in [begin, end) and fills in \p D with the
/// location of its significant digits and its decimal exponents.
///
/// Returns an error if the string contains more than one dot, a character
/// other than 'e'/'E' after the digits, no significand digits before the
/// exponent marker, or an exponent that readExponent() rejects.
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  // Skip leading zeroes (and a dot among them); dot records that dot, if any.
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  // Walk over the remaining digits, allowing at most one dot in total, and
  // stop at the first character that is neither.
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  // Anything left over must be an exponent introduced by 'e' or 'E'.
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    // A single character before the marker that was a dot: no digits.
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      // Step back over trailing '0's, and over a '.' found among them, so
      // that p ends up on the last significant digit.
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    // (dot > p) and (dot > firstSigDigit && dot < p) subtract one for the dot
    // character itself when it lies inside the span being measured.
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
633 
634 /* Return the trailing fraction of a hexadecimal number.
635    DIGITVALUE is the first hex digit of the fraction, P points to
636    the next digit.  */
637 static Expected<lostFraction>
638 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
639                             unsigned int digitValue) {
640   unsigned int hexDigit;
641 
642   /* If the first trailing digit isn't 0 or 8 we can work out the
643      fraction immediately.  */
644   if (digitValue > 8)
645     return lfMoreThanHalf;
646   else if (digitValue < 8 && digitValue > 0)
647     return lfLessThanHalf;
648 
649   // Otherwise we need to find the first non-zero digit.
650   while (p != end && (*p == '0' || *p == '.'))
651     p++;
652 
653   if (p == end)
654     return createError("Invalid trailing hexadecimal fraction!");
655 
656   hexDigit = hexDigitValue(*p);
657 
658   /* If we ran off the end it is exactly zero or one-half, otherwise
659      a little more.  */
660   if (hexDigit == UINT_MAX)
661     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
662   else
663     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
664 }
665 
666 /* Return the fraction lost were a bignum truncated losing the least
667    significant BITS bits.  */
668 static lostFraction
669 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
670                               unsigned int partCount,
671                               unsigned int bits)
672 {
673   unsigned int lsb;
674 
675   lsb = APInt::tcLSB(parts, partCount);
676 
677   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
678   if (bits <= lsb)
679     return lfExactlyZero;
680   if (bits == lsb + 1)
681     return lfExactlyHalf;
682   if (bits <= partCount * APFloatBase::integerPartWidth &&
683       APInt::tcExtractBit(parts, bits - 1))
684     return lfMoreThanHalf;
685 
686   return lfLessThanHalf;
687 }
688 
689 /* Shift DST right BITS bits noting lost fraction.  */
690 static lostFraction
691 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
692 {
693   lostFraction lost_fraction;
694 
695   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
696 
697   APInt::tcShiftRight(dst, parts, bits);
698 
699   return lost_fraction;
700 }
701 
702 /* Combine the effect of two lost fractions.  */
703 static lostFraction
704 combineLostFractions(lostFraction moreSignificant,
705                      lostFraction lessSignificant)
706 {
707   if (lessSignificant != lfExactlyZero) {
708     if (moreSignificant == lfExactlyZero)
709       moreSignificant = lfLessThanHalf;
710     else if (moreSignificant == lfExactlyHalf)
711       moreSignificant = lfMoreThanHalf;
712   }
713 
714   return moreSignificant;
715 }
716 
/* Returns a strict upper bound, in half-ulps, on the error from the
   true value when multiplying two floating point numbers that differ
   from the values they approximate by at most HUERR1 and HUERR2
   half-ulps.  INEXACTMULTIPLY says whether the multiplication itself
   was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int operandError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1u : 0u;

  /* Exact operands: only the multiplication can contribute,
     <= inexactMultiply half-ulps.  */
  if (operandError == 0)
    return multiplyError * 2;

  return multiplyError + 2 * operandError;
}
734 
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.

   Bit I of the bignum is looked up in parts[I / integerPartWidth], so
   parts[0] is the least significant part.  When the distance is not
   one of the small cases handled below, an all-ones value ("a lot")
   is returned instead of an exact count.  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* From here on BITS is the index of the highest truncated bit.  COUNT
     is the part holding that bit and PARTBITS is how many of the
     truncated bits live in that part.  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* The truncated bits held in the top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* Within the top part the half-way point is its highest truncated bit;
     the zero boundary is simply 0.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  if (count == 0) {
    /* Everything fits in one part.  The subtractions are unsigned and
       wrap, so the smaller of the two is the absolute distance.  */
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* The top part sits exactly on the boundary, so the distance is the
       value of the lower parts.  It is only small if parts count-1 .. 1
       are all zero.  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* The top part is one below the boundary.  The distance is only
       small if parts count-1 .. 1 are all ones, in which case it is the
       unsigned negation of parts[0].  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
780 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER select a small power from a table; each
   remaining set bit N contributes a factor pow(5, pow(2, N+3)), obtained
   by repeated squaring.  POWER must not exceed maxExponent.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Storage for the successive squarings, laid out back to back.  The first
     entry is pow(5, 8).  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* Number of parts in the squaring that POW5 currently points at.  */
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* P1 always names the buffer holding the running product and P2 the
     spare one; they swap after every multiplication.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  /* Number of parts in the running product.  */
  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      /* Square the previous entry, which lies immediately before POW5.  */
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      /* Drop the top part if the square did not need it.  */
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product is now in p2, occupying RESULT parts.  Swap the
         pointers so that p1 names it again and p2 becomes the scratch
         space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step past this entry so the next squaring is stored after it.  */
    pow5 += partsCount;
  }

  /* After an odd number of swaps the answer is in the scratch buffer.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
836 
/* Hex digit tables, indexed by digit value.  The extra '0' at index 16
   avoids modular arithmetic when adding one; used when rounding up
   during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of the non-finite values.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
845 
846 /* Write out an integerPart in hexadecimal, starting with the most
847    significant nibble.  Write out exactly COUNT hexdigits, return
848    COUNT.  */
849 static unsigned int
850 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
851            const char *hexDigitChars)
852 {
853   unsigned int result = count;
854 
855   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
856 
857   part >>= (APFloatBase::integerPartWidth - 4 * count);
858   while (count--) {
859     dst[count] = hexDigitChars[part & 0xf];
860     part >>= 4;
861   }
862 
863   return result;
864 }
865 
/* Write N in decimal at DST (no terminator is added) and return a
   pointer just past the last character written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  /* Digits come out least significant first, so stage them locally and
     copy them to DST in reverse.  */
  char digits[40];
  unsigned int len = 0;

  do {
    digits[len++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  while (len != 0)
    *dst++ = digits[--len];

  return dst;
}
883 
884 /* Write out a signed decimal integer.  */
885 static char *
886 writeSignedDecimal (char *dst, int value)
887 {
888   if (value < 0) {
889     *dst++ = '-';
890     dst = writeUnsignedDecimal(dst, -(unsigned) value);
891   } else
892     dst = writeUnsignedDecimal(dst, value);
893 
894   return dst;
895 }
896 
897 namespace detail {
898 /* Constructors.  */
899 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
900   unsigned int count;
901 
902   semantics = ourSemantics;
903   count = partCount();
904   if (count > 1)
905     significand.parts = new integerPart[count];
906 }
907 
908 void IEEEFloat::freeSignificand() {
909   if (needsCleanup())
910     delete [] significand.parts;
911 }
912 
913 void IEEEFloat::assign(const IEEEFloat &rhs) {
914   assert(semantics == rhs.semantics);
915 
916   sign = rhs.sign;
917   category = rhs.category;
918   exponent = rhs.exponent;
919   if (isFiniteNonZero() || category == fcNaN)
920     copySignificand(rhs);
921 }
922 
923 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
924   assert(isFiniteNonZero() || category == fcNaN);
925   assert(rhs.partCount() >= partCount());
926 
927   APInt::tcAssign(significandParts(), rhs.significandParts(),
928                   partCount());
929 }
930 
/* Turn this value into a NaN.

   SNaN requests a signalling NaN (QNaN bit cleared) rather than a quiet
   one; NEGATIVE becomes the sign; FILL, when non-null, supplies the
   payload, which is truncated to the precision - 1 bits below the
   integer bit.  With a null FILL the payload starts out as zero.

   Formats whose nonFiniteBehavior is NanOnly ignore SNaN and FILL: the
   payload is dictated by the format's NaN encoding.  Calling this for a
   FiniteOnly format, or asking for a negative NaN in a format without a
   signed representation, hits llvm_unreachable.  */
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support NaN");

  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");

  category = fcNaN;
  sign = Negative;
  exponent = exponentNaN();

  integerPart *significand = significandParts();
  unsigned numParts = partCount();

  APInt fill_storage;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // NaN-only formats do not distinguish signalling and quiet NaN, so
    // the SNaN request is dropped and every NaN takes the non-signalling
    // path below.
    SNaN = false;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
      // The NaN shares the encoding of negative zero: sign set, payload
      // all zeros.
      sign = true;
      fill_storage = APInt::getZero(semantics->precision - 1);
    } else {
      fill_storage = APInt::getAllOnes(semantics->precision - 1);
    }
    // Override whatever payload the caller passed in.
    fill = &fill_storage;
  }

  // Set the significand bits to the fill.  Clear first when the fill is
  // absent or does not cover every part.
  if (!fill || fill->getNumWords() < numParts)
    APInt::tcSet(significand, 0, numParts);
  if (fill) {
    APInt::tcAssign(significand, fill->getRawData(),
                    std::min(fill->getNumWords(), numParts));

    // Zero out the excess bits of the significand.
    // NOTE(review): the literal 64 presumably mirrors integerPartWidth;
    // confirm before changing either.
    unsigned bitsToPreserve = semantics->precision - 1;
    unsigned part = bitsToPreserve / 64;
    bitsToPreserve %= 64;
    significand[part] &= ((1ULL << bitsToPreserve) - 1);
    for (part++; part != numParts; ++part)
      significand[part] = 0;
  }

  // The QNaN bit is the bit just below the integer bit; it is clamped to
  // bit 0 when the precision is below 2.
  unsigned QNaNBit =
      (semantics->precision >= 2) ? (semantics->precision - 2) : 0;

  if (SNaN) {
    // We always have to clear the QNaN bit to make it an SNaN.
    APInt::tcClearBit(significand, QNaNBit);

    // If there are no bits set in the payload, we have to set
    // *something* to make it a NaN instead of an infinity;
    // conventionally, this is the next bit down from the QNaN bit.
    // NOTE(review): QNaNBit - 1 wraps around if QNaNBit is 0; presumably
    // no format with precision <= 2 reaches this path -- confirm.
    if (APInt::tcIsZero(significand, numParts))
      APInt::tcSetBit(significand, QNaNBit - 1);
  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // The only NaN is a quiet NaN, and it has no bits set in the significand.
    // Do nothing.
  } else {
    // We always have to set the QNaN bit to make it a QNaN.
    APInt::tcSetBit(significand, QNaNBit);
  }

  // For x87 extended precision, we want to make a NaN, not a
  // pseudo-NaN.  Maybe we should expose the ability to make
  // pseudo-NaNs?
  if (semantics == &semX87DoubleExtended)
    APInt::tcSetBit(significand, QNaNBit + 1);
}
1005 
1006 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
1007   if (this != &rhs) {
1008     if (semantics != rhs.semantics) {
1009       freeSignificand();
1010       initialize(rhs.semantics);
1011     }
1012     assign(rhs);
1013   }
1014 
1015   return *this;
1016 }
1017 
1018 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
1019   freeSignificand();
1020 
1021   semantics = rhs.semantics;
1022   significand = rhs.significand;
1023   exponent = rhs.exponent;
1024   category = rhs.category;
1025   sign = rhs.sign;
1026 
1027   rhs.semantics = &semBogus;
1028   return *this;
1029 }
1030 
1031 bool IEEEFloat::isDenormal() const {
1032   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1033          (APInt::tcExtractBit(significandParts(),
1034                               semantics->precision - 1) == 0);
1035 }
1036 
1037 bool IEEEFloat::isSmallest() const {
1038   // The smallest number by magnitude in our format will be the smallest
1039   // denormal, i.e. the floating point number with exponent being minimum
1040   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1041   return isFiniteNonZero() && exponent == semantics->minExponent &&
1042     significandMSB() == 0;
1043 }
1044 
1045 bool IEEEFloat::isSmallestNormalized() const {
1046   return getCategory() == fcNormal && exponent == semantics->minExponent &&
1047          isSignificandAllZerosExceptMSB();
1048 }
1049 
1050 unsigned int IEEEFloat::getNumHighBits() const {
1051   const unsigned int PartCount = partCountForBits(semantics->precision);
1052   const unsigned int Bits = PartCount * integerPartWidth;
1053 
1054   // Compute how many bits are used in the final word.
1055   // When precision is just 1, it represents the 'Pth'
1056   // Precision bit and not the actual significand bit.
1057   const unsigned int NumHighBits = (semantics->precision > 1)
1058                                        ? (Bits - semantics->precision + 1)
1059                                        : (Bits - semantics->precision);
1060   return NumHighBits;
1061 }
1062 
1063 bool IEEEFloat::isSignificandAllOnes() const {
1064   // Test if the significand excluding the integral bit is all ones. This allows
1065   // us to test for binade boundaries.
1066   const integerPart *Parts = significandParts();
1067   const unsigned PartCount = partCountForBits(semantics->precision);
1068   for (unsigned i = 0; i < PartCount - 1; i++)
1069     if (~Parts[i])
1070       return false;
1071 
1072   // Set the unused high bits to all ones when we compare.
1073   const unsigned NumHighBits = getNumHighBits();
1074   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1075          "Can not have more high bits to fill than integerPartWidth");
1076   const integerPart HighBitFill =
1077     ~integerPart(0) << (integerPartWidth - NumHighBits);
1078   if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1079     return false;
1080 
1081   return true;
1082 }
1083 
1084 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1085   // Test if the significand excluding the integral bit is all ones except for
1086   // the least significant bit.
1087   const integerPart *Parts = significandParts();
1088 
1089   if (Parts[0] & 1)
1090     return false;
1091 
1092   const unsigned PartCount = partCountForBits(semantics->precision);
1093   for (unsigned i = 0; i < PartCount - 1; i++) {
1094     if (~Parts[i] & ~unsigned{!i})
1095       return false;
1096   }
1097 
1098   // Set the unused high bits to all ones when we compare.
1099   const unsigned NumHighBits = getNumHighBits();
1100   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1101          "Can not have more high bits to fill than integerPartWidth");
1102   const integerPart HighBitFill = ~integerPart(0)
1103                                   << (integerPartWidth - NumHighBits);
1104   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1105     return false;
1106 
1107   return true;
1108 }
1109 
1110 bool IEEEFloat::isSignificandAllZeros() const {
1111   // Test if the significand excluding the integral bit is all zeros. This
1112   // allows us to test for binade boundaries.
1113   const integerPart *Parts = significandParts();
1114   const unsigned PartCount = partCountForBits(semantics->precision);
1115 
1116   for (unsigned i = 0; i < PartCount - 1; i++)
1117     if (Parts[i])
1118       return false;
1119 
1120   // Compute how many bits are used in the final word.
1121   const unsigned NumHighBits = getNumHighBits();
1122   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1123          "clear than integerPartWidth");
1124   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1125 
1126   if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1127     return false;
1128 
1129   return true;
1130 }
1131 
1132 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1133   const integerPart *Parts = significandParts();
1134   const unsigned PartCount = partCountForBits(semantics->precision);
1135 
1136   for (unsigned i = 0; i < PartCount - 1; i++) {
1137     if (Parts[i])
1138       return false;
1139   }
1140 
1141   const unsigned NumHighBits = getNumHighBits();
1142   const integerPart MSBMask = integerPart(1)
1143                               << (integerPartWidth - NumHighBits);
1144   return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1145 }
1146 
1147 bool IEEEFloat::isLargest() const {
1148   bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1149   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1150       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1151     // The largest number by magnitude in our format will be the floating point
1152     // number with maximum exponent and with significand that is all ones except
1153     // the LSB.
1154     return (IsMaxExp && APFloat::hasSignificand(*semantics))
1155                ? isSignificandAllOnesExceptLSB()
1156                : IsMaxExp;
1157   } else {
1158     // The largest number by magnitude in our format will be the floating point
1159     // number with maximum exponent and with significand that is all ones.
1160     return IsMaxExp && isSignificandAllOnes();
1161   }
1162 }
1163 
1164 bool IEEEFloat::isInteger() const {
1165   // This could be made more efficient; I'm going for obviously correct.
1166   if (!isFinite()) return false;
1167   IEEEFloat truncated = *this;
1168   truncated.roundToIntegral(rmTowardZero);
1169   return compare(truncated) == cmpEqual;
1170 }
1171 
1172 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1173   if (this == &rhs)
1174     return true;
1175   if (semantics != rhs.semantics ||
1176       category != rhs.category ||
1177       sign != rhs.sign)
1178     return false;
1179   if (category==fcZero || category==fcInfinity)
1180     return true;
1181 
1182   if (isFiniteNonZero() && exponent != rhs.exponent)
1183     return false;
1184 
1185   return std::equal(significandParts(), significandParts() + partCount(),
1186                     rhs.significandParts());
1187 }
1188 
1189 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1190   initialize(&ourSemantics);
1191   sign = 0;
1192   category = fcNormal;
1193   zeroSignificand();
1194   exponent = ourSemantics.precision - 1;
1195   significandParts()[0] = value;
1196   normalize(rmNearestTiesToEven, lfExactlyZero);
1197 }
1198 
1199 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1200   initialize(&ourSemantics);
1201   // The Float8E8MOFNU format does not have a representation
1202   // for zero. So, use the closest representation instead.
1203   // Moreover, the all-zero encoding represents a valid
1204   // normal value (which is the smallestNormalized here).
1205   // Hence, we call makeSmallestNormalized (where category is
1206   // 'fcNormal') instead of makeZero (where category is 'fcZero').
1207   ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1208 }
1209 
// "Uninitialized" construction.  Delegate to the previous constructor,
// because a later copy constructor may actually inspect the category,
// which therefore must not be garbage.  The tag argument only selects
// this overload; it is not otherwise used.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
    : IEEEFloat(ourSemantics) {}
1214 
1215 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1216   initialize(rhs.semantics);
1217   assign(rhs);
1218 }
1219 
1220 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1221   *this = std::move(rhs);
1222 }
1223 
1224 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1225 
1226 unsigned int IEEEFloat::partCount() const {
1227   return partCountForBits(semantics->precision + 1);
1228 }
1229 
1230 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1231   return const_cast<IEEEFloat *>(this)->significandParts();
1232 }
1233 
1234 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1235   if (partCount() > 1)
1236     return significand.parts;
1237   else
1238     return &significand.part;
1239 }
1240 
1241 void IEEEFloat::zeroSignificand() {
1242   APInt::tcSet(significandParts(), 0, partCount());
1243 }
1244 
1245 /* Increment an fcNormal floating point number's significand.  */
1246 void IEEEFloat::incrementSignificand() {
1247   integerPart carry;
1248 
1249   carry = APInt::tcIncrement(significandParts(), partCount());
1250 
1251   /* Our callers should never cause us to overflow.  */
1252   assert(carry == 0);
1253   (void)carry;
1254 }
1255 
1256 /* Add the significand of the RHS.  Returns the carry flag.  */
1257 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1258   integerPart *parts;
1259 
1260   parts = significandParts();
1261 
1262   assert(semantics == rhs.semantics);
1263   assert(exponent == rhs.exponent);
1264 
1265   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1266 }
1267 
1268 /* Subtract the significand of the RHS with a borrow flag.  Returns
1269    the borrow flag.  */
1270 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1271                                                       integerPart borrow) {
1272   integerPart *parts;
1273 
1274   parts = significandParts();
1275 
1276   assert(semantics == rhs.semantics);
1277   assert(exponent == rhs.exponent);
1278 
1279   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1280                            partCount());
1281 }
1282 
/* Multiply our significand by the significand of RHS, leaving the
   (unnormalized) result in this value and adjusting the exponent.
   Unless IGNOREADDEND is set or ADDEND is zero, ADDEND is added on to
   the full-precision result of the multiplication before it is narrowed
   back to the working precision.  Returns the lost fraction.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Small products live in the on-stack scratch buffer; larger ones are
  // heap-allocated and released at the end of this function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // While the addition runs, this object temporarily takes on the
    // extended semantics and the full-width product as its significand;
    // the saved copies below restore the original state afterwards.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // A single-part significand is stored inline, so copy the value in;
    // otherwise alias the product buffer directly.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, not
  // precision; presumably the extra one pairs with the "exponent += 2"
  // adjustment made earlier -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Copy the low partsCount parts of the product back into our own
  // significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1418 
1419 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1420   // When the given semantics has zero, the addend here is a zero.
1421   // i.e . it belongs to the 'fcZero' category.
1422   // But when the semantics does not support zero, we need to
1423   // explicitly convey that this addend should be ignored
1424   // for multiplication.
1425   return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1426 }
1427 
/* Divide our significand by the significand of RHS using bit-by-bit long
   division.  The quotient replaces our significand, the exponent is
   adjusted to match, and the lost fraction is returned.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* One buffer holds both working copies: the dividend first, then the
     divisor.  Up to two parts each fit in the on-stack scratch array.  */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.
     Our own significand is cleared so the quotient bits can be set into
     it below.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: produce one quotient bit per iteration, from the
     integer bit downwards.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction by comparing the (already doubled)
     remainder with the divisor.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1510 
1511 unsigned int IEEEFloat::significandMSB() const {
1512   return APInt::tcMSB(significandParts(), partCount());
1513 }
1514 
1515 unsigned int IEEEFloat::significandLSB() const {
1516   return APInt::tcLSB(significandParts(), partCount());
1517 }
1518 
1519 /* Note that a zero result is NOT normalized to fcZero.  */
1520 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1521   /* Our exponent should not overflow.  */
1522   assert((ExponentType) (exponent + bits) >= exponent);
1523 
1524   exponent += bits;
1525 
1526   return shiftRight(significandParts(), partCount(), bits);
1527 }
1528 
1529 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1530 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1531   assert(bits < semantics->precision ||
1532          (semantics->precision == 1 && bits <= 1));
1533 
1534   if (bits) {
1535     unsigned int partsCount = partCount();
1536 
1537     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1538     exponent -= bits;
1539 
1540     assert(!APInt::tcIsZero(significandParts(), partsCount));
1541   }
1542 }
1543 
1544 IEEEFloat::cmpResult
1545 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1546   int compare;
1547 
1548   assert(semantics == rhs.semantics);
1549   assert(isFiniteNonZero());
1550   assert(rhs.isFiniteNonZero());
1551 
1552   compare = exponent - rhs.exponent;
1553 
1554   /* If exponents are equal, do an unsigned bignum comparison of the
1555      significands.  */
1556   if (compare == 0)
1557     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1558                                partCount());
1559 
1560   if (compare > 0)
1561     return cmpGreaterThan;
1562   else if (compare < 0)
1563     return cmpLessThan;
1564   else
1565     return cmpEqual;
1566 }
1567 
1568 /* Set the least significant BITS bits of a bignum, clear the
1569    rest.  */
1570 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1571                                       unsigned bits) {
1572   unsigned i = 0;
1573   while (bits > APInt::APINT_BITS_PER_WORD) {
1574     dst[i++] = ~(APInt::WordType)0;
1575     bits -= APInt::APINT_BITS_PER_WORD;
1576   }
1577 
1578   if (bits)
1579     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1580 
1581   while (i < parts)
1582     dst[i++] = 0;
1583 }
1584 
1585 /* Handle overflow.  Sign is preserved.  We either become infinity or
1586    the largest finite number.  */
1587 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1588   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1589     /* Infinity?  */
1590     if (rounding_mode == rmNearestTiesToEven ||
1591         rounding_mode == rmNearestTiesToAway ||
1592         (rounding_mode == rmTowardPositive && !sign) ||
1593         (rounding_mode == rmTowardNegative && sign)) {
1594       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1595         makeNaN(false, sign);
1596       else
1597         category = fcInfinity;
1598       return static_cast<opStatus>(opOverflow | opInexact);
1599     }
1600   }
1601 
1602   /* Otherwise we become the largest finite number.  */
1603   category = fcNormal;
1604   exponent = semantics->maxExponent;
1605   tcSetLeastSignificantBits(significandParts(), partCount(),
1606                             semantics->precision);
1607   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1608       semantics->nanEncoding == fltNanEncoding::AllOnes)
1609     APInt::tcClearBit(significandParts(), 0);
1610 
1611   return opInexact;
1612 }
1613 
1614 /* Returns TRUE if, when truncating the current number, with BIT the
1615    new LSB, with the given lost fraction and rounding mode, the result
1616    would need to be rounded away from zero (i.e., by increasing the
1617    signficand).  This routine must work for fcZero of both signs, and
1618    fcNormal numbers.  */
1619 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1620                                   lostFraction lost_fraction,
1621                                   unsigned int bit) const {
1622   /* NaNs and infinities should not have lost fractions.  */
1623   assert(isFiniteNonZero() || category == fcZero);
1624 
1625   /* Current callers never pass this so we don't handle it.  */
1626   assert(lost_fraction != lfExactlyZero);
1627 
1628   switch (rounding_mode) {
1629   case rmNearestTiesToAway:
1630     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1631 
1632   case rmNearestTiesToEven:
1633     if (lost_fraction == lfMoreThanHalf)
1634       return true;
1635 
1636     /* Our zeroes don't have a significand to test.  */
1637     if (lost_fraction == lfExactlyHalf && category != fcZero)
1638       return APInt::tcExtractBit(significandParts(), bit);
1639 
1640     return false;
1641 
1642   case rmTowardZero:
1643     return false;
1644 
1645   case rmTowardPositive:
1646     return !sign;
1647 
1648   case rmTowardNegative:
1649     return sign;
1650 
1651   default:
1652     break;
1653   }
1654   llvm_unreachable("Invalid rounding mode found");
1655 }
1656 
1657 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1658                                          lostFraction lost_fraction) {
1659   unsigned int omsb;                /* One, not zero, based MSB.  */
1660   int exponentChange;
1661 
1662   if (!isFiniteNonZero())
1663     return opOK;
1664 
1665   /* Before rounding normalize the exponent of fcNormal numbers.  */
1666   omsb = significandMSB() + 1;
1667 
1668   if (omsb) {
1669     /* OMSB is numbered from 1.  We want to place it in the integer
1670        bit numbered PRECISION if possible, with a compensating change in
1671        the exponent.  */
1672     exponentChange = omsb - semantics->precision;
1673 
1674     /* If the resulting exponent is too high, overflow according to
1675        the rounding mode.  */
1676     if (exponent + exponentChange > semantics->maxExponent)
1677       return handleOverflow(rounding_mode);
1678 
1679     /* Subnormal numbers have exponent minExponent, and their MSB
1680        is forced based on that.  */
1681     if (exponent + exponentChange < semantics->minExponent)
1682       exponentChange = semantics->minExponent - exponent;
1683 
1684     /* Shifting left is easy as we don't lose precision.  */
1685     if (exponentChange < 0) {
1686       assert(lost_fraction == lfExactlyZero);
1687 
1688       shiftSignificandLeft(-exponentChange);
1689 
1690       return opOK;
1691     }
1692 
1693     if (exponentChange > 0) {
1694       lostFraction lf;
1695 
1696       /* Shift right and capture any new lost fraction.  */
1697       lf = shiftSignificandRight(exponentChange);
1698 
1699       lost_fraction = combineLostFractions(lf, lost_fraction);
1700 
1701       /* Keep OMSB up-to-date.  */
1702       if (omsb > (unsigned) exponentChange)
1703         omsb -= exponentChange;
1704       else
1705         omsb = 0;
1706     }
1707   }
1708 
1709   // The all-ones values is an overflow if NaN is all ones. If NaN is
1710   // represented by negative zero, then it is a valid finite value.
1711   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1712       semantics->nanEncoding == fltNanEncoding::AllOnes &&
1713       exponent == semantics->maxExponent && isSignificandAllOnes())
1714     return handleOverflow(rounding_mode);
1715 
1716   /* Now round the number according to rounding_mode given the lost
1717      fraction.  */
1718 
1719   /* As specified in IEEE 754, since we do not trap we do not report
1720      underflow for exact results.  */
1721   if (lost_fraction == lfExactlyZero) {
1722     /* Canonicalize zeroes.  */
1723     if (omsb == 0) {
1724       category = fcZero;
1725       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1726         sign = false;
1727       if (!semantics->hasZero)
1728         makeSmallestNormalized(false);
1729     }
1730 
1731     return opOK;
1732   }
1733 
1734   /* Increment the significand if we're rounding away from zero.  */
1735   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1736     if (omsb == 0)
1737       exponent = semantics->minExponent;
1738 
1739     incrementSignificand();
1740     omsb = significandMSB() + 1;
1741 
1742     /* Did the significand increment overflow?  */
1743     if (omsb == (unsigned) semantics->precision + 1) {
1744       /* Renormalize by incrementing the exponent and shifting our
1745          significand right one.  However if we already have the
1746          maximum exponent we overflow to infinity.  */
1747       if (exponent == semantics->maxExponent)
1748         // Invoke overflow handling with a rounding mode that will guarantee
1749         // that the result gets turned into the correct infinity representation.
1750         // This is needed instead of just setting the category to infinity to
1751         // account for 8-bit floating point types that have no inf, only NaN.
1752         return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1753 
1754       shiftSignificandRight(1);
1755 
1756       return opInexact;
1757     }
1758 
1759     // The all-ones values is an overflow if NaN is all ones. If NaN is
1760     // represented by negative zero, then it is a valid finite value.
1761     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1762         semantics->nanEncoding == fltNanEncoding::AllOnes &&
1763         exponent == semantics->maxExponent && isSignificandAllOnes())
1764       return handleOverflow(rounding_mode);
1765   }
1766 
1767   /* The normal case - we were and are not denormal, and any
1768      significand increment above didn't overflow.  */
1769   if (omsb == semantics->precision)
1770     return opInexact;
1771 
1772   /* We have a non-zero denormal.  */
1773   assert(omsb < semantics->precision);
1774 
1775   /* Canonicalize zeroes.  */
1776   if (omsb == 0) {
1777     category = fcZero;
1778     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1779       sign = false;
1780     // This condition handles the case where the semantics
1781     // does not have zero but uses the all-zero encoding
1782     // to represent the smallest normal value.
1783     if (!semantics->hasZero)
1784       makeSmallestNormalized(false);
1785   }
1786 
1787   /* The fcZero case is a denormal that underflowed to zero.  */
1788   return (opStatus) (opUnderflow | opInexact);
1789 }
1790 
1791 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1792                                                      bool subtract) {
1793   switch (PackCategoriesIntoKey(category, rhs.category)) {
1794   default:
1795     llvm_unreachable(nullptr);
1796 
1797   case PackCategoriesIntoKey(fcZero, fcNaN):
1798   case PackCategoriesIntoKey(fcNormal, fcNaN):
1799   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1800     assign(rhs);
1801     [[fallthrough]];
1802   case PackCategoriesIntoKey(fcNaN, fcZero):
1803   case PackCategoriesIntoKey(fcNaN, fcNormal):
1804   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1805   case PackCategoriesIntoKey(fcNaN, fcNaN):
1806     if (isSignaling()) {
1807       makeQuiet();
1808       return opInvalidOp;
1809     }
1810     return rhs.isSignaling() ? opInvalidOp : opOK;
1811 
1812   case PackCategoriesIntoKey(fcNormal, fcZero):
1813   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1814   case PackCategoriesIntoKey(fcInfinity, fcZero):
1815     return opOK;
1816 
1817   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1818   case PackCategoriesIntoKey(fcZero, fcInfinity):
1819     category = fcInfinity;
1820     sign = rhs.sign ^ subtract;
1821     return opOK;
1822 
1823   case PackCategoriesIntoKey(fcZero, fcNormal):
1824     assign(rhs);
1825     sign = rhs.sign ^ subtract;
1826     return opOK;
1827 
1828   case PackCategoriesIntoKey(fcZero, fcZero):
1829     /* Sign depends on rounding mode; handled by caller.  */
1830     return opOK;
1831 
1832   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1833     /* Differently signed infinities can only be validly
1834        subtracted.  */
1835     if (((sign ^ rhs.sign)!=0) != subtract) {
1836       makeNaN();
1837       return opInvalidOp;
1838     }
1839 
1840     return opOK;
1841 
1842   case PackCategoriesIntoKey(fcNormal, fcNormal):
1843     return opDivByZero;
1844   }
1845 }
1846 
1847 /* Add or subtract two normal numbers.  */
1848 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1849                                                  bool subtract) {
1850   integerPart carry;
1851   lostFraction lost_fraction;
1852   int bits;
1853 
1854   /* Determine if the operation on the absolute values is effectively
1855      an addition or subtraction.  */
1856   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1857 
1858   /* Are we bigger exponent-wise than the RHS?  */
1859   bits = exponent - rhs.exponent;
1860 
1861   /* Subtraction is more subtle than one might naively expect.  */
1862   if (subtract) {
1863     if ((bits < 0) && !semantics->hasSignedRepr)
1864       llvm_unreachable(
1865           "This floating point format does not support signed values");
1866 
1867     IEEEFloat temp_rhs(rhs);
1868 
1869     if (bits == 0)
1870       lost_fraction = lfExactlyZero;
1871     else if (bits > 0) {
1872       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1873       shiftSignificandLeft(1);
1874     } else {
1875       lost_fraction = shiftSignificandRight(-bits - 1);
1876       temp_rhs.shiftSignificandLeft(1);
1877     }
1878 
1879     // Should we reverse the subtraction.
1880     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1881       carry = temp_rhs.subtractSignificand
1882         (*this, lost_fraction != lfExactlyZero);
1883       copySignificand(temp_rhs);
1884       sign = !sign;
1885     } else {
1886       carry = subtractSignificand
1887         (temp_rhs, lost_fraction != lfExactlyZero);
1888     }
1889 
1890     /* Invert the lost fraction - it was on the RHS and
1891        subtracted.  */
1892     if (lost_fraction == lfLessThanHalf)
1893       lost_fraction = lfMoreThanHalf;
1894     else if (lost_fraction == lfMoreThanHalf)
1895       lost_fraction = lfLessThanHalf;
1896 
1897     /* The code above is intended to ensure that no borrow is
1898        necessary.  */
1899     assert(!carry);
1900     (void)carry;
1901   } else {
1902     if (bits > 0) {
1903       IEEEFloat temp_rhs(rhs);
1904 
1905       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1906       carry = addSignificand(temp_rhs);
1907     } else {
1908       lost_fraction = shiftSignificandRight(-bits);
1909       carry = addSignificand(rhs);
1910     }
1911 
1912     /* We have a guard bit; generating a carry cannot happen.  */
1913     assert(!carry);
1914     (void)carry;
1915   }
1916 
1917   return lost_fraction;
1918 }
1919 
1920 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1921   switch (PackCategoriesIntoKey(category, rhs.category)) {
1922   default:
1923     llvm_unreachable(nullptr);
1924 
1925   case PackCategoriesIntoKey(fcZero, fcNaN):
1926   case PackCategoriesIntoKey(fcNormal, fcNaN):
1927   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1928     assign(rhs);
1929     sign = false;
1930     [[fallthrough]];
1931   case PackCategoriesIntoKey(fcNaN, fcZero):
1932   case PackCategoriesIntoKey(fcNaN, fcNormal):
1933   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1934   case PackCategoriesIntoKey(fcNaN, fcNaN):
1935     sign ^= rhs.sign; // restore the original sign
1936     if (isSignaling()) {
1937       makeQuiet();
1938       return opInvalidOp;
1939     }
1940     return rhs.isSignaling() ? opInvalidOp : opOK;
1941 
1942   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1943   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1944   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1945     category = fcInfinity;
1946     return opOK;
1947 
1948   case PackCategoriesIntoKey(fcZero, fcNormal):
1949   case PackCategoriesIntoKey(fcNormal, fcZero):
1950   case PackCategoriesIntoKey(fcZero, fcZero):
1951     category = fcZero;
1952     return opOK;
1953 
1954   case PackCategoriesIntoKey(fcZero, fcInfinity):
1955   case PackCategoriesIntoKey(fcInfinity, fcZero):
1956     makeNaN();
1957     return opInvalidOp;
1958 
1959   case PackCategoriesIntoKey(fcNormal, fcNormal):
1960     return opOK;
1961   }
1962 }
1963 
1964 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1965   switch (PackCategoriesIntoKey(category, rhs.category)) {
1966   default:
1967     llvm_unreachable(nullptr);
1968 
1969   case PackCategoriesIntoKey(fcZero, fcNaN):
1970   case PackCategoriesIntoKey(fcNormal, fcNaN):
1971   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1972     assign(rhs);
1973     sign = false;
1974     [[fallthrough]];
1975   case PackCategoriesIntoKey(fcNaN, fcZero):
1976   case PackCategoriesIntoKey(fcNaN, fcNormal):
1977   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1978   case PackCategoriesIntoKey(fcNaN, fcNaN):
1979     sign ^= rhs.sign; // restore the original sign
1980     if (isSignaling()) {
1981       makeQuiet();
1982       return opInvalidOp;
1983     }
1984     return rhs.isSignaling() ? opInvalidOp : opOK;
1985 
1986   case PackCategoriesIntoKey(fcInfinity, fcZero):
1987   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1988   case PackCategoriesIntoKey(fcZero, fcInfinity):
1989   case PackCategoriesIntoKey(fcZero, fcNormal):
1990     return opOK;
1991 
1992   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1993     category = fcZero;
1994     return opOK;
1995 
1996   case PackCategoriesIntoKey(fcNormal, fcZero):
1997     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1998       makeNaN(false, sign);
1999     else
2000       category = fcInfinity;
2001     return opDivByZero;
2002 
2003   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2004   case PackCategoriesIntoKey(fcZero, fcZero):
2005     makeNaN();
2006     return opInvalidOp;
2007 
2008   case PackCategoriesIntoKey(fcNormal, fcNormal):
2009     return opOK;
2010   }
2011 }
2012 
2013 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2014   switch (PackCategoriesIntoKey(category, rhs.category)) {
2015   default:
2016     llvm_unreachable(nullptr);
2017 
2018   case PackCategoriesIntoKey(fcZero, fcNaN):
2019   case PackCategoriesIntoKey(fcNormal, fcNaN):
2020   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2021     assign(rhs);
2022     [[fallthrough]];
2023   case PackCategoriesIntoKey(fcNaN, fcZero):
2024   case PackCategoriesIntoKey(fcNaN, fcNormal):
2025   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2026   case PackCategoriesIntoKey(fcNaN, fcNaN):
2027     if (isSignaling()) {
2028       makeQuiet();
2029       return opInvalidOp;
2030     }
2031     return rhs.isSignaling() ? opInvalidOp : opOK;
2032 
2033   case PackCategoriesIntoKey(fcZero, fcInfinity):
2034   case PackCategoriesIntoKey(fcZero, fcNormal):
2035   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2036     return opOK;
2037 
2038   case PackCategoriesIntoKey(fcNormal, fcZero):
2039   case PackCategoriesIntoKey(fcInfinity, fcZero):
2040   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2041   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2042   case PackCategoriesIntoKey(fcZero, fcZero):
2043     makeNaN();
2044     return opInvalidOp;
2045 
2046   case PackCategoriesIntoKey(fcNormal, fcNormal):
2047     return opOK;
2048   }
2049 }
2050 
2051 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2052   switch (PackCategoriesIntoKey(category, rhs.category)) {
2053   default:
2054     llvm_unreachable(nullptr);
2055 
2056   case PackCategoriesIntoKey(fcZero, fcNaN):
2057   case PackCategoriesIntoKey(fcNormal, fcNaN):
2058   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2059     assign(rhs);
2060     [[fallthrough]];
2061   case PackCategoriesIntoKey(fcNaN, fcZero):
2062   case PackCategoriesIntoKey(fcNaN, fcNormal):
2063   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2064   case PackCategoriesIntoKey(fcNaN, fcNaN):
2065     if (isSignaling()) {
2066       makeQuiet();
2067       return opInvalidOp;
2068     }
2069     return rhs.isSignaling() ? opInvalidOp : opOK;
2070 
2071   case PackCategoriesIntoKey(fcZero, fcInfinity):
2072   case PackCategoriesIntoKey(fcZero, fcNormal):
2073   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2074     return opOK;
2075 
2076   case PackCategoriesIntoKey(fcNormal, fcZero):
2077   case PackCategoriesIntoKey(fcInfinity, fcZero):
2078   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2079   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2080   case PackCategoriesIntoKey(fcZero, fcZero):
2081     makeNaN();
2082     return opInvalidOp;
2083 
2084   case PackCategoriesIntoKey(fcNormal, fcNormal):
2085     return opDivByZero; // fake status, indicating this is not a special case
2086   }
2087 }
2088 
2089 /* Change sign.  */
2090 void IEEEFloat::changeSign() {
2091   // With NaN-as-negative-zero, neither NaN or negative zero can change
2092   // their signs.
2093   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2094       (isZero() || isNaN()))
2095     return;
2096   /* Look mummy, this one's easy.  */
2097   sign = !sign;
2098 }
2099 
2100 /* Normalized addition or subtraction.  */
2101 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2102                                              roundingMode rounding_mode,
2103                                              bool subtract) {
2104   opStatus fs;
2105 
2106   fs = addOrSubtractSpecials(rhs, subtract);
2107 
2108   /* This return code means it was not a simple case.  */
2109   if (fs == opDivByZero) {
2110     lostFraction lost_fraction;
2111 
2112     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2113     fs = normalize(rounding_mode, lost_fraction);
2114 
2115     /* Can only be zero if we lost no fraction.  */
2116     assert(category != fcZero || lost_fraction == lfExactlyZero);
2117   }
2118 
2119   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2120      positive zero unless rounding to minus infinity, except that
2121      adding two like-signed zeroes gives that zero.  */
2122   if (category == fcZero) {
2123     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2124       sign = (rounding_mode == rmTowardNegative);
2125     // NaN-in-negative-zero means zeros need to be normalized to +0.
2126     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2127       sign = false;
2128   }
2129 
2130   return fs;
2131 }
2132 
2133 /* Normalized addition.  */
2134 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2135                                    roundingMode rounding_mode) {
2136   return addOrSubtract(rhs, rounding_mode, false);
2137 }
2138 
2139 /* Normalized subtraction.  */
2140 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2141                                         roundingMode rounding_mode) {
2142   return addOrSubtract(rhs, rounding_mode, true);
2143 }
2144 
2145 /* Normalized multiply.  */
2146 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2147                                         roundingMode rounding_mode) {
2148   opStatus fs;
2149 
2150   sign ^= rhs.sign;
2151   fs = multiplySpecials(rhs);
2152 
2153   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2154     sign = false;
2155   if (isFiniteNonZero()) {
2156     lostFraction lost_fraction = multiplySignificand(rhs);
2157     fs = normalize(rounding_mode, lost_fraction);
2158     if (lost_fraction != lfExactlyZero)
2159       fs = (opStatus) (fs | opInexact);
2160   }
2161 
2162   return fs;
2163 }
2164 
2165 /* Normalized divide.  */
2166 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2167                                       roundingMode rounding_mode) {
2168   opStatus fs;
2169 
2170   sign ^= rhs.sign;
2171   fs = divideSpecials(rhs);
2172 
2173   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2174     sign = false;
2175   if (isFiniteNonZero()) {
2176     lostFraction lost_fraction = divideSignificand(rhs);
2177     fs = normalize(rounding_mode, lost_fraction);
2178     if (lost_fraction != lfExactlyZero)
2179       fs = (opStatus) (fs | opInexact);
2180   }
2181 
2182   return fs;
2183 }
2184 
2185 /* Normalized remainder.  */
2186 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2187   opStatus fs;
2188   unsigned int origSign = sign;
2189 
2190   // First handle the special cases.
2191   fs = remainderSpecials(rhs);
2192   if (fs != opDivByZero)
2193     return fs;
2194 
2195   fs = opOK;
2196 
2197   // Make sure the current value is less than twice the denom. If the addition
2198   // did not succeed (an overflow has happened), which means that the finite
2199   // value we currently posses must be less than twice the denom (as we are
2200   // using the same semantics).
2201   IEEEFloat P2 = rhs;
2202   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2203     fs = mod(P2);
2204     assert(fs == opOK);
2205   }
2206 
2207   // Lets work with absolute numbers.
2208   IEEEFloat P = rhs;
2209   P.sign = false;
2210   sign = false;
2211 
2212   //
2213   // To calculate the remainder we use the following scheme.
2214   //
2215   // The remainder is defained as follows:
2216   //
2217   // remainder = numer - rquot * denom = x - r * p
2218   //
2219   // Where r is the result of: x/p, rounded toward the nearest integral value
2220   // (with halfway cases rounded toward the even number).
2221   //
2222   // Currently, (after x mod 2p):
2223   // r is the number of 2p's present inside x, which is inherently, an even
2224   // number of p's.
2225   //
2226   // We may split the remaining calculation into 4 options:
2227   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2228   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2229   //   are done as well.
2230   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2231   //   to subtract 1p at least once.
2232   // - if x >= p then we must subtract p at least once, as x must be a
2233   //   remainder.
2234   //
2235   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2236   //
2237   // We can now split the remaining calculation to the following 3 options:
2238   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2239   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2240   //   must round up to the next even number. so we must subtract p once more.
2241   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2242   //   integral, and subtract p once more.
2243   //
2244 
2245   // Extend the semantics to prevent an overflow/underflow or inexact result.
2246   bool losesInfo;
2247   fltSemantics extendedSemantics = *semantics;
2248   extendedSemantics.maxExponent++;
2249   extendedSemantics.minExponent--;
2250   extendedSemantics.precision += 2;
2251 
2252   IEEEFloat VEx = *this;
2253   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2254   assert(fs == opOK && !losesInfo);
2255   IEEEFloat PEx = P;
2256   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2257   assert(fs == opOK && !losesInfo);
2258 
2259   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2260   // any fraction.
2261   fs = VEx.add(VEx, rmNearestTiesToEven);
2262   assert(fs == opOK);
2263 
2264   if (VEx.compare(PEx) == cmpGreaterThan) {
2265     fs = subtract(P, rmNearestTiesToEven);
2266     assert(fs == opOK);
2267 
2268     // Make VEx = this.add(this), but because we have different semantics, we do
2269     // not want to `convert` again, so we just subtract PEx twice (which equals
2270     // to the desired value).
2271     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2272     assert(fs == opOK);
2273     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2274     assert(fs == opOK);
2275 
2276     cmpResult result = VEx.compare(PEx);
2277     if (result == cmpGreaterThan || result == cmpEqual) {
2278       fs = subtract(P, rmNearestTiesToEven);
2279       assert(fs == opOK);
2280     }
2281   }
2282 
2283   if (isZero()) {
2284     sign = origSign;    // IEEE754 requires this
2285     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2286       // But some 8-bit floats only have positive 0.
2287       sign = false;
2288   }
2289 
2290   else
2291     sign ^= origSign;
2292   return fs;
2293 }
2294 
2295 /* Normalized llvm frem (C fmod). */
2296 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2297   opStatus fs;
2298   fs = modSpecials(rhs);
2299   unsigned int origSign = sign;
2300 
2301   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2302          compareAbsoluteValue(rhs) != cmpLessThan) {
2303     int Exp = ilogb(*this) - ilogb(rhs);
2304     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2305     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2306     // check for it.
2307     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2308       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2309     V.sign = sign;
2310 
2311     fs = subtract(V, rmNearestTiesToEven);
2312 
2313     // When the semantics supports zero, this loop's
2314     // exit-condition is handled by the 'isFiniteNonZero'
2315     // category check above. However, when the semantics
2316     // does not have 'fcZero' and we have reached the
2317     // minimum possible value, (and any further subtract
2318     // will underflow to the same value) explicitly
2319     // provide an exit-path here.
2320     if (!semantics->hasZero && this->isSmallest())
2321       break;
2322 
2323     assert(fs==opOK);
2324   }
2325   if (isZero()) {
2326     sign = origSign; // fmod requires this
2327     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2328       sign = false;
2329   }
2330   return fs;
2331 }
2332 
2333 /* Normalized fused-multiply-add.  */
2334 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2335                                                 const IEEEFloat &addend,
2336                                                 roundingMode rounding_mode) {
2337   opStatus fs;
2338 
2339   /* Post-multiplication sign, before addition.  */
2340   sign ^= multiplicand.sign;
2341 
2342   /* If and only if all arguments are normal do we need to do an
2343      extended-precision calculation.  */
2344   if (isFiniteNonZero() &&
2345       multiplicand.isFiniteNonZero() &&
2346       addend.isFinite()) {
2347     lostFraction lost_fraction;
2348 
2349     lost_fraction = multiplySignificand(multiplicand, addend);
2350     fs = normalize(rounding_mode, lost_fraction);
2351     if (lost_fraction != lfExactlyZero)
2352       fs = (opStatus) (fs | opInexact);
2353 
2354     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2355        positive zero unless rounding to minus infinity, except that
2356        adding two like-signed zeroes gives that zero.  */
2357     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2358       sign = (rounding_mode == rmTowardNegative);
2359       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2360         sign = false;
2361     }
2362   } else {
2363     fs = multiplySpecials(multiplicand);
2364 
2365     /* FS can only be opOK or opInvalidOp.  There is no more work
2366        to do in the latter case.  The IEEE-754R standard says it is
2367        implementation-defined in this case whether, if ADDEND is a
2368        quiet NaN, we raise invalid op; this implementation does so.
2369 
2370        If we need to do the addition we can do so with normal
2371        precision.  */
2372     if (fs == opOK)
2373       fs = addOrSubtract(addend, rounding_mode, false);
2374   }
2375 
2376   return fs;
2377 }
2378 
2379 /* Rounding-mode correct round to integral value.  */
2380 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2381   opStatus fs;
2382 
2383   if (isInfinity())
2384     // [IEEE Std 754-2008 6.1]:
2385     // The behavior of infinity in floating-point arithmetic is derived from the
2386     // limiting cases of real arithmetic with operands of arbitrarily
2387     // large magnitude, when such a limit exists.
2388     // ...
2389     // Operations on infinite operands are usually exact and therefore signal no
2390     // exceptions ...
2391     return opOK;
2392 
2393   if (isNaN()) {
2394     if (isSignaling()) {
2395       // [IEEE Std 754-2008 6.2]:
2396       // Under default exception handling, any operation signaling an invalid
2397       // operation exception and for which a floating-point result is to be
2398       // delivered shall deliver a quiet NaN.
2399       makeQuiet();
2400       // [IEEE Std 754-2008 6.2]:
2401       // Signaling NaNs shall be reserved operands that, under default exception
2402       // handling, signal the invalid operation exception(see 7.2) for every
2403       // general-computational and signaling-computational operation except for
2404       // the conversions described in 5.12.
2405       return opInvalidOp;
2406     } else {
2407       // [IEEE Std 754-2008 6.2]:
2408       // For an operation with quiet NaN inputs, other than maximum and minimum
2409       // operations, if a floating-point result is to be delivered the result
2410       // shall be a quiet NaN which should be one of the input NaNs.
2411       // ...
2412       // Every general-computational and quiet-computational operation involving
2413       // one or more input NaNs, none of them signaling, shall signal no
2414       // exception, except fusedMultiplyAdd might signal the invalid operation
2415       // exception(see 7.2).
2416       return opOK;
2417     }
2418   }
2419 
2420   if (isZero()) {
2421     // [IEEE Std 754-2008 6.3]:
2422     // ... the sign of the result of conversions, the quantize operation, the
2423     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2424     // the sign of the first or only operand.
2425     return opOK;
2426   }
2427 
2428   // If the exponent is large enough, we know that this value is already
2429   // integral, and the arithmetic below would potentially cause it to saturate
2430   // to +/-Inf.  Bail out early instead.
2431   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2432     return opOK;
2433 
2434   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2435   // precision of our format, and then subtract it back off again.  The choice
2436   // of rounding modes for the addition/subtraction determines the rounding mode
2437   // for our integral rounding as well.
2438   // NOTE: When the input value is negative, we do subtraction followed by
2439   // addition instead.
2440   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2441   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2442   IEEEFloat MagicConstant(*semantics);
2443   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2444                                       rmNearestTiesToEven);
2445   assert(fs == opOK);
2446   MagicConstant.sign = sign;
2447 
2448   // Preserve the input sign so that we can handle the case of zero result
2449   // correctly.
2450   bool inputSign = isNegative();
2451 
2452   fs = add(MagicConstant, rounding_mode);
2453 
2454   // Current value and 'MagicConstant' are both integers, so the result of the
2455   // subtraction is always exact according to Sterbenz' lemma.
2456   subtract(MagicConstant, rounding_mode);
2457 
2458   // Restore the input sign.
2459   if (inputSign != isNegative())
2460     changeSign();
2461 
2462   return fs;
2463 }
2464 
2465 
2466 /* Comparison requires normalized numbers.  */
2467 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2468   cmpResult result;
2469 
2470   assert(semantics == rhs.semantics);
2471 
2472   switch (PackCategoriesIntoKey(category, rhs.category)) {
2473   default:
2474     llvm_unreachable(nullptr);
2475 
2476   case PackCategoriesIntoKey(fcNaN, fcZero):
2477   case PackCategoriesIntoKey(fcNaN, fcNormal):
2478   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2479   case PackCategoriesIntoKey(fcNaN, fcNaN):
2480   case PackCategoriesIntoKey(fcZero, fcNaN):
2481   case PackCategoriesIntoKey(fcNormal, fcNaN):
2482   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2483     return cmpUnordered;
2484 
2485   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2486   case PackCategoriesIntoKey(fcInfinity, fcZero):
2487   case PackCategoriesIntoKey(fcNormal, fcZero):
2488     if (sign)
2489       return cmpLessThan;
2490     else
2491       return cmpGreaterThan;
2492 
2493   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2494   case PackCategoriesIntoKey(fcZero, fcInfinity):
2495   case PackCategoriesIntoKey(fcZero, fcNormal):
2496     if (rhs.sign)
2497       return cmpGreaterThan;
2498     else
2499       return cmpLessThan;
2500 
2501   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2502     if (sign == rhs.sign)
2503       return cmpEqual;
2504     else if (sign)
2505       return cmpLessThan;
2506     else
2507       return cmpGreaterThan;
2508 
2509   case PackCategoriesIntoKey(fcZero, fcZero):
2510     return cmpEqual;
2511 
2512   case PackCategoriesIntoKey(fcNormal, fcNormal):
2513     break;
2514   }
2515 
2516   /* Two normal numbers.  Do they have the same sign?  */
2517   if (sign != rhs.sign) {
2518     if (sign)
2519       result = cmpLessThan;
2520     else
2521       result = cmpGreaterThan;
2522   } else {
2523     /* Compare absolute values; invert result if negative.  */
2524     result = compareAbsoluteValue(rhs);
2525 
2526     if (sign) {
2527       if (result == cmpLessThan)
2528         result = cmpGreaterThan;
2529       else if (result == cmpGreaterThan)
2530         result = cmpLessThan;
2531     }
2532   }
2533 
2534   return result;
2535 }
2536 
2537 /// IEEEFloat::convert - convert a value of one floating point type to another.
2538 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2539 /// records whether the transformation lost information, i.e. whether
2540 /// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
2542 /// are edge cases where this is not so).
2543 
2544 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2545                                        roundingMode rounding_mode,
2546                                        bool *losesInfo) {
2547   lostFraction lostFraction;
2548   unsigned int newPartCount, oldPartCount;
2549   opStatus fs;
2550   int shift;
2551   const fltSemantics &fromSemantics = *semantics;
2552   bool is_signaling = isSignaling();
2553 
2554   lostFraction = lfExactlyZero;
2555   newPartCount = partCountForBits(toSemantics.precision + 1);
2556   oldPartCount = partCount();
2557   shift = toSemantics.precision - fromSemantics.precision;
2558 
2559   bool X86SpecialNan = false;
2560   if (&fromSemantics == &semX87DoubleExtended &&
2561       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2562       (!(*significandParts() & 0x8000000000000000ULL) ||
2563        !(*significandParts() & 0x4000000000000000ULL))) {
2564     // x86 has some unusual NaNs which cannot be represented in any other
2565     // format; note them here.
2566     X86SpecialNan = true;
2567   }
2568 
2569   // If this is a truncation of a denormal number, and the target semantics
2570   // has larger exponent range than the source semantics (this can happen
2571   // when truncating from PowerPC double-double to double format), the
2572   // right shift could lose result mantissa bits.  Adjust exponent instead
2573   // of performing excessive shift.
2574   // Also do a similar trick in case shifting denormal would produce zero
2575   // significand as this case isn't handled correctly by normalize.
2576   if (shift < 0 && isFiniteNonZero()) {
2577     int omsb = significandMSB() + 1;
2578     int exponentChange = omsb - fromSemantics.precision;
2579     if (exponent + exponentChange < toSemantics.minExponent)
2580       exponentChange = toSemantics.minExponent - exponent;
2581     if (exponentChange < shift)
2582       exponentChange = shift;
2583     if (exponentChange < 0) {
2584       shift -= exponentChange;
2585       exponent += exponentChange;
2586     } else if (omsb <= -shift) {
2587       exponentChange = omsb + shift - 1; // leave at least one bit set
2588       shift -= exponentChange;
2589       exponent += exponentChange;
2590     }
2591   }
2592 
2593   // If this is a truncation, perform the shift before we narrow the storage.
2594   if (shift < 0 && (isFiniteNonZero() ||
2595                     (category == fcNaN && semantics->nonFiniteBehavior !=
2596                                               fltNonfiniteBehavior::NanOnly)))
2597     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2598 
2599   // Fix the storage so it can hold to new value.
2600   if (newPartCount > oldPartCount) {
2601     // The new type requires more storage; make it available.
2602     integerPart *newParts;
2603     newParts = new integerPart[newPartCount];
2604     APInt::tcSet(newParts, 0, newPartCount);
2605     if (isFiniteNonZero() || category==fcNaN)
2606       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2607     freeSignificand();
2608     significand.parts = newParts;
2609   } else if (newPartCount == 1 && oldPartCount != 1) {
2610     // Switch to built-in storage for a single part.
2611     integerPart newPart = 0;
2612     if (isFiniteNonZero() || category==fcNaN)
2613       newPart = significandParts()[0];
2614     freeSignificand();
2615     significand.part = newPart;
2616   }
2617 
2618   // Now that we have the right storage, switch the semantics.
2619   semantics = &toSemantics;
2620 
2621   // If this is an extension, perform the shift now that the storage is
2622   // available.
2623   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2624     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2625 
2626   if (isFiniteNonZero()) {
2627     fs = normalize(rounding_mode, lostFraction);
2628     *losesInfo = (fs != opOK);
2629   } else if (category == fcNaN) {
2630     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2631       *losesInfo =
2632           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2633       makeNaN(false, sign);
2634       return is_signaling ? opInvalidOp : opOK;
2635     }
2636 
2637     // If NaN is negative zero, we need to create a new NaN to avoid converting
2638     // NaN to -Inf.
2639     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2640         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2641       makeNaN(false, false);
2642 
2643     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2644 
2645     // For x87 extended precision, we want to make a NaN, not a special NaN if
2646     // the input wasn't special either.
2647     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2648       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2649 
2650     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2651     // This also guarantees that a sNaN does not become Inf on a truncation
2652     // that loses all payload bits.
2653     if (is_signaling) {
2654       makeQuiet();
2655       fs = opInvalidOp;
2656     } else {
2657       fs = opOK;
2658     }
2659   } else if (category == fcInfinity &&
2660              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2661     makeNaN(false, sign);
2662     *losesInfo = true;
2663     fs = opInexact;
2664   } else if (category == fcZero &&
2665              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2666     // Negative zero loses info, but positive zero doesn't.
2667     *losesInfo =
2668         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2669     fs = *losesInfo ? opInexact : opOK;
2670     // NaN is negative zero means -0 -> +0, which can lose information
2671     sign = false;
2672   } else {
2673     *losesInfo = false;
2674     fs = opOK;
2675   }
2676 
2677   if (category == fcZero && !semantics->hasZero)
2678     makeSmallestNormalized(false);
2679   return fs;
2680 }
2681 
2682 /* Convert a floating point number to an integer according to the
2683    rounding mode.  If the rounded integer value is out of range this
2684    returns an invalid operation exception and the contents of the
2685    destination parts are unspecified.  If the rounded value is in
2686    range but the floating point number is not the exact integer, the C
2687    standard doesn't require an inexact exception to be raised.  IEEE
2688    854 does require it so we do that.
2689 
2690    Note that for conversions to integer type the C standard requires
2691    round-to-zero to always be used.  */
2692 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2693     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2694     roundingMode rounding_mode, bool *isExact) const {
2695   lostFraction lost_fraction;
2696   const integerPart *src;
2697   unsigned int dstPartsCount, truncatedBits;
2698 
2699   *isExact = false;
2700 
2701   /* Handle the three special cases first.  */
2702   if (category == fcInfinity || category == fcNaN)
2703     return opInvalidOp;
2704 
2705   dstPartsCount = partCountForBits(width);
2706   assert(dstPartsCount <= parts.size() && "Integer too big");
2707 
2708   if (category == fcZero) {
2709     APInt::tcSet(parts.data(), 0, dstPartsCount);
2710     // Negative zero can't be represented as an int.
2711     *isExact = !sign;
2712     return opOK;
2713   }
2714 
2715   src = significandParts();
2716 
2717   /* Step 1: place our absolute value, with any fraction truncated, in
2718      the destination.  */
2719   if (exponent < 0) {
2720     /* Our absolute value is less than one; truncate everything.  */
2721     APInt::tcSet(parts.data(), 0, dstPartsCount);
2722     /* For exponent -1 the integer bit represents .5, look at that.
2723        For smaller exponents leftmost truncated bit is 0. */
2724     truncatedBits = semantics->precision -1U - exponent;
2725   } else {
2726     /* We want the most significant (exponent + 1) bits; the rest are
2727        truncated.  */
2728     unsigned int bits = exponent + 1U;
2729 
2730     /* Hopelessly large in magnitude?  */
2731     if (bits > width)
2732       return opInvalidOp;
2733 
2734     if (bits < semantics->precision) {
2735       /* We truncate (semantics->precision - bits) bits.  */
2736       truncatedBits = semantics->precision - bits;
2737       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2738     } else {
2739       /* We want at least as many bits as are available.  */
2740       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2741                        0);
2742       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2743                          bits - semantics->precision);
2744       truncatedBits = 0;
2745     }
2746   }
2747 
2748   /* Step 2: work out any lost fraction, and increment the absolute
2749      value if we would round away from zero.  */
2750   if (truncatedBits) {
2751     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2752                                                   truncatedBits);
2753     if (lost_fraction != lfExactlyZero &&
2754         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2755       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2756         return opInvalidOp;     /* Overflow.  */
2757     }
2758   } else {
2759     lost_fraction = lfExactlyZero;
2760   }
2761 
2762   /* Step 3: check if we fit in the destination.  */
2763   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2764 
2765   if (sign) {
2766     if (!isSigned) {
2767       /* Negative numbers cannot be represented as unsigned.  */
2768       if (omsb != 0)
2769         return opInvalidOp;
2770     } else {
2771       /* It takes omsb bits to represent the unsigned integer value.
2772          We lose a bit for the sign, but care is needed as the
2773          maximally negative integer is a special case.  */
2774       if (omsb == width &&
2775           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2776         return opInvalidOp;
2777 
2778       /* This case can happen because of rounding.  */
2779       if (omsb > width)
2780         return opInvalidOp;
2781     }
2782 
2783     APInt::tcNegate (parts.data(), dstPartsCount);
2784   } else {
2785     if (omsb >= width + !isSigned)
2786       return opInvalidOp;
2787   }
2788 
2789   if (lost_fraction == lfExactlyZero) {
2790     *isExact = true;
2791     return opOK;
2792   } else
2793     return opInexact;
2794 }
2795 
2796 /* Same as convertToSignExtendedInteger, except we provide
2797    deterministic values in case of an invalid operation exception,
2798    namely zero for NaNs and the minimal or maximal value respectively
2799    for underflow or overflow.
2800    The *isExact output tells whether the result is exact, in the sense
2801    that converting it back to the original floating point type produces
2802    the original value.  This is almost equivalent to result==opOK,
2803    except for negative zeroes.
2804 */
2805 IEEEFloat::opStatus
2806 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2807                             unsigned int width, bool isSigned,
2808                             roundingMode rounding_mode, bool *isExact) const {
2809   opStatus fs;
2810 
2811   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2812                                     isExact);
2813 
2814   if (fs == opInvalidOp) {
2815     unsigned int bits, dstPartsCount;
2816 
2817     dstPartsCount = partCountForBits(width);
2818     assert(dstPartsCount <= parts.size() && "Integer too big");
2819 
2820     if (category == fcNaN)
2821       bits = 0;
2822     else if (sign)
2823       bits = isSigned;
2824     else
2825       bits = width - isSigned;
2826 
2827     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2828     if (sign && isSigned)
2829       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2830   }
2831 
2832   return fs;
2833 }
2834 
2835 /* Convert an unsigned integer SRC to a floating point number,
2836    rounding according to ROUNDING_MODE.  The sign of the floating
2837    point number is not modified.  */
2838 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2839     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2840   unsigned int omsb, precision, dstCount;
2841   integerPart *dst;
2842   lostFraction lost_fraction;
2843 
2844   category = fcNormal;
2845   omsb = APInt::tcMSB(src, srcCount) + 1;
2846   dst = significandParts();
2847   dstCount = partCount();
2848   precision = semantics->precision;
2849 
2850   /* We want the most significant PRECISION bits of SRC.  There may not
2851      be that many; extract what we can.  */
2852   if (precision <= omsb) {
2853     exponent = omsb - 1;
2854     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2855                                                   omsb - precision);
2856     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2857   } else {
2858     exponent = precision - 1;
2859     lost_fraction = lfExactlyZero;
2860     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2861   }
2862 
2863   return normalize(rounding_mode, lost_fraction);
2864 }
2865 
2866 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2867                                                 roundingMode rounding_mode) {
2868   unsigned int partCount = Val.getNumWords();
2869   APInt api = Val;
2870 
2871   sign = false;
2872   if (isSigned && api.isNegative()) {
2873     sign = true;
2874     api = -api;
2875   }
2876 
2877   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2878 }
2879 
2880 /* Convert a two's complement integer SRC to a floating point number,
2881    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2882    integer is signed, in which case it must be sign-extended.  */
2883 IEEEFloat::opStatus
2884 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2885                                           unsigned int srcCount, bool isSigned,
2886                                           roundingMode rounding_mode) {
2887   opStatus status;
2888 
2889   if (isSigned &&
2890       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2891     integerPart *copy;
2892 
2893     /* If we're signed and negative negate a copy.  */
2894     sign = true;
2895     copy = new integerPart[srcCount];
2896     APInt::tcAssign(copy, src, srcCount);
2897     APInt::tcNegate(copy, srcCount);
2898     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2899     delete [] copy;
2900   } else {
2901     sign = false;
2902     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2903   }
2904 
2905   return status;
2906 }
2907 
2908 /* FIXME: should this just take a const APInt reference?  */
2909 IEEEFloat::opStatus
2910 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2911                                           unsigned int width, bool isSigned,
2912                                           roundingMode rounding_mode) {
2913   unsigned int partCount = partCountForBits(width);
2914   APInt api = APInt(width, ArrayRef(parts, partCount));
2915 
2916   sign = false;
2917   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2918     sign = true;
2919     api = -api;
2920   }
2921 
2922   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2923 }
2924 
2925 Expected<IEEEFloat::opStatus>
2926 IEEEFloat::convertFromHexadecimalString(StringRef s,
2927                                         roundingMode rounding_mode) {
2928   lostFraction lost_fraction = lfExactlyZero;
2929 
2930   category = fcNormal;
2931   zeroSignificand();
2932   exponent = 0;
2933 
2934   integerPart *significand = significandParts();
2935   unsigned partsCount = partCount();
2936   unsigned bitPos = partsCount * integerPartWidth;
2937   bool computedTrailingFraction = false;
2938 
2939   // Skip leading zeroes and any (hexa)decimal point.
2940   StringRef::iterator begin = s.begin();
2941   StringRef::iterator end = s.end();
2942   StringRef::iterator dot;
2943   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2944   if (!PtrOrErr)
2945     return PtrOrErr.takeError();
2946   StringRef::iterator p = *PtrOrErr;
2947   StringRef::iterator firstSignificantDigit = p;
2948 
2949   while (p != end) {
2950     integerPart hex_value;
2951 
2952     if (*p == '.') {
2953       if (dot != end)
2954         return createError("String contains multiple dots");
2955       dot = p++;
2956       continue;
2957     }
2958 
2959     hex_value = hexDigitValue(*p);
2960     if (hex_value == UINT_MAX)
2961       break;
2962 
2963     p++;
2964 
2965     // Store the number while we have space.
2966     if (bitPos) {
2967       bitPos -= 4;
2968       hex_value <<= bitPos % integerPartWidth;
2969       significand[bitPos / integerPartWidth] |= hex_value;
2970     } else if (!computedTrailingFraction) {
2971       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2972       if (!FractOrErr)
2973         return FractOrErr.takeError();
2974       lost_fraction = *FractOrErr;
2975       computedTrailingFraction = true;
2976     }
2977   }
2978 
2979   /* Hex floats require an exponent but not a hexadecimal point.  */
2980   if (p == end)
2981     return createError("Hex strings require an exponent");
2982   if (*p != 'p' && *p != 'P')
2983     return createError("Invalid character in significand");
2984   if (p == begin)
2985     return createError("Significand has no digits");
2986   if (dot != end && p - begin == 1)
2987     return createError("Significand has no digits");
2988 
2989   /* Ignore the exponent if we are zero.  */
2990   if (p != firstSignificantDigit) {
2991     int expAdjustment;
2992 
2993     /* Implicit hexadecimal point?  */
2994     if (dot == end)
2995       dot = p;
2996 
2997     /* Calculate the exponent adjustment implicit in the number of
2998        significant digits.  */
2999     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
3000     if (expAdjustment < 0)
3001       expAdjustment++;
3002     expAdjustment = expAdjustment * 4 - 1;
3003 
3004     /* Adjust for writing the significand starting at the most
3005        significant nibble.  */
3006     expAdjustment += semantics->precision;
3007     expAdjustment -= partsCount * integerPartWidth;
3008 
3009     /* Adjust for the given exponent.  */
3010     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
3011     if (!ExpOrErr)
3012       return ExpOrErr.takeError();
3013     exponent = *ExpOrErr;
3014   }
3015 
3016   return normalize(rounding_mode, lost_fraction);
3017 }
3018 
3019 IEEEFloat::opStatus
3020 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
3021                                         unsigned sigPartCount, int exp,
3022                                         roundingMode rounding_mode) {
3023   unsigned int parts, pow5PartCount;
3024   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
3025   integerPart pow5Parts[maxPowerOfFiveParts];
3026   bool isNearest;
3027 
3028   isNearest = (rounding_mode == rmNearestTiesToEven ||
3029                rounding_mode == rmNearestTiesToAway);
3030 
3031   parts = partCountForBits(semantics->precision + 11);
3032 
3033   /* Calculate pow(5, abs(exp)).  */
3034   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
3035 
3036   for (;; parts *= 2) {
3037     opStatus sigStatus, powStatus;
3038     unsigned int excessPrecision, truncatedBits;
3039 
3040     calcSemantics.precision = parts * integerPartWidth - 1;
3041     excessPrecision = calcSemantics.precision - semantics->precision;
3042     truncatedBits = excessPrecision;
3043 
3044     IEEEFloat decSig(calcSemantics, uninitialized);
3045     decSig.makeZero(sign);
3046     IEEEFloat pow5(calcSemantics);
3047 
3048     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
3049                                                 rmNearestTiesToEven);
3050     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
3051                                               rmNearestTiesToEven);
3052     /* Add exp, as 10^n = 5^n * 2^n.  */
3053     decSig.exponent += exp;
3054 
3055     lostFraction calcLostFraction;
3056     integerPart HUerr, HUdistance;
3057     unsigned int powHUerr;
3058 
3059     if (exp >= 0) {
3060       /* multiplySignificand leaves the precision-th bit set to 1.  */
3061       calcLostFraction = decSig.multiplySignificand(pow5);
3062       powHUerr = powStatus != opOK;
3063     } else {
3064       calcLostFraction = decSig.divideSignificand(pow5);
3065       /* Denormal numbers have less precision.  */
3066       if (decSig.exponent < semantics->minExponent) {
3067         excessPrecision += (semantics->minExponent - decSig.exponent);
3068         truncatedBits = excessPrecision;
3069         if (excessPrecision > calcSemantics.precision)
3070           excessPrecision = calcSemantics.precision;
3071       }
3072       /* Extra half-ulp lost in reciprocal of exponent.  */
3073       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3074     }
3075 
3076     /* Both multiplySignificand and divideSignificand return the
3077        result with the integer bit set.  */
3078     assert(APInt::tcExtractBit
3079            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3080 
3081     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3082                        powHUerr);
3083     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3084                                       excessPrecision, isNearest);
3085 
3086     /* Are we guaranteed to round correctly if we truncate?  */
3087     if (HUdistance >= HUerr) {
3088       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3089                        calcSemantics.precision - excessPrecision,
3090                        excessPrecision);
3091       /* Take the exponent of decSig.  If we tcExtract-ed less bits
3092          above we must adjust our exponent to compensate for the
3093          implicit right shift.  */
3094       exponent = (decSig.exponent + semantics->precision
3095                   - (calcSemantics.precision - excessPrecision));
3096       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3097                                                        decSig.partCount(),
3098                                                        truncatedBits);
3099       return normalize(rounding_mode, calcLostFraction);
3100     }
3101   }
3102 }
3103 
3104 Expected<IEEEFloat::opStatus>
3105 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3106   decimalInfo D;
3107   opStatus fs;
3108 
3109   /* Scan the text.  */
3110   StringRef::iterator p = str.begin();
3111   if (Error Err = interpretDecimal(p, str.end(), &D))
3112     return std::move(Err);
3113 
3114   /* Handle the quick cases.  First the case of no significant digits,
3115      i.e. zero, and then exponents that are obviously too large or too
3116      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
3117      definitely overflows if
3118 
3119            (exp - 1) * L >= maxExponent
3120 
3121      and definitely underflows to zero where
3122 
3123            (exp + 1) * L <= minExponent - precision
3124 
3125      With integer arithmetic the tightest bounds for L are
3126 
3127            93/28 < L < 196/59            [ numerator <= 256 ]
3128            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
3129   */
3130 
3131   // Test if we have a zero number allowing for strings with no null terminators
3132   // and zero decimals with non-zero exponents.
3133   //
3134   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3135   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3136   // be at most one dot. On the other hand, if we have a zero with a non-zero
3137   // exponent, then we know that D.firstSigDigit will be non-numeric.
3138   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3139     category = fcZero;
3140     fs = opOK;
3141     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3142       sign = false;
3143     if (!semantics->hasZero)
3144       makeSmallestNormalized(false);
3145 
3146     /* Check whether the normalized exponent is high enough to overflow
3147        max during the log-rebasing in the max-exponent check below. */
3148   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3149     fs = handleOverflow(rounding_mode);
3150 
3151   /* If it wasn't, then it also wasn't high enough to overflow max
3152      during the log-rebasing in the min-exponent check.  Check that it
3153      won't overflow min in either check, then perform the min-exponent
3154      check. */
3155   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3156              (D.normalizedExponent + 1) * 28738 <=
3157                8651 * (semantics->minExponent - (int) semantics->precision)) {
3158     /* Underflow to zero and round.  */
3159     category = fcNormal;
3160     zeroSignificand();
3161     fs = normalize(rounding_mode, lfLessThanHalf);
3162 
3163   /* We can finally safely perform the max-exponent check. */
3164   } else if ((D.normalizedExponent - 1) * 42039
3165              >= 12655 * semantics->maxExponent) {
3166     /* Overflow and round.  */
3167     fs = handleOverflow(rounding_mode);
3168   } else {
3169     integerPart *decSignificand;
3170     unsigned int partCount;
3171 
3172     /* A tight upper bound on number of bits required to hold an
3173        N-digit decimal integer is N * 196 / 59.  Allocate enough space
3174        to hold the full significand, and an extra part required by
3175        tcMultiplyPart.  */
3176     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3177     partCount = partCountForBits(1 + 196 * partCount / 59);
3178     decSignificand = new integerPart[partCount + 1];
3179     partCount = 0;
3180 
3181     /* Convert to binary efficiently - we do almost all multiplication
3182        in an integerPart.  When this would overflow do we do a single
3183        bignum multiplication, and then revert again to multiplication
3184        in an integerPart.  */
3185     do {
3186       integerPart decValue, val, multiplier;
3187 
3188       val = 0;
3189       multiplier = 1;
3190 
3191       do {
3192         if (*p == '.') {
3193           p++;
3194           if (p == str.end()) {
3195             break;
3196           }
3197         }
3198         decValue = decDigitValue(*p++);
3199         if (decValue >= 10U) {
3200           delete[] decSignificand;
3201           return createError("Invalid character in significand");
3202         }
3203         multiplier *= 10;
3204         val = val * 10 + decValue;
3205         /* The maximum number that can be multiplied by ten with any
3206            digit added without overflowing an integerPart.  */
3207       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3208 
3209       /* Multiply out the current part.  */
3210       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3211                             partCount, partCount + 1, false);
3212 
3213       /* If we used another part (likely but not guaranteed), increase
3214          the count.  */
3215       if (decSignificand[partCount])
3216         partCount++;
3217     } while (p <= D.lastSigDigit);
3218 
3219     category = fcNormal;
3220     fs = roundSignificandWithExponent(decSignificand, partCount,
3221                                       D.exponent, rounding_mode);
3222 
3223     delete [] decSignificand;
3224   }
3225 
3226   return fs;
3227 }
3228 
3229 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3230   const size_t MIN_NAME_SIZE = 3;
3231 
3232   if (str.size() < MIN_NAME_SIZE)
3233     return false;
3234 
3235   if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3236     makeInf(false);
3237     return true;
3238   }
3239 
3240   bool IsNegative = str.front() == '-';
3241   if (IsNegative) {
3242     str = str.drop_front();
3243     if (str.size() < MIN_NAME_SIZE)
3244       return false;
3245 
3246     if (str == "inf" || str == "INFINITY" || str == "Inf") {
3247       makeInf(true);
3248       return true;
3249     }
3250   }
3251 
3252   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3253   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3254   if (IsSignaling) {
3255     str = str.drop_front();
3256     if (str.size() < MIN_NAME_SIZE)
3257       return false;
3258   }
3259 
3260   if (str.starts_with("nan") || str.starts_with("NaN")) {
3261     str = str.drop_front(3);
3262 
3263     // A NaN without payload.
3264     if (str.empty()) {
3265       makeNaN(IsSignaling, IsNegative);
3266       return true;
3267     }
3268 
3269     // Allow the payload to be inside parentheses.
3270     if (str.front() == '(') {
3271       // Parentheses should be balanced (and not empty).
3272       if (str.size() <= 2 || str.back() != ')')
3273         return false;
3274 
3275       str = str.slice(1, str.size() - 1);
3276     }
3277 
3278     // Determine the payload number's radix.
3279     unsigned Radix = 10;
3280     if (str[0] == '0') {
3281       if (str.size() > 1 && tolower(str[1]) == 'x') {
3282         str = str.drop_front(2);
3283         Radix = 16;
3284       } else
3285         Radix = 8;
3286     }
3287 
3288     // Parse the payload and make the NaN.
3289     APInt Payload;
3290     if (!str.getAsInteger(Radix, Payload)) {
3291       makeNaN(IsSignaling, IsNegative, &Payload);
3292       return true;
3293     }
3294   }
3295 
3296   return false;
3297 }
3298 
3299 Expected<IEEEFloat::opStatus>
3300 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3301   if (str.empty())
3302     return createError("Invalid string length");
3303 
3304   // Handle special cases.
3305   if (convertFromStringSpecials(str))
3306     return opOK;
3307 
3308   /* Handle a leading minus sign.  */
3309   StringRef::iterator p = str.begin();
3310   size_t slen = str.size();
3311   sign = *p == '-' ? 1 : 0;
3312   if (sign && !semantics->hasSignedRepr)
3313     llvm_unreachable(
3314         "This floating point format does not support signed values");
3315 
3316   if (*p == '-' || *p == '+') {
3317     p++;
3318     slen--;
3319     if (!slen)
3320       return createError("String has no digits");
3321   }
3322 
3323   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3324     if (slen == 2)
3325       return createError("Invalid string");
3326     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3327                                         rounding_mode);
3328   }
3329 
3330   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3331 }
3332 
3333 /* Write out a hexadecimal representation of the floating point value
3334    to DST, which must be of sufficient size, in the C99 form
3335    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3336    excluding the terminating NUL.
3337 
3338    If UPPERCASE, the output is in upper case, otherwise in lower case.
3339 
3340    HEXDIGITS digits appear altogether, rounding the value if
3341    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3342    number precisely is used instead.  If nothing would appear after
3343    the decimal point it is suppressed.
3344 
3345    The decimal exponent is always printed and has at least one digit.
3346    Zero values display an exponent of zero.  Infinities and NaNs
3347    appear as "infinity" or "nan" respectively.
3348 
3349    The above rules are as specified by C99.  There is ambiguity about
3350    what the leading hexadecimal digit should be.  This implementation
3351    uses whatever is necessary so that the exponent is displayed as
3352    stored.  This implies the exponent will fall within the IEEE format
3353    range, and the leading hexadecimal digit will be 0 (for denormals),
3354    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3355    any other digits zero).
3356 */
3357 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3358                                            bool upperCase,
3359                                            roundingMode rounding_mode) const {
3360   char *p;
3361 
3362   p = dst;
3363   if (sign)
3364     *dst++ = '-';
3365 
3366   switch (category) {
3367   case fcInfinity:
3368     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3369     dst += sizeof infinityL - 1;
3370     break;
3371 
3372   case fcNaN:
3373     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3374     dst += sizeof NaNU - 1;
3375     break;
3376 
3377   case fcZero:
3378     *dst++ = '0';
3379     *dst++ = upperCase ? 'X': 'x';
3380     *dst++ = '0';
3381     if (hexDigits > 1) {
3382       *dst++ = '.';
3383       memset (dst, '0', hexDigits - 1);
3384       dst += hexDigits - 1;
3385     }
3386     *dst++ = upperCase ? 'P': 'p';
3387     *dst++ = '0';
3388     break;
3389 
3390   case fcNormal:
3391     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3392     break;
3393   }
3394 
3395   *dst = 0;
3396 
3397   return static_cast<unsigned int>(dst - p);
3398 }
3399 
3400 /* Does the hard work of outputting the correctly rounded hexadecimal
3401    form of a normal floating point number with the specified number of
3402    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3403    digits necessary to print the value precisely is output.  */
3404 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3405                                           bool upperCase,
3406                                           roundingMode rounding_mode) const {
3407   unsigned int count, valueBits, shift, partsCount, outputDigits;
3408   const char *hexDigitChars;
3409   const integerPart *significand;
3410   char *p;
3411   bool roundUp;
3412 
3413   *dst++ = '0';
3414   *dst++ = upperCase ? 'X': 'x';
3415 
3416   roundUp = false;
3417   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3418 
3419   significand = significandParts();
3420   partsCount = partCount();
3421 
3422   /* +3 because the first digit only uses the single integer bit, so
3423      we have 3 virtual zero most-significant-bits.  */
3424   valueBits = semantics->precision + 3;
3425   shift = integerPartWidth - valueBits % integerPartWidth;
3426 
3427   /* The natural number of digits required ignoring trailing
3428      insignificant zeroes.  */
3429   outputDigits = (valueBits - significandLSB () + 3) / 4;
3430 
3431   /* hexDigits of zero means use the required number for the
3432      precision.  Otherwise, see if we are truncating.  If we are,
3433      find out if we need to round away from zero.  */
3434   if (hexDigits) {
3435     if (hexDigits < outputDigits) {
3436       /* We are dropping non-zero bits, so need to check how to round.
3437          "bits" is the number of dropped bits.  */
3438       unsigned int bits;
3439       lostFraction fraction;
3440 
3441       bits = valueBits - hexDigits * 4;
3442       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3443       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3444     }
3445     outputDigits = hexDigits;
3446   }
3447 
3448   /* Write the digits consecutively, and start writing in the location
3449      of the hexadecimal point.  We move the most significant digit
3450      left and add the hexadecimal point later.  */
3451   p = ++dst;
3452 
3453   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3454 
3455   while (outputDigits && count) {
3456     integerPart part;
3457 
3458     /* Put the most significant integerPartWidth bits in "part".  */
3459     if (--count == partsCount)
3460       part = 0;  /* An imaginary higher zero part.  */
3461     else
3462       part = significand[count] << shift;
3463 
3464     if (count && shift)
3465       part |= significand[count - 1] >> (integerPartWidth - shift);
3466 
3467     /* Convert as much of "part" to hexdigits as we can.  */
3468     unsigned int curDigits = integerPartWidth / 4;
3469 
3470     if (curDigits > outputDigits)
3471       curDigits = outputDigits;
3472     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3473     outputDigits -= curDigits;
3474   }
3475 
3476   if (roundUp) {
3477     char *q = dst;
3478 
3479     /* Note that hexDigitChars has a trailing '0'.  */
3480     do {
3481       q--;
3482       *q = hexDigitChars[hexDigitValue (*q) + 1];
3483     } while (*q == '0');
3484     assert(q >= p);
3485   } else {
3486     /* Add trailing zeroes.  */
3487     memset (dst, '0', outputDigits);
3488     dst += outputDigits;
3489   }
3490 
3491   /* Move the most significant digit to before the point, and if there
3492      is something after the decimal point add it.  This must come
3493      after rounding above.  */
3494   p[-1] = p[0];
3495   if (dst -1 == p)
3496     dst--;
3497   else
3498     p[0] = '.';
3499 
3500   /* Finally output the exponent.  */
3501   *dst++ = upperCase ? 'P': 'p';
3502 
3503   return writeSignedDecimal (dst, exponent);
3504 }
3505 
3506 hash_code hash_value(const IEEEFloat &Arg) {
3507   if (!Arg.isFiniteNonZero())
3508     return hash_combine((uint8_t)Arg.category,
3509                         // NaN has no sign, fix it at zero.
3510                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3511                         Arg.semantics->precision);
3512 
3513   // Normal floats need their exponent and significand hashed.
3514   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3515                       Arg.semantics->precision, Arg.exponent,
3516                       hash_combine_range(
3517                         Arg.significandParts(),
3518                         Arg.significandParts() + Arg.partCount()));
3519 }
3520 
3521 // Conversion from APFloat to/from host float/double.  It may eventually be
3522 // possible to eliminate these and have everybody deal with APFloats, but that
3523 // will take a while.  This approach will not easily extend to long double.
3524 // Current implementation requires integerPartWidth==64, which is correct at
3525 // the moment but could be made more general.
3526 
3527 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3529 
3530 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3531   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3532   assert(partCount()==2);
3533 
3534   uint64_t myexponent, mysignificand;
3535 
3536   if (isFiniteNonZero()) {
3537     myexponent = exponent+16383; //bias
3538     mysignificand = significandParts()[0];
3539     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3540       myexponent = 0;   // denormal
3541   } else if (category==fcZero) {
3542     myexponent = 0;
3543     mysignificand = 0;
3544   } else if (category==fcInfinity) {
3545     myexponent = 0x7fff;
3546     mysignificand = 0x8000000000000000ULL;
3547   } else {
3548     assert(category == fcNaN && "Unknown category");
3549     myexponent = 0x7fff;
3550     mysignificand = significandParts()[0];
3551   }
3552 
3553   uint64_t words[2];
3554   words[0] = mysignificand;
3555   words[1] =  ((uint64_t)(sign & 1) << 15) |
3556               (myexponent & 0x7fffLL);
3557   return APInt(80, words);
3558 }
3559 
3560 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3561   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3562   assert(partCount()==2);
3563 
3564   uint64_t words[2];
3565   opStatus fs;
3566   bool losesInfo;
3567 
3568   // Convert number to double.  To avoid spurious underflows, we re-
3569   // normalize against the "double" minExponent first, and only *then*
3570   // truncate the mantissa.  The result of that second conversion
3571   // may be inexact, but should never underflow.
3572   // Declare fltSemantics before APFloat that uses it (and
3573   // saves pointer to it) to ensure correct destruction order.
3574   fltSemantics extendedSemantics = *semantics;
3575   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3576   IEEEFloat extended(*this);
3577   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3578   assert(fs == opOK && !losesInfo);
3579   (void)fs;
3580 
3581   IEEEFloat u(extended);
3582   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3583   assert(fs == opOK || fs == opInexact);
3584   (void)fs;
3585   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3586 
3587   // If conversion was exact or resulted in a special case, we're done;
3588   // just set the second double to zero.  Otherwise, re-convert back to
3589   // the extended format and compute the difference.  This now should
3590   // convert exactly to double.
3591   if (u.isFiniteNonZero() && losesInfo) {
3592     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3593     assert(fs == opOK && !losesInfo);
3594     (void)fs;
3595 
3596     IEEEFloat v(extended);
3597     v.subtract(u, rmNearestTiesToEven);
3598     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3599     assert(fs == opOK && !losesInfo);
3600     (void)fs;
3601     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3602   } else {
3603     words[1] = 0;
3604   }
3605 
3606   return APInt(128, words);
3607 }
3608 
3609 template <const fltSemantics &S>
3610 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3611   assert(semantics == &S);
3612   const int bias =
3613       (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
3614   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3615   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3616   constexpr integerPart integer_bit =
3617       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3618   constexpr uint64_t significand_mask = integer_bit - 1;
3619   constexpr unsigned int exponent_bits =
3620       trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3621                                 : S.sizeInBits;
3622   static_assert(exponent_bits < 64);
3623   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3624 
3625   uint64_t myexponent;
3626   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3627       mysignificand;
3628 
3629   if (isFiniteNonZero()) {
3630     myexponent = exponent + bias;
3631     std::copy_n(significandParts(), mysignificand.size(),
3632                 mysignificand.begin());
3633     if (myexponent == 1 &&
3634         !(significandParts()[integer_bit_part] & integer_bit))
3635       myexponent = 0; // denormal
3636   } else if (category == fcZero) {
3637     if (!S.hasZero)
3638       llvm_unreachable("semantics does not support zero!");
3639     myexponent = ::exponentZero(S) + bias;
3640     mysignificand.fill(0);
3641   } else if (category == fcInfinity) {
3642     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3643         S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3644       llvm_unreachable("semantics don't support inf!");
3645     myexponent = ::exponentInf(S) + bias;
3646     mysignificand.fill(0);
3647   } else {
3648     assert(category == fcNaN && "Unknown category!");
3649     if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3650       llvm_unreachable("semantics don't support NaN!");
3651     myexponent = ::exponentNaN(S) + bias;
3652     std::copy_n(significandParts(), mysignificand.size(),
3653                 mysignificand.begin());
3654   }
3655   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3656   auto words_iter =
3657       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3658   if constexpr (significand_mask != 0) {
3659     // Clear the integer bit.
3660     words[mysignificand.size() - 1] &= significand_mask;
3661   }
3662   std::fill(words_iter, words.end(), uint64_t{0});
3663   constexpr size_t last_word = words.size() - 1;
3664   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3665                           << ((S.sizeInBits - 1) % 64);
3666   words[last_word] |= shifted_sign;
3667   uint64_t shifted_exponent = (myexponent & exponent_mask)
3668                               << (trailing_significand_bits % 64);
3669   words[last_word] |= shifted_exponent;
3670   if constexpr (last_word == 0) {
3671     return APInt(S.sizeInBits, words[0]);
3672   }
3673   return APInt(S.sizeInBits, words);
3674 }
3675 
3676 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3677   assert(partCount() == 2);
3678   return convertIEEEFloatToAPInt<semIEEEquad>();
3679 }
3680 
3681 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3682   assert(partCount()==1);
3683   return convertIEEEFloatToAPInt<semIEEEdouble>();
3684 }
3685 
3686 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3687   assert(partCount()==1);
3688   return convertIEEEFloatToAPInt<semIEEEsingle>();
3689 }
3690 
3691 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3692   assert(partCount() == 1);
3693   return convertIEEEFloatToAPInt<semBFloat>();
3694 }
3695 
3696 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3697   assert(partCount()==1);
3698   return convertIEEEFloatToAPInt<semIEEEhalf>();
3699 }
3700 
3701 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3702   assert(partCount() == 1);
3703   return convertIEEEFloatToAPInt<semFloat8E5M2>();
3704 }
3705 
3706 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3707   assert(partCount() == 1);
3708   return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3709 }
3710 
3711 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3712   assert(partCount() == 1);
3713   return convertIEEEFloatToAPInt<semFloat8E4M3>();
3714 }
3715 
3716 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3717   assert(partCount() == 1);
3718   return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3719 }
3720 
3721 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3722   assert(partCount() == 1);
3723   return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3724 }
3725 
3726 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3727   assert(partCount() == 1);
3728   return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3729 }
3730 
3731 APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3732   assert(partCount() == 1);
3733   return convertIEEEFloatToAPInt<semFloat8E3M4>();
3734 }
3735 
3736 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3737   assert(partCount() == 1);
3738   return convertIEEEFloatToAPInt<semFloatTF32>();
3739 }
3740 
3741 APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3742   assert(partCount() == 1);
3743   return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
3744 }
3745 
3746 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3747   assert(partCount() == 1);
3748   return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3749 }
3750 
3751 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3752   assert(partCount() == 1);
3753   return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3754 }
3755 
3756 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3757   assert(partCount() == 1);
3758   return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3759 }
3760 
3761 // This function creates an APInt that is just a bit map of the floating
3762 // point constant as it would appear in memory.  It is not a conversion,
3763 // and treating the result as a normal integer is unlikely to be useful.
3764 
3765 APInt IEEEFloat::bitcastToAPInt() const {
3766   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3767     return convertHalfAPFloatToAPInt();
3768 
3769   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3770     return convertBFloatAPFloatToAPInt();
3771 
3772   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3773     return convertFloatAPFloatToAPInt();
3774 
3775   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3776     return convertDoubleAPFloatToAPInt();
3777 
3778   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3779     return convertQuadrupleAPFloatToAPInt();
3780 
3781   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3782     return convertPPCDoubleDoubleAPFloatToAPInt();
3783 
3784   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3785     return convertFloat8E5M2APFloatToAPInt();
3786 
3787   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3788     return convertFloat8E5M2FNUZAPFloatToAPInt();
3789 
3790   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3791     return convertFloat8E4M3APFloatToAPInt();
3792 
3793   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3794     return convertFloat8E4M3FNAPFloatToAPInt();
3795 
3796   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3797     return convertFloat8E4M3FNUZAPFloatToAPInt();
3798 
3799   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3800     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3801 
3802   if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
3803     return convertFloat8E3M4APFloatToAPInt();
3804 
3805   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3806     return convertFloatTF32APFloatToAPInt();
3807 
3808   if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
3809     return convertFloat8E8M0FNUAPFloatToAPInt();
3810 
3811   if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3812     return convertFloat6E3M2FNAPFloatToAPInt();
3813 
3814   if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3815     return convertFloat6E2M3FNAPFloatToAPInt();
3816 
3817   if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3818     return convertFloat4E2M1FNAPFloatToAPInt();
3819 
3820   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3821          "unknown format!");
3822   return convertF80LongDoubleAPFloatToAPInt();
3823 }
3824 
3825 float IEEEFloat::convertToFloat() const {
3826   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3827          "Float semantics are not IEEEsingle");
3828   APInt api = bitcastToAPInt();
3829   return api.bitsToFloat();
3830 }
3831 
3832 double IEEEFloat::convertToDouble() const {
3833   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3834          "Float semantics are not IEEEdouble");
3835   APInt api = bitcastToAPInt();
3836   return api.bitsToDouble();
3837 }
3838 
3839 #ifdef HAS_IEE754_FLOAT128
3840 float128 IEEEFloat::convertToQuad() const {
3841   assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
3842          "Float semantics are not IEEEquads");
3843   APInt api = bitcastToAPInt();
3844   return api.bitsToQuad();
3845 }
3846 #endif
3847 
3848 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3849 /// does not support these bit patterns:
3850 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3851 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3852 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3853 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3854 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3855 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3856   uint64_t i1 = api.getRawData()[0];
3857   uint64_t i2 = api.getRawData()[1];
3858   uint64_t myexponent = (i2 & 0x7fff);
3859   uint64_t mysignificand = i1;
3860   uint8_t myintegerbit = mysignificand >> 63;
3861 
3862   initialize(&semX87DoubleExtended);
3863   assert(partCount()==2);
3864 
3865   sign = static_cast<unsigned int>(i2>>15);
3866   if (myexponent == 0 && mysignificand == 0) {
3867     makeZero(sign);
3868   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3869     makeInf(sign);
3870   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3871              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3872     category = fcNaN;
3873     exponent = exponentNaN();
3874     significandParts()[0] = mysignificand;
3875     significandParts()[1] = 0;
3876   } else {
3877     category = fcNormal;
3878     exponent = myexponent - 16383;
3879     significandParts()[0] = mysignificand;
3880     significandParts()[1] = 0;
3881     if (myexponent==0)          // denormal
3882       exponent = -16382;
3883   }
3884 }
3885 
3886 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3887   uint64_t i1 = api.getRawData()[0];
3888   uint64_t i2 = api.getRawData()[1];
3889   opStatus fs;
3890   bool losesInfo;
3891 
3892   // Get the first double and convert to our format.
3893   initFromDoubleAPInt(APInt(64, i1));
3894   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3895   assert(fs == opOK && !losesInfo);
3896   (void)fs;
3897 
3898   // Unless we have a special case, add in second double.
3899   if (isFiniteNonZero()) {
3900     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3901     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3902     assert(fs == opOK && !losesInfo);
3903     (void)fs;
3904 
3905     add(v, rmNearestTiesToEven);
3906   }
3907 }
3908 
3909 // The E8M0 format has the following characteristics:
3910 // It is an 8-bit unsigned format with only exponents (no actual significand).
3911 // No encodings for {zero, infinities or denorms}.
3912 // NaN is represented by all 1's.
3913 // Bias is 127.
3914 void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3915   const uint64_t exponent_mask = 0xff;
3916   uint64_t val = api.getRawData()[0];
3917   uint64_t myexponent = (val & exponent_mask);
3918 
3919   initialize(&semFloat8E8M0FNU);
3920   assert(partCount() == 1);
3921 
3922   // This format has unsigned representation only
3923   sign = 0;
3924 
3925   // Set the significand
3926   // This format does not have any significand but the 'Pth' precision bit is
3927   // always set to 1 for consistency in APFloat's internal representation.
3928   uint64_t mysignificand = 1;
3929   significandParts()[0] = mysignificand;
3930 
3931   // This format can either have a NaN or fcNormal
3932   // All 1's i.e. 255 is a NaN
3933   if (val == exponent_mask) {
3934     category = fcNaN;
3935     exponent = exponentNaN();
3936     return;
3937   }
3938   // Handle fcNormal...
3939   category = fcNormal;
3940   exponent = myexponent - 127; // 127 is bias
3941   return;
3942 }
3943 template <const fltSemantics &S>
3944 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3945   assert(api.getBitWidth() == S.sizeInBits);
3946   constexpr integerPart integer_bit = integerPart{1}
3947                                       << ((S.precision - 1) % integerPartWidth);
3948   constexpr uint64_t significand_mask = integer_bit - 1;
3949   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3950   constexpr unsigned int stored_significand_parts =
3951       partCountForBits(trailing_significand_bits);
3952   constexpr unsigned int exponent_bits =
3953       S.sizeInBits - 1 - trailing_significand_bits;
3954   static_assert(exponent_bits < 64);
3955   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3956   constexpr int bias = -(S.minExponent - 1);
3957 
3958   // Copy the bits of the significand. We need to clear out the exponent and
3959   // sign bit in the last word.
3960   std::array<integerPart, stored_significand_parts> mysignificand;
3961   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3962   if constexpr (significand_mask != 0) {
3963     mysignificand[mysignificand.size() - 1] &= significand_mask;
3964   }
3965 
3966   // We assume the last word holds the sign bit, the exponent, and potentially
3967   // some of the trailing significand field.
3968   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3969   uint64_t myexponent =
3970       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3971 
3972   initialize(&S);
3973   assert(partCount() == mysignificand.size());
3974 
3975   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3976 
3977   bool all_zero_significand =
3978       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3979 
3980   bool is_zero = myexponent == 0 && all_zero_significand;
3981 
3982   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3983     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3984       makeInf(sign);
3985       return;
3986     }
3987   }
3988 
3989   bool is_nan = false;
3990 
3991   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3992     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3993   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3994     bool all_ones_significand =
3995         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3996                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3997         (!significand_mask ||
3998          mysignificand[mysignificand.size() - 1] == significand_mask);
3999     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
4000   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
4001     is_nan = is_zero && sign;
4002   }
4003 
4004   if (is_nan) {
4005     category = fcNaN;
4006     exponent = ::exponentNaN(S);
4007     std::copy_n(mysignificand.begin(), mysignificand.size(),
4008                 significandParts());
4009     return;
4010   }
4011 
4012   if (is_zero) {
4013     makeZero(sign);
4014     return;
4015   }
4016 
4017   category = fcNormal;
4018   exponent = myexponent - bias;
4019   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
4020   if (myexponent == 0) // denormal
4021     exponent = S.minExponent;
4022   else
4023     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
4024 }
4025 
4026 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
4027   initFromIEEEAPInt<semIEEEquad>(api);
4028 }
4029 
4030 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4031   initFromIEEEAPInt<semIEEEdouble>(api);
4032 }
4033 
4034 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4035   initFromIEEEAPInt<semIEEEsingle>(api);
4036 }
4037 
4038 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4039   initFromIEEEAPInt<semBFloat>(api);
4040 }
4041 
4042 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4043   initFromIEEEAPInt<semIEEEhalf>(api);
4044 }
4045 
4046 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4047   initFromIEEEAPInt<semFloat8E5M2>(api);
4048 }
4049 
4050 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4051   initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
4052 }
4053 
4054 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4055   initFromIEEEAPInt<semFloat8E4M3>(api);
4056 }
4057 
4058 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4059   initFromIEEEAPInt<semFloat8E4M3FN>(api);
4060 }
4061 
4062 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4063   initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
4064 }
4065 
4066 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4067   initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
4068 }
4069 
4070 void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4071   initFromIEEEAPInt<semFloat8E3M4>(api);
4072 }
4073 
4074 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4075   initFromIEEEAPInt<semFloatTF32>(api);
4076 }
4077 
4078 void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4079   initFromIEEEAPInt<semFloat6E3M2FN>(api);
4080 }
4081 
4082 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4083   initFromIEEEAPInt<semFloat6E2M3FN>(api);
4084 }
4085 
4086 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4087   initFromIEEEAPInt<semFloat4E2M1FN>(api);
4088 }
4089 
4090 /// Treat api as containing the bits of a floating point number.
4091 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4092   assert(api.getBitWidth() == Sem->sizeInBits);
4093   if (Sem == &semIEEEhalf)
4094     return initFromHalfAPInt(api);
4095   if (Sem == &semBFloat)
4096     return initFromBFloatAPInt(api);
4097   if (Sem == &semIEEEsingle)
4098     return initFromFloatAPInt(api);
4099   if (Sem == &semIEEEdouble)
4100     return initFromDoubleAPInt(api);
4101   if (Sem == &semX87DoubleExtended)
4102     return initFromF80LongDoubleAPInt(api);
4103   if (Sem == &semIEEEquad)
4104     return initFromQuadrupleAPInt(api);
4105   if (Sem == &semPPCDoubleDoubleLegacy)
4106     return initFromPPCDoubleDoubleAPInt(api);
4107   if (Sem == &semFloat8E5M2)
4108     return initFromFloat8E5M2APInt(api);
4109   if (Sem == &semFloat8E5M2FNUZ)
4110     return initFromFloat8E5M2FNUZAPInt(api);
4111   if (Sem == &semFloat8E4M3)
4112     return initFromFloat8E4M3APInt(api);
4113   if (Sem == &semFloat8E4M3FN)
4114     return initFromFloat8E4M3FNAPInt(api);
4115   if (Sem == &semFloat8E4M3FNUZ)
4116     return initFromFloat8E4M3FNUZAPInt(api);
4117   if (Sem == &semFloat8E4M3B11FNUZ)
4118     return initFromFloat8E4M3B11FNUZAPInt(api);
4119   if (Sem == &semFloat8E3M4)
4120     return initFromFloat8E3M4APInt(api);
4121   if (Sem == &semFloatTF32)
4122     return initFromFloatTF32APInt(api);
4123   if (Sem == &semFloat8E8M0FNU)
4124     return initFromFloat8E8M0FNUAPInt(api);
4125   if (Sem == &semFloat6E3M2FN)
4126     return initFromFloat6E3M2FNAPInt(api);
4127   if (Sem == &semFloat6E2M3FN)
4128     return initFromFloat6E2M3FNAPInt(api);
4129   if (Sem == &semFloat4E2M1FN)
4130     return initFromFloat4E2M1FNAPInt(api);
4131 
4132   llvm_unreachable(nullptr);
4133 }
4134 
4135 /// Make this number the largest magnitude normal number in the given
4136 /// semantics.
4137 void IEEEFloat::makeLargest(bool Negative) {
4138   if (Negative && !semantics->hasSignedRepr)
4139     llvm_unreachable(
4140         "This floating point format does not support signed values");
4141   // We want (in interchange format):
4142   //   sign = {Negative}
4143   //   exponent = 1..10
4144   //   significand = 1..1
4145   category = fcNormal;
4146   sign = Negative;
4147   exponent = semantics->maxExponent;
4148 
4149   // Use memset to set all but the highest integerPart to all ones.
4150   integerPart *significand = significandParts();
4151   unsigned PartCount = partCount();
4152   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4153 
4154   // Set the high integerPart especially setting all unused top bits for
4155   // internal consistency.
4156   const unsigned NumUnusedHighBits =
4157     PartCount*integerPartWidth - semantics->precision;
4158   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4159                                    ? (~integerPart(0) >> NumUnusedHighBits)
4160                                    : 0;
4161   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4162       semantics->nanEncoding == fltNanEncoding::AllOnes &&
4163       (semantics->precision > 1))
4164     significand[0] &= ~integerPart(1);
4165 }
4166 
4167 /// Make this number the smallest magnitude denormal number in the given
4168 /// semantics.
4169 void IEEEFloat::makeSmallest(bool Negative) {
4170   if (Negative && !semantics->hasSignedRepr)
4171     llvm_unreachable(
4172         "This floating point format does not support signed values");
4173   // We want (in interchange format):
4174   //   sign = {Negative}
4175   //   exponent = 0..0
4176   //   significand = 0..01
4177   category = fcNormal;
4178   sign = Negative;
4179   exponent = semantics->minExponent;
4180   APInt::tcSet(significandParts(), 1, partCount());
4181 }
4182 
4183 void IEEEFloat::makeSmallestNormalized(bool Negative) {
4184   if (Negative && !semantics->hasSignedRepr)
4185     llvm_unreachable(
4186         "This floating point format does not support signed values");
4187   // We want (in interchange format):
4188   //   sign = {Negative}
4189   //   exponent = 0..0
4190   //   significand = 10..0
4191 
4192   category = fcNormal;
4193   zeroSignificand();
4194   sign = Negative;
4195   exponent = semantics->minExponent;
4196   APInt::tcSetBit(significandParts(), semantics->precision - 1);
4197 }
4198 
4199 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4200   initFromAPInt(&Sem, API);
4201 }
4202 
4203 IEEEFloat::IEEEFloat(float f) {
4204   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4205 }
4206 
4207 IEEEFloat::IEEEFloat(double d) {
4208   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4209 }
4210 
4211 namespace {
4212   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4213     Buffer.append(Str.begin(), Str.end());
4214   }
4215 
4216   /// Removes data from the given significand until it is no more
4217   /// precise than is required for the desired precision.
4218   void AdjustToPrecision(APInt &significand,
4219                          int &exp, unsigned FormatPrecision) {
4220     unsigned bits = significand.getActiveBits();
4221 
4222     // 196/59 is a very slight overestimate of lg_2(10).
4223     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4224 
4225     if (bits <= bitsRequired) return;
4226 
4227     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4228     if (!tensRemovable) return;
4229 
4230     exp += tensRemovable;
4231 
4232     APInt divisor(significand.getBitWidth(), 1);
4233     APInt powten(significand.getBitWidth(), 10);
4234     while (true) {
4235       if (tensRemovable & 1)
4236         divisor *= powten;
4237       tensRemovable >>= 1;
4238       if (!tensRemovable) break;
4239       powten *= powten;
4240     }
4241 
4242     significand = significand.udiv(divisor);
4243 
4244     // Truncate the significand down to its active bit count.
4245     significand = significand.trunc(significand.getActiveBits());
4246   }
4247 
4248 
4249   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4250                          int &exp, unsigned FormatPrecision) {
4251     unsigned N = buffer.size();
4252     if (N <= FormatPrecision) return;
4253 
4254     // The most significant figures are the last ones in the buffer.
4255     unsigned FirstSignificant = N - FormatPrecision;
4256 
4257     // Round.
4258     // FIXME: this probably shouldn't use 'round half up'.
4259 
4260     // Rounding down is just a truncation, except we also want to drop
4261     // trailing zeros from the new result.
4262     if (buffer[FirstSignificant - 1] < '5') {
4263       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4264         FirstSignificant++;
4265 
4266       exp += FirstSignificant;
4267       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4268       return;
4269     }
4270 
4271     // Rounding up requires a decimal add-with-carry.  If we continue
4272     // the carry, the newly-introduced zeros will just be truncated.
4273     for (unsigned I = FirstSignificant; I != N; ++I) {
4274       if (buffer[I] == '9') {
4275         FirstSignificant++;
4276       } else {
4277         buffer[I]++;
4278         break;
4279       }
4280     }
4281 
4282     // If we carried through, we have exactly one digit of precision.
4283     if (FirstSignificant == N) {
4284       exp += FirstSignificant;
4285       buffer.clear();
4286       buffer.push_back('1');
4287       return;
4288     }
4289 
4290     exp += FirstSignificant;
4291     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4292   }
4293 
4294   void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4295                     APInt significand, unsigned FormatPrecision,
4296                     unsigned FormatMaxPadding, bool TruncateZero) {
4297     const int semanticsPrecision = significand.getBitWidth();
4298 
4299     if (isNeg)
4300       Str.push_back('-');
4301 
4302     // Set FormatPrecision if zero.  We want to do this before we
4303     // truncate trailing zeros, as those are part of the precision.
4304     if (!FormatPrecision) {
4305       // We use enough digits so the number can be round-tripped back to an
4306       // APFloat. The formula comes from "How to Print Floating-Point Numbers
4307       // Accurately" by Steele and White.
4308       // FIXME: Using a formula based purely on the precision is conservative;
4309       // we can print fewer digits depending on the actual value being printed.
4310 
4311       // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4312       FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4313     }
4314 
4315     // Ignore trailing binary zeros.
4316     int trailingZeros = significand.countr_zero();
4317     exp += trailingZeros;
4318     significand.lshrInPlace(trailingZeros);
4319 
4320     // Change the exponent from 2^e to 10^e.
4321     if (exp == 0) {
4322       // Nothing to do.
4323     } else if (exp > 0) {
4324       // Just shift left.
4325       significand = significand.zext(semanticsPrecision + exp);
4326       significand <<= exp;
4327       exp = 0;
4328     } else { /* exp < 0 */
4329       int texp = -exp;
4330 
4331       // We transform this using the identity:
4332       //   (N)(2^-e) == (N)(5^e)(10^-e)
4333       // This means we have to multiply N (the significand) by 5^e.
4334       // To avoid overflow, we have to operate on numbers large
4335       // enough to store N * 5^e:
4336       //   log2(N * 5^e) == log2(N) + e * log2(5)
4337       //                 <= semantics->precision + e * 137 / 59
4338       //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4339 
4340       unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4341 
4342       // Multiply significand by 5^e.
4343       //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4344       significand = significand.zext(precision);
4345       APInt five_to_the_i(precision, 5);
4346       while (true) {
4347         if (texp & 1)
4348           significand *= five_to_the_i;
4349 
4350         texp >>= 1;
4351         if (!texp)
4352           break;
4353         five_to_the_i *= five_to_the_i;
4354       }
4355     }
4356 
4357     AdjustToPrecision(significand, exp, FormatPrecision);
4358 
4359     SmallVector<char, 256> buffer;
4360 
4361     // Fill the buffer.
4362     unsigned precision = significand.getBitWidth();
4363     if (precision < 4) {
4364       // We need enough precision to store the value 10.
4365       precision = 4;
4366       significand = significand.zext(precision);
4367     }
4368     APInt ten(precision, 10);
4369     APInt digit(precision, 0);
4370 
4371     bool inTrail = true;
4372     while (significand != 0) {
4373       // digit <- significand % 10
4374       // significand <- significand / 10
4375       APInt::udivrem(significand, ten, significand, digit);
4376 
4377       unsigned d = digit.getZExtValue();
4378 
4379       // Drop trailing zeros.
4380       if (inTrail && !d)
4381         exp++;
4382       else {
4383         buffer.push_back((char) ('0' + d));
4384         inTrail = false;
4385       }
4386     }
4387 
4388     assert(!buffer.empty() && "no characters in buffer!");
4389 
4390     // Drop down to FormatPrecision.
4391     // TODO: don't do more precise calculations above than are required.
4392     AdjustToPrecision(buffer, exp, FormatPrecision);
4393 
4394     unsigned NDigits = buffer.size();
4395 
4396     // Check whether we should use scientific notation.
4397     bool FormatScientific;
4398     if (!FormatMaxPadding)
4399       FormatScientific = true;
4400     else {
4401       if (exp >= 0) {
4402         // 765e3 --> 765000
4403         //              ^^^
4404         // But we shouldn't make the number look more precise than it is.
4405         FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4406                             NDigits + (unsigned) exp > FormatPrecision);
4407       } else {
4408         // Power of the most significant digit.
4409         int MSD = exp + (int) (NDigits - 1);
4410         if (MSD >= 0) {
4411           // 765e-2 == 7.65
4412           FormatScientific = false;
4413         } else {
4414           // 765e-5 == 0.00765
4415           //           ^ ^^
4416           FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4417         }
4418       }
4419     }
4420 
4421     // Scientific formatting is pretty straightforward.
4422     if (FormatScientific) {
4423       exp += (NDigits - 1);
4424 
4425       Str.push_back(buffer[NDigits-1]);
4426       Str.push_back('.');
4427       if (NDigits == 1 && TruncateZero)
4428         Str.push_back('0');
4429       else
4430         for (unsigned I = 1; I != NDigits; ++I)
4431           Str.push_back(buffer[NDigits-1-I]);
4432       // Fill with zeros up to FormatPrecision.
4433       if (!TruncateZero && FormatPrecision > NDigits - 1)
4434         Str.append(FormatPrecision - NDigits + 1, '0');
4435       // For !TruncateZero we use lower 'e'.
4436       Str.push_back(TruncateZero ? 'E' : 'e');
4437 
4438       Str.push_back(exp >= 0 ? '+' : '-');
4439       if (exp < 0)
4440         exp = -exp;
4441       SmallVector<char, 6> expbuf;
4442       do {
4443         expbuf.push_back((char) ('0' + (exp % 10)));
4444         exp /= 10;
4445       } while (exp);
4446       // Exponent always at least two digits if we do not truncate zeros.
4447       if (!TruncateZero && expbuf.size() < 2)
4448         expbuf.push_back('0');
4449       for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4450         Str.push_back(expbuf[E-1-I]);
4451       return;
4452     }
4453 
4454     // Non-scientific, positive exponents.
4455     if (exp >= 0) {
4456       for (unsigned I = 0; I != NDigits; ++I)
4457         Str.push_back(buffer[NDigits-1-I]);
4458       for (unsigned I = 0; I != (unsigned) exp; ++I)
4459         Str.push_back('0');
4460       return;
4461     }
4462 
4463     // Non-scientific, negative exponents.
4464 
4465     // The number of digits to the left of the decimal point.
4466     int NWholeDigits = exp + (int) NDigits;
4467 
4468     unsigned I = 0;
4469     if (NWholeDigits > 0) {
4470       for (; I != (unsigned) NWholeDigits; ++I)
4471         Str.push_back(buffer[NDigits-I-1]);
4472       Str.push_back('.');
4473     } else {
4474       unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4475 
4476       Str.push_back('0');
4477       Str.push_back('.');
4478       for (unsigned Z = 1; Z != NZeros; ++Z)
4479         Str.push_back('0');
4480     }
4481 
4482     for (; I != NDigits; ++I)
4483       Str.push_back(buffer[NDigits-I-1]);
4484 
4485   }
4486 } // namespace
4487 
4488 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4489                          unsigned FormatMaxPadding, bool TruncateZero) const {
4490   switch (category) {
4491   case fcInfinity:
4492     if (isNegative())
4493       return append(Str, "-Inf");
4494     else
4495       return append(Str, "+Inf");
4496 
4497   case fcNaN: return append(Str, "NaN");
4498 
4499   case fcZero:
4500     if (isNegative())
4501       Str.push_back('-');
4502 
4503     if (!FormatMaxPadding) {
4504       if (TruncateZero)
4505         append(Str, "0.0E+0");
4506       else {
4507         append(Str, "0.0");
4508         if (FormatPrecision > 1)
4509           Str.append(FormatPrecision - 1, '0');
4510         append(Str, "e+00");
4511       }
4512     } else
4513       Str.push_back('0');
4514     return;
4515 
4516   case fcNormal:
4517     break;
4518   }
4519 
4520   // Decompose the number into an APInt and an exponent.
4521   int exp = exponent - ((int) semantics->precision - 1);
4522   APInt significand(
4523       semantics->precision,
4524       ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4525 
4526   toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4527                FormatMaxPadding, TruncateZero);
4528 
4529 }
4530 
4531 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4532   // Special floats and denormals have no exact inverse.
4533   if (!isFiniteNonZero())
4534     return false;
4535 
4536   // Check that the number is a power of two by making sure that only the
4537   // integer bit is set in the significand.
4538   if (significandLSB() != semantics->precision - 1)
4539     return false;
4540 
4541   // Get the inverse.
4542   IEEEFloat reciprocal(*semantics, 1ULL);
4543   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4544     return false;
4545 
4546   // Avoid multiplication with a denormal, it is not safe on all platforms and
4547   // may be slower than a normal division.
4548   if (reciprocal.isDenormal())
4549     return false;
4550 
4551   assert(reciprocal.isFiniteNonZero() &&
4552          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4553 
4554   if (inv)
4555     *inv = APFloat(reciprocal, *semantics);
4556 
4557   return true;
4558 }
4559 
4560 int IEEEFloat::getExactLog2Abs() const {
4561   if (!isFinite() || isZero())
4562     return INT_MIN;
4563 
4564   const integerPart *Parts = significandParts();
4565   const int PartCount = partCountForBits(semantics->precision);
4566 
4567   int PopCount = 0;
4568   for (int i = 0; i < PartCount; ++i) {
4569     PopCount += llvm::popcount(Parts[i]);
4570     if (PopCount > 1)
4571       return INT_MIN;
4572   }
4573 
4574   if (exponent != semantics->minExponent)
4575     return exponent;
4576 
4577   int CountrParts = 0;
4578   for (int i = 0; i < PartCount;
4579        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4580     if (Parts[i] != 0) {
4581       return exponent - semantics->precision + CountrParts +
4582              llvm::countr_zero(Parts[i]) + 1;
4583     }
4584   }
4585 
4586   llvm_unreachable("didn't find the set bit");
4587 }
4588 
4589 bool IEEEFloat::isSignaling() const {
4590   if (!isNaN())
4591     return false;
4592   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4593       semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4594     return false;
4595 
4596   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4597   // first bit of the trailing significand being 0.
4598   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4599 }
4600 
4601 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4602 ///
4603 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4604 /// appropriate sign switching before/after the computation.
4605 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4606   // If we are performing nextDown, swap sign so we have -x.
4607   if (nextDown)
4608     changeSign();
4609 
4610   // Compute nextUp(x)
4611   opStatus result = opOK;
4612 
4613   // Handle each float category separately.
4614   switch (category) {
4615   case fcInfinity:
4616     // nextUp(+inf) = +inf
4617     if (!isNegative())
4618       break;
4619     // nextUp(-inf) = -getLargest()
4620     makeLargest(true);
4621     break;
4622   case fcNaN:
4623     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4624     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4625     //                     change the payload.
4626     if (isSignaling()) {
4627       result = opInvalidOp;
4628       // For consistency, propagate the sign of the sNaN to the qNaN.
4629       makeNaN(false, isNegative(), nullptr);
4630     }
4631     break;
4632   case fcZero:
4633     // nextUp(pm 0) = +getSmallest()
4634     makeSmallest(false);
4635     break;
4636   case fcNormal:
4637     // nextUp(-getSmallest()) = -0
4638     if (isSmallest() && isNegative()) {
4639       APInt::tcSet(significandParts(), 0, partCount());
4640       category = fcZero;
4641       exponent = 0;
4642       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4643         sign = false;
4644       if (!semantics->hasZero)
4645         makeSmallestNormalized(false);
4646       break;
4647     }
4648 
4649     if (isLargest() && !isNegative()) {
4650       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4651         // nextUp(getLargest()) == NAN
4652         makeNaN();
4653         break;
4654       } else if (semantics->nonFiniteBehavior ==
4655                  fltNonfiniteBehavior::FiniteOnly) {
4656         // nextUp(getLargest()) == getLargest()
4657         break;
4658       } else {
4659         // nextUp(getLargest()) == INFINITY
4660         APInt::tcSet(significandParts(), 0, partCount());
4661         category = fcInfinity;
4662         exponent = semantics->maxExponent + 1;
4663         break;
4664       }
4665     }
4666 
4667     // nextUp(normal) == normal + inc.
4668     if (isNegative()) {
4669       // If we are negative, we need to decrement the significand.
4670 
4671       // We only cross a binade boundary that requires adjusting the exponent
4672       // if:
4673       //   1. exponent != semantics->minExponent. This implies we are not in the
4674       //   smallest binade or are dealing with denormals.
4675       //   2. Our significand excluding the integral bit is all zeros.
4676       bool WillCrossBinadeBoundary =
4677         exponent != semantics->minExponent && isSignificandAllZeros();
4678 
4679       // Decrement the significand.
4680       //
4681       // We always do this since:
4682       //   1. If we are dealing with a non-binade decrement, by definition we
4683       //   just decrement the significand.
4684       //   2. If we are dealing with a normal -> normal binade decrement, since
4685       //   we have an explicit integral bit the fact that all bits but the
4686       //   integral bit are zero implies that subtracting one will yield a
4687       //   significand with 0 integral bit and 1 in all other spots. Thus we
4688       //   must just adjust the exponent and set the integral bit to 1.
4689       //   3. If we are dealing with a normal -> denormal binade decrement,
4690       //   since we set the integral bit to 0 when we represent denormals, we
4691       //   just decrement the significand.
4692       integerPart *Parts = significandParts();
4693       APInt::tcDecrement(Parts, partCount());
4694 
4695       if (WillCrossBinadeBoundary) {
4696         // Our result is a normal number. Do the following:
4697         // 1. Set the integral bit to 1.
4698         // 2. Decrement the exponent.
4699         APInt::tcSetBit(Parts, semantics->precision - 1);
4700         exponent--;
4701       }
4702     } else {
4703       // If we are positive, we need to increment the significand.
4704 
4705       // We only cross a binade boundary that requires adjusting the exponent if
4706       // the input is not a denormal and all of said input's significand bits
4707       // are set. If all of said conditions are true: clear the significand, set
4708       // the integral bit to 1, and increment the exponent. If we have a
4709       // denormal always increment since moving denormals and the numbers in the
4710       // smallest normal binade have the same exponent in our representation.
4711       // If there are only exponents, any increment always crosses the
4712       // BinadeBoundary.
4713       bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4714                                      (!isDenormal() && isSignificandAllOnes());
4715 
4716       if (WillCrossBinadeBoundary) {
4717         integerPart *Parts = significandParts();
4718         APInt::tcSet(Parts, 0, partCount());
4719         APInt::tcSetBit(Parts, semantics->precision - 1);
4720         assert(exponent != semantics->maxExponent &&
4721                "We can not increment an exponent beyond the maxExponent allowed"
4722                " by the given floating point semantics.");
4723         exponent++;
4724       } else {
4725         incrementSignificand();
4726       }
4727     }
4728     break;
4729   }
4730 
4731   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4732   if (nextDown)
4733     changeSign();
4734 
4735   return result;
4736 }
4737 
4738 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4739   return ::exponentNaN(*semantics);
4740 }
4741 
4742 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4743   return ::exponentInf(*semantics);
4744 }
4745 
4746 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4747   return ::exponentZero(*semantics);
4748 }
4749 
4750 void IEEEFloat::makeInf(bool Negative) {
4751   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4752     llvm_unreachable("This floating point format does not support Inf");
4753 
4754   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4755     // There is no Inf, so make NaN instead.
4756     makeNaN(false, Negative);
4757     return;
4758   }
4759   category = fcInfinity;
4760   sign = Negative;
4761   exponent = exponentInf();
4762   APInt::tcSet(significandParts(), 0, partCount());
4763 }
4764 
4765 void IEEEFloat::makeZero(bool Negative) {
4766   if (!semantics->hasZero)
4767     llvm_unreachable("This floating point format does not support Zero");
4768 
4769   category = fcZero;
4770   sign = Negative;
4771   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4772     // Merge negative zero to positive because 0b10000...000 is used for NaN
4773     sign = false;
4774   }
4775   exponent = exponentZero();
4776   APInt::tcSet(significandParts(), 0, partCount());
4777 }
4778 
4779 void IEEEFloat::makeQuiet() {
4780   assert(isNaN());
4781   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4782     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4783 }
4784 
4785 int ilogb(const IEEEFloat &Arg) {
4786   if (Arg.isNaN())
4787     return IEEEFloat::IEK_NaN;
4788   if (Arg.isZero())
4789     return IEEEFloat::IEK_Zero;
4790   if (Arg.isInfinity())
4791     return IEEEFloat::IEK_Inf;
4792   if (!Arg.isDenormal())
4793     return Arg.exponent;
4794 
4795   IEEEFloat Normalized(Arg);
4796   int SignificandBits = Arg.getSemantics().precision - 1;
4797 
4798   Normalized.exponent += SignificandBits;
4799   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4800   return Normalized.exponent - SignificandBits;
4801 }
4802 
4803 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4804   auto MaxExp = X.getSemantics().maxExponent;
4805   auto MinExp = X.getSemantics().minExponent;
4806 
4807   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4808   // overflow; clamp it to a safe range before adding, but ensure that the range
4809   // is large enough that the clamp does not change the result. The range we
4810   // need to support is the difference between the largest possible exponent and
4811   // the normalized exponent of half the smallest denormal.
4812 
4813   int SignificandBits = X.getSemantics().precision - 1;
4814   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4815 
4816   // Clamp to one past the range ends to let normalize handle overlflow.
4817   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4818   X.normalize(RoundingMode, lfExactlyZero);
4819   if (X.isNaN())
4820     X.makeQuiet();
4821   return X;
4822 }
4823 
4824 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4825   Exp = ilogb(Val);
4826 
4827   // Quiet signalling nans.
4828   if (Exp == IEEEFloat::IEK_NaN) {
4829     IEEEFloat Quiet(Val);
4830     Quiet.makeQuiet();
4831     return Quiet;
4832   }
4833 
4834   if (Exp == IEEEFloat::IEK_Inf)
4835     return Val;
4836 
4837   // 1 is added because frexp is defined to return a normalized fraction in
4838   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4839   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4840   return scalbn(Val, -Exp, RM);
4841 }
4842 
4843 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4844     : Semantics(&S),
4845       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4846   assert(Semantics == &semPPCDoubleDouble);
4847 }
4848 
4849 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4850     : Semantics(&S),
4851       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4852                             APFloat(semIEEEdouble, uninitialized)}) {
4853   assert(Semantics == &semPPCDoubleDouble);
4854 }
4855 
4856 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4857     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4858                                            APFloat(semIEEEdouble)}) {
4859   assert(Semantics == &semPPCDoubleDouble);
4860 }
4861 
4862 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4863     : Semantics(&S),
4864       Floats(new APFloat[2]{
4865           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4866           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4867   assert(Semantics == &semPPCDoubleDouble);
4868 }
4869 
4870 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4871                              APFloat &&Second)
4872     : Semantics(&S),
4873       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4874   assert(Semantics == &semPPCDoubleDouble);
4875   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4876   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4877 }
4878 
4879 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4880     : Semantics(RHS.Semantics),
4881       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4882                                          APFloat(RHS.Floats[1])}
4883                         : nullptr) {
4884   assert(Semantics == &semPPCDoubleDouble);
4885 }
4886 
4887 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4888     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4889   RHS.Semantics = &semBogus;
4890   assert(Semantics == &semPPCDoubleDouble);
4891 }
4892 
4893 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4894   if (Semantics == RHS.Semantics && RHS.Floats) {
4895     Floats[0] = RHS.Floats[0];
4896     Floats[1] = RHS.Floats[1];
4897   } else if (this != &RHS) {
4898     this->~DoubleAPFloat();
4899     new (this) DoubleAPFloat(RHS);
4900   }
4901   return *this;
4902 }
4903 
4904 // Implement addition, subtraction, multiplication and division based on:
4905 // "Software for Doubled-Precision Floating-Point Computations",
4906 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4907 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4908                                          const APFloat &c, const APFloat &cc,
4909                                          roundingMode RM) {
4910   int Status = opOK;
4911   APFloat z = a;
4912   Status |= z.add(c, RM);
4913   if (!z.isFinite()) {
4914     if (!z.isInfinity()) {
4915       Floats[0] = std::move(z);
4916       Floats[1].makeZero(/* Neg = */ false);
4917       return (opStatus)Status;
4918     }
4919     Status = opOK;
4920     auto AComparedToC = a.compareAbsoluteValue(c);
4921     z = cc;
4922     Status |= z.add(aa, RM);
4923     if (AComparedToC == APFloat::cmpGreaterThan) {
4924       // z = cc + aa + c + a;
4925       Status |= z.add(c, RM);
4926       Status |= z.add(a, RM);
4927     } else {
4928       // z = cc + aa + a + c;
4929       Status |= z.add(a, RM);
4930       Status |= z.add(c, RM);
4931     }
4932     if (!z.isFinite()) {
4933       Floats[0] = std::move(z);
4934       Floats[1].makeZero(/* Neg = */ false);
4935       return (opStatus)Status;
4936     }
4937     Floats[0] = z;
4938     APFloat zz = aa;
4939     Status |= zz.add(cc, RM);
4940     if (AComparedToC == APFloat::cmpGreaterThan) {
4941       // Floats[1] = a - z + c + zz;
4942       Floats[1] = a;
4943       Status |= Floats[1].subtract(z, RM);
4944       Status |= Floats[1].add(c, RM);
4945       Status |= Floats[1].add(zz, RM);
4946     } else {
4947       // Floats[1] = c - z + a + zz;
4948       Floats[1] = c;
4949       Status |= Floats[1].subtract(z, RM);
4950       Status |= Floats[1].add(a, RM);
4951       Status |= Floats[1].add(zz, RM);
4952     }
4953   } else {
4954     // q = a - z;
4955     APFloat q = a;
4956     Status |= q.subtract(z, RM);
4957 
4958     // zz = q + c + (a - (q + z)) + aa + cc;
4959     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4960     auto zz = q;
4961     Status |= zz.add(c, RM);
4962     Status |= q.add(z, RM);
4963     Status |= q.subtract(a, RM);
4964     q.changeSign();
4965     Status |= zz.add(q, RM);
4966     Status |= zz.add(aa, RM);
4967     Status |= zz.add(cc, RM);
4968     if (zz.isZero() && !zz.isNegative()) {
4969       Floats[0] = std::move(z);
4970       Floats[1].makeZero(/* Neg = */ false);
4971       return opOK;
4972     }
4973     Floats[0] = z;
4974     Status |= Floats[0].add(zz, RM);
4975     if (!Floats[0].isFinite()) {
4976       Floats[1].makeZero(/* Neg = */ false);
4977       return (opStatus)Status;
4978     }
4979     Floats[1] = std::move(z);
4980     Status |= Floats[1].subtract(Floats[0], RM);
4981     Status |= Floats[1].add(zz, RM);
4982   }
4983   return (opStatus)Status;
4984 }
4985 
4986 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4987                                                 const DoubleAPFloat &RHS,
4988                                                 DoubleAPFloat &Out,
4989                                                 roundingMode RM) {
4990   if (LHS.getCategory() == fcNaN) {
4991     Out = LHS;
4992     return opOK;
4993   }
4994   if (RHS.getCategory() == fcNaN) {
4995     Out = RHS;
4996     return opOK;
4997   }
4998   if (LHS.getCategory() == fcZero) {
4999     Out = RHS;
5000     return opOK;
5001   }
5002   if (RHS.getCategory() == fcZero) {
5003     Out = LHS;
5004     return opOK;
5005   }
5006   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
5007       LHS.isNegative() != RHS.isNegative()) {
5008     Out.makeNaN(false, Out.isNegative(), nullptr);
5009     return opInvalidOp;
5010   }
5011   if (LHS.getCategory() == fcInfinity) {
5012     Out = LHS;
5013     return opOK;
5014   }
5015   if (RHS.getCategory() == fcInfinity) {
5016     Out = RHS;
5017     return opOK;
5018   }
5019   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
5020 
5021   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
5022       CC(RHS.Floats[1]);
5023   assert(&A.getSemantics() == &semIEEEdouble);
5024   assert(&AA.getSemantics() == &semIEEEdouble);
5025   assert(&C.getSemantics() == &semIEEEdouble);
5026   assert(&CC.getSemantics() == &semIEEEdouble);
5027   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
5028   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
5029   return Out.addImpl(A, AA, C, CC, RM);
5030 }
5031 
5032 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
5033                                      roundingMode RM) {
5034   return addWithSpecial(*this, RHS, *this, RM);
5035 }
5036 
5037 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
5038                                           roundingMode RM) {
5039   changeSign();
5040   auto Ret = add(RHS, RM);
5041   changeSign();
5042   return Ret;
5043 }
5044 
5045 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
5046                                           APFloat::roundingMode RM) {
5047   const auto &LHS = *this;
5048   auto &Out = *this;
5049   /* Interesting observation: For special categories, finding the lowest
5050      common ancestor of the following layered graph gives the correct
5051      return category:
5052 
5053         NaN
5054        /   \
5055      Zero  Inf
5056        \   /
5057        Normal
5058 
5059      e.g. NaN * NaN = NaN
5060           Zero * Inf = NaN
5061           Normal * Zero = Zero
5062           Normal * Inf = Inf
5063   */
5064   if (LHS.getCategory() == fcNaN) {
5065     Out = LHS;
5066     return opOK;
5067   }
5068   if (RHS.getCategory() == fcNaN) {
5069     Out = RHS;
5070     return opOK;
5071   }
5072   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5073       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5074     Out.makeNaN(false, false, nullptr);
5075     return opOK;
5076   }
5077   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5078     Out = LHS;
5079     return opOK;
5080   }
5081   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5082     Out = RHS;
5083     return opOK;
5084   }
5085   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5086          "Special cases not handled exhaustively");
5087 
5088   int Status = opOK;
5089   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5090   // t = a * c
5091   APFloat T = A;
5092   Status |= T.multiply(C, RM);
5093   if (!T.isFiniteNonZero()) {
5094     Floats[0] = T;
5095     Floats[1].makeZero(/* Neg = */ false);
5096     return (opStatus)Status;
5097   }
5098 
5099   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5100   APFloat Tau = A;
5101   T.changeSign();
5102   Status |= Tau.fusedMultiplyAdd(C, T, RM);
5103   T.changeSign();
5104   {
5105     // v = a * d
5106     APFloat V = A;
5107     Status |= V.multiply(D, RM);
5108     // w = b * c
5109     APFloat W = B;
5110     Status |= W.multiply(C, RM);
5111     Status |= V.add(W, RM);
5112     // tau += v + w
5113     Status |= Tau.add(V, RM);
5114   }
5115   // u = t + tau
5116   APFloat U = T;
5117   Status |= U.add(Tau, RM);
5118 
5119   Floats[0] = U;
5120   if (!U.isFinite()) {
5121     Floats[1].makeZero(/* Neg = */ false);
5122   } else {
5123     // Floats[1] = (t - u) + tau
5124     Status |= T.subtract(U, RM);
5125     Status |= T.add(Tau, RM);
5126     Floats[1] = T;
5127   }
5128   return (opStatus)Status;
5129 }
5130 
5131 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
5132                                         APFloat::roundingMode RM) {
5133   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5134   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5135   auto Ret =
5136       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5137   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5138   return Ret;
5139 }
5140 
5141 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
5142   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5143   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5144   auto Ret =
5145       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5146   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5147   return Ret;
5148 }
5149 
5150 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
5151   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5152   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5153   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5154   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5155   return Ret;
5156 }
5157 
5158 APFloat::opStatus
5159 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
5160                                 const DoubleAPFloat &Addend,
5161                                 APFloat::roundingMode RM) {
5162   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5163   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5164   auto Ret = Tmp.fusedMultiplyAdd(
5165       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
5166       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
5167   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5168   return Ret;
5169 }
5170 
5171 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
5172   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5173   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5174   auto Ret = Tmp.roundToIntegral(RM);
5175   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5176   return Ret;
5177 }
5178 
5179 void DoubleAPFloat::changeSign() {
5180   Floats[0].changeSign();
5181   Floats[1].changeSign();
5182 }
5183 
5184 APFloat::cmpResult
5185 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
5186   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5187   if (Result != cmpEqual)
5188     return Result;
5189   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5190   if (Result == cmpLessThan || Result == cmpGreaterThan) {
5191     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
5192     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
5193     if (Against && !RHSAgainst)
5194       return cmpLessThan;
5195     if (!Against && RHSAgainst)
5196       return cmpGreaterThan;
5197     if (!Against && !RHSAgainst)
5198       return Result;
5199     if (Against && RHSAgainst)
5200       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
5201   }
5202   return Result;
5203 }
5204 
5205 APFloat::fltCategory DoubleAPFloat::getCategory() const {
5206   return Floats[0].getCategory();
5207 }
5208 
5209 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5210 
5211 void DoubleAPFloat::makeInf(bool Neg) {
5212   Floats[0].makeInf(Neg);
5213   Floats[1].makeZero(/* Neg = */ false);
5214 }
5215 
5216 void DoubleAPFloat::makeZero(bool Neg) {
5217   Floats[0].makeZero(Neg);
5218   Floats[1].makeZero(/* Neg = */ false);
5219 }
5220 
5221 void DoubleAPFloat::makeLargest(bool Neg) {
5222   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5223   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5224   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5225   if (Neg)
5226     changeSign();
5227 }
5228 
5229 void DoubleAPFloat::makeSmallest(bool Neg) {
5230   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5231   Floats[0].makeSmallest(Neg);
5232   Floats[1].makeZero(/* Neg = */ false);
5233 }
5234 
5235 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5236   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5237   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5238   if (Neg)
5239     Floats[0].changeSign();
5240   Floats[1].makeZero(/* Neg = */ false);
5241 }
5242 
5243 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5244   Floats[0].makeNaN(SNaN, Neg, fill);
5245   Floats[1].makeZero(/* Neg = */ false);
5246 }
5247 
5248 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5249   auto Result = Floats[0].compare(RHS.Floats[0]);
5250   // |Float[0]| > |Float[1]|
5251   if (Result == APFloat::cmpEqual)
5252     return Floats[1].compare(RHS.Floats[1]);
5253   return Result;
5254 }
5255 
5256 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5257   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5258          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5259 }
5260 
5261 hash_code hash_value(const DoubleAPFloat &Arg) {
5262   if (Arg.Floats)
5263     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5264   return hash_combine(Arg.Semantics);
5265 }
5266 
5267 APInt DoubleAPFloat::bitcastToAPInt() const {
5268   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5269   uint64_t Data[] = {
5270       Floats[0].bitcastToAPInt().getRawData()[0],
5271       Floats[1].bitcastToAPInt().getRawData()[0],
5272   };
5273   return APInt(128, 2, Data);
5274 }
5275 
5276 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5277                                                              roundingMode RM) {
5278   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5279   APFloat Tmp(semPPCDoubleDoubleLegacy);
5280   auto Ret = Tmp.convertFromString(S, RM);
5281   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5282   return Ret;
5283 }
5284 
5285 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5286   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5287   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5288   auto Ret = Tmp.next(nextDown);
5289   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5290   return Ret;
5291 }
5292 
5293 APFloat::opStatus
5294 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5295                                 unsigned int Width, bool IsSigned,
5296                                 roundingMode RM, bool *IsExact) const {
5297   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5298   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5299       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5300 }
5301 
5302 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5303                                                   bool IsSigned,
5304                                                   roundingMode RM) {
5305   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5306   APFloat Tmp(semPPCDoubleDoubleLegacy);
5307   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5308   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5309   return Ret;
5310 }
5311 
5312 APFloat::opStatus
5313 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5314                                               unsigned int InputSize,
5315                                               bool IsSigned, roundingMode RM) {
5316   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5317   APFloat Tmp(semPPCDoubleDoubleLegacy);
5318   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5319   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5320   return Ret;
5321 }
5322 
5323 APFloat::opStatus
5324 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5325                                               unsigned int InputSize,
5326                                               bool IsSigned, roundingMode RM) {
5327   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5328   APFloat Tmp(semPPCDoubleDoubleLegacy);
5329   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5330   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5331   return Ret;
5332 }
5333 
5334 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5335                                                unsigned int HexDigits,
5336                                                bool UpperCase,
5337                                                roundingMode RM) const {
5338   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5339   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5340       .convertToHexString(DST, HexDigits, UpperCase, RM);
5341 }
5342 
5343 bool DoubleAPFloat::isDenormal() const {
5344   return getCategory() == fcNormal &&
5345          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5346           // (double)(Hi + Lo) == Hi defines a normal number.
5347           Floats[0] != Floats[0] + Floats[1]);
5348 }
5349 
5350 bool DoubleAPFloat::isSmallest() const {
5351   if (getCategory() != fcNormal)
5352     return false;
5353   DoubleAPFloat Tmp(*this);
5354   Tmp.makeSmallest(this->isNegative());
5355   return Tmp.compare(*this) == cmpEqual;
5356 }
5357 
5358 bool DoubleAPFloat::isSmallestNormalized() const {
5359   if (getCategory() != fcNormal)
5360     return false;
5361 
5362   DoubleAPFloat Tmp(*this);
5363   Tmp.makeSmallestNormalized(this->isNegative());
5364   return Tmp.compare(*this) == cmpEqual;
5365 }
5366 
5367 bool DoubleAPFloat::isLargest() const {
5368   if (getCategory() != fcNormal)
5369     return false;
5370   DoubleAPFloat Tmp(*this);
5371   Tmp.makeLargest(this->isNegative());
5372   return Tmp.compare(*this) == cmpEqual;
5373 }
5374 
5375 bool DoubleAPFloat::isInteger() const {
5376   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5377   return Floats[0].isInteger() && Floats[1].isInteger();
5378 }
5379 
5380 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5381                              unsigned FormatPrecision,
5382                              unsigned FormatMaxPadding,
5383                              bool TruncateZero) const {
5384   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5385   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5386       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5387 }
5388 
5389 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5390   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5391   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5392   if (!inv)
5393     return Tmp.getExactInverse(nullptr);
5394   APFloat Inv(semPPCDoubleDoubleLegacy);
5395   auto Ret = Tmp.getExactInverse(&Inv);
5396   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5397   return Ret;
5398 }
5399 
5400 int DoubleAPFloat::getExactLog2() const {
5401   // TODO: Implement me
5402   return INT_MIN;
5403 }
5404 
5405 int DoubleAPFloat::getExactLog2Abs() const {
5406   // TODO: Implement me
5407   return INT_MIN;
5408 }
5409 
5410 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5411                      APFloat::roundingMode RM) {
5412   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5413   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5414                        scalbn(Arg.Floats[1], Exp, RM));
5415 }
5416 
5417 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5418                     APFloat::roundingMode RM) {
5419   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5420   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5421   APFloat Second = Arg.Floats[1];
5422   if (Arg.getCategory() == APFloat::fcNormal)
5423     Second = scalbn(Second, -Exp, RM);
5424   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5425 }
5426 
5427 } // namespace detail
5428 
5429 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5430   if (usesLayout<IEEEFloat>(Semantics)) {
5431     new (&IEEE) IEEEFloat(std::move(F));
5432     return;
5433   }
5434   if (usesLayout<DoubleAPFloat>(Semantics)) {
5435     const fltSemantics& S = F.getSemantics();
5436     new (&Double)
5437         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5438                       APFloat(semIEEEdouble));
5439     return;
5440   }
5441   llvm_unreachable("Unexpected semantics");
5442 }
5443 
// Parses Str into *this under rounding mode RM. The call is forwarded by the
// dispatch macro to U.IEEE or U.Double, whichever matches the current
// semantics, and that call's result is returned.
Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
                                                       roundingMode RM) {
  APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
}
5448 
5449 hash_code hash_value(const APFloat &Arg) {
5450   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5451     return hash_value(Arg.U.IEEE);
5452   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5453     return hash_value(Arg.U.Double);
5454   llvm_unreachable("Unexpected semantics");
5455 }
5456 
5457 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5458     : APFloat(Semantics) {
5459   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5460   assert(StatusOrErr && "Invalid floating point representation");
5461   consumeError(StatusOrErr.takeError());
5462 }
5463 
5464 FPClassTest APFloat::classify() const {
5465   if (isZero())
5466     return isNegative() ? fcNegZero : fcPosZero;
5467   if (isNormal())
5468     return isNegative() ? fcNegNormal : fcPosNormal;
5469   if (isDenormal())
5470     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5471   if (isInfinity())
5472     return isNegative() ? fcNegInf : fcPosInf;
5473   assert(isNaN() && "Other class of FP constant");
5474   return isSignaling() ? fcSNan : fcQNan;
5475 }
5476 
5477 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5478                                    roundingMode RM, bool *losesInfo) {
5479   if (&getSemantics() == &ToSemantics) {
5480     *losesInfo = false;
5481     return opOK;
5482   }
5483   if (usesLayout<IEEEFloat>(getSemantics()) &&
5484       usesLayout<IEEEFloat>(ToSemantics))
5485     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5486   if (usesLayout<IEEEFloat>(getSemantics()) &&
5487       usesLayout<DoubleAPFloat>(ToSemantics)) {
5488     assert(&ToSemantics == &semPPCDoubleDouble);
5489     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5490     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5491     return Ret;
5492   }
5493   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5494       usesLayout<IEEEFloat>(ToSemantics)) {
5495     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5496     *this = APFloat(std::move(getIEEE()), ToSemantics);
5497     return Ret;
5498   }
5499   llvm_unreachable("Unexpected semantics");
5500 }
5501 
5502 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5503   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5504 }
5505 
5506 void APFloat::print(raw_ostream &OS) const {
5507   SmallVector<char, 16> Buffer;
5508   toString(Buffer);
5509   OS << Buffer;
5510 }
5511 
5512 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5513 LLVM_DUMP_METHOD void APFloat::dump() const {
5514   print(dbgs());
5515   dbgs() << '\n';
5516 }
5517 #endif
5518 
5519 void APFloat::Profile(FoldingSetNodeID &NID) const {
5520   NID.Add(bitcastToAPInt());
5521 }
5522 
5523 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5524    an APSInt, whose initial bit-width and signed-ness are used to determine the
5525    precision of the conversion.
5526  */
5527 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5528                                             roundingMode rounding_mode,
5529                                             bool *isExact) const {
5530   unsigned bitWidth = result.getBitWidth();
5531   SmallVector<uint64_t, 4> parts(result.getNumWords());
5532   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5533                                      rounding_mode, isExact);
5534   // Keeps the original signed-ness.
5535   result = APInt(bitWidth, parts);
5536   return status;
5537 }
5538 
5539 double APFloat::convertToDouble() const {
5540   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5541     return getIEEE().convertToDouble();
5542   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5543          "Float semantics is not representable by IEEEdouble");
5544   APFloat Temp = *this;
5545   bool LosesInfo;
5546   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5547   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5548   (void)St;
5549   return Temp.getIEEE().convertToDouble();
5550 }
5551 
#ifdef HAS_IEE754_FLOAT128
// Returns the value as a host float128. IEEE-quad values are read directly.
// Other semantics must be representable by IEEE quad and are converted on a
// copy, which is asserted to be exact.
float128 APFloat::convertToQuad() const {
  if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
    return getIEEE().convertToQuad();
  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat AsQuad = *this;
  bool LosesInfo;
  opStatus Status =
      AsQuad.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)Status; // Only read by the assertion above.
  return AsQuad.getIEEE().convertToQuad();
}
#endif
5566 
5567 float APFloat::convertToFloat() const {
5568   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5569     return getIEEE().convertToFloat();
5570   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5571          "Float semantics is not representable by IEEEsingle");
5572   APFloat Temp = *this;
5573   bool LosesInfo;
5574   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5575   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5576   (void)St;
5577   return Temp.getIEEE().convertToFloat();
5578 }
5579 
5580 } // namespace llvm
5581 
5582 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5583