xref: /llvm-project/flang/lib/Decimal/decimal-to-binary.cpp (revision 788be0d9fc6aeca548c90bac5ebe6990dd3c66ec)
1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "big-radix-floating-point.h"
10 #include "flang/Common/bit-population-count.h"
11 #include "flang/Common/leading-zero-bit-count.h"
12 #include "flang/Decimal/binary-floating-point.h"
13 #include "flang/Decimal/decimal.h"
14 #include "flang/Runtime/freestanding-tools.h"
15 #include <cinttypes>
16 #include <cstring>
17 #include <utility>
18 
19 // Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE
20 // to leak out of <math.h>.
21 #undef HUGE
22 
23 namespace Fortran::decimal {
24 
25 template <int PREC, int LOG10RADIX>
ParseNumber(const char * & p,bool & inexact,const char * end)26 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
27     const char *&p, bool &inexact, const char *end) {
28   SetToZero();
29   if (end && p >= end) {
30     return false;
31   }
32   // Skip leading spaces
33   for (; p != end && *p == ' '; ++p) {
34   }
35   if (p == end) {
36     return false;
37   }
38   const char *q{p};
39   isNegative_ = *q == '-';
40   if (*q == '-' || *q == '+') {
41     ++q;
42   }
43   const char *start{q};
44   for (; q != end && *q == '0'; ++q) {
45   }
46   const char *firstDigit{q};
47   for (; q != end && *q >= '0' && *q <= '9'; ++q) {
48   }
49   const char *point{nullptr};
50   if (q != end && *q == '.') {
51     point = q;
52     for (++q; q != end && *q >= '0' && *q <= '9'; ++q) {
53     }
54   }
55   if (q == start || (q == start + 1 && start == point)) {
56     return false; // require at least one digit
57   }
58   // There's a valid number here; set the reference argument to point to
59   // the first character afterward, which might be an exponent part.
60   p = q;
61   // Strip off trailing zeroes
62   if (point) {
63     while (q[-1] == '0') {
64       --q;
65     }
66     if (q[-1] == '.') {
67       point = nullptr;
68       --q;
69     }
70   }
71   if (!point) {
72     while (q > firstDigit && q[-1] == '0') {
73       --q;
74       ++exponent_;
75     }
76   }
77   // Trim any excess digits
78   const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
79   if (q > limit) {
80     inexact = true;
81     if (point >= limit) {
82       q = point;
83       point = nullptr;
84     }
85     if (!point) {
86       exponent_ += q - limit;
87     }
88     q = limit;
89   }
90   if (point) {
91     exponent_ -= static_cast<int>(q - point - 1);
92   }
93   if (q == firstDigit) {
94     exponent_ = 0; // all zeros
95   }
96   // Rack the decimal digits up into big Digits.
97   for (auto times{radix}; q-- > firstDigit;) {
98     if (*q != '.') {
99       if (times == radix) {
100         digit_[digits_++] = *q - '0';
101         times = 10;
102       } else {
103         digit_[digits_ - 1] += times * (*q - '0');
104         times *= 10;
105       }
106     }
107   }
108   // Look for an optional exponent field.
109   if (p == end) {
110     return true;
111   }
112   q = p;
113   switch (*q) {
114   case 'e':
115   case 'E':
116   case 'd':
117   case 'D':
118   case 'q':
119   case 'Q': {
120     if (++q == end) {
121       break;
122     }
123     bool negExpo{*q == '-'};
124     if (*q == '-' || *q == '+') {
125       ++q;
126     }
127     if (q != end && *q >= '0' && *q <= '9') {
128       int expo{0};
129       for (; q != end && *q == '0'; ++q) {
130       }
131       const char *expDig{q};
132       for (; q != end && *q >= '0' && *q <= '9'; ++q) {
133         expo = 10 * expo + *q - '0';
134       }
135       if (q >= expDig + 8) {
136         // There's a ridiculous number of nonzero exponent digits.
137         // The decimal->binary conversion routine will cope with
138         // returning 0 or Inf, but we must ensure that "expo" didn't
139         // overflow back around to something legal.
140         expo = 10 * Real::decimalRange;
141         exponent_ = 0;
142       }
143       p = q; // exponent is valid; advance the termination pointer
144       if (negExpo) {
145         exponent_ -= expo;
146       } else {
147         exponent_ += expo;
148       }
149     }
150   } break;
151   default:
152     break;
153   }
154   return true;
155 }
156 
157 template <int PREC, int LOG10RADIX>
158 void BigRadixFloatingPointNumber<PREC,
LoseLeastSignificantDigit()159     LOG10RADIX>::LoseLeastSignificantDigit() {
160   Digit LSD{digit_[0]};
161   for (int j{0}; j < digits_ - 1; ++j) {
162     digit_[j] = digit_[j + 1];
163   }
164   digit_[digits_ - 1] = 0;
165   bool incr{false};
166   switch (rounding_) {
167   case RoundNearest:
168     incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
169     break;
170   case RoundUp:
171     incr = LSD > 0 && !isNegative_;
172     break;
173   case RoundDown:
174     incr = LSD > 0 && isNegative_;
175     break;
176   case RoundToZero:
177     break;
178   case RoundCompatible:
179     incr = LSD >= radix / 2;
180     break;
181   }
182   for (int j{0}; (digit_[j] += incr) == radix; ++j) {
183     digit_[j] = 0;
184   }
185 }
186 
187 // This local utility class represents an unrounded nonnegative
188 // binary floating-point value with an unbiased (i.e., signed)
189 // binary exponent, an integer value (not a fraction) with an implied
190 // binary point to its *right*, and some guard bits for rounding.
191 template <int PREC> class IntermediateFloat {
192 public:
193   static constexpr int precision{PREC};
194   using IntType = common::HostUnsignedIntType<precision>;
195   static constexpr IntType topBit{IntType{1} << (precision - 1)};
196   static constexpr IntType mask{topBit + (topBit - 1)};
197 
IntermediateFloat()198   RT_API_ATTRS IntermediateFloat() {}
199   IntermediateFloat(const IntermediateFloat &) = default;
200 
201   // Assumes that exponent_ is valid on entry, and may increment it.
202   // Returns the number of guard_ bits that have been determined.
SetTo(UINT n)203   template <typename UINT> RT_API_ATTRS bool SetTo(UINT n) {
204     static constexpr int nBits{CHAR_BIT * sizeof n};
205     if constexpr (precision >= nBits) {
206       value_ = n;
207       guard_ = 0;
208       return 0;
209     } else {
210       int shift{common::BitsNeededFor(n) - precision};
211       if (shift <= 0) {
212         value_ = n;
213         guard_ = 0;
214         return 0;
215       } else {
216         value_ = n >> shift;
217         exponent_ += shift;
218         n <<= nBits - shift;
219         guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
220         return shift;
221       }
222     }
223   }
224 
ShiftIn(int bit=0)225   RT_API_ATTRS void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
IsFull() const226   RT_API_ATTRS bool IsFull() const { return value_ >= topBit; }
AdjustExponent(int by)227   RT_API_ATTRS void AdjustExponent(int by) { exponent_ += by; }
SetGuard(int g)228   RT_API_ATTRS void SetGuard(int g) {
229     guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
230   }
231 
232   RT_API_ATTRS ConversionToBinaryResult<PREC> ToBinary(
233       bool isNegative, FortranRounding) const;
234 
235 private:
236   static constexpr int guardBits{3}; // guard, round, sticky
237   using GuardType = int;
238   static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
239 
240   IntType value_{0};
241   GuardType guard_{0};
242   int exponent_{0};
243 };
244 
245 // The standard says that these overflow cases round to "representable"
246 // numbers, and some popular compilers interpret that to mean +/-HUGE()
247 // rather than +/-Inf.
RoundOverflowToHuge(enum FortranRounding rounding,bool isNegative)248 static inline RT_API_ATTRS constexpr bool RoundOverflowToHuge(
249     enum FortranRounding rounding, bool isNegative) {
250   return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
251       (isNegative && rounding == RoundUp);
252 }
253 
254 template <int PREC>
ToBinary(bool isNegative,FortranRounding rounding) const255 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
256     bool isNegative, FortranRounding rounding) const {
257   using Binary = BinaryFloatingPointNumber<PREC>;
258   // Create a fraction with a binary point to the left of the integer
259   // value_, and bias the exponent.
260   IntType fraction{value_};
261   GuardType guard{guard_};
262   int expo{exponent_ + Binary::exponentBias + (precision - 1)};
263   while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
264     guard = (guard & 1) | (guard >> 1) |
265         ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
266     fraction >>= 1;
267     ++expo;
268   }
269   int flags{Exact};
270   if (guard != 0) {
271     flags |= Inexact;
272   }
273   if (fraction == 0) {
274     if (guard <= oneHalf) {
275       if ((!isNegative && rounding == RoundUp) ||
276           (isNegative && rounding == RoundDown)) {
277         // round to least nonzero value
278         expo = 0;
279       } else { // round to zero
280         if (guard != 0) {
281           flags |= Underflow;
282         }
283         Binary zero;
284         if (isNegative) {
285           zero.Negate();
286         }
287         return {
288             std::move(zero), static_cast<enum ConversionResultFlags>(flags)};
289       }
290     }
291   } else {
292     // The value is nonzero; normalize it.
293     while (fraction < topBit && expo > 1) {
294       --expo;
295       fraction = fraction * 2 + (guard >> (guardBits - 2));
296       guard =
297           (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
298     }
299   }
300   // Apply rounding
301   bool incr{false};
302   switch (rounding) {
303   case RoundNearest:
304     incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
305     break;
306   case RoundUp:
307     incr = guard != 0 && !isNegative;
308     break;
309   case RoundDown:
310     incr = guard != 0 && isNegative;
311     break;
312   case RoundToZero:
313     break;
314   case RoundCompatible:
315     incr = guard >= oneHalf;
316     break;
317   }
318   if (incr) {
319     if (fraction == mask) {
320       // rounding causes a carry
321       ++expo;
322       fraction = topBit;
323     } else {
324       ++fraction;
325     }
326   }
327   if (expo == 1 && fraction < topBit) {
328     expo = 0; // subnormal
329     flags |= Underflow;
330   } else if (expo == 0) {
331     flags |= Underflow;
332   } else if (expo >= Binary::maxExponent) {
333     if (RoundOverflowToHuge(rounding, isNegative)) {
334       expo = Binary::maxExponent - 1;
335       fraction = mask;
336     } else { // Inf
337       expo = Binary::maxExponent;
338       flags |= Overflow;
339       if constexpr (Binary::bits == 80) { // x87
340         fraction = IntType{1} << 63;
341       } else {
342         fraction = 0;
343       }
344     }
345   }
346   using Raw = typename Binary::RawType;
347   Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
348   raw |= static_cast<Raw>(expo) << Binary::significandBits;
349   if constexpr (Binary::isImplicitMSB) {
350     fraction &= ~topBit;
351   }
352   raw |= fraction;
353   return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
354 }
355 
356 template <int PREC, int LOG10RADIX>
357 ConversionToBinaryResult<PREC>
ConvertToBinary()358 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
359   // On entry, *this holds a multi-precision integer value in a radix of a
360   // large power of ten.  Its radix point is defined to be to the right of its
361   // digits, and "exponent_" is the power of ten by which it is to be scaled.
362   Normalize();
363   if (digits_ == 0) { // zero value
364     return {Real{SignBit()}};
365   }
366   // The value is not zero:  x = D. * 10.**E
367   // Shift our perspective on the radix (& decimal) point so that
368   // it sits to the *left* of the digits: i.e., x = .D * 10.**E
369   exponent_ += digits_ * log10Radix;
370   // Sanity checks for ridiculous exponents
371   static constexpr int crazy{2 * Real::decimalRange + log10Radix};
372   if (exponent_ < -crazy) {
373     enum ConversionResultFlags flags {
374       static_cast<enum ConversionResultFlags>(Inexact | Underflow)
375     };
376     if ((!isNegative_ && rounding_ == RoundUp) ||
377         (isNegative_ && rounding_ == RoundDown)) {
378       // return least nonzero value
379       return {Real{Raw{1} | SignBit()}, flags};
380     } else { // underflow to +/-0.
381       return {Real{SignBit()}, flags};
382     }
383   } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
384     if (RoundOverflowToHuge(rounding_, isNegative_)) {
385       return {Real{HUGE()}};
386     } else {
387       return {Real{Infinity()}, Overflow};
388     }
389   }
390   // Apply any negative decimal exponent by multiplication
391   // by a power of two, adjusting the binary exponent to compensate.
392   IntermediateFloat<PREC> f;
393   while (exponent_ < log10Radix) {
394     // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
395     f.AdjustExponent(-9);
396     digitLimit_ = digits_;
397     if (int carry{MultiplyWithoutNormalization<512>()}) {
398       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
399       PushCarry(carry);
400       exponent_ += log10Radix;
401     }
402   }
403   // Apply any positive decimal exponent greater than
404   // is needed to treat the topmost digit as an integer
405   // part by multiplying by 10 or 10000 repeatedly.
406   while (exponent_ > log10Radix) {
407     digitLimit_ = digits_;
408     int carry;
409     if (exponent_ >= log10Radix + 4) {
410       // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
411       exponent_ -= 4;
412       carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
413       f.AdjustExponent(4);
414     } else {
415       // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
416       --exponent_;
417       carry = MultiplyWithoutNormalization<5>();
418       f.AdjustExponent(1);
419     }
420     if (carry != 0) {
421       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
422       PushCarry(carry);
423       exponent_ += log10Radix;
424     }
425   }
426   // So exponent_ is now log10Radix, meaning that the
427   // MSD can be taken as an integer part and transferred
428   // to the binary result.
429   // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
430   int guardShift{f.SetTo(digit_[--digits_])};
431   // Transfer additional bits until the result is normal.
432   digitLimit_ = digits_;
433   while (!f.IsFull()) {
434     // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
435     f.AdjustExponent(-1);
436     std::uint32_t carry = MultiplyWithoutNormalization<2>();
437     f.ShiftIn(carry);
438   }
439   // Get the next few bits for rounding.  Allow for some guard bits
440   // that may have already been set in f.SetTo() above.
441   int guard{0};
442   if (guardShift == 0) {
443     guard = MultiplyWithoutNormalization<4>();
444   } else if (guardShift == 1) {
445     guard = MultiplyWithoutNormalization<2>();
446   }
447   guard = guard + guard + !IsZero();
448   f.SetGuard(guard);
449   return f.ToBinary(isNegative_, rounding_);
450 }
451 
452 template <int PREC, int LOG10RADIX>
453 ConversionToBinaryResult<PREC>
ConvertToBinary(const char * & p,const char * limit)454 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(
455     const char *&p, const char *limit) {
456   bool inexact{false};
457   if (ParseNumber(p, inexact, limit)) {
458     auto result{ConvertToBinary()};
459     if (inexact) {
460       result.flags =
461           static_cast<enum ConversionResultFlags>(result.flags | Inexact);
462     }
463     return result;
464   } else {
465     // Could not parse a decimal floating-point number.  p has been
466     // advanced over any leading spaces.  Most Fortran compilers set
467     // the sign bit for -NaN.
468     const char *q{p};
469     if (!limit || q < limit) {
470       isNegative_ = *q == '-';
471       if (isNegative_ || *q == '+') {
472         ++q;
473       }
474     }
475     if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'N' &&
476         runtime::toupper(q[1]) == 'A' && runtime::toupper(q[2]) == 'N') {
477       // NaN
478       p = q + 3;
479       bool isQuiet{true};
480       if ((!limit || p < limit) && *p == '(') {
481         int depth{1};
482         do {
483           ++p;
484           if (limit && p >= limit) {
485             // Invalid input
486             return {Real{NaN(false)}, Invalid};
487           } else if (*p == '(') {
488             ++depth;
489           } else if (*p == ')') {
490             --depth;
491           } else if (*p != ' ') {
492             // Implementation dependent, but other compilers
493             // all return quiet NaNs.
494           }
495         } while (depth > 0);
496         ++p;
497       }
498       return {Real{NaN(isQuiet)}};
499     } else { // Inf?
500       if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'I' &&
501           runtime::toupper(q[1]) == 'N' && runtime::toupper(q[2]) == 'F') {
502         if ((!limit || limit >= q + 8) && runtime::toupper(q[3]) == 'I' &&
503             runtime::toupper(q[4]) == 'N' && runtime::toupper(q[5]) == 'I' &&
504             runtime::toupper(q[6]) == 'T' && runtime::toupper(q[7]) == 'Y') {
505           p = q + 8;
506         } else {
507           p = q + 3;
508         }
509         return {Real{Infinity()}};
510       } else {
511         // Invalid input
512         return {Real{NaN()}, Invalid};
513       }
514     }
515   }
516 }
517 
518 template <int PREC>
ConvertToBinary(const char * & p,enum FortranRounding rounding,const char * end)519 ConversionToBinaryResult<PREC> ConvertToBinary(
520     const char *&p, enum FortranRounding rounding, const char *end) {
521   return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end);
522 }
523 
524 template ConversionToBinaryResult<8> ConvertToBinary<8>(
525     const char *&, enum FortranRounding, const char *end);
526 template ConversionToBinaryResult<11> ConvertToBinary<11>(
527     const char *&, enum FortranRounding, const char *end);
528 template ConversionToBinaryResult<24> ConvertToBinary<24>(
529     const char *&, enum FortranRounding, const char *end);
530 template ConversionToBinaryResult<53> ConvertToBinary<53>(
531     const char *&, enum FortranRounding, const char *end);
532 template ConversionToBinaryResult<64> ConvertToBinary<64>(
533     const char *&, enum FortranRounding, const char *end);
534 template ConversionToBinaryResult<113> ConvertToBinary<113>(
535     const char *&, enum FortranRounding, const char *end);
536 
537 extern "C" {
538 RT_EXT_API_GROUP_BEGIN
539 
ConvertDecimalToFloat(const char ** p,float * f,enum FortranRounding rounding)540 enum ConversionResultFlags ConvertDecimalToFloat(
541     const char **p, float *f, enum FortranRounding rounding) {
542   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
543   std::memcpy(reinterpret_cast<void *>(f),
544       reinterpret_cast<const void *>(&result.binary), sizeof *f);
545   return result.flags;
546 }
ConvertDecimalToDouble(const char ** p,double * d,enum FortranRounding rounding)547 enum ConversionResultFlags ConvertDecimalToDouble(
548     const char **p, double *d, enum FortranRounding rounding) {
549   auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
550   std::memcpy(reinterpret_cast<void *>(d),
551       reinterpret_cast<const void *>(&result.binary), sizeof *d);
552   return result.flags;
553 }
ConvertDecimalToLongDouble(const char ** p,long double * ld,enum FortranRounding rounding)554 enum ConversionResultFlags ConvertDecimalToLongDouble(
555     const char **p, long double *ld, enum FortranRounding rounding) {
556   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
557   std::memcpy(reinterpret_cast<void *>(ld),
558       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
559   return result.flags;
560 }
561 
562 RT_EXT_API_GROUP_END
563 } // extern "C"
564 } // namespace Fortran::decimal
565