xref: /llvm-project/flang/lib/Decimal/decimal-to-binary.cpp (revision ed1b24bf8b76871ab00f365d8dc066b3f7f76202)
1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "big-radix-floating-point.h"
10 #include "flang/Common/bit-population-count.h"
11 #include "flang/Common/leading-zero-bit-count.h"
12 #include "flang/Decimal/binary-floating-point.h"
13 #include "flang/Decimal/decimal.h"
14 #include "flang/Runtime/freestanding-tools.h"
15 #include <cinttypes>
16 #include <cstring>
17 #include <utility>
18 
19 namespace Fortran::decimal {
20 
21 template <int PREC, int LOG10RADIX>
22 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
23     const char *&p, bool &inexact, const char *end) {
24   SetToZero();
25   if (end && p >= end) {
26     return false;
27   }
28   // Skip leading spaces
29   for (; p != end && *p == ' '; ++p) {
30   }
31   if (p == end) {
32     return false;
33   }
34   const char *q{p};
35   isNegative_ = *q == '-';
36   if (*q == '-' || *q == '+') {
37     ++q;
38   }
39   const char *start{q};
40   for (; q != end && *q == '0'; ++q) {
41   }
42   const char *firstDigit{q};
43   for (; q != end && *q >= '0' && *q <= '9'; ++q) {
44   }
45   const char *point{nullptr};
46   if (q != end && *q == '.') {
47     point = q;
48     for (++q; q != end && *q >= '0' && *q <= '9'; ++q) {
49     }
50   }
51   if (q == start || (q == start + 1 && start == point)) {
52     return false; // require at least one digit
53   }
54   // There's a valid number here; set the reference argument to point to
55   // the first character afterward, which might be an exponent part.
56   p = q;
57   // Strip off trailing zeroes
58   if (point) {
59     while (q[-1] == '0') {
60       --q;
61     }
62     if (q[-1] == '.') {
63       point = nullptr;
64       --q;
65     }
66   }
67   if (!point) {
68     while (q > firstDigit && q[-1] == '0') {
69       --q;
70       ++exponent_;
71     }
72   }
73   // Trim any excess digits
74   const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
75   if (q > limit) {
76     inexact = true;
77     if (point >= limit) {
78       q = point;
79       point = nullptr;
80     }
81     if (!point) {
82       exponent_ += q - limit;
83     }
84     q = limit;
85   }
86   if (point) {
87     exponent_ -= static_cast<int>(q - point - 1);
88   }
89   if (q == firstDigit) {
90     exponent_ = 0; // all zeros
91   }
92   // Rack the decimal digits up into big Digits.
93   for (auto times{radix}; q-- > firstDigit;) {
94     if (*q != '.') {
95       if (times == radix) {
96         digit_[digits_++] = *q - '0';
97         times = 10;
98       } else {
99         digit_[digits_ - 1] += times * (*q - '0');
100         times *= 10;
101       }
102     }
103   }
104   // Look for an optional exponent field.
105   if (p == end) {
106     return true;
107   }
108   q = p;
109   switch (*q) {
110   case 'e':
111   case 'E':
112   case 'd':
113   case 'D':
114   case 'q':
115   case 'Q': {
116     if (++q == end) {
117       break;
118     }
119     bool negExpo{*q == '-'};
120     if (*q == '-' || *q == '+') {
121       ++q;
122     }
123     if (q != end && *q >= '0' && *q <= '9') {
124       int expo{0};
125       for (; q != end && *q == '0'; ++q) {
126       }
127       const char *expDig{q};
128       for (; q != end && *q >= '0' && *q <= '9'; ++q) {
129         expo = 10 * expo + *q - '0';
130       }
131       if (q >= expDig + 8) {
132         // There's a ridiculous number of nonzero exponent digits.
133         // The decimal->binary conversion routine will cope with
134         // returning 0 or Inf, but we must ensure that "expo" didn't
135         // overflow back around to something legal.
136         expo = 10 * Real::decimalRange;
137         exponent_ = 0;
138       }
139       p = q; // exponent is valid; advance the termination pointer
140       if (negExpo) {
141         exponent_ -= expo;
142       } else {
143         exponent_ += expo;
144       }
145     }
146   } break;
147   default:
148     break;
149   }
150   return true;
151 }
152 
153 template <int PREC, int LOG10RADIX>
154 void BigRadixFloatingPointNumber<PREC,
155     LOG10RADIX>::LoseLeastSignificantDigit() {
156   Digit LSD{digit_[0]};
157   for (int j{0}; j < digits_ - 1; ++j) {
158     digit_[j] = digit_[j + 1];
159   }
160   digit_[digits_ - 1] = 0;
161   bool incr{false};
162   switch (rounding_) {
163   case RoundNearest:
164     incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
165     break;
166   case RoundUp:
167     incr = LSD > 0 && !isNegative_;
168     break;
169   case RoundDown:
170     incr = LSD > 0 && isNegative_;
171     break;
172   case RoundToZero:
173     break;
174   case RoundCompatible:
175     incr = LSD >= radix / 2;
176     break;
177   }
178   for (int j{0}; (digit_[j] += incr) == radix; ++j) {
179     digit_[j] = 0;
180   }
181 }
182 
183 // This local utility class represents an unrounded nonnegative
184 // binary floating-point value with an unbiased (i.e., signed)
185 // binary exponent, an integer value (not a fraction) with an implied
186 // binary point to its *right*, and some guard bits for rounding.
187 template <int PREC> class IntermediateFloat {
188 public:
189   static constexpr int precision{PREC};
190   using IntType = common::HostUnsignedIntType<precision>;
191   static constexpr IntType topBit{IntType{1} << (precision - 1)};
192   static constexpr IntType mask{topBit + (topBit - 1)};
193 
194   RT_API_ATTRS IntermediateFloat() {}
195   IntermediateFloat(const IntermediateFloat &) = default;
196 
197   // Assumes that exponent_ is valid on entry, and may increment it.
198   // Returns the number of guard_ bits that have been determined.
199   template <typename UINT> RT_API_ATTRS bool SetTo(UINT n) {
200     static constexpr int nBits{CHAR_BIT * sizeof n};
201     if constexpr (precision >= nBits) {
202       value_ = n;
203       guard_ = 0;
204       return 0;
205     } else {
206       int shift{common::BitsNeededFor(n) - precision};
207       if (shift <= 0) {
208         value_ = n;
209         guard_ = 0;
210         return 0;
211       } else {
212         value_ = n >> shift;
213         exponent_ += shift;
214         n <<= nBits - shift;
215         guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
216         return shift;
217       }
218     }
219   }
220 
221   RT_API_ATTRS void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
222   RT_API_ATTRS bool IsFull() const { return value_ >= topBit; }
223   RT_API_ATTRS void AdjustExponent(int by) { exponent_ += by; }
224   RT_API_ATTRS void SetGuard(int g) {
225     guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
226   }
227 
228   RT_API_ATTRS ConversionToBinaryResult<PREC> ToBinary(
229       bool isNegative, FortranRounding) const;
230 
231 private:
232   static constexpr int guardBits{3}; // guard, round, sticky
233   using GuardType = int;
234   static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
235 
236   IntType value_{0};
237   GuardType guard_{0};
238   int exponent_{0};
239 };
240 
241 // The standard says that these overflow cases round to "representable"
242 // numbers, and some popular compilers interpret that to mean +/-HUGE()
243 // rather than +/-Inf.
244 static inline RT_API_ATTRS constexpr bool RoundOverflowToHuge(
245     enum FortranRounding rounding, bool isNegative) {
246   return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
247       (isNegative && rounding == RoundUp);
248 }
249 
250 template <int PREC>
251 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
252     bool isNegative, FortranRounding rounding) const {
253   using Binary = BinaryFloatingPointNumber<PREC>;
254   // Create a fraction with a binary point to the left of the integer
255   // value_, and bias the exponent.
256   IntType fraction{value_};
257   GuardType guard{guard_};
258   int expo{exponent_ + Binary::exponentBias + (precision - 1)};
259   while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
260     guard = (guard & 1) | (guard >> 1) |
261         ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
262     fraction >>= 1;
263     ++expo;
264   }
265   int flags{Exact};
266   if (guard != 0) {
267     flags |= Inexact;
268   }
269   if (fraction == 0) {
270     if (guard <= oneHalf) {
271       if ((!isNegative && rounding == RoundUp) ||
272           (isNegative && rounding == RoundDown)) {
273         // round to least nonzero value
274         expo = 0;
275       } else { // round to zero
276         if (guard != 0) {
277           flags |= Underflow;
278         }
279         Binary zero;
280         if (isNegative) {
281           zero.Negate();
282         }
283         return {
284             std::move(zero), static_cast<enum ConversionResultFlags>(flags)};
285       }
286     }
287   } else {
288     // The value is nonzero; normalize it.
289     while (fraction < topBit && expo > 1) {
290       --expo;
291       fraction = fraction * 2 + (guard >> (guardBits - 2));
292       guard =
293           (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
294     }
295   }
296   // Apply rounding
297   bool incr{false};
298   switch (rounding) {
299   case RoundNearest:
300     incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
301     break;
302   case RoundUp:
303     incr = guard != 0 && !isNegative;
304     break;
305   case RoundDown:
306     incr = guard != 0 && isNegative;
307     break;
308   case RoundToZero:
309     break;
310   case RoundCompatible:
311     incr = guard >= oneHalf;
312     break;
313   }
314   if (incr) {
315     if (fraction == mask) {
316       // rounding causes a carry
317       ++expo;
318       fraction = topBit;
319     } else {
320       ++fraction;
321     }
322   }
323   if (expo == 1 && fraction < topBit) {
324     expo = 0; // subnormal
325     flags |= Underflow;
326   } else if (expo == 0) {
327     flags |= Underflow;
328   } else if (expo >= Binary::maxExponent) {
329     if (RoundOverflowToHuge(rounding, isNegative)) {
330       expo = Binary::maxExponent - 1;
331       fraction = mask;
332     } else { // Inf
333       expo = Binary::maxExponent;
334       flags |= Overflow;
335       if constexpr (Binary::bits == 80) { // x87
336         fraction = IntType{1} << 63;
337       } else {
338         fraction = 0;
339       }
340     }
341   }
342   using Raw = typename Binary::RawType;
343   Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
344   raw |= static_cast<Raw>(expo) << Binary::significandBits;
345   if constexpr (Binary::isImplicitMSB) {
346     fraction &= ~topBit;
347   }
348   raw |= fraction;
349   return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
350 }
351 
352 template <int PREC, int LOG10RADIX>
353 ConversionToBinaryResult<PREC>
354 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
355   // On entry, *this holds a multi-precision integer value in a radix of a
356   // large power of ten.  Its radix point is defined to be to the right of its
357   // digits, and "exponent_" is the power of ten by which it is to be scaled.
358   Normalize();
359   if (digits_ == 0) { // zero value
360     return {Real{SignBit()}};
361   }
362   // The value is not zero:  x = D. * 10.**E
363   // Shift our perspective on the radix (& decimal) point so that
364   // it sits to the *left* of the digits: i.e., x = .D * 10.**E
365   exponent_ += digits_ * log10Radix;
366   // Sanity checks for ridiculous exponents
367   static constexpr int crazy{2 * Real::decimalRange + log10Radix};
368   if (exponent_ < -crazy) {
369     enum ConversionResultFlags flags {
370       static_cast<enum ConversionResultFlags>(Inexact | Underflow)
371     };
372     if ((!isNegative_ && rounding_ == RoundUp) ||
373         (isNegative_ && rounding_ == RoundDown)) {
374       // return least nonzero value
375       return {Real{Raw{1} | SignBit()}, flags};
376     } else { // underflow to +/-0.
377       return {Real{SignBit()}, flags};
378     }
379   } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
380     if (RoundOverflowToHuge(rounding_, isNegative_)) {
381       return {Real{HUGE()}};
382     } else {
383       return {Real{Infinity()}, Overflow};
384     }
385   }
386   // Apply any negative decimal exponent by multiplication
387   // by a power of two, adjusting the binary exponent to compensate.
388   IntermediateFloat<PREC> f;
389   while (exponent_ < log10Radix) {
390     // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
391     f.AdjustExponent(-9);
392     digitLimit_ = digits_;
393     if (int carry{MultiplyWithoutNormalization<512>()}) {
394       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
395       PushCarry(carry);
396       exponent_ += log10Radix;
397     }
398   }
399   // Apply any positive decimal exponent greater than
400   // is needed to treat the topmost digit as an integer
401   // part by multiplying by 10 or 10000 repeatedly.
402   while (exponent_ > log10Radix) {
403     digitLimit_ = digits_;
404     int carry;
405     if (exponent_ >= log10Radix + 4) {
406       // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
407       exponent_ -= 4;
408       carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
409       f.AdjustExponent(4);
410     } else {
411       // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
412       --exponent_;
413       carry = MultiplyWithoutNormalization<5>();
414       f.AdjustExponent(1);
415     }
416     if (carry != 0) {
417       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
418       PushCarry(carry);
419       exponent_ += log10Radix;
420     }
421   }
422   // So exponent_ is now log10Radix, meaning that the
423   // MSD can be taken as an integer part and transferred
424   // to the binary result.
425   // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
426   int guardShift{f.SetTo(digit_[--digits_])};
427   // Transfer additional bits until the result is normal.
428   digitLimit_ = digits_;
429   while (!f.IsFull()) {
430     // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
431     f.AdjustExponent(-1);
432     std::uint32_t carry = MultiplyWithoutNormalization<2>();
433     f.ShiftIn(carry);
434   }
435   // Get the next few bits for rounding.  Allow for some guard bits
436   // that may have already been set in f.SetTo() above.
437   int guard{0};
438   if (guardShift == 0) {
439     guard = MultiplyWithoutNormalization<4>();
440   } else if (guardShift == 1) {
441     guard = MultiplyWithoutNormalization<2>();
442   }
443   guard = guard + guard + !IsZero();
444   f.SetGuard(guard);
445   return f.ToBinary(isNegative_, rounding_);
446 }
447 
448 template <int PREC, int LOG10RADIX>
449 ConversionToBinaryResult<PREC>
450 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(
451     const char *&p, const char *limit) {
452   bool inexact{false};
453   if (ParseNumber(p, inexact, limit)) {
454     auto result{ConvertToBinary()};
455     if (inexact) {
456       result.flags =
457           static_cast<enum ConversionResultFlags>(result.flags | Inexact);
458     }
459     return result;
460   } else {
461     // Could not parse a decimal floating-point number.  p has been
462     // advanced over any leading spaces.  Most Fortran compilers set
463     // the sign bit for -NaN.
464     const char *q{p};
465     if (!limit || q < limit) {
466       isNegative_ = *q == '-';
467       if (isNegative_ || *q == '+') {
468         ++q;
469       }
470     }
471     if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'N' &&
472         runtime::toupper(q[1]) == 'A' && runtime::toupper(q[2]) == 'N') {
473       // NaN
474       p = q + 3;
475       bool isQuiet{true};
476       if ((!limit || p < limit) && *p == '(') {
477         int depth{1};
478         do {
479           ++p;
480           if (limit && p >= limit) {
481             // Invalid input
482             return {Real{NaN(false)}, Invalid};
483           } else if (*p == '(') {
484             ++depth;
485           } else if (*p == ')') {
486             --depth;
487           } else if (*p != ' ') {
488             // Implementation dependent, but other compilers
489             // all return quiet NaNs.
490           }
491         } while (depth > 0);
492         ++p;
493       }
494       return {Real{NaN(isQuiet)}};
495     } else { // Inf?
496       if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'I' &&
497           runtime::toupper(q[1]) == 'N' && runtime::toupper(q[2]) == 'F') {
498         if ((!limit || limit >= q + 8) && runtime::toupper(q[3]) == 'I' &&
499             runtime::toupper(q[4]) == 'N' && runtime::toupper(q[5]) == 'I' &&
500             runtime::toupper(q[6]) == 'T' && runtime::toupper(q[7]) == 'Y') {
501           p = q + 8;
502         } else {
503           p = q + 3;
504         }
505         return {Real{Infinity()}};
506       } else {
507         // Invalid input
508         return {Real{NaN()}, Invalid};
509       }
510     }
511   }
512 }
513 
514 template <int PREC>
515 ConversionToBinaryResult<PREC> ConvertToBinary(
516     const char *&p, enum FortranRounding rounding, const char *end) {
517   return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end);
518 }
519 
520 template ConversionToBinaryResult<8> ConvertToBinary<8>(
521     const char *&, enum FortranRounding, const char *end);
522 template ConversionToBinaryResult<11> ConvertToBinary<11>(
523     const char *&, enum FortranRounding, const char *end);
524 template ConversionToBinaryResult<24> ConvertToBinary<24>(
525     const char *&, enum FortranRounding, const char *end);
526 template ConversionToBinaryResult<53> ConvertToBinary<53>(
527     const char *&, enum FortranRounding, const char *end);
528 template ConversionToBinaryResult<64> ConvertToBinary<64>(
529     const char *&, enum FortranRounding, const char *end);
530 template ConversionToBinaryResult<113> ConvertToBinary<113>(
531     const char *&, enum FortranRounding, const char *end);
532 
533 extern "C" {
534 RT_EXT_API_GROUP_BEGIN
535 
536 enum ConversionResultFlags ConvertDecimalToFloat(
537     const char **p, float *f, enum FortranRounding rounding) {
538   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
539   std::memcpy(reinterpret_cast<void *>(f),
540       reinterpret_cast<const void *>(&result.binary), sizeof *f);
541   return result.flags;
542 }
543 enum ConversionResultFlags ConvertDecimalToDouble(
544     const char **p, double *d, enum FortranRounding rounding) {
545   auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
546   std::memcpy(reinterpret_cast<void *>(d),
547       reinterpret_cast<const void *>(&result.binary), sizeof *d);
548   return result.flags;
549 }
550 enum ConversionResultFlags ConvertDecimalToLongDouble(
551     const char **p, long double *ld, enum FortranRounding rounding) {
552   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
553   std::memcpy(reinterpret_cast<void *>(ld),
554       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
555   return result.flags;
556 }
557 
558 RT_EXT_API_GROUP_END
559 } // extern "C"
560 } // namespace Fortran::decimal
561