xref: /llvm-project/flang/lib/Decimal/decimal-to-binary.cpp (revision f45723cded56b1e66b572fba0b71d117db6caa2e)
1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "big-radix-floating-point.h"
10 #include "flang/Common/bit-population-count.h"
11 #include "flang/Common/leading-zero-bit-count.h"
12 #include "flang/Decimal/binary-floating-point.h"
13 #include "flang/Decimal/decimal.h"
14 #include <cinttypes>
15 #include <cstring>
16 #include <ctype.h>
17 
18 namespace Fortran::decimal {
19 
20 template <int PREC, int LOG10RADIX>
21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
22     const char *&p, bool &inexact, const char *end) {
23   SetToZero();
24   if (end && p >= end) {
25     return false;
26   }
27   // Skip leading spaces
28   for (; p != end && *p == ' '; ++p) {
29   }
30   if (p == end) {
31     return false;
32   }
33   const char *q{p};
34   isNegative_ = *q == '-';
35   if (*q == '-' || *q == '+') {
36     ++q;
37   }
38   const char *start{q};
39   for (; q != end && *q == '0'; ++q) {
40   }
41   const char *firstDigit{q};
42   for (; q != end && *q >= '0' && *q <= '9'; ++q) {
43   }
44   const char *point{nullptr};
45   if (q != end && *q == '.') {
46     point = q;
47     for (++q; q != end && *q >= '0' && *q <= '9'; ++q) {
48     }
49   }
50   if (q == start || (q == start + 1 && start == point)) {
51     return false; // require at least one digit
52   }
53   // There's a valid number here; set the reference argument to point to
54   // the first character afterward, which might be an exponent part.
55   p = q;
56   // Strip off trailing zeroes
57   if (point) {
58     while (q[-1] == '0') {
59       --q;
60     }
61     if (q[-1] == '.') {
62       point = nullptr;
63       --q;
64     }
65   }
66   if (!point) {
67     while (q > firstDigit && q[-1] == '0') {
68       --q;
69       ++exponent_;
70     }
71   }
72   // Trim any excess digits
73   const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
74   if (q > limit) {
75     inexact = true;
76     if (point >= limit) {
77       q = point;
78       point = nullptr;
79     }
80     if (!point) {
81       exponent_ += q - limit;
82     }
83     q = limit;
84   }
85   if (point) {
86     exponent_ -= static_cast<int>(q - point - 1);
87   }
88   if (q == firstDigit) {
89     exponent_ = 0; // all zeros
90   }
91   // Rack the decimal digits up into big Digits.
92   for (auto times{radix}; q-- > firstDigit;) {
93     if (*q != '.') {
94       if (times == radix) {
95         digit_[digits_++] = *q - '0';
96         times = 10;
97       } else {
98         digit_[digits_ - 1] += times * (*q - '0');
99         times *= 10;
100       }
101     }
102   }
103   // Look for an optional exponent field.
104   if (p == end) {
105     return true;
106   }
107   q = p;
108   switch (*q) {
109   case 'e':
110   case 'E':
111   case 'd':
112   case 'D':
113   case 'q':
114   case 'Q': {
115     if (++q == end) {
116       break;
117     }
118     bool negExpo{*q == '-'};
119     if (*q == '-' || *q == '+') {
120       ++q;
121     }
122     if (q != end && *q >= '0' && *q <= '9') {
123       int expo{0};
124       for (; q != end && *q == '0'; ++q) {
125       }
126       const char *expDig{q};
127       for (; q != end && *q >= '0' && *q <= '9'; ++q) {
128         expo = 10 * expo + *q - '0';
129       }
130       if (q >= expDig + 8) {
131         // There's a ridiculous number of nonzero exponent digits.
132         // The decimal->binary conversion routine will cope with
133         // returning 0 or Inf, but we must ensure that "expo" didn't
134         // overflow back around to something legal.
135         expo = 10 * Real::decimalRange;
136         exponent_ = 0;
137       }
138       p = q; // exponent is valid; advance the termination pointer
139       if (negExpo) {
140         exponent_ -= expo;
141       } else {
142         exponent_ += expo;
143       }
144     }
145   } break;
146   default:
147     break;
148   }
149   return true;
150 }
151 
152 template <int PREC, int LOG10RADIX>
153 void BigRadixFloatingPointNumber<PREC,
154     LOG10RADIX>::LoseLeastSignificantDigit() {
155   Digit LSD{digit_[0]};
156   for (int j{0}; j < digits_ - 1; ++j) {
157     digit_[j] = digit_[j + 1];
158   }
159   digit_[digits_ - 1] = 0;
160   bool incr{false};
161   switch (rounding_) {
162   case RoundNearest:
163     incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
164     break;
165   case RoundUp:
166     incr = LSD > 0 && !isNegative_;
167     break;
168   case RoundDown:
169     incr = LSD > 0 && isNegative_;
170     break;
171   case RoundToZero:
172     break;
173   case RoundCompatible:
174     incr = LSD >= radix / 2;
175     break;
176   }
177   for (int j{0}; (digit_[j] += incr) == radix; ++j) {
178     digit_[j] = 0;
179   }
180 }
181 
182 // This local utility class represents an unrounded nonnegative
183 // binary floating-point value with an unbiased (i.e., signed)
184 // binary exponent, an integer value (not a fraction) with an implied
185 // binary point to its *right*, and some guard bits for rounding.
186 template <int PREC> class IntermediateFloat {
187 public:
188   static constexpr int precision{PREC};
189   using IntType = common::HostUnsignedIntType<precision>;
190   static constexpr IntType topBit{IntType{1} << (precision - 1)};
191   static constexpr IntType mask{topBit + (topBit - 1)};
192 
193   IntermediateFloat() {}
194   IntermediateFloat(const IntermediateFloat &) = default;
195 
196   // Assumes that exponent_ is valid on entry, and may increment it.
197   // Returns the number of guard_ bits that have been determined.
198   template <typename UINT> bool SetTo(UINT n) {
199     static constexpr int nBits{CHAR_BIT * sizeof n};
200     if constexpr (precision >= nBits) {
201       value_ = n;
202       guard_ = 0;
203       return 0;
204     } else {
205       int shift{common::BitsNeededFor(n) - precision};
206       if (shift <= 0) {
207         value_ = n;
208         guard_ = 0;
209         return 0;
210       } else {
211         value_ = n >> shift;
212         exponent_ += shift;
213         n <<= nBits - shift;
214         guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
215         return shift;
216       }
217     }
218   }
219 
220   void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
221   bool IsFull() const { return value_ >= topBit; }
222   void AdjustExponent(int by) { exponent_ += by; }
223   void SetGuard(int g) {
224     guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
225   }
226 
227   ConversionToBinaryResult<PREC> ToBinary(
228       bool isNegative, FortranRounding) const;
229 
230 private:
231   static constexpr int guardBits{3}; // guard, round, sticky
232   using GuardType = int;
233   static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
234 
235   IntType value_{0};
236   GuardType guard_{0};
237   int exponent_{0};
238 };
239 
240 // The standard says that these overflow cases round to "representable"
241 // numbers, and some popular compilers interpret that to mean +/-HUGE()
242 // rather than +/-Inf.
243 static inline constexpr bool RoundOverflowToHuge(
244     enum FortranRounding rounding, bool isNegative) {
245   return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
246       (isNegative && rounding == RoundUp);
247 }
248 
249 template <int PREC>
250 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
251     bool isNegative, FortranRounding rounding) const {
252   using Binary = BinaryFloatingPointNumber<PREC>;
253   // Create a fraction with a binary point to the left of the integer
254   // value_, and bias the exponent.
255   IntType fraction{value_};
256   GuardType guard{guard_};
257   int expo{exponent_ + Binary::exponentBias + (precision - 1)};
258   while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
259     guard = (guard & 1) | (guard >> 1) |
260         ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
261     fraction >>= 1;
262     ++expo;
263   }
264   int flags{Exact};
265   if (guard != 0) {
266     flags |= Inexact;
267   }
268   if (fraction == 0) {
269     if (guard <= oneHalf) {
270       if ((!isNegative && rounding == RoundUp) ||
271           (isNegative && rounding == RoundDown)) {
272         // round to least nonzero value
273         expo = 0;
274       } else { // round to zero
275         if (guard != 0) {
276           flags |= Underflow;
277         }
278         return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
279       }
280     }
281   } else {
282     // The value is nonzero; normalize it.
283     while (fraction < topBit && expo > 1) {
284       --expo;
285       fraction = fraction * 2 + (guard >> (guardBits - 2));
286       guard =
287           (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
288     }
289   }
290   // Apply rounding
291   bool incr{false};
292   switch (rounding) {
293   case RoundNearest:
294     incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
295     break;
296   case RoundUp:
297     incr = guard != 0 && !isNegative;
298     break;
299   case RoundDown:
300     incr = guard != 0 && isNegative;
301     break;
302   case RoundToZero:
303     break;
304   case RoundCompatible:
305     incr = guard >= oneHalf;
306     break;
307   }
308   if (incr) {
309     if (fraction == mask) {
310       // rounding causes a carry
311       ++expo;
312       fraction = topBit;
313     } else {
314       ++fraction;
315     }
316   }
317   if (expo == 1 && fraction < topBit) {
318     expo = 0; // subnormal
319     flags |= Underflow;
320   } else if (expo == 0) {
321     flags |= Underflow;
322   } else if (expo >= Binary::maxExponent) {
323     if (RoundOverflowToHuge(rounding, isNegative)) {
324       expo = Binary::maxExponent - 1;
325       fraction = mask;
326     } else { // Inf
327       expo = Binary::maxExponent;
328       flags |= Overflow;
329       if constexpr (Binary::bits == 80) { // x87
330         fraction = IntType{1} << 63;
331       } else {
332         fraction = 0;
333       }
334     }
335   }
336   using Raw = typename Binary::RawType;
337   Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
338   raw |= static_cast<Raw>(expo) << Binary::significandBits;
339   if constexpr (Binary::isImplicitMSB) {
340     fraction &= ~topBit;
341   }
342   raw |= fraction;
343   return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
344 }
345 
346 template <int PREC, int LOG10RADIX>
347 ConversionToBinaryResult<PREC>
348 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
349   // On entry, *this holds a multi-precision integer value in a radix of a
350   // large power of ten.  Its radix point is defined to be to the right of its
351   // digits, and "exponent_" is the power of ten by which it is to be scaled.
352   Normalize();
353   if (digits_ == 0) { // zero value
354     return {Real{SignBit()}};
355   }
356   // The value is not zero:  x = D. * 10.**E
357   // Shift our perspective on the radix (& decimal) point so that
358   // it sits to the *left* of the digits: i.e., x = .D * 10.**E
359   exponent_ += digits_ * log10Radix;
360   // Sanity checks for ridiculous exponents
361   static constexpr int crazy{2 * Real::decimalRange + log10Radix};
362   if (exponent_ < -crazy) {
363     enum ConversionResultFlags flags {
364       static_cast<enum ConversionResultFlags>(Inexact | Underflow)
365     };
366     if ((!isNegative_ && rounding_ == RoundUp) ||
367         (isNegative_ && rounding_ == RoundDown)) {
368       // return least nonzero value
369       return {Real{Raw{1} | SignBit()}, flags};
370     } else { // underflow to +/-0.
371       return {Real{SignBit()}, flags};
372     }
373   } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
374     if (RoundOverflowToHuge(rounding_, isNegative_)) {
375       return {Real{HUGE()}};
376     } else {
377       return {Real{Infinity()}, Overflow};
378     }
379   }
380   // Apply any negative decimal exponent by multiplication
381   // by a power of two, adjusting the binary exponent to compensate.
382   IntermediateFloat<PREC> f;
383   while (exponent_ < log10Radix) {
384     // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
385     f.AdjustExponent(-9);
386     digitLimit_ = digits_;
387     if (int carry{MultiplyWithoutNormalization<512>()}) {
388       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
389       PushCarry(carry);
390       exponent_ += log10Radix;
391     }
392   }
393   // Apply any positive decimal exponent greater than
394   // is needed to treat the topmost digit as an integer
395   // part by multiplying by 10 or 10000 repeatedly.
396   while (exponent_ > log10Radix) {
397     digitLimit_ = digits_;
398     int carry;
399     if (exponent_ >= log10Radix + 4) {
400       // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
401       exponent_ -= 4;
402       carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
403       f.AdjustExponent(4);
404     } else {
405       // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
406       --exponent_;
407       carry = MultiplyWithoutNormalization<5>();
408       f.AdjustExponent(1);
409     }
410     if (carry != 0) {
411       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
412       PushCarry(carry);
413       exponent_ += log10Radix;
414     }
415   }
416   // So exponent_ is now log10Radix, meaning that the
417   // MSD can be taken as an integer part and transferred
418   // to the binary result.
419   // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
420   int guardShift{f.SetTo(digit_[--digits_])};
421   // Transfer additional bits until the result is normal.
422   digitLimit_ = digits_;
423   while (!f.IsFull()) {
424     // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
425     f.AdjustExponent(-1);
426     std::uint32_t carry = MultiplyWithoutNormalization<2>();
427     f.ShiftIn(carry);
428   }
429   // Get the next few bits for rounding.  Allow for some guard bits
430   // that may have already been set in f.SetTo() above.
431   int guard{0};
432   if (guardShift == 0) {
433     guard = MultiplyWithoutNormalization<4>();
434   } else if (guardShift == 1) {
435     guard = MultiplyWithoutNormalization<2>();
436   }
437   guard = guard + guard + !IsZero();
438   f.SetGuard(guard);
439   return f.ToBinary(isNegative_, rounding_);
440 }
441 
442 template <int PREC, int LOG10RADIX>
443 ConversionToBinaryResult<PREC>
444 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(
445     const char *&p, const char *limit) {
446   bool inexact{false};
447   if (ParseNumber(p, inexact, limit)) {
448     auto result{ConvertToBinary()};
449     if (inexact) {
450       result.flags =
451           static_cast<enum ConversionResultFlags>(result.flags | Inexact);
452     }
453     return result;
454   } else {
455     // Could not parse a decimal floating-point number.  p has been
456     // advanced over any leading spaces.  Most Fortran compilers set
457     // the sign bit for -NaN.
458     const char *q{p};
459     if (!limit || q < limit) {
460       isNegative_ = *q == '-';
461       if (isNegative_ || *q == '+') {
462         ++q;
463       }
464     }
465     if ((!limit || limit >= q + 3) && toupper(q[0]) == 'N' &&
466         toupper(q[1]) == 'A' && toupper(q[2]) == 'N') {
467       // NaN
468       p = q + 3;
469       bool isQuiet{true};
470       if ((!limit || p < limit) && *p == '(') {
471         int depth{1};
472         do {
473           ++p;
474           if (limit && p >= limit) {
475             // Invalid input
476             return {Real{NaN(false)}, Invalid};
477           } else if (*p == '(') {
478             ++depth;
479           } else if (*p == ')') {
480             --depth;
481           } else if (*p != ' ') {
482             // Implementation dependent, but other compilers
483             // all return quiet NaNs.
484           }
485         } while (depth > 0);
486         ++p;
487       }
488       return {Real{NaN(isQuiet)}};
489     } else { // Inf?
490       if ((!limit || limit >= q + 3) && toupper(q[0]) == 'I' &&
491           toupper(q[1]) == 'N' && toupper(q[2]) == 'F') {
492         if ((!limit || limit >= q + 8) && toupper(q[3]) == 'I' &&
493             toupper(q[4]) == 'N' && toupper(q[5]) == 'I' &&
494             toupper(q[6]) == 'T' && toupper(q[7]) == 'Y') {
495           p = q + 8;
496         } else {
497           p = q + 3;
498         }
499         return {Real{Infinity()}};
500       } else {
501         // Invalid input
502         return {Real{NaN()}, Invalid};
503       }
504     }
505   }
506 }
507 
508 template <int PREC>
509 ConversionToBinaryResult<PREC> ConvertToBinary(
510     const char *&p, enum FortranRounding rounding, const char *end) {
511   return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end);
512 }
513 
514 template ConversionToBinaryResult<8> ConvertToBinary<8>(
515     const char *&, enum FortranRounding, const char *end);
516 template ConversionToBinaryResult<11> ConvertToBinary<11>(
517     const char *&, enum FortranRounding, const char *end);
518 template ConversionToBinaryResult<24> ConvertToBinary<24>(
519     const char *&, enum FortranRounding, const char *end);
520 template ConversionToBinaryResult<53> ConvertToBinary<53>(
521     const char *&, enum FortranRounding, const char *end);
522 template ConversionToBinaryResult<64> ConvertToBinary<64>(
523     const char *&, enum FortranRounding, const char *end);
524 template ConversionToBinaryResult<113> ConvertToBinary<113>(
525     const char *&, enum FortranRounding, const char *end);
526 
527 extern "C" {
528 enum ConversionResultFlags ConvertDecimalToFloat(
529     const char **p, float *f, enum FortranRounding rounding) {
530   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
531   std::memcpy(reinterpret_cast<void *>(f),
532       reinterpret_cast<const void *>(&result.binary), sizeof *f);
533   return result.flags;
534 }
535 enum ConversionResultFlags ConvertDecimalToDouble(
536     const char **p, double *d, enum FortranRounding rounding) {
537   auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
538   std::memcpy(reinterpret_cast<void *>(d),
539       reinterpret_cast<const void *>(&result.binary), sizeof *d);
540   return result.flags;
541 }
542 enum ConversionResultFlags ConvertDecimalToLongDouble(
543     const char **p, long double *ld, enum FortranRounding rounding) {
544   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
545   std::memcpy(reinterpret_cast<void *>(ld),
546       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
547   return result.flags;
548 }
549 }
550 } // namespace Fortran::decimal
551