xref: /llvm-project/flang/lib/Decimal/big-radix-floating-point.h (revision 64ab3302d5a130c00b66a6957b2e7f0c9b9c537d)
1 //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
10 #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
11 
12 // This is a helper class for use in floating-point conversions
13 // between binary decimal representations.  It holds a multiple-precision
14 // integer value using digits of a radix that is a large even power of ten
15 // (10,000,000,000,000,000 by default, 10**16).  These digits are accompanied
16 // by a signed exponent that denotes multiplication by a power of ten.
17 // The effective radix point is to the right of the digits (i.e., they do
18 // not represent a fraction).
19 //
20 // The operations supported by this class are limited to those required
21 // for conversions between binary and decimal representations; it is not
22 // a general-purpose facility.
23 
24 #include "flang/Common/bit-population-count.h"
25 #include "flang/Common/leading-zero-bit-count.h"
26 #include "flang/Common/uint128.h"
27 #include "flang/Common/unsigned-const-division.h"
28 #include "flang/Decimal/binary-floating-point.h"
29 #include "flang/Decimal/decimal.h"
30 #include <cinttypes>
31 #include <limits>
32 #include <type_traits>
33 
34 namespace Fortran::decimal {
35 
36 static constexpr std::uint64_t TenToThe(int power) {
37   return power <= 0 ? 1 : 10 * TenToThe(power - 1);
38 }
39 
40 // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be
41 // even, so that pairs of decimal digits do not straddle Digits.
42 // So LOG10RADIX must be 16 or 6.
43 template<int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
44 public:
45   using Real = BinaryFloatingPointNumber<PREC>;
46   static constexpr int log10Radix{LOG10RADIX};
47 
48 private:
49   static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)};
50   static constexpr int minDigitBits{
51       64 - common::LeadingZeroBitCount(uint64Radix)};
52   using Digit = common::HostUnsignedIntType<minDigitBits>;
53   static constexpr Digit radix{uint64Radix};
54   static_assert(radix < std::numeric_limits<Digit>::max() / 1000,
55       "radix is somehow too big");
56   static_assert(radix > std::numeric_limits<Digit>::max() / 10000,
57       "radix is somehow too small");
58 
59   // The base-2 logarithm of the least significant bit that can arise
60   // in a subnormal IEEE floating-point number.
61   static constexpr int minLog2AnyBit{
62       -Real::exponentBias - Real::binaryPrecision};
63 
64   // The number of Digits needed to represent the smallest subnormal.
65   static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
66 
67 public:
68   explicit BigRadixFloatingPointNumber(
69       enum FortranRounding rounding = RoundDefault)
70     : rounding_{rounding} {}
71 
72   // Converts a binary floating point value.
73   explicit BigRadixFloatingPointNumber(
74       Real, enum FortranRounding = RoundDefault);
75 
76   BigRadixFloatingPointNumber &SetToZero() {
77     isNegative_ = false;
78     digits_ = 0;
79     exponent_ = 0;
80     return *this;
81   }
82 
83   // Converts decimal floating-point to binary.
84   ConversionToBinaryResult<PREC> ConvertToBinary();
85 
86   // Parses and converts to binary.  Handles leading spaces,
87   // "NaN", & optionally-signed "Inf".  Does not skip internal
88   // spaces.
89   // The argument is a reference to a pointer that is left
90   // pointing to the first character that wasn't parsed.
91   ConversionToBinaryResult<PREC> ConvertToBinary(const char *&);
92 
93   // Formats a decimal floating-point number to a user buffer.
94   // May emit "NaN" or "Inf", or an possibly-signed integer.
95   // No decimal point is written, but if it were, it would be
96   // after the last digit; the effective decimal exponent is
97   // returned as part of the result structure so that it can be
98   // formatted by the client.
99   ConversionToDecimalResult ConvertToDecimal(
100       char *, std::size_t, enum DecimalConversionFlags, int digits) const;
101 
102   // Discard decimal digits not needed to distinguish this value
103   // from the decimal encodings of two others (viz., the nearest binary
104   // floating-point numbers immediately below and above this one).
105   // The last decimal digit may not be uniquely determined in all
106   // cases, and will be the mean value when that is so (e.g., if
107   // last decimal digit values 6-8 would all work, it'll be a 7).
108   // This minimization necessarily assumes that the value will be
109   // emitted and read back into the same (or less precise) format
110   // with default rounding to the nearest value.
111   void Minimize(
112       BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
113 
114 private:
115   BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that)
116     : digits_{that.digits_}, exponent_{that.exponent_},
117       isNegative_{that.isNegative_}, rounding_{that.rounding_} {
118     for (int j{0}; j < digits_; ++j) {
119       digit_[j] = that.digit_[j];
120     }
121   }
122 
123   bool IsZero() const {
124     // Don't assume normalization.
125     for (int j{0}; j < digits_; ++j) {
126       if (digit_[j] != 0) {
127         return false;
128       }
129     }
130     return true;
131   }
132 
133   // Predicate: true when 10*value would cause a carry.
134   // (When this happens during decimal-to-binary conversion,
135   // there are more digits in the input string than can be
136   // represented precisely.)
137   bool IsFull() const {
138     return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10;
139   }
140 
141   // Sets *this to an unsigned integer value.
142   // Returns any remainder.
143   template<typename UINT> UINT SetTo(UINT n) {
144     static_assert(
145         std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
146     SetToZero();
147     while (n != 0) {
148       auto q{common::DivideUnsignedBy<UINT, 10>(n)};
149       if (n != q * 10) {
150         break;
151       }
152       ++exponent_;
153       n = q;
154     }
155     if constexpr (sizeof n < sizeof(Digit)) {
156       if (n != 0) {
157         digit_[digits_++] = n;
158       }
159       return 0;
160     } else {
161       while (n != 0 && digits_ < digitLimit_) {
162         auto q{common::DivideUnsignedBy<UINT, radix>(n)};
163         digit_[digits_++] = static_cast<Digit>(n - q * radix);
164         n = q;
165       }
166       return n;
167     }
168   }
169 
170   int RemoveLeastOrderZeroDigits() {
171     int remove{0};
172     if (digits_ > 0 && digit_[0] == 0) {
173       while (remove < digits_ && digit_[remove] == 0) {
174         ++remove;
175       }
176       if (remove >= digits_) {
177         digits_ = 0;
178       } else if (remove > 0) {
179         for (int j{0}; j + remove < digits_; ++j) {
180           digit_[j] = digit_[j + remove];
181         }
182         digits_ -= remove;
183       }
184     }
185     return remove;
186   }
187 
188   void RemoveLeadingZeroDigits() {
189     while (digits_ > 0 && digit_[digits_ - 1] == 0) {
190       --digits_;
191     }
192   }
193 
194   void Normalize() {
195     RemoveLeadingZeroDigits();
196     exponent_ += RemoveLeastOrderZeroDigits() * log10Radix;
197   }
198 
199   // This limited divisibility test only works for even divisors of the radix,
200   // which is fine since it's only ever used with 2 and 5.
201   template<int N> bool IsDivisibleBy() const {
202     static_assert(N > 1 && radix % N == 0, "bad modulus");
203     return digits_ == 0 || (digit_[0] % N) == 0;
204   }
205 
206   template<unsigned DIVISOR> int DivideBy() {
207     Digit remainder{0};
208     for (int j{digits_ - 1}; j >= 0; --j) {
209       Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
210       Digit nrem{digit_[j] - DIVISOR * q};
211       digit_[j] = q + (radix / DIVISOR) * remainder;
212       remainder = nrem;
213     }
214     return remainder;
215   }
216 
217   int DivideByPowerOfTwo(int twoPow) {  // twoPow <= LOG10RADIX
218     int remainder{0};
219     for (int j{digits_ - 1}; j >= 0; --j) {
220       Digit q{digit_[j] >> twoPow};
221       int nrem = digit_[j] - (q << twoPow);
222       digit_[j] = q + (radix >> twoPow) * remainder;
223       remainder = nrem;
224     }
225     return remainder;
226   }
227 
228   int AddCarry(int position = 0, int carry = 1) {
229     for (; position < digits_; ++position) {
230       Digit v{digit_[position] + carry};
231       if (v < radix) {
232         digit_[position] = v;
233         return 0;
234       }
235       digit_[position] = v - radix;
236       carry = 1;
237     }
238     if (digits_ < digitLimit_) {
239       digit_[digits_++] = carry;
240       return 0;
241     }
242     Normalize();
243     if (digits_ < digitLimit_) {
244       digit_[digits_++] = carry;
245       return 0;
246     }
247     return carry;
248   }
249 
250   void Decrement() {
251     for (int j{0}; digit_[j]-- == 0; ++j) {
252       digit_[j] = radix - 1;
253     }
254   }
255 
256   template<int N> int MultiplyByHelper(int carry = 0) {
257     for (int j{0}; j < digits_; ++j) {
258       auto v{N * digit_[j] + carry};
259       carry = common::DivideUnsignedBy<Digit, radix>(v);
260       digit_[j] = v - carry * radix;  // i.e., v % radix
261     }
262     return carry;
263   }
264 
265   template<int N> int MultiplyBy(int carry = 0) {
266     if (int newCarry{MultiplyByHelper<N>(carry)}) {
267       return AddCarry(digits_, newCarry);
268     } else {
269       return 0;
270     }
271   }
272 
273   template<int N> int MultiplyWithoutNormalization() {
274     if (int carry{MultiplyByHelper<N>(0)}) {
275       if (digits_ < digitLimit_) {
276         digit_[digits_++] = carry;
277         return 0;
278       } else {
279         return carry;
280       }
281     } else {
282       return 0;
283     }
284   }
285 
286   template<int N> void MultiplyByRounded() {
287     if (int carry{MultiplyBy<N>()}) {
288       LoseLeastSignificantDigit();
289       digit_[digits_ - 1] += carry;
290       exponent_ += log10Radix;
291     }
292   }
293 
294   void LoseLeastSignificantDigit();  // with rounding
295 
296   void PushCarry(int carry) {
297     if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) {
298       LoseLeastSignificantDigit();
299       digit_[digits_ - 1] += carry;
300     } else {
301       digit_[digits_++] = carry;
302     }
303   }
304 
305   // Adds another number and then divides by two.
306   // Assumes same exponent and sign.
307   // Returns true when the the result has effectively been rounded down.
308   bool Mean(const BigRadixFloatingPointNumber &);
309 
310   bool ParseNumber(const char *&, bool &inexact);
311 
312   using Raw = typename Real::RawType;
313   constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); }
314   constexpr Raw Infinity() const {
315     return (Raw{Real::maxExponent} << Real::significandBits) | SignBit();
316   }
317   static constexpr Raw NaN() {
318     return (Raw{Real::maxExponent} << Real::significandBits) |
319         (Raw{1} << (Real::significandBits - 2));
320   }
321 
322   Digit digit_[maxDigits];  // in little-endian order: digit_[0] is LSD
323   int digits_{0};  // # of elements in digit_[] array; zero when zero
324   int digitLimit_{maxDigits};  // precision clamp
325   int exponent_{0};  // signed power of ten
326   bool isNegative_{false};
327   enum FortranRounding rounding_ { RoundDefault };
328 };
329 }
330 #endif
331