xref: /llvm-project/flang/include/flang/Decimal/binary-floating-point.h (revision 1444e5acfb75630c23b118c39454a05cf3792d35)
1 //===-- include/flang/Decimal/binary-floating-point.h -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
10 #define FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
11 
12 // Access and manipulate the fields of an IEEE-754 binary
13 // floating-point value via a generalized template.
14 
15 #include "flang/Common/api-attrs.h"
16 #include "flang/Common/real.h"
17 #include "flang/Common/uint128.h"
18 #include <cinttypes>
19 #include <climits>
20 #include <cstring>
21 #include <type_traits>
22 
23 namespace Fortran::decimal {
24 
// Rounding modes understood by BinaryFloatingPointNumber::RoundToBits().
// The enumerator values are spelled out; they match the implicit numbering
// (0, 1, 2, ...) that declaration order alone would give.
enum FortranRounding {
  RoundNearest = 0, // RN and RP
  RoundUp = 1, // RU
  RoundDown = 2, // RD
  RoundToZero = 3, // RZ - no rounding
  RoundCompatible = 4, // RC: like RN, but ties go away from 0
};
32 
33 template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
34 public:
35   static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
36   static constexpr int binaryPrecision{BINARY_PRECISION};
37   static constexpr int bits{realChars.bits};
38   static constexpr int isImplicitMSB{realChars.isImplicitMSB};
39   static constexpr int significandBits{realChars.significandBits};
40   static constexpr int exponentBits{realChars.exponentBits};
41   static constexpr int exponentBias{realChars.exponentBias};
42   static constexpr int maxExponent{realChars.maxExponent};
43   static constexpr int decimalPrecision{realChars.decimalPrecision};
44   static constexpr int decimalRange{realChars.decimalRange};
45   static constexpr int maxDecimalConversionDigits{
46       realChars.maxDecimalConversionDigits};
47 
48   using RawType = common::HostUnsignedIntType<bits>;
49   static_assert(CHAR_BIT * sizeof(RawType) >= bits);
50   RT_OFFLOAD_VAR_GROUP_BEGIN
51   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
52 
BinaryFloatingPointNumber()53   constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
54   RT_OFFLOAD_VAR_GROUP_END
55   constexpr BinaryFloatingPointNumber(
56       const BinaryFloatingPointNumber &that) = default;
57   constexpr BinaryFloatingPointNumber(
58       BinaryFloatingPointNumber &&that) = default;
59   constexpr BinaryFloatingPointNumber &operator=(
60       const BinaryFloatingPointNumber &that) = default;
61   constexpr BinaryFloatingPointNumber &operator=(
62       BinaryFloatingPointNumber &&that) = default;
BinaryFloatingPointNumber(RawType raw)63   constexpr explicit RT_API_ATTRS BinaryFloatingPointNumber(RawType raw)
64       : raw_{raw} {}
65 
raw()66   RT_API_ATTRS RawType raw() const { return raw_; }
67 
68   template <typename A>
BinaryFloatingPointNumber(A x)69   explicit constexpr RT_API_ATTRS BinaryFloatingPointNumber(A x) {
70     static_assert(sizeof raw_ <= sizeof x);
71     std::memcpy(reinterpret_cast<void *>(&raw_),
72         reinterpret_cast<const void *>(&x), sizeof raw_);
73   }
74 
BiasedExponent()75   constexpr RT_API_ATTRS int BiasedExponent() const {
76     return static_cast<int>(
77         (raw_ >> significandBits) & ((1 << exponentBits) - 1));
78   }
UnbiasedExponent()79   constexpr RT_API_ATTRS int UnbiasedExponent() const {
80     int biased{BiasedExponent()};
81     return biased - exponentBias + (biased == 0);
82   }
Significand()83   constexpr RT_API_ATTRS RawType Significand() const {
84     return raw_ & significandMask;
85   }
Fraction()86   constexpr RT_API_ATTRS RawType Fraction() const {
87     RawType sig{Significand()};
88     if (isImplicitMSB && BiasedExponent() > 0) {
89       sig |= RawType{1} << significandBits;
90     }
91     return sig;
92   }
93 
IsZero()94   constexpr RT_API_ATTRS bool IsZero() const {
95     return (raw_ & ((RawType{1} << (bits - 1)) - 1)) == 0;
96   }
IsNaN()97   constexpr RT_API_ATTRS bool IsNaN() const {
98     auto expo{BiasedExponent()};
99     auto sig{Significand()};
100     if constexpr (bits == 80) { // x87
101       if (expo == maxExponent) {
102         return sig != (significandMask >> 1) + 1;
103       } else {
104         return expo != 0 && !(sig & (RawType{1} << (significandBits - 1)));
105         ;
106       }
107     } else {
108       return expo == maxExponent && sig != 0;
109     }
110   }
IsInfinite()111   constexpr RT_API_ATTRS bool IsInfinite() const {
112     if constexpr (bits == 80) { // x87
113       return BiasedExponent() == maxExponent &&
114           Significand() == ((significandMask >> 1) + 1);
115     } else {
116       return BiasedExponent() == maxExponent && Significand() == 0;
117     }
118   }
IsMaximalFiniteMagnitude()119   constexpr RT_API_ATTRS bool IsMaximalFiniteMagnitude() const {
120     return BiasedExponent() == maxExponent - 1 &&
121         Significand() == significandMask;
122   }
IsNegative()123   constexpr RT_API_ATTRS bool IsNegative() const {
124     return ((raw_ >> (bits - 1)) & 1) != 0;
125   }
126 
Negate()127   constexpr RT_API_ATTRS void Negate() { raw_ ^= RawType{1} << (bits - 1); }
128 
129   // For calculating the nearest neighbors of a floating-point value
Previous()130   constexpr RT_API_ATTRS void Previous() {
131     RemoveExplicitMSB();
132     --raw_;
133     InsertExplicitMSB();
134   }
Next()135   constexpr RT_API_ATTRS void Next() {
136     RemoveExplicitMSB();
137     ++raw_;
138     InsertExplicitMSB();
139   }
140 
Infinity(bool isNegative)141   static constexpr RT_API_ATTRS BinaryFloatingPointNumber Infinity(
142       bool isNegative) {
143     RawType result{RawType{maxExponent} << significandBits};
144     if (isNegative) {
145       result |= RawType{1} << (bits - 1);
146     }
147     return BinaryFloatingPointNumber{result};
148   }
149 
150   // Returns true when the result is exact
RoundToBits(int keepBits,enum FortranRounding mode)151   constexpr RT_API_ATTRS bool RoundToBits(
152       int keepBits, enum FortranRounding mode) {
153     if (IsNaN() || IsInfinite() || keepBits >= binaryPrecision) {
154       return true;
155     }
156     int lostBits{keepBits < binaryPrecision ? binaryPrecision - keepBits : 0};
157     RawType lostMask{static_cast<RawType>((RawType{1} << lostBits) - 1)};
158     if (RawType lost{static_cast<RawType>(raw_ & lostMask)}; lost != 0) {
159       bool increase{false};
160       switch (mode) {
161       case RoundNearest:
162         if (lost >> (lostBits - 1) != 0) { // >= tie
163           if ((lost & (lostMask >> 1)) != 0) {
164             increase = true; // > tie
165           } else {
166             increase = ((raw_ >> lostBits) & 1) != 0; // tie to even
167           }
168         }
169         break;
170       case RoundUp:
171         increase = !IsNegative();
172         break;
173       case RoundDown:
174         increase = IsNegative();
175         break;
176       case RoundToZero:
177         break;
178       case RoundCompatible:
179         increase = lost >> (lostBits - 1) != 0; // >= tie
180         break;
181       }
182       if (increase) {
183         raw_ |= lostMask;
184         Next();
185       }
186       return false; // inexact
187     } else {
188       return true; // exact
189     }
190   }
191 
192 private:
RemoveExplicitMSB()193   constexpr RT_API_ATTRS void RemoveExplicitMSB() {
194     if constexpr (!isImplicitMSB) {
195       raw_ = (raw_ & (significandMask >> 1)) | ((raw_ & ~significandMask) >> 1);
196     }
197   }
InsertExplicitMSB()198   constexpr RT_API_ATTRS void InsertExplicitMSB() {
199     if constexpr (!isImplicitMSB) {
200       constexpr RawType mask{significandMask >> 1};
201       raw_ = (raw_ & mask) | ((raw_ & ~mask) << 1);
202       if (BiasedExponent() > 0) {
203         raw_ |= RawType{1} << (significandBits - 1);
204       }
205     }
206   }
207 
208   RawType raw_{0};
209 };
210 } // namespace Fortran::decimal
211 #endif
212