xref: /llvm-project/flang/include/flang/Common/real.h (revision 1444e5acfb75630c23b118c39454a05cf3792d35)
1 //===-- include/flang/Common/real.h -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_COMMON_REAL_H_
10 #define FORTRAN_COMMON_REAL_H_
11 
12 // Characteristics of IEEE-754 & related binary floating-point numbers.
13 // The various representations are distinguished by their binary precisions
14 // (number of explicit significand bits and any implicit MSB in the fraction).
15 
16 #include "flang/Common/api-attrs.h"
17 #include <cinttypes>
18 
19 namespace Fortran::common {
20 
// Storage width, in bits, of the representation identified by a binary
// precision; -1 if the precision does not name a known representation.
static constexpr int BitsForBinaryPrecision(int binaryPrecision) {
  // Each row is {binary precision, total representation size in bits}.
  constexpr int widths[][2]{
      {8, 16}, // IEEE single (truncated): 1+8+7 with implicit bit
      {11, 16}, // IEEE half precision: 1+5+10 with implicit bit
      {24, 32}, // IEEE single precision: 1+8+23 with implicit bit
      {53, 64}, // IEEE double precision: 1+11+52 with implicit bit
      {64, 80}, // x87 extended precision: 1+15+64, no implicit bit
      {106, 128}, // "double-double": 2*(1+11+52 with implicit bit)
      {113, 128}, // IEEE quad precision: 1+15+112 with implicit bit
  };
  for (const auto &row : widths) {
    if (row[0] == binaryPrecision) {
      return row[1];
    }
  }
  return -1;
}
42 
// For each representation, the largest count of significant decimal digits
// that the fraction of an exact decimal conversion can have.  The values
// were computed by converting the value with the minimum exponent (biased
// to 1) and all fractional bits set.  Yields -1 for an unknown precision.
static constexpr int MaxDecimalConversionDigits(int binaryPrecision) {
  return binaryPrecision == 8
      ? 96 // IEEE single (truncated): 1+8+7 with implicit bit
      : binaryPrecision == 11
      ? 21 // IEEE half precision: 1+5+10 with implicit bit
      : binaryPrecision == 24
      ? 112 // IEEE single precision: 1+8+23 with implicit bit
      : binaryPrecision == 53
      ? 767 // IEEE double precision: 1+11+52 with implicit bit
      : binaryPrecision == 64
      ? 11514 // x87 extended precision: 1+15+64, no implicit bit
      : binaryPrecision == 106
      ? 2 * 767 // "double-double": 2*(1+11+52 with implicit bit)
      : binaryPrecision == 113
      ? 11563 // IEEE quad precision: 1+15+112 with implicit bit
      : -1;
}
66 
// Number of hexadecimal digits needed to hold binaryPrecision bits, i.e.
// binaryPrecision / 4 rounded up.  A negative argument is returned
// unchanged.
static constexpr int MaxHexadecimalConversionDigits(int binaryPrecision) {
  if (binaryPrecision < 0) {
    return binaryPrecision;
  }
  // Round up from the quotient and remainder rather than by forming
  // binaryPrecision + 3, which overflows (undefined behavior for signed
  // int) when the argument is within 3 of INT_MAX.
  return binaryPrecision / 4 + (binaryPrecision % 4 != 0 ? 1 : 0);
}
70 
// REAL kind value that corresponds to a binary precision, or -1 when no
// kind is assigned to that precision.
static constexpr int RealKindForPrecision(int binaryPrecision) {
  // Each row is {binary precision, kind}.
  constexpr int kinds[][2]{
      {8, 3}, // IEEE single (truncated): 1+8+7 with implicit bit
      {11, 2}, // IEEE half precision: 1+5+10 with implicit bit
      {24, 4}, // IEEE single precision: 1+8+23 with implicit bit
      {53, 8}, // IEEE double precision: 1+11+52 with implicit bit
      {64, 10}, // x87 extended precision: 1+15+64, no implicit bit
      // TODO: {106, kind for double/double}
      {113, 16}, // IEEE quad precision: 1+15+112 with implicit bit
  };
  for (const auto &row : kinds) {
    if (row[0] == binaryPrecision) {
      return row[1];
    }
  }
  return -1;
}
90 
// Binary precision of the representation selected by a REAL kind value, or
// -1 when the kind is not one of those listed here.
static constexpr int PrecisionOfRealKind(int kind) {
  if (kind == 2) { // IEEE half precision: 1+5+10 with implicit bit
    return 11;
  }
  if (kind == 3) { // IEEE single (truncated): 1+8+7 with implicit bit
    return 8;
  }
  if (kind == 4) { // IEEE single precision: 1+8+23 with implicit bit
    return 24;
  }
  if (kind == 8) { // IEEE double precision: 1+11+52 with implicit bit
    return 53;
  }
  if (kind == 10) { // x87 extended precision: 1+15+64, no implicit bit
    return 64;
  }
  // TODO: kind for double/double: return 106;
  if (kind == 16) { // IEEE quad precision: 1+15+112 with implicit bit
    return 113;
  }
  return -1;
}
110 
111 // RealCharacteristics is constexpr, but also useful when constructed
112 // with a non-constant precision argument.
113 class RealCharacteristics {
114 public:
RealCharacteristics(int p)115   explicit constexpr RealCharacteristics(int p) : binaryPrecision{p} {}
116 
117   int binaryPrecision;
118   int bits{BitsForBinaryPrecision(binaryPrecision)};
119   bool isImplicitMSB{binaryPrecision != 64 /*x87*/};
120   int significandBits{binaryPrecision - isImplicitMSB};
121   int exponentBits{bits - significandBits - 1 /*sign*/};
122   int maxExponent{(1 << exponentBits) - 1};
123   int exponentBias{maxExponent / 2};
124   int decimalPrecision{LogBaseTwoToLogBaseTen(binaryPrecision - 1)};
125   int decimalRange{LogBaseTwoToLogBaseTen(exponentBias - 1)};
126   // Number of significant decimal digits in the fraction of the
127   // exact conversion of the least nonzero subnormal.
128   int maxDecimalConversionDigits{MaxDecimalConversionDigits(binaryPrecision)};
129   int maxHexadecimalConversionDigits{
130       MaxHexadecimalConversionDigits(binaryPrecision)};
131 
132 private:
133   // Converts bit widths to whole decimal digits
LogBaseTwoToLogBaseTen(int logb2)134   static constexpr int LogBaseTwoToLogBaseTen(int logb2) {
135     constexpr std::int64_t LogBaseTenOfTwoTimesTenToThe12th{301029995664};
136     constexpr std::int64_t TenToThe12th{1000000000000};
137     std::int64_t logb10{
138         (logb2 * LogBaseTenOfTwoTimesTenToThe12th) / TenToThe12th};
139     return static_cast<int>(logb10);
140   }
141 };
142 
143 } // namespace Fortran::common
144 #endif // FORTRAN_COMMON_REAL_H_
145