1 //===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is a configuration header for soft-float routines in compiler-rt. 11 // This file does not provide any part of the compiler-rt interface, but defines 12 // many useful constants and utility routines that are used in the 13 // implementation of the soft-float routines in compiler-rt. 14 // 15 // Assumes that float, double and long double correspond to the IEEE-754 16 // binary32, binary64 and binary 128 types, respectively, and that integer 17 // endianness matches floating point endianness on the target platform. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #ifndef FP_LIB_HEADER 22 #define FP_LIB_HEADER 23 24 #include <stdint.h> 25 #include <stdbool.h> 26 #include <limits.h> 27 #include "int_lib.h" 28 29 #if defined SINGLE_PRECISION 30 31 typedef uint32_t rep_t; 32 typedef int32_t srep_t; 33 typedef float fp_t; 34 #define REP_C UINT32_C 35 #define significandBits 23 36 37 static inline int rep_clz(rep_t a) { 38 return __builtin_clz(a); 39 } 40 41 // 32x32 --> 64 bit multiply 42 static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { 43 const uint64_t product = (uint64_t)a*b; 44 *hi = product >> 32; 45 *lo = product; 46 } 47 COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); 48 49 #elif defined DOUBLE_PRECISION 50 51 typedef uint64_t rep_t; 52 typedef int64_t srep_t; 53 typedef double fp_t; 54 #define REP_C UINT64_C 55 #define significandBits 52 56 57 static inline int rep_clz(rep_t a) { 58 #if defined __LP64__ 59 return __builtin_clzl(a); 60 #else 61 if (a & REP_C(0xffffffff00000000)) 62 return __builtin_clz(a >> 32); 63 else 64 return 32 + __builtin_clz(a & REP_C(0xffffffff)); 65 #endif 66 } 67 68 #define loWord(a) (a & 0xffffffffU) 69 #define hiWord(a) (a >> 32) 70 71 // 64x64 -> 128 wide multiply for platforms that don't have such an operation; 72 // many 64-bit platforms have this operation, but they tend to have hardware 73 // floating-point, so we don't bother with a special case for them here. 74 static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { 75 // Each of the component 32x32 -> 64 products 76 const uint64_t plolo = loWord(a) * loWord(b); 77 const uint64_t plohi = loWord(a) * hiWord(b); 78 const uint64_t philo = hiWord(a) * loWord(b); 79 const uint64_t phihi = hiWord(a) * hiWord(b); 80 // Sum terms that contribute to lo in a way that allows us to get the carry 81 const uint64_t r0 = loWord(plolo); 82 const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); 83 *lo = r0 + (r1 << 32); 84 // Sum terms contributing to hi with the carry from lo 85 *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; 86 } 87 #undef loWord 88 #undef hiWord 89 90 COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); 91 92 #elif defined QUAD_PRECISION 93 #if __LDBL_MANT_DIG__ == 113 94 #define CRT_LDBL_128BIT 95 typedef __uint128_t rep_t; 96 typedef __int128_t srep_t; 97 typedef long double fp_t; 98 #define REP_C (__uint128_t) 99 // Note: Since there is no explicit way to tell compiler the constant is a 100 // 128-bit integer, we let the constant be casted to 128-bit integer 101 #define significandBits 112 102 103 static inline int rep_clz(rep_t a) { 104 const union 105 { 106 __uint128_t ll; 107 #if _YUGA_BIG_ENDIAN 108 struct { uint64_t high, low; } s; 109 #else 110 struct { uint64_t low, high; } s; 111 #endif 112 } uu = { .ll = a }; 113 114 uint64_t word; 115 uint64_t add; 116 117 if (uu.s.high){ 118 word = uu.s.high; 119 add = 0; 120 } 121 else{ 122 word = uu.s.low; 123 add = 64; 124 } 125 return __builtin_clzll(word) + add; 126 } 127 128 #define Word_LoMask UINT64_C(0x00000000ffffffff) 129 #define Word_HiMask UINT64_C(0xffffffff00000000) 130 #define Word_FullMask UINT64_C(0xffffffffffffffff) 131 #define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) 132 #define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) 133 #define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask) 134 #define Word_4(a) (uint64_t)(a & Word_LoMask) 135 136 // 128x128 -> 256 wide multiply for platforms that don't have such an operation; 137 // many 64-bit platforms have this operation, but they tend to have hardware 138 // floating-point, so we don't bother with a special case for them here. 139 static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { 140 141 const uint64_t product11 = Word_1(a) * Word_1(b); 142 const uint64_t product12 = Word_1(a) * Word_2(b); 143 const uint64_t product13 = Word_1(a) * Word_3(b); 144 const uint64_t product14 = Word_1(a) * Word_4(b); 145 const uint64_t product21 = Word_2(a) * Word_1(b); 146 const uint64_t product22 = Word_2(a) * Word_2(b); 147 const uint64_t product23 = Word_2(a) * Word_3(b); 148 const uint64_t product24 = Word_2(a) * Word_4(b); 149 const uint64_t product31 = Word_3(a) * Word_1(b); 150 const uint64_t product32 = Word_3(a) * Word_2(b); 151 const uint64_t product33 = Word_3(a) * Word_3(b); 152 const uint64_t product34 = Word_3(a) * Word_4(b); 153 const uint64_t product41 = Word_4(a) * Word_1(b); 154 const uint64_t product42 = Word_4(a) * Word_2(b); 155 const uint64_t product43 = Word_4(a) * Word_3(b); 156 const uint64_t product44 = Word_4(a) * Word_4(b); 157 158 const __uint128_t sum0 = (__uint128_t)product44; 159 const __uint128_t sum1 = (__uint128_t)product34 + 160 (__uint128_t)product43; 161 const __uint128_t sum2 = (__uint128_t)product24 + 162 (__uint128_t)product33 + 163 (__uint128_t)product42; 164 const __uint128_t sum3 = (__uint128_t)product14 + 165 (__uint128_t)product23 + 166 (__uint128_t)product32 + 167 (__uint128_t)product41; 168 const __uint128_t sum4 = (__uint128_t)product13 + 169 (__uint128_t)product22 + 170 (__uint128_t)product31; 171 const __uint128_t sum5 = (__uint128_t)product12 + 172 (__uint128_t)product21; 173 const __uint128_t sum6 = (__uint128_t)product11; 174 175 const __uint128_t r0 = (sum0 & Word_FullMask) + 176 ((sum1 & Word_LoMask) << 32); 177 const __uint128_t r1 = (sum0 >> 64) + 178 ((sum1 >> 32) & Word_FullMask) + 179 (sum2 & Word_FullMask) + 180 ((sum3 << 32) & Word_HiMask); 181 182 *lo = r0 + (r1 << 64); 183 *hi = (r1 >> 64) + 184 (sum1 >> 96) + 185 (sum2 >> 64) + 186 (sum3 >> 32) + 187 sum4 + 188 (sum5 << 32) + 189 (sum6 << 64); 190 } 191 #undef Word_1 192 #undef Word_2 193 #undef Word_3 194 #undef Word_4 195 #undef Word_HiMask 196 #undef Word_LoMask 197 #undef Word_FullMask 198 #endif // __LDBL_MANT_DIG__ == 113 199 #else 200 #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. 201 #endif 202 203 #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT) 204 #define typeWidth (sizeof(rep_t)*CHAR_BIT) 205 #define exponentBits (typeWidth - significandBits - 1) 206 #define maxExponent ((1 << exponentBits) - 1) 207 #define exponentBias (maxExponent >> 1) 208 209 #define implicitBit (REP_C(1) << significandBits) 210 #define significandMask (implicitBit - 1U) 211 #define signBit (REP_C(1) << (significandBits + exponentBits)) 212 #define absMask (signBit - 1U) 213 #define exponentMask (absMask ^ significandMask) 214 #define oneRep ((rep_t)exponentBias << significandBits) 215 #define infRep exponentMask 216 #define quietBit (implicitBit >> 1) 217 #define qnanRep (exponentMask | quietBit) 218 219 static inline rep_t toRep(fp_t x) { 220 const union { fp_t f; rep_t i; } rep = {.f = x}; 221 return rep.i; 222 } 223 224 static inline fp_t fromRep(rep_t x) { 225 const union { fp_t f; rep_t i; } rep = {.i = x}; 226 return rep.f; 227 } 228 229 static inline int normalize(rep_t *significand) { 230 const int shift = rep_clz(*significand) - rep_clz(implicitBit); 231 *significand <<= shift; 232 return 1 - shift; 233 } 234 235 static inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { 236 *hi = *hi << count | *lo >> (typeWidth - count); 237 *lo = *lo << count; 238 } 239 240 static inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) { 241 if (count < typeWidth) { 242 const bool sticky = *lo << (typeWidth - count); 243 *lo = *hi << (typeWidth - count) | *lo >> count | sticky; 244 *hi = *hi >> count; 245 } 246 else if (count < 2*typeWidth) { 247 const bool sticky = *hi << (2*typeWidth - count) | *lo; 248 *lo = *hi >> (count - typeWidth) | sticky; 249 *hi = 0; 250 } else { 251 const bool sticky = *hi | *lo; 252 *lo = sticky; 253 *hi = 0; 254 } 255 } 256 #endif 257 258 #endif // FP_LIB_HEADER 259