1 //=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Set source and destination precision setting 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef FP_TRUNC_HEADER 14 #define FP_TRUNC_HEADER 15 16 #include "int_lib.h" 17 18 #if defined SRC_SINGLE 19 typedef float src_t; 20 typedef uint32_t src_rep_t; 21 #define SRC_REP_C UINT32_C 22 static const int srcBits = sizeof(src_t) * CHAR_BIT; 23 static const int srcSigFracBits = 23; 24 // -1 accounts for the sign bit. 25 // srcBits - srcSigFracBits - 1 26 static const int srcExpBits = 8; 27 28 #elif defined SRC_DOUBLE 29 typedef double src_t; 30 typedef uint64_t src_rep_t; 31 #define SRC_REP_C UINT64_C 32 static const int srcBits = sizeof(src_t) * CHAR_BIT; 33 static const int srcSigFracBits = 52; 34 // -1 accounts for the sign bit. 35 // srcBits - srcSigFracBits - 1 36 static const int srcExpBits = 11; 37 38 #elif defined SRC_80 39 typedef xf_float src_t; 40 typedef __uint128_t src_rep_t; 41 #define SRC_REP_C (__uint128_t) 42 // sign bit, exponent and significand occupy the lower 80 bits. 43 static const int srcBits = 80; 44 static const int srcSigFracBits = 63; 45 // -1 accounts for the sign bit. 46 // -1 accounts for the explicitly stored integer bit. 47 // srcBits - srcSigFracBits - 1 - 1 48 static const int srcExpBits = 15; 49 50 #elif defined SRC_QUAD 51 typedef tf_float src_t; 52 typedef __uint128_t src_rep_t; 53 #define SRC_REP_C (__uint128_t) 54 static const int srcBits = sizeof(src_t) * CHAR_BIT; 55 static const int srcSigFracBits = 112; 56 // -1 accounts for the sign bit. 57 // srcBits - srcSigFracBits - 1 58 static const int srcExpBits = 15; 59 60 #else 61 #error Source should be double precision or quad precision! 62 #endif // end source precision 63 64 #if defined DST_DOUBLE 65 typedef double dst_t; 66 typedef uint64_t dst_rep_t; 67 #define DST_REP_C UINT64_C 68 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 69 static const int dstSigFracBits = 52; 70 // -1 accounts for the sign bit. 71 // dstBits - dstSigFracBits - 1 72 static const int dstExpBits = 11; 73 74 #elif defined DST_80 75 typedef xf_float dst_t; 76 typedef __uint128_t dst_rep_t; 77 #define DST_REP_C (__uint128_t) 78 static const int dstBits = 80; 79 static const int dstSigFracBits = 63; 80 // -1 accounts for the sign bit. 81 // -1 accounts for the explicitly stored integer bit. 82 // dstBits - dstSigFracBits - 1 - 1 83 static const int dstExpBits = 15; 84 85 #elif defined DST_SINGLE 86 typedef float dst_t; 87 typedef uint32_t dst_rep_t; 88 #define DST_REP_C UINT32_C 89 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 90 static const int dstSigFracBits = 23; 91 // -1 accounts for the sign bit. 92 // dstBits - dstSigFracBits - 1 93 static const int dstExpBits = 8; 94 95 #elif defined DST_HALF 96 #ifdef COMPILER_RT_HAS_FLOAT16 97 typedef _Float16 dst_t; 98 #else 99 typedef uint16_t dst_t; 100 #endif 101 typedef uint16_t dst_rep_t; 102 #define DST_REP_C UINT16_C 103 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 104 static const int dstSigFracBits = 10; 105 // -1 accounts for the sign bit. 106 // dstBits - dstSigFracBits - 1 107 static const int dstExpBits = 5; 108 109 #elif defined DST_BFLOAT 110 typedef __bf16 dst_t; 111 typedef uint16_t dst_rep_t; 112 #define DST_REP_C UINT16_C 113 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 114 static const int dstSigFracBits = 7; 115 // -1 accounts for the sign bit. 116 // dstBits - dstSigFracBits - 1 117 static const int dstExpBits = 8; 118 119 #else 120 #error Destination should be single precision or double precision! 121 #endif // end destination precision 122 123 // TODO: These helper routines should be placed into fp_lib.h 124 // Currently they depend on macros/constants defined above. 125 126 static inline src_rep_t extract_sign_from_src(src_rep_t x) { 127 const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); 128 return (x & srcSignMask) >> (srcBits - 1); 129 } 130 131 static inline src_rep_t extract_exp_from_src(src_rep_t x) { 132 const int srcSigBits = srcBits - 1 - srcExpBits; 133 const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; 134 return (x & srcExpMask) >> srcSigBits; 135 } 136 137 static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { 138 const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; 139 return x & srcSigFracMask; 140 } 141 142 static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { 143 dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; 144 // Set the explicit integer bit in F80 if present. 145 if (dstBits == 80 && exp) { 146 result |= (DST_REP_C(1) << dstSigFracBits); 147 } 148 return result; 149 } 150 151 // End of specialization parameters. Two helper routines for conversion to and 152 // from the representation of floating-point data as integer values follow. 153 154 static inline src_rep_t srcToRep(src_t x) { 155 const union { 156 src_t f; 157 src_rep_t i; 158 } rep = {.f = x}; 159 return rep.i; 160 } 161 162 static inline dst_t dstFromRep(dst_rep_t x) { 163 const union { 164 dst_t f; 165 dst_rep_t i; 166 } rep = {.i = x}; 167 return rep.f; 168 } 169 170 #endif // FP_TRUNC_HEADER 171