1 //===-- Single-precision general exp/log functions ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H 10 #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H 11 12 #include "common_constants.h" 13 #include "src/__support/CPP/bit.h" 14 #include "src/__support/CPP/optional.h" 15 #include "src/__support/FPUtil/FEnvImpl.h" 16 #include "src/__support/FPUtil/FPBits.h" 17 #include "src/__support/FPUtil/PolyEval.h" 18 #include "src/__support/FPUtil/nearest_integer.h" 19 #include "src/__support/common.h" 20 #include "src/__support/macros/config.h" 21 #include "src/__support/macros/properties/cpu_features.h" 22 23 namespace LIBC_NAMESPACE_DECL { 24 25 struct ExpBase { 26 // Base = e 27 static constexpr int MID_BITS = 5; 28 static constexpr int MID_MASK = (1 << MID_BITS) - 1; 29 // log2(e) * 2^5 30 static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS); 31 // High and low parts of -log(2) * 2^(-5) 32 static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS); 33 static constexpr double M_LOGB_2_LO = 34 -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS); 35 // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya 36 // with: 37 // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN)); 38 static constexpr int64_t EXP_2_MID[1 << MID_BITS] = { 39 0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f, 40 0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa, 41 0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715, 42 0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d, 43 0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429, 44 0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74, 45 0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db, 46 0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d, 47 0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c, 48 0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f, 49 0x3ffea4afa2a490da, 0x3fff50765b6e4540, 50 }; 51 52 // Approximating e^dx with degree-5 minimax polynomial generated by Sollya: 53 // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]); 54 // Then: 55 // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5. 56 static constexpr double COEFFS[4] = { 57 0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5, 58 0x1.11112a0e34bdbp-7}; 59 60 LIBC_INLINE static double powb_lo(double dx) { 61 using fputil::multiply_add; 62 double dx2 = dx * dx; 63 double c0 = 1.0 + dx; 64 // c1 = COEFFS[0] + COEFFS[1] * dx 65 double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]); 66 // c2 = COEFFS[2] + COEFFS[3] * dx 67 double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]); 68 // r = c4 + c5 * dx^4 69 // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7 70 return fputil::polyeval(dx2, c0, c1, c2); 71 } 72 }; 73 74 struct Exp10Base : public ExpBase { 75 // log2(10) * 2^5 76 static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS); 77 // High and low parts of -log10(2) * 2^(-5). 78 // Notice that since |x * log2(10)| < 150: 79 // |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13 80 // So when the FMA instructions are not available, in order for the product 81 // k * M_LOGB_2_HI 82 // to be exact, we only store the high part of log10(2) up to 38 bits 83 // (= 53 - 15) of precision. 84 // It is generated by Sollya with: 85 // > round(log10(2), 44, RN); 86 static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS); 87 // > round(log10(2) - 0x1.34413509f8p-2, D, RN); 88 static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS); 89 90 // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya: 91 // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]); 92 // Then: 93 // 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. 94 static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1, 95 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0, 96 0x1.1429e74a98f43p-1}; 97 98 static double powb_lo(double dx) { 99 using fputil::multiply_add; 100 double dx2 = dx * dx; 101 // c0 = 1 + COEFFS[0] * dx 102 double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0); 103 // c1 = COEFFS[1] + COEFFS[2] * dx 104 double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]); 105 // c2 = COEFFS[3] + COEFFS[4] * dx 106 double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]); 107 // r = c0 + dx^2 * (c1 + c2 * dx^2) 108 // = c0 + c1 * dx^2 + c2 * dx^4 109 // = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. 110 return fputil::polyeval(dx2, c0, c1, c2); 111 } 112 }; 113 114 constexpr int LOG_P1_BITS = 6; 115 constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS; 116 117 // N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40] 118 extern const double LOG_P1_LOG2[LOG_P1_SIZE]; 119 120 // N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40] 121 extern const double LOG_P1_1_OVER[LOG_P1_SIZE]; 122 123 // Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers 124 // K_LOG2_ODD starts from x^3 125 extern const double K_LOG2_ODD[4]; 126 extern const double K_LOG2_EVEN[4]; 127 128 // Output of range reduction for exp_b: (2^(mid + hi), lo) 129 // where: 130 // b^x = 2^(mid + hi) * b^lo 131 struct exp_b_reduc_t { 132 double mh; // 2^(mid + hi) 133 double lo; 134 }; 135 136 // The function correctly calculates b^x value with at least float precision 137 // in a limited range. 138 // Range reduction: 139 // b^x = 2^(hi + mid) * b^lo 140 // where: 141 // x = (hi + mid) * log_b(2) + lo 142 // hi is an integer, 143 // 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer 144 // -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1) 145 // Base class needs to provide the following constants: 146 // - MID_BITS : number of bits after decimal points used for mid 147 // - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits 148 // - LOG2_B : log2(b) * 2^MID_BITS for scaling 149 // - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS) 150 // - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS) 151 // - EXP_2_MID : look up table for bit fields of 2^mid 152 // Return: 153 // { 2^(hi + mid), lo } 154 template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) { 155 double xd = static_cast<double>(x); 156 // kd = round((hi + mid) * log2(b) * 2^MID_BITS) 157 double kd = fputil::nearest_integer(Base::LOG2_B * xd); 158 // k = round((hi + mid) * log2(b) * 2^MID_BITS) 159 int k = static_cast<int>(kd); 160 // hi = floor(kd * 2^(-MID_BITS)) 161 // exp_hi = shift hi to the exponent field of double precision. 162 uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS) 163 << fputil::FPBits<double>::FRACTION_LEN; 164 // mh = 2^hi * 2^mid 165 // mh_bits = bit field of mh 166 uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi; 167 double mh = fputil::FPBits<double>(mh_bits).get_val(); 168 // dx = lo = x - (hi + mid) * log(2) 169 double dx = fputil::multiply_add( 170 kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd)); 171 return {mh, dx}; 172 } 173 174 // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) 175 // and exp(-x) simultaneously. 176 // To compute e^x, we perform the following range 177 // reduction: find hi, mid, lo such that: 178 // x = (hi + mid) * log(2) + lo, in which 179 // hi is an integer, 180 // 0 <= mid * 2^5 < 32 is an integer 181 // -2^(-6) <= lo * log2(e) <= 2^-6. 182 // In particular, 183 // hi + mid = round(x * log2(e) * 2^5) * 2^(-5). 184 // Then, 185 // e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. 186 // 2^mid is stored in the lookup table of 32 elements. 187 // e^lo is computed using a degree-5 minimax polynomial 188 // generated by Sollya: 189 // e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 190 // = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) 191 // = P_even + lo * P_odd 192 // We perform 2^hi * 2^mid by simply add hi to the exponent field 193 // of 2^mid. 194 // To compute e^(-x), notice that: 195 // e^(-x) = 2^(-(hi + mid)) * e^(-lo) 196 // ~ 2^(-(hi + mid)) * P(-lo) 197 // = 2^(-(hi + mid)) * (P_even - lo * P_odd) 198 // So: 199 // sinh(x) = (e^x - e^(-x)) / 2 200 // ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - 201 // 2^(-(hi + mid)) * (P_even - lo * P_odd)) 202 // = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + 203 // lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) 204 // And similarly: 205 // cosh(x) = (e^x + e^(-x)) / 2 206 // ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + 207 // lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) 208 // The main point of these formulas is that the expensive part of calculating 209 // the polynomials approximating lower parts of e^(x) and e^(-x) are shared 210 // and only done once. 211 template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { 212 double xd = static_cast<double>(x); 213 214 // kd = round(x * log2(e) * 2^5) 215 // k_p = round(x * log2(e) * 2^5) 216 // k_m = round(-x * log2(e) * 2^5) 217 double kd; 218 int k_p, k_m; 219 220 #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT 221 kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); 222 k_p = static_cast<int>(kd); 223 k_m = -k_p; 224 #else 225 constexpr double HALF_WAY[2] = {0.5, -0.5}; 226 227 k_p = static_cast<int>( 228 fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); 229 k_m = -k_p; 230 kd = static_cast<double>(k_p); 231 #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT 232 233 // hi = floor(kf * 2^(-5)) 234 // exp_hi = shift hi to the exponent field of double precision. 235 int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS)) 236 << fputil::FPBits<double>::FRACTION_LEN; 237 int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS)) 238 << fputil::FPBits<double>::FRACTION_LEN; 239 // mh_p = 2^(hi + mid) 240 // mh_m = 2^(-(hi + mid)) 241 // mh_bits_* = bit field of mh_* 242 int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p; 243 int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m; 244 double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val(); 245 double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val(); 246 // mh_sum = 2^(hi + mid) + 2^(-(hi + mid)) 247 double mh_sum = mh_p + mh_m; 248 // mh_diff = 2^(hi + mid) - 2^(-(hi + mid)) 249 double mh_diff = mh_p - mh_m; 250 251 // dx = lo = x - (hi + mid) * log(2) 252 double dx = 253 fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO, 254 fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd)); 255 double dx2 = dx * dx; 256 257 // c0 = 1 + COEFFS[0] * lo^2 258 // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 259 double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, 260 ExpBase::COEFFS[2] * 0.5); 261 // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 262 double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, 263 ExpBase::COEFFS[3] * 0.5); 264 265 double r; 266 if constexpr (is_sinh) 267 r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); 268 else 269 r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); 270 return r; 271 } 272 273 // x should be positive, normal finite value 274 LIBC_INLINE static double log2_eval(double x) { 275 using FPB = fputil::FPBits<double>; 276 FPB bs(x); 277 278 double result = 0; 279 result += bs.get_exponent(); 280 281 int p1 = (bs.get_mantissa() >> (FPB::FRACTION_LEN - LOG_P1_BITS)) & 282 (LOG_P1_SIZE - 1); 283 284 bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> LOG_P1_BITS)); 285 bs.set_biased_exponent(FPB::EXP_BIAS); 286 double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1]; 287 288 // Taylor series for log(2,1+x) 289 double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]); 290 double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]); 291 double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]); 292 double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]); 293 294 // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1] 295 double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]); 296 result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4); 297 return result; 298 } 299 300 // x should be positive, normal finite value 301 LIBC_INLINE static double log_eval(double x) { 302 // For x = 2^ex * (1 + mx) 303 // log(x) = ex * log(2) + log(1 + mx) 304 using FPB = fputil::FPBits<double>; 305 FPB bs(x); 306 307 double ex = static_cast<double>(bs.get_exponent()); 308 309 // p1 is the leading 7 bits of mx, i.e. 310 // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). 311 int p1 = static_cast<int>(bs.get_mantissa() >> (FPB::FRACTION_LEN - 7)); 312 313 // Set bs to (1 + (mx - p1*2^(-7)) 314 bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> 7)); 315 bs.set_biased_exponent(FPB::EXP_BIAS); 316 // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). 317 double dx = (bs.get_val() - 1.0) * ONE_OVER_F[p1]; 318 319 // Minimax polynomial of log(1 + dx) generated by Sollya with: 320 // > P = fpminimax(log(1 + x)/x, 6, [|D...|], [0, 2^-7]); 321 const double COEFFS[6] = {-0x1.ffffffffffffcp-2, 0x1.5555555552ddep-2, 322 -0x1.ffffffefe562dp-3, 0x1.9999817d3a50fp-3, 323 -0x1.554317b3f67a5p-3, 0x1.1dc5c45e09c18p-3}; 324 double dx2 = dx * dx; 325 double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); 326 double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); 327 double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); 328 329 double p = fputil::polyeval(dx2, dx, c1, c2, c3); 330 double result = 331 fputil::multiply_add(ex, /*log(2)*/ 0x1.62e42fefa39efp-1, LOG_F[p1] + p); 332 return result; 333 } 334 335 // Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We 336 // assume further that 1 <= mid < 2, mid + lo < 2, and |lo| << mid. 337 // Notice that, if 0 < x < 2^-1022, 338 // double(2^-1022 + x) - 2^-1022 = double(x). 339 // So if we scale x up by 2^1022, we can use 340 // double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range. 341 LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo, 342 double err) { 343 using FPBits = typename fputil::FPBits<double>; 344 345 // Scaling factor = 1/(min normal number) = 2^1022 346 int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN; 347 double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid)); 348 double lo_scaled = 349 (lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo)) 350 : 0.0; 351 352 double extra_factor = 0.0; 353 uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field. 354 355 // Result is denormal if (mid_hi + lo_scale < 1.0). 356 if ((1.0 - mid_hi) > lo_scaled) { 357 // Extra rounding step is needed, which adds more rounding errors. 358 err += 0x1.0p-52; 359 extra_factor = 1.0; 360 scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field. 361 } 362 363 double err_scaled = 364 cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err)); 365 366 double lo_u = lo_scaled + err_scaled; 367 double lo_l = lo_scaled - err_scaled; 368 369 // By adding 1.0, the results will have similar rounding points as denormal 370 // outputs. 371 double upper = extra_factor + (mid_hi + lo_u); 372 double lower = extra_factor + (mid_hi + lo_l); 373 374 if (LIBC_LIKELY(upper == lower)) { 375 return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down); 376 } 377 378 return cpp::nullopt; 379 } 380 381 } // namespace LIBC_NAMESPACE_DECL 382 383 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H 384