1d08a801bSTue Ly //===-- Single-precision log(x) function ----------------------------------===// 2d08a801bSTue Ly // 3d08a801bSTue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4d08a801bSTue Ly // See https://llvm.org/LICENSE.txt for license information. 5d08a801bSTue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6d08a801bSTue Ly // 7d08a801bSTue Ly //===----------------------------------------------------------------------===// 8d08a801bSTue Ly 9d08a801bSTue Ly #include "src/math/logf.h" 109e7688c7STue Ly #include "common_constants.h" // Lookup table for (1/f) and log(f) 1182df72ccSTue Ly #include "src/__support/FPUtil/FEnvImpl.h" 12d08a801bSTue Ly #include "src/__support/FPUtil/FPBits.h" 13d08a801bSTue Ly #include "src/__support/FPUtil/PolyEval.h" 14ae2d8b49STue Ly #include "src/__support/FPUtil/except_value_utils.h" 15ae2d8b49STue Ly #include "src/__support/FPUtil/multiply_add.h" 16d08a801bSTue Ly #include "src/__support/common.h" 175ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 184663d784STue Ly #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY 194663d784STue Ly #include "src/__support/macros/properties/cpu_features.h" 20d08a801bSTue Ly 2182df72ccSTue Ly // This is an algorithm for log(x) in single precision which is correctly 2282df72ccSTue Ly // rounded for all rounding modes, based on the implementation of log(x) from 2382df72ccSTue Ly // the RLIBM project at: 24d08a801bSTue Ly // https://people.cs.rutgers.edu/~sn349/rlibm 25d08a801bSTue Ly 26d08a801bSTue Ly // Step 1 - Range reduction: 27d08a801bSTue Ly // For x = 2^m * 1.mant, log(x) = m * log(2) + log(1.m) 28d08a801bSTue Ly // If x is denormal, we normalize it by multiplying x by 2^23 and subtracting 29d08a801bSTue Ly // m by 23. 30d08a801bSTue Ly 31d08a801bSTue Ly // Step 2 - Another range reduction: 32d08a801bSTue Ly // To compute log(1.mant), let f be the highest 8 bits including the hidden 33d08a801bSTue Ly // bit, and d be the difference (1.mant - f), i.e. the remaining 16 bits of the 34d08a801bSTue Ly // mantissa. Then we have the following approximation formula: 35d08a801bSTue Ly // log(1.mant) = log(f) + log(1.mant / f) 36d08a801bSTue Ly // = log(f) + log(1 + d/f) 37d08a801bSTue Ly // ~ log(f) + P(d/f) 38d08a801bSTue Ly // since d/f is sufficiently small. 39d08a801bSTue Ly // log(f) and 1/f are then stored in two 2^7 = 128 entries look-up tables. 40d08a801bSTue Ly 41d08a801bSTue Ly // Step 3 - Polynomial approximation: 42d08a801bSTue Ly // To compute P(d/f), we use a single degree-5 polynomial in double precision 43d08a801bSTue Ly // which provides correct rounding for all but few exception values. 44d08a801bSTue Ly // For more detail about how this polynomial is obtained, please refer to the 45d08a801bSTue Ly // paper: 46d08a801bSTue Ly // Lim, J. and Nagarakatte, S., "One Polynomial Approximation to Produce 47d08a801bSTue Ly // Correctly Rounded Results of an Elementary Function for Multiple 48d08a801bSTue Ly // Representations and Rounding Modes", Proceedings of the 49th ACM SIGPLAN 49d08a801bSTue Ly // Symposium on Principles of Programming Languages (POPL-2022), Philadelphia, 50d08a801bSTue Ly // USA, January 16-22, 2022. 51d08a801bSTue Ly // https://people.cs.rutgers.edu/~sn349/papers/rlibmall-popl-2022.pdf 52d08a801bSTue Ly 535ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 54d08a801bSTue Ly 55d08a801bSTue Ly LLVM_LIBC_FUNCTION(float, logf, (float x)) { 56d08a801bSTue Ly constexpr double LOG_2 = 0x1.62e42fefa39efp-1; 57d08a801bSTue Ly using FPBits = typename fputil::FPBits<float>; 582137894aSGuillaume Chatelet 59d08a801bSTue Ly FPBits xbits(x); 60ae2d8b49STue Ly uint32_t x_u = xbits.uintval(); 6182df72ccSTue Ly 623546f4daSGuillaume Chatelet int m = -FPBits::EXP_BIAS; 63bc8e87efSTue Ly 64ae2d8b49STue Ly using fputil::round_result_slightly_down; 65ae2d8b49STue Ly using fputil::round_result_slightly_up; 66ae2d8b49STue Ly 67bc8e87efSTue Ly // Small inputs 68bc8e87efSTue Ly if (x_u < 0x4c5d65a5U) { 69bc8e87efSTue Ly // Hard-to-round cases. 70ae2d8b49STue Ly switch (x_u) { 71bc8e87efSTue Ly case 0x3f7f4d6fU: // x = 0x1.fe9adep-1f 72bc8e87efSTue Ly return round_result_slightly_up(-0x1.659ec8p-9f); 73ae2d8b49STue Ly case 0x41178febU: // x = 0x1.2f1fd6p+3f 74ae2d8b49STue Ly return round_result_slightly_up(0x1.1fcbcep+1f); 75bc8e87efSTue Ly #ifdef LIBC_TARGET_CPU_HAS_FMA 76bc8e87efSTue Ly case 0x3f800000U: // x = 1.0f 77bc8e87efSTue Ly return 0.0f; 78bc8e87efSTue Ly #else 79bc8e87efSTue Ly case 0x1e88452dU: // x = 0x1.108a5ap-66f 80bc8e87efSTue Ly return round_result_slightly_up(-0x1.6d7b18p+5f); 81bc8e87efSTue Ly #endif // LIBC_TARGET_CPU_HAS_FMA 82bc8e87efSTue Ly } 83bc8e87efSTue Ly // Subnormal inputs. 846b02d2f8SGuillaume Chatelet if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval())) { 85*0f4b3c40Slntue if (x == 0.0f) { 86bc8e87efSTue Ly // Return -inf and raise FE_DIVBYZERO 87bc8e87efSTue Ly fputil::set_errno_if_required(ERANGE); 88bc8e87efSTue Ly fputil::raise_except_if_required(FE_DIVBYZERO); 892856db0dSGuillaume Chatelet return FPBits::inf(Sign::NEG).get_val(); 90bc8e87efSTue Ly } 91bc8e87efSTue Ly // Normalize denormal inputs. 92d02471edSGuillaume Chatelet xbits = FPBits(xbits.get_val() * 0x1.0p23f); 93bc8e87efSTue Ly m -= 23; 94bc8e87efSTue Ly x_u = xbits.uintval(); 95bc8e87efSTue Ly } 96bc8e87efSTue Ly } else { 97bc8e87efSTue Ly // Hard-to-round cases. 98bc8e87efSTue Ly switch (x_u) { 99ae2d8b49STue Ly case 0x4c5d65a5U: // x = 0x1.bacb4ap+25f 100ae2d8b49STue Ly return round_result_slightly_down(0x1.1e0696p+4f); 101ae2d8b49STue Ly case 0x65d890d3U: // x = 0x1.b121a6p+76f 102ae2d8b49STue Ly return round_result_slightly_down(0x1.a9a3f2p+5f); 103ae2d8b49STue Ly case 0x6f31a8ecU: // x = 0x1.6351d8p+95f 104ae2d8b49STue Ly return round_result_slightly_down(0x1.08b512p+6f); 105ae2d8b49STue Ly case 0x7a17f30aU: // x = 0x1.2fe614p+117f 106ae2d8b49STue Ly return round_result_slightly_up(0x1.451436p+6f); 1074663d784STue Ly #ifndef LIBC_TARGET_CPU_HAS_FMA 108bc8e87efSTue Ly case 0x500ffb03U: // x = 0x1.1ff606p+33f 109bc8e87efSTue Ly return round_result_slightly_up(0x1.6fdd34p+4f); 110bc8e87efSTue Ly case 0x5cd69e88U: // x = 0x1.ad3d1p+58f 111bc8e87efSTue Ly return round_result_slightly_up(0x1.45c146p+5f); 112ae2d8b49STue Ly case 0x5ee8984eU: // x = 0x1.d1309cp+62f; 113ae2d8b49STue Ly return round_result_slightly_up(0x1.5c9442p+5f); 1144663d784STue Ly #endif // LIBC_TARGET_CPU_HAS_FMA 11582df72ccSTue Ly } 116bc8e87efSTue Ly // Exceptional inputs. 1176b02d2f8SGuillaume Chatelet if (LIBC_UNLIKELY(x_u > FPBits::max_normal().uintval())) { 118bc8e87efSTue Ly if (x_u == 0x8000'0000U) { 119ae2d8b49STue Ly // Return -inf and raise FE_DIVBYZERO 12031c39439STue Ly fputil::set_errno_if_required(ERANGE); 12131c39439STue Ly fputil::raise_except_if_required(FE_DIVBYZERO); 1226b02d2f8SGuillaume Chatelet return FPBits::inf(Sign::NEG).get_val(); 123d08a801bSTue Ly } 12411ec512fSGuillaume Chatelet if (xbits.is_neg() && !xbits.is_nan()) { 125ae2d8b49STue Ly // Return NaN and raise FE_INVALID 12631c39439STue Ly fputil::set_errno_if_required(EDOM); 12731c39439STue Ly fputil::raise_except_if_required(FE_INVALID); 128ace383dfSGuillaume Chatelet return FPBits::quiet_nan().get_val(); 129d08a801bSTue Ly } 130bc8e87efSTue Ly // x is +inf or nan 131d08a801bSTue Ly return x; 132d08a801bSTue Ly } 133d08a801bSTue Ly } 134d08a801bSTue Ly 135bc8e87efSTue Ly #ifndef LIBC_TARGET_CPU_HAS_FMA 136bc8e87efSTue Ly // Returning the correct +0 when x = 1.0 for non-FMA targets with FE_DOWNWARD 137bc8e87efSTue Ly // rounding mode. 138bc8e87efSTue Ly if (LIBC_UNLIKELY((x_u & 0x007f'ffffU) == 0)) 139bc8e87efSTue Ly return static_cast<float>( 1407b387d27SGuillaume Chatelet static_cast<double>(m + xbits.get_biased_exponent()) * LOG_2); 141bc8e87efSTue Ly #endif // LIBC_TARGET_CPU_HAS_FMA 142bc8e87efSTue Ly 143bc8e87efSTue Ly uint32_t mant = xbits.get_mantissa(); 144bc8e87efSTue Ly // Extract 7 leading fractional bits of the mantissa 145bc8e87efSTue Ly int index = mant >> 16; 146bc8e87efSTue Ly // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are 147bc8e87efSTue Ly // all 1's. 148bc8e87efSTue Ly m += static_cast<int>((x_u + (1 << 16)) >> 23); 149bc8e87efSTue Ly 150d08a801bSTue Ly // Set bits to 1.m 1517b387d27SGuillaume Chatelet xbits.set_biased_exponent(0x7F); 152d08a801bSTue Ly 1532856db0dSGuillaume Chatelet float u = xbits.get_val(); 154bc8e87efSTue Ly double v; 155bc8e87efSTue Ly #ifdef LIBC_TARGET_CPU_HAS_FMA 156bc8e87efSTue Ly v = static_cast<double>(fputil::multiply_add(u, R[index], -1.0f)); // Exact. 157bc8e87efSTue Ly #else 158bc8e87efSTue Ly v = fputil::multiply_add(static_cast<double>(u), RD[index], -1.0); // Exact 159bc8e87efSTue Ly #endif // LIBC_TARGET_CPU_HAS_FMA 160d08a801bSTue Ly 161bc8e87efSTue Ly // Degree-5 polynomial approximation of log generated by Sollya with: 162bc8e87efSTue Ly // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]); 163bc8e87efSTue Ly constexpr double COEFFS[4] = {-0x1.000000000fe63p-1, 0x1.555556e963c16p-2, 164bc8e87efSTue Ly -0x1.000028dedf986p-2, 0x1.966681bfda7f7p-3}; 165bc8e87efSTue Ly double v2 = v * v; // Exact 166bc8e87efSTue Ly double p2 = fputil::multiply_add(v, COEFFS[3], COEFFS[2]); 167bc8e87efSTue Ly double p1 = fputil::multiply_add(v, COEFFS[1], COEFFS[0]); 168bc8e87efSTue Ly double p0 = LOG_R[index] + v; 169bc8e87efSTue Ly double r = fputil::multiply_add(static_cast<double>(m), LOG_2, 170bc8e87efSTue Ly fputil::polyeval(v2, p0, p1, p2)); 17182df72ccSTue Ly return static_cast<float>(r); 172d08a801bSTue Ly } 173d08a801bSTue Ly 1745ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 175