19e7688c7STue Ly //===-- Single-precision log1p(x) function --------------------------------===// 29e7688c7STue Ly // 39e7688c7STue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49e7688c7STue Ly // See https://llvm.org/LICENSE.txt for license information. 59e7688c7STue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69e7688c7STue Ly // 79e7688c7STue Ly //===----------------------------------------------------------------------===// 89e7688c7STue Ly 99e7688c7STue Ly #include "src/math/log1pf.h" 109e7688c7STue Ly #include "common_constants.h" // Lookup table for (1/f) and log(f) 1176ec69a9STue Ly #include "src/__support/FPUtil/FEnvImpl.h" 129e7688c7STue Ly #include "src/__support/FPUtil/FMA.h" 139e7688c7STue Ly #include "src/__support/FPUtil/FPBits.h" 149e7688c7STue Ly #include "src/__support/FPUtil/PolyEval.h" 15ae2d8b49STue Ly #include "src/__support/FPUtil/except_value_utils.h" 16ae2d8b49STue Ly #include "src/__support/FPUtil/multiply_add.h" 179e7688c7STue Ly #include "src/__support/common.h" 18*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 194663d784STue Ly #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY 204663d784STue Ly #include "src/__support/macros/properties/cpu_features.h" 219e7688c7STue Ly 229e7688c7STue Ly // This is an algorithm for log10(x) in single precision which is 239e7688c7STue Ly // correctly rounded for all rounding modes. 249e7688c7STue Ly // - An exhaustive test show that when x >= 2^45, log1pf(x) == logf(x) 259e7688c7STue Ly // for all rounding modes. 26ae2d8b49STue Ly // - When 2^(-6) <= |x| < 2^45, the sum (double(x) + 1.0) is exact, 279e7688c7STue Ly // so we can adapt the correctly rounded algorithm of logf to compute 289e7688c7STue Ly // log(double(x) + 1.0) correctly. For more information about the logf 299e7688c7STue Ly // algorithm, see `libc/src/math/generic/logf.cpp`. 30ae2d8b49STue Ly // - When |x| < 2^(-6), we use a degree-8 polynomial in double precision 319e7688c7STue Ly // generated with Sollya using the following command: 32ae2d8b49STue Ly // fpminimax(log(1 + x)/x, 7, [|D...|], [-2^-6; 2^-6]); 339e7688c7STue Ly 34*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 359e7688c7STue Ly 369e7688c7STue Ly namespace internal { 379e7688c7STue Ly 38ae2d8b49STue Ly // We don't need to treat denormal and 0 39ae2d8b49STue Ly LIBC_INLINE float log(double x) { 409e7688c7STue Ly constexpr double LOG_2 = 0x1.62e42fefa39efp-1; 419e7688c7STue Ly 429e7688c7STue Ly using FPBits = typename fputil::FPBits<double>; 439e7688c7STue Ly FPBits xbits(x); 449e7688c7STue Ly 45ae2d8b49STue Ly uint64_t x_u = xbits.uintval(); 469e7688c7STue Ly 476b02d2f8SGuillaume Chatelet if (LIBC_UNLIKELY(x_u > FPBits::max_normal().uintval())) { 4811ec512fSGuillaume Chatelet if (xbits.is_neg() && !xbits.is_nan()) { 4931c39439STue Ly fputil::set_errno_if_required(EDOM); 5031c39439STue Ly fputil::raise_except_if_required(FE_INVALID); 51ace383dfSGuillaume Chatelet return fputil::FPBits<float>::quiet_nan().get_val(); 529e7688c7STue Ly } 539e7688c7STue Ly return static_cast<float>(x); 549e7688c7STue Ly } 559e7688c7STue Ly 569e7688c7STue Ly double m = static_cast<double>(xbits.get_exponent()); 579e7688c7STue Ly 589e7688c7STue Ly // Get the 8 highest bits, use 7 bits (excluding the implicit hidden bit) for 599e7688c7STue Ly // lookup tables. 60493cc71dSGuillaume Chatelet int f_index = static_cast<int>(xbits.get_mantissa() >> 613546f4daSGuillaume Chatelet (fputil::FPBits<double>::FRACTION_LEN - 7)); 629e7688c7STue Ly 63ae2d8b49STue Ly // Set bits to 1.m 647b387d27SGuillaume Chatelet xbits.set_biased_exponent(0x3FF); 657e7ecef9SGuillaume Chatelet FPBits f = xbits; 66ae2d8b49STue Ly 679e7688c7STue Ly // Clear the lowest 45 bits. 6824a903c4SGuillaume Chatelet f.set_uintval(f.uintval() & ~0x0000'1FFF'FFFF'FFFFULL); 699e7688c7STue Ly 702856db0dSGuillaume Chatelet double d = xbits.get_val() - f.get_val(); 719e7688c7STue Ly d *= ONE_OVER_F[f_index]; 729e7688c7STue Ly 73c5f8a0a1STue Ly double extra_factor = fputil::multiply_add(m, LOG_2, LOG_F[f_index]); 749e7688c7STue Ly 759e7688c7STue Ly double r = fputil::polyeval(d, extra_factor, 0x1.fffffffffffacp-1, 769e7688c7STue Ly -0x1.fffffffef9cb2p-2, 0x1.5555513bc679ap-2, 779e7688c7STue Ly -0x1.fff4805ea441p-3, 0x1.930180dbde91ap-3); 789e7688c7STue Ly 799e7688c7STue Ly return static_cast<float>(r); 809e7688c7STue Ly } 819e7688c7STue Ly 829e7688c7STue Ly } // namespace internal 839e7688c7STue Ly 849e7688c7STue Ly LLVM_LIBC_FUNCTION(float, log1pf, (float x)) { 859e7688c7STue Ly using FPBits = typename fputil::FPBits<float>; 869e7688c7STue Ly FPBits xbits(x); 87ae2d8b49STue Ly uint32_t x_u = xbits.uintval(); 88ae2d8b49STue Ly uint32_t x_a = x_u & 0x7fff'ffffU; 899e7688c7STue Ly double xd = static_cast<double>(x); 909e7688c7STue Ly 91ae2d8b49STue Ly // Use log1p(x) = log(1 + x) for |x| > 2^-6; 92ae2d8b49STue Ly if (x_a > 0x3c80'0000U) { 939e7688c7STue Ly // Hard-to-round cases. 94ae2d8b49STue Ly switch (x_u) { 95ae2d8b49STue Ly case 0x41078febU: // x = 0x1.0f1fd6p3 96ae2d8b49STue Ly return fputil::round_result_slightly_up(0x1.1fcbcep1f); 979e7688c7STue Ly case 0x5cd69e88U: // x = 0x1.ad3d1p+58f 98ae2d8b49STue Ly return fputil::round_result_slightly_up(0x1.45c146p+5f); 999e7688c7STue Ly case 0x65d890d3U: // x = 0x1.b121a6p+76f 100ae2d8b49STue Ly return fputil::round_result_slightly_down(0x1.a9a3f2p+5f); 1019e7688c7STue Ly case 0x6f31a8ecU: // x = 0x1.6351d8p+95f 102ae2d8b49STue Ly return fputil::round_result_slightly_down(0x1.08b512p+6f); 1039e7688c7STue Ly case 0x7a17f30aU: // x = 0x1.2fe614p+117f 104ae2d8b49STue Ly return fputil::round_result_slightly_up(0x1.451436p+6f); 1059e7688c7STue Ly case 0xbd1d20afU: // x = -0x1.3a415ep-5f 106ae2d8b49STue Ly return fputil::round_result_slightly_up(-0x1.407112p-5f); 107ae2d8b49STue Ly case 0xbf800000U: // x = -1.0 10831c39439STue Ly fputil::set_errno_if_required(ERANGE); 10931c39439STue Ly fputil::raise_except_if_required(FE_DIVBYZERO); 1102137894aSGuillaume Chatelet return FPBits::inf(Sign::NEG).get_val(); 1114663d784STue Ly #ifndef LIBC_TARGET_CPU_HAS_FMA 112ae2d8b49STue Ly case 0x4cc1c80bU: // x = 0x1.839016p+26f 113ae2d8b49STue Ly return fputil::round_result_slightly_down(0x1.26fc04p+4f); 114ae2d8b49STue Ly case 0x5ee8984eU: // x = 0x1.d1309cp+62f 115ae2d8b49STue Ly return fputil::round_result_slightly_up(0x1.5c9442p+5f); 116ae2d8b49STue Ly case 0x665e7ca6U: // x = 0x1.bcf94cp+77f 117ae2d8b49STue Ly return fputil::round_result_slightly_up(0x1.af66cp+5f); 118ae2d8b49STue Ly case 0x79e7ec37U: // x = 0x1.cfd86ep+116f 119da28593dSlntue return fputil::round_result_slightly_up(0x1.43ff6ep+6f); 1204663d784STue Ly #endif // LIBC_TARGET_CPU_HAS_FMA 1219e7688c7STue Ly } 1229e7688c7STue Ly 1239e7688c7STue Ly return internal::log(xd + 1.0); 1249e7688c7STue Ly } 1259e7688c7STue Ly 126ae2d8b49STue Ly // |x| <= 2^-6. 1279e7688c7STue Ly // Hard-to round cases. 128ae2d8b49STue Ly switch (x_u) { 1299e7688c7STue Ly case 0x35400003U: // x = 0x1.800006p-21f 130ae2d8b49STue Ly return fputil::round_result_slightly_down(0x1.7ffffep-21f); 1319e7688c7STue Ly case 0x3710001bU: // x = 0x1.200036p-17f 132ae2d8b49STue Ly return fputil::round_result_slightly_down(0x1.1fffe6p-17f); 133ae2d8b49STue Ly case 0xb53ffffdU: // x = -0x1.7ffffap-21 134ae2d8b49STue Ly return fputil::round_result_slightly_down(-0x1.800002p-21f); 135ae2d8b49STue Ly case 0xb70fffe5U: // x = -0x1.1fffcap-17 136ae2d8b49STue Ly return fputil::round_result_slightly_down(-0x1.20001ap-17f); 137ae2d8b49STue Ly case 0xbb0ec8c4U: // x = -0x1.1d9188p-9 138ae2d8b49STue Ly return fputil::round_result_slightly_up(-0x1.1de14ap-9f); 1399e7688c7STue Ly } 1409e7688c7STue Ly 141ae2d8b49STue Ly // Polymial generated by Sollya with: 142ae2d8b49STue Ly // > fpminimax(log(1 + x)/x, 7, [|D...|], [-2^-6; 2^-6]); 143ae2d8b49STue Ly const double COEFFS[7] = {-0x1.0000000000000p-1, 0x1.5555555556aadp-2, 144ae2d8b49STue Ly -0x1.000000000181ap-2, 0x1.999998998124ep-3, 145ae2d8b49STue Ly -0x1.55555452e2a2bp-3, 0x1.24adb8cde4aa7p-3, 146ae2d8b49STue Ly -0x1.0019db915ef6fp-3}; 147ae2d8b49STue Ly 148ae2d8b49STue Ly double xsq = xd * xd; 149ae2d8b49STue Ly double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]); 150ae2d8b49STue Ly double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]); 151ae2d8b49STue Ly double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); 152ae2d8b49STue Ly double r = fputil::polyeval(xsq, xd, c0, c1, c2, COEFFS[6]); 153ae2d8b49STue Ly 1547d11a592SAlex Brachet return static_cast<float>(r); 1559e7688c7STue Ly } 1569e7688c7STue Ly 157*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 158