xref: /llvm-project/libc/src/math/generic/log10f.cpp (revision 0f4b3c409fbd756d826c89d5539d9ea22bcc56aa)
1e581841eSTue Ly //===-- Single-precision log10(x) function --------------------------------===//
2e581841eSTue Ly //
3e581841eSTue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e581841eSTue Ly // See https://llvm.org/LICENSE.txt for license information.
5e581841eSTue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e581841eSTue Ly //
7e581841eSTue Ly //===----------------------------------------------------------------------===//
8e581841eSTue Ly 
9e581841eSTue Ly #include "src/math/log10f.h"
10e581841eSTue Ly #include "common_constants.h" // Lookup table for (1/f)
1176ec69a9STue Ly #include "src/__support/FPUtil/FEnvImpl.h"
12e581841eSTue Ly #include "src/__support/FPUtil/FMA.h"
13e581841eSTue Ly #include "src/__support/FPUtil/FPBits.h"
14e581841eSTue Ly #include "src/__support/FPUtil/PolyEval.h"
15ae2d8b49STue Ly #include "src/__support/FPUtil/except_value_utils.h"
16ae2d8b49STue Ly #include "src/__support/FPUtil/multiply_add.h"
17e581841eSTue Ly #include "src/__support/common.h"
185ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
194663d784STue Ly #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
204663d784STue Ly #include "src/__support/macros/properties/cpu_features.h"
21e581841eSTue Ly 
22e581841eSTue Ly // This is an algorithm for log10(x) in single precision which is
23e581841eSTue Ly // correctly rounded for all rounding modes, based on the implementation of
24e581841eSTue Ly // log10(x) from the RLIBM project at:
25e581841eSTue Ly // https://people.cs.rutgers.edu/~sn349/rlibm
26e581841eSTue Ly 
27e581841eSTue Ly // Step 1 - Range reduction:
28e581841eSTue Ly //   For x = 2^m * 1.mant, log(x) = m * log10(2) + log10(1.m)
29e581841eSTue Ly //   If x is denormal, we normalize it by multiplying x by 2^23 and subtracting
30e581841eSTue Ly //   m by 23.
31e581841eSTue Ly 
32e581841eSTue Ly // Step 2 - Another range reduction:
33e581841eSTue Ly //   To compute log(1.mant), let f be the highest 8 bits including the hidden
34e581841eSTue Ly // bit, and d be the difference (1.mant - f), i.e. the remaining 16 bits of the
35e581841eSTue Ly // mantissa. Then we have the following approximation formula:
36e581841eSTue Ly //   log10(1.mant) = log10(f) + log10(1.mant / f)
37e581841eSTue Ly //                 = log10(f) + log10(1 + d/f)
38e581841eSTue Ly //                 ~ log10(f) + P(d/f)
39e581841eSTue Ly // since d/f is sufficiently small.
40e581841eSTue Ly //   log10(f) and 1/f are then stored in two 2^7 = 128 entries look-up tables.
41e581841eSTue Ly 
42e581841eSTue Ly // Step 3 - Polynomial approximation:
43e581841eSTue Ly //   To compute P(d/f), we use a single degree-5 polynomial in double precision
44e581841eSTue Ly // which provides correct rounding for all but few exception values.
45e581841eSTue Ly //   For more detail about how this polynomial is obtained, please refer to the
46e581841eSTue Ly // papers:
47e581841eSTue Ly //   Lim, J. and Nagarakatte, S., "One Polynomial Approximation to Produce
48e581841eSTue Ly // Correctly Rounded Results of an Elementary Function for Multiple
49e581841eSTue Ly // Representations and Rounding Modes", Proceedings of the 49th ACM SIGPLAN
50e581841eSTue Ly // Symposium on Principles of Programming Languages (POPL-2022), Philadelphia,
51e581841eSTue Ly // USA, Jan. 16-22, 2022.
52e581841eSTue Ly // https://people.cs.rutgers.edu/~sn349/papers/rlibmall-popl-2022.pdf
53e581841eSTue Ly //   Aanjaneya, M., Lim, J., and Nagarakatte, S., "RLibm-Prog: Progressive
54e581841eSTue Ly // Polynomial Approximations for Fast Correctly Rounded Math Libraries",
55e581841eSTue Ly // Dept. of Comp. Sci., Rutgets U., Technical Report DCS-TR-758, Nov. 2021.
56e581841eSTue Ly // https://arxiv.org/pdf/2111.12852.pdf.
57e581841eSTue Ly 
585ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
59e581841eSTue Ly 
609af8dca7STue Ly // Lookup table for -log10(r) where r is defined in common_constants.cpp.
619af8dca7STue Ly static constexpr double LOG10_R[128] = {
629af8dca7STue Ly     0x0.0000000000000p+0, 0x1.be76bd77b4fc3p-9, 0x1.c03a80ae5e054p-8,
639af8dca7STue Ly     0x1.51824c7587ebp-7,  0x1.c3d0837784c41p-7, 0x1.1b85d6044e9aep-6,
649af8dca7STue Ly     0x1.559bd2406c3bap-6, 0x1.902c31d62a843p-6, 0x1.cb38fccd8bfdbp-6,
659af8dca7STue Ly     0x1.e8eeb09f2f6cbp-6, 0x1.125d0432ea20ep-5, 0x1.30838cdc2fbfdp-5,
669af8dca7STue Ly     0x1.3faf7c663060ep-5, 0x1.5e3966b7e9295p-5, 0x1.7d070145f4fd7p-5,
679af8dca7STue Ly     0x1.8c878eeb05074p-5, 0x1.abbcebd84fcap-5,  0x1.bb7209d1e24e5p-5,
689af8dca7STue Ly     0x1.db11ed766abf4p-5, 0x1.eafd05035bd3bp-5, 0x1.0585283764178p-4,
699af8dca7STue Ly     0x1.0d966cc6500fap-4, 0x1.1dd5460c8b16fp-4, 0x1.2603072a25f82p-4,
709af8dca7STue Ly     0x1.367ba3aaa1883p-4, 0x1.3ec6ad5407868p-4, 0x1.4f7aad9bbcbafp-4,
719af8dca7STue Ly     0x1.57e3d47c3af7bp-4, 0x1.605735ee985f1p-4, 0x1.715d0ce367afcp-4,
729af8dca7STue Ly     0x1.79efb57b0f803p-4, 0x1.828cfed29a215p-4, 0x1.93e7de0fc3e8p-4,
739af8dca7STue Ly     0x1.9ca5aa1729f45p-4, 0x1.a56e8325f5c87p-4, 0x1.ae4285509950bp-4,
749af8dca7STue Ly     0x1.b721cd17157e3p-4, 0x1.c902a19e65111p-4, 0x1.d204698cb42bdp-4,
759af8dca7STue Ly     0x1.db11ed766abf4p-4, 0x1.e42b4c16caaf3p-4, 0x1.ed50a4a26eafcp-4,
769af8dca7STue Ly     0x1.ffbfc2bbc7803p-4, 0x1.0484e4942aa43p-3, 0x1.093025a19976cp-3,
779af8dca7STue Ly     0x1.0de1b56356b04p-3, 0x1.1299a4fb3e306p-3, 0x1.175805d1587c1p-3,
789af8dca7STue Ly     0x1.1c1ce9955c0c6p-3, 0x1.20e8624038fedp-3, 0x1.25ba8215af7fcp-3,
799af8dca7STue Ly     0x1.2a935ba5f1479p-3, 0x1.2f7301cf4e87bp-3, 0x1.345987bfeea91p-3,
809af8dca7STue Ly     0x1.394700f7953fdp-3, 0x1.3e3b8149739d4p-3, 0x1.43371cde076c2p-3,
819af8dca7STue Ly     0x1.4839e83506c87p-3, 0x1.4d43f8275a483p-3, 0x1.525561e9256eep-3,
829af8dca7STue Ly     0x1.576e3b0bde0a7p-3, 0x1.5c8e998072fe2p-3, 0x1.61b6939983048p-3,
839af8dca7STue Ly     0x1.66e6400da3f77p-3, 0x1.6c1db5f9bb336p-3, 0x1.6c1db5f9bb336p-3,
849af8dca7STue Ly     0x1.715d0ce367afcp-3, 0x1.76a45cbb7e6ffp-3, 0x1.7bf3bde099f3p-3,
859af8dca7STue Ly     0x1.814b4921bd52bp-3, 0x1.86ab17c10bc7fp-3, 0x1.86ab17c10bc7fp-3,
869af8dca7STue Ly     0x1.8c13437695532p-3, 0x1.9183e673394fap-3, 0x1.96fd1b639fc09p-3,
879af8dca7STue Ly     0x1.9c7efd734a2f9p-3, 0x1.a209a84fbcff8p-3, 0x1.a209a84fbcff8p-3,
889af8dca7STue Ly     0x1.a79d382bc21d9p-3, 0x1.ad39c9c2c608p-3,  0x1.b2df7a5c50299p-3,
899af8dca7STue Ly     0x1.b2df7a5c50299p-3, 0x1.b88e67cf9798p-3,  0x1.be46b087354bcp-3,
909af8dca7STue Ly     0x1.c4087384f4f8p-3,  0x1.c4087384f4f8p-3,  0x1.c9d3d065c5b42p-3,
919af8dca7STue Ly     0x1.cfa8e765cbb72p-3, 0x1.cfa8e765cbb72p-3, 0x1.d587d96494759p-3,
929af8dca7STue Ly     0x1.db70c7e96e7f3p-3, 0x1.db70c7e96e7f3p-3, 0x1.e163d527e68cfp-3,
939af8dca7STue Ly     0x1.e76124046b3f3p-3, 0x1.e76124046b3f3p-3, 0x1.ed68d819191fcp-3,
949af8dca7STue Ly     0x1.f37b15bab08d1p-3, 0x1.f37b15bab08d1p-3, 0x1.f99801fdb749dp-3,
959af8dca7STue Ly     0x1.ffbfc2bbc7803p-3, 0x1.ffbfc2bbc7803p-3, 0x1.02f93f4c87101p-2,
969af8dca7STue Ly     0x1.06182e84fd4acp-2, 0x1.06182e84fd4acp-2, 0x1.093cc32c90f84p-2,
979af8dca7STue Ly     0x1.093cc32c90f84p-2, 0x1.0c6711d6abd7ap-2, 0x1.0f972f87ff3d6p-2,
989af8dca7STue Ly     0x1.0f972f87ff3d6p-2, 0x1.12cd31b9c99ffp-2, 0x1.12cd31b9c99ffp-2,
999af8dca7STue Ly     0x1.16092e5d3a9a6p-2, 0x1.194b3bdef6b9ep-2, 0x1.194b3bdef6b9ep-2,
1009af8dca7STue Ly     0x1.1c93712abc7ffp-2, 0x1.1c93712abc7ffp-2, 0x1.1fe1e5af2c141p-2,
1019af8dca7STue Ly     0x1.1fe1e5af2c141p-2, 0x1.2336b161b3337p-2, 0x1.2336b161b3337p-2,
1029af8dca7STue Ly     0x1.2691ecc29f042p-2, 0x1.2691ecc29f042p-2, 0x1.29f3b0e15584bp-2,
1039af8dca7STue Ly     0x1.29f3b0e15584bp-2, 0x1.2d5c1760b86bbp-2, 0x1.2d5c1760b86bbp-2,
1049af8dca7STue Ly     0x1.30cb3a7bb3625p-2, 0x1.34413509f79ffp-2};
105e581841eSTue Ly 
106e581841eSTue Ly LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
107e581841eSTue Ly   constexpr double LOG10_2 = 0x1.34413509f79ffp-2;
108e581841eSTue Ly 
109e581841eSTue Ly   using FPBits = typename fputil::FPBits<float>;
1102137894aSGuillaume Chatelet 
111e581841eSTue Ly   FPBits xbits(x);
112ae2d8b49STue Ly   uint32_t x_u = xbits.uintval();
113e581841eSTue Ly 
114e581841eSTue Ly   // Exact powers of 10 and other hard-to-round cases.
1159af8dca7STue Ly   if (LIBC_UNLIKELY((x_u & 0x3FF) == 0)) {
116ae2d8b49STue Ly     switch (x_u) {
1179af8dca7STue Ly     case 0x3f80'0000U: // x = 1
1189af8dca7STue Ly       return 0.0f;
119e581841eSTue Ly     case 0x4120'0000U: // x = 10
120e581841eSTue Ly       return 1.0f;
121e581841eSTue Ly     case 0x42c8'0000U: // x = 100
122e581841eSTue Ly       return 2.0f;
123e581841eSTue Ly     case 0x447a'0000U: // x = 1,000
124e581841eSTue Ly       return 3.0f;
125e581841eSTue Ly     case 0x461c'4000U: // x = 10,000
126e581841eSTue Ly       return 4.0f;
127e581841eSTue Ly     case 0x47c3'5000U: // x = 100,000
128e581841eSTue Ly       return 5.0f;
129e581841eSTue Ly     case 0x4974'2400U: // x = 1,000,000
130e581841eSTue Ly       return 6.0f;
1319af8dca7STue Ly     }
1329af8dca7STue Ly   } else {
1339af8dca7STue Ly     switch (x_u) {
134e581841eSTue Ly     case 0x4b18'9680U: // x = 10,000,000
135e581841eSTue Ly       return 7.0f;
136e581841eSTue Ly     case 0x4cbe'bc20U: // x = 100,000,000
137e581841eSTue Ly       return 8.0f;
138e581841eSTue Ly     case 0x4e6e'6b28U: // x = 1,000,000,000
139e581841eSTue Ly       return 9.0f;
140e581841eSTue Ly     case 0x5015'02f9U: // x = 10,000,000,000
141e581841eSTue Ly       return 10.0f;
1429af8dca7STue Ly     case 0x0efe'ee7aU: // x = 0x1.fddcf4p-98f
1439af8dca7STue Ly       return fputil::round_result_slightly_up(-0x1.d33a46p+4f);
1449af8dca7STue Ly     case 0x3f5f'de1bU: // x = 0x1.bfbc36p-1f
1459af8dca7STue Ly       return fputil::round_result_slightly_up(-0x1.dd2c6ep-5f);
1469af8dca7STue Ly     case 0x3f80'70d8U: // x = 0x1.00e1bp0f
1479af8dca7STue Ly       return fputil::round_result_slightly_up(0x1.8762c4p-10f);
1489af8dca7STue Ly #ifndef LIBC_TARGET_CPU_HAS_FMA
1499af8dca7STue Ly     case 0x08ae'a356U: // x = 0x1.5d46acp-110f
1509af8dca7STue Ly       return fputil::round_result_slightly_up(-0x1.07d3b4p+5f);
1519af8dca7STue Ly     case 0x120b'93dcU: // x = 0x1.1727b8p-91f
1529af8dca7STue Ly       return fputil::round_result_slightly_down(-0x1.b5b2aep+4f);
1539af8dca7STue Ly     case 0x13ae'78d3U: // x = 0x1.5cf1a6p-88f
1549af8dca7STue Ly       return fputil::round_result_slightly_down(-0x1.a5b2aep+4f);
155e581841eSTue Ly     case 0x4f13'4f83U: // x = 2471461632.0
156ae2d8b49STue Ly       return fputil::round_result_slightly_down(0x1.2c9314p+3f);
157ae2d8b49STue Ly     case 0x7956'ba5eU: // x = 69683218960000541503257137270226944.0
158ae2d8b49STue Ly       return fputil::round_result_slightly_up(0x1.16bebap+5f);
1594663d784STue Ly #endif // LIBC_TARGET_CPU_HAS_FMA
160e581841eSTue Ly     }
1619af8dca7STue Ly   }
162e581841eSTue Ly 
1633546f4daSGuillaume Chatelet   int m = -FPBits::EXP_BIAS;
164ae2d8b49STue Ly 
1656b02d2f8SGuillaume Chatelet   if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval() ||
1666b02d2f8SGuillaume Chatelet                     x_u > FPBits::max_normal().uintval())) {
167*0f4b3c40Slntue     if (x == 0.0f) {
168ae2d8b49STue Ly       // Return -inf and raise FE_DIVBYZERO
16931c39439STue Ly       fputil::set_errno_if_required(ERANGE);
17031c39439STue Ly       fputil::raise_except_if_required(FE_DIVBYZERO);
1716b02d2f8SGuillaume Chatelet       return FPBits::inf(Sign::NEG).get_val();
172e581841eSTue Ly     }
17311ec512fSGuillaume Chatelet     if (xbits.is_neg() && !xbits.is_nan()) {
174ae2d8b49STue Ly       // Return NaN and raise FE_INVALID
17531c39439STue Ly       fputil::set_errno_if_required(EDOM);
17631c39439STue Ly       fputil::raise_except_if_required(FE_INVALID);
177ace383dfSGuillaume Chatelet       return FPBits::quiet_nan().get_val();
178e581841eSTue Ly     }
179e581841eSTue Ly     if (xbits.is_inf_or_nan()) {
180e581841eSTue Ly       return x;
181e581841eSTue Ly     }
182e581841eSTue Ly     // Normalize denormal inputs.
183d02471edSGuillaume Chatelet     xbits = FPBits(xbits.get_val() * 0x1.0p23f);
184ae2d8b49STue Ly     m -= 23;
1859af8dca7STue Ly     x_u = xbits.uintval();
186e581841eSTue Ly   }
187e581841eSTue Ly 
1889af8dca7STue Ly   // Add unbiased exponent.
1899af8dca7STue Ly   m += static_cast<int>(x_u >> 23);
1909af8dca7STue Ly   // Extract 7 leading fractional bits of the mantissa
1919af8dca7STue Ly   int index = (x_u >> 16) & 0x7F;
192e581841eSTue Ly   // Set bits to 1.m
1937b387d27SGuillaume Chatelet   xbits.set_biased_exponent(0x7F);
194e581841eSTue Ly 
1952856db0dSGuillaume Chatelet   float u = xbits.get_val();
1969af8dca7STue Ly   double v;
1979af8dca7STue Ly #ifdef LIBC_TARGET_CPU_HAS_FMA
1989af8dca7STue Ly   v = static_cast<double>(fputil::multiply_add(u, R[index], -1.0f)); // Exact.
1999af8dca7STue Ly #else
2009af8dca7STue Ly   v = fputil::multiply_add(static_cast<double>(u),
2019af8dca7STue Ly                            static_cast<double>(R[index]), -1.0); // Exact
2029af8dca7STue Ly #endif // LIBC_TARGET_CPU_HAS_FMA
203e581841eSTue Ly 
2049af8dca7STue Ly   // Degree-5 polynomial approximation of log10 generated by:
2059af8dca7STue Ly   // > P = fpminimax(log10(1 + x)/x, 4, [|D...|], [-2^-8, 2^-7]);
2069af8dca7STue Ly   constexpr double COEFFS[5] = {0x1.bcb7b1526e2e5p-2, -0x1.bcb7b1528d43dp-3,
2079af8dca7STue Ly                                 0x1.287a77eb4ca0dp-3, -0x1.bcb8110a181b5p-4,
2089af8dca7STue Ly                                 0x1.60e7e3e747129p-4};
2099af8dca7STue Ly   double v2 = v * v; // Exact
2109af8dca7STue Ly   double p2 = fputil::multiply_add(v, COEFFS[4], COEFFS[3]);
2119af8dca7STue Ly   double p1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]);
2129af8dca7STue Ly   double p0 = fputil::multiply_add(v, COEFFS[0], LOG10_R[index]);
2139af8dca7STue Ly   double r = fputil::multiply_add(static_cast<double>(m), LOG10_2,
2149af8dca7STue Ly                                   fputil::polyeval(v2, p0, p1, p2));
215e581841eSTue Ly 
216e581841eSTue Ly   return static_cast<float>(r);
217e581841eSTue Ly }
218e581841eSTue Ly 
2195ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
220