xref: /llvm-project/libc/src/math/generic/expm1f.cpp (revision 46944b0cbc9a9d8daad0182c40fcd3560bc9ca35)
164af346bSTue Ly //===-- Single-precision e^x - 1 function ---------------------------------===//
24e5f8b4dSTue Ly //
34e5f8b4dSTue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44e5f8b4dSTue Ly // See https://llvm.org/LICENSE.txt for license information.
54e5f8b4dSTue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64e5f8b4dSTue Ly //
74e5f8b4dSTue Ly //===----------------------------------------------------------------------===//
84e5f8b4dSTue Ly 
94e5f8b4dSTue Ly #include "src/math/expm1f.h"
1064af346bSTue Ly #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
11c120edc7SMichael Jones #include "src/__support/FPUtil/BasicOperations.h"
1264af346bSTue Ly #include "src/__support/FPUtil/FEnvImpl.h"
1364af346bSTue Ly #include "src/__support/FPUtil/FMA.h"
1464af346bSTue Ly #include "src/__support/FPUtil/FPBits.h"
15c120edc7SMichael Jones #include "src/__support/FPUtil/PolyEval.h"
16628fbbefSTue Ly #include "src/__support/FPUtil/multiply_add.h"
17628fbbefSTue Ly #include "src/__support/FPUtil/nearest_integer.h"
18a9824312STue Ly #include "src/__support/FPUtil/rounding_mode.h"
194e5f8b4dSTue Ly #include "src/__support/common.h"
20*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
21737e1cd1SGuillaume Chatelet #include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
22737e1cd1SGuillaume Chatelet #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
2364af346bSTue Ly 
24*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
254e5f8b4dSTue Ly 
264e5f8b4dSTue Ly LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
2764af346bSTue Ly   using FPBits = typename fputil::FPBits<float>;
2864af346bSTue Ly   FPBits xbits(x);
294e5f8b4dSTue Ly 
30a5466f04STue Ly   uint32_t x_u = xbits.uintval();
31a5466f04STue Ly   uint32_t x_abs = x_u & 0x7fff'ffffU;
32a5466f04STue Ly 
33a5466f04STue Ly   // Exceptional value
3429f8e076SGuillaume Chatelet   if (LIBC_UNLIKELY(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f
35a9824312STue Ly     int round_mode = fputil::quick_get_round();
36a5466f04STue Ly     if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD)
37a5466f04STue Ly       return 0x1.8dbe64p-3f;
38a5466f04STue Ly     return 0x1.8dbe62p-3f;
39a5466f04STue Ly   }
40a5466f04STue Ly 
41a2569a76SGuillaume Chatelet #if !defined(LIBC_TARGET_CPU_HAS_FMA)
4229f8e076SGuillaume Chatelet   if (LIBC_UNLIKELY(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
43a9824312STue Ly     int round_mode = fputil::quick_get_round();
44484319f4STue Ly     if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
45484319f4STue Ly       return -0x1.71c884p-4f;
46484319f4STue Ly     return -0x1.71c882p-4f;
47484319f4STue Ly   }
48a2569a76SGuillaume Chatelet #endif // LIBC_TARGET_CPU_HAS_FMA
49484319f4STue Ly 
50a5466f04STue Ly   // When |x| > 25*log(2), or nan
5129f8e076SGuillaume Chatelet   if (LIBC_UNLIKELY(x_abs >= 0x418a'a123U)) {
52a5466f04STue Ly     // x < log(2^-25)
5311ec512fSGuillaume Chatelet     if (xbits.is_neg()) {
5464af346bSTue Ly       // exp(-Inf) = 0
5564af346bSTue Ly       if (xbits.is_inf())
5664af346bSTue Ly         return -1.0f;
5764af346bSTue Ly       // exp(nan) = nan
5864af346bSTue Ly       if (xbits.is_nan())
5964af346bSTue Ly         return x;
60a9824312STue Ly       int round_mode = fputil::quick_get_round();
6164af346bSTue Ly       if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
6264af346bSTue Ly         return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
6364af346bSTue Ly       return -1.0f;
64a5466f04STue Ly     } else {
6564af346bSTue Ly       // x >= 89 or nan
66a5466f04STue Ly       if (xbits.uintval() >= 0x42b2'0000) {
6764af346bSTue Ly         if (xbits.uintval() < 0x7f80'0000U) {
68a9824312STue Ly           int rounding = fputil::quick_get_round();
6964af346bSTue Ly           if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
706b02d2f8SGuillaume Chatelet             return FPBits::max_normal().get_val();
7164af346bSTue Ly 
720aa9593cSTue Ly           fputil::set_errno_if_required(ERANGE);
730aa9593cSTue Ly           fputil::raise_except_if_required(FE_OVERFLOW);
744e5f8b4dSTue Ly         }
756b02d2f8SGuillaume Chatelet         return x + FPBits::inf().get_val();
764e5f8b4dSTue Ly       }
77a5466f04STue Ly     }
78a5466f04STue Ly   }
7964af346bSTue Ly 
8064af346bSTue Ly   // |x| < 2^-4
81a5466f04STue Ly   if (x_abs < 0x3d80'0000U) {
8264af346bSTue Ly     // |x| < 2^-25
83a5466f04STue Ly     if (x_abs < 0x3300'0000U) {
8464af346bSTue Ly       // x = -0.0f
8529f8e076SGuillaume Chatelet       if (LIBC_UNLIKELY(xbits.uintval() == 0x8000'0000U))
8664af346bSTue Ly         return x;
87a5466f04STue Ly         // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
88a5466f04STue Ly         // is:
89a5466f04STue Ly         //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
90a5466f04STue Ly         //                               = |x|
91a5466f04STue Ly         //                               < 2^-25
92a5466f04STue Ly         //                               < epsilon(1)/2.
9364af346bSTue Ly         // So the correctly rounded values of expm1(x) are:
9464af346bSTue Ly         //   = x + eps(x) if rounding mode = FE_UPWARD,
95484319f4STue Ly         //                   or (rounding mode = FE_TOWARDZERO and x is
96484319f4STue Ly         //                   negative),
9764af346bSTue Ly         //   = x otherwise.
9864af346bSTue Ly         // To simplify the rounding decision and make it more efficient, we use
9964af346bSTue Ly         //   fma(x, x, x) ~ x + x^2 instead.
100484319f4STue Ly         // Note: to use the formula x + x^2 to decide the correct rounding, we
101484319f4STue Ly         // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
102484319f4STue Ly         // 2^-76. For targets without FMA instructions, we simply use double for
103484319f4STue Ly         // intermediate results as it is more efficient than using an emulated
104484319f4STue Ly         // version of FMA.
105a2569a76SGuillaume Chatelet #if defined(LIBC_TARGET_CPU_HAS_FMA)
106f3aceeeeSOverMighty       return fputil::fma<float>(x, x, x);
107484319f4STue Ly #else
108484319f4STue Ly       double xd = x;
109484319f4STue Ly       return static_cast<float>(fputil::multiply_add(xd, xd, xd));
110a2569a76SGuillaume Chatelet #endif // LIBC_TARGET_CPU_HAS_FMA
11164af346bSTue Ly     }
112a5466f04STue Ly 
113da28593dSlntue     constexpr double COEFFS[] = {0x1p-1,
114da28593dSlntue                                  0x1.55555555557ddp-3,
115da28593dSlntue                                  0x1.55555555552fap-5,
116da28593dSlntue                                  0x1.111110fcd58b7p-7,
117da28593dSlntue                                  0x1.6c16c1717660bp-10,
118da28593dSlntue                                  0x1.a0241f0006d62p-13,
119da28593dSlntue                                  0x1.a01e3f8d3c06p-16};
120da28593dSlntue 
12164af346bSTue Ly     // 2^-25 <= |x| < 2^-4
12264af346bSTue Ly     double xd = static_cast<double>(x);
12364af346bSTue Ly     double xsq = xd * xd;
12464af346bSTue Ly     // Degree-8 minimax polynomial generated by Sollya with:
12564af346bSTue Ly     // > display = hexadecimal;
126a5466f04STue Ly     // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
127da28593dSlntue 
128da28593dSlntue     double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]);
129da28593dSlntue     double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]);
130da28593dSlntue     double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]);
131da28593dSlntue 
132da28593dSlntue     double r = fputil::polyeval(xsq, c0, c1, c2, COEFFS[6]);
133c5f8a0a1STue Ly     return static_cast<float>(fputil::multiply_add(r, xsq, xd));
13464af346bSTue Ly   }
13564af346bSTue Ly 
136a5466f04STue Ly   // For -18 < x < 89, to compute expm1(x), we perform the following range
13764af346bSTue Ly   // reduction: find hi, mid, lo such that:
13864af346bSTue Ly   //   x = hi + mid + lo, in which
13964af346bSTue Ly   //     hi is an integer,
14064af346bSTue Ly   //     mid * 2^7 is an integer
14164af346bSTue Ly   //     -2^(-8) <= lo < 2^-8.
14264af346bSTue Ly   // In particular,
14364af346bSTue Ly   //   hi + mid = round(x * 2^7) * 2^(-7).
14464af346bSTue Ly   // Then,
145a5466f04STue Ly   //   expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1.
14664af346bSTue Ly   // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
147a5466f04STue Ly   // respectively.  exp(lo) is computed using a degree-4 minimax polynomial
14864af346bSTue Ly   // generated by Sollya.
14964af346bSTue Ly 
15064af346bSTue Ly   // x_hi = hi + mid.
151628fbbefSTue Ly   float kf = fputil::nearest_integer(x * 0x1.0p7f);
152628fbbefSTue Ly   int x_hi = static_cast<int>(kf);
15364af346bSTue Ly   // Subtract (hi + mid) from x to get lo.
154628fbbefSTue Ly   double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x));
15564af346bSTue Ly   x_hi += 104 << 7;
15664af346bSTue Ly   // hi = x_hi >> 7
15764af346bSTue Ly   double exp_hi = EXP_M1[x_hi >> 7];
15864af346bSTue Ly   // lo = x_hi & 0x0000'007fU;
15964af346bSTue Ly   double exp_mid = EXP_M2[x_hi & 0x7f];
16064af346bSTue Ly   double exp_hi_mid = exp_hi * exp_mid;
161a5466f04STue Ly   // Degree-4 minimax polynomial generated by Sollya with the following
16264af346bSTue Ly   // commands:
16364af346bSTue Ly   //   > display = hexadecimal;
164a5466f04STue Ly   //   > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]);
16564af346bSTue Ly   //   > Q;
166a5466f04STue Ly   double exp_lo =
167a5466f04STue Ly       fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1,
168a5466f04STue Ly                        0x1.555566668e5e7p-3, 0x1.55555555ef243p-5);
169c5f8a0a1STue Ly   return static_cast<float>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0));
1704e5f8b4dSTue Ly }
1714e5f8b4dSTue Ly 
172*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
173