xref: /llvm-project/libc/src/math/generic/tanhf.cpp (revision 5ff3ff33ff930e4ec49da7910612d8a41eb068cb)
15ef987c9SKirill Okhotnikov //===-- Single-precision tanh function ------------------------------------===//
25ef987c9SKirill Okhotnikov //
35ef987c9SKirill Okhotnikov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ef987c9SKirill Okhotnikov // See https://llvm.org/LICENSE.txt for license information.
55ef987c9SKirill Okhotnikov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ef987c9SKirill Okhotnikov //
75ef987c9SKirill Okhotnikov //===----------------------------------------------------------------------===//
85ef987c9SKirill Okhotnikov 
95ef987c9SKirill Okhotnikov #include "src/math/tanhf.h"
105ef987c9SKirill Okhotnikov #include "src/__support/FPUtil/FPBits.h"
115dbd5118STue Ly #include "src/__support/FPUtil/PolyEval.h"
125dbd5118STue Ly #include "src/__support/FPUtil/multiply_add.h"
135dbd5118STue Ly #include "src/__support/FPUtil/nearest_integer.h"
14*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
15737e1cd1SGuillaume Chatelet #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
165dbd5118STue Ly #include "src/__support/macros/properties/cpu_features.h"
1789ed5b7cSKirill Okhotnikov #include "src/math/generic/explogxf.h"
185ef987c9SKirill Okhotnikov 
19*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
205ef987c9SKirill Okhotnikov 
215dbd5118STue Ly // 2^6 * log2(e)
225dbd5118STue Ly constexpr double LOG2_E_EXP2_6 = ExpBase::LOG2_B * 2.0;
235dbd5118STue Ly 
245ef987c9SKirill Okhotnikov LLVM_LIBC_FUNCTION(float, tanhf, (float x)) {
255ef987c9SKirill Okhotnikov   using FPBits = typename fputil::FPBits<float>;
265ef987c9SKirill Okhotnikov   FPBits xbits(x);
27ea43c8eeSGuillaume Chatelet   uint32_t x_abs = xbits.abs().uintval();
285ef987c9SKirill Okhotnikov 
2911ec512fSGuillaume Chatelet   const int sign_index = xbits.is_neg() ? 1 : 0;
3011ec512fSGuillaume Chatelet 
315dbd5118STue Ly   // When |x| >= 15, or x is inf or nan, or |x| <= 0.078125
325dbd5118STue Ly   if (LIBC_UNLIKELY((x_abs >= 0x4170'0000U) || (x_abs <= 0x3da0'0000U))) {
335dbd5118STue Ly     if (x_abs <= 0x3da0'0000U) {
345ef987c9SKirill Okhotnikov       // |x| <= 0.078125
355dbd5118STue Ly       if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
365dbd5118STue Ly         // |x| <= 2^-26
375dbd5118STue Ly         return (x_abs != 0)
385dbd5118STue Ly                    ? static_cast<float>(x - 0x1.5555555555555p-2 * x * x * x)
395dbd5118STue Ly                    : x;
405dbd5118STue Ly       }
415dbd5118STue Ly 
425dbd5118STue Ly       const double TAYLOR[] = {-0x1.5555555555555p-2, 0x1.1111111111111p-3,
435dbd5118STue Ly                                -0x1.ba1ba1ba1ba1cp-5, 0x1.664f4882c10fap-6,
445dbd5118STue Ly                                -0x1.226e355e6c23dp-7};
455ef987c9SKirill Okhotnikov       double xdbl = x;
465ef987c9SKirill Okhotnikov       double x2 = xdbl * xdbl;
475dbd5118STue Ly       // Taylor polynomial.
485dbd5118STue Ly       double x4 = x2 * x2;
495dbd5118STue Ly       double c0 = x2 * TAYLOR[0];
505dbd5118STue Ly       double c1 = fputil::multiply_add(x2, TAYLOR[2], TAYLOR[1]);
515dbd5118STue Ly       double c2 = fputil::multiply_add(x2, TAYLOR[4], TAYLOR[3]);
525dbd5118STue Ly       double pe = fputil::polyeval(x4, c0, c1, c2);
535dbd5118STue Ly 
547d11a592SAlex Brachet       return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl));
555ef987c9SKirill Okhotnikov     }
565ef987c9SKirill Okhotnikov 
575dbd5118STue Ly     // |x| >= 15
585dbd5118STue Ly     if (LIBC_UNLIKELY(xbits.is_nan()))
595dbd5118STue Ly       return x + 1.0f; // sNaN to qNaN + signal
605dbd5118STue Ly 
61e328d193SMichael Jones     constexpr float SIGNS[2][2] = {{1.0f, -0x1.0p-25f}, {-1.0f, 0x1.0p-25f}};
625dbd5118STue Ly 
635dbd5118STue Ly     if (LIBC_UNLIKELY(xbits.is_inf()))
6411ec512fSGuillaume Chatelet       return SIGNS[sign_index][0];
655dbd5118STue Ly 
6611ec512fSGuillaume Chatelet     return SIGNS[sign_index][0] + SIGNS[sign_index][1];
675ef987c9SKirill Okhotnikov   }
685ef987c9SKirill Okhotnikov 
695dbd5118STue Ly   // Range reduction: e^(2x) = 2^(hi + mid) * e^lo
705dbd5118STue Ly   // Let  k = round( x * 2^6 * log2(e)),
715dbd5118STue Ly   // So   k  = (hi + mid) * 2^5
725dbd5118STue Ly   // Then lo = 2x - (hi + mid) * log(2) = 2x - k * 2^-5 * log(2).
735dbd5118STue Ly 
745dbd5118STue Ly   double xd = static_cast<double>(x);
755dbd5118STue Ly   // k = round( x* 2^6 * log2(e) )
765dbd5118STue Ly   double k;
775dbd5118STue Ly   // mk = -k
785dbd5118STue Ly   int mk;
795dbd5118STue Ly #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
805dbd5118STue Ly   k = fputil::nearest_integer(xd * LOG2_E_EXP2_6);
815dbd5118STue Ly   mk = -static_cast<int>(k);
824973eee1STue Ly #else
835dbd5118STue Ly   constexpr double HALF_WAY[2] = {-0.5, 0.5};
845dbd5118STue Ly 
855dbd5118STue Ly   mk = static_cast<int>(
8611ec512fSGuillaume Chatelet       fputil::multiply_add(xd, -LOG2_E_EXP2_6, HALF_WAY[sign_index]));
875dbd5118STue Ly   k = static_cast<double>(-mk);
885dbd5118STue Ly #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
895dbd5118STue Ly   // -hi = floor(-k * 2^(-MID_BITS))
905dbd5118STue Ly   // exp_mhi = shift -hi to the exponent field of double precision.
915dbd5118STue Ly   int64_t exp_mhi = static_cast<int64_t>(mk >> ExpBase::MID_BITS)
92c09e6905SGuillaume Chatelet                     << fputil::FPBits<double>::FRACTION_LEN;
935dbd5118STue Ly   // mh = 2^(-hi - mid)
945dbd5118STue Ly   int64_t mh_bits = ExpBase::EXP_2_MID[mk & ExpBase::MID_MASK] + exp_mhi;
955dbd5118STue Ly   double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
965dbd5118STue Ly   // dx = lo/2 = x - (hi + mid) * log(2)/2 = x - k * 2^-6 * log(2)
975dbd5118STue Ly   double dx = fputil::multiply_add(
985dbd5118STue Ly       k, ExpBase::M_LOGB_2_LO * 0.5,
995dbd5118STue Ly       fputil::multiply_add(k, ExpBase::M_LOGB_2_HI * 0.5, xd));
1005dbd5118STue Ly 
1015dbd5118STue Ly   // > P = fpminimax(expm1(2*x)/x, 4, [|D...|], [-log(2)/128, log(2)/128]);
1025dbd5118STue Ly   constexpr double COEFFS[] = {0x1.ffffffffe5bc8p0, 0x1.555555555cd67p0,
1035dbd5118STue Ly                                0x1.5555c2a9b48b4p-1, 0x1.11112a0e34bdbp-2};
1045dbd5118STue Ly 
1055dbd5118STue Ly   double dx2 = dx * dx;
1065dbd5118STue Ly   double c0 = fputil::multiply_add(dx, 2.0, 1.0);
1075dbd5118STue Ly   double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
1085dbd5118STue Ly   double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
1095dbd5118STue Ly   double r = fputil::polyeval(dx2, c0, c1, c2);
1105dbd5118STue Ly 
1115dbd5118STue Ly   // tanh(x) = sinh(x) / cosh(x)
1125dbd5118STue Ly   //         = (e^x - e^(-x)) / (e^x + e^(-x))
1135dbd5118STue Ly   //         = (e^(2x) - 1) / (e^(2x) + 1)
1145dbd5118STue Ly   //         = (2^(hi + mid) * e^lo - 1) / (2^(hi + mid) * e^lo + 1)
1155dbd5118STue Ly   //         = (e^lo - 2^(-hi - mid)) / (e^lo + 2^(-hi - mid))
1165dbd5118STue Ly   //         = (r - mh) / (r + mh)
1175dbd5118STue Ly   return static_cast<float>((r - mh) / (r + mh));
1185ef987c9SKirill Okhotnikov }
1195ef987c9SKirill Okhotnikov 
120*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
121