xref: /llvm-project/libc/src/math/generic/logf16.cpp (revision 6d347fdfbd018b6555a754219fda461e166f2a64)
165cf7afbSOverMighty //===-- Half-precision log(x) function ------------------------------------===//
265cf7afbSOverMighty //
365cf7afbSOverMighty // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
465cf7afbSOverMighty // See https://llvm.org/LICENSE.txt for license information.
565cf7afbSOverMighty // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
665cf7afbSOverMighty //
765cf7afbSOverMighty //===----------------------------------------------------------------------===//
865cf7afbSOverMighty 
965cf7afbSOverMighty #include "src/math/logf16.h"
1065cf7afbSOverMighty #include "expxf16.h"
1165cf7afbSOverMighty #include "hdr/errno_macros.h"
1265cf7afbSOverMighty #include "hdr/fenv_macros.h"
1365cf7afbSOverMighty #include "src/__support/FPUtil/FEnvImpl.h"
1465cf7afbSOverMighty #include "src/__support/FPUtil/FPBits.h"
1565cf7afbSOverMighty #include "src/__support/FPUtil/PolyEval.h"
1665cf7afbSOverMighty #include "src/__support/FPUtil/cast.h"
1765cf7afbSOverMighty #include "src/__support/FPUtil/except_value_utils.h"
1865cf7afbSOverMighty #include "src/__support/FPUtil/multiply_add.h"
1965cf7afbSOverMighty #include "src/__support/common.h"
2065cf7afbSOverMighty #include "src/__support/macros/config.h"
2165cf7afbSOverMighty #include "src/__support/macros/optimization.h"
2265cf7afbSOverMighty #include "src/__support/macros/properties/cpu_features.h"
2365cf7afbSOverMighty 
2465cf7afbSOverMighty namespace LIBC_NAMESPACE_DECL {
2565cf7afbSOverMighty 
2665cf7afbSOverMighty #ifdef LIBC_TARGET_CPU_HAS_FMA
2765cf7afbSOverMighty static constexpr size_t N_LOGF16_EXCEPTS = 5;
2865cf7afbSOverMighty #else
2965cf7afbSOverMighty static constexpr size_t N_LOGF16_EXCEPTS = 11;
3065cf7afbSOverMighty #endif
3165cf7afbSOverMighty 
3265cf7afbSOverMighty static constexpr fputil::ExceptValues<float16, N_LOGF16_EXCEPTS>
3365cf7afbSOverMighty     LOGF16_EXCEPTS = {{
3465cf7afbSOverMighty // (input, RZ output, RU offset, RD offset, RN offset)
3565cf7afbSOverMighty #ifndef LIBC_TARGET_CPU_HAS_FMA
3665cf7afbSOverMighty         // x = 0x1.61cp-13, logf16(x) = -0x1.16p+3 (RZ)
3765cf7afbSOverMighty         {0x0987U, 0xc858U, 0U, 1U, 0U},
3865cf7afbSOverMighty         // x = 0x1.f2p-12, logf16(x) = -0x1.e98p+2 (RZ)
3965cf7afbSOverMighty         {0x0fc8U, 0xc7a6U, 0U, 1U, 1U},
4065cf7afbSOverMighty #endif
4165cf7afbSOverMighty         // x = 0x1.4d4p-9, logf16(x) = -0x1.7e4p+2 (RZ)
4265cf7afbSOverMighty         {0x1935U, 0xc5f9U, 0U, 1U, 0U},
4365cf7afbSOverMighty         // x = 0x1.5ep-8, logf16(x) = -0x1.4ecp+2 (RZ)
4465cf7afbSOverMighty         {0x1d78U, 0xc53bU, 0U, 1U, 0U},
4565cf7afbSOverMighty #ifndef LIBC_TARGET_CPU_HAS_FMA
4665cf7afbSOverMighty         // x = 0x1.fdp-1, logf16(x) = -0x1.81p-8 (RZ)
4765cf7afbSOverMighty         {0x3bf4U, 0x9e04U, 0U, 1U, 1U},
4865cf7afbSOverMighty         // x = 0x1.fep-1, logf16(x) = -0x1.008p-8 (RZ)
4965cf7afbSOverMighty         {0x3bf8U, 0x9c02U, 0U, 1U, 0U},
5065cf7afbSOverMighty #endif
5165cf7afbSOverMighty         // x = 0x1.ffp-1, logf16(x) = -0x1.004p-9 (RZ)
5265cf7afbSOverMighty         {0x3bfcU, 0x9801U, 0U, 1U, 0U},
5365cf7afbSOverMighty         // x = 0x1.ff8p-1, logf16(x) = -0x1p-10 (RZ)
5465cf7afbSOverMighty         {0x3bfeU, 0x9400U, 0U, 1U, 1U},
5565cf7afbSOverMighty #ifdef LIBC_TARGET_CPU_HAS_FMA
5665cf7afbSOverMighty         // x = 0x1.4c4p+1, logf16(x) = 0x1.e84p-1 (RZ)
5765cf7afbSOverMighty         {0x4131U, 0x3ba1U, 1U, 0U, 1U},
5865cf7afbSOverMighty #else
5965cf7afbSOverMighty         // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ)
6065cf7afbSOverMighty         {0x45d4U, 0x3f0dU, 1U, 0U, 0U},
6165cf7afbSOverMighty         // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ)
6265cf7afbSOverMighty         {0x45d4U, 0x3f0dU, 1U, 0U, 0U},
6365cf7afbSOverMighty         // x = 0x1.d5p+9, logf16(x) = 0x1.b5cp+2 (RZ)
6465cf7afbSOverMighty         {0x6354U, 0x46d7U, 1U, 0U, 1U},
6565cf7afbSOverMighty #endif
6665cf7afbSOverMighty     }};
6765cf7afbSOverMighty 
6865cf7afbSOverMighty LLVM_LIBC_FUNCTION(float16, logf16, (float16 x)) {
6965cf7afbSOverMighty   using FPBits = fputil::FPBits<float16>;
7065cf7afbSOverMighty   FPBits x_bits(x);
7165cf7afbSOverMighty 
7265cf7afbSOverMighty   uint16_t x_u = x_bits.uintval();
7365cf7afbSOverMighty 
7465cf7afbSOverMighty   // If x <= 0, or x is 1, or x is +inf, or x is NaN.
7565cf7afbSOverMighty   if (LIBC_UNLIKELY(x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U)) {
7665cf7afbSOverMighty     // log(NaN) = NaN
7765cf7afbSOverMighty     if (x_bits.is_nan()) {
7865cf7afbSOverMighty       if (x_bits.is_signaling_nan()) {
7965cf7afbSOverMighty         fputil::raise_except_if_required(FE_INVALID);
8065cf7afbSOverMighty         return FPBits::quiet_nan().get_val();
8165cf7afbSOverMighty       }
8265cf7afbSOverMighty 
8365cf7afbSOverMighty       return x;
8465cf7afbSOverMighty     }
8565cf7afbSOverMighty 
8665cf7afbSOverMighty     // log(+/-0) = −inf
8765cf7afbSOverMighty     if ((x_u & 0x7fffU) == 0U) {
8865cf7afbSOverMighty       fputil::raise_except_if_required(FE_DIVBYZERO);
8965cf7afbSOverMighty       return FPBits::inf(Sign::NEG).get_val();
9065cf7afbSOverMighty     }
9165cf7afbSOverMighty 
9265cf7afbSOverMighty     if (x_u == 0x3c00U)
9365cf7afbSOverMighty       return FPBits::zero().get_val();
9465cf7afbSOverMighty 
9565cf7afbSOverMighty     // When x < 0.
9665cf7afbSOverMighty     if (x_u > 0x8000U) {
9765cf7afbSOverMighty       fputil::set_errno_if_required(EDOM);
9865cf7afbSOverMighty       fputil::raise_except_if_required(FE_INVALID);
9965cf7afbSOverMighty       return FPBits::quiet_nan().get_val();
10065cf7afbSOverMighty     }
10165cf7afbSOverMighty 
10265cf7afbSOverMighty     // log(+inf) = +inf
10365cf7afbSOverMighty     return FPBits::inf().get_val();
10465cf7afbSOverMighty   }
10565cf7afbSOverMighty 
10665cf7afbSOverMighty   if (auto r = LOGF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
10765cf7afbSOverMighty     return r.value();
10865cf7afbSOverMighty 
10965cf7afbSOverMighty   // To compute log(x), we perform the following range reduction:
11065cf7afbSOverMighty   //   x = 2^m * 1.mant,
11165cf7afbSOverMighty   //   log(x) = m * log(2) + log(1.mant).
11265cf7afbSOverMighty   // To compute log(1.mant), let f be the highest 6 bits including the hidden
11365cf7afbSOverMighty   // bit, and d be the difference (1.mant - f), i.e., the remaining 5 bits of
11465cf7afbSOverMighty   // the mantissa, then:
11565cf7afbSOverMighty   //   log(1.mant) = log(f) + log(1.mant / f)
11665cf7afbSOverMighty   //               = log(f) + log(1 + d/f)
11765cf7afbSOverMighty   // since d/f is sufficiently small.
118*6d347fdfSOverMighty   // We store log(f) and 1/f in the lookup tables LOGF_F and ONE_OVER_F_F
11965cf7afbSOverMighty   // respectively.
12065cf7afbSOverMighty 
12165cf7afbSOverMighty   int m = -FPBits::EXP_BIAS;
12265cf7afbSOverMighty 
12365cf7afbSOverMighty   // When x is subnormal, normalize it.
12465cf7afbSOverMighty   if ((x_u & FPBits::EXP_MASK) == 0U) {
12565cf7afbSOverMighty     // Can't pass an integer to fputil::cast directly.
12665cf7afbSOverMighty     constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN;
12765cf7afbSOverMighty     x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP));
12865cf7afbSOverMighty     x_u = x_bits.uintval();
12965cf7afbSOverMighty     m -= FPBits::FRACTION_LEN;
13065cf7afbSOverMighty   }
13165cf7afbSOverMighty 
13265cf7afbSOverMighty   uint16_t mant = x_bits.get_mantissa();
13365cf7afbSOverMighty   // Leading 10 - 5 = 5 bits of the mantissa.
13465cf7afbSOverMighty   int f = mant >> 5;
13565cf7afbSOverMighty   // Unbiased exponent.
13665cf7afbSOverMighty   m += x_u >> FPBits::FRACTION_LEN;
13765cf7afbSOverMighty 
13865cf7afbSOverMighty   // Set bits to 1.mant instead of 2^m * 1.mant.
13965cf7afbSOverMighty   x_bits.set_biased_exponent(FPBits::EXP_BIAS);
14065cf7afbSOverMighty   float mant_f = x_bits.get_val();
14165cf7afbSOverMighty   // v = 1.mant * 1/f - 1 = d/f
14265cf7afbSOverMighty   float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f);
14365cf7afbSOverMighty 
14465cf7afbSOverMighty   // Degree-3 minimax polynomial generated by Sollya with the following
14565cf7afbSOverMighty   // commands:
14665cf7afbSOverMighty   //   > display = hexadecimal;
14765cf7afbSOverMighty   //   > P = fpminimax(log(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
14865cf7afbSOverMighty   //   > x * P;
14965cf7afbSOverMighty   float log1p_d_over_f =
15065cf7afbSOverMighty       v * fputil::polyeval(v, 0x1p+0f, -0x1.001804p-1f, 0x1.557ef6p-2f);
15165cf7afbSOverMighty   // log(1.mant) = log(f) + log(1 + d/f)
15265cf7afbSOverMighty   float log_1_mant = LOGF_F[f] + log1p_d_over_f;
15365cf7afbSOverMighty   return fputil::cast<float16>(
15465cf7afbSOverMighty       fputil::multiply_add(static_cast<float>(m), LOGF_2, log_1_mant));
15565cf7afbSOverMighty }
15665cf7afbSOverMighty 
15765cf7afbSOverMighty } // namespace LIBC_NAMESPACE_DECL
158