165cf7afbSOverMighty //===-- Half-precision log(x) function ------------------------------------===// 265cf7afbSOverMighty // 365cf7afbSOverMighty // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 465cf7afbSOverMighty // See https://llvm.org/LICENSE.txt for license information. 565cf7afbSOverMighty // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 665cf7afbSOverMighty // 765cf7afbSOverMighty //===----------------------------------------------------------------------===// 865cf7afbSOverMighty 965cf7afbSOverMighty #include "src/math/logf16.h" 1065cf7afbSOverMighty #include "expxf16.h" 1165cf7afbSOverMighty #include "hdr/errno_macros.h" 1265cf7afbSOverMighty #include "hdr/fenv_macros.h" 1365cf7afbSOverMighty #include "src/__support/FPUtil/FEnvImpl.h" 1465cf7afbSOverMighty #include "src/__support/FPUtil/FPBits.h" 1565cf7afbSOverMighty #include "src/__support/FPUtil/PolyEval.h" 1665cf7afbSOverMighty #include "src/__support/FPUtil/cast.h" 1765cf7afbSOverMighty #include "src/__support/FPUtil/except_value_utils.h" 1865cf7afbSOverMighty #include "src/__support/FPUtil/multiply_add.h" 1965cf7afbSOverMighty #include "src/__support/common.h" 2065cf7afbSOverMighty #include "src/__support/macros/config.h" 2165cf7afbSOverMighty #include "src/__support/macros/optimization.h" 2265cf7afbSOverMighty #include "src/__support/macros/properties/cpu_features.h" 2365cf7afbSOverMighty 2465cf7afbSOverMighty namespace LIBC_NAMESPACE_DECL { 2565cf7afbSOverMighty 2665cf7afbSOverMighty #ifdef LIBC_TARGET_CPU_HAS_FMA 2765cf7afbSOverMighty static constexpr size_t N_LOGF16_EXCEPTS = 5; 2865cf7afbSOverMighty #else 2965cf7afbSOverMighty static constexpr size_t N_LOGF16_EXCEPTS = 11; 3065cf7afbSOverMighty #endif 3165cf7afbSOverMighty 3265cf7afbSOverMighty static constexpr fputil::ExceptValues<float16, N_LOGF16_EXCEPTS> 3365cf7afbSOverMighty LOGF16_EXCEPTS = {{ 3465cf7afbSOverMighty // (input, RZ output, RU offset, RD offset, RN offset) 3565cf7afbSOverMighty #ifndef LIBC_TARGET_CPU_HAS_FMA 3665cf7afbSOverMighty // x = 0x1.61cp-13, logf16(x) = -0x1.16p+3 (RZ) 3765cf7afbSOverMighty {0x0987U, 0xc858U, 0U, 1U, 0U}, 3865cf7afbSOverMighty // x = 0x1.f2p-12, logf16(x) = -0x1.e98p+2 (RZ) 3965cf7afbSOverMighty {0x0fc8U, 0xc7a6U, 0U, 1U, 1U}, 4065cf7afbSOverMighty #endif 4165cf7afbSOverMighty // x = 0x1.4d4p-9, logf16(x) = -0x1.7e4p+2 (RZ) 4265cf7afbSOverMighty {0x1935U, 0xc5f9U, 0U, 1U, 0U}, 4365cf7afbSOverMighty // x = 0x1.5ep-8, logf16(x) = -0x1.4ecp+2 (RZ) 4465cf7afbSOverMighty {0x1d78U, 0xc53bU, 0U, 1U, 0U}, 4565cf7afbSOverMighty #ifndef LIBC_TARGET_CPU_HAS_FMA 4665cf7afbSOverMighty // x = 0x1.fdp-1, logf16(x) = -0x1.81p-8 (RZ) 4765cf7afbSOverMighty {0x3bf4U, 0x9e04U, 0U, 1U, 1U}, 4865cf7afbSOverMighty // x = 0x1.fep-1, logf16(x) = -0x1.008p-8 (RZ) 4965cf7afbSOverMighty {0x3bf8U, 0x9c02U, 0U, 1U, 0U}, 5065cf7afbSOverMighty #endif 5165cf7afbSOverMighty // x = 0x1.ffp-1, logf16(x) = -0x1.004p-9 (RZ) 5265cf7afbSOverMighty {0x3bfcU, 0x9801U, 0U, 1U, 0U}, 5365cf7afbSOverMighty // x = 0x1.ff8p-1, logf16(x) = -0x1p-10 (RZ) 5465cf7afbSOverMighty {0x3bfeU, 0x9400U, 0U, 1U, 1U}, 5565cf7afbSOverMighty #ifdef LIBC_TARGET_CPU_HAS_FMA 5665cf7afbSOverMighty // x = 0x1.4c4p+1, logf16(x) = 0x1.e84p-1 (RZ) 5765cf7afbSOverMighty {0x4131U, 0x3ba1U, 1U, 0U, 1U}, 5865cf7afbSOverMighty #else 5965cf7afbSOverMighty // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ) 6065cf7afbSOverMighty {0x45d4U, 0x3f0dU, 1U, 0U, 0U}, 6165cf7afbSOverMighty // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ) 6265cf7afbSOverMighty {0x45d4U, 0x3f0dU, 1U, 0U, 0U}, 6365cf7afbSOverMighty // x = 0x1.d5p+9, logf16(x) = 0x1.b5cp+2 (RZ) 6465cf7afbSOverMighty {0x6354U, 0x46d7U, 1U, 0U, 1U}, 6565cf7afbSOverMighty #endif 6665cf7afbSOverMighty }}; 6765cf7afbSOverMighty 6865cf7afbSOverMighty LLVM_LIBC_FUNCTION(float16, logf16, (float16 x)) { 6965cf7afbSOverMighty using FPBits = fputil::FPBits<float16>; 7065cf7afbSOverMighty FPBits x_bits(x); 7165cf7afbSOverMighty 7265cf7afbSOverMighty uint16_t x_u = x_bits.uintval(); 7365cf7afbSOverMighty 7465cf7afbSOverMighty // If x <= 0, or x is 1, or x is +inf, or x is NaN. 7565cf7afbSOverMighty if (LIBC_UNLIKELY(x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U)) { 7665cf7afbSOverMighty // log(NaN) = NaN 7765cf7afbSOverMighty if (x_bits.is_nan()) { 7865cf7afbSOverMighty if (x_bits.is_signaling_nan()) { 7965cf7afbSOverMighty fputil::raise_except_if_required(FE_INVALID); 8065cf7afbSOverMighty return FPBits::quiet_nan().get_val(); 8165cf7afbSOverMighty } 8265cf7afbSOverMighty 8365cf7afbSOverMighty return x; 8465cf7afbSOverMighty } 8565cf7afbSOverMighty 8665cf7afbSOverMighty // log(+/-0) = −inf 8765cf7afbSOverMighty if ((x_u & 0x7fffU) == 0U) { 8865cf7afbSOverMighty fputil::raise_except_if_required(FE_DIVBYZERO); 8965cf7afbSOverMighty return FPBits::inf(Sign::NEG).get_val(); 9065cf7afbSOverMighty } 9165cf7afbSOverMighty 9265cf7afbSOverMighty if (x_u == 0x3c00U) 9365cf7afbSOverMighty return FPBits::zero().get_val(); 9465cf7afbSOverMighty 9565cf7afbSOverMighty // When x < 0. 9665cf7afbSOverMighty if (x_u > 0x8000U) { 9765cf7afbSOverMighty fputil::set_errno_if_required(EDOM); 9865cf7afbSOverMighty fputil::raise_except_if_required(FE_INVALID); 9965cf7afbSOverMighty return FPBits::quiet_nan().get_val(); 10065cf7afbSOverMighty } 10165cf7afbSOverMighty 10265cf7afbSOverMighty // log(+inf) = +inf 10365cf7afbSOverMighty return FPBits::inf().get_val(); 10465cf7afbSOverMighty } 10565cf7afbSOverMighty 10665cf7afbSOverMighty if (auto r = LOGF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) 10765cf7afbSOverMighty return r.value(); 10865cf7afbSOverMighty 10965cf7afbSOverMighty // To compute log(x), we perform the following range reduction: 11065cf7afbSOverMighty // x = 2^m * 1.mant, 11165cf7afbSOverMighty // log(x) = m * log(2) + log(1.mant). 11265cf7afbSOverMighty // To compute log(1.mant), let f be the highest 6 bits including the hidden 11365cf7afbSOverMighty // bit, and d be the difference (1.mant - f), i.e., the remaining 5 bits of 11465cf7afbSOverMighty // the mantissa, then: 11565cf7afbSOverMighty // log(1.mant) = log(f) + log(1.mant / f) 11665cf7afbSOverMighty // = log(f) + log(1 + d/f) 11765cf7afbSOverMighty // since d/f is sufficiently small. 118*6d347fdfSOverMighty // We store log(f) and 1/f in the lookup tables LOGF_F and ONE_OVER_F_F 11965cf7afbSOverMighty // respectively. 12065cf7afbSOverMighty 12165cf7afbSOverMighty int m = -FPBits::EXP_BIAS; 12265cf7afbSOverMighty 12365cf7afbSOverMighty // When x is subnormal, normalize it. 12465cf7afbSOverMighty if ((x_u & FPBits::EXP_MASK) == 0U) { 12565cf7afbSOverMighty // Can't pass an integer to fputil::cast directly. 12665cf7afbSOverMighty constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN; 12765cf7afbSOverMighty x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP)); 12865cf7afbSOverMighty x_u = x_bits.uintval(); 12965cf7afbSOverMighty m -= FPBits::FRACTION_LEN; 13065cf7afbSOverMighty } 13165cf7afbSOverMighty 13265cf7afbSOverMighty uint16_t mant = x_bits.get_mantissa(); 13365cf7afbSOverMighty // Leading 10 - 5 = 5 bits of the mantissa. 13465cf7afbSOverMighty int f = mant >> 5; 13565cf7afbSOverMighty // Unbiased exponent. 13665cf7afbSOverMighty m += x_u >> FPBits::FRACTION_LEN; 13765cf7afbSOverMighty 13865cf7afbSOverMighty // Set bits to 1.mant instead of 2^m * 1.mant. 13965cf7afbSOverMighty x_bits.set_biased_exponent(FPBits::EXP_BIAS); 14065cf7afbSOverMighty float mant_f = x_bits.get_val(); 14165cf7afbSOverMighty // v = 1.mant * 1/f - 1 = d/f 14265cf7afbSOverMighty float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f); 14365cf7afbSOverMighty 14465cf7afbSOverMighty // Degree-3 minimax polynomial generated by Sollya with the following 14565cf7afbSOverMighty // commands: 14665cf7afbSOverMighty // > display = hexadecimal; 14765cf7afbSOverMighty // > P = fpminimax(log(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]); 14865cf7afbSOverMighty // > x * P; 14965cf7afbSOverMighty float log1p_d_over_f = 15065cf7afbSOverMighty v * fputil::polyeval(v, 0x1p+0f, -0x1.001804p-1f, 0x1.557ef6p-2f); 15165cf7afbSOverMighty // log(1.mant) = log(f) + log(1 + d/f) 15265cf7afbSOverMighty float log_1_mant = LOGF_F[f] + log1p_d_over_f; 15365cf7afbSOverMighty return fputil::cast<float16>( 15465cf7afbSOverMighty fputil::multiply_add(static_cast<float>(m), LOGF_2, log_1_mant)); 15565cf7afbSOverMighty } 15665cf7afbSOverMighty 15765cf7afbSOverMighty } // namespace LIBC_NAMESPACE_DECL 158