1ce65d4e9SOverMighty //===-- Half-precision 2^x - 1 function -----------------------------------===// 2ce65d4e9SOverMighty // 3ce65d4e9SOverMighty // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ce65d4e9SOverMighty // See https://llvm.org/LICENSE.txt for license information. 5ce65d4e9SOverMighty // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ce65d4e9SOverMighty // 7ce65d4e9SOverMighty //===----------------------------------------------------------------------===// 8ce65d4e9SOverMighty 9ce65d4e9SOverMighty #include "src/math/exp2m1f16.h" 10ce65d4e9SOverMighty #include "expxf16.h" 11ce65d4e9SOverMighty #include "hdr/errno_macros.h" 12ce65d4e9SOverMighty #include "hdr/fenv_macros.h" 13ce65d4e9SOverMighty #include "src/__support/FPUtil/FEnvImpl.h" 14ce65d4e9SOverMighty #include "src/__support/FPUtil/FPBits.h" 15ce65d4e9SOverMighty #include "src/__support/FPUtil/PolyEval.h" 16ce65d4e9SOverMighty #include "src/__support/FPUtil/cast.h" 17ce65d4e9SOverMighty #include "src/__support/FPUtil/except_value_utils.h" 18ce65d4e9SOverMighty #include "src/__support/FPUtil/multiply_add.h" 19ce65d4e9SOverMighty #include "src/__support/FPUtil/rounding_mode.h" 20ce65d4e9SOverMighty #include "src/__support/common.h" 21ce65d4e9SOverMighty #include "src/__support/macros/config.h" 22ce65d4e9SOverMighty #include "src/__support/macros/optimization.h" 23ce65d4e9SOverMighty #include "src/__support/macros/properties/cpu_features.h" 24ce65d4e9SOverMighty 25ce65d4e9SOverMighty namespace LIBC_NAMESPACE_DECL { 26ce65d4e9SOverMighty 27ce65d4e9SOverMighty static constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{ 28ce65d4e9SOverMighty // (input, RZ output, RU offset, RD offset, RN offset) 29ce65d4e9SOverMighty // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ) 30ce65d4e9SOverMighty {0x0b3dU, 0x0904U, 1U, 0U, 1U}, 31ce65d4e9SOverMighty // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ) 32ce65d4e9SOverMighty {0x0d3fU, 0x0b45U, 1U, 0U, 1U}, 33ce65d4e9SOverMighty // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ) 34ce65d4e9SOverMighty {0x118cU, 0x0fb1U, 1U, 0U, 0U}, 35ce65d4e9SOverMighty // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ) 36ce65d4e9SOverMighty {0x21bcU, 0x1ffaU, 1U, 0U, 1U}, 37ce65d4e9SOverMighty // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ) 38ce65d4e9SOverMighty {0x9718U, 0x94eaU, 0U, 1U, 0U}, 39ce65d4e9SOverMighty // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ) 40ce65d4e9SOverMighty {0x973fU, 0x9505U, 0U, 1U, 0U}, 41ce65d4e9SOverMighty }}; 42ce65d4e9SOverMighty 43ce65d4e9SOverMighty #ifdef LIBC_TARGET_CPU_HAS_FMA 44ce65d4e9SOverMighty static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6; 45ce65d4e9SOverMighty #else 46ce65d4e9SOverMighty static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7; 47ce65d4e9SOverMighty #endif 48ce65d4e9SOverMighty 49ce65d4e9SOverMighty static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI> 50ce65d4e9SOverMighty EXP2M1F16_EXCEPTS_HI = {{ 51ce65d4e9SOverMighty // (input, RZ output, RU offset, RD offset, RN offset) 52ce65d4e9SOverMighty // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ) 53ce65d4e9SOverMighty {0x3396U, 0x31b7U, 1U, 0U, 0U}, 54ce65d4e9SOverMighty #ifndef LIBC_TARGET_CPU_HAS_FMA 55ce65d4e9SOverMighty // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ) 56ce65d4e9SOverMighty {0x34baU, 0x3345U, 1U, 0U, 0U}, 57ce65d4e9SOverMighty #endif 58ce65d4e9SOverMighty // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ) 59ce65d4e9SOverMighty {0x36b6U, 0x3566U, 1U, 0U, 0U}, 60ce65d4e9SOverMighty #ifdef LIBC_TARGET_CPU_HAS_FMA 61ce65d4e9SOverMighty // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ) 62ce65d4e9SOverMighty {0x37b7U, 0x3659U, 1U, 0U, 1U}, 63ce65d4e9SOverMighty #endif 64ce65d4e9SOverMighty // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ) 65ce65d4e9SOverMighty {0xb201U, 0xafcdU, 0U, 1U, 1U}, 66ce65d4e9SOverMighty // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ) 67ce65d4e9SOverMighty {0xb3ccU, 0xb0f9U, 0U, 1U, 0U}, 68ce65d4e9SOverMighty // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ) 69ce65d4e9SOverMighty {0xb8a5U, 0xb54cU, 0U, 1U, 1U}, 70ce65d4e9SOverMighty #ifndef LIBC_TARGET_CPU_HAS_FMA 71ce65d4e9SOverMighty // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ) 72ce65d4e9SOverMighty {0xba8dU, 0xb6edU, 0U, 1U, 1U}, 73ce65d4e9SOverMighty #endif 74ce65d4e9SOverMighty }}; 75ce65d4e9SOverMighty 76ce65d4e9SOverMighty LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) { 77ce65d4e9SOverMighty using FPBits = fputil::FPBits<float16>; 78ce65d4e9SOverMighty FPBits x_bits(x); 79ce65d4e9SOverMighty 80ce65d4e9SOverMighty uint16_t x_u = x_bits.uintval(); 81ce65d4e9SOverMighty uint16_t x_abs = x_u & 0x7fffU; 82ce65d4e9SOverMighty 83ce65d4e9SOverMighty // When |x| <= 2^(-3), or |x| >= 11, or x is NaN. 84ce65d4e9SOverMighty if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) { 85ce65d4e9SOverMighty // exp2m1(NaN) = NaN 86ce65d4e9SOverMighty if (x_bits.is_nan()) { 87ce65d4e9SOverMighty if (x_bits.is_signaling_nan()) { 88ce65d4e9SOverMighty fputil::raise_except_if_required(FE_INVALID); 89ce65d4e9SOverMighty return FPBits::quiet_nan().get_val(); 90ce65d4e9SOverMighty } 91ce65d4e9SOverMighty 92ce65d4e9SOverMighty return x; 93ce65d4e9SOverMighty } 94ce65d4e9SOverMighty 95ce65d4e9SOverMighty // When x >= 16. 96ce65d4e9SOverMighty if (x_u >= 0x4c00 && x_bits.is_pos()) { 97ce65d4e9SOverMighty // exp2m1(+inf) = +inf 98ce65d4e9SOverMighty if (x_bits.is_inf()) 99ce65d4e9SOverMighty return FPBits::inf().get_val(); 100ce65d4e9SOverMighty 101ce65d4e9SOverMighty switch (fputil::quick_get_round()) { 102ce65d4e9SOverMighty case FE_TONEAREST: 103ce65d4e9SOverMighty case FE_UPWARD: 104ce65d4e9SOverMighty fputil::set_errno_if_required(ERANGE); 105ce65d4e9SOverMighty fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); 106ce65d4e9SOverMighty return FPBits::inf().get_val(); 107ce65d4e9SOverMighty default: 108ce65d4e9SOverMighty return FPBits::max_normal().get_val(); 109ce65d4e9SOverMighty } 110ce65d4e9SOverMighty } 111ce65d4e9SOverMighty 112ce65d4e9SOverMighty // When x < -11. 113ce65d4e9SOverMighty if (x_u > 0xc980U) { 114ce65d4e9SOverMighty // exp2m1(-inf) = -1 115ce65d4e9SOverMighty if (x_bits.is_inf()) 116ce65d4e9SOverMighty return FPBits::one(Sign::NEG).get_val(); 117ce65d4e9SOverMighty 118ce65d4e9SOverMighty // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1. 119ce65d4e9SOverMighty if (x_u < 0xca00U) 120ce65d4e9SOverMighty return fputil::round_result_slightly_down( 121ce65d4e9SOverMighty fputil::cast<float16>(-0x1.ffcp-1)); 122ce65d4e9SOverMighty 123ce65d4e9SOverMighty // When x <= -12, round(2^x - 1, HP, RN) = -1. 124ce65d4e9SOverMighty switch (fputil::quick_get_round()) { 125ce65d4e9SOverMighty case FE_TONEAREST: 126ce65d4e9SOverMighty case FE_DOWNWARD: 127ce65d4e9SOverMighty return FPBits::one(Sign::NEG).get_val(); 128ce65d4e9SOverMighty default: 129*aabdd8f8SOverMighty return fputil::cast<float16>(-0x1.ffcp-1); 130ce65d4e9SOverMighty } 131ce65d4e9SOverMighty } 132ce65d4e9SOverMighty 133ce65d4e9SOverMighty // When |x| <= 2^(-3). 134ce65d4e9SOverMighty if (x_abs <= 0x3000U) { 135ce65d4e9SOverMighty if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u); 136ce65d4e9SOverMighty LIBC_UNLIKELY(r.has_value())) 137ce65d4e9SOverMighty return r.value(); 138ce65d4e9SOverMighty 139ce65d4e9SOverMighty float xf = x; 140ce65d4e9SOverMighty // Degree-5 minimax polynomial generated by Sollya with the following 141ce65d4e9SOverMighty // commands: 142ce65d4e9SOverMighty // > display = hexadecimal; 143ce65d4e9SOverMighty // > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); 144ce65d4e9SOverMighty // > x * P; 145ce65d4e9SOverMighty return fputil::cast<float16>( 146ce65d4e9SOverMighty xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f, 147ce65d4e9SOverMighty 0x1.c6af88p-5f, 0x1.3b45d6p-7f, 148ce65d4e9SOverMighty 0x1.641e7cp-10f)); 149ce65d4e9SOverMighty } 150ce65d4e9SOverMighty } 151ce65d4e9SOverMighty 152ce65d4e9SOverMighty if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) 153ce65d4e9SOverMighty return r.value(); 154ce65d4e9SOverMighty 155ce65d4e9SOverMighty // exp2(x) = exp2(hi + mid) * exp2(lo) 156ce65d4e9SOverMighty auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); 157ce65d4e9SOverMighty // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1 158ce65d4e9SOverMighty return fputil::cast<float16>( 159ce65d4e9SOverMighty fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f)); 160ce65d4e9SOverMighty } 161ce65d4e9SOverMighty 162ce65d4e9SOverMighty } // namespace LIBC_NAMESPACE_DECL 163