1*6a865b6dSwldfngrs //===-- Half-precision cos(x) function ------------------------------------===// 2*6a865b6dSwldfngrs // 3*6a865b6dSwldfngrs // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*6a865b6dSwldfngrs // See https://llvm.org/LICENSE.txt for license information. 5*6a865b6dSwldfngrs // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*6a865b6dSwldfngrs // 7*6a865b6dSwldfngrs //===----------------------------------------------------------------------===// 8*6a865b6dSwldfngrs 9*6a865b6dSwldfngrs #include "src/math/cosf16.h" 10*6a865b6dSwldfngrs #include "hdr/errno_macros.h" 11*6a865b6dSwldfngrs #include "hdr/fenv_macros.h" 12*6a865b6dSwldfngrs #include "sincosf16_utils.h" 13*6a865b6dSwldfngrs #include "src/__support/FPUtil/FEnvImpl.h" 14*6a865b6dSwldfngrs #include "src/__support/FPUtil/FPBits.h" 15*6a865b6dSwldfngrs #include "src/__support/FPUtil/cast.h" 16*6a865b6dSwldfngrs #include "src/__support/FPUtil/except_value_utils.h" 17*6a865b6dSwldfngrs #include "src/__support/FPUtil/multiply_add.h" 18*6a865b6dSwldfngrs #include "src/__support/macros/optimization.h" 19*6a865b6dSwldfngrs 20*6a865b6dSwldfngrs namespace LIBC_NAMESPACE_DECL { 21*6a865b6dSwldfngrs 22*6a865b6dSwldfngrs constexpr size_t N_EXCEPTS = 4; 23*6a865b6dSwldfngrs 24*6a865b6dSwldfngrs constexpr fputil::ExceptValues<float16, N_EXCEPTS> COSF16_EXCEPTS{{ 25*6a865b6dSwldfngrs // (input, RZ output, RU offset, RD offset, RN offset) 26*6a865b6dSwldfngrs {0x2b7c, 0x3bfc, 1, 0, 1}, 27*6a865b6dSwldfngrs {0x4ac1, 0x38b5, 1, 0, 0}, 28*6a865b6dSwldfngrs {0x5c49, 0xb8c6, 0, 1, 0}, 29*6a865b6dSwldfngrs {0x7acc, 0xa474, 0, 1, 0}, 30*6a865b6dSwldfngrs }}; 31*6a865b6dSwldfngrs 32*6a865b6dSwldfngrs LLVM_LIBC_FUNCTION(float16, cosf16, (float16 x)) { 33*6a865b6dSwldfngrs using FPBits = fputil::FPBits<float16>; 34*6a865b6dSwldfngrs FPBits xbits(x); 35*6a865b6dSwldfngrs 36*6a865b6dSwldfngrs uint16_t x_u = xbits.uintval(); 37*6a865b6dSwldfngrs uint16_t x_abs = x_u & 0x7fff; 38*6a865b6dSwldfngrs float xf = x; 39*6a865b6dSwldfngrs 40*6a865b6dSwldfngrs // Range reduction: 41*6a865b6dSwldfngrs // For |x| > pi/32, we perform range reduction as follows: 42*6a865b6dSwldfngrs // Find k and y such that: 43*6a865b6dSwldfngrs // x = (k + y) * pi/32 44*6a865b6dSwldfngrs // k is an integer, |y| < 0.5 45*6a865b6dSwldfngrs // 46*6a865b6dSwldfngrs // This is done by performing: 47*6a865b6dSwldfngrs // k = round(x * 32/pi) 48*6a865b6dSwldfngrs // y = x * 32/pi - k 49*6a865b6dSwldfngrs // 50*6a865b6dSwldfngrs // Once k and y are computed, we then deduce the answer by the cosine of sum 51*6a865b6dSwldfngrs // formula: 52*6a865b6dSwldfngrs // cos(x) = cos((k + y) * pi/32) 53*6a865b6dSwldfngrs // = cos(k * pi/32) * cos(y * pi/32) - 54*6a865b6dSwldfngrs // sin(k * pi/32) * sin(y * pi/32) 55*6a865b6dSwldfngrs 56*6a865b6dSwldfngrs // Handle exceptional values 57*6a865b6dSwldfngrs if (auto r = COSF16_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value())) 58*6a865b6dSwldfngrs return r.value(); 59*6a865b6dSwldfngrs 60*6a865b6dSwldfngrs // cos(+/-0) = 1 61*6a865b6dSwldfngrs if (LIBC_UNLIKELY(x_abs == 0U)) 62*6a865b6dSwldfngrs return fputil::cast<float16>(1.0f); 63*6a865b6dSwldfngrs 64*6a865b6dSwldfngrs // cos(+/-inf) = NaN, and cos(NaN) = NaN 65*6a865b6dSwldfngrs if (xbits.is_inf_or_nan()) { 66*6a865b6dSwldfngrs if (xbits.is_inf()) { 67*6a865b6dSwldfngrs fputil::set_errno_if_required(EDOM); 68*6a865b6dSwldfngrs fputil::raise_except_if_required(FE_INVALID); 69*6a865b6dSwldfngrs } 70*6a865b6dSwldfngrs 71*6a865b6dSwldfngrs return x + FPBits::quiet_nan().get_val(); 72*6a865b6dSwldfngrs } 73*6a865b6dSwldfngrs 74*6a865b6dSwldfngrs float sin_k, cos_k, sin_y, cosm1_y; 75*6a865b6dSwldfngrs sincosf16_eval(xf, sin_k, cos_k, sin_y, cosm1_y); 76*6a865b6dSwldfngrs // Since, cosm1_y = cos_y - 1, therefore: 77*6a865b6dSwldfngrs // cos(x) = cos_k * cos_y - sin_k * sin_y 78*6a865b6dSwldfngrs // = cos_k * (cos_y - 1 + 1) - sin_k * sin_y 79*6a865b6dSwldfngrs // = cos_k * cosm1_y - sin_k * sin_y + cos_k 80*6a865b6dSwldfngrs return fputil::cast<float16>(fputil::multiply_add( 81*6a865b6dSwldfngrs cos_k, cosm1_y, fputil::multiply_add(-sin_k, sin_y, cos_k))); 82*6a865b6dSwldfngrs } 83*6a865b6dSwldfngrs 84*6a865b6dSwldfngrs } // namespace LIBC_NAMESPACE_DECL 85