116903aceSlntue //===-- Double-precision sin function -------------------------------------===// 216903aceSlntue // 316903aceSlntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 416903aceSlntue // See https://llvm.org/LICENSE.txt for license information. 516903aceSlntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 616903aceSlntue // 716903aceSlntue //===----------------------------------------------------------------------===// 816903aceSlntue 916903aceSlntue #include "src/math/sin.h" 1016903aceSlntue #include "hdr/errno_macros.h" 1116903aceSlntue #include "src/__support/FPUtil/FEnvImpl.h" 1216903aceSlntue #include "src/__support/FPUtil/FPBits.h" 1316903aceSlntue #include "src/__support/FPUtil/double_double.h" 1416903aceSlntue #include "src/__support/FPUtil/dyadic_float.h" 1516903aceSlntue #include "src/__support/FPUtil/multiply_add.h" 1616903aceSlntue #include "src/__support/FPUtil/rounding_mode.h" 1716903aceSlntue #include "src/__support/common.h" 185ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 1916903aceSlntue #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY 2016903aceSlntue #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA 2151e9430aSlntue #include "src/math/generic/range_reduction_double_common.h" 2216903aceSlntue #include "src/math/generic/sincos_eval.h" 2316903aceSlntue 2451e9430aSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA 2551e9430aSlntue #include "range_reduction_double_fma.h" 2651e9430aSlntue #else 2751e9430aSlntue #include "range_reduction_double_nofma.h" 2851e9430aSlntue #endif // LIBC_TARGET_CPU_HAS_FMA 2916903aceSlntue 305ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 3116903aceSlntue 3216903aceSlntue using DoubleDouble = fputil::DoubleDouble; 3316903aceSlntue using Float128 = typename fputil::DyadicFloat<128>; 3416903aceSlntue 3516903aceSlntue LLVM_LIBC_FUNCTION(double, sin, (double x)) { 3616903aceSlntue using FPBits = typename fputil::FPBits<double>; 3716903aceSlntue FPBits xbits(x); 3816903aceSlntue 3916903aceSlntue uint16_t x_e = xbits.get_biased_exponent(); 4016903aceSlntue 4116903aceSlntue DoubleDouble y; 4216903aceSlntue unsigned k; 4351e9430aSlntue LargeRangeReduction range_reduction_large{}; 4416903aceSlntue 4551e9430aSlntue // |x| < 2^16 4616903aceSlntue if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { 4751e9430aSlntue // |x| < 2^-7 4851e9430aSlntue if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) { 4951e9430aSlntue // |x| < 2^-26, |sin(x) - x| < ulp(x)/2. 5016903aceSlntue if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 26)) { 5116903aceSlntue // Signed zeros. 5216903aceSlntue if (LIBC_UNLIKELY(x == 0.0)) 53*0f4b3c40Slntue return x + x; // Make sure it works with FTZ/DAZ. 5416903aceSlntue 5516903aceSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA 5616903aceSlntue return fputil::multiply_add(x, -0x1.0p-54, x); 5716903aceSlntue #else 5816903aceSlntue if (LIBC_UNLIKELY(x_e < 4)) { 5916903aceSlntue int rounding_mode = fputil::quick_get_round(); 6016903aceSlntue if (rounding_mode == FE_TOWARDZERO || 6116903aceSlntue (xbits.sign() == Sign::POS && rounding_mode == FE_DOWNWARD) || 6216903aceSlntue (xbits.sign() == Sign::NEG && rounding_mode == FE_UPWARD)) 6316903aceSlntue return FPBits(xbits.uintval() - 1).get_val(); 6416903aceSlntue } 6516903aceSlntue return fputil::multiply_add(x, -0x1.0p-54, x); 6616903aceSlntue #endif // LIBC_TARGET_CPU_HAS_FMA 6716903aceSlntue } 6851e9430aSlntue // No range reduction needed. 6951e9430aSlntue k = 0; 7051e9430aSlntue y.lo = 0.0; 7151e9430aSlntue y.hi = x; 7251e9430aSlntue } else { 7351e9430aSlntue // Small range reduction. 7416903aceSlntue k = range_reduction_small(x, y); 7551e9430aSlntue } 7616903aceSlntue } else { 7716903aceSlntue // Inf or NaN 7816903aceSlntue if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) { 7916903aceSlntue // sin(+-Inf) = NaN 8016903aceSlntue if (xbits.get_mantissa() == 0) { 8116903aceSlntue fputil::set_errno_if_required(EDOM); 8216903aceSlntue fputil::raise_except_if_required(FE_INVALID); 8316903aceSlntue } 8416903aceSlntue return x + FPBits::quiet_nan().get_val(); 8516903aceSlntue } 8616903aceSlntue 8716903aceSlntue // Large range reduction. 8851e9430aSlntue k = range_reduction_large.fast(x, y); 8916903aceSlntue } 9016903aceSlntue 9116903aceSlntue DoubleDouble sin_y, cos_y; 9216903aceSlntue 9351e9430aSlntue [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); 9416903aceSlntue 9516903aceSlntue // Look up sin(k * pi/128) and cos(k * pi/128) 9651e9430aSlntue #ifdef LIBC_MATH_HAS_SMALL_TABLES 9751e9430aSlntue // Memory saving versions. Use 65-entry table. 9851e9430aSlntue auto get_idx_dd = [](unsigned kk) -> DoubleDouble { 9951e9430aSlntue unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); 10051e9430aSlntue DoubleDouble ans = SIN_K_PI_OVER_128[idx]; 10151e9430aSlntue if (kk & 128) { 10251e9430aSlntue ans.hi = -ans.hi; 10351e9430aSlntue ans.lo = -ans.lo; 10451e9430aSlntue } 10551e9430aSlntue return ans; 10651e9430aSlntue }; 10751e9430aSlntue DoubleDouble sin_k = get_idx_dd(k); 10851e9430aSlntue DoubleDouble cos_k = get_idx_dd(k + 64); 10951e9430aSlntue #else 11016903aceSlntue // Fast look up version, but needs 256-entry table. 11116903aceSlntue // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). 11216903aceSlntue DoubleDouble sin_k = SIN_K_PI_OVER_128[k & 255]; 11316903aceSlntue DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255]; 11451e9430aSlntue #endif 11516903aceSlntue 11616903aceSlntue // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). 11716903aceSlntue // So k is an integer and -pi / 256 <= y <= pi / 256. 11816903aceSlntue // Then sin(x) = sin((k * pi/128 + y) 11916903aceSlntue // = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128) 12051e9430aSlntue DoubleDouble sin_k_cos_y = fputil::quick_mult(cos_y, sin_k); 12151e9430aSlntue DoubleDouble cos_k_sin_y = fputil::quick_mult(sin_y, cos_k); 12216903aceSlntue 12316903aceSlntue DoubleDouble rr = fputil::exact_add<false>(sin_k_cos_y.hi, cos_k_sin_y.hi); 12416903aceSlntue rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; 12516903aceSlntue 12651e9430aSlntue #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS 12716903aceSlntue return rr.hi + rr.lo; 12816903aceSlntue #else 12916903aceSlntue // Accurate test and pass for correctly rounded implementation. 13088f80aebSlntue 13151e9430aSlntue double rlp = rr.lo + err; 13251e9430aSlntue double rlm = rr.lo - err; 13316903aceSlntue 13416903aceSlntue double r_upper = rr.hi + rlp; // (rr.lo + ERR); 13516903aceSlntue double r_lower = rr.hi + rlm; // (rr.lo - ERR); 13616903aceSlntue 13716903aceSlntue // Ziv's rounding test. 13816903aceSlntue if (LIBC_LIKELY(r_upper == r_lower)) 13916903aceSlntue return r_upper; 14016903aceSlntue 14188f80aebSlntue Float128 u_f128, sin_u, cos_u; 14216903aceSlntue if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) 14351e9430aSlntue u_f128 = range_reduction_small_f128(x); 14416903aceSlntue else 14516903aceSlntue u_f128 = range_reduction_large.accurate(); 14616903aceSlntue 14788f80aebSlntue generic::sincos_eval(u_f128, sin_u, cos_u); 14816903aceSlntue 14916903aceSlntue auto get_sin_k = [](unsigned kk) -> Float128 { 15016903aceSlntue unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); 15151e9430aSlntue Float128 ans = SIN_K_PI_OVER_128_F128[idx]; 15216903aceSlntue if (kk & 128) 15316903aceSlntue ans.sign = Sign::NEG; 15416903aceSlntue return ans; 15516903aceSlntue }; 15616903aceSlntue 15716903aceSlntue // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). 15816903aceSlntue Float128 sin_k_f128 = get_sin_k(k); 15916903aceSlntue Float128 cos_k_f128 = get_sin_k(k + 64); 16016903aceSlntue 16116903aceSlntue // sin(x) = sin((k * pi/128 + u) 16216903aceSlntue // = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128) 16316903aceSlntue Float128 r = fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u), 16416903aceSlntue fputil::quick_mul(cos_k_f128, sin_u)); 16516903aceSlntue 16616903aceSlntue // TODO: Add assertion if Ziv's accuracy tests fail in debug mode. 16716903aceSlntue // https://github.com/llvm/llvm-project/issues/96452. 16816903aceSlntue 16916903aceSlntue return static_cast<double>(r); 17051e9430aSlntue #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS 17116903aceSlntue } 17216903aceSlntue 1735ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 174