14080f174Slntue //===-- Double-precision sincos function ----------------------------------===// 24080f174Slntue // 34080f174Slntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44080f174Slntue // See https://llvm.org/LICENSE.txt for license information. 54080f174Slntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64080f174Slntue // 74080f174Slntue //===----------------------------------------------------------------------===// 84080f174Slntue 94080f174Slntue #include "src/math/sincos.h" 104080f174Slntue #include "hdr/errno_macros.h" 114080f174Slntue #include "src/__support/FPUtil/FEnvImpl.h" 124080f174Slntue #include "src/__support/FPUtil/FPBits.h" 134080f174Slntue #include "src/__support/FPUtil/double_double.h" 144080f174Slntue #include "src/__support/FPUtil/dyadic_float.h" 154080f174Slntue #include "src/__support/FPUtil/except_value_utils.h" 164080f174Slntue #include "src/__support/FPUtil/multiply_add.h" 174080f174Slntue #include "src/__support/FPUtil/rounding_mode.h" 184080f174Slntue #include "src/__support/common.h" 195ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 204080f174Slntue #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY 214080f174Slntue #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA 22*51e9430aSlntue #include "src/math/generic/range_reduction_double_common.h" 234080f174Slntue #include "src/math/generic/sincos_eval.h" 244080f174Slntue 25*51e9430aSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA 26*51e9430aSlntue #include "range_reduction_double_fma.h" 27*51e9430aSlntue #else 28*51e9430aSlntue #include "range_reduction_double_nofma.h" 29*51e9430aSlntue #endif // LIBC_TARGET_CPU_HAS_FMA 304080f174Slntue 315ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 324080f174Slntue 334080f174Slntue using DoubleDouble = fputil::DoubleDouble; 344080f174Slntue using Float128 = typename fputil::DyadicFloat<128>; 354080f174Slntue 364080f174Slntue LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { 374080f174Slntue using FPBits = typename fputil::FPBits<double>; 384080f174Slntue FPBits xbits(x); 394080f174Slntue 404080f174Slntue uint16_t x_e = xbits.get_biased_exponent(); 414080f174Slntue 424080f174Slntue DoubleDouble y; 434080f174Slntue unsigned k; 44*51e9430aSlntue LargeRangeReduction range_reduction_large{}; 454080f174Slntue 46*51e9430aSlntue // |x| < 2^16 474080f174Slntue if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { 48*51e9430aSlntue // |x| < 2^-7 49*51e9430aSlntue if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) { 504080f174Slntue // |x| < 2^-27 514080f174Slntue if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) { 524080f174Slntue // Signed zeros. 534080f174Slntue if (LIBC_UNLIKELY(x == 0.0)) { 544080f174Slntue *sin_x = x; 554080f174Slntue *cos_x = 1.0; 564080f174Slntue return; 574080f174Slntue } 584080f174Slntue 594080f174Slntue // For |x| < 2^-27, max(|sin(x) - x|, |cos(x) - 1|) < ulp(x)/2. 604080f174Slntue #ifdef LIBC_TARGET_CPU_HAS_FMA 614080f174Slntue *sin_x = fputil::multiply_add(x, -0x1.0p-54, x); 624080f174Slntue *cos_x = fputil::multiply_add(x, -x, 1.0); 634080f174Slntue #else 644080f174Slntue *cos_x = fputil::round_result_slightly_down(1.0); 654080f174Slntue 664080f174Slntue if (LIBC_UNLIKELY(x_e < 4)) { 674080f174Slntue int rounding_mode = fputil::quick_get_round(); 684080f174Slntue if (rounding_mode == FE_TOWARDZERO || 694080f174Slntue (xbits.sign() == Sign::POS && rounding_mode == FE_DOWNWARD) || 704080f174Slntue (xbits.sign() == Sign::NEG && rounding_mode == FE_UPWARD)) 714080f174Slntue *sin_x = FPBits(xbits.uintval() - 1).get_val(); 724080f174Slntue } 734080f174Slntue *sin_x = fputil::multiply_add(x, -0x1.0p-54, x); 744080f174Slntue #endif // LIBC_TARGET_CPU_HAS_FMA 754080f174Slntue return; 764080f174Slntue } 77*51e9430aSlntue // No range reduction needed. 78*51e9430aSlntue k = 0; 79*51e9430aSlntue y.lo = 0.0; 80*51e9430aSlntue y.hi = x; 81*51e9430aSlntue } else { 82*51e9430aSlntue // Small range reduction. 834080f174Slntue k = range_reduction_small(x, y); 84*51e9430aSlntue } 854080f174Slntue } else { 864080f174Slntue // Inf or NaN 874080f174Slntue if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) { 884080f174Slntue // sin(+-Inf) = NaN 894080f174Slntue if (xbits.get_mantissa() == 0) { 904080f174Slntue fputil::set_errno_if_required(EDOM); 914080f174Slntue fputil::raise_except_if_required(FE_INVALID); 924080f174Slntue } 934080f174Slntue *sin_x = *cos_x = x + FPBits::quiet_nan().get_val(); 944080f174Slntue return; 954080f174Slntue } 964080f174Slntue 974080f174Slntue // Large range reduction. 98*51e9430aSlntue k = range_reduction_large.fast(x, y); 994080f174Slntue } 1004080f174Slntue 1014080f174Slntue DoubleDouble sin_y, cos_y; 1024080f174Slntue 103*51e9430aSlntue [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); 1044080f174Slntue 1054080f174Slntue // Look up sin(k * pi/128) and cos(k * pi/128) 106*51e9430aSlntue #ifdef LIBC_MATH_HAS_SMALL_TABLES 107*51e9430aSlntue // Memory saving versions. Use 65-entry table. 108*51e9430aSlntue auto get_idx_dd = [](unsigned kk) -> DoubleDouble { 109*51e9430aSlntue unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); 110*51e9430aSlntue DoubleDouble ans = SIN_K_PI_OVER_128[idx]; 111*51e9430aSlntue if (kk & 128) { 112*51e9430aSlntue ans.hi = -ans.hi; 113*51e9430aSlntue ans.lo = -ans.lo; 114*51e9430aSlntue } 115*51e9430aSlntue return ans; 116*51e9430aSlntue }; 117*51e9430aSlntue DoubleDouble sin_k = get_idx_dd(k); 118*51e9430aSlntue DoubleDouble cos_k = get_idx_dd(k + 64); 119*51e9430aSlntue #else 1204080f174Slntue // Fast look up version, but needs 256-entry table. 1214080f174Slntue // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). 1224080f174Slntue DoubleDouble sin_k = SIN_K_PI_OVER_128[k & 255]; 1234080f174Slntue DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255]; 124*51e9430aSlntue #endif // LIBC_MATH_HAS_SMALL_TABLES 125*51e9430aSlntue 1264080f174Slntue DoubleDouble msin_k{-sin_k.lo, -sin_k.hi}; 1274080f174Slntue 1284080f174Slntue // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). 1294080f174Slntue // So k is an integer and -pi / 256 <= y <= pi / 256. 1304080f174Slntue // Then sin(x) = sin((k * pi/128 + y) 1314080f174Slntue // = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128) 132*51e9430aSlntue DoubleDouble sin_k_cos_y = fputil::quick_mult(cos_y, sin_k); 133*51e9430aSlntue DoubleDouble cos_k_sin_y = fputil::quick_mult(sin_y, cos_k); 1344080f174Slntue // cos(x) = cos((k * pi/128 + y) 1354080f174Slntue // = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128) 136*51e9430aSlntue DoubleDouble cos_k_cos_y = fputil::quick_mult(cos_y, cos_k); 137*51e9430aSlntue DoubleDouble msin_k_sin_y = fputil::quick_mult(sin_y, msin_k); 1384080f174Slntue 1394080f174Slntue DoubleDouble sin_dd = 1404080f174Slntue fputil::exact_add<false>(sin_k_cos_y.hi, cos_k_sin_y.hi); 1414080f174Slntue DoubleDouble cos_dd = 1424080f174Slntue fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi); 1434080f174Slntue sin_dd.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; 1444080f174Slntue cos_dd.lo += msin_k_sin_y.lo + cos_k_cos_y.lo; 1454080f174Slntue 146*51e9430aSlntue #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS 1474080f174Slntue *sin_x = sin_dd.hi + sin_dd.lo; 1484080f174Slntue *cos_x = cos_dd.hi + cos_dd.lo; 1494080f174Slntue return; 1504080f174Slntue #else 1514080f174Slntue // Accurate test and pass for correctly rounded implementation. 1524080f174Slntue 153*51e9430aSlntue double sin_lp = sin_dd.lo + err; 154*51e9430aSlntue double sin_lm = sin_dd.lo - err; 155*51e9430aSlntue double cos_lp = cos_dd.lo + err; 156*51e9430aSlntue double cos_lm = cos_dd.lo - err; 1574080f174Slntue 1584080f174Slntue double sin_upper = sin_dd.hi + sin_lp; 1594080f174Slntue double sin_lower = sin_dd.hi + sin_lm; 1604080f174Slntue double cos_upper = cos_dd.hi + cos_lp; 1614080f174Slntue double cos_lower = cos_dd.hi + cos_lm; 1624080f174Slntue 1634080f174Slntue // Ziv's rounding test. 1644080f174Slntue if (LIBC_LIKELY(sin_upper == sin_lower && cos_upper == cos_lower)) { 1654080f174Slntue *sin_x = sin_upper; 1664080f174Slntue *cos_x = cos_upper; 1674080f174Slntue return; 1684080f174Slntue } 1694080f174Slntue 1704080f174Slntue Float128 u_f128, sin_u, cos_u; 1714080f174Slntue if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) 172*51e9430aSlntue u_f128 = range_reduction_small_f128(x); 1734080f174Slntue else 1744080f174Slntue u_f128 = range_reduction_large.accurate(); 1754080f174Slntue 1764080f174Slntue generic::sincos_eval(u_f128, sin_u, cos_u); 1774080f174Slntue 1784080f174Slntue auto get_sin_k = [](unsigned kk) -> Float128 { 1794080f174Slntue unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); 180*51e9430aSlntue Float128 ans = SIN_K_PI_OVER_128_F128[idx]; 1814080f174Slntue if (kk & 128) 1824080f174Slntue ans.sign = Sign::NEG; 1834080f174Slntue return ans; 1844080f174Slntue }; 1854080f174Slntue 1864080f174Slntue // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). 1874080f174Slntue Float128 sin_k_f128 = get_sin_k(k); 1884080f174Slntue Float128 cos_k_f128 = get_sin_k(k + 64); 1894080f174Slntue Float128 msin_k_f128 = get_sin_k(k + 128); 1904080f174Slntue 1914080f174Slntue // TODO: Add assertion if Ziv's accuracy tests fail in debug mode. 1924080f174Slntue // https://github.com/llvm/llvm-project/issues/96452. 1934080f174Slntue 1944080f174Slntue if (sin_upper == sin_lower) 1954080f174Slntue *sin_x = sin_upper; 1964080f174Slntue else 1974080f174Slntue // sin(x) = sin((k * pi/128 + u) 1984080f174Slntue // = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128) 1994080f174Slntue *sin_x = static_cast<double>( 2004080f174Slntue fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u), 2014080f174Slntue fputil::quick_mul(cos_k_f128, sin_u))); 2024080f174Slntue 2034080f174Slntue if (cos_upper == cos_lower) 2044080f174Slntue *cos_x = cos_upper; 2054080f174Slntue else 2064080f174Slntue // cos(x) = cos((k * pi/128 + u) 2074080f174Slntue // = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128) 2084080f174Slntue *cos_x = static_cast<double>( 2094080f174Slntue fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u), 2104080f174Slntue fputil::quick_mul(msin_k_f128, sin_u))); 2114080f174Slntue 212*51e9430aSlntue #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS 2134080f174Slntue } 2144080f174Slntue 2155ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 216