xref: /llvm-project/libc/src/math/generic/sincos.cpp (revision 51e9430a0c767243411d4b81c284700f89719277)
14080f174Slntue //===-- Double-precision sincos function ----------------------------------===//
24080f174Slntue //
34080f174Slntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44080f174Slntue // See https://llvm.org/LICENSE.txt for license information.
54080f174Slntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64080f174Slntue //
74080f174Slntue //===----------------------------------------------------------------------===//
84080f174Slntue 
94080f174Slntue #include "src/math/sincos.h"
104080f174Slntue #include "hdr/errno_macros.h"
114080f174Slntue #include "src/__support/FPUtil/FEnvImpl.h"
124080f174Slntue #include "src/__support/FPUtil/FPBits.h"
134080f174Slntue #include "src/__support/FPUtil/double_double.h"
144080f174Slntue #include "src/__support/FPUtil/dyadic_float.h"
154080f174Slntue #include "src/__support/FPUtil/except_value_utils.h"
164080f174Slntue #include "src/__support/FPUtil/multiply_add.h"
174080f174Slntue #include "src/__support/FPUtil/rounding_mode.h"
184080f174Slntue #include "src/__support/common.h"
195ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
204080f174Slntue #include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
214080f174Slntue #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
22*51e9430aSlntue #include "src/math/generic/range_reduction_double_common.h"
234080f174Slntue #include "src/math/generic/sincos_eval.h"
244080f174Slntue 
25*51e9430aSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA
26*51e9430aSlntue #include "range_reduction_double_fma.h"
27*51e9430aSlntue #else
28*51e9430aSlntue #include "range_reduction_double_nofma.h"
29*51e9430aSlntue #endif // LIBC_TARGET_CPU_HAS_FMA
304080f174Slntue 
315ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
324080f174Slntue 
334080f174Slntue using DoubleDouble = fputil::DoubleDouble;
344080f174Slntue using Float128 = typename fputil::DyadicFloat<128>;
354080f174Slntue 
364080f174Slntue LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) {
374080f174Slntue   using FPBits = typename fputil::FPBits<double>;
384080f174Slntue   FPBits xbits(x);
394080f174Slntue 
404080f174Slntue   uint16_t x_e = xbits.get_biased_exponent();
414080f174Slntue 
424080f174Slntue   DoubleDouble y;
434080f174Slntue   unsigned k;
44*51e9430aSlntue   LargeRangeReduction range_reduction_large{};
454080f174Slntue 
46*51e9430aSlntue   // |x| < 2^16
474080f174Slntue   if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
48*51e9430aSlntue     // |x| < 2^-7
49*51e9430aSlntue     if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) {
504080f174Slntue       // |x| < 2^-27
514080f174Slntue       if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {
524080f174Slntue         // Signed zeros.
534080f174Slntue         if (LIBC_UNLIKELY(x == 0.0)) {
544080f174Slntue           *sin_x = x;
554080f174Slntue           *cos_x = 1.0;
564080f174Slntue           return;
574080f174Slntue         }
584080f174Slntue 
594080f174Slntue         // For |x| < 2^-27, max(|sin(x) - x|, |cos(x) - 1|) < ulp(x)/2.
604080f174Slntue #ifdef LIBC_TARGET_CPU_HAS_FMA
614080f174Slntue         *sin_x = fputil::multiply_add(x, -0x1.0p-54, x);
624080f174Slntue         *cos_x = fputil::multiply_add(x, -x, 1.0);
634080f174Slntue #else
644080f174Slntue         *cos_x = fputil::round_result_slightly_down(1.0);
654080f174Slntue 
664080f174Slntue         if (LIBC_UNLIKELY(x_e < 4)) {
674080f174Slntue           int rounding_mode = fputil::quick_get_round();
684080f174Slntue           if (rounding_mode == FE_TOWARDZERO ||
694080f174Slntue               (xbits.sign() == Sign::POS && rounding_mode == FE_DOWNWARD) ||
704080f174Slntue               (xbits.sign() == Sign::NEG && rounding_mode == FE_UPWARD))
714080f174Slntue             *sin_x = FPBits(xbits.uintval() - 1).get_val();
724080f174Slntue         }
734080f174Slntue         *sin_x = fputil::multiply_add(x, -0x1.0p-54, x);
744080f174Slntue #endif // LIBC_TARGET_CPU_HAS_FMA
754080f174Slntue         return;
764080f174Slntue       }
77*51e9430aSlntue       // No range reduction needed.
78*51e9430aSlntue       k = 0;
79*51e9430aSlntue       y.lo = 0.0;
80*51e9430aSlntue       y.hi = x;
81*51e9430aSlntue     } else {
82*51e9430aSlntue       // Small range reduction.
834080f174Slntue       k = range_reduction_small(x, y);
84*51e9430aSlntue     }
854080f174Slntue   } else {
864080f174Slntue     // Inf or NaN
874080f174Slntue     if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) {
884080f174Slntue       // sin(+-Inf) = NaN
894080f174Slntue       if (xbits.get_mantissa() == 0) {
904080f174Slntue         fputil::set_errno_if_required(EDOM);
914080f174Slntue         fputil::raise_except_if_required(FE_INVALID);
924080f174Slntue       }
934080f174Slntue       *sin_x = *cos_x = x + FPBits::quiet_nan().get_val();
944080f174Slntue       return;
954080f174Slntue     }
964080f174Slntue 
974080f174Slntue     // Large range reduction.
98*51e9430aSlntue     k = range_reduction_large.fast(x, y);
994080f174Slntue   }
1004080f174Slntue 
1014080f174Slntue   DoubleDouble sin_y, cos_y;
1024080f174Slntue 
103*51e9430aSlntue   [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y);
1044080f174Slntue 
1054080f174Slntue   // Look up sin(k * pi/128) and cos(k * pi/128)
106*51e9430aSlntue #ifdef LIBC_MATH_HAS_SMALL_TABLES
107*51e9430aSlntue   // Memory saving versions.  Use 65-entry table.
108*51e9430aSlntue   auto get_idx_dd = [](unsigned kk) -> DoubleDouble {
109*51e9430aSlntue     unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
110*51e9430aSlntue     DoubleDouble ans = SIN_K_PI_OVER_128[idx];
111*51e9430aSlntue     if (kk & 128) {
112*51e9430aSlntue       ans.hi = -ans.hi;
113*51e9430aSlntue       ans.lo = -ans.lo;
114*51e9430aSlntue     }
115*51e9430aSlntue     return ans;
116*51e9430aSlntue   };
117*51e9430aSlntue   DoubleDouble sin_k = get_idx_dd(k);
118*51e9430aSlntue   DoubleDouble cos_k = get_idx_dd(k + 64);
119*51e9430aSlntue #else
1204080f174Slntue   // Fast look up version, but needs 256-entry table.
1214080f174Slntue   // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
1224080f174Slntue   DoubleDouble sin_k = SIN_K_PI_OVER_128[k & 255];
1234080f174Slntue   DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255];
124*51e9430aSlntue #endif // LIBC_MATH_HAS_SMALL_TABLES
125*51e9430aSlntue 
1264080f174Slntue   DoubleDouble msin_k{-sin_k.lo, -sin_k.hi};
1274080f174Slntue 
1284080f174Slntue   // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
1294080f174Slntue   // So k is an integer and -pi / 256 <= y <= pi / 256.
1304080f174Slntue   // Then sin(x) = sin((k * pi/128 + y)
1314080f174Slntue   //             = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128)
132*51e9430aSlntue   DoubleDouble sin_k_cos_y = fputil::quick_mult(cos_y, sin_k);
133*51e9430aSlntue   DoubleDouble cos_k_sin_y = fputil::quick_mult(sin_y, cos_k);
1344080f174Slntue   //      cos(x) = cos((k * pi/128 + y)
1354080f174Slntue   //             = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128)
136*51e9430aSlntue   DoubleDouble cos_k_cos_y = fputil::quick_mult(cos_y, cos_k);
137*51e9430aSlntue   DoubleDouble msin_k_sin_y = fputil::quick_mult(sin_y, msin_k);
1384080f174Slntue 
1394080f174Slntue   DoubleDouble sin_dd =
1404080f174Slntue       fputil::exact_add<false>(sin_k_cos_y.hi, cos_k_sin_y.hi);
1414080f174Slntue   DoubleDouble cos_dd =
1424080f174Slntue       fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi);
1434080f174Slntue   sin_dd.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
1444080f174Slntue   cos_dd.lo += msin_k_sin_y.lo + cos_k_cos_y.lo;
1454080f174Slntue 
146*51e9430aSlntue #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
1474080f174Slntue   *sin_x = sin_dd.hi + sin_dd.lo;
1484080f174Slntue   *cos_x = cos_dd.hi + cos_dd.lo;
1494080f174Slntue   return;
1504080f174Slntue #else
1514080f174Slntue   // Accurate test and pass for correctly rounded implementation.
1524080f174Slntue 
153*51e9430aSlntue   double sin_lp = sin_dd.lo + err;
154*51e9430aSlntue   double sin_lm = sin_dd.lo - err;
155*51e9430aSlntue   double cos_lp = cos_dd.lo + err;
156*51e9430aSlntue   double cos_lm = cos_dd.lo - err;
1574080f174Slntue 
1584080f174Slntue   double sin_upper = sin_dd.hi + sin_lp;
1594080f174Slntue   double sin_lower = sin_dd.hi + sin_lm;
1604080f174Slntue   double cos_upper = cos_dd.hi + cos_lp;
1614080f174Slntue   double cos_lower = cos_dd.hi + cos_lm;
1624080f174Slntue 
1634080f174Slntue   // Ziv's rounding test.
1644080f174Slntue   if (LIBC_LIKELY(sin_upper == sin_lower && cos_upper == cos_lower)) {
1654080f174Slntue     *sin_x = sin_upper;
1664080f174Slntue     *cos_x = cos_upper;
1674080f174Slntue     return;
1684080f174Slntue   }
1694080f174Slntue 
1704080f174Slntue   Float128 u_f128, sin_u, cos_u;
1714080f174Slntue   if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
172*51e9430aSlntue     u_f128 = range_reduction_small_f128(x);
1734080f174Slntue   else
1744080f174Slntue     u_f128 = range_reduction_large.accurate();
1754080f174Slntue 
1764080f174Slntue   generic::sincos_eval(u_f128, sin_u, cos_u);
1774080f174Slntue 
1784080f174Slntue   auto get_sin_k = [](unsigned kk) -> Float128 {
1794080f174Slntue     unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
180*51e9430aSlntue     Float128 ans = SIN_K_PI_OVER_128_F128[idx];
1814080f174Slntue     if (kk & 128)
1824080f174Slntue       ans.sign = Sign::NEG;
1834080f174Slntue     return ans;
1844080f174Slntue   };
1854080f174Slntue 
1864080f174Slntue   // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
1874080f174Slntue   Float128 sin_k_f128 = get_sin_k(k);
1884080f174Slntue   Float128 cos_k_f128 = get_sin_k(k + 64);
1894080f174Slntue   Float128 msin_k_f128 = get_sin_k(k + 128);
1904080f174Slntue 
1914080f174Slntue   // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
1924080f174Slntue   // https://github.com/llvm/llvm-project/issues/96452.
1934080f174Slntue 
1944080f174Slntue   if (sin_upper == sin_lower)
1954080f174Slntue     *sin_x = sin_upper;
1964080f174Slntue   else
1974080f174Slntue     // sin(x) = sin((k * pi/128 + u)
1984080f174Slntue     //        = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
1994080f174Slntue     *sin_x = static_cast<double>(
2004080f174Slntue         fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u),
2014080f174Slntue                           fputil::quick_mul(cos_k_f128, sin_u)));
2024080f174Slntue 
2034080f174Slntue   if (cos_upper == cos_lower)
2044080f174Slntue     *cos_x = cos_upper;
2054080f174Slntue   else
2064080f174Slntue     // cos(x) = cos((k * pi/128 + u)
2074080f174Slntue     //        = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128)
2084080f174Slntue     *cos_x = static_cast<double>(
2094080f174Slntue         fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u),
2104080f174Slntue                           fputil::quick_mul(msin_k_f128, sin_u)));
2114080f174Slntue 
212*51e9430aSlntue #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
2134080f174Slntue }
2144080f174Slntue 
2155ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
216