1131dda9aSTue Ly //===-- Collection of utils for sinf/cosf/sincosf ---------------*- C++ -*-===// 2bbb75554SSiva Chandra // 3bbb75554SSiva Chandra // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bbb75554SSiva Chandra // See https://llvm.org/LICENSE.txt for license information. 5bbb75554SSiva Chandra // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bbb75554SSiva Chandra // 7bbb75554SSiva Chandra //===----------------------------------------------------------------------===// 8bbb75554SSiva Chandra 9270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H 10270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H 11bbb75554SSiva Chandra 12131dda9aSTue Ly #include "src/__support/FPUtil/FPBits.h" 13131dda9aSTue Ly #include "src/__support/FPUtil/PolyEval.h" 14*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 15737e1cd1SGuillaume Chatelet #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA 16bbb75554SSiva Chandra 17a2569a76SGuillaume Chatelet #if defined(LIBC_TARGET_CPU_HAS_FMA) 18131dda9aSTue Ly #include "range_reduction_fma.h" 19b6bc9d72SGuillaume Chatelet // using namespace LIBC_NAMESPACE::fma; 20b6bc9d72SGuillaume Chatelet using LIBC_NAMESPACE::fma::FAST_PASS_BOUND; 21b6bc9d72SGuillaume Chatelet using LIBC_NAMESPACE::fma::large_range_reduction; 22b6bc9d72SGuillaume Chatelet using LIBC_NAMESPACE::fma::small_range_reduction; 23ea93c538SHendrik Hübner 24131dda9aSTue Ly #else 25131dda9aSTue Ly #include "range_reduction.h" 26b6bc9d72SGuillaume Chatelet // using namespace LIBC_NAMESPACE::generic; 27b6bc9d72SGuillaume Chatelet using LIBC_NAMESPACE::generic::FAST_PASS_BOUND; 28b6bc9d72SGuillaume Chatelet using LIBC_NAMESPACE::generic::large_range_reduction; 29b6bc9d72SGuillaume Chatelet using LIBC_NAMESPACE::generic::small_range_reduction; 30a2569a76SGuillaume Chatelet #endif // LIBC_TARGET_CPU_HAS_FMA 31bbb75554SSiva Chandra 32*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 33bbb75554SSiva Chandra 3442f18379STue Ly // Lookup table for sin(k * pi / 32) with k = 0, ..., 63. 35131dda9aSTue Ly // Table is generated with Sollya as follow: 36131dda9aSTue Ly // > display = hexadecimal; 3742f18379STue Ly // > for k from 0 to 63 do { D(sin(k * pi/32)); }; 3842f18379STue Ly const double SIN_K_PI_OVER_32[64] = { 3942f18379STue Ly 0x0.0000000000000p+0, 0x1.917a6bc29b42cp-4, 0x1.8f8b83c69a60bp-3, 4042f18379STue Ly 0x1.294062ed59f06p-2, 0x1.87de2a6aea963p-2, 0x1.e2b5d3806f63bp-2, 4142f18379STue Ly 0x1.1c73b39ae68c8p-1, 0x1.44cf325091dd6p-1, 0x1.6a09e667f3bcdp-1, 4242f18379STue Ly 0x1.8bc806b151741p-1, 0x1.a9b66290ea1a3p-1, 0x1.c38b2f180bdb1p-1, 4342f18379STue Ly 0x1.d906bcf328d46p-1, 0x1.e9f4156c62ddap-1, 0x1.f6297cff75cbp-1, 4442f18379STue Ly 0x1.fd88da3d12526p-1, 0x1.0000000000000p+0, 0x1.fd88da3d12526p-1, 4542f18379STue Ly 0x1.f6297cff75cbp-1, 0x1.e9f4156c62ddap-1, 0x1.d906bcf328d46p-1, 4642f18379STue Ly 0x1.c38b2f180bdb1p-1, 0x1.a9b66290ea1a3p-1, 0x1.8bc806b151741p-1, 4742f18379STue Ly 0x1.6a09e667f3bcdp-1, 0x1.44cf325091dd6p-1, 0x1.1c73b39ae68c8p-1, 4842f18379STue Ly 0x1.e2b5d3806f63bp-2, 0x1.87de2a6aea963p-2, 0x1.294062ed59f06p-2, 4942f18379STue Ly 0x1.8f8b83c69a60bp-3, 0x1.917a6bc29b42cp-4, 0x0.0000000000000p+0, 5042f18379STue Ly -0x1.917a6bc29b42cp-4, -0x1.8f8b83c69a60bp-3, -0x1.294062ed59f06p-2, 5142f18379STue Ly -0x1.87de2a6aea963p-2, -0x1.e2b5d3806f63bp-2, -0x1.1c73b39ae68c8p-1, 5242f18379STue Ly -0x1.44cf325091dd6p-1, -0x1.6a09e667f3bcdp-1, -0x1.8bc806b151741p-1, 5342f18379STue Ly -0x1.a9b66290ea1a3p-1, -0x1.c38b2f180bdb1p-1, -0x1.d906bcf328d46p-1, 5442f18379STue Ly -0x1.e9f4156c62ddap-1, -0x1.f6297cff75cbp-1, -0x1.fd88da3d12526p-1, 5542f18379STue Ly -0x1.0000000000000p+0, -0x1.fd88da3d12526p-1, -0x1.f6297cff75cbp-1, 5642f18379STue Ly -0x1.e9f4156c62ddap-1, -0x1.d906bcf328d46p-1, -0x1.c38b2f180bdb1p-1, 5742f18379STue Ly -0x1.a9b66290ea1a3p-1, -0x1.8bc806b151741p-1, -0x1.6a09e667f3bcdp-1, 5842f18379STue Ly -0x1.44cf325091dd6p-1, -0x1.1c73b39ae68c8p-1, -0x1.e2b5d3806f63bp-2, 5942f18379STue Ly -0x1.87de2a6aea963p-2, -0x1.294062ed59f06p-2, -0x1.8f8b83c69a60bp-3, 6042f18379STue Ly -0x1.917a6bc29b42cp-4, 6142f18379STue Ly }; 62bbb75554SSiva Chandra 63ea93c538SHendrik Hübner static LIBC_INLINE void sincosf_poly_eval(int64_t k, double y, double &sin_k, 64ea93c538SHendrik Hübner double &cos_k, double &sin_y, 65ea93c538SHendrik Hübner double &cosm1_y) { 6642f18379STue Ly // After range reduction, k = round(x * 32 / pi) and y = (x * 32 / pi) - k. 67131dda9aSTue Ly // So k is an integer and -0.5 <= y <= 0.5. 6842f18379STue Ly // Then sin(x) = sin((k + y)*pi/32) 6942f18379STue Ly // = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32) 70bbb75554SSiva Chandra 7142f18379STue Ly sin_k = SIN_K_PI_OVER_32[k & 63]; 7242f18379STue Ly // cos(k * pi/32) = sin(k * pi/32 + pi/2) = sin((k + 16) * pi/32). 7342f18379STue Ly // cos_k = cos(k * pi/32) 7442f18379STue Ly cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; 75bbb75554SSiva Chandra 76131dda9aSTue Ly double ysq = y * y; 77bbb75554SSiva Chandra 7842f18379STue Ly // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya 79131dda9aSTue Ly // with: 8042f18379STue Ly // > Q = fpminimax(sin(y*pi/32)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]); 8142f18379STue Ly sin_y = 8242f18379STue Ly y * fputil::polyeval(ysq, 0x1.921fb54442d18p-4, -0x1.4abbce625abb1p-13, 8342f18379STue Ly 0x1.466bc624f2776p-24, -0x1.32c3a619d4a7ep-36); 8482d6e770STue Ly // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya with: 8542f18379STue Ly // > P = fpminimax(cos(x*pi/32), [|0, 2, 4, 6|], [|1, D...|], [0, 0.5]); 8642f18379STue Ly // Note that cosm1_y = cos(y*pi/32) - 1. 8742f18379STue Ly cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be430bp-8, 8842f18379STue Ly 0x1.03c1f070c2e27p-18, -0x1.55cc84bd942p-30); 89bbb75554SSiva Chandra } 90bbb75554SSiva Chandra 91ea93c538SHendrik Hübner LIBC_INLINE void sincosf_eval(double xd, uint32_t x_abs, double &sin_k, 92ea93c538SHendrik Hübner double &cos_k, double &sin_y, double &cosm1_y) { 93ea93c538SHendrik Hübner int64_t k; 94ea93c538SHendrik Hübner double y; 95ea93c538SHendrik Hübner 96ea93c538SHendrik Hübner if (LIBC_LIKELY(x_abs < FAST_PASS_BOUND)) { 97ea93c538SHendrik Hübner k = small_range_reduction(xd, y); 98ea93c538SHendrik Hübner } else { 99ea93c538SHendrik Hübner fputil::FPBits<float> x_bits(x_abs); 100ea93c538SHendrik Hübner k = large_range_reduction(xd, x_bits.get_exponent(), y); 101ea93c538SHendrik Hübner } 102ea93c538SHendrik Hübner 103ea93c538SHendrik Hübner sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); 104ea93c538SHendrik Hübner } 105ea93c538SHendrik Hübner 106ea93c538SHendrik Hübner // Return k and y, where 107ea93c538SHendrik Hübner // k = round(x * 32) and y = (x * 32) - k. 108ea93c538SHendrik Hübner // => pi * x = (k + y) * pi / 32 109ea93c538SHendrik Hübner static LIBC_INLINE int64_t range_reduction_sincospi(double x, double &y) { 110ea93c538SHendrik Hübner double kd = fputil::nearest_integer(x * 32); 111ea93c538SHendrik Hübner y = fputil::multiply_add<double>(x, 32.0, -kd); 112ea93c538SHendrik Hübner 113ea93c538SHendrik Hübner return static_cast<int64_t>(kd); 114ea93c538SHendrik Hübner } 115ea93c538SHendrik Hübner 116ea93c538SHendrik Hübner LIBC_INLINE void sincospif_eval(double xd, double &sin_k, double &cos_k, 117ea93c538SHendrik Hübner double &sin_y, double &cosm1_y) { 118ea93c538SHendrik Hübner double y; 119ea93c538SHendrik Hübner int64_t k = range_reduction_sincospi(xd, y); 120ea93c538SHendrik Hübner sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); 121ea93c538SHendrik Hübner } 122ea93c538SHendrik Hübner 123*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 124bbb75554SSiva Chandra 125270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H 126