1a68bbf42STue Ly //===-- Extra range reduction steps for accurate pass of logarithms -------===// 2a68bbf42STue Ly // 3a68bbf42STue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a68bbf42STue Ly // See https://llvm.org/LICENSE.txt for license information. 5a68bbf42STue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a68bbf42STue Ly // 7a68bbf42STue Ly //===----------------------------------------------------------------------===// 8a68bbf42STue Ly 9a68bbf42STue Ly #ifndef LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H 10a68bbf42STue Ly #define LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H 11a68bbf42STue Ly 12a68bbf42STue Ly #include "common_constants.h" 13a68bbf42STue Ly #include "src/__support/FPUtil/dyadic_float.h" 14*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 1509efe848SGuillaume Chatelet #include "src/__support/uint128.h" 16a68bbf42STue Ly 17*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 18a68bbf42STue Ly 19a68bbf42STue Ly // Struct to store -log*(r) for 4 range reduction steps. 20a68bbf42STue Ly struct LogRR { 21a68bbf42STue Ly fputil::DyadicFloat<128> step_1[128]; 22a68bbf42STue Ly fputil::DyadicFloat<128> step_2[193]; 23a68bbf42STue Ly fputil::DyadicFloat<128> step_3[161]; 24a68bbf42STue Ly fputil::DyadicFloat<128> step_4[130]; 25a68bbf42STue Ly }; 26a68bbf42STue Ly 27a68bbf42STue Ly // Perform logarithm range reduction steps 2-4. 28a68bbf42STue Ly // Inputs from the first step of range reduction: 29a68bbf42STue Ly // m_x : the reduced argument after the first step of range reduction 30a68bbf42STue Ly // satisfying -2^-8 <= m_x < 2^-7 and ulp(m_x) >= 2^-60. 31a68bbf42STue Ly // idx1: index of the -log(r1) table from the first step. 32a68bbf42STue Ly // Outputs of the extra range reduction steps: 33a68bbf42STue Ly // sum: adding -log(r1) - log(r2) - log(r3) - log(r4) to the resulted sum. 34a68bbf42STue Ly // return value: the reduced argument v satisfying: 35a68bbf42STue Ly // -0x1.0002143p-29 <= v < 0x1p-29, and ulp(v) >= 2^(-125). 36a68bbf42STue Ly LIBC_INLINE fputil::DyadicFloat<128> 37a68bbf42STue Ly log_range_reduction(double m_x, const LogRR &log_table, 38a68bbf42STue Ly fputil::DyadicFloat<128> &sum) { 39a68bbf42STue Ly using Float128 = typename fputil::DyadicFloat<128>; 40a68bbf42STue Ly using MType = typename Float128::MantissaType; 41a68bbf42STue Ly 42a68bbf42STue Ly int64_t v = static_cast<int64_t>(m_x * 0x1.0p60); // ulp = 2^-60 43a68bbf42STue Ly 44a68bbf42STue Ly // Range reduction - Step 2 45a68bbf42STue Ly // Output range: vv2 in [-0x1.3ffcp-15, 0x1.3e3dp-15]. 46a68bbf42STue Ly // idx2 = trunc(2^14 * (v + 2^-8 + 2^-15)) 47a68bbf42STue Ly size_t idx2 = static_cast<size_t>((v + 0x10'2000'0000'0000) >> 46); 48a68bbf42STue Ly sum = fputil::quick_add(sum, log_table.step_2[idx2]); 49a68bbf42STue Ly 50a68bbf42STue Ly int64_t s2 = static_cast<int64_t>(S2[idx2]); // |s| <= 2^-7, ulp = 2^-16 51a68bbf42STue Ly int64_t sv2 = s2 * v; // |s*v| < 2^-14, ulp = 2^(-60-16) = 2^-76 52a68bbf42STue Ly int64_t spv2 = (s2 << 44) + v; // |s + v| < 2^-14, ulp = 2^-60 53a68bbf42STue Ly int64_t vv2 = (spv2 << 16) + sv2; // |vv2| < 2^-14, ulp = 2^-76 54a68bbf42STue Ly 55a68bbf42STue Ly // Range reduction - Step 3 56a68bbf42STue Ly // Output range: vv3 in [-0x1.01928p-22 , 0x1p-22] 57a68bbf42STue Ly // idx3 = trunc(2^21 * (v + 80*2^-21 + 2^-22)) 58a68bbf42STue Ly size_t idx3 = static_cast<size_t>((vv2 + 0x2840'0000'0000'0000) >> 55); 59a68bbf42STue Ly sum = fputil::quick_add(sum, log_table.step_3[idx3]); 60a68bbf42STue Ly 61a68bbf42STue Ly int64_t s3 = static_cast<int64_t>(S3[idx3]); // |s| < 2^-13, ulp = 2^-21 62a68bbf42STue Ly int64_t spv3 = (s3 << 55) + vv2; // |s + v| < 2^-21, ulp = 2^-76 63a68bbf42STue Ly // |s*v| < 2^-27, ulp = 2^(-76-21) = 2^-97 641557256aSTue Ly Int128 sv3 = static_cast<Int128>(s3) * static_cast<Int128>(vv2); 65a68bbf42STue Ly // |vv3| < 2^-21, ulp = 2^-97 661557256aSTue Ly Int128 vv3 = (static_cast<Int128>(spv3) << 21) + sv3; 67a68bbf42STue Ly 68a68bbf42STue Ly // Range reduction - Step 4 69a68bbf42STue Ly // Output range: vv4 in [-0x1.0002143p-29 , 0x1p-29] 70a68bbf42STue Ly // idx4 = trunc(2^21 * (v + 65*2^-28 + 2^-29)) 71a68bbf42STue Ly size_t idx4 = static_cast<size_t>((static_cast<int>(vv3 >> 68) + 131) >> 1); 72a68bbf42STue Ly 73a68bbf42STue Ly sum = fputil::quick_add(sum, log_table.step_4[idx4]); 74a68bbf42STue Ly 751557256aSTue Ly Int128 s4 = static_cast<Int128>(S4[idx4]); // |s| < 2^-21, ulp = 2^-28 76a68bbf42STue Ly // |s + v| < 2^-28, ulp = 2^-97 771557256aSTue Ly Int128 spv4 = (s4 << 69) + vv3; 78a68bbf42STue Ly // |s*v| < 2^-42, ulp = 2^(-97-28) = 2^-125 791557256aSTue Ly Int128 sv4 = s4 * vv3; 80a68bbf42STue Ly // |vv4| < 2^-28, ulp = 2^-125 811557256aSTue Ly Int128 vv4 = (spv4 << 28) + sv4; 82a68bbf42STue Ly 83e6a6a90fSGuillaume Chatelet return (vv4 < 0) ? Float128(Sign::NEG, -125, 84a68bbf42STue Ly MType({static_cast<uint64_t>(-vv4), 85a68bbf42STue Ly static_cast<uint64_t>((-vv4) >> 64)})) 86e6a6a90fSGuillaume Chatelet : Float128(Sign::POS, -125, 87a68bbf42STue Ly MType({static_cast<uint64_t>(vv4), 88a68bbf42STue Ly static_cast<uint64_t>(vv4 >> 64)})); 89a68bbf42STue Ly } 90a68bbf42STue Ly 91*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 92a68bbf42STue Ly 93a68bbf42STue Ly #endif // LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H 94