19fb049c8SOverMighty //===-- Implementation of fmul function -----------------------------------===// 2263be9fbSJob Henandez Lara // 3263be9fbSJob Henandez Lara // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4263be9fbSJob Henandez Lara // See https://llvm.org/LICENSE.txt for license information. 5263be9fbSJob Henandez Lara // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6263be9fbSJob Henandez Lara // 7263be9fbSJob Henandez Lara //===----------------------------------------------------------------------===// 8263be9fbSJob Henandez Lara #include "src/math/fmul.h" 9*a205a854SJob Henandez Lara #include "hdr/errno_macros.h" 10*a205a854SJob Henandez Lara #include "hdr/fenv_macros.h" 11*a205a854SJob Henandez Lara #include "src/__support/FPUtil/double_double.h" 129fb049c8SOverMighty #include "src/__support/FPUtil/generic/mul.h" 13263be9fbSJob Henandez Lara #include "src/__support/common.h" 145ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 15263be9fbSJob Henandez Lara 165ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 17263be9fbSJob Henandez Lara 18263be9fbSJob Henandez Lara LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) { 19*a205a854SJob Henandez Lara 20*a205a854SJob Henandez Lara // Without FMA instructions, fputil::exact_mult is not 21*a205a854SJob Henandez Lara // correctly rounded for all rounding modes, so we fall 22*a205a854SJob Henandez Lara // back to the generic `fmul` implementation 23*a205a854SJob Henandez Lara 24*a205a854SJob Henandez Lara #ifndef LIBC_TARGET_CPU_HAS_FMA 259fb049c8SOverMighty return fputil::generic::mul<float>(x, y); 26*a205a854SJob Henandez Lara #else 27*a205a854SJob Henandez Lara fputil::DoubleDouble prod = fputil::exact_mult(x, y); 28*a205a854SJob Henandez Lara using DoubleBits = fputil::FPBits<double>; 29*a205a854SJob Henandez Lara using DoubleStorageType = typename DoubleBits::StorageType; 30*a205a854SJob Henandez Lara using FloatBits = fputil::FPBits<float>; 31*a205a854SJob Henandez Lara using FloatStorageType = typename FloatBits::StorageType; 32*a205a854SJob Henandez Lara DoubleBits x_bits(x); 33*a205a854SJob Henandez Lara DoubleBits y_bits(y); 34*a205a854SJob Henandez Lara 35*a205a854SJob Henandez Lara Sign result_sign = x_bits.sign() == y_bits.sign() ? Sign::POS : Sign::NEG; 36*a205a854SJob Henandez Lara double result = prod.hi; 37*a205a854SJob Henandez Lara DoubleBits hi_bits(prod.hi), lo_bits(prod.lo); 38*a205a854SJob Henandez Lara // Check for cases where we need to propagate the sticky bits: 39*a205a854SJob Henandez Lara constexpr uint64_t STICKY_MASK = 0xFFF'FFF; // Lower (52 - 23 - 1 = 28 bits) 40*a205a854SJob Henandez Lara uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK); 41*a205a854SJob Henandez Lara if (LIBC_UNLIKELY(sticky_bits == 0)) { 42*a205a854SJob Henandez Lara // Might need to propagate sticky bits: 43*a205a854SJob Henandez Lara if (!(lo_bits.is_inf_or_nan() || lo_bits.is_zero())) { 44*a205a854SJob Henandez Lara // Now prod.lo is nonzero and finite, we need to propagate sticky bits. 45*a205a854SJob Henandez Lara if (lo_bits.sign() != hi_bits.sign()) 46*a205a854SJob Henandez Lara result = DoubleBits(hi_bits.uintval() - 1).get_val(); 47*a205a854SJob Henandez Lara else 48*a205a854SJob Henandez Lara result = DoubleBits(hi_bits.uintval() | 1).get_val(); 49*a205a854SJob Henandez Lara } 50263be9fbSJob Henandez Lara } 51263be9fbSJob Henandez Lara 52*a205a854SJob Henandez Lara float result_f = static_cast<float>(result); 53*a205a854SJob Henandez Lara FloatBits rf_bits(result_f); 54*a205a854SJob Henandez Lara uint32_t rf_exp = rf_bits.get_biased_exponent(); 55*a205a854SJob Henandez Lara if (LIBC_LIKELY(rf_exp > 0 && rf_exp < 2 * FloatBits::EXP_BIAS + 1)) { 56*a205a854SJob Henandez Lara return result_f; 57*a205a854SJob Henandez Lara } 58*a205a854SJob Henandez Lara 59*a205a854SJob Henandez Lara // Now result_f is either inf/nan/zero/denormal. 60*a205a854SJob Henandez Lara if (x_bits.is_nan() || y_bits.is_nan()) { 61*a205a854SJob Henandez Lara if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) 62*a205a854SJob Henandez Lara fputil::raise_except_if_required(FE_INVALID); 63*a205a854SJob Henandez Lara 64*a205a854SJob Henandez Lara if (x_bits.is_quiet_nan()) { 65*a205a854SJob Henandez Lara DoubleStorageType x_payload = x_bits.get_mantissa(); 66*a205a854SJob Henandez Lara x_payload >>= DoubleBits::FRACTION_LEN - FloatBits::FRACTION_LEN; 67*a205a854SJob Henandez Lara return FloatBits::quiet_nan(x_bits.sign(), 68*a205a854SJob Henandez Lara static_cast<FloatStorageType>(x_payload)) 69*a205a854SJob Henandez Lara .get_val(); 70*a205a854SJob Henandez Lara } 71*a205a854SJob Henandez Lara 72*a205a854SJob Henandez Lara if (y_bits.is_quiet_nan()) { 73*a205a854SJob Henandez Lara DoubleStorageType y_payload = y_bits.get_mantissa(); 74*a205a854SJob Henandez Lara y_payload >>= DoubleBits::FRACTION_LEN - FloatBits::FRACTION_LEN; 75*a205a854SJob Henandez Lara return FloatBits::quiet_nan(y_bits.sign(), 76*a205a854SJob Henandez Lara static_cast<FloatStorageType>(y_payload)) 77*a205a854SJob Henandez Lara .get_val(); 78*a205a854SJob Henandez Lara } 79*a205a854SJob Henandez Lara 80*a205a854SJob Henandez Lara return FloatBits::quiet_nan().get_val(); 81*a205a854SJob Henandez Lara } 82*a205a854SJob Henandez Lara 83*a205a854SJob Henandez Lara if (x_bits.is_inf()) { 84*a205a854SJob Henandez Lara if (y_bits.is_zero()) { 85*a205a854SJob Henandez Lara fputil::set_errno_if_required(EDOM); 86*a205a854SJob Henandez Lara fputil::raise_except_if_required(FE_INVALID); 87*a205a854SJob Henandez Lara 88*a205a854SJob Henandez Lara return FloatBits::quiet_nan().get_val(); 89*a205a854SJob Henandez Lara } 90*a205a854SJob Henandez Lara 91*a205a854SJob Henandez Lara return FloatBits::inf(result_sign).get_val(); 92*a205a854SJob Henandez Lara } 93*a205a854SJob Henandez Lara 94*a205a854SJob Henandez Lara if (y_bits.is_inf()) { 95*a205a854SJob Henandez Lara if (x_bits.is_zero()) { 96*a205a854SJob Henandez Lara fputil::set_errno_if_required(EDOM); 97*a205a854SJob Henandez Lara fputil::raise_except_if_required(FE_INVALID); 98*a205a854SJob Henandez Lara return FloatBits::quiet_nan().get_val(); 99*a205a854SJob Henandez Lara } 100*a205a854SJob Henandez Lara 101*a205a854SJob Henandez Lara return FloatBits::inf(result_sign).get_val(); 102*a205a854SJob Henandez Lara } 103*a205a854SJob Henandez Lara 104*a205a854SJob Henandez Lara // Now either x or y is zero, and the other one is finite. 105*a205a854SJob Henandez Lara if (rf_bits.is_inf()) { 106*a205a854SJob Henandez Lara fputil::set_errno_if_required(ERANGE); 107*a205a854SJob Henandez Lara return FloatBits::inf(result_sign).get_val(); 108*a205a854SJob Henandez Lara } 109*a205a854SJob Henandez Lara 110*a205a854SJob Henandez Lara if (x_bits.is_zero() || y_bits.is_zero()) 111*a205a854SJob Henandez Lara return FloatBits::zero(result_sign).get_val(); 112*a205a854SJob Henandez Lara 113*a205a854SJob Henandez Lara fputil::set_errno_if_required(ERANGE); 114*a205a854SJob Henandez Lara fputil::raise_except_if_required(FE_UNDERFLOW); 115*a205a854SJob Henandez Lara return result_f; 116*a205a854SJob Henandez Lara 117*a205a854SJob Henandez Lara #endif 118*a205a854SJob Henandez Lara } 1195ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 120