xref: /llvm-project/libc/src/math/generic/fmul.cpp (revision a205a854e06d36c1d0def3e3bc3743defdb6abc1)
19fb049c8SOverMighty //===-- Implementation of fmul function -----------------------------------===//
2263be9fbSJob Henandez Lara //
3263be9fbSJob Henandez Lara // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4263be9fbSJob Henandez Lara // See https://llvm.org/LICENSE.txt for license information.
5263be9fbSJob Henandez Lara // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6263be9fbSJob Henandez Lara //
7263be9fbSJob Henandez Lara //===----------------------------------------------------------------------===//
8263be9fbSJob Henandez Lara #include "src/math/fmul.h"
9*a205a854SJob Henandez Lara #include "hdr/errno_macros.h"
10*a205a854SJob Henandez Lara #include "hdr/fenv_macros.h"
11*a205a854SJob Henandez Lara #include "src/__support/FPUtil/double_double.h"
129fb049c8SOverMighty #include "src/__support/FPUtil/generic/mul.h"
13263be9fbSJob Henandez Lara #include "src/__support/common.h"
145ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
15263be9fbSJob Henandez Lara 
165ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
17263be9fbSJob Henandez Lara 
18263be9fbSJob Henandez Lara LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
19*a205a854SJob Henandez Lara 
20*a205a854SJob Henandez Lara   // Without FMA instructions, fputil::exact_mult is not
21*a205a854SJob Henandez Lara   // correctly rounded for all rounding modes, so we fall
22*a205a854SJob Henandez Lara   // back to the generic `fmul` implementation
23*a205a854SJob Henandez Lara 
24*a205a854SJob Henandez Lara #ifndef LIBC_TARGET_CPU_HAS_FMA
259fb049c8SOverMighty   return fputil::generic::mul<float>(x, y);
26*a205a854SJob Henandez Lara #else
27*a205a854SJob Henandez Lara   fputil::DoubleDouble prod = fputil::exact_mult(x, y);
28*a205a854SJob Henandez Lara   using DoubleBits = fputil::FPBits<double>;
29*a205a854SJob Henandez Lara   using DoubleStorageType = typename DoubleBits::StorageType;
30*a205a854SJob Henandez Lara   using FloatBits = fputil::FPBits<float>;
31*a205a854SJob Henandez Lara   using FloatStorageType = typename FloatBits::StorageType;
32*a205a854SJob Henandez Lara   DoubleBits x_bits(x);
33*a205a854SJob Henandez Lara   DoubleBits y_bits(y);
34*a205a854SJob Henandez Lara 
35*a205a854SJob Henandez Lara   Sign result_sign = x_bits.sign() == y_bits.sign() ? Sign::POS : Sign::NEG;
36*a205a854SJob Henandez Lara   double result = prod.hi;
37*a205a854SJob Henandez Lara   DoubleBits hi_bits(prod.hi), lo_bits(prod.lo);
38*a205a854SJob Henandez Lara   // Check for cases where we need to propagate the sticky bits:
39*a205a854SJob Henandez Lara   constexpr uint64_t STICKY_MASK = 0xFFF'FFF; // Lower (52 - 23 - 1 = 28 bits)
40*a205a854SJob Henandez Lara   uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
41*a205a854SJob Henandez Lara   if (LIBC_UNLIKELY(sticky_bits == 0)) {
42*a205a854SJob Henandez Lara     // Might need to propagate sticky bits:
43*a205a854SJob Henandez Lara     if (!(lo_bits.is_inf_or_nan() || lo_bits.is_zero())) {
44*a205a854SJob Henandez Lara       // Now prod.lo is nonzero and finite, we need to propagate sticky bits.
45*a205a854SJob Henandez Lara       if (lo_bits.sign() != hi_bits.sign())
46*a205a854SJob Henandez Lara         result = DoubleBits(hi_bits.uintval() - 1).get_val();
47*a205a854SJob Henandez Lara       else
48*a205a854SJob Henandez Lara         result = DoubleBits(hi_bits.uintval() | 1).get_val();
49*a205a854SJob Henandez Lara     }
50263be9fbSJob Henandez Lara   }
51263be9fbSJob Henandez Lara 
52*a205a854SJob Henandez Lara   float result_f = static_cast<float>(result);
53*a205a854SJob Henandez Lara   FloatBits rf_bits(result_f);
54*a205a854SJob Henandez Lara   uint32_t rf_exp = rf_bits.get_biased_exponent();
55*a205a854SJob Henandez Lara   if (LIBC_LIKELY(rf_exp > 0 && rf_exp < 2 * FloatBits::EXP_BIAS + 1)) {
56*a205a854SJob Henandez Lara     return result_f;
57*a205a854SJob Henandez Lara   }
58*a205a854SJob Henandez Lara 
59*a205a854SJob Henandez Lara   // Now result_f is either inf/nan/zero/denormal.
60*a205a854SJob Henandez Lara   if (x_bits.is_nan() || y_bits.is_nan()) {
61*a205a854SJob Henandez Lara     if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan())
62*a205a854SJob Henandez Lara       fputil::raise_except_if_required(FE_INVALID);
63*a205a854SJob Henandez Lara 
64*a205a854SJob Henandez Lara     if (x_bits.is_quiet_nan()) {
65*a205a854SJob Henandez Lara       DoubleStorageType x_payload = x_bits.get_mantissa();
66*a205a854SJob Henandez Lara       x_payload >>= DoubleBits::FRACTION_LEN - FloatBits::FRACTION_LEN;
67*a205a854SJob Henandez Lara       return FloatBits::quiet_nan(x_bits.sign(),
68*a205a854SJob Henandez Lara                                   static_cast<FloatStorageType>(x_payload))
69*a205a854SJob Henandez Lara           .get_val();
70*a205a854SJob Henandez Lara     }
71*a205a854SJob Henandez Lara 
72*a205a854SJob Henandez Lara     if (y_bits.is_quiet_nan()) {
73*a205a854SJob Henandez Lara       DoubleStorageType y_payload = y_bits.get_mantissa();
74*a205a854SJob Henandez Lara       y_payload >>= DoubleBits::FRACTION_LEN - FloatBits::FRACTION_LEN;
75*a205a854SJob Henandez Lara       return FloatBits::quiet_nan(y_bits.sign(),
76*a205a854SJob Henandez Lara                                   static_cast<FloatStorageType>(y_payload))
77*a205a854SJob Henandez Lara           .get_val();
78*a205a854SJob Henandez Lara     }
79*a205a854SJob Henandez Lara 
80*a205a854SJob Henandez Lara     return FloatBits::quiet_nan().get_val();
81*a205a854SJob Henandez Lara   }
82*a205a854SJob Henandez Lara 
83*a205a854SJob Henandez Lara   if (x_bits.is_inf()) {
84*a205a854SJob Henandez Lara     if (y_bits.is_zero()) {
85*a205a854SJob Henandez Lara       fputil::set_errno_if_required(EDOM);
86*a205a854SJob Henandez Lara       fputil::raise_except_if_required(FE_INVALID);
87*a205a854SJob Henandez Lara 
88*a205a854SJob Henandez Lara       return FloatBits::quiet_nan().get_val();
89*a205a854SJob Henandez Lara     }
90*a205a854SJob Henandez Lara 
91*a205a854SJob Henandez Lara     return FloatBits::inf(result_sign).get_val();
92*a205a854SJob Henandez Lara   }
93*a205a854SJob Henandez Lara 
94*a205a854SJob Henandez Lara   if (y_bits.is_inf()) {
95*a205a854SJob Henandez Lara     if (x_bits.is_zero()) {
96*a205a854SJob Henandez Lara       fputil::set_errno_if_required(EDOM);
97*a205a854SJob Henandez Lara       fputil::raise_except_if_required(FE_INVALID);
98*a205a854SJob Henandez Lara       return FloatBits::quiet_nan().get_val();
99*a205a854SJob Henandez Lara     }
100*a205a854SJob Henandez Lara 
101*a205a854SJob Henandez Lara     return FloatBits::inf(result_sign).get_val();
102*a205a854SJob Henandez Lara   }
103*a205a854SJob Henandez Lara 
104*a205a854SJob Henandez Lara   // Now either x or y is zero, and the other one is finite.
105*a205a854SJob Henandez Lara   if (rf_bits.is_inf()) {
106*a205a854SJob Henandez Lara     fputil::set_errno_if_required(ERANGE);
107*a205a854SJob Henandez Lara     return FloatBits::inf(result_sign).get_val();
108*a205a854SJob Henandez Lara   }
109*a205a854SJob Henandez Lara 
110*a205a854SJob Henandez Lara   if (x_bits.is_zero() || y_bits.is_zero())
111*a205a854SJob Henandez Lara     return FloatBits::zero(result_sign).get_val();
112*a205a854SJob Henandez Lara 
113*a205a854SJob Henandez Lara   fputil::set_errno_if_required(ERANGE);
114*a205a854SJob Henandez Lara   fputil::raise_except_if_required(FE_UNDERFLOW);
115*a205a854SJob Henandez Lara   return result_f;
116*a205a854SJob Henandez Lara 
117*a205a854SJob Henandez Lara #endif
118*a205a854SJob Henandez Lara }
1195ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
120