xref: /llvm-project/libc/src/__support/FPUtil/generic/FMod.h (revision 5ff3ff33ff930e4ec49da7910612d8a41eb068cb)
1b8e8012aSKirill Okhotnikov //===-- Common header for fmod implementations ------------------*- C++ -*-===//
2b8e8012aSKirill Okhotnikov //
3b8e8012aSKirill Okhotnikov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b8e8012aSKirill Okhotnikov // See https://llvm.org/LICENSE.txt for license information.
5b8e8012aSKirill Okhotnikov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b8e8012aSKirill Okhotnikov //
7b8e8012aSKirill Okhotnikov //===----------------------------------------------------------------------===//
8b8e8012aSKirill Okhotnikov 
9270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H
10270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H
11b8e8012aSKirill Okhotnikov 
121d894788SGuillaume Chatelet #include "src/__support/CPP/bit.h"
1391eb0b65SGuillaume Chatelet #include "src/__support/CPP/limits.h"
14f7226150SGuillaume Chatelet #include "src/__support/CPP/type_traits.h"
15b8e8012aSKirill Okhotnikov #include "src/__support/FPUtil/FEnvImpl.h"
16b8e8012aSKirill Okhotnikov #include "src/__support/FPUtil/FPBits.h"
17*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
18737e1cd1SGuillaume Chatelet #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
19b8e8012aSKirill Okhotnikov 
20*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
21b8e8012aSKirill Okhotnikov namespace fputil {
22b8e8012aSKirill Okhotnikov namespace generic {
23b8e8012aSKirill Okhotnikov 
24b8e8012aSKirill Okhotnikov //  Objective:
25b8e8012aSKirill Okhotnikov //    The  algorithm uses  integer arithmetic  (max uint64_t)  for general case.
26b8e8012aSKirill Okhotnikov //    Some common  cases, like  abs(x) < abs(y)  or  abs(x) < 1000 *  abs(y) are
27b8e8012aSKirill Okhotnikov //    treated specially to increase  performance.  The part of checking  special
28b8e8012aSKirill Okhotnikov //    cases, numbers NaN, INF etc. treated separately.
29b8e8012aSKirill Okhotnikov //
30b8e8012aSKirill Okhotnikov //  Objective:
31b8e8012aSKirill Okhotnikov //    1) FMod definition (https://cplusplus.com/reference/cmath/fmod/):
32b8e8012aSKirill Okhotnikov //       fmod = numer - tquot * denom, where tquot is the truncated
33b8e8012aSKirill Okhotnikov //       (i.e., rounded towards zero) result of: numer/denom.
34b8e8012aSKirill Okhotnikov //    2) FMod with negative x and/or y can be trivially converted to fmod for
35b8e8012aSKirill Okhotnikov //       positive x and y. Therefore the algorithm below works only with
36b8e8012aSKirill Okhotnikov //       positive numbers.
37b8e8012aSKirill Okhotnikov //    3) All positive floating point numbers can be represented as m * 2^e,
38b8e8012aSKirill Okhotnikov //       where "m" is positive integer and "e" is signed.
39b8e8012aSKirill Okhotnikov //    4) FMod function can be calculated in integer numbers (x > y):
40b8e8012aSKirill Okhotnikov //         fmod = m_x * 2^e_x - tquot * m_y * 2^e_y
41b8e8012aSKirill Okhotnikov //              = 2^e_y * (m_x * 2^(e_x - e^y) - tquot * m_y).
42b8e8012aSKirill Okhotnikov //       All variables in parentheses are unsigned integers.
43b8e8012aSKirill Okhotnikov //
44b8e8012aSKirill Okhotnikov //  Mathematical background:
45b8e8012aSKirill Okhotnikov //    Input x,y in the algorithm is represented (mathematically) like m_x*2^e_x
46b8e8012aSKirill Okhotnikov //    and m_y*2^e_y. This is an ambiguous number representation. For example:
47b8e8012aSKirill Okhotnikov //      m * 2^e = (2 * m) * 2^(e-1)
48b8e8012aSKirill Okhotnikov //    The algorithm uses the facts that
49b8e8012aSKirill Okhotnikov //      r = a % b = (a % (N * b)) % b,
50b8e8012aSKirill Okhotnikov //      (a * c) % (b * c) = (a % b) * c
51b8e8012aSKirill Okhotnikov //    where N is positive  integer number. a, b and c - positive. Let's  adopt
52b8e8012aSKirill Okhotnikov //    the formula for representation above.
53b8e8012aSKirill Okhotnikov //      a = m_x * 2^e_x, b = m_y * 2^e_y, N = 2^k
54b8e8012aSKirill Okhotnikov //      r(k) = a % b = (m_x * 2^e_x) % (2^k * m_y * 2^e_y)
55b8e8012aSKirill Okhotnikov //           = 2^(e_y + k) * (m_x * 2^(e_x - e_y - k) % m_y)
56b8e8012aSKirill Okhotnikov //      r(k) = m_r * 2^e_r = (m_x % m_y) * 2^(m_y + k)
57b8e8012aSKirill Okhotnikov //           = (2^p * (m_x % m_y) * 2^(e_y + k - p))
58b8e8012aSKirill Okhotnikov //        m_r = 2^p * (m_x % m_y), e_r = m_y + k - p
59b8e8012aSKirill Okhotnikov //
60b8e8012aSKirill Okhotnikov //  Algorithm description:
61b8e8012aSKirill Okhotnikov //  First, let write x = m_x * 2^e_x and y = m_y * 2^e_y with m_x, m_y, e_x, e_y
62b8e8012aSKirill Okhotnikov //  are integers (m_x amd m_y positive).
63b8e8012aSKirill Okhotnikov //  Then the naive  implementation of the fmod function with a simple
64b8e8012aSKirill Okhotnikov //  for/while loop:
65b8e8012aSKirill Okhotnikov //      while (e_x > e_y) {
66b8e8012aSKirill Okhotnikov //        m_x *= 2; --e_x; //  m_x * 2^e_x == 2 * m_x * 2^(e_x - 1)
67b8e8012aSKirill Okhotnikov //        m_x %= m_y;
68b8e8012aSKirill Okhotnikov //      }
69b8e8012aSKirill Okhotnikov //  On the other hand, the algorithm exploits the fact that m_x, m_y are the
70b8e8012aSKirill Okhotnikov //  mantissas of floating point numbers, which use less bits than the storage
71b8e8012aSKirill Okhotnikov //  integers: 24 / 32 for floats and 53 / 64 for doubles, so if in each step of
72b8e8012aSKirill Okhotnikov //  the iteration, we can left shift m_x as many bits as the storage integer
73b8e8012aSKirill Okhotnikov //  type can hold, the exponent reduction per step will be at least 32 - 24 = 8
74b8e8012aSKirill Okhotnikov //  for floats and 64 - 53 = 11 for doubles (double example below):
75b8e8012aSKirill Okhotnikov //      while (e_x > e_y) {
76b8e8012aSKirill Okhotnikov //        m_x <<= 11; e_x -= 11; //  m_x * 2^e_x == 2^11 * m_x * 2^(e_x - 11)
77b8e8012aSKirill Okhotnikov //        m_x %= m_y;
78b8e8012aSKirill Okhotnikov //      }
79b8e8012aSKirill Okhotnikov //  Some extra improvements are done:
80b8e8012aSKirill Okhotnikov //    1) Shift m_y maximum to the right, which can significantly improve
81b8e8012aSKirill Okhotnikov //       performance for small integer numbers (y = 3 for example).
82b8e8012aSKirill Okhotnikov //       The m_x shift in the loop can be 62 instead of 11 for double.
83b8e8012aSKirill Okhotnikov //    2) For some architectures with very slow division, it can be better to
84b8e8012aSKirill Okhotnikov //       calculate inverse value ones, and after do multiplication in the loop.
85b8e8012aSKirill Okhotnikov //    3) "likely" special cases are treated specially to improve performance.
86b8e8012aSKirill Okhotnikov //
87b8e8012aSKirill Okhotnikov //  Simple example:
88b8e8012aSKirill Okhotnikov //  The examples below use byte for simplicity.
89b8e8012aSKirill Okhotnikov //    1) Shift hy maximum to right without losing bits and increase iy value
90b8e8012aSKirill Okhotnikov //       m_y = 0b00101100 e_y = 20 after shift m_y = 0b00001011 e_y = 22.
91b8e8012aSKirill Okhotnikov //    2) m_x = m_x % m_y.
92b8e8012aSKirill Okhotnikov //    3) Move m_x maximum to left. Note that after (m_x = m_x % m_y) CLZ in m_x
93b8e8012aSKirill Okhotnikov //    is not lower than CLZ in m_y. m_x=0b00001001 e_x = 100, m_x=0b10010000,
94b8e8012aSKirill Okhotnikov //       e_x = 100-4 = 96.
95b8e8012aSKirill Okhotnikov //    4) Repeat (2) until e_x == e_y.
96b8e8012aSKirill Okhotnikov //
97b8e8012aSKirill Okhotnikov //  Complexity analysis (double):
98b8e8012aSKirill Okhotnikov //    Converting x,y to (m_x,e_x),(m_y, e_y): CTZ/shift/AND/OR/if. Loop  count:
99b8e8012aSKirill Okhotnikov //      (m_x - m_y) / (64 -  "length of m_y").
100b8e8012aSKirill Okhotnikov //      max("length of m_y")  = 53,
101b8e8012aSKirill Okhotnikov //      max(e_x - e_y)  = 2048
102b8e8012aSKirill Okhotnikov //    Maximum operation is  186. For rare "unrealistic" cases.
103b8e8012aSKirill Okhotnikov //
104b8e8012aSKirill Okhotnikov //  Special cases (double):
105b8e8012aSKirill Okhotnikov //    Supposing  that  case  where |y| > 1e-292 and |x/y|<2000  is  very  common
106b8e8012aSKirill Okhotnikov //    special processing is implemented. No m_y alignment, no loop:
107b8e8012aSKirill Okhotnikov //      result = (m_x * 2^(e_x - e_y)) % m_y.
108b8e8012aSKirill Okhotnikov //    When x and y are both subnormal (rare case but...) the
109b8e8012aSKirill Okhotnikov //      result = m_x % m_y.
110b8e8012aSKirill Okhotnikov //    Simplified conversion back to double.
111b8e8012aSKirill Okhotnikov 
112b8e8012aSKirill Okhotnikov // Exceptional cases handler according to cppreference.com
113b8e8012aSKirill Okhotnikov //    https://en.cppreference.com/w/cpp/numeric/math/fmod
114b8e8012aSKirill Okhotnikov // and POSIX standard described in Linux man
115b8e8012aSKirill Okhotnikov //   https://man7.org/linux/man-pages/man3/fmod.3p.html
116b8e8012aSKirill Okhotnikov // C standard for the function is not full, so not by default (although it can
117b8e8012aSKirill Okhotnikov // be implemented in another handler.
11827aca975SKirill Okhotnikov // Signaling NaN converted to quiet NaN with FE_INVALID exception.
11927aca975SKirill Okhotnikov //    https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1011.htm
1204d21e752Slntue template <typename T> struct FModDivisionSimpleHelper {
1214d21e752Slntue   LIBC_INLINE constexpr static T execute(int exp_diff, int sides_zeroes_count,
1224d21e752Slntue                                          T m_x, T m_y) {
1234d21e752Slntue     while (exp_diff > sides_zeroes_count) {
1244d21e752Slntue       exp_diff -= sides_zeroes_count;
1254d21e752Slntue       m_x <<= sides_zeroes_count;
1264d21e752Slntue       m_x %= m_y;
1274d21e752Slntue     }
1284d21e752Slntue     m_x <<= exp_diff;
1294d21e752Slntue     m_x %= m_y;
1304d21e752Slntue     return m_x;
1314d21e752Slntue   }
1324d21e752Slntue };
133b8e8012aSKirill Okhotnikov 
1344d21e752Slntue template <typename T> struct FModDivisionInvMultHelper {
1354d21e752Slntue   LIBC_INLINE constexpr static T execute(int exp_diff, int sides_zeroes_count,
1364d21e752Slntue                                          T m_x, T m_y) {
1374d21e752Slntue     constexpr int LENGTH = sizeof(T) * CHAR_BIT;
1384d21e752Slntue     if (exp_diff > sides_zeroes_count) {
1394d21e752Slntue       T inv_hy = (cpp::numeric_limits<T>::max() / m_y);
1404d21e752Slntue       while (exp_diff > sides_zeroes_count) {
1414d21e752Slntue         exp_diff -= sides_zeroes_count;
1424d21e752Slntue         T hd = (m_x * inv_hy) >> (LENGTH - sides_zeroes_count);
1434d21e752Slntue         m_x <<= sides_zeroes_count;
1444d21e752Slntue         m_x -= hd * m_y;
1454d21e752Slntue         while (LIBC_UNLIKELY(m_x > m_y))
1464d21e752Slntue           m_x -= m_y;
1474d21e752Slntue       }
1484d21e752Slntue       T hd = (m_x * inv_hy) >> (LENGTH - exp_diff);
1494d21e752Slntue       m_x <<= exp_diff;
1504d21e752Slntue       m_x -= hd * m_y;
1514d21e752Slntue       while (LIBC_UNLIKELY(m_x > m_y))
1524d21e752Slntue         m_x -= m_y;
1534d21e752Slntue     } else {
1544d21e752Slntue       m_x <<= exp_diff;
1554d21e752Slntue       m_x %= m_y;
1564d21e752Slntue     }
1574d21e752Slntue     return m_x;
1584d21e752Slntue   }
1594d21e752Slntue };
1604d21e752Slntue 
1614d21e752Slntue template <typename T, typename U = typename FPBits<T>::StorageType,
1624d21e752Slntue           typename DivisionHelper = FModDivisionSimpleHelper<U>>
1634d21e752Slntue class FMod {
1640ccec541SMikhail R. Gadelha   static_assert(cpp::is_floating_point_v<T> &&
1650ccec541SMikhail R. Gadelha                     is_unsigned_integral_or_big_int_v<U> &&
1664d21e752Slntue                     (sizeof(U) * CHAR_BIT > FPBits<T>::FRACTION_LEN),
1674d21e752Slntue                 "FMod instantiated with invalid type.");
1684d21e752Slntue 
1694d21e752Slntue private:
1704d21e752Slntue   using FPB = FPBits<T>;
1714d21e752Slntue   using StorageType = typename FPB::StorageType;
172b8e8012aSKirill Okhotnikov 
173e35c7149STue Ly   LIBC_INLINE static bool pre_check(T x, T y, T &out) {
174a9af317fSGuillaume Chatelet     using FPB = fputil::FPBits<T>;
175ace383dfSGuillaume Chatelet     const T quiet_nan = FPB::quiet_nan().get_val();
176a9af317fSGuillaume Chatelet     FPB sx(x), sy(y);
17729f8e076SGuillaume Chatelet     if (LIBC_LIKELY(!sy.is_zero() && !sy.is_inf_or_nan() &&
1784d21e752Slntue                     !sx.is_inf_or_nan()))
179b8e8012aSKirill Okhotnikov       return false;
180b8e8012aSKirill Okhotnikov 
18127aca975SKirill Okhotnikov     if (sx.is_nan() || sy.is_nan()) {
1824d21e752Slntue       if (sx.is_signaling_nan() || sy.is_signaling_nan())
183e35c7149STue Ly         fputil::raise_except_if_required(FE_INVALID);
184e35c7149STue Ly       out = quiet_nan;
185b8e8012aSKirill Okhotnikov       return true;
186b8e8012aSKirill Okhotnikov     }
187b8e8012aSKirill Okhotnikov 
18827aca975SKirill Okhotnikov     if (sx.is_inf() || sy.is_zero()) {
189e35c7149STue Ly       fputil::raise_except_if_required(FE_INVALID);
190e35c7149STue Ly       fputil::set_errno_if_required(EDOM);
191e35c7149STue Ly       out = quiet_nan;
19227aca975SKirill Okhotnikov       return true;
19327aca975SKirill Okhotnikov     }
19427aca975SKirill Okhotnikov 
195b8e8012aSKirill Okhotnikov     out = x;
196b8e8012aSKirill Okhotnikov     return true;
197b8e8012aSKirill Okhotnikov   }
198b8e8012aSKirill Okhotnikov 
199a9af317fSGuillaume Chatelet   LIBC_INLINE static constexpr FPB eval_internal(FPB sx, FPB sy) {
200b8e8012aSKirill Okhotnikov 
20129f8e076SGuillaume Chatelet     if (LIBC_LIKELY(sx.uintval() <= sy.uintval())) {
202b8e8012aSKirill Okhotnikov       if (sx.uintval() < sy.uintval())
203b8e8012aSKirill Okhotnikov         return sx;             // |x|<|y| return x
2044d21e752Slntue       return FPB::zero();      // |x|=|y| return 0.0
205b8e8012aSKirill Okhotnikov     }
206b8e8012aSKirill Okhotnikov 
2077b387d27SGuillaume Chatelet     int e_x = sx.get_biased_exponent();
2087b387d27SGuillaume Chatelet     int e_y = sy.get_biased_exponent();
209b8e8012aSKirill Okhotnikov 
2103546f4daSGuillaume Chatelet     // Most common case where |y| is "very normal" and |x/y| < 2^EXP_LEN
2113546f4daSGuillaume Chatelet     if (LIBC_LIKELY(e_y > int(FPB::FRACTION_LEN) &&
2123546f4daSGuillaume Chatelet                     e_x - e_y <= int(FPB::EXP_LEN))) {
2133546f4daSGuillaume Chatelet       StorageType m_x = sx.get_explicit_mantissa();
2143546f4daSGuillaume Chatelet       StorageType m_y = sy.get_explicit_mantissa();
2150cdb0b74SOverMighty       StorageType d = (e_x == e_y)
2160cdb0b74SOverMighty                           ? (m_x - m_y)
2170cdb0b74SOverMighty                           : static_cast<StorageType>(m_x << (e_x - e_y)) % m_y;
218b8e8012aSKirill Okhotnikov       if (d == 0)
2194d21e752Slntue         return FPB::zero();
220b8e8012aSKirill Okhotnikov       // iy - 1 because of "zero power" for number with power 1
221a9af317fSGuillaume Chatelet       return FPB::make_value(d, e_y - 1);
222b8e8012aSKirill Okhotnikov     }
2234d21e752Slntue     // Both subnormal special case.
22429f8e076SGuillaume Chatelet     if (LIBC_UNLIKELY(e_x == 0 && e_y == 0)) {
225a9af317fSGuillaume Chatelet       FPB d;
226b8e8012aSKirill Okhotnikov       d.set_mantissa(sx.uintval() % sy.uintval());
227b8e8012aSKirill Okhotnikov       return d;
228b8e8012aSKirill Okhotnikov     }
229b8e8012aSKirill Okhotnikov 
230b8e8012aSKirill Okhotnikov     // Note that hx is not subnormal by conditions above.
2314d21e752Slntue     U m_x = static_cast<U>(sx.get_explicit_mantissa());
232b8e8012aSKirill Okhotnikov     e_x--;
233b8e8012aSKirill Okhotnikov 
2344d21e752Slntue     U m_y = static_cast<U>(sy.get_explicit_mantissa());
2354d21e752Slntue     constexpr int DEFAULT_LEAD_ZEROS =
2364d21e752Slntue         sizeof(U) * CHAR_BIT - FPB::FRACTION_LEN - 1;
2374d21e752Slntue     int lead_zeros_m_y = DEFAULT_LEAD_ZEROS;
23829f8e076SGuillaume Chatelet     if (LIBC_LIKELY(e_y > 0)) {
239b8e8012aSKirill Okhotnikov       e_y--;
240b8e8012aSKirill Okhotnikov     } else {
2414d21e752Slntue       m_y = static_cast<U>(sy.get_mantissa());
2421d894788SGuillaume Chatelet       lead_zeros_m_y = cpp::countl_zero(m_y);
243b8e8012aSKirill Okhotnikov     }
244b8e8012aSKirill Okhotnikov 
245b8e8012aSKirill Okhotnikov     // Assume hy != 0
2461d894788SGuillaume Chatelet     int tail_zeros_m_y = cpp::countr_zero(m_y);
247b8e8012aSKirill Okhotnikov     int sides_zeroes_count = lead_zeros_m_y + tail_zeros_m_y;
248b8e8012aSKirill Okhotnikov     // n > 0 by conditions above
249b8e8012aSKirill Okhotnikov     int exp_diff = e_x - e_y;
250b8e8012aSKirill Okhotnikov     {
251b8e8012aSKirill Okhotnikov       // Shift hy right until the end or n = 0
252b8e8012aSKirill Okhotnikov       int right_shift = exp_diff < tail_zeros_m_y ? exp_diff : tail_zeros_m_y;
253b8e8012aSKirill Okhotnikov       m_y >>= right_shift;
254b8e8012aSKirill Okhotnikov       exp_diff -= right_shift;
255b8e8012aSKirill Okhotnikov       e_y += right_shift;
256b8e8012aSKirill Okhotnikov     }
257b8e8012aSKirill Okhotnikov 
258b8e8012aSKirill Okhotnikov     {
259b8e8012aSKirill Okhotnikov       // Shift hx left until the end or n = 0
2604d21e752Slntue       int left_shift =
2614d21e752Slntue           exp_diff < DEFAULT_LEAD_ZEROS ? exp_diff : DEFAULT_LEAD_ZEROS;
262b8e8012aSKirill Okhotnikov       m_x <<= left_shift;
263b8e8012aSKirill Okhotnikov       exp_diff -= left_shift;
264b8e8012aSKirill Okhotnikov     }
265b8e8012aSKirill Okhotnikov 
266b8e8012aSKirill Okhotnikov     m_x %= m_y;
26729f8e076SGuillaume Chatelet     if (LIBC_UNLIKELY(m_x == 0))
2684d21e752Slntue       return FPB::zero();
269b8e8012aSKirill Okhotnikov 
270b8e8012aSKirill Okhotnikov     if (exp_diff == 0)
2714d21e752Slntue       return FPB::make_value(static_cast<StorageType>(m_x), e_y);
272b8e8012aSKirill Okhotnikov 
2734d21e752Slntue     // hx next can't be 0, because hx < hy, hy % 2 == 1 hx * 2^i % hy != 0
274b8e8012aSKirill Okhotnikov     m_x = DivisionHelper::execute(exp_diff, sides_zeroes_count, m_x, m_y);
2754d21e752Slntue     return FPB::make_value(static_cast<StorageType>(m_x), e_y);
276b8e8012aSKirill Okhotnikov   }
277b8e8012aSKirill Okhotnikov 
278b8e8012aSKirill Okhotnikov public:
27959c809cdSSiva Chandra Reddy   LIBC_INLINE static T eval(T x, T y) {
2804d21e752Slntue     if (T out; LIBC_UNLIKELY(pre_check(x, y, out)))
281b8e8012aSKirill Okhotnikov       return out;
282a9af317fSGuillaume Chatelet     FPB sx(x), sy(y);
28311ec512fSGuillaume Chatelet     Sign sign = sx.sign();
28411ec512fSGuillaume Chatelet     sx.set_sign(Sign::POS);
28511ec512fSGuillaume Chatelet     sy.set_sign(Sign::POS);
286a9af317fSGuillaume Chatelet     FPB result = eval_internal(sx, sy);
287b8e8012aSKirill Okhotnikov     result.set_sign(sign);
288b8e8012aSKirill Okhotnikov     return result.get_val();
289b8e8012aSKirill Okhotnikov   }
290b8e8012aSKirill Okhotnikov };
291b8e8012aSKirill Okhotnikov 
292b8e8012aSKirill Okhotnikov } // namespace generic
293b8e8012aSKirill Okhotnikov } // namespace fputil
294*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
295b8e8012aSKirill Okhotnikov 
296270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H
297