136ac495dSmrg /* Division and remainder routines for Tile.
2*8feb0f0bSmrg Copyright (C) 2011-2020 Free Software Foundation, Inc.
336ac495dSmrg Contributed by Walter Lee (walt@tilera.com)
436ac495dSmrg
536ac495dSmrg This file is free software; you can redistribute it and/or modify it
636ac495dSmrg under the terms of the GNU General Public License as published by the
736ac495dSmrg Free Software Foundation; either version 3, or (at your option) any
836ac495dSmrg later version.
936ac495dSmrg
1036ac495dSmrg This file is distributed in the hope that it will be useful, but
1136ac495dSmrg WITHOUT ANY WARRANTY; without even the implied warranty of
1236ac495dSmrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1336ac495dSmrg General Public License for more details.
1436ac495dSmrg
1536ac495dSmrg Under Section 7 of GPL version 3, you are granted additional
1636ac495dSmrg permissions described in the GCC Runtime Library Exception, version
1736ac495dSmrg 3.1, as published by the Free Software Foundation.
1836ac495dSmrg
1936ac495dSmrg You should have received a copy of the GNU General Public License and
2036ac495dSmrg a copy of the GCC Runtime Library Exception along with this program;
2136ac495dSmrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
2236ac495dSmrg <http://www.gnu.org/licenses/>. */
2336ac495dSmrg
2436ac495dSmrg typedef int int32_t;
2536ac495dSmrg typedef unsigned uint32_t;
2636ac495dSmrg typedef long long int64_t;
2736ac495dSmrg typedef unsigned long long uint64_t;
2836ac495dSmrg
2936ac495dSmrg /* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */
3036ac495dSmrg static inline void
raise_intdiv(void)3136ac495dSmrg raise_intdiv (void)
3236ac495dSmrg {
3336ac495dSmrg asm ("{ raise; moveli zero, 8 + (1 << 6) }");
3436ac495dSmrg }
3536ac495dSmrg
3636ac495dSmrg
3736ac495dSmrg #ifndef __tilegx__
3836ac495dSmrg /*__udivsi3 - 32 bit integer unsigned divide */
3936ac495dSmrg static inline uint32_t __attribute__ ((always_inline))
__udivsi3_inline(uint32_t dividend,uint32_t divisor)4036ac495dSmrg __udivsi3_inline (uint32_t dividend, uint32_t divisor)
4136ac495dSmrg {
4236ac495dSmrg /* Divide out any power of two factor from dividend and divisor.
4336ac495dSmrg Note that when dividing by zero the divisor will remain zero,
4436ac495dSmrg which is all we need to detect that case below. */
4536ac495dSmrg const int power_of_two_factor = __insn_ctz (divisor);
4636ac495dSmrg divisor >>= power_of_two_factor;
4736ac495dSmrg dividend >>= power_of_two_factor;
4836ac495dSmrg
4936ac495dSmrg /* Checks for division by power of two or division by zero. */
5036ac495dSmrg if (divisor <= 1)
5136ac495dSmrg {
5236ac495dSmrg if (divisor == 0)
5336ac495dSmrg {
5436ac495dSmrg raise_intdiv ();
5536ac495dSmrg return 0;
5636ac495dSmrg }
5736ac495dSmrg return dividend;
5836ac495dSmrg }
5936ac495dSmrg
6036ac495dSmrg /* Compute (a / b) by repeatedly finding the largest N
6136ac495dSmrg such that (b << N) <= a. For each such N, set bit N in the
6236ac495dSmrg quotient, subtract (b << N) from a, and keep going. Think of this as
6336ac495dSmrg the reverse of the "shift-and-add" that a multiply does. The values
6436ac495dSmrg of N are precisely those shift counts.
6536ac495dSmrg
6636ac495dSmrg Finding N is easy. First, use clz(b) - clz(a) to find the N
6736ac495dSmrg that lines up the high bit of (b << N) with the high bit of a.
6836ac495dSmrg Any larger value of N would definitely make (b << N) > a,
6936ac495dSmrg which is too big.
7036ac495dSmrg
7136ac495dSmrg Then, if (b << N) > a (because it has larger low bits), decrement
7236ac495dSmrg N by one. This adjustment will definitely make (b << N) less
7336ac495dSmrg than a, because a's high bit is now one higher than b's. */
7436ac495dSmrg
7536ac495dSmrg /* Precomputing the max_ values allows us to avoid a subtract
7636ac495dSmrg in the inner loop and just right shift by clz(remainder). */
7736ac495dSmrg const int divisor_clz = __insn_clz (divisor);
7836ac495dSmrg const uint32_t max_divisor = divisor << divisor_clz;
7936ac495dSmrg const uint32_t max_qbit = 1 << divisor_clz;
8036ac495dSmrg
8136ac495dSmrg uint32_t quotient = 0;
8236ac495dSmrg uint32_t remainder = dividend;
8336ac495dSmrg
8436ac495dSmrg while (remainder >= divisor)
8536ac495dSmrg {
8636ac495dSmrg int shift = __insn_clz (remainder);
8736ac495dSmrg uint32_t scaled_divisor = max_divisor >> shift;
8836ac495dSmrg uint32_t quotient_bit = max_qbit >> shift;
8936ac495dSmrg
9036ac495dSmrg int too_big = (scaled_divisor > remainder);
9136ac495dSmrg scaled_divisor >>= too_big;
9236ac495dSmrg quotient_bit >>= too_big;
9336ac495dSmrg remainder -= scaled_divisor;
9436ac495dSmrg quotient |= quotient_bit;
9536ac495dSmrg }
9636ac495dSmrg return quotient;
9736ac495dSmrg }
9836ac495dSmrg #endif /* !__tilegx__ */
9936ac495dSmrg
10036ac495dSmrg
10136ac495dSmrg /* __udivdi3 - 64 bit integer unsigned divide */
10236ac495dSmrg static inline uint64_t __attribute__ ((always_inline))
__udivdi3_inline(uint64_t dividend,uint64_t divisor)10336ac495dSmrg __udivdi3_inline (uint64_t dividend, uint64_t divisor)
10436ac495dSmrg {
10536ac495dSmrg /* Divide out any power of two factor from dividend and divisor.
10636ac495dSmrg Note that when dividing by zero the divisor will remain zero,
10736ac495dSmrg which is all we need to detect that case below. */
10836ac495dSmrg const int power_of_two_factor = __builtin_ctzll (divisor);
10936ac495dSmrg divisor >>= power_of_two_factor;
11036ac495dSmrg dividend >>= power_of_two_factor;
11136ac495dSmrg
11236ac495dSmrg /* Checks for division by power of two or division by zero. */
11336ac495dSmrg if (divisor <= 1)
11436ac495dSmrg {
11536ac495dSmrg if (divisor == 0)
11636ac495dSmrg {
11736ac495dSmrg raise_intdiv ();
11836ac495dSmrg return 0;
11936ac495dSmrg }
12036ac495dSmrg return dividend;
12136ac495dSmrg }
12236ac495dSmrg
12336ac495dSmrg #ifndef __tilegx__
12436ac495dSmrg if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
12536ac495dSmrg {
12636ac495dSmrg /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
12736ac495dSmrg return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
12836ac495dSmrg }
12936ac495dSmrg #endif /* !__tilegx__ */
13036ac495dSmrg
13136ac495dSmrg /* See algorithm description in __udivsi3 */
13236ac495dSmrg
13336ac495dSmrg const int divisor_clz = __builtin_clzll (divisor);
13436ac495dSmrg const uint64_t max_divisor = divisor << divisor_clz;
13536ac495dSmrg const uint64_t max_qbit = 1ULL << divisor_clz;
13636ac495dSmrg
13736ac495dSmrg uint64_t quotient = 0;
13836ac495dSmrg uint64_t remainder = dividend;
13936ac495dSmrg
14036ac495dSmrg while (remainder >= divisor)
14136ac495dSmrg {
14236ac495dSmrg int shift = __builtin_clzll (remainder);
14336ac495dSmrg uint64_t scaled_divisor = max_divisor >> shift;
14436ac495dSmrg uint64_t quotient_bit = max_qbit >> shift;
14536ac495dSmrg
14636ac495dSmrg int too_big = (scaled_divisor > remainder);
14736ac495dSmrg scaled_divisor >>= too_big;
14836ac495dSmrg quotient_bit >>= too_big;
14936ac495dSmrg remainder -= scaled_divisor;
15036ac495dSmrg quotient |= quotient_bit;
15136ac495dSmrg }
15236ac495dSmrg return quotient;
15336ac495dSmrg }
15436ac495dSmrg
15536ac495dSmrg
15636ac495dSmrg #ifndef __tilegx__
15736ac495dSmrg /* __umodsi3 - 32 bit integer unsigned modulo */
15836ac495dSmrg static inline uint32_t __attribute__ ((always_inline))
__umodsi3_inline(uint32_t dividend,uint32_t divisor)15936ac495dSmrg __umodsi3_inline (uint32_t dividend, uint32_t divisor)
16036ac495dSmrg {
16136ac495dSmrg /* Shortcircuit mod by a power of two (and catch mod by zero). */
16236ac495dSmrg const uint32_t mask = divisor - 1;
16336ac495dSmrg if ((divisor & mask) == 0)
16436ac495dSmrg {
16536ac495dSmrg if (divisor == 0)
16636ac495dSmrg {
16736ac495dSmrg raise_intdiv ();
16836ac495dSmrg return 0;
16936ac495dSmrg }
17036ac495dSmrg return dividend & mask;
17136ac495dSmrg }
17236ac495dSmrg
17336ac495dSmrg /* We compute the remainder (a % b) by repeatedly subtracting off
17436ac495dSmrg multiples of b from a until a < b. The key is that subtracting
17536ac495dSmrg off a multiple of b does not affect the result mod b.
17636ac495dSmrg
17736ac495dSmrg To make the algorithm run efficiently, we need to subtract
17836ac495dSmrg off a large multiple of b at each step. We subtract the largest
17936ac495dSmrg (b << N) that is <= a.
18036ac495dSmrg
18136ac495dSmrg Finding N is easy. First, use clz(b) - clz(a) to find the N
18236ac495dSmrg that lines up the high bit of (b << N) with the high bit of a.
18336ac495dSmrg Any larger value of N would definitely make (b << N) > a,
18436ac495dSmrg which is too big.
18536ac495dSmrg
18636ac495dSmrg Then, if (b << N) > a (because it has larger low bits), decrement
18736ac495dSmrg N by one. This adjustment will definitely make (b << N) less
18836ac495dSmrg than a, because a's high bit is now one higher than b's. */
18936ac495dSmrg const uint32_t max_divisor = divisor << __insn_clz (divisor);
19036ac495dSmrg
19136ac495dSmrg uint32_t remainder = dividend;
19236ac495dSmrg while (remainder >= divisor)
19336ac495dSmrg {
19436ac495dSmrg const int shift = __insn_clz (remainder);
19536ac495dSmrg uint32_t scaled_divisor = max_divisor >> shift;
19636ac495dSmrg scaled_divisor >>= (scaled_divisor > remainder);
19736ac495dSmrg remainder -= scaled_divisor;
19836ac495dSmrg }
19936ac495dSmrg
20036ac495dSmrg return remainder;
20136ac495dSmrg }
20236ac495dSmrg #endif /* !__tilegx__ */
20336ac495dSmrg
20436ac495dSmrg
20536ac495dSmrg /* __umoddi3 - 64 bit integer unsigned modulo */
20636ac495dSmrg static inline uint64_t __attribute__ ((always_inline))
__umoddi3_inline(uint64_t dividend,uint64_t divisor)20736ac495dSmrg __umoddi3_inline (uint64_t dividend, uint64_t divisor)
20836ac495dSmrg {
20936ac495dSmrg #ifndef __tilegx__
21036ac495dSmrg if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
21136ac495dSmrg {
21236ac495dSmrg /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
21336ac495dSmrg return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
21436ac495dSmrg }
21536ac495dSmrg #endif /* !__tilegx__ */
21636ac495dSmrg
21736ac495dSmrg /* Shortcircuit mod by a power of two (and catch mod by zero). */
21836ac495dSmrg const uint64_t mask = divisor - 1;
21936ac495dSmrg if ((divisor & mask) == 0)
22036ac495dSmrg {
22136ac495dSmrg if (divisor == 0)
22236ac495dSmrg {
22336ac495dSmrg raise_intdiv ();
22436ac495dSmrg return 0;
22536ac495dSmrg }
22636ac495dSmrg return dividend & mask;
22736ac495dSmrg }
22836ac495dSmrg
22936ac495dSmrg /* See algorithm description in __umodsi3 */
23036ac495dSmrg const uint64_t max_divisor = divisor << __builtin_clzll (divisor);
23136ac495dSmrg
23236ac495dSmrg uint64_t remainder = dividend;
23336ac495dSmrg while (remainder >= divisor)
23436ac495dSmrg {
23536ac495dSmrg const int shift = __builtin_clzll (remainder);
23636ac495dSmrg uint64_t scaled_divisor = max_divisor >> shift;
23736ac495dSmrg scaled_divisor >>= (scaled_divisor > remainder);
23836ac495dSmrg remainder -= scaled_divisor;
23936ac495dSmrg }
24036ac495dSmrg
24136ac495dSmrg return remainder;
24236ac495dSmrg }
24336ac495dSmrg
24436ac495dSmrg
24536ac495dSmrg uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor);
24636ac495dSmrg #ifdef L_tile_udivsi3
24736ac495dSmrg uint32_t
__udivsi3(uint32_t dividend,uint32_t divisor)24836ac495dSmrg __udivsi3 (uint32_t dividend, uint32_t divisor)
24936ac495dSmrg {
25036ac495dSmrg #ifndef __tilegx__
25136ac495dSmrg return __udivsi3_inline (dividend, divisor);
25236ac495dSmrg #else /* !__tilegx__ */
25336ac495dSmrg uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor));
25436ac495dSmrg return (uint32_t) n;
25536ac495dSmrg #endif /* !__tilegx__ */
25636ac495dSmrg }
25736ac495dSmrg #endif
25836ac495dSmrg
25936ac495dSmrg #define ABS(x) ((x) >= 0 ? (x) : -(x))
26036ac495dSmrg
26136ac495dSmrg int32_t __divsi3 (int32_t dividend, int32_t divisor);
26236ac495dSmrg #ifdef L_tile_divsi3
26336ac495dSmrg /* __divsi3 - 32 bit integer signed divide */
26436ac495dSmrg int32_t
__divsi3(int32_t dividend,int32_t divisor)26536ac495dSmrg __divsi3 (int32_t dividend, int32_t divisor)
26636ac495dSmrg {
26736ac495dSmrg #ifndef __tilegx__
26836ac495dSmrg uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor));
26936ac495dSmrg #else /* !__tilegx__ */
27036ac495dSmrg uint64_t n =
27136ac495dSmrg __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
27236ac495dSmrg #endif /* !__tilegx__ */
27336ac495dSmrg if ((dividend ^ divisor) < 0)
27436ac495dSmrg n = -n;
27536ac495dSmrg return (int32_t) n;
27636ac495dSmrg }
27736ac495dSmrg #endif
27836ac495dSmrg
27936ac495dSmrg
28036ac495dSmrg uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor);
28136ac495dSmrg #ifdef L_tile_udivdi3
28236ac495dSmrg uint64_t
__udivdi3(uint64_t dividend,uint64_t divisor)28336ac495dSmrg __udivdi3 (uint64_t dividend, uint64_t divisor)
28436ac495dSmrg {
28536ac495dSmrg return __udivdi3_inline (dividend, divisor);
28636ac495dSmrg }
28736ac495dSmrg #endif
28836ac495dSmrg
28936ac495dSmrg /*__divdi3 - 64 bit integer signed divide */
29036ac495dSmrg int64_t __divdi3 (int64_t dividend, int64_t divisor);
29136ac495dSmrg #ifdef L_tile_divdi3
29236ac495dSmrg int64_t
__divdi3(int64_t dividend,int64_t divisor)29336ac495dSmrg __divdi3 (int64_t dividend, int64_t divisor)
29436ac495dSmrg {
29536ac495dSmrg uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor));
29636ac495dSmrg if ((dividend ^ divisor) < 0)
29736ac495dSmrg n = -n;
29836ac495dSmrg return (int64_t) n;
29936ac495dSmrg }
30036ac495dSmrg #endif
30136ac495dSmrg
30236ac495dSmrg
30336ac495dSmrg uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor);
30436ac495dSmrg #ifdef L_tile_umodsi3
30536ac495dSmrg uint32_t
__umodsi3(uint32_t dividend,uint32_t divisor)30636ac495dSmrg __umodsi3 (uint32_t dividend, uint32_t divisor)
30736ac495dSmrg {
30836ac495dSmrg #ifndef __tilegx__
30936ac495dSmrg return __umodsi3_inline (dividend, divisor);
31036ac495dSmrg #else /* !__tilegx__ */
31136ac495dSmrg return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor);
31236ac495dSmrg #endif /* !__tilegx__ */
31336ac495dSmrg }
31436ac495dSmrg #endif
31536ac495dSmrg
31636ac495dSmrg
31736ac495dSmrg /* __modsi3 - 32 bit integer signed modulo */
31836ac495dSmrg int32_t __modsi3 (int32_t dividend, int32_t divisor);
31936ac495dSmrg #ifdef L_tile_modsi3
32036ac495dSmrg int32_t
__modsi3(int32_t dividend,int32_t divisor)32136ac495dSmrg __modsi3 (int32_t dividend, int32_t divisor)
32236ac495dSmrg {
32336ac495dSmrg #ifndef __tilegx__
32436ac495dSmrg uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor));
32536ac495dSmrg #else /* !__tilegx__ */
32636ac495dSmrg uint64_t remainder =
32736ac495dSmrg __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
32836ac495dSmrg #endif /* !__tilegx__ */
32936ac495dSmrg return (int32_t) ((dividend >= 0) ? remainder : -remainder);
33036ac495dSmrg }
33136ac495dSmrg #endif
33236ac495dSmrg
33336ac495dSmrg
33436ac495dSmrg uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor);
33536ac495dSmrg #ifdef L_tile_umoddi3
33636ac495dSmrg uint64_t
__umoddi3(uint64_t dividend,uint64_t divisor)33736ac495dSmrg __umoddi3 (uint64_t dividend, uint64_t divisor)
33836ac495dSmrg {
33936ac495dSmrg return __umoddi3_inline (dividend, divisor);
34036ac495dSmrg }
34136ac495dSmrg #endif
34236ac495dSmrg
34336ac495dSmrg
34436ac495dSmrg /* __moddi3 - 64 bit integer signed modulo */
34536ac495dSmrg int64_t __moddi3 (int64_t dividend, int64_t divisor);
34636ac495dSmrg #ifdef L_tile_moddi3
34736ac495dSmrg int64_t
__moddi3(int64_t dividend,int64_t divisor)34836ac495dSmrg __moddi3 (int64_t dividend, int64_t divisor)
34936ac495dSmrg {
35036ac495dSmrg uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor));
35136ac495dSmrg return (int64_t) ((dividend >= 0) ? remainder : -remainder);
35236ac495dSmrg }
35336ac495dSmrg #endif
354