148fb7bfaSmrg /* Division and remainder routines for Tile.
2*b1e83836Smrg Copyright (C) 2011-2022 Free Software Foundation, Inc.
348fb7bfaSmrg Contributed by Walter Lee (walt@tilera.com)
448fb7bfaSmrg
548fb7bfaSmrg This file is free software; you can redistribute it and/or modify it
648fb7bfaSmrg under the terms of the GNU General Public License as published by the
748fb7bfaSmrg Free Software Foundation; either version 3, or (at your option) any
848fb7bfaSmrg later version.
948fb7bfaSmrg
1048fb7bfaSmrg This file is distributed in the hope that it will be useful, but
1148fb7bfaSmrg WITHOUT ANY WARRANTY; without even the implied warranty of
1248fb7bfaSmrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1348fb7bfaSmrg General Public License for more details.
1448fb7bfaSmrg
1548fb7bfaSmrg Under Section 7 of GPL version 3, you are granted additional
1648fb7bfaSmrg permissions described in the GCC Runtime Library Exception, version
1748fb7bfaSmrg 3.1, as published by the Free Software Foundation.
1848fb7bfaSmrg
1948fb7bfaSmrg You should have received a copy of the GNU General Public License and
2048fb7bfaSmrg a copy of the GCC Runtime Library Exception along with this program;
2148fb7bfaSmrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
2248fb7bfaSmrg <http://www.gnu.org/licenses/>. */
2348fb7bfaSmrg
2448fb7bfaSmrg typedef int int32_t;
2548fb7bfaSmrg typedef unsigned uint32_t;
2648fb7bfaSmrg typedef long long int64_t;
2748fb7bfaSmrg typedef unsigned long long uint64_t;
2848fb7bfaSmrg
2948fb7bfaSmrg /* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */
3048fb7bfaSmrg static inline void
raise_intdiv(void)3148fb7bfaSmrg raise_intdiv (void)
3248fb7bfaSmrg {
3348fb7bfaSmrg asm ("{ raise; moveli zero, 8 + (1 << 6) }");
3448fb7bfaSmrg }
3548fb7bfaSmrg
3648fb7bfaSmrg
3748fb7bfaSmrg #ifndef __tilegx__
3848fb7bfaSmrg /*__udivsi3 - 32 bit integer unsigned divide */
3948fb7bfaSmrg static inline uint32_t __attribute__ ((always_inline))
__udivsi3_inline(uint32_t dividend,uint32_t divisor)4048fb7bfaSmrg __udivsi3_inline (uint32_t dividend, uint32_t divisor)
4148fb7bfaSmrg {
4248fb7bfaSmrg /* Divide out any power of two factor from dividend and divisor.
4348fb7bfaSmrg Note that when dividing by zero the divisor will remain zero,
4448fb7bfaSmrg which is all we need to detect that case below. */
4548fb7bfaSmrg const int power_of_two_factor = __insn_ctz (divisor);
4648fb7bfaSmrg divisor >>= power_of_two_factor;
4748fb7bfaSmrg dividend >>= power_of_two_factor;
4848fb7bfaSmrg
4948fb7bfaSmrg /* Checks for division by power of two or division by zero. */
5048fb7bfaSmrg if (divisor <= 1)
5148fb7bfaSmrg {
5248fb7bfaSmrg if (divisor == 0)
5348fb7bfaSmrg {
5448fb7bfaSmrg raise_intdiv ();
5548fb7bfaSmrg return 0;
5648fb7bfaSmrg }
5748fb7bfaSmrg return dividend;
5848fb7bfaSmrg }
5948fb7bfaSmrg
6048fb7bfaSmrg /* Compute (a / b) by repeatedly finding the largest N
6148fb7bfaSmrg such that (b << N) <= a. For each such N, set bit N in the
6248fb7bfaSmrg quotient, subtract (b << N) from a, and keep going. Think of this as
6348fb7bfaSmrg the reverse of the "shift-and-add" that a multiply does. The values
6448fb7bfaSmrg of N are precisely those shift counts.
6548fb7bfaSmrg
6648fb7bfaSmrg Finding N is easy. First, use clz(b) - clz(a) to find the N
6748fb7bfaSmrg that lines up the high bit of (b << N) with the high bit of a.
6848fb7bfaSmrg Any larger value of N would definitely make (b << N) > a,
6948fb7bfaSmrg which is too big.
7048fb7bfaSmrg
7148fb7bfaSmrg Then, if (b << N) > a (because it has larger low bits), decrement
7248fb7bfaSmrg N by one. This adjustment will definitely make (b << N) less
7348fb7bfaSmrg than a, because a's high bit is now one higher than b's. */
7448fb7bfaSmrg
7548fb7bfaSmrg /* Precomputing the max_ values allows us to avoid a subtract
7648fb7bfaSmrg in the inner loop and just right shift by clz(remainder). */
7748fb7bfaSmrg const int divisor_clz = __insn_clz (divisor);
7848fb7bfaSmrg const uint32_t max_divisor = divisor << divisor_clz;
7948fb7bfaSmrg const uint32_t max_qbit = 1 << divisor_clz;
8048fb7bfaSmrg
8148fb7bfaSmrg uint32_t quotient = 0;
8248fb7bfaSmrg uint32_t remainder = dividend;
8348fb7bfaSmrg
8448fb7bfaSmrg while (remainder >= divisor)
8548fb7bfaSmrg {
8648fb7bfaSmrg int shift = __insn_clz (remainder);
8748fb7bfaSmrg uint32_t scaled_divisor = max_divisor >> shift;
8848fb7bfaSmrg uint32_t quotient_bit = max_qbit >> shift;
8948fb7bfaSmrg
9048fb7bfaSmrg int too_big = (scaled_divisor > remainder);
9148fb7bfaSmrg scaled_divisor >>= too_big;
9248fb7bfaSmrg quotient_bit >>= too_big;
9348fb7bfaSmrg remainder -= scaled_divisor;
9448fb7bfaSmrg quotient |= quotient_bit;
9548fb7bfaSmrg }
9648fb7bfaSmrg return quotient;
9748fb7bfaSmrg }
9848fb7bfaSmrg #endif /* !__tilegx__ */
9948fb7bfaSmrg
10048fb7bfaSmrg
10148fb7bfaSmrg /* __udivdi3 - 64 bit integer unsigned divide */
10248fb7bfaSmrg static inline uint64_t __attribute__ ((always_inline))
__udivdi3_inline(uint64_t dividend,uint64_t divisor)10348fb7bfaSmrg __udivdi3_inline (uint64_t dividend, uint64_t divisor)
10448fb7bfaSmrg {
10548fb7bfaSmrg /* Divide out any power of two factor from dividend and divisor.
10648fb7bfaSmrg Note that when dividing by zero the divisor will remain zero,
10748fb7bfaSmrg which is all we need to detect that case below. */
10848fb7bfaSmrg const int power_of_two_factor = __builtin_ctzll (divisor);
10948fb7bfaSmrg divisor >>= power_of_two_factor;
11048fb7bfaSmrg dividend >>= power_of_two_factor;
11148fb7bfaSmrg
11248fb7bfaSmrg /* Checks for division by power of two or division by zero. */
11348fb7bfaSmrg if (divisor <= 1)
11448fb7bfaSmrg {
11548fb7bfaSmrg if (divisor == 0)
11648fb7bfaSmrg {
11748fb7bfaSmrg raise_intdiv ();
11848fb7bfaSmrg return 0;
11948fb7bfaSmrg }
12048fb7bfaSmrg return dividend;
12148fb7bfaSmrg }
12248fb7bfaSmrg
12348fb7bfaSmrg #ifndef __tilegx__
12448fb7bfaSmrg if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
12548fb7bfaSmrg {
12648fb7bfaSmrg /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
12748fb7bfaSmrg return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
12848fb7bfaSmrg }
12948fb7bfaSmrg #endif /* !__tilegx__ */
13048fb7bfaSmrg
13148fb7bfaSmrg /* See algorithm description in __udivsi3 */
13248fb7bfaSmrg
13348fb7bfaSmrg const int divisor_clz = __builtin_clzll (divisor);
13448fb7bfaSmrg const uint64_t max_divisor = divisor << divisor_clz;
13548fb7bfaSmrg const uint64_t max_qbit = 1ULL << divisor_clz;
13648fb7bfaSmrg
13748fb7bfaSmrg uint64_t quotient = 0;
13848fb7bfaSmrg uint64_t remainder = dividend;
13948fb7bfaSmrg
14048fb7bfaSmrg while (remainder >= divisor)
14148fb7bfaSmrg {
14248fb7bfaSmrg int shift = __builtin_clzll (remainder);
14348fb7bfaSmrg uint64_t scaled_divisor = max_divisor >> shift;
14448fb7bfaSmrg uint64_t quotient_bit = max_qbit >> shift;
14548fb7bfaSmrg
14648fb7bfaSmrg int too_big = (scaled_divisor > remainder);
14748fb7bfaSmrg scaled_divisor >>= too_big;
14848fb7bfaSmrg quotient_bit >>= too_big;
14948fb7bfaSmrg remainder -= scaled_divisor;
15048fb7bfaSmrg quotient |= quotient_bit;
15148fb7bfaSmrg }
15248fb7bfaSmrg return quotient;
15348fb7bfaSmrg }
15448fb7bfaSmrg
15548fb7bfaSmrg
15648fb7bfaSmrg #ifndef __tilegx__
15748fb7bfaSmrg /* __umodsi3 - 32 bit integer unsigned modulo */
15848fb7bfaSmrg static inline uint32_t __attribute__ ((always_inline))
__umodsi3_inline(uint32_t dividend,uint32_t divisor)15948fb7bfaSmrg __umodsi3_inline (uint32_t dividend, uint32_t divisor)
16048fb7bfaSmrg {
16148fb7bfaSmrg /* Shortcircuit mod by a power of two (and catch mod by zero). */
16248fb7bfaSmrg const uint32_t mask = divisor - 1;
16348fb7bfaSmrg if ((divisor & mask) == 0)
16448fb7bfaSmrg {
16548fb7bfaSmrg if (divisor == 0)
16648fb7bfaSmrg {
16748fb7bfaSmrg raise_intdiv ();
16848fb7bfaSmrg return 0;
16948fb7bfaSmrg }
17048fb7bfaSmrg return dividend & mask;
17148fb7bfaSmrg }
17248fb7bfaSmrg
17348fb7bfaSmrg /* We compute the remainder (a % b) by repeatedly subtracting off
17448fb7bfaSmrg multiples of b from a until a < b. The key is that subtracting
17548fb7bfaSmrg off a multiple of b does not affect the result mod b.
17648fb7bfaSmrg
17748fb7bfaSmrg To make the algorithm run efficiently, we need to subtract
17848fb7bfaSmrg off a large multiple of b at each step. We subtract the largest
17948fb7bfaSmrg (b << N) that is <= a.
18048fb7bfaSmrg
18148fb7bfaSmrg Finding N is easy. First, use clz(b) - clz(a) to find the N
18248fb7bfaSmrg that lines up the high bit of (b << N) with the high bit of a.
18348fb7bfaSmrg Any larger value of N would definitely make (b << N) > a,
18448fb7bfaSmrg which is too big.
18548fb7bfaSmrg
18648fb7bfaSmrg Then, if (b << N) > a (because it has larger low bits), decrement
18748fb7bfaSmrg N by one. This adjustment will definitely make (b << N) less
18848fb7bfaSmrg than a, because a's high bit is now one higher than b's. */
18948fb7bfaSmrg const uint32_t max_divisor = divisor << __insn_clz (divisor);
19048fb7bfaSmrg
19148fb7bfaSmrg uint32_t remainder = dividend;
19248fb7bfaSmrg while (remainder >= divisor)
19348fb7bfaSmrg {
19448fb7bfaSmrg const int shift = __insn_clz (remainder);
19548fb7bfaSmrg uint32_t scaled_divisor = max_divisor >> shift;
19648fb7bfaSmrg scaled_divisor >>= (scaled_divisor > remainder);
19748fb7bfaSmrg remainder -= scaled_divisor;
19848fb7bfaSmrg }
19948fb7bfaSmrg
20048fb7bfaSmrg return remainder;
20148fb7bfaSmrg }
20248fb7bfaSmrg #endif /* !__tilegx__ */
20348fb7bfaSmrg
20448fb7bfaSmrg
20548fb7bfaSmrg /* __umoddi3 - 64 bit integer unsigned modulo */
20648fb7bfaSmrg static inline uint64_t __attribute__ ((always_inline))
__umoddi3_inline(uint64_t dividend,uint64_t divisor)20748fb7bfaSmrg __umoddi3_inline (uint64_t dividend, uint64_t divisor)
20848fb7bfaSmrg {
20948fb7bfaSmrg #ifndef __tilegx__
21048fb7bfaSmrg if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
21148fb7bfaSmrg {
21248fb7bfaSmrg /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
21348fb7bfaSmrg return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
21448fb7bfaSmrg }
21548fb7bfaSmrg #endif /* !__tilegx__ */
21648fb7bfaSmrg
21748fb7bfaSmrg /* Shortcircuit mod by a power of two (and catch mod by zero). */
21848fb7bfaSmrg const uint64_t mask = divisor - 1;
21948fb7bfaSmrg if ((divisor & mask) == 0)
22048fb7bfaSmrg {
22148fb7bfaSmrg if (divisor == 0)
22248fb7bfaSmrg {
22348fb7bfaSmrg raise_intdiv ();
22448fb7bfaSmrg return 0;
22548fb7bfaSmrg }
22648fb7bfaSmrg return dividend & mask;
22748fb7bfaSmrg }
22848fb7bfaSmrg
22948fb7bfaSmrg /* See algorithm description in __umodsi3 */
23048fb7bfaSmrg const uint64_t max_divisor = divisor << __builtin_clzll (divisor);
23148fb7bfaSmrg
23248fb7bfaSmrg uint64_t remainder = dividend;
23348fb7bfaSmrg while (remainder >= divisor)
23448fb7bfaSmrg {
23548fb7bfaSmrg const int shift = __builtin_clzll (remainder);
23648fb7bfaSmrg uint64_t scaled_divisor = max_divisor >> shift;
23748fb7bfaSmrg scaled_divisor >>= (scaled_divisor > remainder);
23848fb7bfaSmrg remainder -= scaled_divisor;
23948fb7bfaSmrg }
24048fb7bfaSmrg
24148fb7bfaSmrg return remainder;
24248fb7bfaSmrg }
24348fb7bfaSmrg
24448fb7bfaSmrg
24548fb7bfaSmrg uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor);
24648fb7bfaSmrg #ifdef L_tile_udivsi3
24748fb7bfaSmrg uint32_t
__udivsi3(uint32_t dividend,uint32_t divisor)24848fb7bfaSmrg __udivsi3 (uint32_t dividend, uint32_t divisor)
24948fb7bfaSmrg {
25048fb7bfaSmrg #ifndef __tilegx__
25148fb7bfaSmrg return __udivsi3_inline (dividend, divisor);
25248fb7bfaSmrg #else /* !__tilegx__ */
25348fb7bfaSmrg uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor));
25448fb7bfaSmrg return (uint32_t) n;
25548fb7bfaSmrg #endif /* !__tilegx__ */
25648fb7bfaSmrg }
25748fb7bfaSmrg #endif
25848fb7bfaSmrg
25948fb7bfaSmrg #define ABS(x) ((x) >= 0 ? (x) : -(x))
26048fb7bfaSmrg
26148fb7bfaSmrg int32_t __divsi3 (int32_t dividend, int32_t divisor);
26248fb7bfaSmrg #ifdef L_tile_divsi3
26348fb7bfaSmrg /* __divsi3 - 32 bit integer signed divide */
26448fb7bfaSmrg int32_t
__divsi3(int32_t dividend,int32_t divisor)26548fb7bfaSmrg __divsi3 (int32_t dividend, int32_t divisor)
26648fb7bfaSmrg {
26748fb7bfaSmrg #ifndef __tilegx__
26848fb7bfaSmrg uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor));
26948fb7bfaSmrg #else /* !__tilegx__ */
27048fb7bfaSmrg uint64_t n =
27148fb7bfaSmrg __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
27248fb7bfaSmrg #endif /* !__tilegx__ */
27348fb7bfaSmrg if ((dividend ^ divisor) < 0)
27448fb7bfaSmrg n = -n;
27548fb7bfaSmrg return (int32_t) n;
27648fb7bfaSmrg }
27748fb7bfaSmrg #endif
27848fb7bfaSmrg
27948fb7bfaSmrg
28048fb7bfaSmrg uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor);
28148fb7bfaSmrg #ifdef L_tile_udivdi3
28248fb7bfaSmrg uint64_t
__udivdi3(uint64_t dividend,uint64_t divisor)28348fb7bfaSmrg __udivdi3 (uint64_t dividend, uint64_t divisor)
28448fb7bfaSmrg {
28548fb7bfaSmrg return __udivdi3_inline (dividend, divisor);
28648fb7bfaSmrg }
28748fb7bfaSmrg #endif
28848fb7bfaSmrg
28948fb7bfaSmrg /*__divdi3 - 64 bit integer signed divide */
29048fb7bfaSmrg int64_t __divdi3 (int64_t dividend, int64_t divisor);
29148fb7bfaSmrg #ifdef L_tile_divdi3
29248fb7bfaSmrg int64_t
__divdi3(int64_t dividend,int64_t divisor)29348fb7bfaSmrg __divdi3 (int64_t dividend, int64_t divisor)
29448fb7bfaSmrg {
29548fb7bfaSmrg uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor));
29648fb7bfaSmrg if ((dividend ^ divisor) < 0)
29748fb7bfaSmrg n = -n;
29848fb7bfaSmrg return (int64_t) n;
29948fb7bfaSmrg }
30048fb7bfaSmrg #endif
30148fb7bfaSmrg
30248fb7bfaSmrg
30348fb7bfaSmrg uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor);
30448fb7bfaSmrg #ifdef L_tile_umodsi3
30548fb7bfaSmrg uint32_t
__umodsi3(uint32_t dividend,uint32_t divisor)30648fb7bfaSmrg __umodsi3 (uint32_t dividend, uint32_t divisor)
30748fb7bfaSmrg {
30848fb7bfaSmrg #ifndef __tilegx__
30948fb7bfaSmrg return __umodsi3_inline (dividend, divisor);
31048fb7bfaSmrg #else /* !__tilegx__ */
31148fb7bfaSmrg return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor);
31248fb7bfaSmrg #endif /* !__tilegx__ */
31348fb7bfaSmrg }
31448fb7bfaSmrg #endif
31548fb7bfaSmrg
31648fb7bfaSmrg
31748fb7bfaSmrg /* __modsi3 - 32 bit integer signed modulo */
31848fb7bfaSmrg int32_t __modsi3 (int32_t dividend, int32_t divisor);
31948fb7bfaSmrg #ifdef L_tile_modsi3
32048fb7bfaSmrg int32_t
__modsi3(int32_t dividend,int32_t divisor)32148fb7bfaSmrg __modsi3 (int32_t dividend, int32_t divisor)
32248fb7bfaSmrg {
32348fb7bfaSmrg #ifndef __tilegx__
32448fb7bfaSmrg uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor));
32548fb7bfaSmrg #else /* !__tilegx__ */
32648fb7bfaSmrg uint64_t remainder =
32748fb7bfaSmrg __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
32848fb7bfaSmrg #endif /* !__tilegx__ */
32948fb7bfaSmrg return (int32_t) ((dividend >= 0) ? remainder : -remainder);
33048fb7bfaSmrg }
33148fb7bfaSmrg #endif
33248fb7bfaSmrg
33348fb7bfaSmrg
33448fb7bfaSmrg uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor);
33548fb7bfaSmrg #ifdef L_tile_umoddi3
33648fb7bfaSmrg uint64_t
__umoddi3(uint64_t dividend,uint64_t divisor)33748fb7bfaSmrg __umoddi3 (uint64_t dividend, uint64_t divisor)
33848fb7bfaSmrg {
33948fb7bfaSmrg return __umoddi3_inline (dividend, divisor);
34048fb7bfaSmrg }
34148fb7bfaSmrg #endif
34248fb7bfaSmrg
34348fb7bfaSmrg
34448fb7bfaSmrg /* __moddi3 - 64 bit integer signed modulo */
34548fb7bfaSmrg int64_t __moddi3 (int64_t dividend, int64_t divisor);
34648fb7bfaSmrg #ifdef L_tile_moddi3
34748fb7bfaSmrg int64_t
__moddi3(int64_t dividend,int64_t divisor)34848fb7bfaSmrg __moddi3 (int64_t dividend, int64_t divisor)
34948fb7bfaSmrg {
35048fb7bfaSmrg uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor));
35148fb7bfaSmrg return (int64_t) ((dividend >= 0) ? remainder : -remainder);
35248fb7bfaSmrg }
35348fb7bfaSmrg #endif
354