xref: /netbsd-src/external/gpl3/gcc/dist/libgcc/config/tilepro/softdivide.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
148fb7bfaSmrg /* Division and remainder routines for Tile.
2*b1e83836Smrg    Copyright (C) 2011-2022 Free Software Foundation, Inc.
348fb7bfaSmrg    Contributed by Walter Lee (walt@tilera.com)
448fb7bfaSmrg 
548fb7bfaSmrg    This file is free software; you can redistribute it and/or modify it
648fb7bfaSmrg    under the terms of the GNU General Public License as published by the
748fb7bfaSmrg    Free Software Foundation; either version 3, or (at your option) any
848fb7bfaSmrg    later version.
948fb7bfaSmrg 
1048fb7bfaSmrg    This file is distributed in the hope that it will be useful, but
1148fb7bfaSmrg    WITHOUT ANY WARRANTY; without even the implied warranty of
1248fb7bfaSmrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1348fb7bfaSmrg    General Public License for more details.
1448fb7bfaSmrg 
1548fb7bfaSmrg    Under Section 7 of GPL version 3, you are granted additional
1648fb7bfaSmrg    permissions described in the GCC Runtime Library Exception, version
1748fb7bfaSmrg    3.1, as published by the Free Software Foundation.
1848fb7bfaSmrg 
1948fb7bfaSmrg    You should have received a copy of the GNU General Public License and
2048fb7bfaSmrg    a copy of the GCC Runtime Library Exception along with this program;
2148fb7bfaSmrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2248fb7bfaSmrg    <http://www.gnu.org/licenses/>.  */
2348fb7bfaSmrg 
2448fb7bfaSmrg typedef int int32_t;
2548fb7bfaSmrg typedef unsigned uint32_t;
2648fb7bfaSmrg typedef long long int64_t;
2748fb7bfaSmrg typedef unsigned long long uint64_t;
2848fb7bfaSmrg 
2948fb7bfaSmrg /* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV).  */
3048fb7bfaSmrg static inline void
raise_intdiv(void)3148fb7bfaSmrg raise_intdiv (void)
3248fb7bfaSmrg {
3348fb7bfaSmrg   asm ("{ raise; moveli zero, 8 + (1 << 6) }");
3448fb7bfaSmrg }
3548fb7bfaSmrg 
3648fb7bfaSmrg 
3748fb7bfaSmrg #ifndef __tilegx__
3848fb7bfaSmrg /*__udivsi3 - 32 bit integer unsigned divide  */
3948fb7bfaSmrg static inline uint32_t __attribute__ ((always_inline))
__udivsi3_inline(uint32_t dividend,uint32_t divisor)4048fb7bfaSmrg __udivsi3_inline (uint32_t dividend, uint32_t divisor)
4148fb7bfaSmrg {
4248fb7bfaSmrg   /* Divide out any power of two factor from dividend and divisor.
4348fb7bfaSmrg      Note that when dividing by zero the divisor will remain zero,
4448fb7bfaSmrg      which is all we need to detect that case below.  */
4548fb7bfaSmrg   const int power_of_two_factor = __insn_ctz (divisor);
4648fb7bfaSmrg   divisor >>= power_of_two_factor;
4748fb7bfaSmrg   dividend >>= power_of_two_factor;
4848fb7bfaSmrg 
4948fb7bfaSmrg   /* Checks for division by power of two or division by zero.  */
5048fb7bfaSmrg   if (divisor <= 1)
5148fb7bfaSmrg     {
5248fb7bfaSmrg       if (divisor == 0)
5348fb7bfaSmrg 	{
5448fb7bfaSmrg 	  raise_intdiv ();
5548fb7bfaSmrg 	  return 0;
5648fb7bfaSmrg 	}
5748fb7bfaSmrg       return dividend;
5848fb7bfaSmrg     }
5948fb7bfaSmrg 
6048fb7bfaSmrg   /* Compute (a / b) by repeatedly finding the largest N
6148fb7bfaSmrg      such that (b << N) <= a. For each such N, set bit N in the
6248fb7bfaSmrg      quotient, subtract (b << N) from a, and keep going. Think of this as
6348fb7bfaSmrg      the reverse of the "shift-and-add" that a multiply does. The values
6448fb7bfaSmrg      of N are precisely those shift counts.
6548fb7bfaSmrg 
6648fb7bfaSmrg      Finding N is easy. First, use clz(b) - clz(a) to find the N
6748fb7bfaSmrg      that lines up the high bit of (b << N) with the high bit of a.
6848fb7bfaSmrg      Any larger value of N would definitely make (b << N) > a,
6948fb7bfaSmrg      which is too big.
7048fb7bfaSmrg 
7148fb7bfaSmrg      Then, if (b << N) > a (because it has larger low bits), decrement
7248fb7bfaSmrg      N by one.  This adjustment will definitely make (b << N) less
7348fb7bfaSmrg      than a, because a's high bit is now one higher than b's.  */
7448fb7bfaSmrg 
7548fb7bfaSmrg   /* Precomputing the max_ values allows us to avoid a subtract
7648fb7bfaSmrg      in the inner loop and just right shift by clz(remainder).  */
7748fb7bfaSmrg   const int divisor_clz = __insn_clz (divisor);
7848fb7bfaSmrg   const uint32_t max_divisor = divisor << divisor_clz;
7948fb7bfaSmrg   const uint32_t max_qbit = 1 << divisor_clz;
8048fb7bfaSmrg 
8148fb7bfaSmrg   uint32_t quotient = 0;
8248fb7bfaSmrg   uint32_t remainder = dividend;
8348fb7bfaSmrg 
8448fb7bfaSmrg   while (remainder >= divisor)
8548fb7bfaSmrg     {
8648fb7bfaSmrg       int shift = __insn_clz (remainder);
8748fb7bfaSmrg       uint32_t scaled_divisor = max_divisor >> shift;
8848fb7bfaSmrg       uint32_t quotient_bit = max_qbit >> shift;
8948fb7bfaSmrg 
9048fb7bfaSmrg       int too_big = (scaled_divisor > remainder);
9148fb7bfaSmrg       scaled_divisor >>= too_big;
9248fb7bfaSmrg       quotient_bit >>= too_big;
9348fb7bfaSmrg       remainder -= scaled_divisor;
9448fb7bfaSmrg       quotient |= quotient_bit;
9548fb7bfaSmrg     }
9648fb7bfaSmrg   return quotient;
9748fb7bfaSmrg }
9848fb7bfaSmrg #endif /* !__tilegx__ */
9948fb7bfaSmrg 
10048fb7bfaSmrg 
10148fb7bfaSmrg /* __udivdi3 - 64 bit integer unsigned divide  */
10248fb7bfaSmrg static inline uint64_t __attribute__ ((always_inline))
__udivdi3_inline(uint64_t dividend,uint64_t divisor)10348fb7bfaSmrg __udivdi3_inline (uint64_t dividend, uint64_t divisor)
10448fb7bfaSmrg {
10548fb7bfaSmrg   /* Divide out any power of two factor from dividend and divisor.
10648fb7bfaSmrg      Note that when dividing by zero the divisor will remain zero,
10748fb7bfaSmrg      which is all we need to detect that case below.  */
10848fb7bfaSmrg   const int power_of_two_factor = __builtin_ctzll (divisor);
10948fb7bfaSmrg   divisor >>= power_of_two_factor;
11048fb7bfaSmrg   dividend >>= power_of_two_factor;
11148fb7bfaSmrg 
11248fb7bfaSmrg   /* Checks for division by power of two or division by zero.  */
11348fb7bfaSmrg   if (divisor <= 1)
11448fb7bfaSmrg     {
11548fb7bfaSmrg       if (divisor == 0)
11648fb7bfaSmrg 	{
11748fb7bfaSmrg 	  raise_intdiv ();
11848fb7bfaSmrg 	  return 0;
11948fb7bfaSmrg 	}
12048fb7bfaSmrg       return dividend;
12148fb7bfaSmrg     }
12248fb7bfaSmrg 
12348fb7bfaSmrg #ifndef __tilegx__
12448fb7bfaSmrg   if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
12548fb7bfaSmrg     {
12648fb7bfaSmrg       /* Operands both fit in 32 bits, so use faster 32 bit algorithm.  */
12748fb7bfaSmrg       return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
12848fb7bfaSmrg     }
12948fb7bfaSmrg #endif /* !__tilegx__ */
13048fb7bfaSmrg 
13148fb7bfaSmrg   /* See algorithm description in __udivsi3  */
13248fb7bfaSmrg 
13348fb7bfaSmrg   const int divisor_clz = __builtin_clzll (divisor);
13448fb7bfaSmrg   const uint64_t max_divisor = divisor << divisor_clz;
13548fb7bfaSmrg   const uint64_t max_qbit = 1ULL << divisor_clz;
13648fb7bfaSmrg 
13748fb7bfaSmrg   uint64_t quotient = 0;
13848fb7bfaSmrg   uint64_t remainder = dividend;
13948fb7bfaSmrg 
14048fb7bfaSmrg   while (remainder >= divisor)
14148fb7bfaSmrg     {
14248fb7bfaSmrg       int shift = __builtin_clzll (remainder);
14348fb7bfaSmrg       uint64_t scaled_divisor = max_divisor >> shift;
14448fb7bfaSmrg       uint64_t quotient_bit = max_qbit >> shift;
14548fb7bfaSmrg 
14648fb7bfaSmrg       int too_big = (scaled_divisor > remainder);
14748fb7bfaSmrg       scaled_divisor >>= too_big;
14848fb7bfaSmrg       quotient_bit >>= too_big;
14948fb7bfaSmrg       remainder -= scaled_divisor;
15048fb7bfaSmrg       quotient |= quotient_bit;
15148fb7bfaSmrg     }
15248fb7bfaSmrg   return quotient;
15348fb7bfaSmrg }
15448fb7bfaSmrg 
15548fb7bfaSmrg 
15648fb7bfaSmrg #ifndef __tilegx__
15748fb7bfaSmrg /* __umodsi3 - 32 bit integer unsigned modulo  */
15848fb7bfaSmrg static inline uint32_t __attribute__ ((always_inline))
__umodsi3_inline(uint32_t dividend,uint32_t divisor)15948fb7bfaSmrg __umodsi3_inline (uint32_t dividend, uint32_t divisor)
16048fb7bfaSmrg {
16148fb7bfaSmrg   /* Shortcircuit mod by a power of two (and catch mod by zero).  */
16248fb7bfaSmrg   const uint32_t mask = divisor - 1;
16348fb7bfaSmrg   if ((divisor & mask) == 0)
16448fb7bfaSmrg     {
16548fb7bfaSmrg       if (divisor == 0)
16648fb7bfaSmrg 	{
16748fb7bfaSmrg 	  raise_intdiv ();
16848fb7bfaSmrg 	  return 0;
16948fb7bfaSmrg 	}
17048fb7bfaSmrg       return dividend & mask;
17148fb7bfaSmrg     }
17248fb7bfaSmrg 
17348fb7bfaSmrg   /* We compute the remainder (a % b) by repeatedly subtracting off
17448fb7bfaSmrg      multiples of b from a until a < b. The key is that subtracting
17548fb7bfaSmrg      off a multiple of b does not affect the result mod b.
17648fb7bfaSmrg 
17748fb7bfaSmrg      To make the algorithm run efficiently, we need to subtract
17848fb7bfaSmrg      off a large multiple of b at each step. We subtract the largest
17948fb7bfaSmrg      (b << N) that is <= a.
18048fb7bfaSmrg 
18148fb7bfaSmrg      Finding N is easy. First, use clz(b) - clz(a) to find the N
18248fb7bfaSmrg      that lines up the high bit of (b << N) with the high bit of a.
18348fb7bfaSmrg      Any larger value of N would definitely make (b << N) > a,
18448fb7bfaSmrg      which is too big.
18548fb7bfaSmrg 
18648fb7bfaSmrg      Then, if (b << N) > a (because it has larger low bits), decrement
18748fb7bfaSmrg      N by one.  This adjustment will definitely make (b << N) less
18848fb7bfaSmrg      than a, because a's high bit is now one higher than b's.  */
18948fb7bfaSmrg   const uint32_t max_divisor = divisor << __insn_clz (divisor);
19048fb7bfaSmrg 
19148fb7bfaSmrg   uint32_t remainder = dividend;
19248fb7bfaSmrg   while (remainder >= divisor)
19348fb7bfaSmrg     {
19448fb7bfaSmrg       const int shift = __insn_clz (remainder);
19548fb7bfaSmrg       uint32_t scaled_divisor = max_divisor >> shift;
19648fb7bfaSmrg       scaled_divisor >>= (scaled_divisor > remainder);
19748fb7bfaSmrg       remainder -= scaled_divisor;
19848fb7bfaSmrg     }
19948fb7bfaSmrg 
20048fb7bfaSmrg   return remainder;
20148fb7bfaSmrg }
20248fb7bfaSmrg #endif /* !__tilegx__ */
20348fb7bfaSmrg 
20448fb7bfaSmrg 
20548fb7bfaSmrg /* __umoddi3 - 64 bit integer unsigned modulo  */
20648fb7bfaSmrg static inline uint64_t __attribute__ ((always_inline))
__umoddi3_inline(uint64_t dividend,uint64_t divisor)20748fb7bfaSmrg __umoddi3_inline (uint64_t dividend, uint64_t divisor)
20848fb7bfaSmrg {
20948fb7bfaSmrg #ifndef __tilegx__
21048fb7bfaSmrg   if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
21148fb7bfaSmrg     {
21248fb7bfaSmrg       /* Operands both fit in 32 bits, so use faster 32 bit algorithm.  */
21348fb7bfaSmrg       return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
21448fb7bfaSmrg     }
21548fb7bfaSmrg #endif /* !__tilegx__ */
21648fb7bfaSmrg 
21748fb7bfaSmrg   /* Shortcircuit mod by a power of two (and catch mod by zero).  */
21848fb7bfaSmrg   const uint64_t mask = divisor - 1;
21948fb7bfaSmrg   if ((divisor & mask) == 0)
22048fb7bfaSmrg     {
22148fb7bfaSmrg       if (divisor == 0)
22248fb7bfaSmrg 	{
22348fb7bfaSmrg 	  raise_intdiv ();
22448fb7bfaSmrg 	  return 0;
22548fb7bfaSmrg 	}
22648fb7bfaSmrg       return dividend & mask;
22748fb7bfaSmrg     }
22848fb7bfaSmrg 
22948fb7bfaSmrg   /* See algorithm description in __umodsi3  */
23048fb7bfaSmrg   const uint64_t max_divisor = divisor << __builtin_clzll (divisor);
23148fb7bfaSmrg 
23248fb7bfaSmrg   uint64_t remainder = dividend;
23348fb7bfaSmrg   while (remainder >= divisor)
23448fb7bfaSmrg     {
23548fb7bfaSmrg       const int shift = __builtin_clzll (remainder);
23648fb7bfaSmrg       uint64_t scaled_divisor = max_divisor >> shift;
23748fb7bfaSmrg       scaled_divisor >>= (scaled_divisor > remainder);
23848fb7bfaSmrg       remainder -= scaled_divisor;
23948fb7bfaSmrg     }
24048fb7bfaSmrg 
24148fb7bfaSmrg   return remainder;
24248fb7bfaSmrg }
24348fb7bfaSmrg 
24448fb7bfaSmrg 
24548fb7bfaSmrg uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor);
24648fb7bfaSmrg #ifdef L_tile_udivsi3
24748fb7bfaSmrg uint32_t
__udivsi3(uint32_t dividend,uint32_t divisor)24848fb7bfaSmrg __udivsi3 (uint32_t dividend, uint32_t divisor)
24948fb7bfaSmrg {
25048fb7bfaSmrg #ifndef __tilegx__
25148fb7bfaSmrg   return __udivsi3_inline (dividend, divisor);
25248fb7bfaSmrg #else /* !__tilegx__ */
25348fb7bfaSmrg   uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor));
25448fb7bfaSmrg   return (uint32_t) n;
25548fb7bfaSmrg #endif /* !__tilegx__ */
25648fb7bfaSmrg }
25748fb7bfaSmrg #endif
25848fb7bfaSmrg 
25948fb7bfaSmrg #define ABS(x) ((x) >= 0 ? (x) : -(x))
26048fb7bfaSmrg 
26148fb7bfaSmrg int32_t __divsi3 (int32_t dividend, int32_t divisor);
26248fb7bfaSmrg #ifdef L_tile_divsi3
26348fb7bfaSmrg /* __divsi3 - 32 bit integer signed divide  */
26448fb7bfaSmrg int32_t
__divsi3(int32_t dividend,int32_t divisor)26548fb7bfaSmrg __divsi3 (int32_t dividend, int32_t divisor)
26648fb7bfaSmrg {
26748fb7bfaSmrg #ifndef __tilegx__
26848fb7bfaSmrg   uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor));
26948fb7bfaSmrg #else /* !__tilegx__ */
27048fb7bfaSmrg   uint64_t n =
27148fb7bfaSmrg     __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
27248fb7bfaSmrg #endif /* !__tilegx__ */
27348fb7bfaSmrg   if ((dividend ^ divisor) < 0)
27448fb7bfaSmrg     n = -n;
27548fb7bfaSmrg   return (int32_t) n;
27648fb7bfaSmrg }
27748fb7bfaSmrg #endif
27848fb7bfaSmrg 
27948fb7bfaSmrg 
28048fb7bfaSmrg uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor);
28148fb7bfaSmrg #ifdef L_tile_udivdi3
28248fb7bfaSmrg uint64_t
__udivdi3(uint64_t dividend,uint64_t divisor)28348fb7bfaSmrg __udivdi3 (uint64_t dividend, uint64_t divisor)
28448fb7bfaSmrg {
28548fb7bfaSmrg   return __udivdi3_inline (dividend, divisor);
28648fb7bfaSmrg }
28748fb7bfaSmrg #endif
28848fb7bfaSmrg 
28948fb7bfaSmrg /*__divdi3 - 64 bit integer signed divide  */
29048fb7bfaSmrg int64_t __divdi3 (int64_t dividend, int64_t divisor);
29148fb7bfaSmrg #ifdef L_tile_divdi3
29248fb7bfaSmrg int64_t
__divdi3(int64_t dividend,int64_t divisor)29348fb7bfaSmrg __divdi3 (int64_t dividend, int64_t divisor)
29448fb7bfaSmrg {
29548fb7bfaSmrg   uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor));
29648fb7bfaSmrg   if ((dividend ^ divisor) < 0)
29748fb7bfaSmrg     n = -n;
29848fb7bfaSmrg   return (int64_t) n;
29948fb7bfaSmrg }
30048fb7bfaSmrg #endif
30148fb7bfaSmrg 
30248fb7bfaSmrg 
30348fb7bfaSmrg uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor);
30448fb7bfaSmrg #ifdef L_tile_umodsi3
30548fb7bfaSmrg uint32_t
__umodsi3(uint32_t dividend,uint32_t divisor)30648fb7bfaSmrg __umodsi3 (uint32_t dividend, uint32_t divisor)
30748fb7bfaSmrg {
30848fb7bfaSmrg #ifndef __tilegx__
30948fb7bfaSmrg   return __umodsi3_inline (dividend, divisor);
31048fb7bfaSmrg #else /* !__tilegx__ */
31148fb7bfaSmrg   return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor);
31248fb7bfaSmrg #endif /* !__tilegx__ */
31348fb7bfaSmrg }
31448fb7bfaSmrg #endif
31548fb7bfaSmrg 
31648fb7bfaSmrg 
31748fb7bfaSmrg /* __modsi3 - 32 bit integer signed modulo  */
31848fb7bfaSmrg int32_t __modsi3 (int32_t dividend, int32_t divisor);
31948fb7bfaSmrg #ifdef L_tile_modsi3
32048fb7bfaSmrg int32_t
__modsi3(int32_t dividend,int32_t divisor)32148fb7bfaSmrg __modsi3 (int32_t dividend, int32_t divisor)
32248fb7bfaSmrg {
32348fb7bfaSmrg #ifndef __tilegx__
32448fb7bfaSmrg   uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor));
32548fb7bfaSmrg #else /* !__tilegx__ */
32648fb7bfaSmrg   uint64_t remainder =
32748fb7bfaSmrg     __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
32848fb7bfaSmrg #endif /* !__tilegx__ */
32948fb7bfaSmrg   return (int32_t) ((dividend >= 0) ? remainder : -remainder);
33048fb7bfaSmrg }
33148fb7bfaSmrg #endif
33248fb7bfaSmrg 
33348fb7bfaSmrg 
33448fb7bfaSmrg uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor);
33548fb7bfaSmrg #ifdef L_tile_umoddi3
33648fb7bfaSmrg uint64_t
__umoddi3(uint64_t dividend,uint64_t divisor)33748fb7bfaSmrg __umoddi3 (uint64_t dividend, uint64_t divisor)
33848fb7bfaSmrg {
33948fb7bfaSmrg   return __umoddi3_inline (dividend, divisor);
34048fb7bfaSmrg }
34148fb7bfaSmrg #endif
34248fb7bfaSmrg 
34348fb7bfaSmrg 
34448fb7bfaSmrg /* __moddi3 - 64 bit integer signed modulo  */
34548fb7bfaSmrg int64_t __moddi3 (int64_t dividend, int64_t divisor);
34648fb7bfaSmrg #ifdef L_tile_moddi3
34748fb7bfaSmrg int64_t
__moddi3(int64_t dividend,int64_t divisor)34848fb7bfaSmrg __moddi3 (int64_t dividend, int64_t divisor)
34948fb7bfaSmrg {
35048fb7bfaSmrg   uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor));
35148fb7bfaSmrg   return (int64_t) ((dividend >= 0) ? remainder : -remainder);
35248fb7bfaSmrg }
35348fb7bfaSmrg #endif
354