xref: /netbsd-src/external/gpl3/gcc.old/dist/libgcc/config/tilepro/softdivide.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
136ac495dSmrg /* Division and remainder routines for Tile.
2*8feb0f0bSmrg    Copyright (C) 2011-2020 Free Software Foundation, Inc.
336ac495dSmrg    Contributed by Walter Lee (walt@tilera.com)
436ac495dSmrg 
536ac495dSmrg    This file is free software; you can redistribute it and/or modify it
636ac495dSmrg    under the terms of the GNU General Public License as published by the
736ac495dSmrg    Free Software Foundation; either version 3, or (at your option) any
836ac495dSmrg    later version.
936ac495dSmrg 
1036ac495dSmrg    This file is distributed in the hope that it will be useful, but
1136ac495dSmrg    WITHOUT ANY WARRANTY; without even the implied warranty of
1236ac495dSmrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1336ac495dSmrg    General Public License for more details.
1436ac495dSmrg 
1536ac495dSmrg    Under Section 7 of GPL version 3, you are granted additional
1636ac495dSmrg    permissions described in the GCC Runtime Library Exception, version
1736ac495dSmrg    3.1, as published by the Free Software Foundation.
1836ac495dSmrg 
1936ac495dSmrg    You should have received a copy of the GNU General Public License and
2036ac495dSmrg    a copy of the GCC Runtime Library Exception along with this program;
2136ac495dSmrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2236ac495dSmrg    <http://www.gnu.org/licenses/>.  */
2336ac495dSmrg 
2436ac495dSmrg typedef int int32_t;
2536ac495dSmrg typedef unsigned uint32_t;
2636ac495dSmrg typedef long long int64_t;
2736ac495dSmrg typedef unsigned long long uint64_t;
2836ac495dSmrg 
2936ac495dSmrg /* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV).  */
3036ac495dSmrg static inline void
raise_intdiv(void)3136ac495dSmrg raise_intdiv (void)
3236ac495dSmrg {
3336ac495dSmrg   asm ("{ raise; moveli zero, 8 + (1 << 6) }");
3436ac495dSmrg }
3536ac495dSmrg 
3636ac495dSmrg 
3736ac495dSmrg #ifndef __tilegx__
3836ac495dSmrg /*__udivsi3 - 32 bit integer unsigned divide  */
3936ac495dSmrg static inline uint32_t __attribute__ ((always_inline))
__udivsi3_inline(uint32_t dividend,uint32_t divisor)4036ac495dSmrg __udivsi3_inline (uint32_t dividend, uint32_t divisor)
4136ac495dSmrg {
4236ac495dSmrg   /* Divide out any power of two factor from dividend and divisor.
4336ac495dSmrg      Note that when dividing by zero the divisor will remain zero,
4436ac495dSmrg      which is all we need to detect that case below.  */
4536ac495dSmrg   const int power_of_two_factor = __insn_ctz (divisor);
4636ac495dSmrg   divisor >>= power_of_two_factor;
4736ac495dSmrg   dividend >>= power_of_two_factor;
4836ac495dSmrg 
4936ac495dSmrg   /* Checks for division by power of two or division by zero.  */
5036ac495dSmrg   if (divisor <= 1)
5136ac495dSmrg     {
5236ac495dSmrg       if (divisor == 0)
5336ac495dSmrg 	{
5436ac495dSmrg 	  raise_intdiv ();
5536ac495dSmrg 	  return 0;
5636ac495dSmrg 	}
5736ac495dSmrg       return dividend;
5836ac495dSmrg     }
5936ac495dSmrg 
6036ac495dSmrg   /* Compute (a / b) by repeatedly finding the largest N
6136ac495dSmrg      such that (b << N) <= a. For each such N, set bit N in the
6236ac495dSmrg      quotient, subtract (b << N) from a, and keep going. Think of this as
6336ac495dSmrg      the reverse of the "shift-and-add" that a multiply does. The values
6436ac495dSmrg      of N are precisely those shift counts.
6536ac495dSmrg 
6636ac495dSmrg      Finding N is easy. First, use clz(b) - clz(a) to find the N
6736ac495dSmrg      that lines up the high bit of (b << N) with the high bit of a.
6836ac495dSmrg      Any larger value of N would definitely make (b << N) > a,
6936ac495dSmrg      which is too big.
7036ac495dSmrg 
7136ac495dSmrg      Then, if (b << N) > a (because it has larger low bits), decrement
7236ac495dSmrg      N by one.  This adjustment will definitely make (b << N) less
7336ac495dSmrg      than a, because a's high bit is now one higher than b's.  */
7436ac495dSmrg 
7536ac495dSmrg   /* Precomputing the max_ values allows us to avoid a subtract
7636ac495dSmrg      in the inner loop and just right shift by clz(remainder).  */
7736ac495dSmrg   const int divisor_clz = __insn_clz (divisor);
7836ac495dSmrg   const uint32_t max_divisor = divisor << divisor_clz;
7936ac495dSmrg   const uint32_t max_qbit = 1 << divisor_clz;
8036ac495dSmrg 
8136ac495dSmrg   uint32_t quotient = 0;
8236ac495dSmrg   uint32_t remainder = dividend;
8336ac495dSmrg 
8436ac495dSmrg   while (remainder >= divisor)
8536ac495dSmrg     {
8636ac495dSmrg       int shift = __insn_clz (remainder);
8736ac495dSmrg       uint32_t scaled_divisor = max_divisor >> shift;
8836ac495dSmrg       uint32_t quotient_bit = max_qbit >> shift;
8936ac495dSmrg 
9036ac495dSmrg       int too_big = (scaled_divisor > remainder);
9136ac495dSmrg       scaled_divisor >>= too_big;
9236ac495dSmrg       quotient_bit >>= too_big;
9336ac495dSmrg       remainder -= scaled_divisor;
9436ac495dSmrg       quotient |= quotient_bit;
9536ac495dSmrg     }
9636ac495dSmrg   return quotient;
9736ac495dSmrg }
9836ac495dSmrg #endif /* !__tilegx__ */
9936ac495dSmrg 
10036ac495dSmrg 
10136ac495dSmrg /* __udivdi3 - 64 bit integer unsigned divide  */
10236ac495dSmrg static inline uint64_t __attribute__ ((always_inline))
__udivdi3_inline(uint64_t dividend,uint64_t divisor)10336ac495dSmrg __udivdi3_inline (uint64_t dividend, uint64_t divisor)
10436ac495dSmrg {
10536ac495dSmrg   /* Divide out any power of two factor from dividend and divisor.
10636ac495dSmrg      Note that when dividing by zero the divisor will remain zero,
10736ac495dSmrg      which is all we need to detect that case below.  */
10836ac495dSmrg   const int power_of_two_factor = __builtin_ctzll (divisor);
10936ac495dSmrg   divisor >>= power_of_two_factor;
11036ac495dSmrg   dividend >>= power_of_two_factor;
11136ac495dSmrg 
11236ac495dSmrg   /* Checks for division by power of two or division by zero.  */
11336ac495dSmrg   if (divisor <= 1)
11436ac495dSmrg     {
11536ac495dSmrg       if (divisor == 0)
11636ac495dSmrg 	{
11736ac495dSmrg 	  raise_intdiv ();
11836ac495dSmrg 	  return 0;
11936ac495dSmrg 	}
12036ac495dSmrg       return dividend;
12136ac495dSmrg     }
12236ac495dSmrg 
12336ac495dSmrg #ifndef __tilegx__
12436ac495dSmrg   if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
12536ac495dSmrg     {
12636ac495dSmrg       /* Operands both fit in 32 bits, so use faster 32 bit algorithm.  */
12736ac495dSmrg       return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
12836ac495dSmrg     }
12936ac495dSmrg #endif /* !__tilegx__ */
13036ac495dSmrg 
13136ac495dSmrg   /* See algorithm description in __udivsi3  */
13236ac495dSmrg 
13336ac495dSmrg   const int divisor_clz = __builtin_clzll (divisor);
13436ac495dSmrg   const uint64_t max_divisor = divisor << divisor_clz;
13536ac495dSmrg   const uint64_t max_qbit = 1ULL << divisor_clz;
13636ac495dSmrg 
13736ac495dSmrg   uint64_t quotient = 0;
13836ac495dSmrg   uint64_t remainder = dividend;
13936ac495dSmrg 
14036ac495dSmrg   while (remainder >= divisor)
14136ac495dSmrg     {
14236ac495dSmrg       int shift = __builtin_clzll (remainder);
14336ac495dSmrg       uint64_t scaled_divisor = max_divisor >> shift;
14436ac495dSmrg       uint64_t quotient_bit = max_qbit >> shift;
14536ac495dSmrg 
14636ac495dSmrg       int too_big = (scaled_divisor > remainder);
14736ac495dSmrg       scaled_divisor >>= too_big;
14836ac495dSmrg       quotient_bit >>= too_big;
14936ac495dSmrg       remainder -= scaled_divisor;
15036ac495dSmrg       quotient |= quotient_bit;
15136ac495dSmrg     }
15236ac495dSmrg   return quotient;
15336ac495dSmrg }
15436ac495dSmrg 
15536ac495dSmrg 
15636ac495dSmrg #ifndef __tilegx__
15736ac495dSmrg /* __umodsi3 - 32 bit integer unsigned modulo  */
15836ac495dSmrg static inline uint32_t __attribute__ ((always_inline))
__umodsi3_inline(uint32_t dividend,uint32_t divisor)15936ac495dSmrg __umodsi3_inline (uint32_t dividend, uint32_t divisor)
16036ac495dSmrg {
16136ac495dSmrg   /* Shortcircuit mod by a power of two (and catch mod by zero).  */
16236ac495dSmrg   const uint32_t mask = divisor - 1;
16336ac495dSmrg   if ((divisor & mask) == 0)
16436ac495dSmrg     {
16536ac495dSmrg       if (divisor == 0)
16636ac495dSmrg 	{
16736ac495dSmrg 	  raise_intdiv ();
16836ac495dSmrg 	  return 0;
16936ac495dSmrg 	}
17036ac495dSmrg       return dividend & mask;
17136ac495dSmrg     }
17236ac495dSmrg 
17336ac495dSmrg   /* We compute the remainder (a % b) by repeatedly subtracting off
17436ac495dSmrg      multiples of b from a until a < b. The key is that subtracting
17536ac495dSmrg      off a multiple of b does not affect the result mod b.
17636ac495dSmrg 
17736ac495dSmrg      To make the algorithm run efficiently, we need to subtract
17836ac495dSmrg      off a large multiple of b at each step. We subtract the largest
17936ac495dSmrg      (b << N) that is <= a.
18036ac495dSmrg 
18136ac495dSmrg      Finding N is easy. First, use clz(b) - clz(a) to find the N
18236ac495dSmrg      that lines up the high bit of (b << N) with the high bit of a.
18336ac495dSmrg      Any larger value of N would definitely make (b << N) > a,
18436ac495dSmrg      which is too big.
18536ac495dSmrg 
18636ac495dSmrg      Then, if (b << N) > a (because it has larger low bits), decrement
18736ac495dSmrg      N by one.  This adjustment will definitely make (b << N) less
18836ac495dSmrg      than a, because a's high bit is now one higher than b's.  */
18936ac495dSmrg   const uint32_t max_divisor = divisor << __insn_clz (divisor);
19036ac495dSmrg 
19136ac495dSmrg   uint32_t remainder = dividend;
19236ac495dSmrg   while (remainder >= divisor)
19336ac495dSmrg     {
19436ac495dSmrg       const int shift = __insn_clz (remainder);
19536ac495dSmrg       uint32_t scaled_divisor = max_divisor >> shift;
19636ac495dSmrg       scaled_divisor >>= (scaled_divisor > remainder);
19736ac495dSmrg       remainder -= scaled_divisor;
19836ac495dSmrg     }
19936ac495dSmrg 
20036ac495dSmrg   return remainder;
20136ac495dSmrg }
20236ac495dSmrg #endif /* !__tilegx__ */
20336ac495dSmrg 
20436ac495dSmrg 
20536ac495dSmrg /* __umoddi3 - 64 bit integer unsigned modulo  */
20636ac495dSmrg static inline uint64_t __attribute__ ((always_inline))
__umoddi3_inline(uint64_t dividend,uint64_t divisor)20736ac495dSmrg __umoddi3_inline (uint64_t dividend, uint64_t divisor)
20836ac495dSmrg {
20936ac495dSmrg #ifndef __tilegx__
21036ac495dSmrg   if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
21136ac495dSmrg     {
21236ac495dSmrg       /* Operands both fit in 32 bits, so use faster 32 bit algorithm.  */
21336ac495dSmrg       return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
21436ac495dSmrg     }
21536ac495dSmrg #endif /* !__tilegx__ */
21636ac495dSmrg 
21736ac495dSmrg   /* Shortcircuit mod by a power of two (and catch mod by zero).  */
21836ac495dSmrg   const uint64_t mask = divisor - 1;
21936ac495dSmrg   if ((divisor & mask) == 0)
22036ac495dSmrg     {
22136ac495dSmrg       if (divisor == 0)
22236ac495dSmrg 	{
22336ac495dSmrg 	  raise_intdiv ();
22436ac495dSmrg 	  return 0;
22536ac495dSmrg 	}
22636ac495dSmrg       return dividend & mask;
22736ac495dSmrg     }
22836ac495dSmrg 
22936ac495dSmrg   /* See algorithm description in __umodsi3  */
23036ac495dSmrg   const uint64_t max_divisor = divisor << __builtin_clzll (divisor);
23136ac495dSmrg 
23236ac495dSmrg   uint64_t remainder = dividend;
23336ac495dSmrg   while (remainder >= divisor)
23436ac495dSmrg     {
23536ac495dSmrg       const int shift = __builtin_clzll (remainder);
23636ac495dSmrg       uint64_t scaled_divisor = max_divisor >> shift;
23736ac495dSmrg       scaled_divisor >>= (scaled_divisor > remainder);
23836ac495dSmrg       remainder -= scaled_divisor;
23936ac495dSmrg     }
24036ac495dSmrg 
24136ac495dSmrg   return remainder;
24236ac495dSmrg }
24336ac495dSmrg 
24436ac495dSmrg 
24536ac495dSmrg uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor);
24636ac495dSmrg #ifdef L_tile_udivsi3
24736ac495dSmrg uint32_t
__udivsi3(uint32_t dividend,uint32_t divisor)24836ac495dSmrg __udivsi3 (uint32_t dividend, uint32_t divisor)
24936ac495dSmrg {
25036ac495dSmrg #ifndef __tilegx__
25136ac495dSmrg   return __udivsi3_inline (dividend, divisor);
25236ac495dSmrg #else /* !__tilegx__ */
25336ac495dSmrg   uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor));
25436ac495dSmrg   return (uint32_t) n;
25536ac495dSmrg #endif /* !__tilegx__ */
25636ac495dSmrg }
25736ac495dSmrg #endif
25836ac495dSmrg 
25936ac495dSmrg #define ABS(x) ((x) >= 0 ? (x) : -(x))
26036ac495dSmrg 
26136ac495dSmrg int32_t __divsi3 (int32_t dividend, int32_t divisor);
26236ac495dSmrg #ifdef L_tile_divsi3
26336ac495dSmrg /* __divsi3 - 32 bit integer signed divide  */
26436ac495dSmrg int32_t
__divsi3(int32_t dividend,int32_t divisor)26536ac495dSmrg __divsi3 (int32_t dividend, int32_t divisor)
26636ac495dSmrg {
26736ac495dSmrg #ifndef __tilegx__
26836ac495dSmrg   uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor));
26936ac495dSmrg #else /* !__tilegx__ */
27036ac495dSmrg   uint64_t n =
27136ac495dSmrg     __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
27236ac495dSmrg #endif /* !__tilegx__ */
27336ac495dSmrg   if ((dividend ^ divisor) < 0)
27436ac495dSmrg     n = -n;
27536ac495dSmrg   return (int32_t) n;
27636ac495dSmrg }
27736ac495dSmrg #endif
27836ac495dSmrg 
27936ac495dSmrg 
28036ac495dSmrg uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor);
28136ac495dSmrg #ifdef L_tile_udivdi3
28236ac495dSmrg uint64_t
__udivdi3(uint64_t dividend,uint64_t divisor)28336ac495dSmrg __udivdi3 (uint64_t dividend, uint64_t divisor)
28436ac495dSmrg {
28536ac495dSmrg   return __udivdi3_inline (dividend, divisor);
28636ac495dSmrg }
28736ac495dSmrg #endif
28836ac495dSmrg 
28936ac495dSmrg /*__divdi3 - 64 bit integer signed divide  */
29036ac495dSmrg int64_t __divdi3 (int64_t dividend, int64_t divisor);
29136ac495dSmrg #ifdef L_tile_divdi3
29236ac495dSmrg int64_t
__divdi3(int64_t dividend,int64_t divisor)29336ac495dSmrg __divdi3 (int64_t dividend, int64_t divisor)
29436ac495dSmrg {
29536ac495dSmrg   uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor));
29636ac495dSmrg   if ((dividend ^ divisor) < 0)
29736ac495dSmrg     n = -n;
29836ac495dSmrg   return (int64_t) n;
29936ac495dSmrg }
30036ac495dSmrg #endif
30136ac495dSmrg 
30236ac495dSmrg 
30336ac495dSmrg uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor);
30436ac495dSmrg #ifdef L_tile_umodsi3
30536ac495dSmrg uint32_t
__umodsi3(uint32_t dividend,uint32_t divisor)30636ac495dSmrg __umodsi3 (uint32_t dividend, uint32_t divisor)
30736ac495dSmrg {
30836ac495dSmrg #ifndef __tilegx__
30936ac495dSmrg   return __umodsi3_inline (dividend, divisor);
31036ac495dSmrg #else /* !__tilegx__ */
31136ac495dSmrg   return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor);
31236ac495dSmrg #endif /* !__tilegx__ */
31336ac495dSmrg }
31436ac495dSmrg #endif
31536ac495dSmrg 
31636ac495dSmrg 
31736ac495dSmrg /* __modsi3 - 32 bit integer signed modulo  */
31836ac495dSmrg int32_t __modsi3 (int32_t dividend, int32_t divisor);
31936ac495dSmrg #ifdef L_tile_modsi3
32036ac495dSmrg int32_t
__modsi3(int32_t dividend,int32_t divisor)32136ac495dSmrg __modsi3 (int32_t dividend, int32_t divisor)
32236ac495dSmrg {
32336ac495dSmrg #ifndef __tilegx__
32436ac495dSmrg   uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor));
32536ac495dSmrg #else /* !__tilegx__ */
32636ac495dSmrg   uint64_t remainder =
32736ac495dSmrg     __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
32836ac495dSmrg #endif /* !__tilegx__ */
32936ac495dSmrg   return (int32_t) ((dividend >= 0) ? remainder : -remainder);
33036ac495dSmrg }
33136ac495dSmrg #endif
33236ac495dSmrg 
33336ac495dSmrg 
33436ac495dSmrg uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor);
33536ac495dSmrg #ifdef L_tile_umoddi3
33636ac495dSmrg uint64_t
__umoddi3(uint64_t dividend,uint64_t divisor)33736ac495dSmrg __umoddi3 (uint64_t dividend, uint64_t divisor)
33836ac495dSmrg {
33936ac495dSmrg   return __umoddi3_inline (dividend, divisor);
34036ac495dSmrg }
34136ac495dSmrg #endif
34236ac495dSmrg 
34336ac495dSmrg 
34436ac495dSmrg /* __moddi3 - 64 bit integer signed modulo  */
34536ac495dSmrg int64_t __moddi3 (int64_t dividend, int64_t divisor);
34636ac495dSmrg #ifdef L_tile_moddi3
34736ac495dSmrg int64_t
__moddi3(int64_t dividend,int64_t divisor)34836ac495dSmrg __moddi3 (int64_t dividend, int64_t divisor)
34936ac495dSmrg {
35036ac495dSmrg   uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor));
35136ac495dSmrg   return (int64_t) ((dividend >= 0) ? remainder : -remainder);
35236ac495dSmrg }
35336ac495dSmrg #endif
354