/* mpn_gcd_1 -- mpn and limb greatest common divisor.

Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* GCD_1_METHOD selects the single-limb reduction loop used by mpn_gcd_1:
     1  compare, subtract the smaller from the larger, strip trailing zeros;
     2  mask-based min / absolute-difference step with an implicit low bit.
   Any other value is rejected at compile time.  The default can be
   overridden by defining GCD_1_METHOD before this point.  */
#ifndef GCD_1_METHOD
#define GCD_1_METHOD 2
#endif

/* When nonzero, method 2 finds trailing zeros with the zerotab lookup below
   instead of count_trailing_zeros.  */
#define USE_ZEROTAB 0

#if USE_ZEROTAB
/* zerotab[i] is the number of trailing zero bits of the 4-bit value i, with
   zerotab[0] == 4 meaning "all four low bits are zero, look at the next
   four".  */
static const unsigned char zerotab[16] = {
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
#endif

3686d7f5d3SJohn Marino /* Does not work for U == 0 or V == 0.  It would be tough to make it work for
3786d7f5d3SJohn Marino    V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino    The threshold for doing u%v when size==1 will vary by CPU according to
4086d7f5d3SJohn Marino    the speed of a division and the code generated for the main loop.  Any
4186d7f5d3SJohn Marino    tuning for this is left to a CPU specific implementation.  */
4286d7f5d3SJohn Marino 
4386d7f5d3SJohn Marino mp_limb_t
mpn_gcd_1(mp_srcptr up,mp_size_t size,mp_limb_t vlimb)4486d7f5d3SJohn Marino mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
4586d7f5d3SJohn Marino {
4686d7f5d3SJohn Marino   mp_limb_t      ulimb;
4786d7f5d3SJohn Marino   unsigned long  zero_bits, u_low_zero_bits;
4886d7f5d3SJohn Marino 
4986d7f5d3SJohn Marino   ASSERT (size >= 1);
5086d7f5d3SJohn Marino   ASSERT (vlimb != 0);
5186d7f5d3SJohn Marino   ASSERT_MPN_NONZERO_P (up, size);
5286d7f5d3SJohn Marino 
5386d7f5d3SJohn Marino   ulimb = up[0];
5486d7f5d3SJohn Marino 
5586d7f5d3SJohn Marino   /* Need vlimb odd for modexact, want it odd to get common zeros. */
5686d7f5d3SJohn Marino   count_trailing_zeros (zero_bits, vlimb);
5786d7f5d3SJohn Marino   vlimb >>= zero_bits;
5886d7f5d3SJohn Marino 
5986d7f5d3SJohn Marino   if (size > 1)
6086d7f5d3SJohn Marino     {
6186d7f5d3SJohn Marino       /* Must get common zeros before the mod reduction.  If ulimb==0 then
6286d7f5d3SJohn Marino 	 vlimb already gives the common zeros.  */
6386d7f5d3SJohn Marino       if (ulimb != 0)
6486d7f5d3SJohn Marino 	{
6586d7f5d3SJohn Marino 	  count_trailing_zeros (u_low_zero_bits, ulimb);
6686d7f5d3SJohn Marino 	  zero_bits = MIN (zero_bits, u_low_zero_bits);
6786d7f5d3SJohn Marino 	}
6886d7f5d3SJohn Marino 
6986d7f5d3SJohn Marino       ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
7086d7f5d3SJohn Marino       if (ulimb == 0)
7186d7f5d3SJohn Marino 	goto done;
7286d7f5d3SJohn Marino 
7386d7f5d3SJohn Marino       goto strip_u_maybe;
7486d7f5d3SJohn Marino     }
7586d7f5d3SJohn Marino 
7686d7f5d3SJohn Marino   /* size==1, so up[0]!=0 */
7786d7f5d3SJohn Marino   count_trailing_zeros (u_low_zero_bits, ulimb);
7886d7f5d3SJohn Marino   ulimb >>= u_low_zero_bits;
7986d7f5d3SJohn Marino   zero_bits = MIN (zero_bits, u_low_zero_bits);
8086d7f5d3SJohn Marino 
8186d7f5d3SJohn Marino   /* make u bigger */
8286d7f5d3SJohn Marino   if (vlimb > ulimb)
8386d7f5d3SJohn Marino     MP_LIMB_T_SWAP (ulimb, vlimb);
8486d7f5d3SJohn Marino 
8586d7f5d3SJohn Marino   /* if u is much bigger than v, reduce using a division rather than
8686d7f5d3SJohn Marino      chipping away at it bit-by-bit */
8786d7f5d3SJohn Marino   if ((ulimb >> 16) > vlimb)
8886d7f5d3SJohn Marino     {
8986d7f5d3SJohn Marino       ulimb %= vlimb;
9086d7f5d3SJohn Marino       if (ulimb == 0)
9186d7f5d3SJohn Marino 	goto done;
9286d7f5d3SJohn Marino       goto strip_u_maybe;
9386d7f5d3SJohn Marino     }
9486d7f5d3SJohn Marino 
9586d7f5d3SJohn Marino   ASSERT (ulimb & 1);
9686d7f5d3SJohn Marino   ASSERT (vlimb & 1);
9786d7f5d3SJohn Marino 
9886d7f5d3SJohn Marino #if GCD_1_METHOD == 1
9986d7f5d3SJohn Marino   while (ulimb != vlimb)
10086d7f5d3SJohn Marino     {
10186d7f5d3SJohn Marino       ASSERT (ulimb & 1);
10286d7f5d3SJohn Marino       ASSERT (vlimb & 1);
10386d7f5d3SJohn Marino 
10486d7f5d3SJohn Marino       if (ulimb > vlimb)
10586d7f5d3SJohn Marino 	{
10686d7f5d3SJohn Marino 	  ulimb -= vlimb;
10786d7f5d3SJohn Marino 	  do
10886d7f5d3SJohn Marino 	    {
10986d7f5d3SJohn Marino 	      ulimb >>= 1;
11086d7f5d3SJohn Marino 	      ASSERT (ulimb != 0);
11186d7f5d3SJohn Marino 	    strip_u_maybe:
11286d7f5d3SJohn Marino 	      ;
11386d7f5d3SJohn Marino 	    }
11486d7f5d3SJohn Marino 	  while ((ulimb & 1) == 0);
11586d7f5d3SJohn Marino 	}
11686d7f5d3SJohn Marino       else /*  vlimb > ulimb.  */
11786d7f5d3SJohn Marino 	{
11886d7f5d3SJohn Marino 	  vlimb -= ulimb;
11986d7f5d3SJohn Marino 	  do
12086d7f5d3SJohn Marino 	    {
12186d7f5d3SJohn Marino 	      vlimb >>= 1;
12286d7f5d3SJohn Marino 	      ASSERT (vlimb != 0);
12386d7f5d3SJohn Marino 	    }
12486d7f5d3SJohn Marino 	  while ((vlimb & 1) == 0);
12586d7f5d3SJohn Marino 	}
12686d7f5d3SJohn Marino     }
12786d7f5d3SJohn Marino #else
12886d7f5d3SJohn Marino # if GCD_1_METHOD  == 2
12986d7f5d3SJohn Marino 
13086d7f5d3SJohn Marino   ulimb >>= 1;
13186d7f5d3SJohn Marino   vlimb >>= 1;
13286d7f5d3SJohn Marino 
13386d7f5d3SJohn Marino   while (ulimb != vlimb)
13486d7f5d3SJohn Marino     {
13586d7f5d3SJohn Marino       int c;
13686d7f5d3SJohn Marino       mp_limb_t t = ulimb - vlimb;
13786d7f5d3SJohn Marino       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (t);
13886d7f5d3SJohn Marino 
13986d7f5d3SJohn Marino       /* v <-- min (u, v) */
14086d7f5d3SJohn Marino       vlimb += (vgtu & t);
14186d7f5d3SJohn Marino 
14286d7f5d3SJohn Marino       /* u <-- |u - v| */
14386d7f5d3SJohn Marino       ulimb = (t ^ vgtu) - vgtu;
14486d7f5d3SJohn Marino 
14586d7f5d3SJohn Marino #if USE_ZEROTAB
14686d7f5d3SJohn Marino       /* Number of trailing zeros is the same no matter if we look at
14786d7f5d3SJohn Marino        * t or ulimb, but using t gives more parallelism. */
14886d7f5d3SJohn Marino       c = zerotab[t & 15];
14986d7f5d3SJohn Marino 
15086d7f5d3SJohn Marino       while (UNLIKELY (c == 4))
15186d7f5d3SJohn Marino 	{
15286d7f5d3SJohn Marino 	  ulimb >>= 4;
15386d7f5d3SJohn Marino 	  if (0)
15486d7f5d3SJohn Marino 	  strip_u_maybe:
15586d7f5d3SJohn Marino 	    vlimb >>= 1;
15686d7f5d3SJohn Marino 
15786d7f5d3SJohn Marino 	  c = zerotab[ulimb & 15];
15886d7f5d3SJohn Marino 	}
15986d7f5d3SJohn Marino #else
16086d7f5d3SJohn Marino       if (0)
16186d7f5d3SJohn Marino 	{
16286d7f5d3SJohn Marino 	strip_u_maybe:
16386d7f5d3SJohn Marino 	  vlimb >>= 1;
16486d7f5d3SJohn Marino 	  t = ulimb;
16586d7f5d3SJohn Marino 	}
16686d7f5d3SJohn Marino       count_trailing_zeros (c, t);
16786d7f5d3SJohn Marino #endif
16886d7f5d3SJohn Marino       ulimb >>= (c + 1);
16986d7f5d3SJohn Marino     }
17086d7f5d3SJohn Marino 
17186d7f5d3SJohn Marino   vlimb = (vlimb << 1) | 1;
17286d7f5d3SJohn Marino # else
17386d7f5d3SJohn Marino #  error Unknown GCD_1_METHOD
17486d7f5d3SJohn Marino # endif
17586d7f5d3SJohn Marino #endif
17686d7f5d3SJohn Marino 
17786d7f5d3SJohn Marino  done:
17886d7f5d3SJohn Marino   return vlimb << zero_bits;
17986d7f5d3SJohn Marino }
180