xref: /dflybsd-src/contrib/gmp/mpn/generic/gcdext_1.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_gcdext -- Extended Greatest Common Divisor.
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
486d7f5d3SJohn Marino Foundation, Inc.
586d7f5d3SJohn Marino 
686d7f5d3SJohn Marino This file is part of the GNU MP Library.
786d7f5d3SJohn Marino 
886d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
986d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1086d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1186d7f5d3SJohn Marino option) any later version.
1286d7f5d3SJohn Marino 
1386d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1486d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1586d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1686d7f5d3SJohn Marino License for more details.
1786d7f5d3SJohn Marino 
1886d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
1986d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2086d7f5d3SJohn Marino 
2186d7f5d3SJohn Marino #include "gmp.h"
2286d7f5d3SJohn Marino #include "gmp-impl.h"
2386d7f5d3SJohn Marino #include "longlong.h"
2486d7f5d3SJohn Marino 
2586d7f5d3SJohn Marino #ifndef GCDEXT_1_USE_BINARY
2686d7f5d3SJohn Marino #define GCDEXT_1_USE_BINARY 0
2786d7f5d3SJohn Marino #endif
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino #ifndef GCDEXT_1_BINARY_METHOD
3086d7f5d3SJohn Marino #define GCDEXT_1_BINARY_METHOD 2
3186d7f5d3SJohn Marino #endif
3286d7f5d3SJohn Marino 
3386d7f5d3SJohn Marino #ifndef USE_ZEROTAB
3486d7f5d3SJohn Marino #define USE_ZEROTAB 1
3586d7f5d3SJohn Marino #endif
3686d7f5d3SJohn Marino 
3786d7f5d3SJohn Marino #if GCDEXT_1_USE_BINARY
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino #if USE_ZEROTAB
4086d7f5d3SJohn Marino static unsigned char zerotab[0x40] = {
4186d7f5d3SJohn Marino   6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4286d7f5d3SJohn Marino   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4386d7f5d3SJohn Marino   5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4486d7f5d3SJohn Marino   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4586d7f5d3SJohn Marino };
4686d7f5d3SJohn Marino #endif
4786d7f5d3SJohn Marino 
4886d7f5d3SJohn Marino mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * sp,mp_limb_signed_t * tp,mp_limb_t u,mp_limb_t v)4986d7f5d3SJohn Marino mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
5086d7f5d3SJohn Marino 	      mp_limb_t u, mp_limb_t v)
5186d7f5d3SJohn Marino {
5286d7f5d3SJohn Marino   /* Maintain
5386d7f5d3SJohn Marino 
5486d7f5d3SJohn Marino      U = t1 u + t0 v
5586d7f5d3SJohn Marino      V = s1 u + s0 v
5686d7f5d3SJohn Marino 
5786d7f5d3SJohn Marino      where U, V are the inputs (without any shared power of two),
5886d7f5d3SJohn Marino      and the matris has determinant � 2^{shift}.
5986d7f5d3SJohn Marino   */
6086d7f5d3SJohn Marino   mp_limb_t s0 = 1;
6186d7f5d3SJohn Marino   mp_limb_t t0 = 0;
6286d7f5d3SJohn Marino   mp_limb_t s1 = 0;
6386d7f5d3SJohn Marino   mp_limb_t t1 = 1;
6486d7f5d3SJohn Marino   mp_limb_t ug;
6586d7f5d3SJohn Marino   mp_limb_t vg;
6686d7f5d3SJohn Marino   mp_limb_t ugh;
6786d7f5d3SJohn Marino   mp_limb_t vgh;
6886d7f5d3SJohn Marino   unsigned zero_bits;
6986d7f5d3SJohn Marino   unsigned shift;
7086d7f5d3SJohn Marino   unsigned i;
7186d7f5d3SJohn Marino #if GCDEXT_1_BINARY_METHOD == 2
7286d7f5d3SJohn Marino   mp_limb_t det_sign;
7386d7f5d3SJohn Marino #endif
7486d7f5d3SJohn Marino 
7586d7f5d3SJohn Marino   ASSERT (u > 0);
7686d7f5d3SJohn Marino   ASSERT (v > 0);
7786d7f5d3SJohn Marino 
7886d7f5d3SJohn Marino   count_trailing_zeros (zero_bits, u | v);
7986d7f5d3SJohn Marino   u >>= zero_bits;
8086d7f5d3SJohn Marino   v >>= zero_bits;
8186d7f5d3SJohn Marino 
8286d7f5d3SJohn Marino   if ((u & 1) == 0)
8386d7f5d3SJohn Marino     {
8486d7f5d3SJohn Marino       count_trailing_zeros (shift, u);
8586d7f5d3SJohn Marino       u >>= shift;
8686d7f5d3SJohn Marino       t1 <<= shift;
8786d7f5d3SJohn Marino     }
8886d7f5d3SJohn Marino   else if ((v & 1) == 0)
8986d7f5d3SJohn Marino     {
9086d7f5d3SJohn Marino       count_trailing_zeros (shift, v);
9186d7f5d3SJohn Marino       v >>= shift;
9286d7f5d3SJohn Marino       s0 <<= shift;
9386d7f5d3SJohn Marino     }
9486d7f5d3SJohn Marino   else
9586d7f5d3SJohn Marino     shift = 0;
9686d7f5d3SJohn Marino 
9786d7f5d3SJohn Marino #if GCDEXT_1_BINARY_METHOD == 1
9886d7f5d3SJohn Marino   while (u != v)
9986d7f5d3SJohn Marino     {
10086d7f5d3SJohn Marino       unsigned count;
10186d7f5d3SJohn Marino       if (u > v)
10286d7f5d3SJohn Marino 	{
10386d7f5d3SJohn Marino 	  u -= v;
10486d7f5d3SJohn Marino #if USE_ZEROTAB
10586d7f5d3SJohn Marino 	  count = zerotab [u & 0x3f];
10686d7f5d3SJohn Marino 	  u >>= count;
10786d7f5d3SJohn Marino 	  if (UNLIKELY (count == 6))
10886d7f5d3SJohn Marino 	    {
10986d7f5d3SJohn Marino 	      unsigned c;
11086d7f5d3SJohn Marino 	      do
11186d7f5d3SJohn Marino 		{
11286d7f5d3SJohn Marino 		  c = zerotab[u & 0x3f];
11386d7f5d3SJohn Marino 		  u >>= c;
11486d7f5d3SJohn Marino 		  count += c;
11586d7f5d3SJohn Marino 		}
11686d7f5d3SJohn Marino 	      while (c == 6);
11786d7f5d3SJohn Marino 	    }
11886d7f5d3SJohn Marino #else
11986d7f5d3SJohn Marino 	  count_trailing_zeros (count, u);
12086d7f5d3SJohn Marino 	  u >>= count;
12186d7f5d3SJohn Marino #endif
12286d7f5d3SJohn Marino 	  t0 += t1; t1 <<= count;
12386d7f5d3SJohn Marino 	  s0 += s1; s1 <<= count;
12486d7f5d3SJohn Marino 	}
12586d7f5d3SJohn Marino       else
12686d7f5d3SJohn Marino 	{
12786d7f5d3SJohn Marino 	  v -= u;
12886d7f5d3SJohn Marino #if USE_ZEROTAB
12986d7f5d3SJohn Marino 	  count = zerotab [v & 0x3f];
13086d7f5d3SJohn Marino 	  v >>= count;
13186d7f5d3SJohn Marino 	  if (UNLIKELY (count == 6))
13286d7f5d3SJohn Marino 	    {
13386d7f5d3SJohn Marino 	      unsigned c;
13486d7f5d3SJohn Marino 	      do
13586d7f5d3SJohn Marino 		{
13686d7f5d3SJohn Marino 		  c = zerotab[v & 0x3f];
13786d7f5d3SJohn Marino 		  v >>= c;
13886d7f5d3SJohn Marino 		  count += c;
13986d7f5d3SJohn Marino 		}
14086d7f5d3SJohn Marino 	      while (c == 6);
14186d7f5d3SJohn Marino 	    }
14286d7f5d3SJohn Marino #else
14386d7f5d3SJohn Marino 	  count_trailing_zeros (count, v);
14486d7f5d3SJohn Marino 	  v >>= count;
14586d7f5d3SJohn Marino #endif
14686d7f5d3SJohn Marino 	  t1 += t0; t0 <<= count;
14786d7f5d3SJohn Marino 	  s1 += s0; s0 <<= count;
14886d7f5d3SJohn Marino 	}
14986d7f5d3SJohn Marino       shift += count;
15086d7f5d3SJohn Marino     }
15186d7f5d3SJohn Marino #else
15286d7f5d3SJohn Marino # if GCDEXT_1_BINARY_METHOD == 2
15386d7f5d3SJohn Marino   u >>= 1;
15486d7f5d3SJohn Marino   v >>= 1;
15586d7f5d3SJohn Marino 
15686d7f5d3SJohn Marino   det_sign = 0;
15786d7f5d3SJohn Marino 
15886d7f5d3SJohn Marino   while (u != v)
15986d7f5d3SJohn Marino     {
16086d7f5d3SJohn Marino       unsigned count;
16186d7f5d3SJohn Marino       mp_limb_t d =  u - v;
16286d7f5d3SJohn Marino       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
16386d7f5d3SJohn Marino       mp_limb_t sx;
16486d7f5d3SJohn Marino       mp_limb_t tx;
16586d7f5d3SJohn Marino 
16686d7f5d3SJohn Marino       /* When v <= u (vgtu == 0), the updates are:
16786d7f5d3SJohn Marino 
16886d7f5d3SJohn Marino 	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
16986d7f5d3SJohn Marino 	   (t1, t0) <-- (t1 << count, t0 + t1)
17086d7f5d3SJohn Marino 
17186d7f5d3SJohn Marino 	 and when v > 0, the updates are
17286d7f5d3SJohn Marino 
17386d7f5d3SJohn Marino 	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
17486d7f5d3SJohn Marino 	   (t1, t0) <-- (t0 << count, t0 + t1)
17586d7f5d3SJohn Marino 
17686d7f5d3SJohn Marino 	 and similarly for s1, s0
17786d7f5d3SJohn Marino       */
17886d7f5d3SJohn Marino 
17986d7f5d3SJohn Marino       /* v <-- min (u, v) */
18086d7f5d3SJohn Marino       v += (vgtu & d);
18186d7f5d3SJohn Marino 
18286d7f5d3SJohn Marino       /* u <-- |u - v| */
18386d7f5d3SJohn Marino       u = (d ^ vgtu) - vgtu;
18486d7f5d3SJohn Marino 
18586d7f5d3SJohn Marino       /* Number of trailing zeros is the same no matter if we look at
18686d7f5d3SJohn Marino        * d or u, but using d gives more parallelism. */
18786d7f5d3SJohn Marino #if USE_ZEROTAB
18886d7f5d3SJohn Marino       count = zerotab[d & 0x3f];
18986d7f5d3SJohn Marino       if (UNLIKELY (count == 6))
19086d7f5d3SJohn Marino 	{
19186d7f5d3SJohn Marino 	  unsigned c = 6;
19286d7f5d3SJohn Marino 	  do
19386d7f5d3SJohn Marino 	    {
19486d7f5d3SJohn Marino 	      d >>= c;
19586d7f5d3SJohn Marino 	      c = zerotab[d & 0x3f];
19686d7f5d3SJohn Marino 	      count += c;
19786d7f5d3SJohn Marino 	    }
19886d7f5d3SJohn Marino 	  while (c == 6);
19986d7f5d3SJohn Marino 	}
20086d7f5d3SJohn Marino #else
20186d7f5d3SJohn Marino       count_trailing_zeros (count, d);
20286d7f5d3SJohn Marino #endif
20386d7f5d3SJohn Marino       det_sign ^= vgtu;
20486d7f5d3SJohn Marino 
20586d7f5d3SJohn Marino       tx = vgtu & (t0 - t1);
20686d7f5d3SJohn Marino       sx = vgtu & (s0 - s1);
20786d7f5d3SJohn Marino       t0 += t1;
20886d7f5d3SJohn Marino       s0 += s1;
20986d7f5d3SJohn Marino       t1 += tx;
21086d7f5d3SJohn Marino       s1 += sx;
21186d7f5d3SJohn Marino 
21286d7f5d3SJohn Marino       count++;
21386d7f5d3SJohn Marino       u >>= count;
21486d7f5d3SJohn Marino       t1 <<= count;
21586d7f5d3SJohn Marino       s1 <<= count;
21686d7f5d3SJohn Marino       shift += count;
21786d7f5d3SJohn Marino     }
21886d7f5d3SJohn Marino   u = (u << 1) + 1;
21986d7f5d3SJohn Marino # else /* GCDEXT_1_BINARY_METHOD == 2 */
22086d7f5d3SJohn Marino #  error Unknown GCDEXT_1_BINARY_METHOD
22186d7f5d3SJohn Marino # endif
22286d7f5d3SJohn Marino #endif
22386d7f5d3SJohn Marino 
22486d7f5d3SJohn Marino   /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
22586d7f5d3SJohn Marino   ug = t0 + t1;
22686d7f5d3SJohn Marino   vg = s0 + s1;
22786d7f5d3SJohn Marino 
22886d7f5d3SJohn Marino   ugh = ug/2 + (ug & 1);
22986d7f5d3SJohn Marino   vgh = vg/2 + (vg & 1);
23086d7f5d3SJohn Marino 
23186d7f5d3SJohn Marino   /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
23286d7f5d3SJohn Marino      s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
23386d7f5d3SJohn Marino   for (i = 0; i < shift; i++)
23486d7f5d3SJohn Marino     {
23586d7f5d3SJohn Marino       mp_limb_t mask = - ( (s0 | t0) & 1);
23686d7f5d3SJohn Marino 
23786d7f5d3SJohn Marino       s0 /= 2;
23886d7f5d3SJohn Marino       t0 /= 2;
23986d7f5d3SJohn Marino       s0 += mask & vgh;
24086d7f5d3SJohn Marino       t0 += mask & ugh;
24186d7f5d3SJohn Marino     }
24286d7f5d3SJohn Marino   /* FIXME: Try simplifying this condition. */
24386d7f5d3SJohn Marino   if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
24486d7f5d3SJohn Marino     {
24586d7f5d3SJohn Marino       s0 -= vg;
24686d7f5d3SJohn Marino       t0 -= ug;
24786d7f5d3SJohn Marino     }
24886d7f5d3SJohn Marino #if GCDEXT_1_BINARY_METHOD == 2
24986d7f5d3SJohn Marino   /* Conditional negation. */
25086d7f5d3SJohn Marino   s0 = (s0 ^ det_sign) - det_sign;
25186d7f5d3SJohn Marino   t0 = (t0 ^ det_sign) - det_sign;
25286d7f5d3SJohn Marino #endif
25386d7f5d3SJohn Marino   *sp = s0;
25486d7f5d3SJohn Marino   *tp = -t0;
25586d7f5d3SJohn Marino 
25686d7f5d3SJohn Marino   return u << zero_bits;
25786d7f5d3SJohn Marino }
25886d7f5d3SJohn Marino 
25986d7f5d3SJohn Marino #else /* !GCDEXT_1_USE_BINARY */
26086d7f5d3SJohn Marino 
26186d7f5d3SJohn Marino 
26286d7f5d3SJohn Marino /* FIXME: Takes two single-word limbs. It could be extended to a
26386d7f5d3SJohn Marino  * function that accepts a bignum for the first input, and only
26486d7f5d3SJohn Marino  * returns the first co-factor. */
26586d7f5d3SJohn Marino 
26686d7f5d3SJohn Marino mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * up,mp_limb_signed_t * vp,mp_limb_t a,mp_limb_t b)26786d7f5d3SJohn Marino mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
26886d7f5d3SJohn Marino 	      mp_limb_t a, mp_limb_t b)
26986d7f5d3SJohn Marino {
27086d7f5d3SJohn Marino   /* Maintain
27186d7f5d3SJohn Marino 
27286d7f5d3SJohn Marino      a =  u0 A + v0 B
27386d7f5d3SJohn Marino      b =  u1 A + v1 B
27486d7f5d3SJohn Marino 
27586d7f5d3SJohn Marino      where A, B are the original inputs.
27686d7f5d3SJohn Marino   */
27786d7f5d3SJohn Marino   mp_limb_signed_t u0 = 1;
27886d7f5d3SJohn Marino   mp_limb_signed_t v0 = 0;
27986d7f5d3SJohn Marino   mp_limb_signed_t u1 = 0;
28086d7f5d3SJohn Marino   mp_limb_signed_t v1 = 1;
28186d7f5d3SJohn Marino 
28286d7f5d3SJohn Marino   ASSERT (a > 0);
28386d7f5d3SJohn Marino   ASSERT (b > 0);
28486d7f5d3SJohn Marino 
28586d7f5d3SJohn Marino   if (a < b)
28686d7f5d3SJohn Marino     goto divide_by_b;
28786d7f5d3SJohn Marino 
28886d7f5d3SJohn Marino   for (;;)
28986d7f5d3SJohn Marino     {
29086d7f5d3SJohn Marino       mp_limb_t q;
29186d7f5d3SJohn Marino 
29286d7f5d3SJohn Marino       q = a / b;
29386d7f5d3SJohn Marino       a -= q * b;
29486d7f5d3SJohn Marino 
29586d7f5d3SJohn Marino       if (a == 0)
29686d7f5d3SJohn Marino 	{
29786d7f5d3SJohn Marino 	  *up = u1;
29886d7f5d3SJohn Marino 	  *vp = v1;
29986d7f5d3SJohn Marino 	  return b;
30086d7f5d3SJohn Marino 	}
30186d7f5d3SJohn Marino       u0 -= q * u1;
30286d7f5d3SJohn Marino       v0 -= q * v1;
30386d7f5d3SJohn Marino 
30486d7f5d3SJohn Marino     divide_by_b:
30586d7f5d3SJohn Marino       q = b / a;
30686d7f5d3SJohn Marino       b -= q * a;
30786d7f5d3SJohn Marino 
30886d7f5d3SJohn Marino       if (b == 0)
30986d7f5d3SJohn Marino 	{
31086d7f5d3SJohn Marino 	  *up = u0;
31186d7f5d3SJohn Marino 	  *vp = v0;
31286d7f5d3SJohn Marino 	  return a;
31386d7f5d3SJohn Marino 	}
31486d7f5d3SJohn Marino       u1 -= q * u0;
31586d7f5d3SJohn Marino       v1 -= q * v0;
31686d7f5d3SJohn Marino     }
31786d7f5d3SJohn Marino }
31886d7f5d3SJohn Marino #endif /* !GCDEXT_1_USE_BINARY */
319