/* xref: /dflybsd-src/contrib/gmp/mpn/generic/gcdext_lehmer.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103) */
/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
   multiplications (if hgcd2 succeeds). If hgcd2 fails, n+1 limbs are
   needed for the division, with at most n for the quotient, and n+1 for
   the product q u0. In all, 4n + 3. */

3086d7f5d3SJohn Marino mp_size_t
mpn_gcdext_lehmer_n(mp_ptr gp,mp_ptr up,mp_size_t * usize,mp_ptr ap,mp_ptr bp,mp_size_t n,mp_ptr tp)3186d7f5d3SJohn Marino mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
3286d7f5d3SJohn Marino 		     mp_ptr ap, mp_ptr bp, mp_size_t n,
3386d7f5d3SJohn Marino 		     mp_ptr tp)
3486d7f5d3SJohn Marino {
3586d7f5d3SJohn Marino   mp_size_t ualloc = n + 1;
3686d7f5d3SJohn Marino 
3786d7f5d3SJohn Marino   /* Keeps track of the second row of the reduction matrix
3886d7f5d3SJohn Marino    *
3986d7f5d3SJohn Marino    *   M = (v0, v1 ; u0, u1)
4086d7f5d3SJohn Marino    *
4186d7f5d3SJohn Marino    * which correspond to the first column of the inverse
4286d7f5d3SJohn Marino    *
4386d7f5d3SJohn Marino    *   M^{-1} = (u1, -v1; -u0, v0)
4486d7f5d3SJohn Marino    */
4586d7f5d3SJohn Marino 
4686d7f5d3SJohn Marino   mp_size_t un;
4786d7f5d3SJohn Marino   mp_ptr u0;
4886d7f5d3SJohn Marino   mp_ptr u1;
4986d7f5d3SJohn Marino   mp_ptr u2;
5086d7f5d3SJohn Marino 
5186d7f5d3SJohn Marino   MPN_ZERO (tp, 3*ualloc);
5286d7f5d3SJohn Marino   u0 = tp; tp += ualloc;
5386d7f5d3SJohn Marino   u1 = tp; tp += ualloc;
5486d7f5d3SJohn Marino   u2 = tp; tp += ualloc;
5586d7f5d3SJohn Marino 
5686d7f5d3SJohn Marino   u1[0] = 1; un = 1;
5786d7f5d3SJohn Marino 
5886d7f5d3SJohn Marino   /* FIXME: Handle n == 2 differently, after the loop? */
5986d7f5d3SJohn Marino   while (n >= 2)
6086d7f5d3SJohn Marino     {
6186d7f5d3SJohn Marino       struct hgcd_matrix1 M;
6286d7f5d3SJohn Marino       mp_limb_t ah, al, bh, bl;
6386d7f5d3SJohn Marino       mp_limb_t mask;
6486d7f5d3SJohn Marino 
6586d7f5d3SJohn Marino       mask = ap[n-1] | bp[n-1];
6686d7f5d3SJohn Marino       ASSERT (mask > 0);
6786d7f5d3SJohn Marino 
6886d7f5d3SJohn Marino       if (mask & GMP_NUMB_HIGHBIT)
6986d7f5d3SJohn Marino 	{
7086d7f5d3SJohn Marino 	  ah = ap[n-1]; al = ap[n-2];
7186d7f5d3SJohn Marino 	  bh = bp[n-1]; bl = bp[n-2];
7286d7f5d3SJohn Marino 	}
7386d7f5d3SJohn Marino       else if (n == 2)
7486d7f5d3SJohn Marino 	{
7586d7f5d3SJohn Marino 	  /* We use the full inputs without truncation, so we can
7686d7f5d3SJohn Marino 	     safely shift left. */
7786d7f5d3SJohn Marino 	  int shift;
7886d7f5d3SJohn Marino 
7986d7f5d3SJohn Marino 	  count_leading_zeros (shift, mask);
8086d7f5d3SJohn Marino 	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
8186d7f5d3SJohn Marino 	  al = ap[0] << shift;
8286d7f5d3SJohn Marino 	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
8386d7f5d3SJohn Marino 	  bl = bp[0] << shift;
8486d7f5d3SJohn Marino 	}
8586d7f5d3SJohn Marino       else
8686d7f5d3SJohn Marino 	{
8786d7f5d3SJohn Marino 	  int shift;
8886d7f5d3SJohn Marino 
8986d7f5d3SJohn Marino 	  count_leading_zeros (shift, mask);
9086d7f5d3SJohn Marino 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
9186d7f5d3SJohn Marino 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
9286d7f5d3SJohn Marino 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
9386d7f5d3SJohn Marino 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
9486d7f5d3SJohn Marino 	}
9586d7f5d3SJohn Marino 
9686d7f5d3SJohn Marino       /* Try an mpn_nhgcd2 step */
9786d7f5d3SJohn Marino       if (mpn_hgcd2 (ah, al, bh, bl, &M))
9886d7f5d3SJohn Marino 	{
9986d7f5d3SJohn Marino 	  n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
10086d7f5d3SJohn Marino 	  MP_PTR_SWAP (ap, tp);
10186d7f5d3SJohn Marino 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
10286d7f5d3SJohn Marino 	  MP_PTR_SWAP (u0, u2);
10386d7f5d3SJohn Marino 	}
10486d7f5d3SJohn Marino       else
10586d7f5d3SJohn Marino 	{
10686d7f5d3SJohn Marino 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
10786d7f5d3SJohn Marino 	     small, or the difference is very small. Perform one
10886d7f5d3SJohn Marino 	     subtraction followed by one division. */
10986d7f5d3SJohn Marino 	  mp_size_t gn;
11086d7f5d3SJohn Marino 	  mp_size_t updated_un = un;
11186d7f5d3SJohn Marino 
11286d7f5d3SJohn Marino 	  /* Temporary storage n for the quotient and ualloc for the
11386d7f5d3SJohn Marino 	     new cofactor. */
11486d7f5d3SJohn Marino 	  n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
11586d7f5d3SJohn Marino 				      u0, u1, &updated_un, tp, u2);
11686d7f5d3SJohn Marino 	  if (n == 0)
11786d7f5d3SJohn Marino 	    return gn;
11886d7f5d3SJohn Marino 
11986d7f5d3SJohn Marino 	  un = updated_un;
12086d7f5d3SJohn Marino 	}
12186d7f5d3SJohn Marino     }
12286d7f5d3SJohn Marino   ASSERT_ALWAYS (ap[0] > 0);
12386d7f5d3SJohn Marino   ASSERT_ALWAYS (bp[0] > 0);
12486d7f5d3SJohn Marino 
12586d7f5d3SJohn Marino   if (ap[0] == bp[0])
12686d7f5d3SJohn Marino     {
12786d7f5d3SJohn Marino       int c;
12886d7f5d3SJohn Marino 
12986d7f5d3SJohn Marino       /* Which cofactor to return now? Candidates are +u1 and -u0,
13086d7f5d3SJohn Marino 	 depending on which of a and b was most recently reduced,
13186d7f5d3SJohn Marino 	 which we don't keep track of. So compare and get the smallest
13286d7f5d3SJohn Marino 	 one. */
13386d7f5d3SJohn Marino 
13486d7f5d3SJohn Marino       gp[0] = ap[0];
13586d7f5d3SJohn Marino 
13686d7f5d3SJohn Marino       MPN_CMP (c, u0, u1, un);
13786d7f5d3SJohn Marino       ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
13886d7f5d3SJohn Marino       if (c < 0)
13986d7f5d3SJohn Marino 	{
14086d7f5d3SJohn Marino 	  MPN_NORMALIZE (u0, un);
14186d7f5d3SJohn Marino 	  MPN_COPY (up, u0, un);
14286d7f5d3SJohn Marino 	  *usize = -un;
14386d7f5d3SJohn Marino 	}
14486d7f5d3SJohn Marino       else
14586d7f5d3SJohn Marino 	{
14686d7f5d3SJohn Marino 	  MPN_NORMALIZE_NOT_ZERO (u1, un);
14786d7f5d3SJohn Marino 	  MPN_COPY (up, u1, un);
14886d7f5d3SJohn Marino 	  *usize = un;
14986d7f5d3SJohn Marino 	}
15086d7f5d3SJohn Marino       return 1;
15186d7f5d3SJohn Marino     }
15286d7f5d3SJohn Marino   else
15386d7f5d3SJohn Marino     {
15486d7f5d3SJohn Marino       mp_limb_t uh, vh;
15586d7f5d3SJohn Marino       mp_limb_signed_t u;
15686d7f5d3SJohn Marino       mp_limb_signed_t v;
15786d7f5d3SJohn Marino       int negate;
15886d7f5d3SJohn Marino 
15986d7f5d3SJohn Marino       gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
16086d7f5d3SJohn Marino 
16186d7f5d3SJohn Marino       /* Set up = u u1 - v u0. Keep track of size, un grows by one or
16286d7f5d3SJohn Marino 	 two limbs. */
16386d7f5d3SJohn Marino 
16486d7f5d3SJohn Marino       if (u == 0)
16586d7f5d3SJohn Marino 	{
16686d7f5d3SJohn Marino 	  ASSERT (v == 1);
16786d7f5d3SJohn Marino 	  MPN_NORMALIZE (u0, un);
16886d7f5d3SJohn Marino 	  MPN_COPY (up, u0, un);
16986d7f5d3SJohn Marino 	  *usize = -un;
17086d7f5d3SJohn Marino 	  return 1;
17186d7f5d3SJohn Marino 	}
17286d7f5d3SJohn Marino       else if (v == 0)
17386d7f5d3SJohn Marino 	{
17486d7f5d3SJohn Marino 	  ASSERT (u == 1);
17586d7f5d3SJohn Marino 	  MPN_NORMALIZE (u1, un);
17686d7f5d3SJohn Marino 	  MPN_COPY (up, u1, un);
17786d7f5d3SJohn Marino 	  *usize = un;
17886d7f5d3SJohn Marino 	  return 1;
17986d7f5d3SJohn Marino 	}
18086d7f5d3SJohn Marino       else if (u > 0)
18186d7f5d3SJohn Marino 	{
18286d7f5d3SJohn Marino 	  negate = 0;
18386d7f5d3SJohn Marino 	  ASSERT (v < 0);
18486d7f5d3SJohn Marino 	  v = -v;
18586d7f5d3SJohn Marino 	}
18686d7f5d3SJohn Marino       else
18786d7f5d3SJohn Marino 	{
18886d7f5d3SJohn Marino 	  negate = 1;
18986d7f5d3SJohn Marino 	  ASSERT (v > 0);
19086d7f5d3SJohn Marino 	  u = -u;
19186d7f5d3SJohn Marino 	}
19286d7f5d3SJohn Marino 
19386d7f5d3SJohn Marino       uh = mpn_mul_1 (up, u1, un, u);
19486d7f5d3SJohn Marino       vh = mpn_addmul_1 (up, u0, un, v);
19586d7f5d3SJohn Marino 
19686d7f5d3SJohn Marino       if ( (uh | vh) > 0)
19786d7f5d3SJohn Marino 	{
19886d7f5d3SJohn Marino 	  uh += vh;
19986d7f5d3SJohn Marino 	  up[un++] = uh;
20086d7f5d3SJohn Marino 	  if (uh < vh)
20186d7f5d3SJohn Marino 	    up[un++] = 1;
20286d7f5d3SJohn Marino 	}
20386d7f5d3SJohn Marino 
20486d7f5d3SJohn Marino       MPN_NORMALIZE_NOT_ZERO (up, un);
20586d7f5d3SJohn Marino 
20686d7f5d3SJohn Marino       *usize = negate ? -un : un;
20786d7f5d3SJohn Marino       return 1;
20886d7f5d3SJohn Marino     }
20986d7f5d3SJohn Marino }
210