xref: /dflybsd-src/contrib/gmp/mpn/generic/gcdext_lehmer.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
1*86d7f5d3SJohn Marino /* mpn_gcdext -- Extended Greatest Common Divisor.
2*86d7f5d3SJohn Marino 
3*86d7f5d3SJohn Marino Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
4*86d7f5d3SJohn Marino Foundation, Inc.
5*86d7f5d3SJohn Marino 
6*86d7f5d3SJohn Marino This file is part of the GNU MP Library.
7*86d7f5d3SJohn Marino 
8*86d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
9*86d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
10*86d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
11*86d7f5d3SJohn Marino option) any later version.
12*86d7f5d3SJohn Marino 
13*86d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
14*86d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15*86d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16*86d7f5d3SJohn Marino License for more details.
17*86d7f5d3SJohn Marino 
18*86d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
19*86d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20*86d7f5d3SJohn Marino 
21*86d7f5d3SJohn Marino #include "gmp.h"
22*86d7f5d3SJohn Marino #include "gmp-impl.h"
23*86d7f5d3SJohn Marino #include "longlong.h"
24*86d7f5d3SJohn Marino 
25*86d7f5d3SJohn Marino /* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
26*86d7f5d3SJohn Marino    multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
27*86d7f5d3SJohn Marino    needed for the division, with most n for the quotient, and n+1 for
28*86d7f5d3SJohn Marino    the product q u0. In all, 4n + 3. */
29*86d7f5d3SJohn Marino 
30*86d7f5d3SJohn Marino mp_size_t
mpn_gcdext_lehmer_n(mp_ptr gp,mp_ptr up,mp_size_t * usize,mp_ptr ap,mp_ptr bp,mp_size_t n,mp_ptr tp)31*86d7f5d3SJohn Marino mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
32*86d7f5d3SJohn Marino 		     mp_ptr ap, mp_ptr bp, mp_size_t n,
33*86d7f5d3SJohn Marino 		     mp_ptr tp)
34*86d7f5d3SJohn Marino {
35*86d7f5d3SJohn Marino   mp_size_t ualloc = n + 1;
36*86d7f5d3SJohn Marino 
37*86d7f5d3SJohn Marino   /* Keeps track of the second row of the reduction matrix
38*86d7f5d3SJohn Marino    *
39*86d7f5d3SJohn Marino    *   M = (v0, v1 ; u0, u1)
40*86d7f5d3SJohn Marino    *
41*86d7f5d3SJohn Marino    * which correspond to the first column of the inverse
42*86d7f5d3SJohn Marino    *
43*86d7f5d3SJohn Marino    *   M^{-1} = (u1, -v1; -u0, v0)
44*86d7f5d3SJohn Marino    */
45*86d7f5d3SJohn Marino 
46*86d7f5d3SJohn Marino   mp_size_t un;
47*86d7f5d3SJohn Marino   mp_ptr u0;
48*86d7f5d3SJohn Marino   mp_ptr u1;
49*86d7f5d3SJohn Marino   mp_ptr u2;
50*86d7f5d3SJohn Marino 
51*86d7f5d3SJohn Marino   MPN_ZERO (tp, 3*ualloc);
52*86d7f5d3SJohn Marino   u0 = tp; tp += ualloc;
53*86d7f5d3SJohn Marino   u1 = tp; tp += ualloc;
54*86d7f5d3SJohn Marino   u2 = tp; tp += ualloc;
55*86d7f5d3SJohn Marino 
56*86d7f5d3SJohn Marino   u1[0] = 1; un = 1;
57*86d7f5d3SJohn Marino 
58*86d7f5d3SJohn Marino   /* FIXME: Handle n == 2 differently, after the loop? */
59*86d7f5d3SJohn Marino   while (n >= 2)
60*86d7f5d3SJohn Marino     {
61*86d7f5d3SJohn Marino       struct hgcd_matrix1 M;
62*86d7f5d3SJohn Marino       mp_limb_t ah, al, bh, bl;
63*86d7f5d3SJohn Marino       mp_limb_t mask;
64*86d7f5d3SJohn Marino 
65*86d7f5d3SJohn Marino       mask = ap[n-1] | bp[n-1];
66*86d7f5d3SJohn Marino       ASSERT (mask > 0);
67*86d7f5d3SJohn Marino 
68*86d7f5d3SJohn Marino       if (mask & GMP_NUMB_HIGHBIT)
69*86d7f5d3SJohn Marino 	{
70*86d7f5d3SJohn Marino 	  ah = ap[n-1]; al = ap[n-2];
71*86d7f5d3SJohn Marino 	  bh = bp[n-1]; bl = bp[n-2];
72*86d7f5d3SJohn Marino 	}
73*86d7f5d3SJohn Marino       else if (n == 2)
74*86d7f5d3SJohn Marino 	{
75*86d7f5d3SJohn Marino 	  /* We use the full inputs without truncation, so we can
76*86d7f5d3SJohn Marino 	     safely shift left. */
77*86d7f5d3SJohn Marino 	  int shift;
78*86d7f5d3SJohn Marino 
79*86d7f5d3SJohn Marino 	  count_leading_zeros (shift, mask);
80*86d7f5d3SJohn Marino 	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
81*86d7f5d3SJohn Marino 	  al = ap[0] << shift;
82*86d7f5d3SJohn Marino 	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
83*86d7f5d3SJohn Marino 	  bl = bp[0] << shift;
84*86d7f5d3SJohn Marino 	}
85*86d7f5d3SJohn Marino       else
86*86d7f5d3SJohn Marino 	{
87*86d7f5d3SJohn Marino 	  int shift;
88*86d7f5d3SJohn Marino 
89*86d7f5d3SJohn Marino 	  count_leading_zeros (shift, mask);
90*86d7f5d3SJohn Marino 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
91*86d7f5d3SJohn Marino 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
92*86d7f5d3SJohn Marino 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
93*86d7f5d3SJohn Marino 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
94*86d7f5d3SJohn Marino 	}
95*86d7f5d3SJohn Marino 
96*86d7f5d3SJohn Marino       /* Try an mpn_nhgcd2 step */
97*86d7f5d3SJohn Marino       if (mpn_hgcd2 (ah, al, bh, bl, &M))
98*86d7f5d3SJohn Marino 	{
99*86d7f5d3SJohn Marino 	  n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
100*86d7f5d3SJohn Marino 	  MP_PTR_SWAP (ap, tp);
101*86d7f5d3SJohn Marino 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
102*86d7f5d3SJohn Marino 	  MP_PTR_SWAP (u0, u2);
103*86d7f5d3SJohn Marino 	}
104*86d7f5d3SJohn Marino       else
105*86d7f5d3SJohn Marino 	{
106*86d7f5d3SJohn Marino 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
107*86d7f5d3SJohn Marino 	     small, or the difference is very small. Perform one
108*86d7f5d3SJohn Marino 	     subtraction followed by one division. */
109*86d7f5d3SJohn Marino 	  mp_size_t gn;
110*86d7f5d3SJohn Marino 	  mp_size_t updated_un = un;
111*86d7f5d3SJohn Marino 
112*86d7f5d3SJohn Marino 	  /* Temporary storage n for the quotient and ualloc for the
113*86d7f5d3SJohn Marino 	     new cofactor. */
114*86d7f5d3SJohn Marino 	  n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
115*86d7f5d3SJohn Marino 				      u0, u1, &updated_un, tp, u2);
116*86d7f5d3SJohn Marino 	  if (n == 0)
117*86d7f5d3SJohn Marino 	    return gn;
118*86d7f5d3SJohn Marino 
119*86d7f5d3SJohn Marino 	  un = updated_un;
120*86d7f5d3SJohn Marino 	}
121*86d7f5d3SJohn Marino     }
122*86d7f5d3SJohn Marino   ASSERT_ALWAYS (ap[0] > 0);
123*86d7f5d3SJohn Marino   ASSERT_ALWAYS (bp[0] > 0);
124*86d7f5d3SJohn Marino 
125*86d7f5d3SJohn Marino   if (ap[0] == bp[0])
126*86d7f5d3SJohn Marino     {
127*86d7f5d3SJohn Marino       int c;
128*86d7f5d3SJohn Marino 
129*86d7f5d3SJohn Marino       /* Which cofactor to return now? Candidates are +u1 and -u0,
130*86d7f5d3SJohn Marino 	 depending on which of a and b was most recently reduced,
131*86d7f5d3SJohn Marino 	 which we don't keep track of. So compare and get the smallest
132*86d7f5d3SJohn Marino 	 one. */
133*86d7f5d3SJohn Marino 
134*86d7f5d3SJohn Marino       gp[0] = ap[0];
135*86d7f5d3SJohn Marino 
136*86d7f5d3SJohn Marino       MPN_CMP (c, u0, u1, un);
137*86d7f5d3SJohn Marino       ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
138*86d7f5d3SJohn Marino       if (c < 0)
139*86d7f5d3SJohn Marino 	{
140*86d7f5d3SJohn Marino 	  MPN_NORMALIZE (u0, un);
141*86d7f5d3SJohn Marino 	  MPN_COPY (up, u0, un);
142*86d7f5d3SJohn Marino 	  *usize = -un;
143*86d7f5d3SJohn Marino 	}
144*86d7f5d3SJohn Marino       else
145*86d7f5d3SJohn Marino 	{
146*86d7f5d3SJohn Marino 	  MPN_NORMALIZE_NOT_ZERO (u1, un);
147*86d7f5d3SJohn Marino 	  MPN_COPY (up, u1, un);
148*86d7f5d3SJohn Marino 	  *usize = un;
149*86d7f5d3SJohn Marino 	}
150*86d7f5d3SJohn Marino       return 1;
151*86d7f5d3SJohn Marino     }
152*86d7f5d3SJohn Marino   else
153*86d7f5d3SJohn Marino     {
154*86d7f5d3SJohn Marino       mp_limb_t uh, vh;
155*86d7f5d3SJohn Marino       mp_limb_signed_t u;
156*86d7f5d3SJohn Marino       mp_limb_signed_t v;
157*86d7f5d3SJohn Marino       int negate;
158*86d7f5d3SJohn Marino 
159*86d7f5d3SJohn Marino       gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
160*86d7f5d3SJohn Marino 
161*86d7f5d3SJohn Marino       /* Set up = u u1 - v u0. Keep track of size, un grows by one or
162*86d7f5d3SJohn Marino 	 two limbs. */
163*86d7f5d3SJohn Marino 
164*86d7f5d3SJohn Marino       if (u == 0)
165*86d7f5d3SJohn Marino 	{
166*86d7f5d3SJohn Marino 	  ASSERT (v == 1);
167*86d7f5d3SJohn Marino 	  MPN_NORMALIZE (u0, un);
168*86d7f5d3SJohn Marino 	  MPN_COPY (up, u0, un);
169*86d7f5d3SJohn Marino 	  *usize = -un;
170*86d7f5d3SJohn Marino 	  return 1;
171*86d7f5d3SJohn Marino 	}
172*86d7f5d3SJohn Marino       else if (v == 0)
173*86d7f5d3SJohn Marino 	{
174*86d7f5d3SJohn Marino 	  ASSERT (u == 1);
175*86d7f5d3SJohn Marino 	  MPN_NORMALIZE (u1, un);
176*86d7f5d3SJohn Marino 	  MPN_COPY (up, u1, un);
177*86d7f5d3SJohn Marino 	  *usize = un;
178*86d7f5d3SJohn Marino 	  return 1;
179*86d7f5d3SJohn Marino 	}
180*86d7f5d3SJohn Marino       else if (u > 0)
181*86d7f5d3SJohn Marino 	{
182*86d7f5d3SJohn Marino 	  negate = 0;
183*86d7f5d3SJohn Marino 	  ASSERT (v < 0);
184*86d7f5d3SJohn Marino 	  v = -v;
185*86d7f5d3SJohn Marino 	}
186*86d7f5d3SJohn Marino       else
187*86d7f5d3SJohn Marino 	{
188*86d7f5d3SJohn Marino 	  negate = 1;
189*86d7f5d3SJohn Marino 	  ASSERT (v > 0);
190*86d7f5d3SJohn Marino 	  u = -u;
191*86d7f5d3SJohn Marino 	}
192*86d7f5d3SJohn Marino 
193*86d7f5d3SJohn Marino       uh = mpn_mul_1 (up, u1, un, u);
194*86d7f5d3SJohn Marino       vh = mpn_addmul_1 (up, u0, un, v);
195*86d7f5d3SJohn Marino 
196*86d7f5d3SJohn Marino       if ( (uh | vh) > 0)
197*86d7f5d3SJohn Marino 	{
198*86d7f5d3SJohn Marino 	  uh += vh;
199*86d7f5d3SJohn Marino 	  up[un++] = uh;
200*86d7f5d3SJohn Marino 	  if (uh < vh)
201*86d7f5d3SJohn Marino 	    up[un++] = 1;
202*86d7f5d3SJohn Marino 	}
203*86d7f5d3SJohn Marino 
204*86d7f5d3SJohn Marino       MPN_NORMALIZE_NOT_ZERO (up, un);
205*86d7f5d3SJohn Marino 
206*86d7f5d3SJohn Marino       *usize = negate ? -un : un;
207*86d7f5d3SJohn Marino       return 1;
208*86d7f5d3SJohn Marino     }
209*86d7f5d3SJohn Marino }
210