1*86d7f5d3SJohn Marino /* mpn_gcdext -- Extended Greatest Common Divisor.
2*86d7f5d3SJohn Marino
3*86d7f5d3SJohn Marino Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
4*86d7f5d3SJohn Marino Foundation, Inc.
5*86d7f5d3SJohn Marino
6*86d7f5d3SJohn Marino This file is part of the GNU MP Library.
7*86d7f5d3SJohn Marino
8*86d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
9*86d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
10*86d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
11*86d7f5d3SJohn Marino option) any later version.
12*86d7f5d3SJohn Marino
13*86d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
14*86d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15*86d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16*86d7f5d3SJohn Marino License for more details.
17*86d7f5d3SJohn Marino
18*86d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
19*86d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
20*86d7f5d3SJohn Marino
21*86d7f5d3SJohn Marino #include "gmp.h"
22*86d7f5d3SJohn Marino #include "gmp-impl.h"
23*86d7f5d3SJohn Marino #include "longlong.h"
24*86d7f5d3SJohn Marino
25*86d7f5d3SJohn Marino /* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
26*86d7f5d3SJohn Marino multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
27*86d7f5d3SJohn Marino needed for the division, with most n for the quotient, and n+1 for
28*86d7f5d3SJohn Marino the product q u0. In all, 4n + 3. */
29*86d7f5d3SJohn Marino
30*86d7f5d3SJohn Marino mp_size_t
mpn_gcdext_lehmer_n(mp_ptr gp,mp_ptr up,mp_size_t * usize,mp_ptr ap,mp_ptr bp,mp_size_t n,mp_ptr tp)31*86d7f5d3SJohn Marino mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
32*86d7f5d3SJohn Marino mp_ptr ap, mp_ptr bp, mp_size_t n,
33*86d7f5d3SJohn Marino mp_ptr tp)
34*86d7f5d3SJohn Marino {
35*86d7f5d3SJohn Marino mp_size_t ualloc = n + 1;
36*86d7f5d3SJohn Marino
37*86d7f5d3SJohn Marino /* Keeps track of the second row of the reduction matrix
38*86d7f5d3SJohn Marino *
39*86d7f5d3SJohn Marino * M = (v0, v1 ; u0, u1)
40*86d7f5d3SJohn Marino *
41*86d7f5d3SJohn Marino * which correspond to the first column of the inverse
42*86d7f5d3SJohn Marino *
43*86d7f5d3SJohn Marino * M^{-1} = (u1, -v1; -u0, v0)
44*86d7f5d3SJohn Marino */
45*86d7f5d3SJohn Marino
46*86d7f5d3SJohn Marino mp_size_t un;
47*86d7f5d3SJohn Marino mp_ptr u0;
48*86d7f5d3SJohn Marino mp_ptr u1;
49*86d7f5d3SJohn Marino mp_ptr u2;
50*86d7f5d3SJohn Marino
51*86d7f5d3SJohn Marino MPN_ZERO (tp, 3*ualloc);
52*86d7f5d3SJohn Marino u0 = tp; tp += ualloc;
53*86d7f5d3SJohn Marino u1 = tp; tp += ualloc;
54*86d7f5d3SJohn Marino u2 = tp; tp += ualloc;
55*86d7f5d3SJohn Marino
56*86d7f5d3SJohn Marino u1[0] = 1; un = 1;
57*86d7f5d3SJohn Marino
58*86d7f5d3SJohn Marino /* FIXME: Handle n == 2 differently, after the loop? */
59*86d7f5d3SJohn Marino while (n >= 2)
60*86d7f5d3SJohn Marino {
61*86d7f5d3SJohn Marino struct hgcd_matrix1 M;
62*86d7f5d3SJohn Marino mp_limb_t ah, al, bh, bl;
63*86d7f5d3SJohn Marino mp_limb_t mask;
64*86d7f5d3SJohn Marino
65*86d7f5d3SJohn Marino mask = ap[n-1] | bp[n-1];
66*86d7f5d3SJohn Marino ASSERT (mask > 0);
67*86d7f5d3SJohn Marino
68*86d7f5d3SJohn Marino if (mask & GMP_NUMB_HIGHBIT)
69*86d7f5d3SJohn Marino {
70*86d7f5d3SJohn Marino ah = ap[n-1]; al = ap[n-2];
71*86d7f5d3SJohn Marino bh = bp[n-1]; bl = bp[n-2];
72*86d7f5d3SJohn Marino }
73*86d7f5d3SJohn Marino else if (n == 2)
74*86d7f5d3SJohn Marino {
75*86d7f5d3SJohn Marino /* We use the full inputs without truncation, so we can
76*86d7f5d3SJohn Marino safely shift left. */
77*86d7f5d3SJohn Marino int shift;
78*86d7f5d3SJohn Marino
79*86d7f5d3SJohn Marino count_leading_zeros (shift, mask);
80*86d7f5d3SJohn Marino ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
81*86d7f5d3SJohn Marino al = ap[0] << shift;
82*86d7f5d3SJohn Marino bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
83*86d7f5d3SJohn Marino bl = bp[0] << shift;
84*86d7f5d3SJohn Marino }
85*86d7f5d3SJohn Marino else
86*86d7f5d3SJohn Marino {
87*86d7f5d3SJohn Marino int shift;
88*86d7f5d3SJohn Marino
89*86d7f5d3SJohn Marino count_leading_zeros (shift, mask);
90*86d7f5d3SJohn Marino ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
91*86d7f5d3SJohn Marino al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
92*86d7f5d3SJohn Marino bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
93*86d7f5d3SJohn Marino bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
94*86d7f5d3SJohn Marino }
95*86d7f5d3SJohn Marino
96*86d7f5d3SJohn Marino /* Try an mpn_nhgcd2 step */
97*86d7f5d3SJohn Marino if (mpn_hgcd2 (ah, al, bh, bl, &M))
98*86d7f5d3SJohn Marino {
99*86d7f5d3SJohn Marino n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
100*86d7f5d3SJohn Marino MP_PTR_SWAP (ap, tp);
101*86d7f5d3SJohn Marino un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
102*86d7f5d3SJohn Marino MP_PTR_SWAP (u0, u2);
103*86d7f5d3SJohn Marino }
104*86d7f5d3SJohn Marino else
105*86d7f5d3SJohn Marino {
106*86d7f5d3SJohn Marino /* mpn_hgcd2 has failed. Then either one of a or b is very
107*86d7f5d3SJohn Marino small, or the difference is very small. Perform one
108*86d7f5d3SJohn Marino subtraction followed by one division. */
109*86d7f5d3SJohn Marino mp_size_t gn;
110*86d7f5d3SJohn Marino mp_size_t updated_un = un;
111*86d7f5d3SJohn Marino
112*86d7f5d3SJohn Marino /* Temporary storage n for the quotient and ualloc for the
113*86d7f5d3SJohn Marino new cofactor. */
114*86d7f5d3SJohn Marino n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
115*86d7f5d3SJohn Marino u0, u1, &updated_un, tp, u2);
116*86d7f5d3SJohn Marino if (n == 0)
117*86d7f5d3SJohn Marino return gn;
118*86d7f5d3SJohn Marino
119*86d7f5d3SJohn Marino un = updated_un;
120*86d7f5d3SJohn Marino }
121*86d7f5d3SJohn Marino }
122*86d7f5d3SJohn Marino ASSERT_ALWAYS (ap[0] > 0);
123*86d7f5d3SJohn Marino ASSERT_ALWAYS (bp[0] > 0);
124*86d7f5d3SJohn Marino
125*86d7f5d3SJohn Marino if (ap[0] == bp[0])
126*86d7f5d3SJohn Marino {
127*86d7f5d3SJohn Marino int c;
128*86d7f5d3SJohn Marino
129*86d7f5d3SJohn Marino /* Which cofactor to return now? Candidates are +u1 and -u0,
130*86d7f5d3SJohn Marino depending on which of a and b was most recently reduced,
131*86d7f5d3SJohn Marino which we don't keep track of. So compare and get the smallest
132*86d7f5d3SJohn Marino one. */
133*86d7f5d3SJohn Marino
134*86d7f5d3SJohn Marino gp[0] = ap[0];
135*86d7f5d3SJohn Marino
136*86d7f5d3SJohn Marino MPN_CMP (c, u0, u1, un);
137*86d7f5d3SJohn Marino ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
138*86d7f5d3SJohn Marino if (c < 0)
139*86d7f5d3SJohn Marino {
140*86d7f5d3SJohn Marino MPN_NORMALIZE (u0, un);
141*86d7f5d3SJohn Marino MPN_COPY (up, u0, un);
142*86d7f5d3SJohn Marino *usize = -un;
143*86d7f5d3SJohn Marino }
144*86d7f5d3SJohn Marino else
145*86d7f5d3SJohn Marino {
146*86d7f5d3SJohn Marino MPN_NORMALIZE_NOT_ZERO (u1, un);
147*86d7f5d3SJohn Marino MPN_COPY (up, u1, un);
148*86d7f5d3SJohn Marino *usize = un;
149*86d7f5d3SJohn Marino }
150*86d7f5d3SJohn Marino return 1;
151*86d7f5d3SJohn Marino }
152*86d7f5d3SJohn Marino else
153*86d7f5d3SJohn Marino {
154*86d7f5d3SJohn Marino mp_limb_t uh, vh;
155*86d7f5d3SJohn Marino mp_limb_signed_t u;
156*86d7f5d3SJohn Marino mp_limb_signed_t v;
157*86d7f5d3SJohn Marino int negate;
158*86d7f5d3SJohn Marino
159*86d7f5d3SJohn Marino gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
160*86d7f5d3SJohn Marino
161*86d7f5d3SJohn Marino /* Set up = u u1 - v u0. Keep track of size, un grows by one or
162*86d7f5d3SJohn Marino two limbs. */
163*86d7f5d3SJohn Marino
164*86d7f5d3SJohn Marino if (u == 0)
165*86d7f5d3SJohn Marino {
166*86d7f5d3SJohn Marino ASSERT (v == 1);
167*86d7f5d3SJohn Marino MPN_NORMALIZE (u0, un);
168*86d7f5d3SJohn Marino MPN_COPY (up, u0, un);
169*86d7f5d3SJohn Marino *usize = -un;
170*86d7f5d3SJohn Marino return 1;
171*86d7f5d3SJohn Marino }
172*86d7f5d3SJohn Marino else if (v == 0)
173*86d7f5d3SJohn Marino {
174*86d7f5d3SJohn Marino ASSERT (u == 1);
175*86d7f5d3SJohn Marino MPN_NORMALIZE (u1, un);
176*86d7f5d3SJohn Marino MPN_COPY (up, u1, un);
177*86d7f5d3SJohn Marino *usize = un;
178*86d7f5d3SJohn Marino return 1;
179*86d7f5d3SJohn Marino }
180*86d7f5d3SJohn Marino else if (u > 0)
181*86d7f5d3SJohn Marino {
182*86d7f5d3SJohn Marino negate = 0;
183*86d7f5d3SJohn Marino ASSERT (v < 0);
184*86d7f5d3SJohn Marino v = -v;
185*86d7f5d3SJohn Marino }
186*86d7f5d3SJohn Marino else
187*86d7f5d3SJohn Marino {
188*86d7f5d3SJohn Marino negate = 1;
189*86d7f5d3SJohn Marino ASSERT (v > 0);
190*86d7f5d3SJohn Marino u = -u;
191*86d7f5d3SJohn Marino }
192*86d7f5d3SJohn Marino
193*86d7f5d3SJohn Marino uh = mpn_mul_1 (up, u1, un, u);
194*86d7f5d3SJohn Marino vh = mpn_addmul_1 (up, u0, un, v);
195*86d7f5d3SJohn Marino
196*86d7f5d3SJohn Marino if ( (uh | vh) > 0)
197*86d7f5d3SJohn Marino {
198*86d7f5d3SJohn Marino uh += vh;
199*86d7f5d3SJohn Marino up[un++] = uh;
200*86d7f5d3SJohn Marino if (uh < vh)
201*86d7f5d3SJohn Marino up[un++] = 1;
202*86d7f5d3SJohn Marino }
203*86d7f5d3SJohn Marino
204*86d7f5d3SJohn Marino MPN_NORMALIZE_NOT_ZERO (up, un);
205*86d7f5d3SJohn Marino
206*86d7f5d3SJohn Marino *usize = negate ? -un : un;
207*86d7f5d3SJohn Marino return 1;
208*86d7f5d3SJohn Marino }
209*86d7f5d3SJohn Marino }
210