xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/gcd.c (revision a24efa7dea9f1f56c3bdb15a927d3516792ace1c)
1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2 
3 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
4 2004, 2005, 2008, 2010, 2012 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24 
25 /* Uses the HGCD operation described in
26 
27      N. M�ller, On Sch�nhage's algorithm and subquadratic integer gcd
28      computation, Math. Comp. 77 (2008), 589-607.
29 
30   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
31   then uses Lehmer's algorithm.
32 */
33 
34 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
35  * 2)/3, which gives a balanced multiplication in
36  * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
37  * performance. The matrix-vector multiplication is then
38  * 4:1-unbalanced, with matrix elements of size n/6, and vector
39  * elements of size p = 2n/3. */
40 
41 /* From analysis of the theoretical running time, it appears that when
42  * multiplication takes time O(n^alpha), p should be chosen so that
43  * the ratio of the time for the mpn_hgcd call, and the time for the
44  * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
45  * 1). */
46 #ifdef TUNE_GCD_P
47 #define P_TABLE_SIZE 10000
48 mp_size_t p_table[P_TABLE_SIZE];
49 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
50 #else
51 #define CHOOSE_P(n) (2*(n) / 3)
52 #endif
53 
54 struct gcd_ctx
55 {
56   mp_ptr gp;
57   mp_size_t gn;
58 };
59 
60 static void
61 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
62 	  mp_srcptr qp, mp_size_t qn, int d)
63 {
64   struct gcd_ctx *ctx = (struct gcd_ctx *) p;
65   MPN_COPY (ctx->gp, gp, gn);
66   ctx->gn = gn;
67 }
68 
69 #if GMP_NAIL_BITS > 0
70 /* Nail supports should be easy, replacing the sub_ddmmss with nails
71  * logic. */
72 #error Nails not supported.
73 #endif
74 
75 /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
76    Both U and V must be odd. */
77 static inline mp_size_t
78 gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
79 {
80   mp_limb_t u0, u1, v0, v1;
81   mp_size_t gn;
82 
83   u0 = up[0];
84   u1 = up[1];
85   v0 = vp[0];
86   v1 = vp[1];
87 
88   ASSERT (u0 & 1);
89   ASSERT (v0 & 1);
90 
91   /* Check for u0 != v0 needed to ensure that argument to
92    * count_trailing_zeros is non-zero. */
93   while (u1 != v1 && u0 != v0)
94     {
95       unsigned long int r;
96       if (u1 > v1)
97 	{
98 	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
99 	  count_trailing_zeros (r, u0);
100 	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
101 	  u1 >>= r;
102 	}
103       else  /* u1 < v1.  */
104 	{
105 	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
106 	  count_trailing_zeros (r, v0);
107 	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
108 	  v1 >>= r;
109 	}
110     }
111 
112   gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
113 
114   /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
115   if (u1 == v1 && u0 == v0)
116     return gn;
117 
118   v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
119   gp[0] = mpn_gcd_1 (gp, gn, v0);
120 
121   return 1;
122 }
123 
124 mp_size_t
125 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
126 {
127   mp_size_t talloc;
128   mp_size_t scratch;
129   mp_size_t matrix_scratch;
130 
131   struct gcd_ctx ctx;
132   mp_ptr tp;
133   TMP_DECL;
134 
135   ASSERT (usize >= n);
136   ASSERT (n > 0);
137   ASSERT (vp[n-1] > 0);
138 
139   /* FIXME: Check for small sizes first, before setting up temporary
140      storage etc. */
141   talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
142 
143   /* For initial division */
144   scratch = usize - n + 1;
145   if (scratch > talloc)
146     talloc = scratch;
147 
148 #if TUNE_GCD_P
149   if (CHOOSE_P (n) > 0)
150 #else
151   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
152 #endif
153     {
154       mp_size_t hgcd_scratch;
155       mp_size_t update_scratch;
156       mp_size_t p = CHOOSE_P (n);
157       mp_size_t scratch;
158 #if TUNE_GCD_P
159       /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
160 	 is increasing */
161       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
162       hgcd_scratch = mpn_hgcd_itch (n);
163       update_scratch = 2*(n - 1);
164 #else
165       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
166       hgcd_scratch = mpn_hgcd_itch (n - p);
167       update_scratch = p + n - 1;
168 #endif
169       scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
170       if (scratch > talloc)
171 	talloc = scratch;
172     }
173 
174   TMP_MARK;
175   tp = TMP_ALLOC_LIMBS(talloc);
176 
177   if (usize > n)
178     {
179       mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
180 
181       if (mpn_zero_p (up, n))
182 	{
183 	  MPN_COPY (gp, vp, n);
184 	  ctx.gn = n;
185 	  goto done;
186 	}
187     }
188 
189   ctx.gp = gp;
190 
191 #if TUNE_GCD_P
192   while (CHOOSE_P (n) > 0)
193 #else
194   while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
195 #endif
196     {
197       struct hgcd_matrix M;
198       mp_size_t p = CHOOSE_P (n);
199       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
200       mp_size_t nn;
201       mpn_hgcd_matrix_init (&M, n - p, tp);
202       nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
203       if (nn > 0)
204 	{
205 	  ASSERT (M.n <= (n - p - 1)/2);
206 	  ASSERT (M.n + p <= (p + n - 1) / 2);
207 	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
208 	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
209 	}
210       else
211 	{
212 	  /* Temporary storage n */
213 	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
214 	  if (n == 0)
215 	    goto done;
216 	}
217     }
218 
219   while (n > 2)
220     {
221       struct hgcd_matrix1 M;
222       mp_limb_t uh, ul, vh, vl;
223       mp_limb_t mask;
224 
225       mask = up[n-1] | vp[n-1];
226       ASSERT (mask > 0);
227 
228       if (mask & GMP_NUMB_HIGHBIT)
229 	{
230 	  uh = up[n-1]; ul = up[n-2];
231 	  vh = vp[n-1]; vl = vp[n-2];
232 	}
233       else
234 	{
235 	  int shift;
236 
237 	  count_leading_zeros (shift, mask);
238 	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
239 	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
240 	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
241 	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
242 	}
243 
244       /* Try an mpn_hgcd2 step */
245       if (mpn_hgcd2 (uh, ul, vh, vl, &M))
246 	{
247 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
248 	  MP_PTR_SWAP (up, tp);
249 	}
250       else
251 	{
252 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
253 	     small, or the difference is very small. Perform one
254 	     subtraction followed by one division. */
255 
256 	  /* Temporary storage n */
257 	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
258 	  if (n == 0)
259 	    goto done;
260 	}
261     }
262 
263   ASSERT(up[n-1] | vp[n-1]);
264 
265   if (n == 1)
266     {
267       *gp = mpn_gcd_1(up, 1, vp[0]);
268       ctx.gn = 1;
269       goto done;
270     }
271 
272   /* Due to the calling convention for mpn_gcd, at most one can be
273      even. */
274 
275   if (! (up[0] & 1))
276     MP_PTR_SWAP (up, vp);
277 
278   ASSERT (up[0] & 1);
279 
280   if (vp[0] == 0)
281     {
282       *gp = mpn_gcd_1 (up, 2, vp[1]);
283       ctx.gn = 1;
284       goto done;
285     }
286   else if (! (vp[0] & 1))
287     {
288       int r;
289       count_trailing_zeros (r, vp[0]);
290       vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
291       vp[1] >>= r;
292     }
293 
294   ctx.gn = gcd_2(gp, up, vp);
295 
296 done:
297   TMP_FREE;
298   return ctx.gn;
299 }
300