1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers. 2 3 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 4 2004, 2005, 2008, 2010, 2012 Free Software Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MP Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21 #include "gmp.h" 22 #include "gmp-impl.h" 23 #include "longlong.h" 24 25 /* Uses the HGCD operation described in 26 27 N. M�ller, On Sch�nhage's algorithm and subquadratic integer gcd 28 computation, Math. Comp. 77 (2008), 589-607. 29 30 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and 31 then uses Lehmer's algorithm. 32 */ 33 34 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + 35 * 2)/3, which gives a balanced multiplication in 36 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better 37 * performance. The matrix-vector multiplication is then 38 * 4:1-unbalanced, with matrix elements of size n/6, and vector 39 * elements of size p = 2n/3. */ 40 41 /* From analysis of the theoretical running time, it appears that when 42 * multiplication takes time O(n^alpha), p should be chosen so that 43 * the ratio of the time for the mpn_hgcd call, and the time for the 44 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - 45 * 1). */ 46 #ifdef TUNE_GCD_P 47 #define P_TABLE_SIZE 10000 48 mp_size_t p_table[P_TABLE_SIZE]; 49 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) 50 #else 51 #define CHOOSE_P(n) (2*(n) / 3) 52 #endif 53 54 struct gcd_ctx 55 { 56 mp_ptr gp; 57 mp_size_t gn; 58 }; 59 60 static void 61 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn, 62 mp_srcptr qp, mp_size_t qn, int d) 63 { 64 struct gcd_ctx *ctx = (struct gcd_ctx *) p; 65 MPN_COPY (ctx->gp, gp, gn); 66 ctx->gn = gn; 67 } 68 69 #if GMP_NAIL_BITS > 0 70 /* Nail supports should be easy, replacing the sub_ddmmss with nails 71 * logic. */ 72 #error Nails not supported. 73 #endif 74 75 /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2. 76 Both U and V must be odd. */ 77 static inline mp_size_t 78 gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp) 79 { 80 mp_limb_t u0, u1, v0, v1; 81 mp_size_t gn; 82 83 u0 = up[0]; 84 u1 = up[1]; 85 v0 = vp[0]; 86 v1 = vp[1]; 87 88 ASSERT (u0 & 1); 89 ASSERT (v0 & 1); 90 91 /* Check for u0 != v0 needed to ensure that argument to 92 * count_trailing_zeros is non-zero. */ 93 while (u1 != v1 && u0 != v0) 94 { 95 unsigned long int r; 96 if (u1 > v1) 97 { 98 sub_ddmmss (u1, u0, u1, u0, v1, v0); 99 count_trailing_zeros (r, u0); 100 u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r); 101 u1 >>= r; 102 } 103 else /* u1 < v1. */ 104 { 105 sub_ddmmss (v1, v0, v1, v0, u1, u0); 106 count_trailing_zeros (r, v0); 107 v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r); 108 v1 >>= r; 109 } 110 } 111 112 gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0); 113 114 /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */ 115 if (u1 == v1 && u0 == v0) 116 return gn; 117 118 v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0); 119 gp[0] = mpn_gcd_1 (gp, gn, v0); 120 121 return 1; 122 } 123 124 mp_size_t 125 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) 126 { 127 mp_size_t talloc; 128 mp_size_t scratch; 129 mp_size_t matrix_scratch; 130 131 struct gcd_ctx ctx; 132 mp_ptr tp; 133 TMP_DECL; 134 135 ASSERT (usize >= n); 136 ASSERT (n > 0); 137 ASSERT (vp[n-1] > 0); 138 139 /* FIXME: Check for small sizes first, before setting up temporary 140 storage etc. */ 141 talloc = MPN_GCD_SUBDIV_STEP_ITCH(n); 142 143 /* For initial division */ 144 scratch = usize - n + 1; 145 if (scratch > talloc) 146 talloc = scratch; 147 148 #if TUNE_GCD_P 149 if (CHOOSE_P (n) > 0) 150 #else 151 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 152 #endif 153 { 154 mp_size_t hgcd_scratch; 155 mp_size_t update_scratch; 156 mp_size_t p = CHOOSE_P (n); 157 mp_size_t scratch; 158 #if TUNE_GCD_P 159 /* Worst case, since we don't guarantee that n - CHOOSE_P(n) 160 is increasing */ 161 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); 162 hgcd_scratch = mpn_hgcd_itch (n); 163 update_scratch = 2*(n - 1); 164 #else 165 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 166 hgcd_scratch = mpn_hgcd_itch (n - p); 167 update_scratch = p + n - 1; 168 #endif 169 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); 170 if (scratch > talloc) 171 talloc = scratch; 172 } 173 174 TMP_MARK; 175 tp = TMP_ALLOC_LIMBS(talloc); 176 177 if (usize > n) 178 { 179 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); 180 181 if (mpn_zero_p (up, n)) 182 { 183 MPN_COPY (gp, vp, n); 184 ctx.gn = n; 185 goto done; 186 } 187 } 188 189 ctx.gp = gp; 190 191 #if TUNE_GCD_P 192 while (CHOOSE_P (n) > 0) 193 #else 194 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 195 #endif 196 { 197 struct hgcd_matrix M; 198 mp_size_t p = CHOOSE_P (n); 199 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 200 mp_size_t nn; 201 mpn_hgcd_matrix_init (&M, n - p, tp); 202 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); 203 if (nn > 0) 204 { 205 ASSERT (M.n <= (n - p - 1)/2); 206 ASSERT (M.n + p <= (p + n - 1) / 2); 207 /* Temporary storage 2 (p + M->n) <= p + n - 1. */ 208 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); 209 } 210 else 211 { 212 /* Temporary storage n */ 213 n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp); 214 if (n == 0) 215 goto done; 216 } 217 } 218 219 while (n > 2) 220 { 221 struct hgcd_matrix1 M; 222 mp_limb_t uh, ul, vh, vl; 223 mp_limb_t mask; 224 225 mask = up[n-1] | vp[n-1]; 226 ASSERT (mask > 0); 227 228 if (mask & GMP_NUMB_HIGHBIT) 229 { 230 uh = up[n-1]; ul = up[n-2]; 231 vh = vp[n-1]; vl = vp[n-2]; 232 } 233 else 234 { 235 int shift; 236 237 count_leading_zeros (shift, mask); 238 uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]); 239 ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]); 240 vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]); 241 vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]); 242 } 243 244 /* Try an mpn_hgcd2 step */ 245 if (mpn_hgcd2 (uh, ul, vh, vl, &M)) 246 { 247 n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n); 248 MP_PTR_SWAP (up, tp); 249 } 250 else 251 { 252 /* mpn_hgcd2 has failed. Then either one of a or b is very 253 small, or the difference is very small. Perform one 254 subtraction followed by one division. */ 255 256 /* Temporary storage n */ 257 n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp); 258 if (n == 0) 259 goto done; 260 } 261 } 262 263 ASSERT(up[n-1] | vp[n-1]); 264 265 if (n == 1) 266 { 267 *gp = mpn_gcd_1(up, 1, vp[0]); 268 ctx.gn = 1; 269 goto done; 270 } 271 272 /* Due to the calling convention for mpn_gcd, at most one can be 273 even. */ 274 275 if (! (up[0] & 1)) 276 MP_PTR_SWAP (up, vp); 277 278 ASSERT (up[0] & 1); 279 280 if (vp[0] == 0) 281 { 282 *gp = mpn_gcd_1 (up, 2, vp[1]); 283 ctx.gn = 1; 284 goto done; 285 } 286 else if (! (vp[0] & 1)) 287 { 288 int r; 289 count_trailing_zeros (r, vp[0]); 290 vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r); 291 vp[1] >>= r; 292 } 293 294 ctx.gn = gcd_2(gp, up, vp); 295 296 done: 297 TMP_FREE; 298 return ctx.gn; 299 } 300