1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers. 2 3 Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software 4 Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of either: 10 11 * the GNU Lesser General Public License as published by the Free 12 Software Foundation; either version 3 of the License, or (at your 13 option) any later version. 14 15 or 16 17 * the GNU General Public License as published by the Free Software 18 Foundation; either version 2 of the License, or (at your option) any 19 later version. 20 21 or both in parallel, as here. 22 23 The GNU MP Library is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 26 for more details. 27 28 You should have received copies of the GNU General Public License and the 29 GNU Lesser General Public License along with the GNU MP Library. If not, 30 see https://www.gnu.org/licenses/. */ 31 32 #include "gmp-impl.h" 33 #include "longlong.h" 34 35 /* Uses the HGCD operation described in 36 37 N. Möller, On Schönhage's algorithm and subquadratic integer gcd 38 computation, Math. Comp. 77 (2008), 589-607. 39 40 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and 41 then uses Lehmer's algorithm. 42 */ 43 44 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + 45 * 2)/3, which gives a balanced multiplication in 46 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better 47 * performance. The matrix-vector multiplication is then 48 * 4:1-unbalanced, with matrix elements of size n/6, and vector 49 * elements of size p = 2n/3. */ 50 51 /* From analysis of the theoretical running time, it appears that when 52 * multiplication takes time O(n^alpha), p should be chosen so that 53 * the ratio of the time for the mpn_hgcd call, and the time for the 54 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - 55 * 1). */ 56 #ifdef TUNE_GCD_P 57 #define P_TABLE_SIZE 10000 58 mp_size_t p_table[P_TABLE_SIZE]; 59 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) 60 #else 61 #define CHOOSE_P(n) (2*(n) / 3) 62 #endif 63 64 struct gcd_ctx 65 { 66 mp_ptr gp; 67 mp_size_t gn; 68 }; 69 70 static void 71 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn, 72 mp_srcptr qp, mp_size_t qn, int d) 73 { 74 struct gcd_ctx *ctx = (struct gcd_ctx *) p; 75 MPN_COPY (ctx->gp, gp, gn); 76 ctx->gn = gn; 77 } 78 79 mp_size_t 80 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) 81 { 82 mp_size_t talloc; 83 mp_size_t scratch; 84 mp_size_t matrix_scratch; 85 86 struct gcd_ctx ctx; 87 mp_ptr tp; 88 TMP_DECL; 89 90 ASSERT (usize >= n); 91 ASSERT (n > 0); 92 ASSERT (vp[n-1] > 0); 93 94 /* FIXME: Check for small sizes first, before setting up temporary 95 storage etc. */ 96 talloc = MPN_GCD_SUBDIV_STEP_ITCH(n); 97 98 /* For initial division */ 99 scratch = usize - n + 1; 100 if (scratch > talloc) 101 talloc = scratch; 102 103 #if TUNE_GCD_P 104 if (CHOOSE_P (n) > 0) 105 #else 106 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 107 #endif 108 { 109 mp_size_t hgcd_scratch; 110 mp_size_t update_scratch; 111 mp_size_t p = CHOOSE_P (n); 112 mp_size_t scratch; 113 #if TUNE_GCD_P 114 /* Worst case, since we don't guarantee that n - CHOOSE_P(n) 115 is increasing */ 116 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); 117 hgcd_scratch = mpn_hgcd_itch (n); 118 update_scratch = 2*(n - 1); 119 #else 120 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 121 hgcd_scratch = mpn_hgcd_itch (n - p); 122 update_scratch = p + n - 1; 123 #endif 124 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); 125 if (scratch > talloc) 126 talloc = scratch; 127 } 128 129 TMP_MARK; 130 tp = TMP_ALLOC_LIMBS(talloc); 131 132 if (usize > n) 133 { 134 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); 135 136 if (mpn_zero_p (up, n)) 137 { 138 MPN_COPY (gp, vp, n); 139 ctx.gn = n; 140 goto done; 141 } 142 } 143 144 ctx.gp = gp; 145 146 #if TUNE_GCD_P 147 while (CHOOSE_P (n) > 0) 148 #else 149 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 150 #endif 151 { 152 struct hgcd_matrix M; 153 mp_size_t p = CHOOSE_P (n); 154 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 155 mp_size_t nn; 156 mpn_hgcd_matrix_init (&M, n - p, tp); 157 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); 158 if (nn > 0) 159 { 160 ASSERT (M.n <= (n - p - 1)/2); 161 ASSERT (M.n + p <= (p + n - 1) / 2); 162 /* Temporary storage 2 (p + M->n) <= p + n - 1. */ 163 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); 164 } 165 else 166 { 167 /* Temporary storage n */ 168 n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp); 169 if (n == 0) 170 goto done; 171 } 172 } 173 174 while (n > 2) 175 { 176 struct hgcd_matrix1 M; 177 mp_limb_t uh, ul, vh, vl; 178 mp_limb_t mask; 179 180 mask = up[n-1] | vp[n-1]; 181 ASSERT (mask > 0); 182 183 if (mask & GMP_NUMB_HIGHBIT) 184 { 185 uh = up[n-1]; ul = up[n-2]; 186 vh = vp[n-1]; vl = vp[n-2]; 187 } 188 else 189 { 190 int shift; 191 192 count_leading_zeros (shift, mask); 193 uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]); 194 ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]); 195 vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]); 196 vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]); 197 } 198 199 /* Try an mpn_hgcd2 step */ 200 if (mpn_hgcd2 (uh, ul, vh, vl, &M)) 201 { 202 n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n); 203 MP_PTR_SWAP (up, tp); 204 } 205 else 206 { 207 /* mpn_hgcd2 has failed. Then either one of a or b is very 208 small, or the difference is very small. Perform one 209 subtraction followed by one division. */ 210 211 /* Temporary storage n */ 212 n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp); 213 if (n == 0) 214 goto done; 215 } 216 } 217 218 ASSERT(up[n-1] | vp[n-1]); 219 220 /* Due to the calling convention for mpn_gcd, at most one can be even. */ 221 if ((up[0] & 1) == 0) 222 MP_PTR_SWAP (up, vp); 223 ASSERT ((up[0] & 1) != 0); 224 225 { 226 mp_limb_t u0, u1, v0, v1; 227 mp_double_limb_t g; 228 229 u0 = up[0]; 230 v0 = vp[0]; 231 232 if (n == 1) 233 { 234 int cnt; 235 count_trailing_zeros (cnt, v0); 236 *gp = mpn_gcd_11 (u0, v0 >> cnt); 237 ctx.gn = 1; 238 goto done; 239 } 240 241 v1 = vp[1]; 242 if (UNLIKELY (v0 == 0)) 243 { 244 v0 = v1; 245 v1 = 0; 246 /* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could 247 when this situation occurs internally. */ 248 } 249 if ((v0 & 1) == 0) 250 { 251 int cnt; 252 count_trailing_zeros (cnt, v0); 253 v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt); 254 v1 >>= cnt; 255 } 256 257 u1 = up[1]; 258 g = mpn_gcd_22 (u1, u0, v1, v0); 259 gp[0] = g.d0; 260 gp[1] = g.d1; 261 ctx.gn = 1 + (g.d1 > 0); 262 } 263 done: 264 TMP_FREE; 265 return ctx.gn; 266 } 267