1 /* jacobi.c 2 3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 6 7 Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation, 8 Inc. 9 10 This file is part of the GNU MP Library. 11 12 The GNU MP Library is free software; you can redistribute it and/or modify 13 it under the terms of either: 14 15 * the GNU Lesser General Public License as published by the Free 16 Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19 or 20 21 * the GNU General Public License as published by the Free Software 22 Foundation; either version 2 of the License, or (at your option) any 23 later version. 24 25 or both in parallel, as here. 26 27 The GNU MP Library is distributed in the hope that it will be useful, but 28 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 29 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 30 for more details. 31 32 You should have received copies of the GNU General Public License and the 33 GNU Lesser General Public License along with the GNU MP Library. If not, 34 see https://www.gnu.org/licenses/. */ 35 36 #include "gmp-impl.h" 37 #include "longlong.h" 38 39 #ifndef JACOBI_DC_THRESHOLD 40 #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD 41 #endif 42 43 /* Schönhage's rules: 44 * 45 * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3 46 * 47 * If r1 is odd, then 48 * 49 * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1) 50 * 51 * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)]. 52 * 53 * If r1 is even, r2 must be odd. We have 54 * 55 * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0) 56 * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1) 57 * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1) 58 * 59 * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating 60 * q1 times gives 61 * 62 * (r1 | r0) = (r1 | r2) = (r3 | r2) 63 * 64 * On the other hand, if r1 = 2 (mod 4), the sign factor is 65 * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent 66 * 67 * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2 68 * = q1 (r0-1)/2 + q1 (q1-1)/2 69 * 70 * and we can summarize the even case as 71 * 72 * (r1 | r0) = t(r1, r0, q1) (r3 | r2) 73 * 74 * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)} 75 * 76 * What about termination? The remainder sequence ends with (0|1) = 1 77 * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is 78 * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and 79 * hence non-zero. We may have r3 = r1 - q2 r2 = 0. 80 * 81 * Examples: (11|15) = - (15|11) = - (4|11) 82 * (4|11) = (4| 3) = (1| 3) 83 * (1| 3) = (3|1) = (0|1) = 1 84 * 85 * (2|7) = (2|1) = (0|1) = 1 86 * 87 * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5) 88 * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3) 89 * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1) 90 * 91 */ 92 93 /* In principle, the state consists of four variables: e (one bit), a, 94 b (two bits each), d (one bit). Collected factors are (-1)^e. a and 95 b are the least significant bits of the current remainders. d 96 (denominator) is 0 if we're currently subtracting multiplies of a 97 from b, and 1 if we're subtracting b from a. 98 99 e is stored in the least significant bit, while a, b and d are 100 coded as only 13 distinct values in bits 1-4, according to the 101 following table. For rows not mentioning d, the value is either 102 implied, or it doesn't matter. */ 103 104 #if WANT_ASSERT 105 static const struct 106 { 107 unsigned char a; 108 unsigned char b; 109 } decode_table[13] = { 110 /* 0 */ { 0, 1 }, 111 /* 1 */ { 0, 3 }, 112 /* 2 */ { 1, 1 }, 113 /* 3 */ { 1, 3 }, 114 /* 4 */ { 2, 1 }, 115 /* 5 */ { 2, 3 }, 116 /* 6 */ { 3, 1 }, 117 /* 7 */ { 3, 3 }, /* d = 1 */ 118 /* 8 */ { 1, 0 }, 119 /* 9 */ { 1, 2 }, 120 /* 10 */ { 3, 0 }, 121 /* 11 */ { 3, 2 }, 122 /* 12 */ { 3, 3 }, /* d = 0 */ 123 }; 124 #define JACOBI_A(bits) (decode_table[(bits)>>1].a) 125 #define JACOBI_B(bits) (decode_table[(bits)>>1].b) 126 #endif /* WANT_ASSERT */ 127 128 const unsigned char jacobi_table[208] = { 129 #include "jacobitab.h" 130 }; 131 132 #define BITS_FAIL 31 133 134 static void 135 jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, 136 mp_srcptr qp, mp_size_t qn, int d) 137 { 138 unsigned *bitsp = (unsigned *) p; 139 140 if (gp) 141 { 142 ASSERT (gn > 0); 143 if (gn != 1 || gp[0] != 1) 144 { 145 *bitsp = BITS_FAIL; 146 return; 147 } 148 } 149 150 if (qp) 151 { 152 ASSERT (qn > 0); 153 ASSERT (d >= 0); 154 *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3); 155 } 156 } 157 158 #define CHOOSE_P(n) (2*(n) / 3) 159 160 int 161 mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits) 162 { 163 mp_size_t scratch; 164 mp_size_t matrix_scratch; 165 mp_ptr tp; 166 167 TMP_DECL; 168 169 ASSERT (n > 0); 170 ASSERT ( (ap[n-1] | bp[n-1]) > 0); 171 ASSERT ( (bp[0] | ap[0]) & 1); 172 173 /* FIXME: Check for small sizes first, before setting up temporary 174 storage etc. */ 175 scratch = MPN_GCD_SUBDIV_STEP_ITCH(n); 176 177 if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) 178 { 179 mp_size_t hgcd_scratch; 180 mp_size_t update_scratch; 181 mp_size_t p = CHOOSE_P (n); 182 mp_size_t dc_scratch; 183 184 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 185 hgcd_scratch = mpn_hgcd_itch (n - p); 186 update_scratch = p + n - 1; 187 188 dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); 189 if (dc_scratch > scratch) 190 scratch = dc_scratch; 191 } 192 193 TMP_MARK; 194 tp = TMP_ALLOC_LIMBS(scratch); 195 196 while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) 197 { 198 struct hgcd_matrix M; 199 mp_size_t p = 2*n/3; 200 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 201 mp_size_t nn; 202 mpn_hgcd_matrix_init (&M, n - p, tp); 203 204 nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits, 205 tp + matrix_scratch); 206 if (nn > 0) 207 { 208 ASSERT (M.n <= (n - p - 1)/2); 209 ASSERT (M.n + p <= (p + n - 1) / 2); 210 /* Temporary storage 2 (p + M->n) <= p + n - 1. */ 211 n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch); 212 } 213 else 214 { 215 /* Temporary storage n */ 216 n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp); 217 if (!n) 218 { 219 TMP_FREE; 220 return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); 221 } 222 } 223 } 224 225 while (n > 2) 226 { 227 struct hgcd_matrix1 M; 228 mp_limb_t ah, al, bh, bl; 229 mp_limb_t mask; 230 231 mask = ap[n-1] | bp[n-1]; 232 ASSERT (mask > 0); 233 234 if (mask & GMP_NUMB_HIGHBIT) 235 { 236 ah = ap[n-1]; al = ap[n-2]; 237 bh = bp[n-1]; bl = bp[n-2]; 238 } 239 else 240 { 241 int shift; 242 243 count_leading_zeros (shift, mask); 244 ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); 245 al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); 246 bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); 247 bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); 248 } 249 250 /* Try an mpn_nhgcd2 step */ 251 if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits)) 252 { 253 n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n); 254 MP_PTR_SWAP (ap, tp); 255 } 256 else 257 { 258 /* mpn_hgcd2 has failed. Then either one of a or b is very 259 small, or the difference is very small. Perform one 260 subtraction followed by one division. */ 261 n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp); 262 if (!n) 263 { 264 TMP_FREE; 265 return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); 266 } 267 } 268 } 269 270 if (bits >= 16) 271 MP_PTR_SWAP (ap, bp); 272 273 ASSERT (bp[0] & 1); 274 275 if (n == 1) 276 { 277 mp_limb_t al, bl; 278 al = ap[0]; 279 bl = bp[0]; 280 281 TMP_FREE; 282 if (bl == 1) 283 return 1 - 2*(bits & 1); 284 else 285 return mpn_jacobi_base (al, bl, bits << 1); 286 } 287 288 else 289 { 290 int res = mpn_jacobi_2 (ap, bp, bits & 1); 291 TMP_FREE; 292 return res; 293 } 294 } 295