1 /* jacobi.c 2 3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 6 7 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010, 2011 Free Software 8 Foundation, Inc. 9 10 This file is part of the GNU MP Library. 11 12 The GNU MP Library is free software; you can redistribute it and/or modify 13 it under the terms of the GNU Lesser General Public License as published by 14 the Free Software Foundation; either version 3 of the License, or (at your 15 option) any later version. 16 17 The GNU MP Library is distributed in the hope that it will be useful, but 18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 19 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 20 License for more details. 21 22 You should have received a copy of the GNU Lesser General Public License 23 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 24 25 #include "gmp.h" 26 #include "gmp-impl.h" 27 #include "longlong.h" 28 29 #ifndef JACOBI_DC_THRESHOLD 30 #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD 31 #endif 32 33 /* Schönhage's rules: 34 * 35 * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3 36 * 37 * If r1 is odd, then 38 * 39 * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1) 40 * 41 * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)]. 42 * 43 * If r1 is even, r2 must be odd. We have 44 * 45 * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0) 46 * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1) 47 * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1) 48 * 49 * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating 50 * q1 times gives 51 * 52 * (r1 | r0) = (r1 | r2) = (r3 | r2) 53 * 54 * On the other hand, if r1 = 2 (mod 4), the sign factor is 55 * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent 56 * 57 * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2 58 * = q1 (r0-1)/2 + q1 (q1-1)/2 59 * 60 * and we can summarize the even case as 61 * 62 * (r1 | r0) = t(r1, r0, q1) (r3 | r2) 63 * 64 * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)} 65 * 66 * What about termination? The remainder sequence ends with (0|1) = 1 67 * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is 68 * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and 69 * hence non-zero. We may have r3 = r1 - q2 r2 = 0. 70 * 71 * Examples: (11|15) = - (15|11) = - (4|11) 72 * (4|11) = (4| 3) = (1| 3) 73 * (1| 3) = (3|1) = (0|1) = 1 74 * 75 * (2|7) = (2|1) = (0|1) = 1 76 * 77 * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5) 78 * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3) 79 * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1) 80 * 81 */ 82 83 /* In principle, the state consists of four variables: e (one bit), a, 84 b (two bits each), d (one bit). Collected factors are (-1)^e. a and 85 b are the least significant bits of the current remainders. d 86 (denominator) is 0 if we're currently subtracting multiplies of a 87 from b, and 1 if we're subtracting b from a. 88 89 e is stored in the least significant bit, while a, b and d are 90 coded as only 13 distinct values in bits 1-4, according to the 91 following table. For rows not mentioning d, the value is either 92 implied, or it doesn't matter. */ 93 94 #if WANT_ASSERT 95 static const struct 96 { 97 unsigned char a; 98 unsigned char b; 99 } decode_table[13] = { 100 /* 0 */ { 0, 1 }, 101 /* 1 */ { 0, 3 }, 102 /* 2 */ { 1, 1 }, 103 /* 3 */ { 1, 3 }, 104 /* 4 */ { 2, 1 }, 105 /* 5 */ { 2, 3 }, 106 /* 6 */ { 3, 1 }, 107 /* 7 */ { 3, 3 }, /* d = 1 */ 108 /* 8 */ { 1, 0 }, 109 /* 9 */ { 1, 2 }, 110 /* 10 */ { 3, 0 }, 111 /* 11 */ { 3, 2 }, 112 /* 12 */ { 3, 3 }, /* d = 0 */ 113 }; 114 #define JACOBI_A(bits) (decode_table[(bits)>>1].a) 115 #define JACOBI_B(bits) (decode_table[(bits)>>1].b) 116 #endif /* WANT_ASSERT */ 117 118 const unsigned char jacobi_table[208] = { 119 #include "jacobitab.h" 120 }; 121 122 #define BITS_FAIL 31 123 124 static void 125 jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, 126 mp_srcptr qp, mp_size_t qn, int d) 127 { 128 unsigned *bitsp = (unsigned *) p; 129 130 if (gp) 131 { 132 ASSERT (gn > 0); 133 if (gn != 1 || gp[0] != 1) 134 { 135 *bitsp = BITS_FAIL; 136 return; 137 } 138 } 139 140 if (qp) 141 { 142 ASSERT (qn > 0); 143 ASSERT (d >= 0); 144 *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3); 145 } 146 } 147 148 #define CHOOSE_P(n) (2*(n) / 3) 149 150 int 151 mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits) 152 { 153 mp_size_t scratch; 154 mp_size_t matrix_scratch; 155 mp_ptr tp; 156 157 TMP_DECL; 158 159 ASSERT (n > 0); 160 ASSERT ( (ap[n-1] | bp[n-1]) > 0); 161 ASSERT ( (bp[0] | ap[0]) & 1); 162 163 /* FIXME: Check for small sizes first, before setting up temporary 164 storage etc. */ 165 scratch = MPN_GCD_SUBDIV_STEP_ITCH(n); 166 167 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 168 { 169 mp_size_t hgcd_scratch; 170 mp_size_t update_scratch; 171 mp_size_t p = CHOOSE_P (n); 172 mp_size_t dc_scratch; 173 174 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 175 hgcd_scratch = mpn_hgcd_itch (n - p); 176 update_scratch = p + n - 1; 177 178 dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); 179 if (dc_scratch > scratch) 180 scratch = dc_scratch; 181 } 182 183 TMP_MARK; 184 tp = TMP_ALLOC_LIMBS(scratch); 185 186 while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) 187 { 188 struct hgcd_matrix M; 189 mp_size_t p = 2*n/3; 190 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 191 mp_size_t nn; 192 mpn_hgcd_matrix_init (&M, n - p, tp); 193 194 nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits, 195 tp + matrix_scratch); 196 if (nn > 0) 197 { 198 ASSERT (M.n <= (n - p - 1)/2); 199 ASSERT (M.n + p <= (p + n - 1) / 2); 200 /* Temporary storage 2 (p + M->n) <= p + n - 1. */ 201 n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch); 202 } 203 else 204 { 205 /* Temporary storage n */ 206 n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp); 207 if (!n) 208 { 209 TMP_FREE; 210 return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); 211 } 212 } 213 } 214 215 while (n > 2) 216 { 217 struct hgcd_matrix1 M; 218 mp_limb_t ah, al, bh, bl; 219 mp_limb_t mask; 220 221 mask = ap[n-1] | bp[n-1]; 222 ASSERT (mask > 0); 223 224 if (mask & GMP_NUMB_HIGHBIT) 225 { 226 ah = ap[n-1]; al = ap[n-2]; 227 bh = bp[n-1]; bl = bp[n-2]; 228 } 229 else 230 { 231 int shift; 232 233 count_leading_zeros (shift, mask); 234 ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); 235 al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); 236 bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); 237 bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); 238 } 239 240 /* Try an mpn_nhgcd2 step */ 241 if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits)) 242 { 243 n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n); 244 MP_PTR_SWAP (ap, tp); 245 } 246 else 247 { 248 /* mpn_hgcd2 has failed. Then either one of a or b is very 249 small, or the difference is very small. Perform one 250 subtraction followed by one division. */ 251 n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp); 252 if (!n) 253 { 254 TMP_FREE; 255 return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); 256 } 257 } 258 } 259 260 if (bits >= 16) 261 MP_PTR_SWAP (ap, bp); 262 263 ASSERT (bp[0] & 1); 264 265 if (n == 1) 266 { 267 mp_limb_t al, bl; 268 al = ap[0]; 269 bl = bp[0]; 270 271 TMP_FREE; 272 if (bl == 1) 273 return 1 - 2*(bits & 1); 274 else 275 return mpn_jacobi_base (al, bl, bits << 1); 276 } 277 278 else 279 { 280 int res = mpn_jacobi_2 (ap, bp, bits & 1); 281 TMP_FREE; 282 return res; 283 } 284 } 285