xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/jacobi.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* jacobi.c
2 
3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 
7 Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
8 Inc.
9 
10 This file is part of the GNU MP Library.
11 
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of either:
14 
15   * the GNU Lesser General Public License as published by the Free
16     Software Foundation; either version 3 of the License, or (at your
17     option) any later version.
18 
19 or
20 
21   * the GNU General Public License as published by the Free Software
22     Foundation; either version 2 of the License, or (at your option) any
23     later version.
24 
25 or both in parallel, as here.
26 
27 The GNU MP Library is distributed in the hope that it will be useful, but
28 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30 for more details.
31 
32 You should have received copies of the GNU General Public License and the
33 GNU Lesser General Public License along with the GNU MP Library.  If not,
34 see https://www.gnu.org/licenses/.  */
35 
36 #include "gmp.h"
37 #include "gmp-impl.h"
38 #include "longlong.h"
39 
40 #ifndef JACOBI_DC_THRESHOLD
41 #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
42 #endif
43 
44 /* Schönhage's rules:
45  *
46  * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
47  *
48  * If r1 is odd, then
49  *
50  *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
51  *
52  * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
53  *
54  * If r1 is even, r2 must be odd. We have
55  *
56  *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
57  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
58  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
59  *
60  * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
61  * q1 times gives
62  *
63  *   (r1 | r0) = (r1 | r2) = (r3 | r2)
64  *
65  * On the other hand, if r1 = 2 (mod 4), the sign factor is
66  * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
67  *
68  *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
69  *   = q1 (r0-1)/2 + q1 (q1-1)/2
70  *
71  * and we can summarize the even case as
72  *
73  *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
74  *
75  * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
76  *
77  * What about termination? The remainder sequence ends with (0|1) = 1
78  * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
79  * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
80  * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
81  *
82  * Examples: (11|15) = - (15|11) = - (4|11)
83  *            (4|11) =    (4| 3) =   (1| 3)
84  *            (1| 3) = (3|1) = (0|1) = 1
85  *
86  *             (2|7) = (2|1) = (0|1) = 1
87  *
88  * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
89  *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
90  *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
91  *
92  */
93 
94 /* In principle, the state consists of four variables: e (one bit), a,
95    b (two bits each), d (one bit). Collected factors are (-1)^e. a and
96    b are the least significant bits of the current remainders. d
97    (denominator) is 0 if we're currently subtracting multiplies of a
98    from b, and 1 if we're subtracting b from a.
99 
100    e is stored in the least significant bit, while a, b and d are
101    coded as only 13 distinct values in bits 1-4, according to the
102    following table. For rows not mentioning d, the value is either
103    implied, or it doesn't matter. */
104 
105 #if WANT_ASSERT
106 static const struct
107 {
108   unsigned char a;
109   unsigned char b;
110 } decode_table[13] = {
111   /*  0 */ { 0, 1 },
112   /*  1 */ { 0, 3 },
113   /*  2 */ { 1, 1 },
114   /*  3 */ { 1, 3 },
115   /*  4 */ { 2, 1 },
116   /*  5 */ { 2, 3 },
117   /*  6 */ { 3, 1 },
118   /*  7 */ { 3, 3 }, /* d = 1 */
119   /*  8 */ { 1, 0 },
120   /*  9 */ { 1, 2 },
121   /* 10 */ { 3, 0 },
122   /* 11 */ { 3, 2 },
123   /* 12 */ { 3, 3 }, /* d = 0 */
124 };
125 #define JACOBI_A(bits) (decode_table[(bits)>>1].a)
126 #define JACOBI_B(bits) (decode_table[(bits)>>1].b)
127 #endif /* WANT_ASSERT */
128 
129 const unsigned char jacobi_table[208] = {
130 #include "jacobitab.h"
131 };
132 
133 #define BITS_FAIL 31
134 
135 static void
136 jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
137 	     mp_srcptr qp, mp_size_t qn, int d)
138 {
139   unsigned *bitsp = (unsigned *) p;
140 
141   if (gp)
142     {
143       ASSERT (gn > 0);
144       if (gn != 1 || gp[0] != 1)
145 	{
146 	  *bitsp = BITS_FAIL;
147 	  return;
148 	}
149     }
150 
151   if (qp)
152     {
153       ASSERT (qn > 0);
154       ASSERT (d >= 0);
155       *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
156     }
157 }
158 
159 #define CHOOSE_P(n) (2*(n) / 3)
160 
161 int
162 mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
163 {
164   mp_size_t scratch;
165   mp_size_t matrix_scratch;
166   mp_ptr tp;
167 
168   TMP_DECL;
169 
170   ASSERT (n > 0);
171   ASSERT ( (ap[n-1] | bp[n-1]) > 0);
172   ASSERT ( (bp[0] | ap[0]) & 1);
173 
174   /* FIXME: Check for small sizes first, before setting up temporary
175      storage etc. */
176   scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
177 
178   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
179     {
180       mp_size_t hgcd_scratch;
181       mp_size_t update_scratch;
182       mp_size_t p = CHOOSE_P (n);
183       mp_size_t dc_scratch;
184 
185       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
186       hgcd_scratch = mpn_hgcd_itch (n - p);
187       update_scratch = p + n - 1;
188 
189       dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
190       if (dc_scratch > scratch)
191 	scratch = dc_scratch;
192     }
193 
194   TMP_MARK;
195   tp = TMP_ALLOC_LIMBS(scratch);
196 
197   while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
198     {
199       struct hgcd_matrix M;
200       mp_size_t p = 2*n/3;
201       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
202       mp_size_t nn;
203       mpn_hgcd_matrix_init (&M, n - p, tp);
204 
205       nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
206 			    tp + matrix_scratch);
207       if (nn > 0)
208 	{
209 	  ASSERT (M.n <= (n - p - 1)/2);
210 	  ASSERT (M.n + p <= (p + n - 1) / 2);
211 	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
212 	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
213 	}
214       else
215 	{
216 	  /* Temporary storage n */
217 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
218 	  if (!n)
219 	    {
220 	      TMP_FREE;
221 	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
222 	    }
223 	}
224     }
225 
226   while (n > 2)
227     {
228       struct hgcd_matrix1 M;
229       mp_limb_t ah, al, bh, bl;
230       mp_limb_t mask;
231 
232       mask = ap[n-1] | bp[n-1];
233       ASSERT (mask > 0);
234 
235       if (mask & GMP_NUMB_HIGHBIT)
236 	{
237 	  ah = ap[n-1]; al = ap[n-2];
238 	  bh = bp[n-1]; bl = bp[n-2];
239 	}
240       else
241 	{
242 	  int shift;
243 
244 	  count_leading_zeros (shift, mask);
245 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
246 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
247 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
248 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
249 	}
250 
251       /* Try an mpn_nhgcd2 step */
252       if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
253 	{
254 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
255 	  MP_PTR_SWAP (ap, tp);
256 	}
257       else
258 	{
259 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
260 	     small, or the difference is very small. Perform one
261 	     subtraction followed by one division. */
262 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
263 	  if (!n)
264 	    {
265 	      TMP_FREE;
266 	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
267 	    }
268 	}
269     }
270 
271   if (bits >= 16)
272     MP_PTR_SWAP (ap, bp);
273 
274   ASSERT (bp[0] & 1);
275 
276   if (n == 1)
277     {
278       mp_limb_t al, bl;
279       al = ap[0];
280       bl = bp[0];
281 
282       TMP_FREE;
283       if (bl == 1)
284 	return 1 - 2*(bits & 1);
285       else
286 	return mpn_jacobi_base (al, bl, bits << 1);
287     }
288 
289   else
290     {
291       int res = mpn_jacobi_2 (ap, bp, bits & 1);
292       TMP_FREE;
293       return res;
294     }
295 }
296