xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/jacobi.c (revision aceb213538ec08a74028e213127af18aa17bf1cf)
1 /* jacobi.c
2 
3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 
7 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010, 2011 Free Software
8 Foundation, Inc.
9 
10 This file is part of the GNU MP Library.
11 
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
16 
17 The GNU MP Library is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
20 License for more details.
21 
22 You should have received a copy of the GNU Lesser General Public License
23 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
24 
25 #include "gmp.h"
26 #include "gmp-impl.h"
27 #include "longlong.h"
28 
29 #ifndef JACOBI_DC_THRESHOLD
30 #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
31 #endif
32 
33 /* Schönhage's rules:
34  *
35  * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
36  *
37  * If r1 is odd, then
38  *
39  *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
40  *
41  * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
42  *
43  * If r1 is even, r2 must be odd. We have
44  *
45  *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
46  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
47  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
48  *
49  * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
50  * q1 times gives
51  *
52  *   (r1 | r0) = (r1 | r2) = (r3 | r2)
53  *
54  * On the other hand, if r1 = 2 (mod 4), the sign factor is
55  * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
56  *
57  *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
58  *   = q1 (r0-1)/2 + q1 (q1-1)/2
59  *
60  * and we can summarize the even case as
61  *
62  *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
63  *
64  * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
65  *
66  * What about termination? The remainder sequence ends with (0|1) = 1
67  * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
68  * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
69  * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
70  *
71  * Examples: (11|15) = - (15|11) = - (4|11)
72  *            (4|11) =    (4| 3) =   (1| 3)
73  *            (1| 3) = (3|1) = (0|1) = 1
74  *
75  *             (2|7) = (2|1) = (0|1) = 1
76  *
77  * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
78  *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
79  *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
80  *
81  */
82 
83 /* In principle, the state consists of four variables: e (one bit), a,
84    b (two bits each), d (one bit). Collected factors are (-1)^e. a and
85    b are the least significant bits of the current remainders. d
86    (denominator) is 0 if we're currently subtracting multiplies of a
87    from b, and 1 if we're subtracting b from a.
88 
89    e is stored in the least significant bit, while a, b and d are
90    coded as only 13 distinct values in bits 1-4, according to the
91    following table. For rows not mentioning d, the value is either
92    implied, or it doesn't matter. */
93 
94 #if WANT_ASSERT
95 static const struct
96 {
97   unsigned char a;
98   unsigned char b;
99 } decode_table[13] = {
100   /*  0 */ { 0, 1 },
101   /*  1 */ { 0, 3 },
102   /*  2 */ { 1, 1 },
103   /*  3 */ { 1, 3 },
104   /*  4 */ { 2, 1 },
105   /*  5 */ { 2, 3 },
106   /*  6 */ { 3, 1 },
107   /*  7 */ { 3, 3 }, /* d = 1 */
108   /*  8 */ { 1, 0 },
109   /*  9 */ { 1, 2 },
110   /* 10 */ { 3, 0 },
111   /* 11 */ { 3, 2 },
112   /* 12 */ { 3, 3 }, /* d = 0 */
113 };
114 #define JACOBI_A(bits) (decode_table[(bits)>>1].a)
115 #define JACOBI_B(bits) (decode_table[(bits)>>1].b)
116 #endif /* WANT_ASSERT */
117 
118 const unsigned char jacobi_table[208] = {
119 #include "jacobitab.h"
120 };
121 
122 #define BITS_FAIL 31
123 
124 static void
125 jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
126 	     mp_srcptr qp, mp_size_t qn, int d)
127 {
128   unsigned *bitsp = (unsigned *) p;
129 
130   if (gp)
131     {
132       ASSERT (gn > 0);
133       if (gn != 1 || gp[0] != 1)
134 	{
135 	  *bitsp = BITS_FAIL;
136 	  return;
137 	}
138     }
139 
140   if (qp)
141     {
142       ASSERT (qn > 0);
143       ASSERT (d >= 0);
144       *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
145     }
146 }
147 
148 #define CHOOSE_P(n) (2*(n) / 3)
149 
150 int
151 mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
152 {
153   mp_size_t scratch;
154   mp_size_t matrix_scratch;
155   mp_ptr tp;
156 
157   TMP_DECL;
158 
159   ASSERT (n > 0);
160   ASSERT ( (ap[n-1] | bp[n-1]) > 0);
161   ASSERT ( (bp[0] | ap[0]) & 1);
162 
163   /* FIXME: Check for small sizes first, before setting up temporary
164      storage etc. */
165   scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
166 
167   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
168     {
169       mp_size_t hgcd_scratch;
170       mp_size_t update_scratch;
171       mp_size_t p = CHOOSE_P (n);
172       mp_size_t dc_scratch;
173 
174       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
175       hgcd_scratch = mpn_hgcd_itch (n - p);
176       update_scratch = p + n - 1;
177 
178       dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
179       if (dc_scratch > scratch)
180 	scratch = dc_scratch;
181     }
182 
183   TMP_MARK;
184   tp = TMP_ALLOC_LIMBS(scratch);
185 
186   while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
187     {
188       struct hgcd_matrix M;
189       mp_size_t p = 2*n/3;
190       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
191       mp_size_t nn;
192       mpn_hgcd_matrix_init (&M, n - p, tp);
193 
194       nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
195 			    tp + matrix_scratch);
196       if (nn > 0)
197 	{
198 	  ASSERT (M.n <= (n - p - 1)/2);
199 	  ASSERT (M.n + p <= (p + n - 1) / 2);
200 	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
201 	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
202 	}
203       else
204 	{
205 	  /* Temporary storage n */
206 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
207 	  if (!n)
208 	    {
209 	      TMP_FREE;
210 	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
211 	    }
212 	}
213     }
214 
215   while (n > 2)
216     {
217       struct hgcd_matrix1 M;
218       mp_limb_t ah, al, bh, bl;
219       mp_limb_t mask;
220 
221       mask = ap[n-1] | bp[n-1];
222       ASSERT (mask > 0);
223 
224       if (mask & GMP_NUMB_HIGHBIT)
225 	{
226 	  ah = ap[n-1]; al = ap[n-2];
227 	  bh = bp[n-1]; bl = bp[n-2];
228 	}
229       else
230 	{
231 	  int shift;
232 
233 	  count_leading_zeros (shift, mask);
234 	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
235 	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
236 	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
237 	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
238 	}
239 
240       /* Try an mpn_nhgcd2 step */
241       if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
242 	{
243 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
244 	  MP_PTR_SWAP (ap, tp);
245 	}
246       else
247 	{
248 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
249 	     small, or the difference is very small. Perform one
250 	     subtraction followed by one division. */
251 	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
252 	  if (!n)
253 	    {
254 	      TMP_FREE;
255 	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
256 	    }
257 	}
258     }
259 
260   if (bits >= 16)
261     MP_PTR_SWAP (ap, bp);
262 
263   ASSERT (bp[0] & 1);
264 
265   if (n == 1)
266     {
267       mp_limb_t al, bl;
268       al = ap[0];
269       bl = bp[0];
270 
271       TMP_FREE;
272       if (bl == 1)
273 	return 1 - 2*(bits & 1);
274       else
275 	return mpn_jacobi_base (al, bl, bits << 1);
276     }
277 
278   else
279     {
280       int res = mpn_jacobi_2 (ap, bp, bits & 1);
281       TMP_FREE;
282       return res;
283     }
284 }
285