xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/hgcd2_jacobi.c (revision 63aea4bd5b445e491ff0389fe27ec78b3099dba3)
1 /* hgcd2_jacobi.c
2 
3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 
7 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2011 Free Software
8 Foundation, Inc.
9 
10 This file is part of the GNU MP Library.
11 
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
16 
17 The GNU MP Library is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
20 License for more details.
21 
22 You should have received a copy of the GNU Lesser General Public License
23 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
24 
25 #include "gmp.h"
26 #include "gmp-impl.h"
27 #include "longlong.h"
28 
29 #if GMP_NAIL_BITS > 0
30 #error Nails not supported.
31 #endif
32 
33 /* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
34    possibly be renamed. */
35 static inline mp_limb_t
36 div1 (mp_ptr rp,
37       mp_limb_t n0,
38       mp_limb_t d0)
39 {
40   mp_limb_t q = 0;
41 
42   if ((mp_limb_signed_t) n0 < 0)
43     {
44       int cnt;
45       for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
46 	{
47 	  d0 = d0 << 1;
48 	}
49 
50       q = 0;
51       while (cnt)
52 	{
53 	  q <<= 1;
54 	  if (n0 >= d0)
55 	    {
56 	      n0 = n0 - d0;
57 	      q |= 1;
58 	    }
59 	  d0 = d0 >> 1;
60 	  cnt--;
61 	}
62     }
63   else
64     {
65       int cnt;
66       for (cnt = 0; n0 >= d0; cnt++)
67 	{
68 	  d0 = d0 << 1;
69 	}
70 
71       q = 0;
72       while (cnt)
73 	{
74 	  d0 = d0 >> 1;
75 	  q <<= 1;
76 	  if (n0 >= d0)
77 	    {
78 	      n0 = n0 - d0;
79 	      q |= 1;
80 	    }
81 	  cnt--;
82 	}
83     }
84   *rp = n0;
85   return q;
86 }
87 
88 /* Two-limb division optimized for small quotients.  */
89 static inline mp_limb_t
90 div2 (mp_ptr rp,
91       mp_limb_t nh, mp_limb_t nl,
92       mp_limb_t dh, mp_limb_t dl)
93 {
94   mp_limb_t q = 0;
95 
96   if ((mp_limb_signed_t) nh < 0)
97     {
98       int cnt;
99       for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
100 	{
101 	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
102 	  dl = dl << 1;
103 	}
104 
105       while (cnt)
106 	{
107 	  q <<= 1;
108 	  if (nh > dh || (nh == dh && nl >= dl))
109 	    {
110 	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
111 	      q |= 1;
112 	    }
113 	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
114 	  dh = dh >> 1;
115 	  cnt--;
116 	}
117     }
118   else
119     {
120       int cnt;
121       for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
122 	{
123 	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
124 	  dl = dl << 1;
125 	}
126 
127       while (cnt)
128 	{
129 	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
130 	  dh = dh >> 1;
131 	  q <<= 1;
132 	  if (nh > dh || (nh == dh && nl >= dl))
133 	    {
134 	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
135 	      q |= 1;
136 	    }
137 	  cnt--;
138 	}
139     }
140 
141   rp[0] = nl;
142   rp[1] = nh;
143 
144   return q;
145 }
146 
147 int
148 mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
149 		  struct hgcd_matrix1 *M, unsigned *bitsp)
150 {
151   mp_limb_t u00, u01, u10, u11;
152   unsigned bits = *bitsp;
153 
154   if (ah < 2 || bh < 2)
155     return 0;
156 
157   if (ah > bh || (ah == bh && al > bl))
158     {
159       sub_ddmmss (ah, al, ah, al, bh, bl);
160       if (ah < 2)
161 	return 0;
162 
163       u00 = u01 = u11 = 1;
164       u10 = 0;
165       bits = mpn_jacobi_update (bits, 1, 1);
166     }
167   else
168     {
169       sub_ddmmss (bh, bl, bh, bl, ah, al);
170       if (bh < 2)
171 	return 0;
172 
173       u00 = u10 = u11 = 1;
174       u01 = 0;
175       bits = mpn_jacobi_update (bits, 0, 1);
176     }
177 
178   if (ah < bh)
179     goto subtract_a;
180 
181   for (;;)
182     {
183       ASSERT (ah >= bh);
184       if (ah == bh)
185 	goto done;
186 
187       if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
188 	{
189 	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
190 	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
191 
192 	  break;
193 	}
194 
195       /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
196 	 1), affecting the second column of M. */
197       ASSERT (ah > bh);
198       sub_ddmmss (ah, al, ah, al, bh, bl);
199 
200       if (ah < 2)
201 	goto done;
202 
203       if (ah <= bh)
204 	{
205 	  /* Use q = 1 */
206 	  u01 += u00;
207 	  u11 += u10;
208 	  bits = mpn_jacobi_update (bits, 1, 1);
209 	}
210       else
211 	{
212 	  mp_limb_t r[2];
213 	  mp_limb_t q = div2 (r, ah, al, bh, bl);
214 	  al = r[0]; ah = r[1];
215 	  if (ah < 2)
216 	    {
217 	      /* A is too small, but q is correct. */
218 	      u01 += q * u00;
219 	      u11 += q * u10;
220 	      bits = mpn_jacobi_update (bits, 1, q & 3);
221 	      goto done;
222 	    }
223 	  q++;
224 	  u01 += q * u00;
225 	  u11 += q * u10;
226 	  bits = mpn_jacobi_update (bits, 1, q & 3);
227 	}
228     subtract_a:
229       ASSERT (bh >= ah);
230       if (ah == bh)
231 	goto done;
232 
233       if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
234 	{
235 	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
236 	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
237 
238 	  goto subtract_a1;
239 	}
240 
241       /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
242 	 1), affecting the first column of M. */
243       sub_ddmmss (bh, bl, bh, bl, ah, al);
244 
245       if (bh < 2)
246 	goto done;
247 
248       if (bh <= ah)
249 	{
250 	  /* Use q = 1 */
251 	  u00 += u01;
252 	  u10 += u11;
253 	  bits = mpn_jacobi_update (bits, 0, 1);
254 	}
255       else
256 	{
257 	  mp_limb_t r[2];
258 	  mp_limb_t q = div2 (r, bh, bl, ah, al);
259 	  bl = r[0]; bh = r[1];
260 	  if (bh < 2)
261 	    {
262 	      /* B is too small, but q is correct. */
263 	      u00 += q * u01;
264 	      u10 += q * u11;
265 	      bits = mpn_jacobi_update (bits, 0, q & 3);
266 	      goto done;
267 	    }
268 	  q++;
269 	  u00 += q * u01;
270 	  u10 += q * u11;
271 	  bits = mpn_jacobi_update (bits, 0, q & 3);
272 	}
273     }
274 
275   /* NOTE: Since we discard the least significant half limb, we don't
276      get a truly maximal M (corresponding to |a - b| <
277      2^{GMP_LIMB_BITS +1}). */
278   /* Single precision loop */
279   for (;;)
280     {
281       ASSERT (ah >= bh);
282       if (ah == bh)
283 	break;
284 
285       ah -= bh;
286       if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
287 	break;
288 
289       if (ah <= bh)
290 	{
291 	  /* Use q = 1 */
292 	  u01 += u00;
293 	  u11 += u10;
294 	  bits = mpn_jacobi_update (bits, 1, 1);
295 	}
296       else
297 	{
298 	  mp_limb_t r;
299 	  mp_limb_t q = div1 (&r, ah, bh);
300 	  ah = r;
301 	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
302 	    {
303 	      /* A is too small, but q is correct. */
304 	      u01 += q * u00;
305 	      u11 += q * u10;
306 	      bits = mpn_jacobi_update (bits, 1, q & 3);
307 	      break;
308 	    }
309 	  q++;
310 	  u01 += q * u00;
311 	  u11 += q * u10;
312 	  bits = mpn_jacobi_update (bits, 1, q & 3);
313 	}
314     subtract_a1:
315       ASSERT (bh >= ah);
316       if (ah == bh)
317 	break;
318 
319       bh -= ah;
320       if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
321 	break;
322 
323       if (bh <= ah)
324 	{
325 	  /* Use q = 1 */
326 	  u00 += u01;
327 	  u10 += u11;
328 	  bits = mpn_jacobi_update (bits, 0, 1);
329 	}
330       else
331 	{
332 	  mp_limb_t r;
333 	  mp_limb_t q = div1 (&r, bh, ah);
334 	  bh = r;
335 	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
336 	    {
337 	      /* B is too small, but q is correct. */
338 	      u00 += q * u01;
339 	      u10 += q * u11;
340 	      bits = mpn_jacobi_update (bits, 0, q & 3);
341 	      break;
342 	    }
343 	  q++;
344 	  u00 += q * u01;
345 	  u10 += q * u11;
346 	  bits = mpn_jacobi_update (bits, 0, q & 3);
347 	}
348     }
349 
350  done:
351   M->u[0][0] = u00; M->u[0][1] = u01;
352   M->u[1][0] = u10; M->u[1][1] = u11;
353   *bitsp = bits;
354 
355   return 1;
356 }
357