1 /* Reference mpn functions, designed to be simple, portable and independent
2    of the normal gmp code.  Speed isn't a consideration.
3 
4 Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
5 2007, 2008, 2009 Free Software Foundation, Inc.
6 
7 This file is part of the GNU MP Library.
8 
9 The GNU MP Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MP Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
21 
22 
23 /* Most routines have assertions representing what the mpn routines are
24    supposed to accept.  Many of these reference routines do sensible things
25    outside these ranges (e.g. for size==0), but the assertions are present to
26    catch bad parameters here before the same values are passed on to a real
27    mpn routine being compared.  */
28 
29 /* always do assertion checking */
30 #define WANT_ASSERT  1
31 
32 #include <stdio.h>  /* for NULL */
33 #include <stdlib.h> /* for malloc */
34 
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38 
39 #include "tests.h"
40 
41 
42 
43 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
44    in bytes. */
45 int
46 byte_overlap_p (const void *v_xp, mp_size_t xsize,
47 		const void *v_yp, mp_size_t ysize)
48 {
49   const char *xp = v_xp;
50   const char *yp = v_yp;
51 
52   ASSERT (xsize >= 0);
53   ASSERT (ysize >= 0);
54 
55   /* no wraparounds */
56   ASSERT (xp+xsize >= xp);
57   ASSERT (yp+ysize >= yp);
58 
59   if (xp + xsize <= yp)
60     return 0;
61 
62   if (yp + ysize <= xp)
63     return 0;
64 
65   return 1;
66 }
67 
68 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
69 int
70 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
71 {
72   return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
73 			 yp, ysize * BYTES_PER_MP_LIMB);
74 }
75 
76 /* Check overlap for a routine defined to work low to high. */
77 int
78 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
79 {
80   return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
81 }
82 
83 /* Check overlap for a routine defined to work high to low. */
84 int
85 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
86 {
87   return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
88 }
89 
90 /* Check overlap for a standard routine requiring equal or separate. */
91 int
92 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
93 {
94   return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
95 }
96 int
97 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
98 			       mp_size_t size)
99 {
100   return (refmpn_overlap_fullonly_p (dst, src1, size)
101 	  && refmpn_overlap_fullonly_p (dst, src2, size));
102 }
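
/* Added illustration (a hypothetical helper, not in the original file): the
   "fullonly" rule means source and destination must be either exactly
   coincident or completely separate; partial overlap is rejected.  */
static void
refmpn_overlap_example (void)
{
  mp_limb_t  x[10];

  ASSERT (refmpn_overlap_fullonly_p (x, x, (mp_size_t) 5));       /* identical: ok */
  ASSERT (! refmpn_overlap_fullonly_p (x + 1, x, (mp_size_t) 5)); /* partial: bad */
  ASSERT (refmpn_overlap_fullonly_p (x + 5, x, (mp_size_t) 5));   /* separate: ok */
}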
103 
104 
105 mp_ptr
106 refmpn_malloc_limbs (mp_size_t size)
107 {
108   mp_ptr  p;
109   ASSERT (size >= 0);
110   if (size == 0)
111     size = 1;
112   p = (mp_ptr) malloc ((size_t) (size * BYTES_PER_MP_LIMB));
113   ASSERT (p != NULL);
114   return p;
115 }
116 
117 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
118  * memory allocated by refmpn_malloc_limbs_aligned. */
119 void
120 refmpn_free_limbs (mp_ptr p)
121 {
122   free (p);
123 }
124 
125 mp_ptr
126 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
127 {
128   mp_ptr  p;
129   p = refmpn_malloc_limbs (size);
130   refmpn_copyi (p, ptr, size);
131   return p;
132 }
133 
134 /* malloc n limbs, aligned to a boundary that is a multiple of m bytes */
135 mp_ptr
136 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
137 {
138   return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
139 }
140 
141 
142 void
143 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
144 {
145   mp_size_t  i;
146   ASSERT (size >= 0);
147   for (i = 0; i < size; i++)
148     ptr[i] = value;
149 }
150 
151 void
152 refmpn_zero (mp_ptr ptr, mp_size_t size)
153 {
154   refmpn_fill (ptr, size, CNST_LIMB(0));
155 }
156 
157 void
158 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
159 {
160   ASSERT (newsize >= oldsize);
161   refmpn_zero (ptr+oldsize, newsize-oldsize);
162 }
163 
164 int
165 refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
166 {
167   mp_size_t  i;
168   for (i = 0; i < size; i++)
169     if (ptr[i] != 0)
170       return 0;
171   return 1;
172 }
173 
174 mp_size_t
175 refmpn_normalize (mp_srcptr ptr, mp_size_t size)
176 {
177   ASSERT (size >= 0);
178   while (size > 0 && ptr[size-1] == 0)
179     size--;
180   return size;
181 }
182 
183 /* the highest one bit in x */
184 mp_limb_t
185 refmpn_msbone (mp_limb_t x)
186 {
187   mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
188 
189   while (n != 0)
190     {
191       if (x & n)
192 	break;
193       n >>= 1;
194     }
195   return n;
196 }
197 
198 /* a mask of the highest one bit and all bits below */
199 mp_limb_t
200 refmpn_msbone_mask (mp_limb_t x)
201 {
202   if (x == 0)
203     return 0;
204 
205   return (refmpn_msbone (x) << 1) - 1;
206 }
207 
208 /* How many digits in the given base will fit in a limb.
209    Notice that the product b is allowed to be equal to the limit
210    2^GMP_NUMB_BITS; this ensures the result for base==2 will be
211    GMP_NUMB_BITS (and similarly for other powers of 2).  */
212 int
213 refmpn_chars_per_limb (int base)
214 {
215   mp_limb_t  limit[2], b[2];
216   int        chars_per_limb;
217 
218   ASSERT (base >= 2);
219 
220   limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
221   limit[1] = 1;
222   b[0] = 1;      /* b = 1 */
223   b[1] = 0;
224 
225   chars_per_limb = 0;
226   for (;;)
227     {
228       if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
229 	break;
230       if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
231 	break;
232       chars_per_limb++;
233     }
234   return chars_per_limb;
235 }
236 
237 /* The biggest value base**n which fits in GMP_NUMB_BITS bits. */
238 mp_limb_t
239 refmpn_big_base (int base)
240 {
241   int        chars_per_limb = refmpn_chars_per_limb (base);
242   int        i;
243   mp_limb_t  bb;
244 
245   ASSERT (base >= 2);
246   bb = 1;
247   for (i = 0; i < chars_per_limb; i++)
248     bb *= base;
249   return bb;
250 }
251 
252 
253 void
254 refmpn_setbit (mp_ptr ptr, unsigned long bit)
255 {
256   ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
257 }
258 
259 void
260 refmpn_clrbit (mp_ptr ptr, unsigned long bit)
261 {
262   ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
263 }
264 
265 #define REFMPN_TSTBIT(ptr,bit) \
266   (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
267 
268 int
269 refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
270 {
271   return REFMPN_TSTBIT (ptr, bit);
272 }
273 
274 unsigned long
275 refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
276 {
277   while (REFMPN_TSTBIT (ptr, bit) != 0)
278     bit++;
279   return bit;
280 }
281 
282 unsigned long
283 refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
284 {
285   while (REFMPN_TSTBIT (ptr, bit) == 0)
286     bit++;
287   return bit;
288 }
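
/* Added illustration (a hypothetical helper, not in the original file): bit
   indices count from the least significant bit of the first limb.  Note the
   scan routines assume a suitable bit exists and would walk past {ptr,size}
   otherwise.  */
static void
refmpn_bit_example (void)
{
  mp_limb_t  x[2];

  refmpn_zero (x, (mp_size_t) 2);
  refmpn_setbit (x, 3);
  ASSERT (refmpn_tstbit (x, 3));
  ASSERT (refmpn_scan1 (x, 0) == 3);   /* first 1 bit at or after bit 0 */
  ASSERT (refmpn_scan0 (x, 3) == 4);   /* first 0 bit at or after bit 3 */
}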
289 
290 void
291 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
292 {
293   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
294   refmpn_copyi (rp, sp, size);
295 }
296 
297 void
298 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
299 {
300   mp_size_t i;
301 
302   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
303   ASSERT (size >= 0);
304 
305   for (i = 0; i < size; i++)
306     rp[i] = sp[i];
307 }
308 
309 void
310 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
311 {
312   mp_size_t i;
313 
314   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
315   ASSERT (size >= 0);
316 
317   for (i = size-1; i >= 0; i--)
318     rp[i] = sp[i];
319 }
320 
321 /* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
322    zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
323 void
324 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
325 {
326   ASSERT (wsize >= 0);
327   ASSERT (xsize >= 0);
328 
329   /* high part of x if x bigger than w */
330   if (xsize > wsize)
331     {
332       xp += xsize - wsize;
333       xsize = wsize;
334     }
335 
336   refmpn_copy (wp + wsize-xsize, xp, xsize);
337   refmpn_zero (wp, wsize-xsize);
338 }
339 
340 int
341 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
342 {
343   mp_size_t  i;
344 
345   ASSERT (size >= 1);
346   ASSERT_MPN (xp, size);
347   ASSERT_MPN (yp, size);
348 
349   for (i = size-1; i >= 0; i--)
350     {
351       if (xp[i] > yp[i])  return 1;
352       if (xp[i] < yp[i])  return -1;
353     }
354   return 0;
355 }
356 
357 int
358 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
359 {
360   if (size == 0)
361     return 0;
362   else
363     return refmpn_cmp (xp, yp, size);
364 }
365 
366 int
367 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
368 		     mp_srcptr yp, mp_size_t ysize)
369 {
370   int  opp, cmp;
371 
372   ASSERT_MPN (xp, xsize);
373   ASSERT_MPN (yp, ysize);
374 
375   opp = (xsize < ysize);
376   if (opp)
377     MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
378 
379   if (! refmpn_zero_p (xp+ysize, xsize-ysize))
380     cmp = 1;
381   else
382     cmp = refmpn_cmp (xp, yp, ysize);
383 
384   return (opp ? -cmp : cmp);
385 }
386 
387 int
388 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
389 {
390   mp_size_t  i;
391   ASSERT (size >= 0);
392 
393   for (i = 0; i < size; i++)
394       if (xp[i] != yp[i])
395 	return 0;
396   return 1;
397 }
398 
399 
400 #define LOGOPS(operation)                                               \
401   {                                                                     \
402     mp_size_t  i;                                                       \
403 									\
404     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
405     ASSERT (size >= 1);                                                 \
406     ASSERT_MPN (s1p, size);                                             \
407     ASSERT_MPN (s2p, size);                                             \
408 									\
409     for (i = 0; i < size; i++)                                          \
410       rp[i] = operation;                                                \
411   }
412 
413 void
414 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
415 {
416   LOGOPS (s1p[i] & s2p[i]);
417 }
418 void
419 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
420 {
421   LOGOPS (s1p[i] & ~s2p[i]);
422 }
423 void
424 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
425 {
426   LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
427 }
428 void
429 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
430 {
431   LOGOPS (s1p[i] | s2p[i]);
432 }
433 void
434 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
435 {
436   LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
437 }
438 void
439 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
440 {
441   LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
442 }
443 void
444 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
445 {
446   LOGOPS (s1p[i] ^ s2p[i]);
447 }
448 void
449 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
450 {
451   LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
452 }
453 
454 
455 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
456 void
457 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
458 		   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
459 {
460   *dl = ml - sl;
461   *dh = mh - sh - (ml < sl);
462 }
463 
464 
465 /* set *w to x+y, return 0 or 1 carry */
466 mp_limb_t
467 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
468 {
469   mp_limb_t  sum, cy;
470 
471   ASSERT_LIMB (x);
472   ASSERT_LIMB (y);
473 
474   sum = x + y;
475 #if GMP_NAIL_BITS == 0
476   *w = sum;
477   cy = (sum < x);
478 #else
479   *w = sum & GMP_NUMB_MASK;
480   cy = (sum >> GMP_NUMB_BITS);
481 #endif
482   return cy;
483 }
484 
485 /* set *w to x-y, return 0 or 1 borrow */
486 mp_limb_t
487 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
488 {
489   mp_limb_t  diff, cy;
490 
491   ASSERT_LIMB (x);
492   ASSERT_LIMB (y);
493 
494   diff = x - y;
495 #if GMP_NAIL_BITS == 0
496   *w = diff;
497   cy = (diff > x);
498 #else
499   *w = diff & GMP_NUMB_MASK;
500   cy = (diff >> GMP_NUMB_BITS) & 1;
501 #endif
502   return cy;
503 }
504 
505 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
506 mp_limb_t
507 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
508 {
509   mp_limb_t  r;
510 
511   ASSERT_LIMB (x);
512   ASSERT_LIMB (y);
513   ASSERT (c == 0 || c == 1);
514 
515   r = ref_addc_limb (w, x, y);
516   return r + ref_addc_limb (w, *w, c);
517 }
518 
519 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
520 mp_limb_t
521 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
522 {
523   mp_limb_t  r;
524 
525   ASSERT_LIMB (x);
526   ASSERT_LIMB (y);
527   ASSERT (c == 0 || c == 1);
528 
529   r = ref_subc_limb (w, x, y);
530   return r + ref_subc_limb (w, *w, c);
531 }
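
/* Added illustration (a hypothetical helper, not in the original file): a
   full-limb add that wraps produces a carry of 1, and the matching subtract
   borrows it back.  */
static void
ref_addc_limb_example (void)
{
  mp_limb_t  w;

  ASSERT (ref_addc_limb (&w, GMP_NUMB_MAX, CNST_LIMB(1)) == 1);
  ASSERT (w == 0);
  ASSERT (ref_subc_limb (&w, CNST_LIMB(0), CNST_LIMB(1)) == 1);
  ASSERT (w == GMP_NUMB_MAX);
}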
532 
533 
534 #define AORS_1(operation)                               \
535   {                                                     \
536     mp_limb_t  i;                                       \
537 							\
538     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
539     ASSERT (size >= 1);                                 \
540     ASSERT_MPN (sp, size);                              \
541     ASSERT_LIMB (n);                                    \
542 							\
543     for (i = 0; i < size; i++)                          \
544       n = operation (&rp[i], sp[i], n);                 \
545     return n;                                           \
546   }
547 
548 mp_limb_t
549 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
550 {
551   AORS_1 (ref_addc_limb);
552 }
553 mp_limb_t
554 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
555 {
556   AORS_1 (ref_subc_limb);
557 }
558 
559 #define AORS_NC(operation)                                              \
560   {                                                                     \
561     mp_size_t  i;                                                       \
562 									\
563     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
564     ASSERT (carry == 0 || carry == 1);                                  \
565     ASSERT (size >= 1);                                                 \
566     ASSERT_MPN (s1p, size);                                             \
567     ASSERT_MPN (s2p, size);                                             \
568 									\
569     for (i = 0; i < size; i++)                                          \
570       carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
571     return carry;                                                       \
572   }
573 
574 mp_limb_t
575 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
576 	       mp_limb_t carry)
577 {
578   AORS_NC (adc);
579 }
580 mp_limb_t
581 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
582 	       mp_limb_t carry)
583 {
584   AORS_NC (sbb);
585 }
586 
587 
588 mp_limb_t
589 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
590 {
591   return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
592 }
593 mp_limb_t
594 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
595 {
596   return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
597 }
598 
599 mp_limb_t
600 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
601 		 mp_size_t n, unsigned int s)
602 {
603   mp_limb_t cy;
604   mp_ptr tp;
605 
606   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
607   ASSERT (n >= 1);
608   ASSERT (0 < s && s < GMP_NUMB_BITS);
609   ASSERT_MPN (up, n);
610   ASSERT_MPN (vp, n);
611 
612   tp = refmpn_malloc_limbs (n);
613   cy  = refmpn_lshift (tp, vp, n, s);
614   cy += refmpn_add_n (rp, up, tp, n);
615   free (tp);
616   return cy;
617 }
618 mp_limb_t
619 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
620 {
621   return refmpn_addlsh_n (rp, up, vp, n, 1);
622 }
623 mp_limb_t
624 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
625 {
626   return refmpn_addlsh_n (rp, up, vp, n, 2);
627 }
628 
629 mp_limb_t
630 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
631 		 mp_size_t n, unsigned int s)
632 {
633   mp_limb_t cy;
634   mp_ptr tp;
635 
636   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
637   ASSERT (n >= 1);
638   ASSERT (0 < s && s < GMP_NUMB_BITS);
639   ASSERT_MPN (up, n);
640   ASSERT_MPN (vp, n);
641 
642   tp = refmpn_malloc_limbs (n);
643   cy  = mpn_lshift (tp, vp, n, s);
644   cy += mpn_sub_n (rp, up, tp, n);
645   free (tp);
646   return cy;
647 }
648 mp_limb_t
649 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
650 {
651   return refmpn_sublsh_n (rp, up, vp, n, 1);
652 }
653 
654 mp_limb_signed_t
655 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
656 		 mp_size_t n, unsigned int s)
657 {
658   mp_limb_signed_t cy;
659   mp_ptr tp;
660 
661   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
662   ASSERT (n >= 1);
663   ASSERT (0 < s && s < GMP_NUMB_BITS);
664   ASSERT_MPN (up, n);
665   ASSERT_MPN (vp, n);
666 
667   tp = refmpn_malloc_limbs (n);
668   cy  = mpn_lshift (tp, vp, n, s);
669   cy -= mpn_sub_n (rp, tp, up, n);
670   free (tp);
671   return cy;
672 }
673 mp_limb_signed_t
674 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
675 {
676   return refmpn_rsblsh_n (rp, up, vp, n, 1);
677 }
678 mp_limb_signed_t
679 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
680 {
681   return refmpn_rsblsh_n (rp, up, vp, n, 2);
682 }
683 
684 mp_limb_t
685 refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
686 {
687   mp_limb_t cya, cys;
688 
689   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
690   ASSERT (n >= 1);
691   ASSERT_MPN (up, n);
692   ASSERT_MPN (vp, n);
693 
694   cya = mpn_add_n (rp, up, vp, n);
695   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
696   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
697   return cys;
698 }
699 mp_limb_t
700 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
701 {
702   mp_limb_t cya, cys;
703 
704   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
705   ASSERT (n >= 1);
706   ASSERT_MPN (up, n);
707   ASSERT_MPN (vp, n);
708 
709   cya = mpn_sub_n (rp, up, vp, n);
710   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
711   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
712   return cys;
713 }
714 
715 /* Twos complement, return borrow. */
716 mp_limb_t
717 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
718 {
719   mp_ptr     zeros;
720   mp_limb_t  ret;
721 
722   ASSERT (size >= 1);
723 
724   zeros = refmpn_malloc_limbs (size);
725   refmpn_fill (zeros, size, CNST_LIMB(0));
726   ret = refmpn_sub_n (dst, zeros, src, size);
727   free (zeros);
728   return ret;
729 }
730 
731 
732 #define AORS(aors_n, aors_1)                                    \
733   {                                                             \
734     mp_limb_t  c;                                               \
735     ASSERT (s1size >= s2size);                                  \
736     ASSERT (s2size >= 1);                                       \
737     c = aors_n (rp, s1p, s2p, s2size);                          \
738     if (s1size-s2size != 0)                                     \
739       c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
740     return c;                                                   \
741   }
742 mp_limb_t
743 refmpn_add (mp_ptr rp,
744 	    mp_srcptr s1p, mp_size_t s1size,
745 	    mp_srcptr s2p, mp_size_t s2size)
746 {
747   AORS (refmpn_add_n, refmpn_add_1);
748 }
749 mp_limb_t
750 refmpn_sub (mp_ptr rp,
751 	    mp_srcptr s1p, mp_size_t s1size,
752 	    mp_srcptr s2p, mp_size_t s2size)
753 {
754   AORS (refmpn_sub_n, refmpn_sub_1);
755 }
756 
757 
758 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
759 #define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
760 
761 #define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
762 #define HIGHMASK  SHIFTHIGH(LOWMASK)
763 
764 #define LOWPART(x)   ((x) & LOWMASK)
765 #define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
766 
767 /* Set *lo to the low limb of x*y and return the high limb, using full limbs not nails. */
768 mp_limb_t
769 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
770 {
771   mp_limb_t  hi, s;
772 
773   *lo = LOWPART(x) * LOWPART(y);
774   hi = HIGHPART(x) * HIGHPART(y);
775 
776   s = LOWPART(x) * HIGHPART(y);
777   hi += HIGHPART(s);
778   s = SHIFTHIGH(LOWPART(s));
779   *lo += s;
780   hi += (*lo < s);
781 
782   s = HIGHPART(x) * LOWPART(y);
783   hi += HIGHPART(s);
784   s = SHIFTHIGH(LOWPART(s));
785   *lo += s;
786   hi += (*lo < s);
787 
788   return hi;
789 }
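
/* Added illustration (a hypothetical helper, not in the original file):
   squaring 2^(GMP_LIMB_BITS/2) exercises only the high*high term of the
   decomposition above, giving hi=1 and lo=0.  */
static void
refmpn_umul_ppmm_example (void)
{
  mp_limb_t  lo;
  mp_limb_t  half = CNST_LIMB(1) << (GMP_LIMB_BITS/2);

  ASSERT (refmpn_umul_ppmm (&lo, half, half) == 1);
  ASSERT (lo == 0);
}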
790 
791 mp_limb_t
792 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
793 {
794   return refmpn_umul_ppmm (lo, x, y);
795 }
796 
797 mp_limb_t
798 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
799 	       mp_limb_t carry)
800 {
801   mp_size_t  i;
802   mp_limb_t  hi, lo;
803 
804   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
805   ASSERT (size >= 1);
806   ASSERT_MPN (sp, size);
807   ASSERT_LIMB (multiplier);
808   ASSERT_LIMB (carry);
809 
810   multiplier <<= GMP_NAIL_BITS;
811   for (i = 0; i < size; i++)
812     {
813       hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
814       lo >>= GMP_NAIL_BITS;
815       ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
816       rp[i] = lo;
817       carry = hi;
818     }
819   return carry;
820 }
821 
822 mp_limb_t
823 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
824 {
825   return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
826 }
827 
828 
829 mp_limb_t
830 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
831 	      mp_srcptr mult, mp_size_t msize)
832 {
833   mp_ptr     src_copy;
834   mp_limb_t  ret;
835   mp_size_t  i;
836 
837   ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
838   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
839   ASSERT (size >= msize);
840   ASSERT_MPN (mult, msize);
841 
842   /* in case dst==src */
843   src_copy = refmpn_malloc_limbs (size);
844   refmpn_copyi (src_copy, src, size);
845   src = src_copy;
846 
847   dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
848   for (i = 1; i < msize-1; i++)
849     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
850   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
851 
852   free (src_copy);
853   return ret;
854 }
855 
856 mp_limb_t
857 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
858 {
859   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
860 }
861 mp_limb_t
862 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
863 {
864   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
865 }
866 mp_limb_t
867 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
868 {
869   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
870 }
871 
872 #define AORSMUL_1C(operation_n)                                 \
873   {                                                             \
874     mp_ptr     p;                                               \
875     mp_limb_t  ret;                                             \
876 								\
877     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
878 								\
879     p = refmpn_malloc_limbs (size);                             \
880     ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
881     ret += operation_n (rp, rp, p, size);                       \
882 								\
883     free (p);                                                   \
884     return ret;                                                 \
885   }
886 
887 mp_limb_t
888 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
889 		  mp_limb_t multiplier, mp_limb_t carry)
890 {
891   AORSMUL_1C (refmpn_add_n);
892 }
893 mp_limb_t
894 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
895 		  mp_limb_t multiplier, mp_limb_t carry)
896 {
897   AORSMUL_1C (refmpn_sub_n);
898 }
899 
900 
901 mp_limb_t
902 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
903 {
904   return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
905 }
906 mp_limb_t
907 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
908 {
909   return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
910 }
911 
912 
913 mp_limb_t
914 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
915 		 mp_srcptr mult, mp_size_t msize)
916 {
917   mp_ptr     src_copy;
918   mp_limb_t  ret;
919   mp_size_t  i;
920 
921   ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
922   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
923   ASSERT (size >= msize);
924   ASSERT_MPN (mult, msize);
925 
926   /* in case dst==src */
927   src_copy = refmpn_malloc_limbs (size);
928   refmpn_copyi (src_copy, src, size);
929   src = src_copy;
930 
931   for (i = 0; i < msize-1; i++)
932     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
933   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
934 
935   free (src_copy);
936   return ret;
937 }
938 
939 mp_limb_t
940 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
941 {
942   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
943 }
944 mp_limb_t
945 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
946 {
947   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
948 }
949 mp_limb_t
950 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
951 {
952   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
953 }
954 mp_limb_t
955 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
956 {
957   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
958 }
959 mp_limb_t
960 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
961 {
962   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
963 }
964 mp_limb_t
965 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
966 {
967   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
968 }
969 mp_limb_t
970 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
971 {
972   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
973 }
974 
975 mp_limb_t
976 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
977 		  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
978 		  mp_limb_t carry)
979 {
980   mp_ptr p;
981   mp_limb_t acy, scy;
982 
983   /* Destinations can't overlap. */
984   ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
985   ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
986   ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
987   ASSERT (size >= 1);
988 
989   /* in case r1p==s1p or r1p==s2p */
990   p = refmpn_malloc_limbs (size);
991 
992   acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
993   scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
994   refmpn_copyi (r1p, p, size);
995 
996   free (p);
997   return 2 * acy + scy;
998 }
999 
1000 mp_limb_t
1001 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
1002 		 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1003 {
1004   return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
1005 }
1006 
1007 
1008 /* Right shift hi,lo and return the low limb of the result.
1009    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1010 mp_limb_t
1011 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1012 {
1013   ASSERT (shift < GMP_NUMB_BITS);
1014   if (shift == 0)
1015     return lo;
1016   else
1017     return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1018 }
1019 
1020 /* Left shift hi,lo and return the high limb of the result.
1021    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1022 mp_limb_t
1023 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1024 {
1025   ASSERT (shift < GMP_NUMB_BITS);
1026   if (shift == 0)
1027     return hi;
1028   else
1029     return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1030 }
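
/* Added illustration (a hypothetical helper, not in the original file): the
   two helpers splice adjacent limbs, so the pair hi=1,lo=0 shifted right by
   one bit leaves only the top bit of the result set, and the reverse shift
   recovers the 1.  */
static void
rshift_make_example (void)
{
  ASSERT (rshift_make (CNST_LIMB(1), CNST_LIMB(0), 1) == GMP_NUMB_HIGHBIT);
  ASSERT (lshift_make (CNST_LIMB(0), GMP_NUMB_HIGHBIT, 1) == 1);
}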
1031 
1032 
1033 mp_limb_t
1034 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1035 {
1036   mp_limb_t  ret;
1037   mp_size_t  i;
1038 
1039   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1040   ASSERT (size >= 1);
1041   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1042   ASSERT_MPN (sp, size);
1043 
1044   ret = rshift_make (sp[0], CNST_LIMB(0), shift);
1045 
1046   for (i = 0; i < size-1; i++)
1047     rp[i] = rshift_make (sp[i+1], sp[i], shift);
1048 
1049   rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
1050   return ret;
1051 }
1052 
1053 mp_limb_t
1054 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1055 {
1056   mp_limb_t  ret;
1057   mp_size_t  i;
1058 
1059   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1060   ASSERT (size >= 1);
1061   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1062   ASSERT_MPN (sp, size);
1063 
1064   ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
1065 
1066   for (i = size-2; i >= 0; i--)
1067     rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
1068 
1069   rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
1070   return ret;
1071 }
1072 
1073 void
1074 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1075 {
1076   mp_size_t i;
1077 
1078   /* We work downwards since mpn_lshiftc needs that. */
1079   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1080 
1081   for (i = size - 1; i >= 0; i--)
1082     rp[i] = (~sp[i]) & GMP_NUMB_MASK;
1083 }
1084 
1085 mp_limb_t
1086 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1087 {
1088   mp_limb_t res;
1089 
1090   /* No asserts here, refmpn_lshift will assert what we need. */
1091 
1092   res = refmpn_lshift (rp, sp, size, shift);
1093   refmpn_com (rp, rp, size);
1094   return res;
1095 }
1096 
1097 /* accepting shift==0 and doing a plain copyi or copyd in that case */
1098 mp_limb_t
1099 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1100 {
1101   if (shift == 0)
1102     {
1103       refmpn_copyi (rp, sp, size);
1104       return 0;
1105     }
1106   else
1107     {
1108       return refmpn_rshift (rp, sp, size, shift);
1109     }
1110 }
1111 mp_limb_t
1112 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1113 {
1114   if (shift == 0)
1115     {
1116       refmpn_copyd (rp, sp, size);
1117       return 0;
1118     }
1119   else
1120     {
1121       return refmpn_lshift (rp, sp, size, shift);
1122     }
1123 }
1124 
1125 /* accepting size==0 too */
1126 mp_limb_t
1127 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1128 			   unsigned shift)
1129 {
1130   return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
1131 }
1132 mp_limb_t
1133 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1134 			   unsigned shift)
1135 {
1136   return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
1137 }
1138 
1139 /* Divide h,l by d, return quotient, store remainder to *rp.
1140    Operates on full limbs, not nails.
1141    Must have h < d.
1142    __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
1143 mp_limb_t
1144 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
1145 {
1146   mp_limb_t  q, r;
1147   int  n;
1148 
1149   ASSERT (d != 0);
1150   ASSERT (h < d);
1151 
1152 #if 0
1153   udiv_qrnnd (q, r, h, l, d);
1154   *rp = r;
1155   return q;
1156 #endif
1157 
1158   n = refmpn_count_leading_zeros (d);
1159   d <<= n;
1160 
1161   if (n != 0)
1162     {
1163       h = (h << n) | (l >> (GMP_LIMB_BITS - n));
1164       l <<= n;
1165     }
1166 
1167   __udiv_qrnnd_c (q, r, h, l, d);
1168   r >>= n;
1169   *rp = r;
1170   return q;
1171 }
1172 
1173 mp_limb_t
1174 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
1175 {
1176   return refmpn_udiv_qrnnd (rp, h, l, d);
1177 }
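
/* Added illustration (a hypothetical helper, not in the original file): a
   small two-limb dividend, 0:100 divided by 7, giving quotient 14 and
   remainder 2 (the h < d precondition holds since h is 0).  */
static void
refmpn_udiv_qrnnd_example (void)
{
  mp_limb_t  r;

  ASSERT (refmpn_udiv_qrnnd (&r, CNST_LIMB(0), CNST_LIMB(100), CNST_LIMB(7))
	  == 14);
  ASSERT (r == 2);
}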
1178 
1179 /* This little subroutine avoids some bad code generation from i386 gcc 3.0
1180    -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
1181 static mp_limb_t
1182 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1183 			     mp_limb_t divisor, mp_limb_t carry)
1184 {
1185   mp_size_t  i;
1186   mp_limb_t rem[1];
1187   for (i = size-1; i >= 0; i--)
1188     {
1189       rp[i] = refmpn_udiv_qrnnd (rem, carry,
1190 				 sp[i] << GMP_NAIL_BITS,
1191 				 divisor << GMP_NAIL_BITS);
1192       carry = *rem >> GMP_NAIL_BITS;
1193     }
1194   return carry;
1195 }
1196 
1197 mp_limb_t
1198 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1199 		  mp_limb_t divisor, mp_limb_t carry)
1200 {
1201   mp_ptr     sp_orig;
1202   mp_ptr     prod;
1203   mp_limb_t  carry_orig;
1204 
1205   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1206   ASSERT (size >= 0);
1207   ASSERT (carry < divisor);
1208   ASSERT_MPN (sp, size);
1209   ASSERT_LIMB (divisor);
1210   ASSERT_LIMB (carry);
1211 
1212   if (size == 0)
1213     return carry;
1214 
1215   sp_orig = refmpn_memdup_limbs (sp, size);
1216   prod = refmpn_malloc_limbs (size);
1217   carry_orig = carry;
1218 
1219   carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1220 
1221   /* check by multiplying back */
1222 #if 0
1223   printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1224 	  size, divisor, carry_orig, carry);
1225   mpn_trace("s",sp_copy,size);
1226   mpn_trace("r",rp,size);
1227   printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1228   mpn_trace("p",prod,size);
1229 #endif
1230   ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1231   ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1232   free (sp_orig);
1233   free (prod);
1234 
1235   return carry;
1236 }
1237 
1238 mp_limb_t
1239 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1240 {
1241   return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
1242 }
1243 
1244 
1245 mp_limb_t
1246 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1247 	       mp_limb_t carry)
1248 {
1249   mp_ptr  p = refmpn_malloc_limbs (size);
1250   carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
1251   free (p);
1252   return carry;
1253 }
1254 
1255 mp_limb_t
1256 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1257 {
1258   return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
1259 }
1260 
1261 mp_limb_t
1262 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1263 		     mp_limb_t inverse)
1264 {
1265   ASSERT (divisor & GMP_NUMB_HIGHBIT);
1266   ASSERT (inverse == refmpn_invert_limb (divisor));
1267   return refmpn_mod_1 (sp, size, divisor);
1268 }
1269 
1270 /* This implementation will be rather slow, but has the advantage of being
1271    in a different style than the libgmp versions.  */
1272 mp_limb_t
1273 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
1274 {
1275   ASSERT ((GMP_NUMB_BITS % 4) == 0);
1276   return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
1277 }
1278 
1279 
1280 mp_limb_t
1281 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
1282 		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1283 		  mp_limb_t carry)
1284 {
1285   mp_ptr  z;
1286 
1287   z = refmpn_malloc_limbs (xsize);
1288   refmpn_fill (z, xsize, CNST_LIMB(0));
1289 
1290   carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
1291   carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
1292 
1293   free (z);
1294   return carry;
1295 }
1296 
1297 mp_limb_t
1298 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
1299 		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1300 {
1301   return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
1302 }
1303 
1304 mp_limb_t
1305 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
1306 			mp_srcptr sp, mp_size_t size,
1307 			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
1308 {
1309   ASSERT (size >= 0);
1310   ASSERT (shift == refmpn_count_leading_zeros (divisor));
1311   ASSERT (inverse == refmpn_invert_limb (divisor << shift));
1312 
1313   return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
1314 }
1315 
1316 mp_limb_t
1317 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
1318 		 mp_ptr np, mp_size_t nn,
1319 		 mp_srcptr dp)
1320 {
1321   mp_ptr tp;
1322   mp_limb_t qh;
1323 
1324   tp = refmpn_malloc_limbs (nn + qxn);
1325   refmpn_zero (tp, qxn);
1326   refmpn_copyi (tp + qxn, np, nn);
1327   qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
1328   refmpn_copyi (np, tp, 2);
1329   free (tp);
1330   return qh;
1331 }
1332 
1333 /* Inverse is floor((b*(b-d)-1) / d), per the "Division by Invariant Integers
1334    using Multiplication" paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note
1335    that -d-1 (i.e. b-d-1) < d since d has the high bit set. */
1336 
1337 mp_limb_t
1338 refmpn_invert_limb (mp_limb_t d)
1339 {
1340   mp_limb_t r;
1341   ASSERT (d & GMP_LIMB_HIGHBIT);
1342   return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
1343 }
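
/* Added illustration (a hypothetical helper, not in the original file): for
   the smallest normalized divisor d = 2^(GMP_LIMB_BITS-1), the formula
   floor((b*(b-d)-1)/d) works out to b-1, i.e. the all-ones limb.  */
static void
refmpn_invert_limb_example (void)
{
  ASSERT (refmpn_invert_limb (GMP_LIMB_HIGHBIT) == MP_LIMB_T_MAX);
}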
1344 
1345 void
1346 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1347 {
1348   mp_ptr qp, tp;
1349   TMP_DECL;
1350   TMP_MARK;
1351 
1352   tp = TMP_ALLOC_LIMBS (2 * n);
1353   qp = TMP_ALLOC_LIMBS (n + 1);
1354 
1355   MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);
1356 
1357   refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
1358   refmpn_copyi (rp, qp, n);
1359 
1360   TMP_FREE;
1361 }
1362 
1363 void
1364 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1365 {
1366   mp_ptr tp;
1367   mp_limb_t binv;
1368   TMP_DECL;
1369   TMP_MARK;
1370 
1371   /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
1372      code.  To make up for it, we check that the inverse is correct using a
1373      multiply.  */
1374 
1375   tp = TMP_ALLOC_LIMBS (2 * n);
1376 
1377   MPN_ZERO (tp, n);
1378   tp[0] = 1;
1379   binvert_limb (binv, up[0]);
1380   mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
1381 
1382   refmpn_mul_n (tp, rp, up, n);
1383   ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
1384 
1385   TMP_FREE;
1386 }
1387 
1388 /* The aim is to produce a dst quotient and return a remainder c, satisfying
1389    c*b^n + src-i == 3*dst, where i is the incoming carry.
1390 
1391    Some value c==0, c==1 or c==2 will satisfy, so just try each.
1392 
1393    If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
1394    remainder from the first division attempt determines the correct
1395    remainder (3-c), but don't bother with that, since we can't guarantee
1396    anything about GMP_NUMB_BITS when using nails.
1397 
1398    If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
1399    complement negative, i.e. b^n+src-i, and the calculation produces c1
1400    satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
1401    means it's enough to just add any borrow back at the end.
1402 
1403    A borrow only occurs when src==0 or src==1, and, by the same reasoning as in
1404    mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
1405    or 1 respectively, so with 1 added the final return value is still in the
1406    prescribed range 0 to 2. */
1407 
1408 mp_limb_t
1409 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
1410 {
1411   mp_ptr     spcopy;
1412   mp_limb_t  c, cs;
1413 
1414   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1415   ASSERT (size >= 1);
1416   ASSERT (carry <= 2);
1417   ASSERT_MPN (sp, size);
1418 
1419   spcopy = refmpn_malloc_limbs (size);
1420   cs = refmpn_sub_1 (spcopy, sp, size, carry);
1421 
1422   for (c = 0; c <= 2; c++)
1423     if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
1424       goto done;
1425   ASSERT_FAIL (no value of c satisfies);
1426 
1427  done:
1428   c += cs;
1429   ASSERT (c <= 2);
1430 
1431   free (spcopy);
1432   return c;
1433 }
1434 
1435 mp_limb_t
1436 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1437 {
1438   return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
1439 }
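
/* Added illustration (a hypothetical helper, not in the original file): an
   exact single-limb case, 21 divided by 3 with no incoming carry, giving
   quotient 7 and a zero return.  */
static void
refmpn_divexact_by3_example (void)
{
  mp_limb_t  q[1];
  mp_limb_t  s[1] = { CNST_LIMB(21) };

  ASSERT (refmpn_divexact_by3 (q, s, (mp_size_t) 1) == 0);
  ASSERT (q[0] == 7);
}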
1440 
1441 
1442 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
1443 void
1444 refmpn_mul_basecase (mp_ptr prodp,
1445 		     mp_srcptr up, mp_size_t usize,
1446 		     mp_srcptr vp, mp_size_t vsize)
1447 {
1448   mp_size_t i;
1449 
1450   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1451   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1452   ASSERT (usize >= vsize);
1453   ASSERT (vsize >= 1);
1454   ASSERT_MPN (up, usize);
1455   ASSERT_MPN (vp, vsize);
1456 
1457   prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
1458   for (i = 1; i < vsize; i++)
1459     prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
1460 }
1461 
1462 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
1463 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
1464 #if WANT_FFT
1465 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
1466 #else
1467 #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
1468 #endif
1469 
1470 void
1471 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
1472 {
1473   mp_ptr tp;
1474   mp_size_t tn;
1475   mp_limb_t cy;
1476 
1477   if (vn < TOOM3_THRESHOLD)
1478     {
1479       /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
1480 	 mul_basecase.  */
1481       if (vn != 0)
1482 	refmpn_mul_basecase (wp, up, un, vp, vn);
1483       else
1484 	MPN_ZERO (wp, un);
1485       return;
1486     }
1487 
1488   if (vn < TOOM4_THRESHOLD)
1489     {
1490       /* In the mpn_toom33_mul range, use mpn_toom22_mul.  */
1491       tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
1492       tp = refmpn_malloc_limbs (tn);
1493       mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1494     }
1495   else if (vn < FFT_THRESHOLD)
1496     {
1497       /* In the mpn_toom44_mul range, use mpn_toom33_mul.  */
1498       tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
1499       tp = refmpn_malloc_limbs (tn);
1500       mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1501     }
1502   else
1503     {
1504       /* Finally, for the largest operands, use mpn_toom44_mul.  */
1505       tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
1506       tp = refmpn_malloc_limbs (tn);
1507       mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1508     }
1509 
1510   if (un != vn)
1511     {
1512       if (un - vn < vn)
1513 	refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
1514       else
1515 	refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
1516 
1517       MPN_COPY (wp, tp, vn);
1518       cy = refmpn_add (wp + vn, wp + vn, un, tp + vn, vn);
1519     }
1520   else
1521     {
1522       MPN_COPY (wp, tp, 2 * vn);
1523     }
1524 
1525   free (tp);
1526 }
1527 
1528 void
1529 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1530 {
1531   refmpn_mul (prodp, up, size, vp, size);
1532 }
1533 
1534 void
1535 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1536 {
1537   mp_ptr tp = refmpn_malloc_limbs (2*size);
1538   refmpn_mul (tp, up, size, vp, size);
1539   refmpn_copyi (prodp, tp, size);
1540   free (tp);
1541 }
1542 
1543 void
1544 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
1545 {
1546   refmpn_mul (dst, src, size, src, size);
1547 }
1548 
1549 /* Allowing usize<vsize, usize==0 or vsize==0. */
1550 void
1551 refmpn_mul_any (mp_ptr prodp,
1552 		     mp_srcptr up, mp_size_t usize,
1553 		     mp_srcptr vp, mp_size_t vsize)
1554 {
1555   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1556   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1557   ASSERT (usize >= 0);
1558   ASSERT (vsize >= 0);
1559   ASSERT_MPN (up, usize);
1560   ASSERT_MPN (vp, vsize);
1561 
1562   if (usize == 0)
1563     {
1564       refmpn_fill (prodp, vsize, CNST_LIMB(0));
1565       return;
1566     }
1567 
1568   if (vsize == 0)
1569     {
1570       refmpn_fill (prodp, usize, CNST_LIMB(0));
1571       return;
1572     }
1573 
1574   if (usize >= vsize)
1575     refmpn_mul (prodp, up, usize, vp, vsize);
1576   else
1577     refmpn_mul (prodp, vp, vsize, up, usize);
1578 }
1579 
1580 
1581 mp_limb_t
1582 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
1583 {
1584   mp_limb_t  x;
1585   int  twos;
1586 
1587   ASSERT (y != 0);
1588   ASSERT (! refmpn_zero_p (xp, xsize));
1589   ASSERT_MPN (xp, xsize);
1590   ASSERT_LIMB (y);
1591 
1592   x = refmpn_mod_1 (xp, xsize, y);
1593   if (x == 0)
1594     return y;
1595 
1596   twos = 0;
1597   while ((x & 1) == 0 && (y & 1) == 0)
1598     {
1599       x >>= 1;
1600       y >>= 1;
1601       twos++;
1602     }
1603 
1604   for (;;)
1605     {
1606       while ((x & 1) == 0)  x >>= 1;
1607       while ((y & 1) == 0)  y >>= 1;
1608 
1609       if (x < y)
1610 	MP_LIMB_T_SWAP (x, y);
1611 
1612       x -= y;
1613       if (x == 0)
1614 	break;
1615     }
1616 
1617   return y << twos;
1618 }
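
/* Added illustration (a hypothetical helper, not in the original file): the
   gcd of the limb 12 with 18 is 6; the shared factor of two is stripped in
   the first loop and restored by the final shift.  */
static void
refmpn_gcd_1_example (void)
{
  mp_limb_t  x[1] = { CNST_LIMB(12) };

  ASSERT (refmpn_gcd_1 (x, (mp_size_t) 1, CNST_LIMB(18)) == 6);
}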
1619 
1620 
1621 /* Based on the full limb x, not nails. */
1622 unsigned
1623 refmpn_count_leading_zeros (mp_limb_t x)
1624 {
1625   unsigned  n = 0;
1626 
1627   ASSERT (x != 0);
1628 
1629   while ((x & GMP_LIMB_HIGHBIT) == 0)
1630     {
1631       x <<= 1;
1632       n++;
1633     }
1634   return n;
1635 }
1636 
1637 /* Full limbs allowed, not limited to nails. */
1638 unsigned
1639 refmpn_count_trailing_zeros (mp_limb_t x)
1640 {
1641   unsigned  n = 0;
1642 
1643   ASSERT (x != 0);
1644   ASSERT_LIMB (x);
1645 
1646   while ((x & 1) == 0)
1647     {
1648       x >>= 1;
1649       n++;
1650     }
1651   return n;
1652 }
1653 
1654 /* Strip factors of two (low zero bits) from {p,size} by right shifting.
1655    The return value is the number of twos stripped.  */
1656 mp_size_t
1657 refmpn_strip_twos (mp_ptr p, mp_size_t size)
1658 {
1659   mp_size_t  limbs;
1660   unsigned   shift;
1661 
1662   ASSERT (size >= 1);
1663   ASSERT (! refmpn_zero_p (p, size));
1664   ASSERT_MPN (p, size);
1665 
1666   for (limbs = 0; p[0] == 0; limbs++)
1667     {
1668       refmpn_copyi (p, p+1, size-1);
1669       p[size-1] = 0;
1670     }
1671 
1672   shift = refmpn_count_trailing_zeros (p[0]);
1673   if (shift)
1674     refmpn_rshift (p, p, size, shift);
1675 
1676   return limbs*GMP_NUMB_BITS + shift;
1677 }
1678 
1679 mp_limb_t
1680 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
1681 {
1682   int       cmp;
1683 
1684   ASSERT (ysize >= 1);
1685   ASSERT (xsize >= ysize);
1686   ASSERT ((xp[0] & 1) != 0);
1687   ASSERT ((yp[0] & 1) != 0);
1688   /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
1689   ASSERT (yp[ysize-1] != 0);
1690   ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
1691   ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
1692   ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
1693   if (xsize == ysize)
1694     ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
1695   ASSERT_MPN (xp, xsize);
1696   ASSERT_MPN (yp, ysize);
1697 
1698   refmpn_strip_twos (xp, xsize);
1699   MPN_NORMALIZE (xp, xsize);
1700   MPN_NORMALIZE (yp, ysize);
1701 
1702   for (;;)
1703     {
1704       cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
1705       if (cmp == 0)
1706 	break;
1707       if (cmp < 0)
1708 	MPN_PTR_SWAP (xp,xsize, yp,ysize);
1709 
1710       ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
1711 
1712       refmpn_strip_twos (xp, xsize);
1713       MPN_NORMALIZE (xp, xsize);
1714     }
1715 
1716   refmpn_copyi (gp, xp, xsize);
1717   return xsize;
1718 }
1719 
1720 unsigned long
1721 ref_popc_limb (mp_limb_t src)
1722 {
1723   unsigned long  count;
1724   int  i;
1725 
1726   count = 0;
1727   for (i = 0; i < GMP_LIMB_BITS; i++)
1728     {
1729       count += (src & 1);
1730       src >>= 1;
1731     }
1732   return count;
1733 }
1734 
1735 unsigned long
1736 refmpn_popcount (mp_srcptr sp, mp_size_t size)
1737 {
1738   unsigned long  count = 0;
1739   mp_size_t  i;
1740 
1741   ASSERT (size >= 0);
1742   ASSERT_MPN (sp, size);
1743 
1744   for (i = 0; i < size; i++)
1745     count += ref_popc_limb (sp[i]);
1746   return count;
1747 }
1748 
1749 unsigned long
1750 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1751 {
1752   mp_ptr  d;
1753   unsigned long  count;
1754 
1755   ASSERT (size >= 0);
1756   ASSERT_MPN (s1p, size);
1757   ASSERT_MPN (s2p, size);
1758 
1759   if (size == 0)
1760     return 0;
1761 
1762   d = refmpn_malloc_limbs (size);
1763   refmpn_xor_n (d, s1p, s2p, size);
1764   count = refmpn_popcount (d, size);
1765   free (d);
1766   return count;
1767 }
1768 
1769 
1770 /* set r to a%d */
1771 void
1772 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
1773 {
1774   mp_limb_t  D[2];
1775   int        n;
1776 
1777   ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
1778   ASSERT_MPN (a, 2);
1779   ASSERT_MPN (d, 2);
1780 
1781   D[1] = d[1], D[0] = d[0];
1782   r[1] = a[1], r[0] = a[0];
1783   n = 0;
1784 
1785   for (;;)
1786     {
1787       if (D[1] & GMP_NUMB_HIGHBIT)
1788 	break;
1789       if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
1790 	break;
1791       refmpn_lshift (D, D, (mp_size_t) 2, 1);
1792       n++;
1793       ASSERT (n <= GMP_NUMB_BITS);
1794     }
1795 
1796   while (n >= 0)
1797     {
1798       if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
1799 	ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
1800       refmpn_rshift (D, D, (mp_size_t) 2, 1);
1801       n--;
1802     }
1803 
1804   ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
1805 }
1806 
1807 
1808 
1809 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
1810    particular the trial quotient is allowed to be 2 too big. */
1811 mp_limb_t
1812 refmpn_sb_div_qr (mp_ptr qp,
1813 		  mp_ptr np, mp_size_t nsize,
1814 		  mp_srcptr dp, mp_size_t dsize)
1815 {
1816   mp_limb_t  retval = 0;
1817   mp_size_t  i;
1818   mp_limb_t  d1 = dp[dsize-1];
1819   mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);
1820 
1821   ASSERT (nsize >= dsize);
1822   /* ASSERT (dsize > 2); */
1823   ASSERT (dsize >= 2);
1824   ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
1825   ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
1826   ASSERT_MPN (np, nsize);
1827   ASSERT_MPN (dp, dsize);
1828 
1829   i = nsize-dsize;
1830   if (refmpn_cmp (np+i, dp, dsize) >= 0)
1831     {
1832       ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
1833       retval = 1;
1834     }
1835 
1836   for (i--; i >= 0; i--)
1837     {
1838       mp_limb_t  n0 = np[i+dsize];
1839       mp_limb_t  n1 = np[i+dsize-1];
1840       mp_limb_t  q, dummy_r;
1841 
1842       ASSERT (n0 <= d1);
1843       if (n0 == d1)
1844 	q = GMP_NUMB_MAX;
1845       else
1846 	q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
1847 			       d1 << GMP_NAIL_BITS);
1848 
1849       n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
1850       ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
1851       if (n0)
1852 	{
1853 	  q--;
1854 	  if (! refmpn_add_n (np+i, np+i, dp, dsize))
1855 	    {
1856 	      q--;
1857 	      ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
1858 	    }
1859 	}
1860       np[i+dsize] = 0;
1861 
1862       qp[i] = q;
1863     }
1864 
1865   /* remainder < divisor */
1866 #if 0		/* ASSERT triggers gcc 4.2.1 bug */
1867   ASSERT (refmpn_cmp (np, dp, dsize) < 0);
1868 #endif
1869 
1870   /* multiply back to original */
1871   {
1872     mp_ptr  mp = refmpn_malloc_limbs (nsize);
1873 
1874     refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
1875     if (retval)
1876       ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
1877     ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
1878     ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
1879 
1880     free (mp);
1881   }
1882 
1883   free (np_orig);
1884   return retval;
1885 }
1886 
1887 /* Full division {np,nsize} / {dp,dsize}: handle dsize==1 with refmpn_divmod_1,
1888    else normalize, use refmpn_sb_div_qr, and unnormalize the remainder. */
1889 void
1890 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
1891 		mp_ptr np, mp_size_t nsize,
1892 		mp_srcptr dp, mp_size_t dsize)
1893 {
1894   ASSERT (qxn == 0);
1895   ASSERT_MPN (np, nsize);
1896   ASSERT_MPN (dp, dsize);
1897   ASSERT (dsize > 0);
1898   ASSERT (dp[dsize-1] != 0);
1899 
1900   if (dsize == 1)
1901     {
1902       rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
1903       return;
1904     }
1905   else
1906     {
1907       mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
1908       mp_ptr  d2p = refmpn_malloc_limbs (dsize);
1909       int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
1910 
1911       n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
1912       ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
1913 
1914       refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
1915       refmpn_rshift_or_copy (rp, n2p, dsize, norm);
1916 
1917       /* ASSERT (refmpn_zero_p (n2p+dsize, nsize+1-dsize)); */
1918       free (n2p);
1919       free (d2p);
1920     }
1921 }
1922 
1923 void
1924 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
1925 {
1926   mp_size_t j;
1927   mp_limb_t cy;
1928 
1929   ASSERT_MPN (up, 2*n);
1930   /* ASSERT about directed overlap rp, up */
1931   /* ASSERT about overlap rp, mp */
1932   /* ASSERT about overlap up, mp */
1933 
1934   for (j = n - 1; j >= 0; j--)
1935     {
1936       up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
1937       up++;
1938     }
1939   cy = mpn_add_n (rp, up, up - n, n);
1940   if (cy != 0)
1941     mpn_sub_n (rp, rp, mp, n);
1942 }
1943 
1944 size_t
1945 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
1946 {
1947   unsigned char  *d;
1948   size_t  dsize;
1949 
1950   ASSERT (size >= 0);
1951   ASSERT (base >= 2);
1952   ASSERT (base < numberof (mp_bases));
1953   ASSERT (size == 0 || src[size-1] != 0);
1954   ASSERT_MPN (src, size);
1955 
1956   MPN_SIZEINBASE (dsize, src, size, base);
1957   ASSERT (dsize >= 1);
1958   ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB));
1959 
1960   if (size == 0)
1961     {
1962       dst[0] = 0;
1963       return 1;
1964     }
1965 
1966   /* don't clobber input for power of 2 bases */
1967   if (POW2_P (base))
1968     src = refmpn_memdup_limbs (src, size);
1969 
1970   d = dst + dsize;
1971   do
1972     {
1973       d--;
1974       ASSERT (d >= dst);
1975       *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
1976       size -= (src[size-1] == 0);
1977     }
1978   while (size != 0);
1979 
1980   /* Move result back and decrement dsize if we didn't generate
1981      the maximum possible digits.  */
1982   if (d != dst)
1983     {
1984       size_t i;
1985       dsize -= d - dst;
1986       for (i = 0; i < dsize; i++)
1987 	dst[i] = d[i];
1988     }
1989 
1990   if (POW2_P (base))
1991     free (src);
1992 
1993   return dsize;
1994 }
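
/* Added illustration (a hypothetical helper, not in the original file):
   converting the limb 123 to base 10 yields the digit values 1,2,3 (not
   ASCII); note the source is clobbered for non-power-of-2 bases.  */
static void
refmpn_get_str_example (void)
{
  mp_limb_t      src[1] = { CNST_LIMB(123) };
  unsigned char  dst[4];

  ASSERT (refmpn_get_str (dst, 10, src, (mp_size_t) 1) == 3);
  ASSERT (dst[0] == 1 && dst[1] == 2 && dst[2] == 3);
}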
1995 
1996 
1997 mp_limb_t
1998 ref_bswap_limb (mp_limb_t src)
1999 {
2000   mp_limb_t  dst;
2001   int        i;
2002 
2003   dst = 0;
2004   for (i = 0; i < BYTES_PER_MP_LIMB; i++)
2005     {
2006       dst = (dst << 8) + (src & 0xFF);
2007       src >>= 8;
2008     }
2009   return dst;
2010 }
2011 
2012 
2013 /* These random functions are mostly for transitional purposes while adding
2014    nail support, since they're independent of the normal mpn routines.  They
2015    can probably be removed when those normal routines are reliable, though
2016    perhaps something independent would still be useful at times.  */
2017 
2018 #if GMP_LIMB_BITS == 32
2019 #define RAND_A  CNST_LIMB(0x29CF535)
2020 #endif
2021 #if GMP_LIMB_BITS == 64
2022 #define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
2023 #endif
2024 
2025 mp_limb_t  refmpn_random_seed;
2026 
2027 mp_limb_t
2028 refmpn_random_half (void)
2029 {
2030   refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
2031   return (refmpn_random_seed >> GMP_LIMB_BITS/2);
2032 }
2033 
2034 mp_limb_t
2035 refmpn_random_limb (void)
2036 {
2037   return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
2038 	   | refmpn_random_half ()) & GMP_NUMB_MASK;
2039 }
2040 
2041 void
2042 refmpn_random (mp_ptr ptr, mp_size_t size)
2043 {
2044   mp_size_t  i;
2045   if (GMP_NAIL_BITS == 0)
2046     {
2047       mpn_random (ptr, size);
2048       return;
2049     }
2050 
2051   for (i = 0; i < size; i++)
2052     ptr[i] = refmpn_random_limb ();
2053 }
2054 
2055 void
2056 refmpn_random2 (mp_ptr ptr, mp_size_t size)
2057 {
2058   mp_size_t  i;
2059   mp_limb_t  bit, mask, limb;
2060   int        run;
2061 
2062   if (GMP_NAIL_BITS == 0)
2063     {
2064       mpn_random2 (ptr, size);
2065       return;
2066     }
2067 
2068 #define RUN_MODULUS  32
2069 
2070   /* start with ones at a random pos in the high limb */
2071   bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
2072   mask = 0;
2073   run = 0;
2074 
2075   for (i = size-1; i >= 0; i--)
2076     {
2077       limb = 0;
2078       do
2079 	{
2080 	  if (run == 0)
2081 	    {
2082 	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
2083 	      mask = ~mask;
2084 	    }
2085 
2086 	  limb |= (bit & mask);
2087 	  bit >>= 1;
2088 	  run--;
2089 	}
2090       while (bit != 0);
2091 
2092       ptr[i] = limb;
2093       bit = GMP_NUMB_HIGHBIT;
2094     }
2095 }
2096 
2097 /* This is a simple bitwise algorithm working high to low across "s" and
2098    testing each time whether setting the bit would make s^2 exceed n.  */
2099 mp_size_t
2100 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
2101 {
2102   mp_ptr     tp, dp;
2103   mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
2104   unsigned   ibit;
2105   long       i;
2106   mp_limb_t  c;
2107 
2108   ASSERT (nsize >= 0);
2109 
2110   /* If n==0, then s=0 and r=0.  */
2111   if (nsize == 0)
2112     return 0;
2113 
2114   ASSERT (np[nsize - 1] != 0);
2115   ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
2116   ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
2117   ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
2118 
2119   /* root */
2120   ssize = (nsize+1)/2;
2121   refmpn_zero (sp, ssize);
2122 
2123   /* the remainder so far */
2124   dp = refmpn_memdup_limbs (np, nsize);
2125   dsize = nsize;
2126 
2127   /* temporary */
2128   talloc = 2*ssize + 1;
2129   tp = refmpn_malloc_limbs (talloc);
2130 
2131   for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
2132     {
2133       /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
2134 	 is added to it */
2135 
2136       ilimbs = (i+1) / GMP_NUMB_BITS;
2137       ibit = (i+1) % GMP_NUMB_BITS;
2138       refmpn_zero (tp, ilimbs);
2139       c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
2140       tsize = ilimbs + ssize;
2141       tp[tsize] = c;
2142       tsize += (c != 0);
2143 
2144       ilimbs = (2*i) / GMP_NUMB_BITS;
2145       ibit = (2*i) % GMP_NUMB_BITS;
2146       if (ilimbs + 1 > tsize)
2147 	{
2148 	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
2149 	  tsize = ilimbs + 1;
2150 	}
2151       c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
2152 			CNST_LIMB(1) << ibit);
2153       ASSERT (tsize < talloc);
2154       tp[tsize] = c;
2155       tsize += (c != 0);
2156 
2157       if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
2158 	{
2159 	  /* set this bit in s and subtract from the remainder */
2160 	  refmpn_setbit (sp, i);
2161 
2162 	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
2163 	  dsize = refmpn_normalize (dp, dsize);
2164 	}
2165     }
2166 
2167   if (rp == NULL)
2168     {
2169       ret = ! refmpn_zero_p (dp, dsize);
2170     }
2171   else
2172     {
2173       ASSERT (dsize == 0 || dp[dsize-1] != 0);
2174       refmpn_copy (rp, dp, dsize);
2175       ret = dsize;
2176     }
2177 
2178   free (dp);
2179   free (tp);
2180   return ret;
2181 }
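
/* Added illustration (a hypothetical helper, not in the original file): the
   square root of the limb 27 is 5 with remainder 2, so the returned
   remainder size is 1.  */
static void
refmpn_sqrtrem_example (void)
{
  mp_limb_t  n[1] = { CNST_LIMB(27) };
  mp_limb_t  s[1], r[1];

  ASSERT (refmpn_sqrtrem (s, r, n, (mp_size_t) 1) == 1);
  ASSERT (s[0] == 5 && r[0] == 2);
}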
2182