xref: /netbsd-src/external/lgpl3/gmp/dist/tests/refmpn.c (revision fdd524d4ccd2bb0c6f67401e938dabf773eb0372)
1 /* Reference mpn functions, designed to be simple, portable and independent
2    of the normal gmp code.  Speed isn't a consideration.
3 
4 Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
5 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
6 
7 This file is part of the GNU MP Library test suite.
8 
9 The GNU MP Library test suite is free software; you can redistribute it
10 and/or modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 3 of the License,
12 or (at your option) any later version.
13 
14 The GNU MP Library test suite is distributed in the hope that it will be
15 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
17 Public License for more details.
18 
19 You should have received a copy of the GNU General Public License along with
20 the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
21 
22 
23 /* Most routines have assertions representing what the mpn routines are
24    supposed to accept.  Many of these reference routines do sensible things
25    outside these ranges (eg. for size==0), but the assertions are present to
26    pick up bad parameters passed here that would then be passed unchanged
27    to the real mpn routine being compared.  */
28 
29 /* always do assertion checking */
30 #define WANT_ASSERT  1
31 
32 #include <stdio.h>  /* for NULL */
33 #include <stdlib.h> /* for malloc */
34 
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38 
39 #include "tests.h"
40 
41 
42 
43 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
44    in bytes. */
45 int
46 byte_overlap_p (const void *v_xp, mp_size_t xsize,
47 		const void *v_yp, mp_size_t ysize)
48 {
49   const char *xp = (const char *) v_xp;
50   const char *yp = (const char *) v_yp;
51 
52   ASSERT (xsize >= 0);
53   ASSERT (ysize >= 0);
54 
55   /* no wraparounds */
56   ASSERT (xp+xsize >= xp);
57   ASSERT (yp+ysize >= yp);
58 
59   if (xp + xsize <= yp)
60     return 0;
61 
62   if (yp + ysize <= xp)
63     return 0;
64 
65   return 1;
66 }
67 
68 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
69 int
70 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
71 {
72   return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
73 			 yp, ysize * BYTES_PER_MP_LIMB);
74 }
75 
76 /* Check overlap for a routine defined to work low to high. */
77 int
78 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
79 {
80   return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
81 }
82 
83 /* Check overlap for a routine defined to work high to low. */
84 int
85 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
86 {
87   return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
88 }
89 
90 /* Check overlap for a standard routine requiring equal or separate. */
91 int
92 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
93 {
94   return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
95 }
96 int
97 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
98 			       mp_size_t size)
99 {
100   return (refmpn_overlap_fullonly_p (dst, src1, size)
101 	  && refmpn_overlap_fullonly_p (dst, src2, size));
102 }
103 
104 
105 mp_ptr
106 refmpn_malloc_limbs (mp_size_t size)
107 {
108   mp_ptr  p;
109   ASSERT (size >= 0);
110   if (size == 0)
111     size = 1;
112   p = (mp_ptr) malloc ((size_t) (size * BYTES_PER_MP_LIMB));
113   ASSERT (p != NULL);
114   return p;
115 }
116 
117 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
118  * memory allocated by refmpn_malloc_limbs_aligned. */
119 void
120 refmpn_free_limbs (mp_ptr p)
121 {
122   free (p);
123 }
124 
125 mp_ptr
126 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
127 {
128   mp_ptr  p;
129   p = refmpn_malloc_limbs (size);
130   refmpn_copyi (p, ptr, size);
131   return p;
132 }
133 
134 /* malloc n limbs on a boundary that is a multiple of m bytes */
135 mp_ptr
136 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
137 {
138   return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
139 }
140 
141 
142 void
143 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
144 {
145   mp_size_t  i;
146   ASSERT (size >= 0);
147   for (i = 0; i < size; i++)
148     ptr[i] = value;
149 }
150 
151 void
152 refmpn_zero (mp_ptr ptr, mp_size_t size)
153 {
154   refmpn_fill (ptr, size, CNST_LIMB(0));
155 }
156 
157 void
158 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
159 {
160   ASSERT (newsize >= oldsize);
161   refmpn_zero (ptr+oldsize, newsize-oldsize);
162 }
163 
164 int
165 refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
166 {
167   mp_size_t  i;
168   for (i = 0; i < size; i++)
169     if (ptr[i] != 0)
170       return 0;
171   return 1;
172 }
173 
174 mp_size_t
175 refmpn_normalize (mp_srcptr ptr, mp_size_t size)
176 {
177   ASSERT (size >= 0);
178   while (size > 0 && ptr[size-1] == 0)
179     size--;
180   return size;
181 }
182 
183 /* the highest one bit in x */
184 mp_limb_t
185 refmpn_msbone (mp_limb_t x)
186 {
187   mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
188 
189   while (n != 0)
190     {
191       if (x & n)
192 	break;
193       n >>= 1;
194     }
195   return n;
196 }
197 
198 /* a mask of the highest one bit and all bits below */
199 mp_limb_t
200 refmpn_msbone_mask (mp_limb_t x)
201 {
202   if (x == 0)
203     return 0;
204 
205   return (refmpn_msbone (x) << 1) - 1;
206 }
207 
208 /* How many digits in the given base will fit in a limb.
209    Notice that the product b is allowed to be equal to the limit
210    2^GMP_NUMB_BITS; this ensures the result for base==2 will be
211    GMP_NUMB_BITS (and similarly for other powers of 2).  */
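/* For instance, with GMP_NUMB_BITS==32 the result for base 10 is 9, since
   10^9 < 2^32 <= 10^10, and for base 16 it is 8, which relies on the product
   being allowed to reach the limit exactly (16^8 == 2^32).  */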
212 int
213 refmpn_chars_per_limb (int base)
214 {
215   mp_limb_t  limit[2], b[2];
216   int        chars_per_limb;
217 
218   ASSERT (base >= 2);
219 
220   limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
221   limit[1] = 1;
222   b[0] = 1;      /* b = 1 */
223   b[1] = 0;
224 
225   chars_per_limb = 0;
226   for (;;)
227     {
228       if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
229 	break;
230       if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
231 	break;
232       chars_per_limb++;
233     }
234   return chars_per_limb;
235 }
236 
237 /* The biggest value base**n which fits in GMP_NUMB_BITS bits. */
238 mp_limb_t
239 refmpn_big_base (int base)
240 {
241   int        chars_per_limb = refmpn_chars_per_limb (base);
242   int        i;
243   mp_limb_t  bb;
244 
245   ASSERT (base >= 2);
246   bb = 1;
247   for (i = 0; i < chars_per_limb; i++)
248     bb *= base;
249   return bb;
250 }
251 
252 
253 void
254 refmpn_setbit (mp_ptr ptr, unsigned long bit)
255 {
256   ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
257 }
258 
259 void
260 refmpn_clrbit (mp_ptr ptr, unsigned long bit)
261 {
262   ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
263 }
264 
265 #define REFMPN_TSTBIT(ptr,bit) \
266   (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
267 
268 int
269 refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
270 {
271   return REFMPN_TSTBIT (ptr, bit);
272 }
273 
274 unsigned long
275 refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
276 {
277   while (REFMPN_TSTBIT (ptr, bit) != 0)
278     bit++;
279   return bit;
280 }
281 
282 unsigned long
283 refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
284 {
285   while (REFMPN_TSTBIT (ptr, bit) == 0)
286     bit++;
287   return bit;
288 }
289 
290 void
291 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
292 {
293   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
294   refmpn_copyi (rp, sp, size);
295 }
296 
297 void
298 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
299 {
300   mp_size_t i;
301 
302   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
303   ASSERT (size >= 0);
304 
305   for (i = 0; i < size; i++)
306     rp[i] = sp[i];
307 }
308 
309 void
310 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
311 {
312   mp_size_t i;
313 
314   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
315   ASSERT (size >= 0);
316 
317   for (i = size-1; i >= 0; i--)
318     rp[i] = sp[i];
319 }
320 
321 /* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
322    zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
323 void
324 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
325 {
326   ASSERT (wsize >= 0);
327   ASSERT (xsize >= 0);
328 
329   /* high part of x if x bigger than w */
330   if (xsize > wsize)
331     {
332       xp += xsize - wsize;
333       xsize = wsize;
334     }
335 
336   refmpn_copy (wp + wsize-xsize, xp, xsize);
337   refmpn_zero (wp, wsize-xsize);
338 }
339 
340 int
341 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
342 {
343   mp_size_t  i;
344 
345   ASSERT (size >= 1);
346   ASSERT_MPN (xp, size);
347   ASSERT_MPN (yp, size);
348 
349   for (i = size-1; i >= 0; i--)
350     {
351       if (xp[i] > yp[i])  return 1;
352       if (xp[i] < yp[i])  return -1;
353     }
354   return 0;
355 }
356 
357 int
358 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
359 {
360   if (size == 0)
361     return 0;
362   else
363     return refmpn_cmp (xp, yp, size);
364 }
365 
366 int
367 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
368 		     mp_srcptr yp, mp_size_t ysize)
369 {
370   int  opp, cmp;
371 
372   ASSERT_MPN (xp, xsize);
373   ASSERT_MPN (yp, ysize);
374 
375   opp = (xsize < ysize);
376   if (opp)
377     MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
378 
379   if (! refmpn_zero_p (xp+ysize, xsize-ysize))
380     cmp = 1;
381   else
382     cmp = refmpn_cmp (xp, yp, ysize);
383 
384   return (opp ? -cmp : cmp);
385 }
386 
387 int
388 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
389 {
390   mp_size_t  i;
391   ASSERT (size >= 0);
392 
393   for (i = 0; i < size; i++)
394       if (xp[i] != yp[i])
395 	return 0;
396   return 1;
397 }
398 
399 
400 #define LOGOPS(operation)                                               \
401   {                                                                     \
402     mp_size_t  i;                                                       \
403 									\
404     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
405     ASSERT (size >= 1);                                                 \
406     ASSERT_MPN (s1p, size);                                             \
407     ASSERT_MPN (s2p, size);                                             \
408 									\
409     for (i = 0; i < size; i++)                                          \
410       rp[i] = operation;                                                \
411   }
412 
413 void
414 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
415 {
416   LOGOPS (s1p[i] & s2p[i]);
417 }
418 void
419 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
420 {
421   LOGOPS (s1p[i] & ~s2p[i]);
422 }
423 void
424 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
425 {
426   LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
427 }
428 void
429 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
430 {
431   LOGOPS (s1p[i] | s2p[i]);
432 }
433 void
434 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
435 {
436   LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
437 }
438 void
439 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
440 {
441   LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
442 }
443 void
444 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
445 {
446   LOGOPS (s1p[i] ^ s2p[i]);
447 }
448 void
449 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
450 {
451   LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
452 }
453 
454 
455 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
456 void
457 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
458 		   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
459 {
460   *dl = ml - sl;
461   *dh = mh - sh - (ml < sl);
462 }
463 
464 
465 /* set *w to x+y, return 0 or 1 carry */
466 mp_limb_t
467 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
468 {
469   mp_limb_t  sum, cy;
470 
471   ASSERT_LIMB (x);
472   ASSERT_LIMB (y);
473 
474   sum = x + y;
475 #if GMP_NAIL_BITS == 0
476   *w = sum;
477   cy = (sum < x);
478 #else
479   *w = sum & GMP_NUMB_MASK;
480   cy = (sum >> GMP_NUMB_BITS);
481 #endif
482   return cy;
483 }
484 
485 /* set *w to x-y, return 0 or 1 borrow */
486 mp_limb_t
487 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
488 {
489   mp_limb_t  diff, cy;
490 
491   ASSERT_LIMB (x);
492   ASSERT_LIMB (y);
493 
494   diff = x - y;
495 #if GMP_NAIL_BITS == 0
496   *w = diff;
497   cy = (diff > x);
498 #else
499   *w = diff & GMP_NUMB_MASK;
500   cy = (diff >> GMP_NUMB_BITS) & 1;
501 #endif
502   return cy;
503 }
504 
505 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
506 mp_limb_t
507 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
508 {
509   mp_limb_t  r;
510 
511   ASSERT_LIMB (x);
512   ASSERT_LIMB (y);
513   ASSERT (c == 0 || c == 1);
514 
515   r = ref_addc_limb (w, x, y);
516   return r + ref_addc_limb (w, *w, c);
517 }
518 
519 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
520 mp_limb_t
521 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
522 {
523   mp_limb_t  r;
524 
525   ASSERT_LIMB (x);
526   ASSERT_LIMB (y);
527   ASSERT (c == 0 || c == 1);
528 
529   r = ref_subc_limb (w, x, y);
530   return r + ref_subc_limb (w, *w, c);
531 }
532 
533 
534 #define AORS_1(operation)                               \
535   {                                                     \
536     mp_size_t  i;                                       \
537 							\
538     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
539     ASSERT (size >= 1);                                 \
540     ASSERT_MPN (sp, size);                              \
541     ASSERT_LIMB (n);                                    \
542 							\
543     for (i = 0; i < size; i++)                          \
544       n = operation (&rp[i], sp[i], n);                 \
545     return n;                                           \
546   }
547 
548 mp_limb_t
549 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
550 {
551   AORS_1 (ref_addc_limb);
552 }
553 mp_limb_t
554 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
555 {
556   AORS_1 (ref_subc_limb);
557 }
558 
559 #define AORS_NC(operation)                                              \
560   {                                                                     \
561     mp_size_t  i;                                                       \
562 									\
563     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
564     ASSERT (carry == 0 || carry == 1);                                  \
565     ASSERT (size >= 1);                                                 \
566     ASSERT_MPN (s1p, size);                                             \
567     ASSERT_MPN (s2p, size);                                             \
568 									\
569     for (i = 0; i < size; i++)                                          \
570       carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
571     return carry;                                                       \
572   }
573 
574 mp_limb_t
575 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
576 	       mp_limb_t carry)
577 {
578   AORS_NC (adc);
579 }
580 mp_limb_t
581 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
582 	       mp_limb_t carry)
583 {
584   AORS_NC (sbb);
585 }
586 
587 
588 mp_limb_t
589 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
590 {
591   return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
592 }
593 mp_limb_t
594 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
595 {
596   return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
597 }
598 
599 mp_limb_t
600 refmpn_addcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
601 {
602   if (cnd != 0)
603     return refmpn_add_n (rp, s1p, s2p, size);
604   else
605     {
606       refmpn_copyi (rp, s1p, size);
607       return 0;
608     }
609 }
610 mp_limb_t
611 refmpn_subcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
612 {
613   if (cnd != 0)
614     return refmpn_sub_n (rp, s1p, s2p, size);
615   else
616     {
617       refmpn_copyi (rp, s1p, size);
618       return 0;
619     }
620 }
621 
622 
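/* For the _err1_ variants: alongside the add or subtract, {ep,2} accumulates
   the sum of yp[size-1-i] over every limb position i that produced a carry
   (or borrow).  */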
623 #define AORS_ERR1_N(operation)						\
624   {                                                                     \
625     mp_size_t  i;                                                       \
626     mp_limb_t carry2;							\
627 									\
628     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
629     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
630     ASSERT (! refmpn_overlap_p (rp, size, yp, size));			\
631     ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));			\
632     ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));			\
633     ASSERT (! refmpn_overlap_p (ep, 2, yp, size));			\
634     ASSERT (! refmpn_overlap_p (ep, 2, rp, size));			\
635 									\
636     ASSERT (carry == 0 || carry == 1);					\
637     ASSERT (size >= 1);							\
638     ASSERT_MPN (s1p, size);						\
639     ASSERT_MPN (s2p, size);						\
640     ASSERT_MPN (yp, size);						\
641 									\
642     ep[0] = ep[1] = CNST_LIMB(0);					\
643 									\
644     for (i = 0; i < size; i++)                                          \
645       {									\
646 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
647 	if (carry == 1)							\
648 	  {								\
649 	    carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);	\
650 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
651 	    ASSERT (carry2 == 0);					\
652 	  }								\
653       }									\
654     return carry;                                                       \
655   }
656 
657 mp_limb_t
658 refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
659 		   mp_ptr ep, mp_srcptr yp,
660 		   mp_size_t size, mp_limb_t carry)
661 {
662   AORS_ERR1_N (adc);
663 }
664 mp_limb_t
665 refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
666 		   mp_ptr ep, mp_srcptr yp,
667 		   mp_size_t size, mp_limb_t carry)
668 {
669   AORS_ERR1_N (sbb);
670 }
671 
672 
673 #define AORS_ERR2_N(operation)						\
674   {                                                                     \
675     mp_size_t  i;                                                       \
676     mp_limb_t carry2;							\
677 									\
678     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
679     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
680     ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
681     ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
682     ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));			\
683     ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));			\
684     ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));			\
685     ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));			\
686     ASSERT (! refmpn_overlap_p (ep, 4, rp, size));			\
687 									\
688     ASSERT (carry == 0 || carry == 1);					\
689     ASSERT (size >= 1);							\
690     ASSERT_MPN (s1p, size);						\
691     ASSERT_MPN (s2p, size);						\
692     ASSERT_MPN (y1p, size);						\
693     ASSERT_MPN (y2p, size);						\
694 									\
695     ep[0] = ep[1] = CNST_LIMB(0);					\
696     ep[2] = ep[3] = CNST_LIMB(0);					\
697 									\
698     for (i = 0; i < size; i++)                                          \
699       {									\
700 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
701 	if (carry == 1)							\
702 	  {								\
703 	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
704 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
705 	    ASSERT (carry2 == 0);					\
706 	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
707 	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
708 	    ASSERT (carry2 == 0);					\
709 	  }								\
710       }									\
711     return carry;                                                       \
712   }
713 
714 mp_limb_t
715 refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
716 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
717 		   mp_size_t size, mp_limb_t carry)
718 {
719   AORS_ERR2_N (adc);
720 }
721 mp_limb_t
722 refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
723 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
724 		   mp_size_t size, mp_limb_t carry)
725 {
726   AORS_ERR2_N (sbb);
727 }
728 
729 
730 #define AORS_ERR3_N(operation)						\
731   {                                                                     \
732     mp_size_t  i;                                                       \
733     mp_limb_t carry2;							\
734 									\
735     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
736     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
737     ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
738     ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
739     ASSERT (! refmpn_overlap_p (rp, size, y3p, size));			\
740     ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));			\
741     ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));			\
742     ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));			\
743     ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));			\
744     ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));			\
745     ASSERT (! refmpn_overlap_p (ep, 6, rp, size));			\
746 									\
747     ASSERT (carry == 0 || carry == 1);					\
748     ASSERT (size >= 1);							\
749     ASSERT_MPN (s1p, size);						\
750     ASSERT_MPN (s2p, size);						\
751     ASSERT_MPN (y1p, size);						\
752     ASSERT_MPN (y2p, size);						\
753     ASSERT_MPN (y3p, size);						\
754 									\
755     ep[0] = ep[1] = CNST_LIMB(0);					\
756     ep[2] = ep[3] = CNST_LIMB(0);					\
757     ep[4] = ep[5] = CNST_LIMB(0);					\
758 									\
759     for (i = 0; i < size; i++)                                          \
760       {									\
761 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
762 	if (carry == 1)							\
763 	  {								\
764 	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
765 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
766 	    ASSERT (carry2 == 0);					\
767 	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
768 	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
769 	    ASSERT (carry2 == 0);					\
770 	    carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);	\
771 	    carry2 = ref_addc_limb (&ep[5], ep[5], carry2);		\
772 	    ASSERT (carry2 == 0);					\
773 	  }								\
774       }									\
775     return carry;                                                       \
776   }
777 
778 mp_limb_t
779 refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
780 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
781 		   mp_size_t size, mp_limb_t carry)
782 {
783   AORS_ERR3_N (adc);
784 }
785 mp_limb_t
786 refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
787 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
788 		   mp_size_t size, mp_limb_t carry)
789 {
790   AORS_ERR3_N (sbb);
791 }
792 
793 
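/* rp[] = up[] + (vp[] << s).  The return value is the carry out, at most 2^s:
   the bits shifted out of vp plus the carry from the addition.  */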
794 mp_limb_t
795 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
796 		 mp_size_t n, unsigned int s)
797 {
798   mp_limb_t cy;
799   mp_ptr tp;
800 
801   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
802   ASSERT (n >= 1);
803   ASSERT (0 < s && s < GMP_NUMB_BITS);
804   ASSERT_MPN (up, n);
805   ASSERT_MPN (vp, n);
806 
807   tp = refmpn_malloc_limbs (n);
808   cy  = refmpn_lshift (tp, vp, n, s);
809   cy += refmpn_add_n (rp, up, tp, n);
810   free (tp);
811   return cy;
812 }
813 mp_limb_t
814 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
815 {
816   return refmpn_addlsh_n (rp, up, vp, n, 1);
817 }
818 mp_limb_t
819 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
820 {
821   return refmpn_addlsh_n (rp, up, vp, n, 2);
822 }
823 mp_limb_t
824 refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
825 {
826   return refmpn_addlsh_n (rp, rp, vp, n, s);
827 }
828 mp_limb_t
829 refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
830 {
831   return refmpn_addlsh_n (rp, rp, vp, n, 1);
832 }
833 mp_limb_t
834 refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
835 {
836   return refmpn_addlsh_n (rp, rp, vp, n, 2);
837 }
838 mp_limb_t
839 refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
840 {
841   return refmpn_addlsh_n (rp, vp, rp, n, s);
842 }
843 mp_limb_t
844 refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
845 {
846   return refmpn_addlsh_n (rp, vp, rp, n, 1);
847 }
848 mp_limb_t
849 refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
850 {
851   return refmpn_addlsh_n (rp, vp, rp, n, 2);
852 }
853 mp_limb_t
854 refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
855 		 mp_size_t n, unsigned int s, mp_limb_t carry)
856 {
857   mp_limb_t cy;
858 
859   ASSERT (carry >= 0 && carry <= (CNST_LIMB(1) << s));
860 
861   cy = refmpn_addlsh_n (rp, up, vp, n, s);
862   cy += refmpn_add_1 (rp, rp, n, carry);
863   return cy;
864 }
865 mp_limb_t
866 refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
867 {
868   return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
869 }
870 mp_limb_t
871 refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
872 {
873   return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
874 }
875 
876 mp_limb_t
877 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
878 		 mp_size_t n, unsigned int s)
879 {
880   mp_limb_t cy;
881   mp_ptr tp;
882 
883   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
884   ASSERT (n >= 1);
885   ASSERT (0 < s && s < GMP_NUMB_BITS);
886   ASSERT_MPN (up, n);
887   ASSERT_MPN (vp, n);
888 
889   tp = refmpn_malloc_limbs (n);
890   cy  = mpn_lshift (tp, vp, n, s);
891   cy += mpn_sub_n (rp, up, tp, n);
892   free (tp);
893   return cy;
894 }
895 mp_limb_t
896 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
897 {
898   return refmpn_sublsh_n (rp, up, vp, n, 1);
899 }
900 mp_limb_t
901 refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
902 {
903   return refmpn_sublsh_n (rp, up, vp, n, 2);
904 }
905 mp_limb_t
906 refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
907 {
908   return refmpn_sublsh_n (rp, rp, vp, n, s);
909 }
910 mp_limb_t
911 refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
912 {
913   return refmpn_sublsh_n (rp, rp, vp, n, 1);
914 }
915 mp_limb_t
916 refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
917 {
918   return refmpn_sublsh_n (rp, rp, vp, n, 2);
919 }
920 mp_limb_t
921 refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
922 {
923   return refmpn_sublsh_n (rp, vp, rp, n, s);
924 }
925 mp_limb_t
926 refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
927 {
928   return refmpn_sublsh_n (rp, vp, rp, n, 1);
929 }
930 mp_limb_t
931 refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
932 {
933   return refmpn_sublsh_n (rp, vp, rp, n, 2);
934 }
935 mp_limb_t
936 refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
937 		 mp_size_t n, unsigned int s, mp_limb_t carry)
938 {
939   mp_limb_t cy;
940 
941   ASSERT (carry >= 0 && carry <= (CNST_LIMB(1) << s));
942 
943   cy = refmpn_sublsh_n (rp, up, vp, n, s);
944   cy += refmpn_sub_1 (rp, rp, n, carry);
945   return cy;
946 }
947 mp_limb_t
948 refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
949 {
950   return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
951 }
952 mp_limb_t
953 refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
954 {
955   return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
956 }
957 
958 mp_limb_signed_t
959 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
960 		 mp_size_t n, unsigned int s)
961 {
962   mp_limb_signed_t cy;
963   mp_ptr tp;
964 
965   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
966   ASSERT (n >= 1);
967   ASSERT (0 < s && s < GMP_NUMB_BITS);
968   ASSERT_MPN (up, n);
969   ASSERT_MPN (vp, n);
970 
971   tp = refmpn_malloc_limbs (n);
972   cy  = mpn_lshift (tp, vp, n, s);
973   cy -= mpn_sub_n (rp, tp, up, n);
974   free (tp);
975   return cy;
976 }
977 mp_limb_signed_t
978 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
979 {
980   return refmpn_rsblsh_n (rp, up, vp, n, 1);
981 }
982 mp_limb_signed_t
983 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
984 {
985   return refmpn_rsblsh_n (rp, up, vp, n, 2);
986 }
987 mp_limb_signed_t
988 refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
989 		 mp_size_t n, unsigned int s, mp_limb_signed_t carry)
990 {
991   mp_limb_signed_t cy;
992 
993   ASSERT (carry == -1 || (carry >> s) == 0);
994 
995   cy = refmpn_rsblsh_n (rp, up, vp, n, s);
996   if (carry > 0)
997     cy += refmpn_add_1 (rp, rp, n, carry);
998   else
999     cy -= refmpn_sub_1 (rp, rp, n, -carry);
1000   return cy;
1001 }
1002 mp_limb_signed_t
1003 refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1004 {
1005   return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
1006 }
1007 mp_limb_signed_t
1008 refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1009 {
1010   return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
1011 }
1012 
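/* In effect rp[] = floor ((up[] + vp[]) / 2), with the carry of the full sum
   kept as the new top bit; the low bit shifted out of the sum is returned.  */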
1013 mp_limb_t
1014 refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1015 {
1016   mp_limb_t cya, cys;
1017 
1018   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1019   ASSERT (n >= 1);
1020   ASSERT_MPN (up, n);
1021   ASSERT_MPN (vp, n);
1022 
1023   cya = mpn_add_n (rp, up, vp, n);
1024   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1025   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1026   return cys;
1027 }
1028 mp_limb_t
1029 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1030 {
1031   mp_limb_t cya, cys;
1032 
1033   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1034   ASSERT (n >= 1);
1035   ASSERT_MPN (up, n);
1036   ASSERT_MPN (vp, n);
1037 
1038   cya = mpn_sub_n (rp, up, vp, n);
1039   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1040   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1041   return cys;
1042 }
1043 
1044 /* Twos complement, return borrow. */
1045 mp_limb_t
1046 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
1047 {
1048   mp_ptr     zeros;
1049   mp_limb_t  ret;
1050 
1051   ASSERT (size >= 1);
1052 
1053   zeros = refmpn_malloc_limbs (size);
1054   refmpn_fill (zeros, size, CNST_LIMB(0));
1055   ret = refmpn_sub_n (dst, zeros, src, size);
1056   free (zeros);
1057   return ret;
1058 }
1059 
1060 
1061 #define AORS(aors_n, aors_1)                                    \
1062   {                                                             \
1063     mp_limb_t  c;                                               \
1064     ASSERT (s1size >= s2size);                                  \
1065     ASSERT (s2size >= 1);                                       \
1066     c = aors_n (rp, s1p, s2p, s2size);                          \
1067     if (s1size-s2size != 0)                                     \
1068       c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
1069     return c;                                                   \
1070   }
1071 mp_limb_t
1072 refmpn_add (mp_ptr rp,
1073 	    mp_srcptr s1p, mp_size_t s1size,
1074 	    mp_srcptr s2p, mp_size_t s2size)
1075 {
1076   AORS (refmpn_add_n, refmpn_add_1);
1077 }
1078 mp_limb_t
1079 refmpn_sub (mp_ptr rp,
1080 	    mp_srcptr s1p, mp_size_t s1size,
1081 	    mp_srcptr s2p, mp_size_t s2size)
1082 {
1083   AORS (refmpn_sub_n, refmpn_sub_1);
1084 }
1085 
1086 
1087 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
1088 #define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
1089 
1090 #define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
1091 #define HIGHMASK  SHIFTHIGH(LOWMASK)
1092 
1093 #define LOWPART(x)   ((x) & LOWMASK)
1094 #define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
1095 
1096 /* Set *lo to the low limb of x*y and return the high limb, using full limbs not nails. */
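/* The product is formed schoolbook-style from half-limb pieces: *lo gets
   LOWPART(x)*LOWPART(y), hi gets HIGHPART(x)*HIGHPART(y), and the two cross
   products are split and added in with explicit carry propagation.  */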
1097 mp_limb_t
1098 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
1099 {
1100   mp_limb_t  hi, s;
1101 
1102   *lo = LOWPART(x) * LOWPART(y);
1103   hi = HIGHPART(x) * HIGHPART(y);
1104 
1105   s = LOWPART(x) * HIGHPART(y);
1106   hi += HIGHPART(s);
1107   s = SHIFTHIGH(LOWPART(s));
1108   *lo += s;
1109   hi += (*lo < s);
1110 
1111   s = HIGHPART(x) * LOWPART(y);
1112   hi += HIGHPART(s);
1113   s = SHIFTHIGH(LOWPART(s));
1114   *lo += s;
1115   hi += (*lo < s);
1116 
1117   return hi;
1118 }
1119 
1120 mp_limb_t
1121 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
1122 {
1123   return refmpn_umul_ppmm (lo, x, y);
1124 }
1125 
1126 mp_limb_t
1127 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
1128 	       mp_limb_t carry)
1129 {
1130   mp_size_t  i;
1131   mp_limb_t  hi, lo;
1132 
1133   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1134   ASSERT (size >= 1);
1135   ASSERT_MPN (sp, size);
1136   ASSERT_LIMB (multiplier);
1137   ASSERT_LIMB (carry);
1138 
1139   multiplier <<= GMP_NAIL_BITS;
1140   for (i = 0; i < size; i++)
1141     {
1142       hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
1143       lo >>= GMP_NAIL_BITS;
1144       ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
1145       rp[i] = lo;
1146       carry = hi;
1147     }
1148   return carry;
1149 }
1150 
1151 mp_limb_t
1152 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1153 {
1154   return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1155 }
1156 
1157 
1158 mp_limb_t
1159 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1160 	      mp_srcptr mult, mp_size_t msize)
1161 {
1162   mp_ptr     src_copy;
1163   mp_limb_t  ret;
1164   mp_size_t  i;
1165 
1166   ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
1167   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1168   ASSERT (size >= msize);
1169   ASSERT_MPN (mult, msize);
1170 
1171   /* in case dst==src */
1172   src_copy = refmpn_malloc_limbs (size);
1173   refmpn_copyi (src_copy, src, size);
1174   src = src_copy;
1175 
1176   dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
1177   for (i = 1; i < msize-1; i++)
1178     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1179   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1180 
1181   free (src_copy);
1182   return ret;
1183 }
1184 
1185 mp_limb_t
1186 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1187 {
1188   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
1189 }
1190 mp_limb_t
1191 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1192 {
1193   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
1194 }
1195 mp_limb_t
1196 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1197 {
1198   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
1199 }
1200 mp_limb_t
1201 refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1202 {
1203   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
1204 }
1205 mp_limb_t
1206 refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1207 {
1208   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
1209 }
1210 
1211 #define AORSMUL_1C(operation_n)                                 \
1212   {                                                             \
1213     mp_ptr     p;                                               \
1214     mp_limb_t  ret;                                             \
1215 								\
1216     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
1217 								\
1218     p = refmpn_malloc_limbs (size);                             \
1219     ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
1220     ret += operation_n (rp, rp, p, size);                       \
1221 								\
1222     free (p);                                                   \
1223     return ret;                                                 \
1224   }
1225 
1226 mp_limb_t
1227 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1228 		  mp_limb_t multiplier, mp_limb_t carry)
1229 {
1230   AORSMUL_1C (refmpn_add_n);
1231 }
1232 mp_limb_t
1233 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1234 		  mp_limb_t multiplier, mp_limb_t carry)
1235 {
1236   AORSMUL_1C (refmpn_sub_n);
1237 }
1238 
1239 
1240 mp_limb_t
1241 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1242 {
1243   return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1244 }
1245 mp_limb_t
1246 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1247 {
1248   return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1249 }
1250 
1251 
1252 mp_limb_t
1253 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1254 		 mp_srcptr mult, mp_size_t msize)
1255 {
1256   mp_ptr     src_copy;
1257   mp_limb_t  ret;
1258   mp_size_t  i;
1259 
1260   ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
1261   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1262   ASSERT (size >= msize);
1263   ASSERT_MPN (mult, msize);
1264 
1265   /* in case dst==src */
1266   src_copy = refmpn_malloc_limbs (size);
1267   refmpn_copyi (src_copy, src, size);
1268   src = src_copy;
1269 
1270   for (i = 0; i < msize-1; i++)
1271     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1272   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1273 
1274   free (src_copy);
1275   return ret;
1276 }
1277 
1278 mp_limb_t
1279 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1280 {
1281   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
1282 }
1283 mp_limb_t
1284 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1285 {
1286   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
1287 }
1288 mp_limb_t
1289 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1290 {
1291   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
1292 }
1293 mp_limb_t
1294 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1295 {
1296   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
1297 }
1298 mp_limb_t
1299 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1300 {
1301   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
1302 }
1303 mp_limb_t
1304 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1305 {
1306   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
1307 }
1308 mp_limb_t
1309 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1310 {
1311   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
1312 }
1313 
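/* Simultaneous add and subtract.  Both the incoming carry and the return
   value pack the add carry into bit 1 and the subtract borrow into bit 0.  */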
1314 mp_limb_t
1315 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
1316 		  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
1317 		  mp_limb_t carry)
1318 {
1319   mp_ptr p;
1320   mp_limb_t acy, scy;
1321 
1322   /* Destinations can't overlap. */
1323   ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
1324   ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
1325   ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
1326   ASSERT (size >= 1);
1327 
1328   /* in case r1p==s1p or r1p==s2p */
1329   p = refmpn_malloc_limbs (size);
1330 
1331   acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
1332   scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
1333   refmpn_copyi (r1p, p, size);
1334 
1335   free (p);
1336   return 2 * acy + scy;
1337 }
1338 
1339 mp_limb_t
1340 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
1341 		 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1342 {
1343   return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
1344 }
1345 
1346 
1347 /* Right shift hi,lo and return the low limb of the result.
1348    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1349 mp_limb_t
1350 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1351 {
1352   ASSERT (shift < GMP_NUMB_BITS);
1353   if (shift == 0)
1354     return lo;
1355   else
1356     return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1357 }
1358 
1359 /* Left shift hi,lo and return the high limb of the result.
1360    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1361 mp_limb_t
1362 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1363 {
1364   ASSERT (shift < GMP_NUMB_BITS);
1365   if (shift == 0)
1366     return hi;
1367   else
1368     return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1369 }
1370 
1371 
1372 mp_limb_t
1373 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1374 {
1375   mp_limb_t  ret;
1376   mp_size_t  i;
1377 
1378   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1379   ASSERT (size >= 1);
1380   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1381   ASSERT_MPN (sp, size);
1382 
1383   ret = rshift_make (sp[0], CNST_LIMB(0), shift);
1384 
1385   for (i = 0; i < size-1; i++)
1386     rp[i] = rshift_make (sp[i+1], sp[i], shift);
1387 
1388   rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
1389   return ret;
1390 }
1391 
1392 mp_limb_t
1393 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1394 {
1395   mp_limb_t  ret;
1396   mp_size_t  i;
1397 
1398   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1399   ASSERT (size >= 1);
1400   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1401   ASSERT_MPN (sp, size);
1402 
1403   ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
1404 
1405   for (i = size-2; i >= 0; i--)
1406     rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
1407 
1408   rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
1409   return ret;
1410 }
1411 
1412 void
1413 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1414 {
1415   mp_size_t i;
1416 
1417   /* We work downwards since mpn_lshiftc needs that. */
1418   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1419 
1420   for (i = size - 1; i >= 0; i--)
1421     rp[i] = (~sp[i]) & GMP_NUMB_MASK;
1422 }
1423 
1424 mp_limb_t
1425 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1426 {
1427   mp_limb_t res;
1428 
1429   /* No asserts here, refmpn_lshift will assert what we need. */
1430 
1431   res = refmpn_lshift (rp, sp, size, shift);
1432   refmpn_com (rp, rp, size);
1433   return res;
1434 }
1435 
1436 /* accepting shift==0 and doing a plain copyi or copyd in that case */
1437 mp_limb_t
1438 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1439 {
1440   if (shift == 0)
1441     {
1442       refmpn_copyi (rp, sp, size);
1443       return 0;
1444     }
1445   else
1446     {
1447       return refmpn_rshift (rp, sp, size, shift);
1448     }
1449 }
1450 mp_limb_t
1451 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1452 {
1453   if (shift == 0)
1454     {
1455       refmpn_copyd (rp, sp, size);
1456       return 0;
1457     }
1458   else
1459     {
1460       return refmpn_lshift (rp, sp, size, shift);
1461     }
1462 }
1463 
1464 /* accepting size==0 too */
1465 mp_limb_t
1466 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1467 			   unsigned shift)
1468 {
1469   return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
1470 }
1471 mp_limb_t
1472 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1473 			   unsigned shift)
1474 {
1475   return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
1476 }
1477 
1478 /* Divide h,l by d, return quotient, store remainder to *rp.
1479    Operates on full limbs, not nails.
1480    Must have h < d.
1481    __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
1482 mp_limb_t
1483 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
1484 {
1485   mp_limb_t  q, r;
1486   int  n;
1487 
1488   ASSERT (d != 0);
1489   ASSERT (h < d);
1490 
1491 #if 0
1492   udiv_qrnnd (q, r, h, l, d);
1493   *rp = r;
1494   return q;
1495 #endif
1496 
1497   n = refmpn_count_leading_zeros (d);
1498   d <<= n;
1499 
1500   if (n != 0)
1501     {
1502       h = (h << n) | (l >> (GMP_LIMB_BITS - n));
1503       l <<= n;
1504     }
1505 
1506   __udiv_qrnnd_c (q, r, h, l, d);
1507   r >>= n;
1508   *rp = r;
1509   return q;
1510 }
1511 
1512 mp_limb_t
1513 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
1514 {
1515   return refmpn_udiv_qrnnd (rp, h, l, d);
1516 }
1517 
1518 /* This little subroutine avoids some bad code generation from i386 gcc 3.0
1519    -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
1520 static mp_limb_t
1521 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1522 			     mp_limb_t divisor, mp_limb_t carry)
1523 {
1524   mp_size_t  i;
1525   mp_limb_t rem[1];
1526   for (i = size-1; i >= 0; i--)
1527     {
1528       rp[i] = refmpn_udiv_qrnnd (rem, carry,
1529 				 sp[i] << GMP_NAIL_BITS,
1530 				 divisor << GMP_NAIL_BITS);
1531       carry = *rem >> GMP_NAIL_BITS;
1532     }
1533   return carry;
1534 }
1535 
1536 mp_limb_t
1537 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1538 		  mp_limb_t divisor, mp_limb_t carry)
1539 {
1540   mp_ptr     sp_orig;
1541   mp_ptr     prod;
1542   mp_limb_t  carry_orig;
1543 
1544   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1545   ASSERT (size >= 0);
1546   ASSERT (carry < divisor);
1547   ASSERT_MPN (sp, size);
1548   ASSERT_LIMB (divisor);
1549   ASSERT_LIMB (carry);
1550 
1551   if (size == 0)
1552     return carry;
1553 
1554   sp_orig = refmpn_memdup_limbs (sp, size);
1555   prod = refmpn_malloc_limbs (size);
1556   carry_orig = carry;
1557 
1558   carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1559 
1560   /* check by multiplying back */
1561 #if 0
1562   printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1563 	  size, divisor, carry_orig, carry);
1564   mpn_trace("s",sp_orig,size);
1565   mpn_trace("r",rp,size);
1566   printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1567   mpn_trace("p",prod,size);
1568 #endif
1569   ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1570   ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1571   free (sp_orig);
1572   free (prod);
1573 
1574   return carry;
1575 }
1576 
1577 mp_limb_t
1578 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1579 {
1580   return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
1581 }
1582 
1583 
1584 mp_limb_t
1585 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1586 	       mp_limb_t carry)
1587 {
1588   mp_ptr  p = refmpn_malloc_limbs (size);
1589   carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
1590   free (p);
1591   return carry;
1592 }
1593 
1594 mp_limb_t
1595 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1596 {
1597   return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
1598 }
1599 
1600 mp_limb_t
1601 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1602 		     mp_limb_t inverse)
1603 {
1604   ASSERT (divisor & GMP_NUMB_HIGHBIT);
1605   ASSERT (inverse == refmpn_invert_limb (divisor));
1606   return refmpn_mod_1 (sp, size, divisor);
1607 }
1608 
1609 /* This implementation will be rather slow, but has the advantage of being
1610    in a different style than the libgmp versions.  */
1611 mp_limb_t
1612 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
1613 {
1614   ASSERT ((GMP_NUMB_BITS % 4) == 0);
1615   return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
1616 }
1617 
1618 
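/* Divide {sp,size} extended by xsize zero (fraction) limbs by divisor: the
   integer quotient goes to rp+xsize, the xsize fraction quotient limbs go to
   rp, and the final remainder is returned.  */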
1619 mp_limb_t
1620 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
1621 		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1622 		  mp_limb_t carry)
1623 {
1624   mp_ptr  z;
1625 
1626   z = refmpn_malloc_limbs (xsize);
1627   refmpn_fill (z, xsize, CNST_LIMB(0));
1628 
1629   carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
1630   carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
1631 
1632   free (z);
1633   return carry;
1634 }
1635 
1636 mp_limb_t
1637 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
1638 		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1639 {
1640   return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
1641 }
1642 
1643 mp_limb_t
1644 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
1645 			mp_srcptr sp, mp_size_t size,
1646 			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
1647 {
1648   ASSERT (size >= 0);
1649   ASSERT (shift == refmpn_count_leading_zeros (divisor));
1650   ASSERT (inverse == refmpn_invert_limb (divisor << shift));
1651 
1652   return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
1653 }
1654 
1655 mp_limb_t
1656 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
1657 		 mp_ptr np, mp_size_t nn,
1658 		 mp_srcptr dp)
1659 {
1660   mp_ptr tp;
1661   mp_limb_t qh;
1662 
1663   tp = refmpn_malloc_limbs (nn + qxn);
1664   refmpn_zero (tp, qxn);
1665   refmpn_copyi (tp + qxn, np, nn);
1666   qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
1667   refmpn_copyi (np, tp, 2);
1668   free (tp);
1669   return qh;
1670 }
1671 
1672 /* Inverse is floor((b*(b-d)-1) / d), per the "Division by Invariant
1673    Integers using Multiplication" paper, figure 8.1 m', where
1674    b=2^GMP_LIMB_BITS.  Note that -d-1 < d since d has the high bit set. */
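/* Equivalently, the result is floor((b^2-1)/d) - b, the usual single-limb
   reciprocal of a normalized d.  */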
1675 
1676 mp_limb_t
1677 refmpn_invert_limb (mp_limb_t d)
1678 {
1679   mp_limb_t r;
1680   ASSERT (d & GMP_LIMB_HIGHBIT);
1681   return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
1682 }
1683 
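/* Approximate reciprocal: rp gets the low n limbs of floor((B^(2n)-1)/{up,n}),
   computed here the slow way via refmpn_tdiv_qr; {up,n} is presumably
   normalized, as mpn_invert requires.  */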
1684 void
1685 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1686 {
1687   mp_ptr qp, tp;
1688   TMP_DECL;
1689   TMP_MARK;
1690 
1691   tp = TMP_ALLOC_LIMBS (2 * n);
1692   qp = TMP_ALLOC_LIMBS (n + 1);
1693 
1694   MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);
1695 
1696   refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
1697   refmpn_copyi (rp, qp, n);
1698 
1699   TMP_FREE;
1700 }
1701 
1702 void
1703 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1704 {
1705   mp_ptr tp;
1706   mp_limb_t binv;
1707   TMP_DECL;
1708   TMP_MARK;
1709 
1710   /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
1711      code.  To make up for it, we check that the inverse is correct using a
1712      multiply.  */
1713 
1714   tp = TMP_ALLOC_LIMBS (2 * n);
1715 
1716   MPN_ZERO (tp, n);
1717   tp[0] = 1;
1718   binvert_limb (binv, up[0]);
1719   mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
1720 
1721   refmpn_mul_n (tp, rp, up, n);
1722   ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
1723 
1724   TMP_FREE;
1725 }
1726 
1727 /* The aim is to produce a dst quotient and return a remainder c, satisfying
1728    c*b^n + src-i == 3*dst, where i is the incoming carry.
1729 
1730    Some value c==0, c==1 or c==2 will satisfy, so just try each.
1731 
1732    If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
1733    remainder from the first division attempt determines the correct
1734    remainder (3-c), but don't bother with that, since we can't guarantee
1735    anything about GMP_NUMB_BITS when using nails.
1736 
1737    If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
1738    complement negative, ie. b^n+src-i, and the calculation produces c1
1739    satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
1740    means it's enough to just add any borrow back at the end.
1741 
1742    A borrow only occurs when src==0 or src==1, and, by the same reasoning as in
1743    mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
1744    or 1 respectively, so with 1 added the final return value is still in the
1745    prescribed range 0 to 2. */
1746 
1747 mp_limb_t
1748 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
1749 {
1750   mp_ptr     spcopy;
1751   mp_limb_t  c, cs;
1752 
1753   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1754   ASSERT (size >= 1);
1755   ASSERT (carry <= 2);
1756   ASSERT_MPN (sp, size);
1757 
1758   spcopy = refmpn_malloc_limbs (size);
1759   cs = refmpn_sub_1 (spcopy, sp, size, carry);
1760 
1761   for (c = 0; c <= 2; c++)
1762     if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
1763       goto done;
1764   ASSERT_FAIL (no value of c satisfies);
1765 
1766  done:
1767   c += cs;
1768   ASSERT (c <= 2);
1769 
1770   free (spcopy);
1771   return c;
1772 }
1773 
1774 mp_limb_t
1775 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1776 {
1777   return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
1778 }
1779 
1780 
1781 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
1782 void
1783 refmpn_mul_basecase (mp_ptr prodp,
1784 		     mp_srcptr up, mp_size_t usize,
1785 		     mp_srcptr vp, mp_size_t vsize)
1786 {
1787   mp_size_t i;
1788 
1789   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1790   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1791   ASSERT (usize >= vsize);
1792   ASSERT (vsize >= 1);
1793   ASSERT_MPN (up, usize);
1794   ASSERT_MPN (vp, vsize);
1795 
1796   prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
1797   for (i = 1; i < vsize; i++)
1798     prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
1799 }
1800 
1801 
1802 /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
1803 void
1804 refmpn_mulmid_basecase (mp_ptr rp,
1805 			mp_srcptr up, mp_size_t un,
1806 			mp_srcptr vp, mp_size_t vn)
1807 {
1808   mp_limb_t cy;
1809   mp_size_t i;
1810 
1811   ASSERT (un >= vn);
1812   ASSERT (vn >= 1);
1813   ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
1814   ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
1815   ASSERT_MPN (up, un);
1816   ASSERT_MPN (vp, vn);
1817 
1818   rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
1819   rp[un - vn + 2] = CNST_LIMB (0);
1820   for (i = 1; i < vn; i++)
1821     {
1822       cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
1823       cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
1824       cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
1825       ASSERT (cy == 0);
1826     }
1827 }
1828 
1829 void
1830 refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
1831 		      mp_ptr scratch)
1832 {
1833   refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1834 }
1835 
1836 void
1837 refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1838 {
1839   /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1840      off products near the middle product region boundary */
1841   refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1842 }
1843 
1844 void
1845 refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
1846 	       mp_srcptr vp, mp_size_t vn)
1847 {
1848   /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1849      off products near the middle product region boundary */
1850   refmpn_mulmid_basecase (rp, up, un, vp, vn);
1851 }
1852 
1853 
1854 
1855 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
1856 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
1857 #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
1858 #if WANT_FFT
1859 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
1860 #else
1861 #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
1862 #endif
1863 
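/* {wp,un+vn} = {up,un} * {vp,vn}.  Each size range uses the next-lower
   algorithm (basecase, toom22, toom33, toom44, toom6h), presumably exercising
   different code paths than mpn_mul itself would choose; when un > vn the
   high part of up is handled by a recursive call.  */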
1864 void
1865 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
1866 {
1867   mp_ptr tp;
1868   mp_size_t tn;
1869 
1870   if (vn < TOOM3_THRESHOLD)
1871     {
1872       /* In the mpn_mul_basecase and toom2 range, use our own mul_basecase.  */
1873       if (vn != 0)
1874 	refmpn_mul_basecase (wp, up, un, vp, vn);
1875       else
1876 	MPN_ZERO (wp, un);
1877       return;
1878     }
1879 
1880   if (vn < TOOM4_THRESHOLD)
1881     {
1882       /* In the toom3 range, use mpn_toom22_mul.  */
1883       tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
1884       tp = refmpn_malloc_limbs (tn);
1885       mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1886     }
1887   else if (vn < TOOM6_THRESHOLD)
1888     {
1889       /* In the toom4 range, use mpn_toom33_mul.  */
1890       tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
1891       tp = refmpn_malloc_limbs (tn);
1892       mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1893     }
1894   else if (vn < FFT_THRESHOLD)
1895     {
1896       /* In the toom6 range, use mpn_toom44_mul.  */
1897       tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
1898       tp = refmpn_malloc_limbs (tn);
1899       mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1900     }
1901   else
1902     {
1903       /* Finally, for the largest operands, use mpn_toom6h_mul.  */
1904       tn = 2 * vn + mpn_toom6h_mul_itch (vn, vn);
1905       tp = refmpn_malloc_limbs (tn);
1906       mpn_toom6h_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1907     }
1908 
1909   if (un != vn)
1910     {
1911       if (un - vn < vn)
1912 	refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
1913       else
1914 	refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
1915 
1916       MPN_COPY (wp, tp, vn);
1917       ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn));
1918     }
1919   else
1920     {
1921       MPN_COPY (wp, tp, 2 * vn);
1922     }
1923 
1924   free (tp);
1925 }
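
/* In the unbalanced case above (un != vn) the identity used is
       u*v = ulow*v + B^vn * uhigh*v
   where ulow is the low vn limbs of u and uhigh the remaining un-vn limbs:
   tp holds ulow*v (2*vn limbs), the recursive call writes uhigh*v at wp+vn,
   and the final refmpn_add folds the top vn limbs of tp into that.  The add
   cannot carry out because the full product fits in un+vn limbs.  */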
1926 
1927 void
1928 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1929 {
1930   refmpn_mul (prodp, up, size, vp, size);
1931 }
1932 
1933 void
1934 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1935 {
1936   mp_ptr tp = refmpn_malloc_limbs (2*size);
1937   refmpn_mul (tp, up, size, vp, size);
1938   refmpn_copyi (prodp, tp, size);
1939   free (tp);
1940 }
1941 
1942 void
1943 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
1944 {
1945   refmpn_mul (dst, src, size, src, size);
1946 }
1947 
1948 /* Allowing usize<vsize, usize==0 or vsize==0. */
1949 void
1950 refmpn_mul_any (mp_ptr prodp,
1951 		     mp_srcptr up, mp_size_t usize,
1952 		     mp_srcptr vp, mp_size_t vsize)
1953 {
1954   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1955   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1956   ASSERT (usize >= 0);
1957   ASSERT (vsize >= 0);
1958   ASSERT_MPN (up, usize);
1959   ASSERT_MPN (vp, vsize);
1960 
1961   if (usize == 0)
1962     {
1963       refmpn_fill (prodp, vsize, CNST_LIMB(0));
1964       return;
1965     }
1966 
1967   if (vsize == 0)
1968     {
1969       refmpn_fill (prodp, usize, CNST_LIMB(0));
1970       return;
1971     }
1972 
1973   if (usize >= vsize)
1974     refmpn_mul (prodp, up, usize, vp, vsize);
1975   else
1976     refmpn_mul (prodp, vp, vsize, up, usize);
1977 }
1978 
1979 
1980 mp_limb_t
1981 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
1982 {
1983   mp_limb_t  x;
1984   int  twos;
1985 
1986   ASSERT (y != 0);
1987   ASSERT (! refmpn_zero_p (xp, xsize));
1988   ASSERT_MPN (xp, xsize);
1989   ASSERT_LIMB (y);
1990 
1991   x = refmpn_mod_1 (xp, xsize, y);
1992   if (x == 0)
1993     return y;
1994 
1995   twos = 0;
1996   while ((x & 1) == 0 && (y & 1) == 0)
1997     {
1998       x >>= 1;
1999       y >>= 1;
2000       twos++;
2001     }
2002 
2003   for (;;)
2004     {
2005       while ((x & 1) == 0)  x >>= 1;
2006       while ((y & 1) == 0)  y >>= 1;
2007 
2008       if (x < y)
2009 	MP_LIMB_T_SWAP (x, y);
2010 
2011       x -= y;
2012       if (x == 0)
2013 	break;
2014     }
2015 
2016   return y << twos;
2017 }
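
/* Worked example of the steps above (illustrative only): for {xp,xsize} = 12
   and y = 18, refmpn_mod_1 gives x = 12, the single common factor of 2 is
   stripped (x = 6, y = 9, twos = 1), and the subtract-and-shift loop reduces
   the pair (3,9) -> (9,3) -> (6,3) -> (3,3) -> (0,3), leaving y = 3.  The
   return value is y << twos = 6 = gcd(12,18).  */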
2018 
2019 
2020 /* Based on the full limb x, not nails. */
2021 unsigned
2022 refmpn_count_leading_zeros (mp_limb_t x)
2023 {
2024   unsigned  n = 0;
2025 
2026   ASSERT (x != 0);
2027 
2028   while ((x & GMP_LIMB_HIGHBIT) == 0)
2029     {
2030       x <<= 1;
2031       n++;
2032     }
2033   return n;
2034 }
2035 
2036 /* Full limbs allowed, not limited to nails. */
2037 unsigned
2038 refmpn_count_trailing_zeros (mp_limb_t x)
2039 {
2040   unsigned  n = 0;
2041 
2042   ASSERT (x != 0);
2043   ASSERT_LIMB (x);
2044 
2045   while ((x & 1) == 0)
2046     {
2047       x >>= 1;
2048       n++;
2049     }
2050   return n;
2051 }
2052 
2053 /* Strip factors of two (low zero bits) from {p,size} by right shifting.
2054    The return value is the number of twos stripped.  */
2055 mp_size_t
2056 refmpn_strip_twos (mp_ptr p, mp_size_t size)
2057 {
2058   mp_size_t  limbs;
2059   unsigned   shift;
2060 
2061   ASSERT (size >= 1);
2062   ASSERT (! refmpn_zero_p (p, size));
2063   ASSERT_MPN (p, size);
2064 
2065   for (limbs = 0; p[0] == 0; limbs++)
2066     {
2067       refmpn_copyi (p, p+1, size-1);
2068       p[size-1] = 0;
2069     }
2070 
2071   shift = refmpn_count_trailing_zeros (p[0]);
2072   if (shift)
2073     refmpn_rshift (p, p, size, shift);
2074 
2075   return limbs*GMP_NUMB_BITS + shift;
2076 }
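
/* For example (illustrative): with {p,2} = {0, 8}, ie. 8 * 2^GMP_NUMB_BITS,
   the zero low limb is shifted out first (limbs = 1), then the remaining 8
   is shifted right by its 3 trailing zero bits, leaving {p,2} = {1, 0} and
   a return value of GMP_NUMB_BITS + 3.  */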
2077 
2078 mp_limb_t
2079 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
2080 {
2081   int       cmp;
2082 
2083   ASSERT (ysize >= 1);
2084   ASSERT (xsize >= ysize);
2085   ASSERT ((xp[0] & 1) != 0);
2086   ASSERT ((yp[0] & 1) != 0);
2087   /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be normalized */
2088   ASSERT (yp[ysize-1] != 0);
2089   ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
2090   ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
2091   ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
2092   if (xsize == ysize)
2093     ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
2094   ASSERT_MPN (xp, xsize);
2095   ASSERT_MPN (yp, ysize);
2096 
2097   refmpn_strip_twos (xp, xsize);
2098   MPN_NORMALIZE (xp, xsize);
2099   MPN_NORMALIZE (yp, ysize);
2100 
2101   for (;;)
2102     {
2103       cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
2104       if (cmp == 0)
2105 	break;
2106       if (cmp < 0)
2107 	MPN_PTR_SWAP (xp,xsize, yp,ysize);
2108 
2109       ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
2110 
2111       refmpn_strip_twos (xp, xsize);
2112       MPN_NORMALIZE (xp, xsize);
2113     }
2114 
2115   refmpn_copyi (gp, xp, xsize);
2116   return xsize;
2117 }
2118 
2119 unsigned long
2120 ref_popc_limb (mp_limb_t src)
2121 {
2122   unsigned long  count;
2123   int  i;
2124 
2125   count = 0;
2126   for (i = 0; i < GMP_LIMB_BITS; i++)
2127     {
2128       count += (src & 1);
2129       src >>= 1;
2130     }
2131   return count;
2132 }
2133 
2134 unsigned long
2135 refmpn_popcount (mp_srcptr sp, mp_size_t size)
2136 {
2137   unsigned long  count = 0;
2138   mp_size_t  i;
2139 
2140   ASSERT (size >= 0);
2141   ASSERT_MPN (sp, size);
2142 
2143   for (i = 0; i < size; i++)
2144     count += ref_popc_limb (sp[i]);
2145   return count;
2146 }
2147 
2148 unsigned long
2149 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
2150 {
2151   mp_ptr  d;
2152   unsigned long  count;
2153 
2154   ASSERT (size >= 0);
2155   ASSERT_MPN (s1p, size);
2156   ASSERT_MPN (s2p, size);
2157 
2158   if (size == 0)
2159     return 0;
2160 
2161   d = refmpn_malloc_limbs (size);
2162   refmpn_xor_n (d, s1p, s2p, size);
2163   count = refmpn_popcount (d, size);
2164   free (d);
2165   return count;
2166 }
2167 
2168 
2169 /* set r to a%d */
2170 void
2171 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
2172 {
2173   mp_limb_t  D[2];
2174   int        n;
2175 
2176   ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
2177   ASSERT_MPN (a, 2);
2178   ASSERT_MPN (d, 2);
2179 
2180   D[1] = d[1], D[0] = d[0];
2181   r[1] = a[1], r[0] = a[0];
2182   n = 0;
2183 
2184   for (;;)
2185     {
2186       if (D[1] & GMP_NUMB_HIGHBIT)
2187 	break;
2188       if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
2189 	break;
2190       refmpn_lshift (D, D, (mp_size_t) 2, 1);
2191       n++;
2192       ASSERT (n <= GMP_NUMB_BITS);
2193     }
2194 
2195   while (n >= 0)
2196     {
2197       if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
2198 	ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
2199       refmpn_rshift (D, D, (mp_size_t) 2, 1);
2200       n--;
2201     }
2202 
2203   ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
2204 }
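
/* A small worked case (illustrative): for a = 7 and d = 3 (each as two
   limbs), D is doubled to 6 and then 12 (n = 2), at which point it reaches
   or exceeds r; the descending loop then subtracts the shifted divisor once,
   at D = 6, giving r = 1 = 7 mod 3.  */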
2205 
2206 
2207 
2208 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
2209    particular the trial quotient is allowed to be 2 too big. */
2210 mp_limb_t
2211 refmpn_sb_div_qr (mp_ptr qp,
2212 		  mp_ptr np, mp_size_t nsize,
2213 		  mp_srcptr dp, mp_size_t dsize)
2214 {
2215   mp_limb_t  retval = 0;
2216   mp_size_t  i;
2217   mp_limb_t  d1 = dp[dsize-1];
2218   mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);
2219 
2220   ASSERT (nsize >= dsize);
2221   /* ASSERT (dsize > 2); */
2222   ASSERT (dsize >= 2);
2223   ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
2224   ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
2225   ASSERT_MPN (np, nsize);
2226   ASSERT_MPN (dp, dsize);
2227 
2228   i = nsize-dsize;
2229   if (refmpn_cmp (np+i, dp, dsize) >= 0)
2230     {
2231       ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
2232       retval = 1;
2233     }
2234 
2235   for (i--; i >= 0; i--)
2236     {
2237       mp_limb_t  n0 = np[i+dsize];
2238       mp_limb_t  n1 = np[i+dsize-1];
2239       mp_limb_t  q, dummy_r;
2240 
2241       ASSERT (n0 <= d1);
2242       if (n0 == d1)
2243 	q = GMP_NUMB_MAX;
2244       else
2245 	q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
2246 			       d1 << GMP_NAIL_BITS);
2247 
2248       n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
2249       ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
2250       if (n0)
2251 	{
2252 	  q--;
2253 	  if (! refmpn_add_n (np+i, np+i, dp, dsize))
2254 	    {
2255 	      q--;
2256 	      ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
2257 	    }
2258 	}
2259       np[i+dsize] = 0;
2260 
2261       qp[i] = q;
2262     }
2263 
2264   /* remainder < divisor */
2265 #if 0		/* ASSERT triggers gcc 4.2.1 bug */
2266   ASSERT (refmpn_cmp (np, dp, dsize) < 0);
2267 #endif
2268 
2269   /* multiply back to original */
2270   {
2271     mp_ptr  mp = refmpn_malloc_limbs (nsize);
2272 
2273     refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
2274     if (retval)
2275       ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
2276     ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
2277     ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
2278 
2279     free (mp);
2280   }
2281 
2282   free (np_orig);
2283   return retval;
2284 }
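
/* Since the trial quotient above comes from the top limb of the divisor
   alone, and the divisor is normalized (high bit set), it can be too big by
   at most 2, which the add-back loop corrects.  A disabled usage sketch with
   a normalized divisor; names and values are only for illustration, with
   B = 2^GMP_NUMB_BITS.  */
#if 0
static void
example_sb_div_qr (void)
{
  mp_limb_t  n[3] = { 5, 0, 1 };               /* B^2 + 5 */
  mp_limb_t  d[2] = { 0, GMP_NUMB_HIGHBIT };   /* B^2 / 2, high bit set */
  mp_limb_t  q[1];
  mp_limb_t  h;

  h = refmpn_sb_div_qr (q, n, 3, d, 2);
  /* quotient h*B + q[0] = 2, remainder {n,2} = 5 */
  ASSERT_ALWAYS (h == 0 && q[0] == 2 && n[0] == 5 && n[1] == 0);
}
#endif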
2285 
2286 /* Divide {np,nsize} by {dp,dsize}: normalize the divisor so its high bit is
2287    set, divide with refmpn_sb_div_qr, then shift the remainder back down. */
2288 void
2289 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
2290 		mp_ptr np, mp_size_t nsize,
2291 		mp_srcptr dp, mp_size_t dsize)
2292 {
2293   ASSERT (qxn == 0);
2294   ASSERT_MPN (np, nsize);
2295   ASSERT_MPN (dp, dsize);
2296   ASSERT (dsize > 0);
2297   ASSERT (dp[dsize-1] != 0);
2298 
2299   if (dsize == 1)
2300     {
2301       rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
2302       return;
2303     }
2304   else
2305     {
2306       mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
2307       mp_ptr  d2p = refmpn_malloc_limbs (dsize);
2308       int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
2309 
2310       n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
2311       ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
2312 
2313       refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
2314       refmpn_rshift_or_copy (rp, n2p, dsize, norm);
2315 
2316       /* ASSERT (refmpn_zero_p (n2p+dsize, nsize-dsize)); */
2317       free (n2p);
2318       free (d2p);
2319     }
2320 }
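
/* As with mpn_tdiv_qr, the quotient above occupies nsize-dsize+1 limbs at qp
   and the remainder dsize limbs at rp: the shifted-in limb n2p[nsize] keeps
   the normalized numerator one limb longer, so refmpn_sb_div_qr writes
   exactly nsize-dsize+1 quotient limbs (its returned high limb is zero here
   and is ignored).  */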
2321 
2322 mp_limb_t
2323 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
2324 {
2325   mp_size_t j;
2326   mp_limb_t cy;
2327 
2328   ASSERT_MPN (up, 2*n);
2329   /* ASSERT about directed overlap rp, up */
2330   /* ASSERT about overlap rp, mp */
2331   /* ASSERT about overlap up, mp */
2332 
2333   for (j = n - 1; j >= 0; j--)
2334     {
2335       up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
2336       up++;
2337     }
2338   cy = mpn_add_n (rp, up, up - n, n);
2339   return cy;
2340 }
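
/* Montgomery reduction, sketched (B = 2^GMP_NUMB_BITS): invm is assumed to
   satisfy invm*mp[0] == -1 mod B, so each pass adds the multiple of {mp,n}
   that clears the current low limb of u, storing the addmul carry in the
   limb just cleared.  After n passes u plus the added multiple of m is
   divisible by B^n, and the final add combines its top half with the saved
   carries, so rp + cy*B^n == (u + k*m) / B^n for some integer k, ie. rp is
   congruent to u * B^-n mod m; the returned carry is left for the caller,
   typically handled by a conditional subtraction of m.  */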
2341 
2342 size_t
2343 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
2344 {
2345   unsigned char  *d;
2346   size_t  dsize;
2347 
2348   ASSERT (size >= 0);
2349   ASSERT (base >= 2);
2350   ASSERT (base < numberof (mp_bases));
2351   ASSERT (size == 0 || src[size-1] != 0);
2352   ASSERT_MPN (src, size);
2353 
2354   MPN_SIZEINBASE (dsize, src, size, base);
2355   ASSERT (dsize >= 1);
2356   ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB));
2357 
2358   if (size == 0)
2359     {
2360       dst[0] = 0;
2361       return 1;
2362     }
2363 
2364   /* don't clobber input for power of 2 bases */
2365   if (POW2_P (base))
2366     src = refmpn_memdup_limbs (src, size);
2367 
2368   d = dst + dsize;
2369   do
2370     {
2371       d--;
2372       ASSERT (d >= dst);
2373       *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
2374       size -= (src[size-1] == 0);
2375     }
2376   while (size != 0);
2377 
2378   /* Move result back and decrement dsize if we didn't generate
2379      the maximum possible digits.  */
2380   if (d != dst)
2381     {
2382       size_t i;
2383       dsize -= d - dst;
2384       for (i = 0; i < dsize; i++)
2385 	dst[i] = d[i];
2386     }
2387 
2388   if (POW2_P (base))
2389     free (src);
2390 
2391   return dsize;
2392 }
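
/* Note the digits written above are plain values 0..base-1, most significant
   first, not ASCII, and {src,size} is clobbered except for power-of-2 bases.
   A disabled sketch; the name and values are only for illustration.  */
#if 0
static void
example_get_str (void)
{
  mp_limb_t      n[1] = { 105 };
  unsigned char  digits[8];
  size_t         len;

  len = refmpn_get_str (digits, 10, n, 1);
  /* 105 comes out as the digit values 1, 0, 5 */
  ASSERT_ALWAYS (len == 3
                 && digits[0] == 1 && digits[1] == 0 && digits[2] == 5);
}
#endif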
2393 
2394 
2395 mp_limb_t
2396 ref_bswap_limb (mp_limb_t src)
2397 {
2398   mp_limb_t  dst;
2399   int        i;
2400 
2401   dst = 0;
2402   for (i = 0; i < BYTES_PER_MP_LIMB; i++)
2403     {
2404       dst = (dst << 8) + (src & 0xFF);
2405       src >>= 8;
2406     }
2407   return dst;
2408 }
2409 
2410 
2411 /* These random functions are mostly for transitional purposes while adding
2412    nail support, since they're independent of the normal mpn routines.  They
2413    can probably be removed when those normal routines are reliable, though
2414    perhaps something independent would still be useful at times.  */
2415 
2416 #if GMP_LIMB_BITS == 32
2417 #define RAND_A  CNST_LIMB(0x29CF535)
2418 #endif
2419 #if GMP_LIMB_BITS == 64
2420 #define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
2421 #endif
2422 
2423 mp_limb_t  refmpn_random_seed;
2424 
2425 mp_limb_t
2426 refmpn_random_half (void)
2427 {
2428   refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
2429   return (refmpn_random_seed >> GMP_LIMB_BITS/2);
2430 }
2431 
2432 mp_limb_t
2433 refmpn_random_limb (void)
2434 {
2435   return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
2436 	   | refmpn_random_half ()) & GMP_NUMB_MASK;
2437 }
2438 
2439 void
2440 refmpn_random (mp_ptr ptr, mp_size_t size)
2441 {
2442   mp_size_t  i;
2443   if (GMP_NAIL_BITS == 0)
2444     {
2445       mpn_random (ptr, size);
2446       return;
2447     }
2448 
2449   for (i = 0; i < size; i++)
2450     ptr[i] = refmpn_random_limb ();
2451 }
2452 
2453 void
2454 refmpn_random2 (mp_ptr ptr, mp_size_t size)
2455 {
2456   mp_size_t  i;
2457   mp_limb_t  bit, mask, limb;
2458   int        run;
2459 
2460   if (GMP_NAIL_BITS == 0)
2461     {
2462       mpn_random2 (ptr, size);
2463       return;
2464     }
2465 
2466 #define RUN_MODULUS  32
2467 
2468   /* start with ones at a random pos in the high limb */
2469   bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
2470   mask = 0;
2471   run = 0;
2472 
2473   for (i = size-1; i >= 0; i--)
2474     {
2475       limb = 0;
2476       do
2477 	{
2478 	  if (run == 0)
2479 	    {
2480 	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
2481 	      mask = ~mask;
2482 	    }
2483 
2484 	  limb |= (bit & mask);
2485 	  bit >>= 1;
2486 	  run--;
2487 	}
2488       while (bit != 0);
2489 
2490       ptr[i] = limb;
2491       bit = GMP_NUMB_HIGHBIT;
2492     }
2493 }
2494 
2495 /* This is a simple bitwise algorithm working high to low across "s" and
2496    testing each time whether setting the bit would make s^2 exceed n.  */
2497 mp_size_t
2498 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
2499 {
2500   mp_ptr     tp, dp;
2501   mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
2502   unsigned   ibit;
2503   long       i;
2504   mp_limb_t  c;
2505 
2506   ASSERT (nsize >= 0);
2507 
2508   /* If n==0, then s=0 and r=0.  */
2509   if (nsize == 0)
2510     return 0;
2511 
2512   ASSERT (np[nsize - 1] != 0);
2513   ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
2514   ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
2515   ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
2516 
2517   /* root */
2518   ssize = (nsize+1)/2;
2519   refmpn_zero (sp, ssize);
2520 
2521   /* the remainder so far */
2522   dp = refmpn_memdup_limbs (np, nsize);
2523   dsize = nsize;
2524 
2525   /* temporary */
2526   talloc = 2*ssize + 1;
2527   tp = refmpn_malloc_limbs (talloc);
2528 
2529   for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
2530     {
2531       /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
2532 	 is added to it */
2533 
2534       ilimbs = (i+1) / GMP_NUMB_BITS;
2535       ibit = (i+1) % GMP_NUMB_BITS;
2536       refmpn_zero (tp, ilimbs);
2537       c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
2538       tsize = ilimbs + ssize;
2539       tp[tsize] = c;
2540       tsize += (c != 0);
2541 
2542       ilimbs = (2*i) / GMP_NUMB_BITS;
2543       ibit = (2*i) % GMP_NUMB_BITS;
2544       if (ilimbs + 1 > tsize)
2545 	{
2546 	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
2547 	  tsize = ilimbs + 1;
2548 	}
2549       c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
2550 			CNST_LIMB(1) << ibit);
2551       ASSERT (tsize < talloc);
2552       tp[tsize] = c;
2553       tsize += (c != 0);
2554 
2555       if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
2556 	{
2557 	  /* set this bit in s and subtract from the remainder */
2558 	  refmpn_setbit (sp, i);
2559 
2560 	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
2561 	  dsize = refmpn_normalize (dp, dsize);
2562 	}
2563     }
2564 
2565   if (rp == NULL)
2566     {
2567       ret = ! refmpn_zero_p (dp, dsize);
2568     }
2569   else
2570     {
2571       ASSERT (dsize == 0 || dp[dsize-1] != 0);
2572       refmpn_copy (rp, dp, dsize);
2573       ret = dsize;
2574     }
2575 
2576   free (dp);
2577   free (tp);
2578   return ret;
2579 }
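
/* A small worked case of the bit-by-bit method above (illustrative): for
   n = 10 each candidate bit 2^i is accepted when t = 2*s*2^i + 2^(2*i)
   still fits under the remaining d, using (s + 2^i)^2 = s^2 + t.  Bits 1
   and 0 are accepted, giving s = 3 and remainder d = 10 - 9 = 1, so with
   rp non-NULL the return value is 1 (one remainder limb).  */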
2580