/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.

Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library test suite.

The GNU MP Library test suite is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 3 of the License,
or (at your option) any later version.

The GNU MP Library test suite is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along with
the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */


/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside these ranges (eg. for size==0), but the assertions are present to
   pick up bad parameters passed here that are about to be passed the same
   to a real mpn routine being compared.  */

/* always do assertion checking */
#define WANT_ASSERT 1

#include <stdio.h>  /* for NULL */
#include <stdlib.h> /* for malloc */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "tests.h"


/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
   in bytes. */
int
byte_overlap_p (const void *v_xp, mp_size_t xsize,
                const void *v_yp, mp_size_t ysize)
{
  const char *xp = (const char *) v_xp;
  const char *yp = (const char *) v_yp;

  ASSERT (xsize >= 0);
  ASSERT (ysize >= 0);

  /* no wraparounds */
  ASSERT (xp+xsize >= xp);
  ASSERT (yp+ysize >= yp);

  if (xp + xsize <= yp)
    return 0;

  if (yp + ysize <= xp)
    return 0;

  return 1;
}

/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
int
refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
{
  return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
                         yp, ysize * BYTES_PER_MP_LIMB);
}

/* Check overlap for a routine defined to work low to high. */
int
refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a routine defined to work high to low. */
int
refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a standard routine requiring equal or separate. */
int
refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst == src || !
refmpn_overlap_p (dst, size, src, size)); 95 } 96 int 97 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2, 98 mp_size_t size) 99 { 100 return (refmpn_overlap_fullonly_p (dst, src1, size) 101 && refmpn_overlap_fullonly_p (dst, src2, size)); 102 } 103 104 105 mp_ptr 106 refmpn_malloc_limbs (mp_size_t size) 107 { 108 mp_ptr p; 109 ASSERT (size >= 0); 110 if (size == 0) 111 size = 1; 112 p = (mp_ptr) malloc ((size_t) (size * BYTES_PER_MP_LIMB)); 113 ASSERT (p != NULL); 114 return p; 115 } 116 117 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free 118 * memory allocated by refmpn_malloc_limbs_aligned. */ 119 void 120 refmpn_free_limbs (mp_ptr p) 121 { 122 free (p); 123 } 124 125 mp_ptr 126 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size) 127 { 128 mp_ptr p; 129 p = refmpn_malloc_limbs (size); 130 refmpn_copyi (p, ptr, size); 131 return p; 132 } 133 134 /* malloc n limbs on a multiple of m bytes boundary */ 135 mp_ptr 136 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m) 137 { 138 return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m); 139 } 140 141 142 void 143 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value) 144 { 145 mp_size_t i; 146 ASSERT (size >= 0); 147 for (i = 0; i < size; i++) 148 ptr[i] = value; 149 } 150 151 void 152 refmpn_zero (mp_ptr ptr, mp_size_t size) 153 { 154 refmpn_fill (ptr, size, CNST_LIMB(0)); 155 } 156 157 void 158 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize) 159 { 160 ASSERT (newsize >= oldsize); 161 refmpn_zero (ptr+oldsize, newsize-oldsize); 162 } 163 164 int 165 refmpn_zero_p (mp_srcptr ptr, mp_size_t size) 166 { 167 mp_size_t i; 168 for (i = 0; i < size; i++) 169 if (ptr[i] != 0) 170 return 0; 171 return 1; 172 } 173 174 mp_size_t 175 refmpn_normalize (mp_srcptr ptr, mp_size_t size) 176 { 177 ASSERT (size >= 0); 178 while (size > 0 && ptr[size-1] == 0) 179 size--; 180 return size; 181 } 182 183 /* the highest one bit in x */ 184 mp_limb_t 185 refmpn_msbone (mp_limb_t x) 186 { 187 mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1); 188 189 while (n != 0) 190 { 191 if (x & n) 192 break; 193 n >>= 1; 194 } 195 return n; 196 } 197 198 /* a mask of the highest one bit plus and all bits below */ 199 mp_limb_t 200 refmpn_msbone_mask (mp_limb_t x) 201 { 202 if (x == 0) 203 return 0; 204 205 return (refmpn_msbone (x) << 1) - 1; 206 } 207 208 /* How many digits in the given base will fit in a limb. 209 Notice that the product b is allowed to be equal to the limit 210 2^GMP_NUMB_BITS, this ensures the result for base==2 will be 211 GMP_NUMB_BITS (and similarly other powers of 2). */ 212 int 213 refmpn_chars_per_limb (int base) 214 { 215 mp_limb_t limit[2], b[2]; 216 int chars_per_limb; 217 218 ASSERT (base >= 2); 219 220 limit[0] = 0; /* limit = 2^GMP_NUMB_BITS */ 221 limit[1] = 1; 222 b[0] = 1; /* b = 1 */ 223 b[1] = 0; 224 225 chars_per_limb = 0; 226 for (;;) 227 { 228 if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base)) 229 break; 230 if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0) 231 break; 232 chars_per_limb++; 233 } 234 return chars_per_limb; 235 } 236 237 /* The biggest value base**n which fits in GMP_NUMB_BITS. 
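   For example, with GMP_NUMB_BITS == 64 and base == 10, chars_per_limb is
   19 and the value returned here is 10^19, the largest power of 10 that
   still fits in a 64-bit limb.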
*/ 238 mp_limb_t 239 refmpn_big_base (int base) 240 { 241 int chars_per_limb = refmpn_chars_per_limb (base); 242 int i; 243 mp_limb_t bb; 244 245 ASSERT (base >= 2); 246 bb = 1; 247 for (i = 0; i < chars_per_limb; i++) 248 bb *= base; 249 return bb; 250 } 251 252 253 void 254 refmpn_setbit (mp_ptr ptr, unsigned long bit) 255 { 256 ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS); 257 } 258 259 void 260 refmpn_clrbit (mp_ptr ptr, unsigned long bit) 261 { 262 ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS)); 263 } 264 265 #define REFMPN_TSTBIT(ptr,bit) \ 266 (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0) 267 268 int 269 refmpn_tstbit (mp_srcptr ptr, unsigned long bit) 270 { 271 return REFMPN_TSTBIT (ptr, bit); 272 } 273 274 unsigned long 275 refmpn_scan0 (mp_srcptr ptr, unsigned long bit) 276 { 277 while (REFMPN_TSTBIT (ptr, bit) != 0) 278 bit++; 279 return bit; 280 } 281 282 unsigned long 283 refmpn_scan1 (mp_srcptr ptr, unsigned long bit) 284 { 285 while (REFMPN_TSTBIT (ptr, bit) == 0) 286 bit++; 287 return bit; 288 } 289 290 void 291 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size) 292 { 293 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 294 refmpn_copyi (rp, sp, size); 295 } 296 297 void 298 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size) 299 { 300 mp_size_t i; 301 302 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 303 ASSERT (size >= 0); 304 305 for (i = 0; i < size; i++) 306 rp[i] = sp[i]; 307 } 308 309 void 310 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size) 311 { 312 mp_size_t i; 313 314 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 315 ASSERT (size >= 0); 316 317 for (i = size-1; i >= 0; i--) 318 rp[i] = sp[i]; 319 } 320 321 /* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low 322 zeros to wsize. If x is longer, then copy just the high wsize limbs. */ 323 void 324 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize) 325 { 326 ASSERT (wsize >= 0); 327 ASSERT (xsize >= 0); 328 329 /* high part of x if x bigger than w */ 330 if (xsize > wsize) 331 { 332 xp += xsize - wsize; 333 xsize = wsize; 334 } 335 336 refmpn_copy (wp + wsize-xsize, xp, xsize); 337 refmpn_zero (wp, wsize-xsize); 338 } 339 340 int 341 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 342 { 343 mp_size_t i; 344 345 ASSERT (size >= 1); 346 ASSERT_MPN (xp, size); 347 ASSERT_MPN (yp, size); 348 349 for (i = size-1; i >= 0; i--) 350 { 351 if (xp[i] > yp[i]) return 1; 352 if (xp[i] < yp[i]) return -1; 353 } 354 return 0; 355 } 356 357 int 358 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 359 { 360 if (size == 0) 361 return 0; 362 else 363 return refmpn_cmp (xp, yp, size); 364 } 365 366 int 367 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize, 368 mp_srcptr yp, mp_size_t ysize) 369 { 370 int opp, cmp; 371 372 ASSERT_MPN (xp, xsize); 373 ASSERT_MPN (yp, ysize); 374 375 opp = (xsize < ysize); 376 if (opp) 377 MPN_SRCPTR_SWAP (xp,xsize, yp,ysize); 378 379 if (! refmpn_zero_p (xp+ysize, xsize-ysize)) 380 cmp = 1; 381 else 382 cmp = refmpn_cmp (xp, yp, ysize); 383 384 return (opp ? 
-cmp : cmp); 385 } 386 387 int 388 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 389 { 390 mp_size_t i; 391 ASSERT (size >= 0); 392 393 for (i = 0; i < size; i++) 394 if (xp[i] != yp[i]) 395 return 0; 396 return 1; 397 } 398 399 400 #define LOGOPS(operation) \ 401 { \ 402 mp_size_t i; \ 403 \ 404 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ 405 ASSERT (size >= 1); \ 406 ASSERT_MPN (s1p, size); \ 407 ASSERT_MPN (s2p, size); \ 408 \ 409 for (i = 0; i < size; i++) \ 410 rp[i] = operation; \ 411 } 412 413 void 414 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 415 { 416 LOGOPS (s1p[i] & s2p[i]); 417 } 418 void 419 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 420 { 421 LOGOPS (s1p[i] & ~s2p[i]); 422 } 423 void 424 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 425 { 426 LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK); 427 } 428 void 429 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 430 { 431 LOGOPS (s1p[i] | s2p[i]); 432 } 433 void 434 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 435 { 436 LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK)); 437 } 438 void 439 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 440 { 441 LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK); 442 } 443 void 444 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 445 { 446 LOGOPS (s1p[i] ^ s2p[i]); 447 } 448 void 449 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 450 { 451 LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK); 452 } 453 454 455 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */ 456 void 457 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl, 458 mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl) 459 { 460 *dl = ml - sl; 461 *dh = mh - sh - (ml < sl); 462 } 463 464 465 /* set *w to x+y, return 0 or 1 carry */ 466 mp_limb_t 467 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) 468 { 469 mp_limb_t sum, cy; 470 471 ASSERT_LIMB (x); 472 ASSERT_LIMB (y); 473 474 sum = x + y; 475 #if GMP_NAIL_BITS == 0 476 *w = sum; 477 cy = (sum < x); 478 #else 479 *w = sum & GMP_NUMB_MASK; 480 cy = (sum >> GMP_NUMB_BITS); 481 #endif 482 return cy; 483 } 484 485 /* set *w to x-y, return 0 or 1 borrow */ 486 mp_limb_t 487 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) 488 { 489 mp_limb_t diff, cy; 490 491 ASSERT_LIMB (x); 492 ASSERT_LIMB (y); 493 494 diff = x - y; 495 #if GMP_NAIL_BITS == 0 496 *w = diff; 497 cy = (diff > x); 498 #else 499 *w = diff & GMP_NUMB_MASK; 500 cy = (diff >> GMP_NUMB_BITS) & 1; 501 #endif 502 return cy; 503 } 504 505 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */ 506 mp_limb_t 507 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) 508 { 509 mp_limb_t r; 510 511 ASSERT_LIMB (x); 512 ASSERT_LIMB (y); 513 ASSERT (c == 0 || c == 1); 514 515 r = ref_addc_limb (w, x, y); 516 return r + ref_addc_limb (w, *w, c); 517 } 518 519 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */ 520 mp_limb_t 521 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) 522 { 523 mp_limb_t r; 524 525 ASSERT_LIMB (x); 526 ASSERT_LIMB (y); 527 ASSERT (c == 0 || c == 1); 528 529 r = ref_subc_limb (w, x, y); 530 return r + ref_subc_limb (w, *w, c); 531 } 532 533 534 #define AORS_1(operation) \ 535 { \ 536 mp_size_t i; \ 537 \ 538 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ 539 ASSERT (size >= 1); \ 540 ASSERT_MPN (sp, size); \ 541 ASSERT_LIMB (n); \ 542 \ 543 for (i = 0; i < 
size; i++) \ 544 n = operation (&rp[i], sp[i], n); \ 545 return n; \ 546 } 547 548 mp_limb_t 549 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) 550 { 551 AORS_1 (ref_addc_limb); 552 } 553 mp_limb_t 554 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) 555 { 556 AORS_1 (ref_subc_limb); 557 } 558 559 #define AORS_NC(operation) \ 560 { \ 561 mp_size_t i; \ 562 \ 563 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ 564 ASSERT (carry == 0 || carry == 1); \ 565 ASSERT (size >= 1); \ 566 ASSERT_MPN (s1p, size); \ 567 ASSERT_MPN (s2p, size); \ 568 \ 569 for (i = 0; i < size; i++) \ 570 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 571 return carry; \ 572 } 573 574 mp_limb_t 575 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 576 mp_limb_t carry) 577 { 578 AORS_NC (adc); 579 } 580 mp_limb_t 581 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 582 mp_limb_t carry) 583 { 584 AORS_NC (sbb); 585 } 586 587 588 mp_limb_t 589 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 590 { 591 return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0)); 592 } 593 mp_limb_t 594 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 595 { 596 return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0)); 597 } 598 599 mp_limb_t 600 refmpn_addcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd) 601 { 602 if (cnd != 0) 603 return refmpn_add_n (rp, s1p, s2p, size); 604 else 605 { 606 refmpn_copyi (rp, s1p, size); 607 return 0; 608 } 609 } 610 mp_limb_t 611 refmpn_subcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd) 612 { 613 if (cnd != 0) 614 return refmpn_sub_n (rp, s1p, s2p, size); 615 else 616 { 617 refmpn_copyi (rp, s1p, size); 618 return 0; 619 } 620 } 621 622 623 #define AORS_ERR1_N(operation) \ 624 { \ 625 mp_size_t i; \ 626 mp_limb_t carry2; \ 627 \ 628 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ 629 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ 630 ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \ 631 ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \ 632 ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \ 633 ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \ 634 ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \ 635 \ 636 ASSERT (carry == 0 || carry == 1); \ 637 ASSERT (size >= 1); \ 638 ASSERT_MPN (s1p, size); \ 639 ASSERT_MPN (s2p, size); \ 640 ASSERT_MPN (yp, size); \ 641 \ 642 ep[0] = ep[1] = CNST_LIMB(0); \ 643 \ 644 for (i = 0; i < size; i++) \ 645 { \ 646 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 647 if (carry == 1) \ 648 { \ 649 carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \ 650 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ 651 ASSERT (carry2 == 0); \ 652 } \ 653 } \ 654 return carry; \ 655 } 656 657 mp_limb_t 658 refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 659 mp_ptr ep, mp_srcptr yp, 660 mp_size_t size, mp_limb_t carry) 661 { 662 AORS_ERR1_N (adc); 663 } 664 mp_limb_t 665 refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 666 mp_ptr ep, mp_srcptr yp, 667 mp_size_t size, mp_limb_t carry) 668 { 669 AORS_ERR1_N (sbb); 670 } 671 672 673 #define AORS_ERR2_N(operation) \ 674 { \ 675 mp_size_t i; \ 676 mp_limb_t carry2; \ 677 \ 678 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ 679 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ 680 ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ 681 ASSERT (! 
refmpn_overlap_p (rp, size, y2p, size)); \ 682 ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \ 683 ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \ 684 ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \ 685 ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \ 686 ASSERT (! refmpn_overlap_p (ep, 4, rp, size)); \ 687 \ 688 ASSERT (carry == 0 || carry == 1); \ 689 ASSERT (size >= 1); \ 690 ASSERT_MPN (s1p, size); \ 691 ASSERT_MPN (s2p, size); \ 692 ASSERT_MPN (y1p, size); \ 693 ASSERT_MPN (y2p, size); \ 694 \ 695 ep[0] = ep[1] = CNST_LIMB(0); \ 696 ep[2] = ep[3] = CNST_LIMB(0); \ 697 \ 698 for (i = 0; i < size; i++) \ 699 { \ 700 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 701 if (carry == 1) \ 702 { \ 703 carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ 704 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ 705 ASSERT (carry2 == 0); \ 706 carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ 707 carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ 708 ASSERT (carry2 == 0); \ 709 } \ 710 } \ 711 return carry; \ 712 } 713 714 mp_limb_t 715 refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 716 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, 717 mp_size_t size, mp_limb_t carry) 718 { 719 AORS_ERR2_N (adc); 720 } 721 mp_limb_t 722 refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 723 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, 724 mp_size_t size, mp_limb_t carry) 725 { 726 AORS_ERR2_N (sbb); 727 } 728 729 730 #define AORS_ERR3_N(operation) \ 731 { \ 732 mp_size_t i; \ 733 mp_limb_t carry2; \ 734 \ 735 ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ 736 ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ 737 ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ 738 ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ 739 ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \ 740 ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \ 741 ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \ 742 ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \ 743 ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \ 744 ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \ 745 ASSERT (! 
refmpn_overlap_p (ep, 6, rp, size)); \ 746 \ 747 ASSERT (carry == 0 || carry == 1); \ 748 ASSERT (size >= 1); \ 749 ASSERT_MPN (s1p, size); \ 750 ASSERT_MPN (s2p, size); \ 751 ASSERT_MPN (y1p, size); \ 752 ASSERT_MPN (y2p, size); \ 753 ASSERT_MPN (y3p, size); \ 754 \ 755 ep[0] = ep[1] = CNST_LIMB(0); \ 756 ep[2] = ep[3] = CNST_LIMB(0); \ 757 ep[4] = ep[5] = CNST_LIMB(0); \ 758 \ 759 for (i = 0; i < size; i++) \ 760 { \ 761 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 762 if (carry == 1) \ 763 { \ 764 carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ 765 carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ 766 ASSERT (carry2 == 0); \ 767 carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ 768 carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ 769 ASSERT (carry2 == 0); \ 770 carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \ 771 carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \ 772 ASSERT (carry2 == 0); \ 773 } \ 774 } \ 775 return carry; \ 776 } 777 778 mp_limb_t 779 refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 780 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, 781 mp_size_t size, mp_limb_t carry) 782 { 783 AORS_ERR3_N (adc); 784 } 785 mp_limb_t 786 refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 787 mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, 788 mp_size_t size, mp_limb_t carry) 789 { 790 AORS_ERR3_N (sbb); 791 } 792 793 794 mp_limb_t 795 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 796 mp_size_t n, unsigned int s) 797 { 798 mp_limb_t cy; 799 mp_ptr tp; 800 801 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 802 ASSERT (n >= 1); 803 ASSERT (0 < s && s < GMP_NUMB_BITS); 804 ASSERT_MPN (up, n); 805 ASSERT_MPN (vp, n); 806 807 tp = refmpn_malloc_limbs (n); 808 cy = refmpn_lshift (tp, vp, n, s); 809 cy += refmpn_add_n (rp, up, tp, n); 810 free (tp); 811 return cy; 812 } 813 mp_limb_t 814 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 815 { 816 return refmpn_addlsh_n (rp, up, vp, n, 1); 817 } 818 mp_limb_t 819 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 820 { 821 return refmpn_addlsh_n (rp, up, vp, n, 2); 822 } 823 mp_limb_t 824 refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 825 { 826 return refmpn_addlsh_n (rp, rp, vp, n, s); 827 } 828 mp_limb_t 829 refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 830 { 831 return refmpn_addlsh_n (rp, rp, vp, n, 1); 832 } 833 mp_limb_t 834 refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 835 { 836 return refmpn_addlsh_n (rp, rp, vp, n, 2); 837 } 838 mp_limb_t 839 refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 840 { 841 return refmpn_addlsh_n (rp, vp, rp, n, s); 842 } 843 mp_limb_t 844 refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 845 { 846 return refmpn_addlsh_n (rp, vp, rp, n, 1); 847 } 848 mp_limb_t 849 refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 850 { 851 return refmpn_addlsh_n (rp, vp, rp, n, 2); 852 } 853 mp_limb_t 854 refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 855 mp_size_t n, unsigned int s, mp_limb_t carry) 856 { 857 mp_limb_t cy; 858 859 ASSERT (carry >= 0 && carry <= (CNST_LIMB(1) << s)); 860 861 cy = refmpn_addlsh_n (rp, up, vp, n, s); 862 cy += refmpn_add_1 (rp, rp, n, carry); 863 return cy; 864 } 865 mp_limb_t 866 refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 867 { 868 return refmpn_addlsh_nc (rp, up, vp, n, 1, carry); 869 } 870 mp_limb_t 
871 refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 872 { 873 return refmpn_addlsh_nc (rp, up, vp, n, 2, carry); 874 } 875 876 mp_limb_t 877 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 878 mp_size_t n, unsigned int s) 879 { 880 mp_limb_t cy; 881 mp_ptr tp; 882 883 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 884 ASSERT (n >= 1); 885 ASSERT (0 < s && s < GMP_NUMB_BITS); 886 ASSERT_MPN (up, n); 887 ASSERT_MPN (vp, n); 888 889 tp = refmpn_malloc_limbs (n); 890 cy = mpn_lshift (tp, vp, n, s); 891 cy += mpn_sub_n (rp, up, tp, n); 892 free (tp); 893 return cy; 894 } 895 mp_limb_t 896 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 897 { 898 return refmpn_sublsh_n (rp, up, vp, n, 1); 899 } 900 mp_limb_t 901 refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 902 { 903 return refmpn_sublsh_n (rp, up, vp, n, 2); 904 } 905 mp_limb_t 906 refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 907 { 908 return refmpn_sublsh_n (rp, rp, vp, n, s); 909 } 910 mp_limb_t 911 refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 912 { 913 return refmpn_sublsh_n (rp, rp, vp, n, 1); 914 } 915 mp_limb_t 916 refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 917 { 918 return refmpn_sublsh_n (rp, rp, vp, n, 2); 919 } 920 mp_limb_t 921 refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) 922 { 923 return refmpn_sublsh_n (rp, vp, rp, n, s); 924 } 925 mp_limb_t 926 refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 927 { 928 return refmpn_sublsh_n (rp, vp, rp, n, 1); 929 } 930 mp_limb_t 931 refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) 932 { 933 return refmpn_sublsh_n (rp, vp, rp, n, 2); 934 } 935 mp_limb_t 936 refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 937 mp_size_t n, unsigned int s, mp_limb_t carry) 938 { 939 mp_limb_t cy; 940 941 ASSERT (carry >= 0 && carry <= (CNST_LIMB(1) << s)); 942 943 cy = refmpn_sublsh_n (rp, up, vp, n, s); 944 cy += refmpn_sub_1 (rp, rp, n, carry); 945 return cy; 946 } 947 mp_limb_t 948 refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 949 { 950 return refmpn_sublsh_nc (rp, up, vp, n, 1, carry); 951 } 952 mp_limb_t 953 refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) 954 { 955 return refmpn_sublsh_nc (rp, up, vp, n, 2, carry); 956 } 957 958 mp_limb_signed_t 959 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 960 mp_size_t n, unsigned int s) 961 { 962 mp_limb_signed_t cy; 963 mp_ptr tp; 964 965 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 966 ASSERT (n >= 1); 967 ASSERT (0 < s && s < GMP_NUMB_BITS); 968 ASSERT_MPN (up, n); 969 ASSERT_MPN (vp, n); 970 971 tp = refmpn_malloc_limbs (n); 972 cy = mpn_lshift (tp, vp, n, s); 973 cy -= mpn_sub_n (rp, tp, up, n); 974 free (tp); 975 return cy; 976 } 977 mp_limb_signed_t 978 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 979 { 980 return refmpn_rsblsh_n (rp, up, vp, n, 1); 981 } 982 mp_limb_signed_t 983 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 984 { 985 return refmpn_rsblsh_n (rp, up, vp, n, 2); 986 } 987 mp_limb_signed_t 988 refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 989 mp_size_t n, unsigned int s, mp_limb_signed_t carry) 990 { 991 mp_limb_signed_t cy; 992 993 ASSERT (carry == -1 || (carry >> s) == 0); 994 995 cy = refmpn_rsblsh_n (rp, up, vp, n, s); 996 if (carry > 0) 997 cy += refmpn_add_1 (rp, 
rp, n, carry); 998 else 999 cy -= refmpn_sub_1 (rp, rp, n, -carry); 1000 return cy; 1001 } 1002 mp_limb_signed_t 1003 refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) 1004 { 1005 return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry); 1006 } 1007 mp_limb_signed_t 1008 refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) 1009 { 1010 return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry); 1011 } 1012 1013 mp_limb_t 1014 refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 1015 { 1016 mp_limb_t cya, cys; 1017 1018 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 1019 ASSERT (n >= 1); 1020 ASSERT_MPN (up, n); 1021 ASSERT_MPN (vp, n); 1022 1023 cya = mpn_add_n (rp, up, vp, n); 1024 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); 1025 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); 1026 return cys; 1027 } 1028 mp_limb_t 1029 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 1030 { 1031 mp_limb_t cya, cys; 1032 1033 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 1034 ASSERT (n >= 1); 1035 ASSERT_MPN (up, n); 1036 ASSERT_MPN (vp, n); 1037 1038 cya = mpn_sub_n (rp, up, vp, n); 1039 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); 1040 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); 1041 return cys; 1042 } 1043 1044 /* Twos complement, return borrow. */ 1045 mp_limb_t 1046 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size) 1047 { 1048 mp_ptr zeros; 1049 mp_limb_t ret; 1050 1051 ASSERT (size >= 1); 1052 1053 zeros = refmpn_malloc_limbs (size); 1054 refmpn_fill (zeros, size, CNST_LIMB(0)); 1055 ret = refmpn_sub_n (dst, zeros, src, size); 1056 free (zeros); 1057 return ret; 1058 } 1059 1060 1061 #define AORS(aors_n, aors_1) \ 1062 { \ 1063 mp_limb_t c; \ 1064 ASSERT (s1size >= s2size); \ 1065 ASSERT (s2size >= 1); \ 1066 c = aors_n (rp, s1p, s2p, s2size); \ 1067 if (s1size-s2size != 0) \ 1068 c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \ 1069 return c; \ 1070 } 1071 mp_limb_t 1072 refmpn_add (mp_ptr rp, 1073 mp_srcptr s1p, mp_size_t s1size, 1074 mp_srcptr s2p, mp_size_t s2size) 1075 { 1076 AORS (refmpn_add_n, refmpn_add_1); 1077 } 1078 mp_limb_t 1079 refmpn_sub (mp_ptr rp, 1080 mp_srcptr s1p, mp_size_t s1size, 1081 mp_srcptr s2p, mp_size_t s2size) 1082 { 1083 AORS (refmpn_sub_n, refmpn_sub_1); 1084 } 1085 1086 1087 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2) 1088 #define SHIFTLOW(x) ((x) >> GMP_LIMB_BITS/2) 1089 1090 #define LOWMASK (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1) 1091 #define HIGHMASK SHIFTHIGH(LOWMASK) 1092 1093 #define LOWPART(x) ((x) & LOWMASK) 1094 #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK) 1095 1096 /* Set return:*lo to x*y, using full limbs not nails. 
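   The product is formed schoolbook-style from half-limb pieces: writing
   b = 2^(GMP_LIMB_BITS/2), x = xh*b + xl and y = yh*b + yl, then
   x*y = xh*yh*b^2 + (xh*yl + xl*yh)*b + xl*yl.  The cross terms are folded
   into *lo and any carries are propagated into the returned high limb.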
*/ 1097 mp_limb_t 1098 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y) 1099 { 1100 mp_limb_t hi, s; 1101 1102 *lo = LOWPART(x) * LOWPART(y); 1103 hi = HIGHPART(x) * HIGHPART(y); 1104 1105 s = LOWPART(x) * HIGHPART(y); 1106 hi += HIGHPART(s); 1107 s = SHIFTHIGH(LOWPART(s)); 1108 *lo += s; 1109 hi += (*lo < s); 1110 1111 s = HIGHPART(x) * LOWPART(y); 1112 hi += HIGHPART(s); 1113 s = SHIFTHIGH(LOWPART(s)); 1114 *lo += s; 1115 hi += (*lo < s); 1116 1117 return hi; 1118 } 1119 1120 mp_limb_t 1121 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo) 1122 { 1123 return refmpn_umul_ppmm (lo, x, y); 1124 } 1125 1126 mp_limb_t 1127 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, 1128 mp_limb_t carry) 1129 { 1130 mp_size_t i; 1131 mp_limb_t hi, lo; 1132 1133 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 1134 ASSERT (size >= 1); 1135 ASSERT_MPN (sp, size); 1136 ASSERT_LIMB (multiplier); 1137 ASSERT_LIMB (carry); 1138 1139 multiplier <<= GMP_NAIL_BITS; 1140 for (i = 0; i < size; i++) 1141 { 1142 hi = refmpn_umul_ppmm (&lo, sp[i], multiplier); 1143 lo >>= GMP_NAIL_BITS; 1144 ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry))); 1145 rp[i] = lo; 1146 carry = hi; 1147 } 1148 return carry; 1149 } 1150 1151 mp_limb_t 1152 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 1153 { 1154 return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 1155 } 1156 1157 1158 mp_limb_t 1159 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, 1160 mp_srcptr mult, mp_size_t msize) 1161 { 1162 mp_ptr src_copy; 1163 mp_limb_t ret; 1164 mp_size_t i; 1165 1166 ASSERT (refmpn_overlap_fullonly_p (dst, src, size)); 1167 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); 1168 ASSERT (size >= msize); 1169 ASSERT_MPN (mult, msize); 1170 1171 /* in case dst==src */ 1172 src_copy = refmpn_malloc_limbs (size); 1173 refmpn_copyi (src_copy, src, size); 1174 src = src_copy; 1175 1176 dst[size] = refmpn_mul_1 (dst, src, size, mult[0]); 1177 for (i = 1; i < msize-1; i++) 1178 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1179 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1180 1181 free (src_copy); 1182 return ret; 1183 } 1184 1185 mp_limb_t 1186 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1187 { 1188 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2); 1189 } 1190 mp_limb_t 1191 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1192 { 1193 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3); 1194 } 1195 mp_limb_t 1196 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1197 { 1198 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4); 1199 } 1200 mp_limb_t 1201 refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1202 { 1203 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5); 1204 } 1205 mp_limb_t 1206 refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1207 { 1208 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6); 1209 } 1210 1211 #define AORSMUL_1C(operation_n) \ 1212 { \ 1213 mp_ptr p; \ 1214 mp_limb_t ret; \ 1215 \ 1216 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ 1217 \ 1218 p = refmpn_malloc_limbs (size); \ 1219 ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \ 1220 ret += operation_n (rp, rp, p, size); \ 1221 \ 1222 free (p); \ 1223 return ret; \ 1224 } 1225 1226 mp_limb_t 1227 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1228 mp_limb_t 
multiplier, mp_limb_t carry) 1229 { 1230 AORSMUL_1C (refmpn_add_n); 1231 } 1232 mp_limb_t 1233 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1234 mp_limb_t multiplier, mp_limb_t carry) 1235 { 1236 AORSMUL_1C (refmpn_sub_n); 1237 } 1238 1239 1240 mp_limb_t 1241 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 1242 { 1243 return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 1244 } 1245 mp_limb_t 1246 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 1247 { 1248 return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 1249 } 1250 1251 1252 mp_limb_t 1253 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, 1254 mp_srcptr mult, mp_size_t msize) 1255 { 1256 mp_ptr src_copy; 1257 mp_limb_t ret; 1258 mp_size_t i; 1259 1260 ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size)); 1261 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); 1262 ASSERT (size >= msize); 1263 ASSERT_MPN (mult, msize); 1264 1265 /* in case dst==src */ 1266 src_copy = refmpn_malloc_limbs (size); 1267 refmpn_copyi (src_copy, src, size); 1268 src = src_copy; 1269 1270 for (i = 0; i < msize-1; i++) 1271 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1272 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); 1273 1274 free (src_copy); 1275 return ret; 1276 } 1277 1278 mp_limb_t 1279 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1280 { 1281 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2); 1282 } 1283 mp_limb_t 1284 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1285 { 1286 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3); 1287 } 1288 mp_limb_t 1289 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1290 { 1291 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4); 1292 } 1293 mp_limb_t 1294 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1295 { 1296 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5); 1297 } 1298 mp_limb_t 1299 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1300 { 1301 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6); 1302 } 1303 mp_limb_t 1304 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1305 { 1306 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7); 1307 } 1308 mp_limb_t 1309 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 1310 { 1311 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8); 1312 } 1313 1314 mp_limb_t 1315 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p, 1316 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 1317 mp_limb_t carry) 1318 { 1319 mp_ptr p; 1320 mp_limb_t acy, scy; 1321 1322 /* Destinations can't overlap. */ 1323 ASSERT (! 
refmpn_overlap_p (r1p, size, r2p, size)); 1324 ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size)); 1325 ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size)); 1326 ASSERT (size >= 1); 1327 1328 /* in case r1p==s1p or r1p==s2p */ 1329 p = refmpn_malloc_limbs (size); 1330 1331 acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1); 1332 scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1); 1333 refmpn_copyi (r1p, p, size); 1334 1335 free (p); 1336 return 2 * acy + scy; 1337 } 1338 1339 mp_limb_t 1340 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, 1341 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 1342 { 1343 return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0)); 1344 } 1345 1346 1347 /* Right shift hi,lo and return the low limb of the result. 1348 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ 1349 mp_limb_t 1350 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) 1351 { 1352 ASSERT (shift < GMP_NUMB_BITS); 1353 if (shift == 0) 1354 return lo; 1355 else 1356 return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK; 1357 } 1358 1359 /* Left shift hi,lo and return the high limb of the result. 1360 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ 1361 mp_limb_t 1362 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) 1363 { 1364 ASSERT (shift < GMP_NUMB_BITS); 1365 if (shift == 0) 1366 return hi; 1367 else 1368 return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK; 1369 } 1370 1371 1372 mp_limb_t 1373 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1374 { 1375 mp_limb_t ret; 1376 mp_size_t i; 1377 1378 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 1379 ASSERT (size >= 1); 1380 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); 1381 ASSERT_MPN (sp, size); 1382 1383 ret = rshift_make (sp[0], CNST_LIMB(0), shift); 1384 1385 for (i = 0; i < size-1; i++) 1386 rp[i] = rshift_make (sp[i+1], sp[i], shift); 1387 1388 rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift); 1389 return ret; 1390 } 1391 1392 mp_limb_t 1393 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1394 { 1395 mp_limb_t ret; 1396 mp_size_t i; 1397 1398 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 1399 ASSERT (size >= 1); 1400 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); 1401 ASSERT_MPN (sp, size); 1402 1403 ret = lshift_make (CNST_LIMB(0), sp[size-1], shift); 1404 1405 for (i = size-2; i >= 0; i--) 1406 rp[i+1] = lshift_make (sp[i+1], sp[i], shift); 1407 1408 rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift); 1409 return ret; 1410 } 1411 1412 void 1413 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1414 { 1415 mp_size_t i; 1416 1417 /* We work downwards since mpn_lshiftc needs that. */ 1418 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 1419 1420 for (i = size - 1; i >= 0; i--) 1421 rp[i] = (~sp[i]) & GMP_NUMB_MASK; 1422 } 1423 1424 mp_limb_t 1425 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1426 { 1427 mp_limb_t res; 1428 1429 /* No asserts here, refmpn_lshift will assert what we need. 
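   (lshiftc is a shift-left-and-complement: the result limbs are the ones'
   complement of a plain left shift, hence the refmpn_com after the
   refmpn_lshift below, while the returned shifted-out bits are left
   uncomplemented.)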
*/ 1430 1431 res = refmpn_lshift (rp, sp, size, shift); 1432 refmpn_com (rp, rp, size); 1433 return res; 1434 } 1435 1436 /* accepting shift==0 and doing a plain copyi or copyd in that case */ 1437 mp_limb_t 1438 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1439 { 1440 if (shift == 0) 1441 { 1442 refmpn_copyi (rp, sp, size); 1443 return 0; 1444 } 1445 else 1446 { 1447 return refmpn_rshift (rp, sp, size, shift); 1448 } 1449 } 1450 mp_limb_t 1451 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1452 { 1453 if (shift == 0) 1454 { 1455 refmpn_copyd (rp, sp, size); 1456 return 0; 1457 } 1458 else 1459 { 1460 return refmpn_lshift (rp, sp, size, shift); 1461 } 1462 } 1463 1464 /* accepting size==0 too */ 1465 mp_limb_t 1466 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1467 unsigned shift) 1468 { 1469 return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift)); 1470 } 1471 mp_limb_t 1472 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1473 unsigned shift) 1474 { 1475 return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift)); 1476 } 1477 1478 /* Divide h,l by d, return quotient, store remainder to *rp. 1479 Operates on full limbs, not nails. 1480 Must have h < d. 1481 __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */ 1482 mp_limb_t 1483 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d) 1484 { 1485 mp_limb_t q, r; 1486 int n; 1487 1488 ASSERT (d != 0); 1489 ASSERT (h < d); 1490 1491 #if 0 1492 udiv_qrnnd (q, r, h, l, d); 1493 *rp = r; 1494 return q; 1495 #endif 1496 1497 n = refmpn_count_leading_zeros (d); 1498 d <<= n; 1499 1500 if (n != 0) 1501 { 1502 h = (h << n) | (l >> (GMP_LIMB_BITS - n)); 1503 l <<= n; 1504 } 1505 1506 __udiv_qrnnd_c (q, r, h, l, d); 1507 r >>= n; 1508 *rp = r; 1509 return q; 1510 } 1511 1512 mp_limb_t 1513 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp) 1514 { 1515 return refmpn_udiv_qrnnd (rp, h, l, d); 1516 } 1517 1518 /* This little subroutine avoids some bad code generation from i386 gcc 3.0 1519 -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). 
*/ 1520 static mp_limb_t 1521 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1522 mp_limb_t divisor, mp_limb_t carry) 1523 { 1524 mp_size_t i; 1525 mp_limb_t rem[1]; 1526 for (i = size-1; i >= 0; i--) 1527 { 1528 rp[i] = refmpn_udiv_qrnnd (rem, carry, 1529 sp[i] << GMP_NAIL_BITS, 1530 divisor << GMP_NAIL_BITS); 1531 carry = *rem >> GMP_NAIL_BITS; 1532 } 1533 return carry; 1534 } 1535 1536 mp_limb_t 1537 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1538 mp_limb_t divisor, mp_limb_t carry) 1539 { 1540 mp_ptr sp_orig; 1541 mp_ptr prod; 1542 mp_limb_t carry_orig; 1543 1544 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 1545 ASSERT (size >= 0); 1546 ASSERT (carry < divisor); 1547 ASSERT_MPN (sp, size); 1548 ASSERT_LIMB (divisor); 1549 ASSERT_LIMB (carry); 1550 1551 if (size == 0) 1552 return carry; 1553 1554 sp_orig = refmpn_memdup_limbs (sp, size); 1555 prod = refmpn_malloc_limbs (size); 1556 carry_orig = carry; 1557 1558 carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry); 1559 1560 /* check by multiplying back */ 1561 #if 0 1562 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n", 1563 size, divisor, carry_orig, carry); 1564 mpn_trace("s",sp_copy,size); 1565 mpn_trace("r",rp,size); 1566 printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry)); 1567 mpn_trace("p",prod,size); 1568 #endif 1569 ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig); 1570 ASSERT (refmpn_cmp (prod, sp_orig, size) == 0); 1571 free (sp_orig); 1572 free (prod); 1573 1574 return carry; 1575 } 1576 1577 mp_limb_t 1578 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1579 { 1580 return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0)); 1581 } 1582 1583 1584 mp_limb_t 1585 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1586 mp_limb_t carry) 1587 { 1588 mp_ptr p = refmpn_malloc_limbs (size); 1589 carry = refmpn_divmod_1c (p, sp, size, divisor, carry); 1590 free (p); 1591 return carry; 1592 } 1593 1594 mp_limb_t 1595 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1596 { 1597 return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0)); 1598 } 1599 1600 mp_limb_t 1601 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1602 mp_limb_t inverse) 1603 { 1604 ASSERT (divisor & GMP_NUMB_HIGHBIT); 1605 ASSERT (inverse == refmpn_invert_limb (divisor)); 1606 return refmpn_mod_1 (sp, size, divisor); 1607 } 1608 1609 /* This implementation will be rather slow, but has the advantage of being 1610 in a different style than the libgmp versions. 
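   It simply takes the remainder modulo 2^(3*GMP_NUMB_BITS/4) - 1 using
   mpn_mod_1; mpn_mod_34lsub1 itself only promises a value congruent to the
   input modulo that divisor, and the fully reduced remainder satisfies
   that.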
*/ 1611 mp_limb_t 1612 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) 1613 { 1614 ASSERT ((GMP_NUMB_BITS % 4) == 0); 1615 return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1); 1616 } 1617 1618 1619 mp_limb_t 1620 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize, 1621 mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1622 mp_limb_t carry) 1623 { 1624 mp_ptr z; 1625 1626 z = refmpn_malloc_limbs (xsize); 1627 refmpn_fill (z, xsize, CNST_LIMB(0)); 1628 1629 carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry); 1630 carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry); 1631 1632 free (z); 1633 return carry; 1634 } 1635 1636 mp_limb_t 1637 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize, 1638 mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1639 { 1640 return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0)); 1641 } 1642 1643 mp_limb_t 1644 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize, 1645 mp_srcptr sp, mp_size_t size, 1646 mp_limb_t divisor, mp_limb_t inverse, unsigned shift) 1647 { 1648 ASSERT (size >= 0); 1649 ASSERT (shift == refmpn_count_leading_zeros (divisor)); 1650 ASSERT (inverse == refmpn_invert_limb (divisor << shift)); 1651 1652 return refmpn_divrem_1 (rp, xsize, sp, size, divisor); 1653 } 1654 1655 mp_limb_t 1656 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn, 1657 mp_ptr np, mp_size_t nn, 1658 mp_srcptr dp) 1659 { 1660 mp_ptr tp; 1661 mp_limb_t qh; 1662 1663 tp = refmpn_malloc_limbs (nn + qxn); 1664 refmpn_zero (tp, qxn); 1665 refmpn_copyi (tp + qxn, np, nn); 1666 qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2); 1667 refmpn_copyi (np, tp, 2); 1668 free (tp); 1669 return qh; 1670 } 1671 1672 /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers 1673 paper, figure 8.1 m', where b=2^GMP_LIMB_BITS. Note that -d-1 < d 1674 since d has the high bit set. */ 1675 1676 mp_limb_t 1677 refmpn_invert_limb (mp_limb_t d) 1678 { 1679 mp_limb_t r; 1680 ASSERT (d & GMP_LIMB_HIGHBIT); 1681 return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d); 1682 } 1683 1684 void 1685 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 1686 { 1687 mp_ptr qp, tp; 1688 TMP_DECL; 1689 TMP_MARK; 1690 1691 tp = TMP_ALLOC_LIMBS (2 * n); 1692 qp = TMP_ALLOC_LIMBS (n + 1); 1693 1694 MPN_ZERO (tp, 2 * n); mpn_sub_1 (tp, tp, 2 * n, 1); 1695 1696 refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n); 1697 refmpn_copyi (rp, qp, n); 1698 1699 TMP_FREE; 1700 } 1701 1702 void 1703 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 1704 { 1705 mp_ptr tp; 1706 mp_limb_t binv; 1707 TMP_DECL; 1708 TMP_MARK; 1709 1710 /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing 1711 code. To make up for it, we check that the inverse is correct using a 1712 multiply. */ 1713 1714 tp = TMP_ALLOC_LIMBS (2 * n); 1715 1716 MPN_ZERO (tp, n); 1717 tp[0] = 1; 1718 binvert_limb (binv, up[0]); 1719 mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv); 1720 1721 refmpn_mul_n (tp, rp, up, n); 1722 ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1)); 1723 1724 TMP_FREE; 1725 } 1726 1727 /* The aim is to produce a dst quotient and return a remainder c, satisfying 1728 c*b^n + src-i == 3*dst, where i is the incoming carry. 1729 1730 Some value c==0, c==1 or c==2 will satisfy, so just try each. 
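   (Exactly one of them can succeed: b^n is a power of 2 and so not
   divisible by 3, which makes the three candidates c*b^n + src-i, for
   c = 0, 1, 2, pairwise distinct mod 3.)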
1731 1732 If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero 1733 remainder from the first division attempt determines the correct 1734 remainder (3-c), but don't bother with that, since we can't guarantee 1735 anything about GMP_NUMB_BITS when using nails. 1736 1737 If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos 1738 complement negative, ie. b^n+a-i, and the calculation produces c1 1739 satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This 1740 means it's enough to just add any borrow back at the end. 1741 1742 A borrow only occurs when a==0 or a==1, and, by the same reasoning as in 1743 mpn/generic/diveby3.c, the c1 that results in those cases will only be 0 1744 or 1 respectively, so with 1 added the final return value is still in the 1745 prescribed range 0 to 2. */ 1746 1747 mp_limb_t 1748 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry) 1749 { 1750 mp_ptr spcopy; 1751 mp_limb_t c, cs; 1752 1753 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 1754 ASSERT (size >= 1); 1755 ASSERT (carry <= 2); 1756 ASSERT_MPN (sp, size); 1757 1758 spcopy = refmpn_malloc_limbs (size); 1759 cs = refmpn_sub_1 (spcopy, sp, size, carry); 1760 1761 for (c = 0; c <= 2; c++) 1762 if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0) 1763 goto done; 1764 ASSERT_FAIL (no value of c satisfies); 1765 1766 done: 1767 c += cs; 1768 ASSERT (c <= 2); 1769 1770 free (spcopy); 1771 return c; 1772 } 1773 1774 mp_limb_t 1775 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1776 { 1777 return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0)); 1778 } 1779 1780 1781 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */ 1782 void 1783 refmpn_mul_basecase (mp_ptr prodp, 1784 mp_srcptr up, mp_size_t usize, 1785 mp_srcptr vp, mp_size_t vsize) 1786 { 1787 mp_size_t i; 1788 1789 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); 1790 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); 1791 ASSERT (usize >= vsize); 1792 ASSERT (vsize >= 1); 1793 ASSERT_MPN (up, usize); 1794 ASSERT_MPN (vp, vsize); 1795 1796 prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]); 1797 for (i = 1; i < vsize; i++) 1798 prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]); 1799 } 1800 1801 1802 /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */ 1803 void 1804 refmpn_mulmid_basecase (mp_ptr rp, 1805 mp_srcptr up, mp_size_t un, 1806 mp_srcptr vp, mp_size_t vn) 1807 { 1808 mp_limb_t cy; 1809 mp_size_t i; 1810 1811 ASSERT (un >= vn); 1812 ASSERT (vn >= 1); 1813 ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un)); 1814 ASSERT (! 
refmpn_overlap_p (rp, un - vn + 3, vp, vn)); 1815 ASSERT_MPN (up, un); 1816 ASSERT_MPN (vp, vn); 1817 1818 rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]); 1819 rp[un - vn + 2] = CNST_LIMB (0); 1820 for (i = 1; i < vn; i++) 1821 { 1822 cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]); 1823 cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy); 1824 cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy); 1825 ASSERT (cy == 0); 1826 } 1827 } 1828 1829 void 1830 refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, 1831 mp_ptr scratch) 1832 { 1833 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); 1834 } 1835 1836 void 1837 refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 1838 { 1839 /* FIXME: this could be made faster by using refmpn_mul and then subtracting 1840 off products near the middle product region boundary */ 1841 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); 1842 } 1843 1844 void 1845 refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un, 1846 mp_srcptr vp, mp_size_t vn) 1847 { 1848 /* FIXME: this could be made faster by using refmpn_mul and then subtracting 1849 off products near the middle product region boundary */ 1850 refmpn_mulmid_basecase (rp, up, un, vp, vn); 1851 } 1852 1853 1854 1855 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD)) 1856 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD)) 1857 #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD)) 1858 #if WANT_FFT 1859 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD)) 1860 #else 1861 #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */ 1862 #endif 1863 1864 void 1865 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) 1866 { 1867 mp_ptr tp; 1868 mp_size_t tn; 1869 1870 if (vn < TOOM3_THRESHOLD) 1871 { 1872 /* In the mpn_mul_basecase and toom2 range, use our own mul_basecase. */ 1873 if (vn != 0) 1874 refmpn_mul_basecase (wp, up, un, vp, vn); 1875 else 1876 MPN_ZERO (wp, un); 1877 return; 1878 } 1879 1880 if (vn < TOOM4_THRESHOLD) 1881 { 1882 /* In the toom3 range, use mpn_toom22_mul. */ 1883 tn = 2 * vn + mpn_toom22_mul_itch (vn, vn); 1884 tp = refmpn_malloc_limbs (tn); 1885 mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1886 } 1887 else if (vn < TOOM6_THRESHOLD) 1888 { 1889 /* In the toom4 range, use mpn_toom33_mul. */ 1890 tn = 2 * vn + mpn_toom33_mul_itch (vn, vn); 1891 tp = refmpn_malloc_limbs (tn); 1892 mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1893 } 1894 else if (vn < FFT_THRESHOLD) 1895 { 1896 /* In the toom6 range, use mpn_toom44_mul. */ 1897 tn = 2 * vn + mpn_toom44_mul_itch (vn, vn); 1898 tp = refmpn_malloc_limbs (tn); 1899 mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1900 } 1901 else 1902 { 1903 /* Finally, for the largest operands, use mpn_toom6h_mul. 
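   As in the smaller ranges above, the algorithm used is one level below
   the one the library itself would pick at this size, presumably so that
   the result being checked is always computed by a different code path.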
*/ 1904 tn = 2 * vn + mpn_toom6h_mul_itch (vn, vn); 1905 tp = refmpn_malloc_limbs (tn); 1906 mpn_toom6h_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1907 } 1908 1909 if (un != vn) 1910 { 1911 if (un - vn < vn) 1912 refmpn_mul (wp + vn, vp, vn, up + vn, un - vn); 1913 else 1914 refmpn_mul (wp + vn, up + vn, un - vn, vp, vn); 1915 1916 MPN_COPY (wp, tp, vn); 1917 ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn)); 1918 } 1919 else 1920 { 1921 MPN_COPY (wp, tp, 2 * vn); 1922 } 1923 1924 free (tp); 1925 } 1926 1927 void 1928 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) 1929 { 1930 refmpn_mul (prodp, up, size, vp, size); 1931 } 1932 1933 void 1934 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) 1935 { 1936 mp_ptr tp = refmpn_malloc_limbs (2*size); 1937 refmpn_mul (tp, up, size, vp, size); 1938 refmpn_copyi (prodp, tp, size); 1939 free (tp); 1940 } 1941 1942 void 1943 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size) 1944 { 1945 refmpn_mul (dst, src, size, src, size); 1946 } 1947 1948 /* Allowing usize<vsize, usize==0 or vsize==0. */ 1949 void 1950 refmpn_mul_any (mp_ptr prodp, 1951 mp_srcptr up, mp_size_t usize, 1952 mp_srcptr vp, mp_size_t vsize) 1953 { 1954 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); 1955 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); 1956 ASSERT (usize >= 0); 1957 ASSERT (vsize >= 0); 1958 ASSERT_MPN (up, usize); 1959 ASSERT_MPN (vp, vsize); 1960 1961 if (usize == 0) 1962 { 1963 refmpn_fill (prodp, vsize, CNST_LIMB(0)); 1964 return; 1965 } 1966 1967 if (vsize == 0) 1968 { 1969 refmpn_fill (prodp, usize, CNST_LIMB(0)); 1970 return; 1971 } 1972 1973 if (usize >= vsize) 1974 refmpn_mul (prodp, up, usize, vp, vsize); 1975 else 1976 refmpn_mul (prodp, vp, vsize, up, usize); 1977 } 1978 1979 1980 mp_limb_t 1981 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y) 1982 { 1983 mp_limb_t x; 1984 int twos; 1985 1986 ASSERT (y != 0); 1987 ASSERT (! refmpn_zero_p (xp, xsize)); 1988 ASSERT_MPN (xp, xsize); 1989 ASSERT_LIMB (y); 1990 1991 x = refmpn_mod_1 (xp, xsize, y); 1992 if (x == 0) 1993 return y; 1994 1995 twos = 0; 1996 while ((x & 1) == 0 && (y & 1) == 0) 1997 { 1998 x >>= 1; 1999 y >>= 1; 2000 twos++; 2001 } 2002 2003 for (;;) 2004 { 2005 while ((x & 1) == 0) x >>= 1; 2006 while ((y & 1) == 0) y >>= 1; 2007 2008 if (x < y) 2009 MP_LIMB_T_SWAP (x, y); 2010 2011 x -= y; 2012 if (x == 0) 2013 break; 2014 } 2015 2016 return y << twos; 2017 } 2018 2019 2020 /* Based on the full limb x, not nails. */ 2021 unsigned 2022 refmpn_count_leading_zeros (mp_limb_t x) 2023 { 2024 unsigned n = 0; 2025 2026 ASSERT (x != 0); 2027 2028 while ((x & GMP_LIMB_HIGHBIT) == 0) 2029 { 2030 x <<= 1; 2031 n++; 2032 } 2033 return n; 2034 } 2035 2036 /* Full limbs allowed, not limited to nails. */ 2037 unsigned 2038 refmpn_count_trailing_zeros (mp_limb_t x) 2039 { 2040 unsigned n = 0; 2041 2042 ASSERT (x != 0); 2043 ASSERT_LIMB (x); 2044 2045 while ((x & 1) == 0) 2046 { 2047 x >>= 1; 2048 n++; 2049 } 2050 return n; 2051 } 2052 2053 /* Strip factors of two (low zero bits) from {p,size} by right shifting. 2054 The return value is the number of twos stripped. */ 2055 mp_size_t 2056 refmpn_strip_twos (mp_ptr p, mp_size_t size) 2057 { 2058 mp_size_t limbs; 2059 unsigned shift; 2060 2061 ASSERT (size >= 1); 2062 ASSERT (! 
refmpn_zero_p (p, size)); 2063 ASSERT_MPN (p, size); 2064 2065 for (limbs = 0; p[0] == 0; limbs++) 2066 { 2067 refmpn_copyi (p, p+1, size-1); 2068 p[size-1] = 0; 2069 } 2070 2071 shift = refmpn_count_trailing_zeros (p[0]); 2072 if (shift) 2073 refmpn_rshift (p, p, size, shift); 2074 2075 return limbs*GMP_NUMB_BITS + shift; 2076 } 2077 2078 mp_limb_t 2079 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize) 2080 { 2081 int cmp; 2082 2083 ASSERT (ysize >= 1); 2084 ASSERT (xsize >= ysize); 2085 ASSERT ((xp[0] & 1) != 0); 2086 ASSERT ((yp[0] & 1) != 0); 2087 /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */ 2088 ASSERT (yp[ysize-1] != 0); 2089 ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize)); 2090 ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize)); 2091 ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize)); 2092 if (xsize == ysize) 2093 ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1])); 2094 ASSERT_MPN (xp, xsize); 2095 ASSERT_MPN (yp, ysize); 2096 2097 refmpn_strip_twos (xp, xsize); 2098 MPN_NORMALIZE (xp, xsize); 2099 MPN_NORMALIZE (yp, ysize); 2100 2101 for (;;) 2102 { 2103 cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize); 2104 if (cmp == 0) 2105 break; 2106 if (cmp < 0) 2107 MPN_PTR_SWAP (xp,xsize, yp,ysize); 2108 2109 ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize)); 2110 2111 refmpn_strip_twos (xp, xsize); 2112 MPN_NORMALIZE (xp, xsize); 2113 } 2114 2115 refmpn_copyi (gp, xp, xsize); 2116 return xsize; 2117 } 2118 2119 unsigned long 2120 ref_popc_limb (mp_limb_t src) 2121 { 2122 unsigned long count; 2123 int i; 2124 2125 count = 0; 2126 for (i = 0; i < GMP_LIMB_BITS; i++) 2127 { 2128 count += (src & 1); 2129 src >>= 1; 2130 } 2131 return count; 2132 } 2133 2134 unsigned long 2135 refmpn_popcount (mp_srcptr sp, mp_size_t size) 2136 { 2137 unsigned long count = 0; 2138 mp_size_t i; 2139 2140 ASSERT (size >= 0); 2141 ASSERT_MPN (sp, size); 2142 2143 for (i = 0; i < size; i++) 2144 count += ref_popc_limb (sp[i]); 2145 return count; 2146 } 2147 2148 unsigned long 2149 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 2150 { 2151 mp_ptr d; 2152 unsigned long count; 2153 2154 ASSERT (size >= 0); 2155 ASSERT_MPN (s1p, size); 2156 ASSERT_MPN (s2p, size); 2157 2158 if (size == 0) 2159 return 0; 2160 2161 d = refmpn_malloc_limbs (size); 2162 refmpn_xor_n (d, s1p, s2p, size); 2163 count = refmpn_popcount (d, size); 2164 free (d); 2165 return count; 2166 } 2167 2168 2169 /* set r to a%d */ 2170 void 2171 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2]) 2172 { 2173 mp_limb_t D[2]; 2174 int n; 2175 2176 ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2)); 2177 ASSERT_MPN (a, 2); 2178 ASSERT_MPN (d, 2); 2179 2180 D[1] = d[1], D[0] = d[0]; 2181 r[1] = a[1], r[0] = a[0]; 2182 n = 0; 2183 2184 for (;;) 2185 { 2186 if (D[1] & GMP_NUMB_HIGHBIT) 2187 break; 2188 if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0) 2189 break; 2190 refmpn_lshift (D, D, (mp_size_t) 2, 1); 2191 n++; 2192 ASSERT (n <= GMP_NUMB_BITS); 2193 } 2194 2195 while (n >= 0) 2196 { 2197 if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0) 2198 ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2)); 2199 refmpn_rshift (D, D, (mp_size_t) 2, 1); 2200 n--; 2201 } 2202 2203 ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0); 2204 } 2205 2206 2207 2208 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in 2209 particular the trial quotient is allowed to be 2 too big. 
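   (The trial quotient is formed from just the top two limbs of the running
   remainder and the top limb of the divisor; with the divisor normalized so
   its high bit is set, such an estimate is never too small and is at most 2
   too big, which the add-back steps in the loop correct.)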
/* set r to a%d */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t  D[2];
  int  n;

  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
        break;
      refmpn_lshift (D, D, (mp_size_t) 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  while (n >= 0)
    {
      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
      refmpn_rshift (D, D, (mp_size_t) 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
}


/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big.  */
mp_limb_t
refmpn_sb_div_qr (mp_ptr qp,
                  mp_ptr np, mp_size_t nsize,
                  mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t  retval = 0;
  mp_size_t  i;
  mp_limb_t  d1 = dp[dsize-1];
  mp_ptr  np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  for (i--; i >= 0; i--)
    {
      mp_limb_t  n0 = np[i+dsize];
      mp_limb_t  n1 = np[i+dsize-1];
      mp_limb_t  q, dummy_r;

      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = GMP_NUMB_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
                               d1 << GMP_NAIL_BITS);

      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
#if 0   /* ASSERT triggers gcc 4.2.1 bug */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
#endif

  /* multiply back to original */
  {
    mp_ptr  mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize, mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}
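
/* Usage sketch only (not part of the reference code): the divisor passed to
   refmpn_sb_div_qr must be normalized, i.e. dp[dsize-1] must have
   GMP_NUMB_HIGHBIT set.  With that guarantee each single-limb trial quotient
   is at most 2 too big, which is why the add-back loop above needs at most
   two corrections.  A typical call looks like

     mp_limb_t qh = refmpn_sb_div_qr (qp, np, nsize, dp, dsize);

   after which {np,dsize} holds the remainder, {qp,nsize-dsize} the low
   quotient limbs and qh (0 or 1) the most significant quotient limb.
   Callers with an unnormalized divisor shift first, as refmpn_tdiv_qr below
   does.  */
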
/* Normalize the divisor so its high limb has the most significant bit set,
   do the division with refmpn_sb_div_qr, then shift the remainder back
   down.  */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);
  ASSERT (dsize > 0);
  ASSERT (dp[dsize-1] != 0);

  if (dsize == 1)
    {
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr  d2p = refmpn_malloc_limbs (dsize);
      int  norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;

      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}

mp_limb_t
refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
  mp_size_t j;
  mp_limb_t cy;

  ASSERT_MPN (up, 2*n);
  /* ASSERT about directed overlap rp, up */
  /* ASSERT about overlap rp, mp */
  /* ASSERT about overlap up, mp */

  for (j = n - 1; j >= 0; j--)
    {
      up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
      up++;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  return cy;
}

size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char  *d;
  size_t  dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* Move result back and decrement dsize if we didn't generate
     the maximum possible digits.  */
  if (d != dst)
    {
      size_t  i;
      dsize -= d - dst;
      for (i = 0; i < dsize; i++)
        dst[i] = d[i];
    }

  if (POW2_P (base))
    free (src);

  return dsize;
}
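
/* Illustration only (not part of the reference code): refmpn_get_str
   produces digit values, not ASCII characters, most significant digit
   first.  With a hypothetical single-limb src = {1234} and base 10,

     unsigned char buf[8];
     size_t len = refmpn_get_str (buf, 10, src, (mp_size_t) 1);

   leaves len == 4 and buf holding the bytes 1, 2, 3, 4.  */
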
mp_limb_t
ref_bswap_limb (mp_limb_t src)
{
  mp_limb_t  dst;
  int  i;

  dst = 0;
  for (i = 0; i < BYTES_PER_MP_LIMB; i++)
    {
      dst = (dst << 8) + (src & 0xFF);
      src >>= 8;
    }
  return dst;
}


/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times.  */

#if GMP_LIMB_BITS == 32
#define RAND_A  CNST_LIMB(0x29CF535)
#endif
#if GMP_LIMB_BITS == 64
#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
#endif

mp_limb_t  refmpn_random_seed;

mp_limb_t
refmpn_random_half (void)
{
  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  return (refmpn_random_seed >> GMP_LIMB_BITS/2);
}

mp_limb_t
refmpn_random_limb (void)
{
  return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
          | refmpn_random_half ()) & GMP_NUMB_MASK;
}

void
refmpn_random (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  if (GMP_NAIL_BITS == 0)
    {
      mpn_random (ptr, size);
      return;
    }

  for (i = 0; i < size; i++)
    ptr[i] = refmpn_random_limb ();
}

void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  mp_limb_t  bit, mask, limb;
  int  run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

#define RUN_MODULUS  32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;
    }
}
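
/* Note (illustrative, not part of the reference code): like mpn_random2,
   refmpn_random2 builds numbers from alternating runs of one bits and zero
   bits, each run 1 to RUN_MODULUS bits long and starting at a random
   position in the high limb; such values exercise carries and borrows
   better than uniformly random limbs.  The underlying generator is the
   linear congruential step in refmpn_random_half,
   seed = seed * RAND_A + 1 (wrapping modulo 2^GMP_LIMB_BITS), of which only
   the high half is used.  The global seed can be set directly, e.g.

     refmpn_random_seed = 123;    (hypothetical seed value)
     refmpn_random2 (ptr, size);
 */
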
/* This is a simple bitwise algorithm working high to low across "s" and
   testing each time whether setting the bit would make s^2 exceed n.  */
mp_size_t
refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
{
  mp_ptr  tp, dp;
  mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
  unsigned  ibit;
  long  i;
  mp_limb_t  c;

  ASSERT (nsize >= 0);

  /* If n==0, then s=0 and r=0.  */
  if (nsize == 0)
    return 0;

  ASSERT (np[nsize - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));

  /* root */
  ssize = (nsize+1)/2;
  refmpn_zero (sp, ssize);

  /* the remainder so far */
  dp = refmpn_memdup_limbs (np, nsize);
  dsize = nsize;

  /* temporary */
  talloc = 2*ssize + 1;
  tp = refmpn_malloc_limbs (talloc);

  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
    {
      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if
         2^i is added to it */

      ilimbs = (i+1) / GMP_NUMB_BITS;
      ibit = (i+1) % GMP_NUMB_BITS;
      refmpn_zero (tp, ilimbs);
      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
      tsize = ilimbs + ssize;
      tp[tsize] = c;
      tsize += (c != 0);

      ilimbs = (2*i) / GMP_NUMB_BITS;
      ibit = (2*i) % GMP_NUMB_BITS;
      if (ilimbs + 1 > tsize)
        {
          refmpn_zero_extend (tp, tsize, ilimbs + 1);
          tsize = ilimbs + 1;
        }
      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
                        CNST_LIMB(1) << ibit);
      ASSERT (tsize < talloc);
      tp[tsize] = c;
      tsize += (c != 0);

      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
        {
          /* set this bit in s and subtract from the remainder */
          refmpn_setbit (sp, i);

          ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
          dsize = refmpn_normalize (dp, dsize);
        }
    }

  if (rp == NULL)
    {
      ret = ! refmpn_zero_p (dp, dsize);
    }
  else
    {
      ASSERT (dsize == 0 || dp[dsize-1] != 0);
      refmpn_copy (rp, dp, dsize);
      ret = dsize;
    }

  free (dp);
  free (tp);
  return ret;
}
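
/* Illustration only (not part of the reference code): for a hypothetical
   single-limb operand np = {10}, refmpn_sqrtrem stores sp = {3}; with a
   non-NULL rp it also stores rp = {1} and returns 1, the number of
   remainder limbs, since 3^2 + 1 = 10.  With rp == NULL the return value
   is merely non-zero when the remainder is non-zero.  */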