1 /* Reference mpn functions, designed to be simple, portable and independent 2 of the normal gmp code. Speed isn't a consideration. 3 4 Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 5 2007, 2008, 2009 Free Software Foundation, Inc. 6 7 This file is part of the GNU MP Library. 8 9 The GNU MP Library is free software; you can redistribute it and/or modify 10 it under the terms of the GNU Lesser General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 The GNU MP Library is distributed in the hope that it will be useful, but 15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 License for more details. 18 19 You should have received a copy of the GNU Lesser General Public License 20 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 21 22 23 /* Most routines have assertions representing what the mpn routines are 24 supposed to accept. Many of these reference routines do sensible things 25 outside these ranges (eg. for size==0), but the assertions are present to 26 pick up bad parameters passed here that are about to be passed the same 27 to a real mpn routine being compared. */ 28 29 /* always do assertion checking */ 30 #define WANT_ASSERT 1 31 32 #include <stdio.h> /* for NULL */ 33 #include <stdlib.h> /* for malloc */ 34 35 #include "gmp.h" 36 #include "gmp-impl.h" 37 #include "longlong.h" 38 39 #include "tests.h" 40 41 42 43 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes 44 in bytes. */ 45 int 46 byte_overlap_p (const void *v_xp, mp_size_t xsize, 47 const void *v_yp, mp_size_t ysize) 48 { 49 const char *xp = v_xp; 50 const char *yp = v_yp; 51 52 ASSERT (xsize >= 0); 53 ASSERT (ysize >= 0); 54 55 /* no wraparounds */ 56 ASSERT (xp+xsize >= xp); 57 ASSERT (yp+ysize >= yp); 58 59 if (xp + xsize <= yp) 60 return 0; 61 62 if (yp + ysize <= xp) 63 return 0; 64 65 return 1; 66 } 67 68 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */ 69 int 70 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) 71 { 72 return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB, 73 yp, ysize * BYTES_PER_MP_LIMB); 74 } 75 76 /* Check overlap for a routine defined to work low to high. */ 77 int 78 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) 79 { 80 return (dst <= src || ! refmpn_overlap_p (dst, size, src, size)); 81 } 82 83 /* Check overlap for a routine defined to work high to low. */ 84 int 85 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) 86 { 87 return (dst >= src || ! refmpn_overlap_p (dst, size, src, size)); 88 } 89 90 /* Check overlap for a standard routine requiring equal or separate. */ 91 int 92 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) 93 { 94 return (dst == src || ! 
refmpn_overlap_p (dst, size, src, size)); 95 } 96 int 97 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2, 98 mp_size_t size) 99 { 100 return (refmpn_overlap_fullonly_p (dst, src1, size) 101 && refmpn_overlap_fullonly_p (dst, src2, size)); 102 } 103 104 105 mp_ptr 106 refmpn_malloc_limbs (mp_size_t size) 107 { 108 mp_ptr p; 109 ASSERT (size >= 0); 110 if (size == 0) 111 size = 1; 112 p = (mp_ptr) malloc ((size_t) (size * BYTES_PER_MP_LIMB)); 113 ASSERT (p != NULL); 114 return p; 115 } 116 117 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free 118 * memory allocated by refmpn_malloc_limbs_aligned. */ 119 void 120 refmpn_free_limbs (mp_ptr p) 121 { 122 free (p); 123 } 124 125 mp_ptr 126 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size) 127 { 128 mp_ptr p; 129 p = refmpn_malloc_limbs (size); 130 refmpn_copyi (p, ptr, size); 131 return p; 132 } 133 134 /* malloc n limbs on a multiple of m bytes boundary */ 135 mp_ptr 136 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m) 137 { 138 return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m); 139 } 140 141 142 void 143 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value) 144 { 145 mp_size_t i; 146 ASSERT (size >= 0); 147 for (i = 0; i < size; i++) 148 ptr[i] = value; 149 } 150 151 void 152 refmpn_zero (mp_ptr ptr, mp_size_t size) 153 { 154 refmpn_fill (ptr, size, CNST_LIMB(0)); 155 } 156 157 void 158 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize) 159 { 160 ASSERT (newsize >= oldsize); 161 refmpn_zero (ptr+oldsize, newsize-oldsize); 162 } 163 164 int 165 refmpn_zero_p (mp_srcptr ptr, mp_size_t size) 166 { 167 mp_size_t i; 168 for (i = 0; i < size; i++) 169 if (ptr[i] != 0) 170 return 0; 171 return 1; 172 } 173 174 mp_size_t 175 refmpn_normalize (mp_srcptr ptr, mp_size_t size) 176 { 177 ASSERT (size >= 0); 178 while (size > 0 && ptr[size-1] == 0) 179 size--; 180 return size; 181 } 182 183 /* the highest one bit in x */ 184 mp_limb_t 185 refmpn_msbone (mp_limb_t x) 186 { 187 mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1); 188 189 while (n != 0) 190 { 191 if (x & n) 192 break; 193 n >>= 1; 194 } 195 return n; 196 } 197 198 /* a mask of the highest one bit plus and all bits below */ 199 mp_limb_t 200 refmpn_msbone_mask (mp_limb_t x) 201 { 202 if (x == 0) 203 return 0; 204 205 return (refmpn_msbone (x) << 1) - 1; 206 } 207 208 /* How many digits in the given base will fit in a limb. 209 Notice that the product b is allowed to be equal to the limit 210 2^GMP_NUMB_BITS, this ensures the result for base==2 will be 211 GMP_NUMB_BITS (and similarly other powers of 2). */ 212 int 213 refmpn_chars_per_limb (int base) 214 { 215 mp_limb_t limit[2], b[2]; 216 int chars_per_limb; 217 218 ASSERT (base >= 2); 219 220 limit[0] = 0; /* limit = 2^GMP_NUMB_BITS */ 221 limit[1] = 1; 222 b[0] = 1; /* b = 1 */ 223 b[1] = 0; 224 225 chars_per_limb = 0; 226 for (;;) 227 { 228 if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base)) 229 break; 230 if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0) 231 break; 232 chars_per_limb++; 233 } 234 return chars_per_limb; 235 } 236 237 /* The biggest value base**n which fits in GMP_NUMB_BITS. 
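   That is, base**chars_per_limb(base).  For example with 32-bit limbs and no nails, big_base(10) is 10^9 = 1000000000.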
*/ 238 mp_limb_t 239 refmpn_big_base (int base) 240 { 241 int chars_per_limb = refmpn_chars_per_limb (base); 242 int i; 243 mp_limb_t bb; 244 245 ASSERT (base >= 2); 246 bb = 1; 247 for (i = 0; i < chars_per_limb; i++) 248 bb *= base; 249 return bb; 250 } 251 252 253 void 254 refmpn_setbit (mp_ptr ptr, unsigned long bit) 255 { 256 ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS); 257 } 258 259 void 260 refmpn_clrbit (mp_ptr ptr, unsigned long bit) 261 { 262 ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS)); 263 } 264 265 #define REFMPN_TSTBIT(ptr,bit) \ 266 (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0) 267 268 int 269 refmpn_tstbit (mp_srcptr ptr, unsigned long bit) 270 { 271 return REFMPN_TSTBIT (ptr, bit); 272 } 273 274 unsigned long 275 refmpn_scan0 (mp_srcptr ptr, unsigned long bit) 276 { 277 while (REFMPN_TSTBIT (ptr, bit) != 0) 278 bit++; 279 return bit; 280 } 281 282 unsigned long 283 refmpn_scan1 (mp_srcptr ptr, unsigned long bit) 284 { 285 while (REFMPN_TSTBIT (ptr, bit) == 0) 286 bit++; 287 return bit; 288 } 289 290 void 291 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size) 292 { 293 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 294 refmpn_copyi (rp, sp, size); 295 } 296 297 void 298 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size) 299 { 300 mp_size_t i; 301 302 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 303 ASSERT (size >= 0); 304 305 for (i = 0; i < size; i++) 306 rp[i] = sp[i]; 307 } 308 309 void 310 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size) 311 { 312 mp_size_t i; 313 314 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 315 ASSERT (size >= 0); 316 317 for (i = size-1; i >= 0; i--) 318 rp[i] = sp[i]; 319 } 320 321 /* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low 322 zeros to wsize. If x is longer, then copy just the high wsize limbs. */ 323 void 324 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize) 325 { 326 ASSERT (wsize >= 0); 327 ASSERT (xsize >= 0); 328 329 /* high part of x if x bigger than w */ 330 if (xsize > wsize) 331 { 332 xp += xsize - wsize; 333 xsize = wsize; 334 } 335 336 refmpn_copy (wp + wsize-xsize, xp, xsize); 337 refmpn_zero (wp, wsize-xsize); 338 } 339 340 int 341 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 342 { 343 mp_size_t i; 344 345 ASSERT (size >= 1); 346 ASSERT_MPN (xp, size); 347 ASSERT_MPN (yp, size); 348 349 for (i = size-1; i >= 0; i--) 350 { 351 if (xp[i] > yp[i]) return 1; 352 if (xp[i] < yp[i]) return -1; 353 } 354 return 0; 355 } 356 357 int 358 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 359 { 360 if (size == 0) 361 return 0; 362 else 363 return refmpn_cmp (xp, yp, size); 364 } 365 366 int 367 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize, 368 mp_srcptr yp, mp_size_t ysize) 369 { 370 int opp, cmp; 371 372 ASSERT_MPN (xp, xsize); 373 ASSERT_MPN (yp, ysize); 374 375 opp = (xsize < ysize); 376 if (opp) 377 MPN_SRCPTR_SWAP (xp,xsize, yp,ysize); 378 379 if (! refmpn_zero_p (xp+ysize, xsize-ysize)) 380 cmp = 1; 381 else 382 cmp = refmpn_cmp (xp, yp, ysize); 383 384 return (opp ? 
-cmp : cmp); 385 } 386 387 int 388 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size) 389 { 390 mp_size_t i; 391 ASSERT (size >= 0); 392 393 for (i = 0; i < size; i++) 394 if (xp[i] != yp[i]) 395 return 0; 396 return 1; 397 } 398 399 400 #define LOGOPS(operation) \ 401 { \ 402 mp_size_t i; \ 403 \ 404 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ 405 ASSERT (size >= 1); \ 406 ASSERT_MPN (s1p, size); \ 407 ASSERT_MPN (s2p, size); \ 408 \ 409 for (i = 0; i < size; i++) \ 410 rp[i] = operation; \ 411 } 412 413 void 414 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 415 { 416 LOGOPS (s1p[i] & s2p[i]); 417 } 418 void 419 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 420 { 421 LOGOPS (s1p[i] & ~s2p[i]); 422 } 423 void 424 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 425 { 426 LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK); 427 } 428 void 429 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 430 { 431 LOGOPS (s1p[i] | s2p[i]); 432 } 433 void 434 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 435 { 436 LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK)); 437 } 438 void 439 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 440 { 441 LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK); 442 } 443 void 444 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 445 { 446 LOGOPS (s1p[i] ^ s2p[i]); 447 } 448 void 449 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 450 { 451 LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK); 452 } 453 454 455 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */ 456 void 457 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl, 458 mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl) 459 { 460 *dl = ml - sl; 461 *dh = mh - sh - (ml < sl); 462 } 463 464 465 /* set *w to x+y, return 0 or 1 carry */ 466 mp_limb_t 467 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) 468 { 469 mp_limb_t sum, cy; 470 471 ASSERT_LIMB (x); 472 ASSERT_LIMB (y); 473 474 sum = x + y; 475 #if GMP_NAIL_BITS == 0 476 *w = sum; 477 cy = (sum < x); 478 #else 479 *w = sum & GMP_NUMB_MASK; 480 cy = (sum >> GMP_NUMB_BITS); 481 #endif 482 return cy; 483 } 484 485 /* set *w to x-y, return 0 or 1 borrow */ 486 mp_limb_t 487 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) 488 { 489 mp_limb_t diff, cy; 490 491 ASSERT_LIMB (x); 492 ASSERT_LIMB (y); 493 494 diff = x - y; 495 #if GMP_NAIL_BITS == 0 496 *w = diff; 497 cy = (diff > x); 498 #else 499 *w = diff & GMP_NUMB_MASK; 500 cy = (diff >> GMP_NUMB_BITS) & 1; 501 #endif 502 return cy; 503 } 504 505 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */ 506 mp_limb_t 507 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) 508 { 509 mp_limb_t r; 510 511 ASSERT_LIMB (x); 512 ASSERT_LIMB (y); 513 ASSERT (c == 0 || c == 1); 514 515 r = ref_addc_limb (w, x, y); 516 return r + ref_addc_limb (w, *w, c); 517 } 518 519 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */ 520 mp_limb_t 521 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) 522 { 523 mp_limb_t r; 524 525 ASSERT_LIMB (x); 526 ASSERT_LIMB (y); 527 ASSERT (c == 0 || c == 1); 528 529 r = ref_subc_limb (w, x, y); 530 return r + ref_subc_limb (w, *w, c); 531 } 532 533 534 #define AORS_1(operation) \ 535 { \ 536 mp_limb_t i; \ 537 \ 538 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ 539 ASSERT (size >= 1); \ 540 ASSERT_MPN (sp, size); \ 541 ASSERT_LIMB (n); \ 542 \ 543 for (i = 0; i < 
size; i++) \ 544 n = operation (&rp[i], sp[i], n); \ 545 return n; \ 546 } 547 548 mp_limb_t 549 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) 550 { 551 AORS_1 (ref_addc_limb); 552 } 553 mp_limb_t 554 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) 555 { 556 AORS_1 (ref_subc_limb); 557 } 558 559 #define AORS_NC(operation) \ 560 { \ 561 mp_size_t i; \ 562 \ 563 ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ 564 ASSERT (carry == 0 || carry == 1); \ 565 ASSERT (size >= 1); \ 566 ASSERT_MPN (s1p, size); \ 567 ASSERT_MPN (s2p, size); \ 568 \ 569 for (i = 0; i < size; i++) \ 570 carry = operation (&rp[i], s1p[i], s2p[i], carry); \ 571 return carry; \ 572 } 573 574 mp_limb_t 575 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 576 mp_limb_t carry) 577 { 578 AORS_NC (adc); 579 } 580 mp_limb_t 581 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 582 mp_limb_t carry) 583 { 584 AORS_NC (sbb); 585 } 586 587 588 mp_limb_t 589 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 590 { 591 return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0)); 592 } 593 mp_limb_t 594 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 595 { 596 return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0)); 597 } 598 599 mp_limb_t 600 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 601 mp_size_t n, unsigned int s) 602 { 603 mp_limb_t cy; 604 mp_ptr tp; 605 606 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 607 ASSERT (n >= 1); 608 ASSERT (0 < s && s < GMP_NUMB_BITS); 609 ASSERT_MPN (up, n); 610 ASSERT_MPN (vp, n); 611 612 tp = refmpn_malloc_limbs (n); 613 cy = refmpn_lshift (tp, vp, n, s); 614 cy += refmpn_add_n (rp, up, tp, n); 615 free (tp); 616 return cy; 617 } 618 mp_limb_t 619 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 620 { 621 return refmpn_addlsh_n (rp, up, vp, n, 1); 622 } 623 mp_limb_t 624 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 625 { 626 return refmpn_addlsh_n (rp, up, vp, n, 2); 627 } 628 629 mp_limb_t 630 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 631 mp_size_t n, unsigned int s) 632 { 633 mp_limb_t cy; 634 mp_ptr tp; 635 636 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 637 ASSERT (n >= 1); 638 ASSERT (0 < s && s < GMP_NUMB_BITS); 639 ASSERT_MPN (up, n); 640 ASSERT_MPN (vp, n); 641 642 tp = refmpn_malloc_limbs (n); 643 cy = mpn_lshift (tp, vp, n, s); 644 cy += mpn_sub_n (rp, up, tp, n); 645 free (tp); 646 return cy; 647 } 648 mp_limb_t 649 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 650 { 651 return refmpn_sublsh_n (rp, up, vp, n, 1); 652 } 653 654 mp_limb_signed_t 655 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, 656 mp_size_t n, unsigned int s) 657 { 658 mp_limb_signed_t cy; 659 mp_ptr tp; 660 661 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 662 ASSERT (n >= 1); 663 ASSERT (0 < s && s < GMP_NUMB_BITS); 664 ASSERT_MPN (up, n); 665 ASSERT_MPN (vp, n); 666 667 tp = refmpn_malloc_limbs (n); 668 cy = mpn_lshift (tp, vp, n, s); 669 cy -= mpn_sub_n (rp, tp, up, n); 670 free (tp); 671 return cy; 672 } 673 mp_limb_signed_t 674 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 675 { 676 return refmpn_rsblsh_n (rp, up, vp, n, 1); 677 } 678 mp_limb_signed_t 679 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 680 { 681 return refmpn_rsblsh_n (rp, up, vp, n, 2); 682 } 683 684 mp_limb_t 685 
refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 686 { 687 mp_limb_t cya, cys; 688 689 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 690 ASSERT (n >= 1); 691 ASSERT_MPN (up, n); 692 ASSERT_MPN (vp, n); 693 694 cya = mpn_add_n (rp, up, vp, n); 695 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); 696 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); 697 return cys; 698 } 699 mp_limb_t 700 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 701 { 702 mp_limb_t cya, cys; 703 704 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); 705 ASSERT (n >= 1); 706 ASSERT_MPN (up, n); 707 ASSERT_MPN (vp, n); 708 709 cya = mpn_sub_n (rp, up, vp, n); 710 cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); 711 rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); 712 return cys; 713 } 714 715 /* Twos complement, return borrow. */ 716 mp_limb_t 717 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size) 718 { 719 mp_ptr zeros; 720 mp_limb_t ret; 721 722 ASSERT (size >= 1); 723 724 zeros = refmpn_malloc_limbs (size); 725 refmpn_fill (zeros, size, CNST_LIMB(0)); 726 ret = refmpn_sub_n (dst, zeros, src, size); 727 free (zeros); 728 return ret; 729 } 730 731 732 #define AORS(aors_n, aors_1) \ 733 { \ 734 mp_limb_t c; \ 735 ASSERT (s1size >= s2size); \ 736 ASSERT (s2size >= 1); \ 737 c = aors_n (rp, s1p, s2p, s2size); \ 738 if (s1size-s2size != 0) \ 739 c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \ 740 return c; \ 741 } 742 mp_limb_t 743 refmpn_add (mp_ptr rp, 744 mp_srcptr s1p, mp_size_t s1size, 745 mp_srcptr s2p, mp_size_t s2size) 746 { 747 AORS (refmpn_add_n, refmpn_add_1); 748 } 749 mp_limb_t 750 refmpn_sub (mp_ptr rp, 751 mp_srcptr s1p, mp_size_t s1size, 752 mp_srcptr s2p, mp_size_t s2size) 753 { 754 AORS (refmpn_sub_n, refmpn_sub_1); 755 } 756 757 758 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2) 759 #define SHIFTLOW(x) ((x) >> GMP_LIMB_BITS/2) 760 761 #define LOWMASK (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1) 762 #define HIGHMASK SHIFTHIGH(LOWMASK) 763 764 #define LOWPART(x) ((x) & LOWMASK) 765 #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK) 766 767 /* Set return:*lo to x*y, using full limbs not nails. 
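   The high limb is the return value and the low limb goes to *lo.  Writing b = 2^(GMP_LIMB_BITS/2), x = xh*b + xl and y = yh*b + yl gives x*y = xh*yh*b^2 + (xh*yl + xl*yh)*b + xl*yl, which is what the code accumulates, propagating the carries from the two cross products into the high limb.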
*/ 768 mp_limb_t 769 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y) 770 { 771 mp_limb_t hi, s; 772 773 *lo = LOWPART(x) * LOWPART(y); 774 hi = HIGHPART(x) * HIGHPART(y); 775 776 s = LOWPART(x) * HIGHPART(y); 777 hi += HIGHPART(s); 778 s = SHIFTHIGH(LOWPART(s)); 779 *lo += s; 780 hi += (*lo < s); 781 782 s = HIGHPART(x) * LOWPART(y); 783 hi += HIGHPART(s); 784 s = SHIFTHIGH(LOWPART(s)); 785 *lo += s; 786 hi += (*lo < s); 787 788 return hi; 789 } 790 791 mp_limb_t 792 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo) 793 { 794 return refmpn_umul_ppmm (lo, x, y); 795 } 796 797 mp_limb_t 798 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, 799 mp_limb_t carry) 800 { 801 mp_size_t i; 802 mp_limb_t hi, lo; 803 804 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 805 ASSERT (size >= 1); 806 ASSERT_MPN (sp, size); 807 ASSERT_LIMB (multiplier); 808 ASSERT_LIMB (carry); 809 810 multiplier <<= GMP_NAIL_BITS; 811 for (i = 0; i < size; i++) 812 { 813 hi = refmpn_umul_ppmm (&lo, sp[i], multiplier); 814 lo >>= GMP_NAIL_BITS; 815 ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry))); 816 rp[i] = lo; 817 carry = hi; 818 } 819 return carry; 820 } 821 822 mp_limb_t 823 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 824 { 825 return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 826 } 827 828 829 mp_limb_t 830 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, 831 mp_srcptr mult, mp_size_t msize) 832 { 833 mp_ptr src_copy; 834 mp_limb_t ret; 835 mp_size_t i; 836 837 ASSERT (refmpn_overlap_fullonly_p (dst, src, size)); 838 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); 839 ASSERT (size >= msize); 840 ASSERT_MPN (mult, msize); 841 842 /* in case dst==src */ 843 src_copy = refmpn_malloc_limbs (size); 844 refmpn_copyi (src_copy, src, size); 845 src = src_copy; 846 847 dst[size] = refmpn_mul_1 (dst, src, size, mult[0]); 848 for (i = 1; i < msize-1; i++) 849 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); 850 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); 851 852 free (src_copy); 853 return ret; 854 } 855 856 mp_limb_t 857 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 858 { 859 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2); 860 } 861 mp_limb_t 862 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 863 { 864 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3); 865 } 866 mp_limb_t 867 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 868 { 869 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4); 870 } 871 872 #define AORSMUL_1C(operation_n) \ 873 { \ 874 mp_ptr p; \ 875 mp_limb_t ret; \ 876 \ 877 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ 878 \ 879 p = refmpn_malloc_limbs (size); \ 880 ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \ 881 ret += operation_n (rp, rp, p, size); \ 882 \ 883 free (p); \ 884 return ret; \ 885 } 886 887 mp_limb_t 888 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 889 mp_limb_t multiplier, mp_limb_t carry) 890 { 891 AORSMUL_1C (refmpn_add_n); 892 } 893 mp_limb_t 894 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 895 mp_limb_t multiplier, mp_limb_t carry) 896 { 897 AORSMUL_1C (refmpn_sub_n); 898 } 899 900 901 mp_limb_t 902 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 903 { 904 return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 905 } 906 mp_limb_t 907 refmpn_submul_1 
(mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) 908 { 909 return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); 910 } 911 912 913 mp_limb_t 914 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, 915 mp_srcptr mult, mp_size_t msize) 916 { 917 mp_ptr src_copy; 918 mp_limb_t ret; 919 mp_size_t i; 920 921 ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size)); 922 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); 923 ASSERT (size >= msize); 924 ASSERT_MPN (mult, msize); 925 926 /* in case dst==src */ 927 src_copy = refmpn_malloc_limbs (size); 928 refmpn_copyi (src_copy, src, size); 929 src = src_copy; 930 931 for (i = 0; i < msize-1; i++) 932 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); 933 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); 934 935 free (src_copy); 936 return ret; 937 } 938 939 mp_limb_t 940 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 941 { 942 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2); 943 } 944 mp_limb_t 945 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 946 { 947 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3); 948 } 949 mp_limb_t 950 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 951 { 952 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4); 953 } 954 mp_limb_t 955 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 956 { 957 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5); 958 } 959 mp_limb_t 960 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 961 { 962 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6); 963 } 964 mp_limb_t 965 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 966 { 967 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7); 968 } 969 mp_limb_t 970 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) 971 { 972 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8); 973 } 974 975 mp_limb_t 976 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p, 977 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, 978 mp_limb_t carry) 979 { 980 mp_ptr p; 981 mp_limb_t acy, scy; 982 983 /* Destinations can't overlap. */ 984 ASSERT (! refmpn_overlap_p (r1p, size, r2p, size)); 985 ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size)); 986 ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size)); 987 ASSERT (size >= 1); 988 989 /* in case r1p==s1p or r1p==s2p */ 990 p = refmpn_malloc_limbs (size); 991 992 acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1); 993 scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1); 994 refmpn_copyi (r1p, p, size); 995 996 free (p); 997 return 2 * acy + scy; 998 } 999 1000 mp_limb_t 1001 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, 1002 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 1003 { 1004 return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0)); 1005 } 1006 1007 1008 /* Right shift hi,lo and return the low limb of the result. 1009 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ 1010 mp_limb_t 1011 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) 1012 { 1013 ASSERT (shift < GMP_NUMB_BITS); 1014 if (shift == 0) 1015 return lo; 1016 else 1017 return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK; 1018 } 1019 1020 /* Left shift hi,lo and return the high limb of the result. 1021 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). 
*/ 1022 mp_limb_t 1023 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) 1024 { 1025 ASSERT (shift < GMP_NUMB_BITS); 1026 if (shift == 0) 1027 return hi; 1028 else 1029 return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK; 1030 } 1031 1032 1033 mp_limb_t 1034 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1035 { 1036 mp_limb_t ret; 1037 mp_size_t i; 1038 1039 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); 1040 ASSERT (size >= 1); 1041 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); 1042 ASSERT_MPN (sp, size); 1043 1044 ret = rshift_make (sp[0], CNST_LIMB(0), shift); 1045 1046 for (i = 0; i < size-1; i++) 1047 rp[i] = rshift_make (sp[i+1], sp[i], shift); 1048 1049 rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift); 1050 return ret; 1051 } 1052 1053 mp_limb_t 1054 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1055 { 1056 mp_limb_t ret; 1057 mp_size_t i; 1058 1059 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 1060 ASSERT (size >= 1); 1061 ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); 1062 ASSERT_MPN (sp, size); 1063 1064 ret = lshift_make (CNST_LIMB(0), sp[size-1], shift); 1065 1066 for (i = size-2; i >= 0; i--) 1067 rp[i+1] = lshift_make (sp[i+1], sp[i], shift); 1068 1069 rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift); 1070 return ret; 1071 } 1072 1073 void 1074 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1075 { 1076 mp_size_t i; 1077 1078 /* We work downwards since mpn_lshiftc needs that. */ 1079 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); 1080 1081 for (i = size - 1; i >= 0; i--) 1082 rp[i] = (~sp[i]) & GMP_NUMB_MASK; 1083 } 1084 1085 mp_limb_t 1086 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1087 { 1088 mp_limb_t res; 1089 1090 /* No asserts here, refmpn_lshift will assert what we need. */ 1091 1092 res = refmpn_lshift (rp, sp, size, shift); 1093 refmpn_com (rp, rp, size); 1094 return res; 1095 } 1096 1097 /* accepting shift==0 and doing a plain copyi or copyd in that case */ 1098 mp_limb_t 1099 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1100 { 1101 if (shift == 0) 1102 { 1103 refmpn_copyi (rp, sp, size); 1104 return 0; 1105 } 1106 else 1107 { 1108 return refmpn_rshift (rp, sp, size, shift); 1109 } 1110 } 1111 mp_limb_t 1112 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) 1113 { 1114 if (shift == 0) 1115 { 1116 refmpn_copyd (rp, sp, size); 1117 return 0; 1118 } 1119 else 1120 { 1121 return refmpn_lshift (rp, sp, size, shift); 1122 } 1123 } 1124 1125 /* accepting size==0 too */ 1126 mp_limb_t 1127 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1128 unsigned shift) 1129 { 1130 return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift)); 1131 } 1132 mp_limb_t 1133 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1134 unsigned shift) 1135 { 1136 return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift)); 1137 } 1138 1139 /* Divide h,l by d, return quotient, store remainder to *rp. 1140 Operates on full limbs, not nails. 1141 Must have h < d. 1142 __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. 
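   The divisor is first normalized, ie. shifted left until its high bit is set, with h,l shifted up by the same amount, since __udiv_qrnnd_c needs a normalized divisor; the remainder is shifted back down at the end.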
*/ 1143 mp_limb_t 1144 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d) 1145 { 1146 mp_limb_t q, r; 1147 int n; 1148 1149 ASSERT (d != 0); 1150 ASSERT (h < d); 1151 1152 #if 0 1153 udiv_qrnnd (q, r, h, l, d); 1154 *rp = r; 1155 return q; 1156 #endif 1157 1158 n = refmpn_count_leading_zeros (d); 1159 d <<= n; 1160 1161 if (n != 0) 1162 { 1163 h = (h << n) | (l >> (GMP_LIMB_BITS - n)); 1164 l <<= n; 1165 } 1166 1167 __udiv_qrnnd_c (q, r, h, l, d); 1168 r >>= n; 1169 *rp = r; 1170 return q; 1171 } 1172 1173 mp_limb_t 1174 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp) 1175 { 1176 return refmpn_udiv_qrnnd (rp, h, l, d); 1177 } 1178 1179 /* This little subroutine avoids some bad code generation from i386 gcc 3.0 1180 -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */ 1181 static mp_limb_t 1182 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1183 mp_limb_t divisor, mp_limb_t carry) 1184 { 1185 mp_size_t i; 1186 mp_limb_t rem[1]; 1187 for (i = size-1; i >= 0; i--) 1188 { 1189 rp[i] = refmpn_udiv_qrnnd (rem, carry, 1190 sp[i] << GMP_NAIL_BITS, 1191 divisor << GMP_NAIL_BITS); 1192 carry = *rem >> GMP_NAIL_BITS; 1193 } 1194 return carry; 1195 } 1196 1197 mp_limb_t 1198 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, 1199 mp_limb_t divisor, mp_limb_t carry) 1200 { 1201 mp_ptr sp_orig; 1202 mp_ptr prod; 1203 mp_limb_t carry_orig; 1204 1205 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 1206 ASSERT (size >= 0); 1207 ASSERT (carry < divisor); 1208 ASSERT_MPN (sp, size); 1209 ASSERT_LIMB (divisor); 1210 ASSERT_LIMB (carry); 1211 1212 if (size == 0) 1213 return carry; 1214 1215 sp_orig = refmpn_memdup_limbs (sp, size); 1216 prod = refmpn_malloc_limbs (size); 1217 carry_orig = carry; 1218 1219 carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry); 1220 1221 /* check by multiplying back */ 1222 #if 0 1223 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n", 1224 size, divisor, carry_orig, carry); 1225 mpn_trace("s",sp_copy,size); 1226 mpn_trace("r",rp,size); 1227 printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry)); 1228 mpn_trace("p",prod,size); 1229 #endif 1230 ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig); 1231 ASSERT (refmpn_cmp (prod, sp_orig, size) == 0); 1232 free (sp_orig); 1233 free (prod); 1234 1235 return carry; 1236 } 1237 1238 mp_limb_t 1239 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1240 { 1241 return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0)); 1242 } 1243 1244 1245 mp_limb_t 1246 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1247 mp_limb_t carry) 1248 { 1249 mp_ptr p = refmpn_malloc_limbs (size); 1250 carry = refmpn_divmod_1c (p, sp, size, divisor, carry); 1251 free (p); 1252 return carry; 1253 } 1254 1255 mp_limb_t 1256 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1257 { 1258 return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0)); 1259 } 1260 1261 mp_limb_t 1262 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1263 mp_limb_t inverse) 1264 { 1265 ASSERT (divisor & GMP_NUMB_HIGHBIT); 1266 ASSERT (inverse == refmpn_invert_limb (divisor)); 1267 return refmpn_mod_1 (sp, size, divisor); 1268 } 1269 1270 /* This implementation will be rather slow, but has the advantage of being 1271 in a different style than the libgmp versions. 
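   It simply lets mpn_mod_1 compute the remainder modulo 2^(3*GMP_NUMB_BITS/4)-1 directly.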
*/ 1272 mp_limb_t 1273 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) 1274 { 1275 ASSERT ((GMP_NUMB_BITS % 4) == 0); 1276 return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1); 1277 } 1278 1279 1280 mp_limb_t 1281 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize, 1282 mp_srcptr sp, mp_size_t size, mp_limb_t divisor, 1283 mp_limb_t carry) 1284 { 1285 mp_ptr z; 1286 1287 z = refmpn_malloc_limbs (xsize); 1288 refmpn_fill (z, xsize, CNST_LIMB(0)); 1289 1290 carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry); 1291 carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry); 1292 1293 free (z); 1294 return carry; 1295 } 1296 1297 mp_limb_t 1298 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize, 1299 mp_srcptr sp, mp_size_t size, mp_limb_t divisor) 1300 { 1301 return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0)); 1302 } 1303 1304 mp_limb_t 1305 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize, 1306 mp_srcptr sp, mp_size_t size, 1307 mp_limb_t divisor, mp_limb_t inverse, unsigned shift) 1308 { 1309 ASSERT (size >= 0); 1310 ASSERT (shift == refmpn_count_leading_zeros (divisor)); 1311 ASSERT (inverse == refmpn_invert_limb (divisor << shift)); 1312 1313 return refmpn_divrem_1 (rp, xsize, sp, size, divisor); 1314 } 1315 1316 mp_limb_t 1317 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn, 1318 mp_ptr np, mp_size_t nn, 1319 mp_srcptr dp) 1320 { 1321 mp_ptr tp; 1322 mp_limb_t qh; 1323 1324 tp = refmpn_malloc_limbs (nn + qxn); 1325 refmpn_zero (tp, qxn); 1326 refmpn_copyi (tp + qxn, np, nn); 1327 qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2); 1328 refmpn_copyi (np, tp, 2); 1329 free (tp); 1330 return qh; 1331 } 1332 1333 /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers 1334 paper, figure 8.1 m', where b=2^GMP_LIMB_BITS. Note that -d-1 < d 1335 since d has the high bit set. */ 1336 1337 mp_limb_t 1338 refmpn_invert_limb (mp_limb_t d) 1339 { 1340 mp_limb_t r; 1341 ASSERT (d & GMP_LIMB_HIGHBIT); 1342 return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d); 1343 } 1344 1345 void 1346 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 1347 { 1348 mp_ptr qp, tp; 1349 TMP_DECL; 1350 TMP_MARK; 1351 1352 tp = TMP_ALLOC_LIMBS (2 * n); 1353 qp = TMP_ALLOC_LIMBS (n + 1); 1354 1355 MPN_ZERO (tp, 2 * n); mpn_sub_1 (tp, tp, 2 * n, 1); 1356 1357 refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n); 1358 refmpn_copyi (rp, qp, n); 1359 1360 TMP_FREE; 1361 } 1362 1363 void 1364 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 1365 { 1366 mp_ptr tp; 1367 mp_limb_t binv; 1368 TMP_DECL; 1369 TMP_MARK; 1370 1371 /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing 1372 code. To make up for it, we check that the inverse is correct using a 1373 multiply. */ 1374 1375 tp = TMP_ALLOC_LIMBS (2 * n); 1376 1377 MPN_ZERO (tp, n); 1378 tp[0] = 1; 1379 binvert_limb (binv, up[0]); 1380 mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv); 1381 1382 refmpn_mul_n (tp, rp, up, n); 1383 ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1)); 1384 1385 TMP_FREE; 1386 } 1387 1388 /* The aim is to produce a dst quotient and return a remainder c, satisfying 1389 c*b^n + src-i == 3*dst, where i is the incoming carry. 1390 1391 Some value c==0, c==1 or c==2 will satisfy, so just try each. 
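   (Exactly one of them works, since b^n is not divisible by 3 and hence c*b^n runs through all three residues mod 3 as c does.)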
1392 1393 If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero 1394 remainder from the first division attempt determines the correct 1395 remainder (3-c), but don't bother with that, since we can't guarantee 1396 anything about GMP_NUMB_BITS when using nails. 1397 1398 If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos 1399 complement negative, ie. b^n+a-i, and the calculation produces c1 1400 satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This 1401 means it's enough to just add any borrow back at the end. 1402 1403 A borrow only occurs when a==0 or a==1, and, by the same reasoning as in 1404 mpn/generic/diveby3.c, the c1 that results in those cases will only be 0 1405 or 1 respectively, so with 1 added the final return value is still in the 1406 prescribed range 0 to 2. */ 1407 1408 mp_limb_t 1409 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry) 1410 { 1411 mp_ptr spcopy; 1412 mp_limb_t c, cs; 1413 1414 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); 1415 ASSERT (size >= 1); 1416 ASSERT (carry <= 2); 1417 ASSERT_MPN (sp, size); 1418 1419 spcopy = refmpn_malloc_limbs (size); 1420 cs = refmpn_sub_1 (spcopy, sp, size, carry); 1421 1422 for (c = 0; c <= 2; c++) 1423 if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0) 1424 goto done; 1425 ASSERT_FAIL (no value of c satisfies); 1426 1427 done: 1428 c += cs; 1429 ASSERT (c <= 2); 1430 1431 free (spcopy); 1432 return c; 1433 } 1434 1435 mp_limb_t 1436 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1437 { 1438 return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0)); 1439 } 1440 1441 1442 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */ 1443 void 1444 refmpn_mul_basecase (mp_ptr prodp, 1445 mp_srcptr up, mp_size_t usize, 1446 mp_srcptr vp, mp_size_t vsize) 1447 { 1448 mp_size_t i; 1449 1450 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); 1451 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); 1452 ASSERT (usize >= vsize); 1453 ASSERT (vsize >= 1); 1454 ASSERT_MPN (up, usize); 1455 ASSERT_MPN (vp, vsize); 1456 1457 prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]); 1458 for (i = 1; i < vsize; i++) 1459 prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]); 1460 } 1461 1462 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD)) 1463 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD)) 1464 #if WANT_FFT 1465 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD)) 1466 #else 1467 #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */ 1468 #endif 1469 1470 void 1471 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) 1472 { 1473 mp_ptr tp; 1474 mp_size_t tn; 1475 mp_limb_t cy; 1476 1477 if (vn < TOOM3_THRESHOLD) 1478 { 1479 /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own 1480 mul_basecase. */ 1481 if (vn != 0) 1482 refmpn_mul_basecase (wp, up, un, vp, vn); 1483 else 1484 MPN_ZERO (wp, un); 1485 return; 1486 } 1487 1488 if (vn < TOOM4_THRESHOLD) 1489 { 1490 /* In the mpn_toom33_mul range, use mpn_toom22_mul. */ 1491 tn = 2 * vn + mpn_toom22_mul_itch (vn, vn); 1492 tp = refmpn_malloc_limbs (tn); 1493 mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1494 } 1495 else if (vn < FFT_THRESHOLD) 1496 { 1497 /* In the mpn_toom44_mul range, use mpn_toom33_mul. 
*/ 1498 tn = 2 * vn + mpn_toom33_mul_itch (vn, vn); 1499 tp = refmpn_malloc_limbs (tn); 1500 mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1501 } 1502 else 1503 { 1504 /* Finally, for the largest operands, use mpn_toom44_mul. */ 1505 tn = 2 * vn + mpn_toom44_mul_itch (vn, vn); 1506 tp = refmpn_malloc_limbs (tn); 1507 mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn); 1508 } 1509 1510 if (un != vn) 1511 { 1512 if (un - vn < vn) 1513 refmpn_mul (wp + vn, vp, vn, up + vn, un - vn); 1514 else 1515 refmpn_mul (wp + vn, up + vn, un - vn, vp, vn); 1516 1517 MPN_COPY (wp, tp, vn); 1518 cy = refmpn_add (wp + vn, wp + vn, un, tp + vn, vn); 1519 } 1520 else 1521 { 1522 MPN_COPY (wp, tp, 2 * vn); 1523 } 1524 1525 free (tp); 1526 } 1527 1528 void 1529 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) 1530 { 1531 refmpn_mul (prodp, up, size, vp, size); 1532 } 1533 1534 void 1535 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) 1536 { 1537 mp_ptr tp = refmpn_malloc_limbs (2*size); 1538 refmpn_mul (tp, up, size, vp, size); 1539 refmpn_copyi (prodp, tp, size); 1540 free (tp); 1541 } 1542 1543 void 1544 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size) 1545 { 1546 refmpn_mul (dst, src, size, src, size); 1547 } 1548 1549 /* Allowing usize<vsize, usize==0 or vsize==0. */ 1550 void 1551 refmpn_mul_any (mp_ptr prodp, 1552 mp_srcptr up, mp_size_t usize, 1553 mp_srcptr vp, mp_size_t vsize) 1554 { 1555 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); 1556 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); 1557 ASSERT (usize >= 0); 1558 ASSERT (vsize >= 0); 1559 ASSERT_MPN (up, usize); 1560 ASSERT_MPN (vp, vsize); 1561 1562 if (usize == 0) 1563 { 1564 refmpn_fill (prodp, vsize, CNST_LIMB(0)); 1565 return; 1566 } 1567 1568 if (vsize == 0) 1569 { 1570 refmpn_fill (prodp, usize, CNST_LIMB(0)); 1571 return; 1572 } 1573 1574 if (usize >= vsize) 1575 refmpn_mul (prodp, up, usize, vp, vsize); 1576 else 1577 refmpn_mul (prodp, vp, vsize, up, usize); 1578 } 1579 1580 1581 mp_limb_t 1582 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y) 1583 { 1584 mp_limb_t x; 1585 int twos; 1586 1587 ASSERT (y != 0); 1588 ASSERT (! refmpn_zero_p (xp, xsize)); 1589 ASSERT_MPN (xp, xsize); 1590 ASSERT_LIMB (y); 1591 1592 x = refmpn_mod_1 (xp, xsize, y); 1593 if (x == 0) 1594 return y; 1595 1596 twos = 0; 1597 while ((x & 1) == 0 && (y & 1) == 0) 1598 { 1599 x >>= 1; 1600 y >>= 1; 1601 twos++; 1602 } 1603 1604 for (;;) 1605 { 1606 while ((x & 1) == 0) x >>= 1; 1607 while ((y & 1) == 0) y >>= 1; 1608 1609 if (x < y) 1610 MP_LIMB_T_SWAP (x, y); 1611 1612 x -= y; 1613 if (x == 0) 1614 break; 1615 } 1616 1617 return y << twos; 1618 } 1619 1620 1621 /* Based on the full limb x, not nails. */ 1622 unsigned 1623 refmpn_count_leading_zeros (mp_limb_t x) 1624 { 1625 unsigned n = 0; 1626 1627 ASSERT (x != 0); 1628 1629 while ((x & GMP_LIMB_HIGHBIT) == 0) 1630 { 1631 x <<= 1; 1632 n++; 1633 } 1634 return n; 1635 } 1636 1637 /* Full limbs allowed, not limited to nails. */ 1638 unsigned 1639 refmpn_count_trailing_zeros (mp_limb_t x) 1640 { 1641 unsigned n = 0; 1642 1643 ASSERT (x != 0); 1644 ASSERT_LIMB (x); 1645 1646 while ((x & 1) == 0) 1647 { 1648 x >>= 1; 1649 n++; 1650 } 1651 return n; 1652 } 1653 1654 /* Strip factors of two (low zero bits) from {p,size} by right shifting. 1655 The return value is the number of twos stripped. 
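   For instance a value of 24 (binary 11000) becomes 3 and the return value is 3; size itself is unchanged, any whole limbs stripped just leave zeros at the top.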
*/ 1656 mp_size_t 1657 refmpn_strip_twos (mp_ptr p, mp_size_t size) 1658 { 1659 mp_size_t limbs; 1660 unsigned shift; 1661 1662 ASSERT (size >= 1); 1663 ASSERT (! refmpn_zero_p (p, size)); 1664 ASSERT_MPN (p, size); 1665 1666 for (limbs = 0; p[0] == 0; limbs++) 1667 { 1668 refmpn_copyi (p, p+1, size-1); 1669 p[size-1] = 0; 1670 } 1671 1672 shift = refmpn_count_trailing_zeros (p[0]); 1673 if (shift) 1674 refmpn_rshift (p, p, size, shift); 1675 1676 return limbs*GMP_NUMB_BITS + shift; 1677 } 1678 1679 mp_limb_t 1680 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize) 1681 { 1682 int cmp; 1683 1684 ASSERT (ysize >= 1); 1685 ASSERT (xsize >= ysize); 1686 ASSERT ((xp[0] & 1) != 0); 1687 ASSERT ((yp[0] & 1) != 0); 1688 /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */ 1689 ASSERT (yp[ysize-1] != 0); 1690 ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize)); 1691 ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize)); 1692 ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize)); 1693 if (xsize == ysize) 1694 ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1])); 1695 ASSERT_MPN (xp, xsize); 1696 ASSERT_MPN (yp, ysize); 1697 1698 refmpn_strip_twos (xp, xsize); 1699 MPN_NORMALIZE (xp, xsize); 1700 MPN_NORMALIZE (yp, ysize); 1701 1702 for (;;) 1703 { 1704 cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize); 1705 if (cmp == 0) 1706 break; 1707 if (cmp < 0) 1708 MPN_PTR_SWAP (xp,xsize, yp,ysize); 1709 1710 ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize)); 1711 1712 refmpn_strip_twos (xp, xsize); 1713 MPN_NORMALIZE (xp, xsize); 1714 } 1715 1716 refmpn_copyi (gp, xp, xsize); 1717 return xsize; 1718 } 1719 1720 unsigned long 1721 ref_popc_limb (mp_limb_t src) 1722 { 1723 unsigned long count; 1724 int i; 1725 1726 count = 0; 1727 for (i = 0; i < GMP_LIMB_BITS; i++) 1728 { 1729 count += (src & 1); 1730 src >>= 1; 1731 } 1732 return count; 1733 } 1734 1735 unsigned long 1736 refmpn_popcount (mp_srcptr sp, mp_size_t size) 1737 { 1738 unsigned long count = 0; 1739 mp_size_t i; 1740 1741 ASSERT (size >= 0); 1742 ASSERT_MPN (sp, size); 1743 1744 for (i = 0; i < size; i++) 1745 count += ref_popc_limb (sp[i]); 1746 return count; 1747 } 1748 1749 unsigned long 1750 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) 1751 { 1752 mp_ptr d; 1753 unsigned long count; 1754 1755 ASSERT (size >= 0); 1756 ASSERT_MPN (s1p, size); 1757 ASSERT_MPN (s2p, size); 1758 1759 if (size == 0) 1760 return 0; 1761 1762 d = refmpn_malloc_limbs (size); 1763 refmpn_xor_n (d, s1p, s2p, size); 1764 count = refmpn_popcount (d, size); 1765 free (d); 1766 return count; 1767 } 1768 1769 1770 /* set r to a%d */ 1771 void 1772 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2]) 1773 { 1774 mp_limb_t D[2]; 1775 int n; 1776 1777 ASSERT (! 
refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2)); 1778 ASSERT_MPN (a, 2); 1779 ASSERT_MPN (d, 2); 1780 1781 D[1] = d[1], D[0] = d[0]; 1782 r[1] = a[1], r[0] = a[0]; 1783 n = 0; 1784 1785 for (;;) 1786 { 1787 if (D[1] & GMP_NUMB_HIGHBIT) 1788 break; 1789 if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0) 1790 break; 1791 refmpn_lshift (D, D, (mp_size_t) 2, 1); 1792 n++; 1793 ASSERT (n <= GMP_NUMB_BITS); 1794 } 1795 1796 while (n >= 0) 1797 { 1798 if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0) 1799 ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2)); 1800 refmpn_rshift (D, D, (mp_size_t) 2, 1); 1801 n--; 1802 } 1803 1804 ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0); 1805 } 1806 1807 1808 1809 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in 1810 particular the trial quotient is allowed to be 2 too big. */ 1811 mp_limb_t 1812 refmpn_sb_div_qr (mp_ptr qp, 1813 mp_ptr np, mp_size_t nsize, 1814 mp_srcptr dp, mp_size_t dsize) 1815 { 1816 mp_limb_t retval = 0; 1817 mp_size_t i; 1818 mp_limb_t d1 = dp[dsize-1]; 1819 mp_ptr np_orig = refmpn_memdup_limbs (np, nsize); 1820 1821 ASSERT (nsize >= dsize); 1822 /* ASSERT (dsize > 2); */ 1823 ASSERT (dsize >= 2); 1824 ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT); 1825 ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np); 1826 ASSERT_MPN (np, nsize); 1827 ASSERT_MPN (dp, dsize); 1828 1829 i = nsize-dsize; 1830 if (refmpn_cmp (np+i, dp, dsize) >= 0) 1831 { 1832 ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize)); 1833 retval = 1; 1834 } 1835 1836 for (i--; i >= 0; i--) 1837 { 1838 mp_limb_t n0 = np[i+dsize]; 1839 mp_limb_t n1 = np[i+dsize-1]; 1840 mp_limb_t q, dummy_r; 1841 1842 ASSERT (n0 <= d1); 1843 if (n0 == d1) 1844 q = GMP_NUMB_MAX; 1845 else 1846 q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS, 1847 d1 << GMP_NAIL_BITS); 1848 1849 n0 -= refmpn_submul_1 (np+i, dp, dsize, q); 1850 ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX); 1851 if (n0) 1852 { 1853 q--; 1854 if (! refmpn_add_n (np+i, np+i, dp, dsize)) 1855 { 1856 q--; 1857 ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize)); 1858 } 1859 } 1860 np[i+dsize] = 0; 1861 1862 qp[i] = q; 1863 } 1864 1865 /* remainder < divisor */ 1866 #if 0 /* ASSERT triggers gcc 4.2.1 bug */ 1867 ASSERT (refmpn_cmp (np, dp, dsize) < 0); 1868 #endif 1869 1870 /* multiply back to original */ 1871 { 1872 mp_ptr mp = refmpn_malloc_limbs (nsize); 1873 1874 refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize); 1875 if (retval) 1876 ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize)); 1877 ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize)); 1878 ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0); 1879 1880 free (mp); 1881 } 1882 1883 free (np_orig); 1884 return retval; 1885 } 1886 1887 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in 1888 particular the trial quotient is allowed to be 2 too big. 
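   For dsize==1 this just uses refmpn_divmod_1.  Otherwise the divisor is normalized, ie. shifted left so dp[dsize-1] gets its high bit set, {np,nsize} is shifted up by the same amount (with an extra high limb), refmpn_sb_div_qr does the division, and the remainder is shifted back down into {rp,dsize}.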
*/ 1889 void 1890 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, 1891 mp_ptr np, mp_size_t nsize, 1892 mp_srcptr dp, mp_size_t dsize) 1893 { 1894 ASSERT (qxn == 0); 1895 ASSERT_MPN (np, nsize); 1896 ASSERT_MPN (dp, dsize); 1897 ASSERT (dsize > 0); 1898 ASSERT (dp[dsize-1] != 0); 1899 1900 if (dsize == 1) 1901 { 1902 rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]); 1903 return; 1904 } 1905 else 1906 { 1907 mp_ptr n2p = refmpn_malloc_limbs (nsize+1); 1908 mp_ptr d2p = refmpn_malloc_limbs (dsize); 1909 int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS; 1910 1911 n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm); 1912 ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm)); 1913 1914 refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize); 1915 refmpn_rshift_or_copy (rp, n2p, dsize, norm); 1916 1917 /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */ 1918 free (n2p); 1919 free (d2p); 1920 } 1921 } 1922 1923 void 1924 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) 1925 { 1926 mp_size_t j; 1927 mp_limb_t cy; 1928 1929 ASSERT_MPN (up, 2*n); 1930 /* ASSERT about directed overlap rp, up */ 1931 /* ASSERT about overlap rp, mp */ 1932 /* ASSERT about overlap up, mp */ 1933 1934 for (j = n - 1; j >= 0; j--) 1935 { 1936 up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); 1937 up++; 1938 } 1939 cy = mpn_add_n (rp, up, up - n, n); 1940 if (cy != 0) 1941 mpn_sub_n (rp, rp, mp, n); 1942 } 1943 1944 size_t 1945 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size) 1946 { 1947 unsigned char *d; 1948 size_t dsize; 1949 1950 ASSERT (size >= 0); 1951 ASSERT (base >= 2); 1952 ASSERT (base < numberof (mp_bases)); 1953 ASSERT (size == 0 || src[size-1] != 0); 1954 ASSERT_MPN (src, size); 1955 1956 MPN_SIZEINBASE (dsize, src, size, base); 1957 ASSERT (dsize >= 1); 1958 ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB)); 1959 1960 if (size == 0) 1961 { 1962 dst[0] = 0; 1963 return 1; 1964 } 1965 1966 /* don't clobber input for power of 2 bases */ 1967 if (POW2_P (base)) 1968 src = refmpn_memdup_limbs (src, size); 1969 1970 d = dst + dsize; 1971 do 1972 { 1973 d--; 1974 ASSERT (d >= dst); 1975 *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base); 1976 size -= (src[size-1] == 0); 1977 } 1978 while (size != 0); 1979 1980 /* Move result back and decrement dsize if we didn't generate 1981 the maximum possible digits. */ 1982 if (d != dst) 1983 { 1984 size_t i; 1985 dsize -= d - dst; 1986 for (i = 0; i < dsize; i++) 1987 dst[i] = d[i]; 1988 } 1989 1990 if (POW2_P (base)) 1991 free (src); 1992 1993 return dsize; 1994 } 1995 1996 1997 mp_limb_t 1998 ref_bswap_limb (mp_limb_t src) 1999 { 2000 mp_limb_t dst; 2001 int i; 2002 2003 dst = 0; 2004 for (i = 0; i < BYTES_PER_MP_LIMB; i++) 2005 { 2006 dst = (dst << 8) + (src & 0xFF); 2007 src >>= 8; 2008 } 2009 return dst; 2010 } 2011 2012 2013 /* These random functions are mostly for transitional purposes while adding 2014 nail support, since they're independent of the normal mpn routines. They 2015 can probably be removed when those normal routines are reliable, though 2016 perhaps something independent would still be useful at times. 
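   The generator is a plain linear congruential recurrence, seed = seed*RAND_A + 1, with only the high half of each new seed used as output.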
*/ 2017 2018 #if GMP_LIMB_BITS == 32 2019 #define RAND_A CNST_LIMB(0x29CF535) 2020 #endif 2021 #if GMP_LIMB_BITS == 64 2022 #define RAND_A CNST_LIMB(0xBAECD515DAF0B49D) 2023 #endif 2024 2025 mp_limb_t refmpn_random_seed; 2026 2027 mp_limb_t 2028 refmpn_random_half (void) 2029 { 2030 refmpn_random_seed = refmpn_random_seed * RAND_A + 1; 2031 return (refmpn_random_seed >> GMP_LIMB_BITS/2); 2032 } 2033 2034 mp_limb_t 2035 refmpn_random_limb (void) 2036 { 2037 return ((refmpn_random_half () << (GMP_LIMB_BITS/2)) 2038 | refmpn_random_half ()) & GMP_NUMB_MASK; 2039 } 2040 2041 void 2042 refmpn_random (mp_ptr ptr, mp_size_t size) 2043 { 2044 mp_size_t i; 2045 if (GMP_NAIL_BITS == 0) 2046 { 2047 mpn_random (ptr, size); 2048 return; 2049 } 2050 2051 for (i = 0; i < size; i++) 2052 ptr[i] = refmpn_random_limb (); 2053 } 2054 2055 void 2056 refmpn_random2 (mp_ptr ptr, mp_size_t size) 2057 { 2058 mp_size_t i; 2059 mp_limb_t bit, mask, limb; 2060 int run; 2061 2062 if (GMP_NAIL_BITS == 0) 2063 { 2064 mpn_random2 (ptr, size); 2065 return; 2066 } 2067 2068 #define RUN_MODULUS 32 2069 2070 /* start with ones at a random pos in the high limb */ 2071 bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS); 2072 mask = 0; 2073 run = 0; 2074 2075 for (i = size-1; i >= 0; i--) 2076 { 2077 limb = 0; 2078 do 2079 { 2080 if (run == 0) 2081 { 2082 run = (refmpn_random_half () % RUN_MODULUS) + 1; 2083 mask = ~mask; 2084 } 2085 2086 limb |= (bit & mask); 2087 bit >>= 1; 2088 run--; 2089 } 2090 while (bit != 0); 2091 2092 ptr[i] = limb; 2093 bit = GMP_NUMB_HIGHBIT; 2094 } 2095 } 2096 2097 /* This is a simple bitwise algorithm working high to low across "s" and 2098 testing each time whether setting the bit would make s^2 exceed n. */ 2099 mp_size_t 2100 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize) 2101 { 2102 mp_ptr tp, dp; 2103 mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs; 2104 unsigned ibit; 2105 long i; 2106 mp_limb_t c; 2107 2108 ASSERT (nsize >= 0); 2109 2110 /* If n==0, then s=0 and r=0. */ 2111 if (nsize == 0) 2112 return 0; 2113 2114 ASSERT (np[nsize - 1] != 0); 2115 ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize)); 2116 ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize)); 2117 ASSERT (! 
MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize)); 2118 2119 /* root */ 2120 ssize = (nsize+1)/2; 2121 refmpn_zero (sp, ssize); 2122 2123 /* the remainder so far */ 2124 dp = refmpn_memdup_limbs (np, nsize); 2125 dsize = nsize; 2126 2127 /* temporary */ 2128 talloc = 2*ssize + 1; 2129 tp = refmpn_malloc_limbs (talloc); 2130 2131 for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--) 2132 { 2133 /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i 2134 is added to it */ 2135 2136 ilimbs = (i+1) / GMP_NUMB_BITS; 2137 ibit = (i+1) % GMP_NUMB_BITS; 2138 refmpn_zero (tp, ilimbs); 2139 c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit); 2140 tsize = ilimbs + ssize; 2141 tp[tsize] = c; 2142 tsize += (c != 0); 2143 2144 ilimbs = (2*i) / GMP_NUMB_BITS; 2145 ibit = (2*i) % GMP_NUMB_BITS; 2146 if (ilimbs + 1 > tsize) 2147 { 2148 refmpn_zero_extend (tp, tsize, ilimbs + 1); 2149 tsize = ilimbs + 1; 2150 } 2151 c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs, 2152 CNST_LIMB(1) << ibit); 2153 ASSERT (tsize < talloc); 2154 tp[tsize] = c; 2155 tsize += (c != 0); 2156 2157 if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0) 2158 { 2159 /* set this bit in s and subtract from the remainder */ 2160 refmpn_setbit (sp, i); 2161 2162 ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize)); 2163 dsize = refmpn_normalize (dp, dsize); 2164 } 2165 } 2166 2167 if (rp == NULL) 2168 { 2169 ret = ! refmpn_zero_p (dp, dsize); 2170 } 2171 else 2172 { 2173 ASSERT (dsize == 0 || dp[dsize-1] != 0); 2174 refmpn_copy (rp, dp, dsize); 2175 ret = dsize; 2176 } 2177 2178 free (dp); 2179 free (tp); 2180 return ret; 2181 } 2182
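
/* Illustrative sketch only, not part of this file's interface: the typical
   pattern for cross-checking a real mpn routine against its reference
   counterpart, here mpn_add_n against refmpn_add_n on random operands
   (size >= 1).  The function name example_check_add_n is hypothetical; all
   other names are defined above or in gmp.h/tests.h.  */
#if 0
static void
example_check_add_n (mp_size_t size)
{
  mp_ptr s1p = refmpn_malloc_limbs (size);
  mp_ptr s2p = refmpn_malloc_limbs (size);
  mp_ptr want = refmpn_malloc_limbs (size);
  mp_ptr got = refmpn_malloc_limbs (size);
  mp_limb_t want_cy, got_cy;

  refmpn_random (s1p, size);
  refmpn_random (s2p, size);

  /* reference result, then the routine under test */
  want_cy = refmpn_add_n (want, s1p, s2p, size);
  got_cy = mpn_add_n (got, s1p, s2p, size);

  ASSERT_ALWAYS (got_cy == want_cy);
  ASSERT_ALWAYS (refmpn_cmp (got, want, size) == 0);

  refmpn_free_limbs (s1p);
  refmpn_free_limbs (s2p);
  refmpn_free_limbs (want);
  refmpn_free_limbs (got);
}
#endif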