1 /*- 2 * Copyright (c) 2012-2019 Alistair Crooks <agc@NetBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 /* LibTomMath, multiple-precision integer library -- Tom St Denis 26 * 27 * LibTomMath is a library that provides multiple-precision 28 * integer arithmetic as well as number theoretic functionality. 29 * 30 * The library was designed directly after the MPI library by 31 * Michael Fromberger but has been written from scratch with 32 * additional optimizations in place. 33 * 34 * The library is free for all purposes without any express 35 * guarantee it works. 36 * 37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 38 */ 39 #include "config.h" 40 41 #include <sys/types.h> 42 #include <sys/param.h> 43 44 #ifdef _KERNEL 45 # include <sys/kmem.h> 46 #else 47 # include <arpa/inet.h> 48 # include <limits.h> 49 # include <stdarg.h> 50 # include <stdio.h> 51 # include <stdlib.h> 52 # include <string.h> 53 # include <unistd.h> 54 #endif 55 56 #include "bn.h" 57 58 /**************************************************************************/ 59 60 /* LibTomMath, multiple-precision integer library -- Tom St Denis 61 * 62 * LibTomMath is a library that provides multiple-precision 63 * integer arithmetic as well as number theoretic functionality. 64 * 65 * The library was designed directly after the MPI library by 66 * Michael Fromberger but has been written from scratch with 67 * additional optimizations in place. 68 * 69 * The library is free for all purposes without any express 70 * guarantee it works. 71 * 72 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 73 */ 74 75 #define MP_PREC 32 76 #define DIGIT_BIT 28 77 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 78 79 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 80 81 #define MP_NO 0 82 #define MP_YES 1 83 84 #ifndef USE_ARG 85 #define USE_ARG(x) /*LINTED*/(void)&(x) 86 #endif 87 88 #ifndef __arraycount 89 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 90 #endif 91 92 #ifndef MIN 93 #define MIN(a,b) (((a)<(b))?(a):(b)) 94 #endif 95 96 #define MP_ISZERO(a) (((a)->used == 0) ? MP_YES : MP_NO) 97 98 typedef int mp_err; 99 100 static int signed_multiply(mp_int * a, mp_int * b, mp_int * c); 101 static int square(mp_int * a, mp_int * b); 102 103 static int signed_subtract_word(mp_int *a, mp_digit b, mp_int *c); 104 105 static inline void * 106 allocate(size_t n, size_t m) 107 { 108 return calloc(n, m); 109 } 110 111 static inline void 112 deallocate(void *v, size_t sz) 113 { 114 USE_ARG(sz); 115 free(v); 116 } 117 118 /* set to zero */ 119 static inline void 120 mp_zero(mp_int *a) 121 { 122 a->sign = MP_ZPOS; 123 a->used = 0; 124 memset(a->dp, 0x0, a->alloc * sizeof(*a->dp)); 125 } 126 127 /* grow as required */ 128 static int 129 mp_grow(mp_int *a, int size) 130 { 131 mp_digit *tmp; 132 133 /* if the alloc size is smaller alloc more ram */ 134 if (a->alloc < size) { 135 /* ensure there are always at least MP_PREC digits extra on top */ 136 size += (MP_PREC * 2) - (size % MP_PREC); 137 138 /* reallocate the array a->dp 139 * 140 * We store the return in a temporary variable 141 * in case the operation failed we don't want 142 * to overwrite the dp member of a. 143 */ 144 tmp = realloc(a->dp, sizeof(*tmp) * size); 145 if (tmp == NULL) { 146 /* reallocation failed but "a" is still valid [can be freed] */ 147 return MP_MEM; 148 } 149 150 /* reallocation succeeded so set a->dp */ 151 a->dp = tmp; 152 /* zero excess digits */ 153 memset(&a->dp[a->alloc], 0x0, (size - a->alloc) * sizeof(*a->dp)); 154 a->alloc = size; 155 } 156 return MP_OKAY; 157 } 158 159 /* shift left a certain amount of digits */ 160 static int 161 lshift_digits(mp_int * a, int b) 162 { 163 mp_digit *top, *bottom; 164 int x, res; 165 166 /* if its less than zero return */ 167 if (b <= 0) { 168 return MP_OKAY; 169 } 170 171 /* grow to fit the new digits */ 172 if (a->alloc < a->used + b) { 173 if ((res = mp_grow(a, a->used + b)) != MP_OKAY) { 174 return res; 175 } 176 } 177 178 /* increment the used by the shift amount then copy upwards */ 179 a->used += b; 180 181 /* top */ 182 top = a->dp + a->used - 1; 183 184 /* base */ 185 bottom = a->dp + a->used - 1 - b; 186 187 /* much like rshift_digits this is implemented using a sliding window 188 * except the window goes the otherway around. Copying from 189 * the bottom to the top. 190 */ 191 for (x = a->used - 1; x >= b; x--) { 192 *top-- = *bottom--; 193 } 194 195 /* zero the lower digits */ 196 memset(a->dp, 0x0, b * sizeof(*a->dp)); 197 return MP_OKAY; 198 } 199 200 /* trim unused digits 201 * 202 * This is used to ensure that leading zero digits are 203 * trimed and the leading "used" digit will be non-zero 204 * Typically very fast. Also fixes the sign if there 205 * are no more leading digits 206 */ 207 static void 208 trim_unused_digits(mp_int * a) 209 { 210 /* decrease used while the most significant digit is 211 * zero. 212 */ 213 while (a->used > 0 && a->dp[a->used - 1] == 0) { 214 a->used -= 1; 215 } 216 /* reset the sign flag if used == 0 */ 217 if (a->used == 0) { 218 a->sign = MP_ZPOS; 219 } 220 } 221 222 /* copy, b = a */ 223 static int 224 mp_copy(PGPV_BIGNUM *a, PGPV_BIGNUM *b) 225 { 226 int res; 227 228 /* if dst == src do nothing */ 229 if (a == b) { 230 return MP_OKAY; 231 } 232 if (a == NULL || b == NULL) { 233 return MP_VAL; 234 } 235 236 /* grow dest */ 237 if (b->alloc < a->used) { 238 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 239 return res; 240 } 241 } 242 243 memcpy(b->dp, a->dp, a->used * sizeof(*b->dp)); 244 if (b->used > a->used) { 245 memset(&b->dp[a->used], 0x0, (b->used - a->used) * sizeof(*b->dp)); 246 } 247 248 /* copy used count and sign */ 249 b->used = a->used; 250 b->sign = a->sign; 251 return MP_OKAY; 252 } 253 254 /* shift left by a certain bit count */ 255 static int 256 lshift_bits(mp_int *a, int b, mp_int *c) 257 { 258 mp_digit d; 259 int res; 260 261 /* copy */ 262 if (a != c) { 263 if ((res = mp_copy(a, c)) != MP_OKAY) { 264 return res; 265 } 266 } 267 268 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 269 if ((res = mp_grow(c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 270 return res; 271 } 272 } 273 274 /* shift by as many digits in the bit count */ 275 if (b >= (int)DIGIT_BIT) { 276 if ((res = lshift_digits(c, b / DIGIT_BIT)) != MP_OKAY) { 277 return res; 278 } 279 } 280 281 /* shift any bit count < DIGIT_BIT */ 282 d = (mp_digit) (b % DIGIT_BIT); 283 if (d != 0) { 284 mp_digit *tmpc, shift, mask, carry, rr; 285 int x; 286 287 /* bitmask for carries */ 288 mask = (((mp_digit)1) << d) - 1; 289 290 /* shift for msbs */ 291 shift = DIGIT_BIT - d; 292 293 /* alias */ 294 tmpc = c->dp; 295 296 /* carry */ 297 carry = 0; 298 for (x = 0; x < c->used; x++) { 299 /* get the higher bits of the current word */ 300 rr = (*tmpc >> shift) & mask; 301 302 /* shift the current word and OR in the carry */ 303 *tmpc = ((*tmpc << d) | carry) & MP_MASK; 304 ++tmpc; 305 306 /* set the carry to the carry bits of the current word */ 307 carry = rr; 308 } 309 310 /* set final carry */ 311 if (carry != 0) { 312 c->dp[c->used++] = carry; 313 } 314 } 315 trim_unused_digits(c); 316 return MP_OKAY; 317 } 318 319 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 320 static int 321 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 322 { 323 int res; 324 325 /* make sure there are at least two digits */ 326 if (a->alloc < 2) { 327 if ((res = mp_grow(a, 2)) != MP_OKAY) { 328 return res; 329 } 330 } 331 332 /* zero the int */ 333 mp_zero(a); 334 335 /* read the bytes in */ 336 while (c-- > 0) { 337 if ((res = lshift_bits(a, 8, a)) != MP_OKAY) { 338 return res; 339 } 340 341 a->dp[0] |= *b++; 342 a->used += 1; 343 } 344 trim_unused_digits(a); 345 return MP_OKAY; 346 } 347 348 /* returns the number of bits in an mpi */ 349 static int 350 mp_count_bits(const mp_int *a) 351 { 352 int r; 353 mp_digit q; 354 355 /* shortcut */ 356 if (a->used == 0) { 357 return 0; 358 } 359 360 /* get number of digits and add that */ 361 r = (a->used - 1) * DIGIT_BIT; 362 363 /* take the last digit and count the bits in it */ 364 for (q = a->dp[a->used - 1]; q > ((mp_digit) 0) ; r++) { 365 q >>= ((mp_digit) 1); 366 } 367 return r; 368 } 369 370 /* compare maginitude of two ints (unsigned) */ 371 static int 372 compare_magnitude(mp_int * a, mp_int * b) 373 { 374 int n; 375 mp_digit *tmpa, *tmpb; 376 377 /* compare based on # of non-zero digits */ 378 if (a->used > b->used) { 379 return MP_GT; 380 } 381 382 if (a->used < b->used) { 383 return MP_LT; 384 } 385 386 /* alias for a */ 387 tmpa = a->dp + (a->used - 1); 388 389 /* alias for b */ 390 tmpb = b->dp + (a->used - 1); 391 392 /* compare based on digits */ 393 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 394 if (*tmpa > *tmpb) { 395 return MP_GT; 396 } 397 398 if (*tmpa < *tmpb) { 399 return MP_LT; 400 } 401 } 402 return MP_EQ; 403 } 404 405 /* compare two ints (signed)*/ 406 static int 407 signed_compare(mp_int * a, mp_int * b) 408 { 409 /* compare based on sign */ 410 if (a->sign != b->sign) { 411 return (a->sign == MP_NEG) ? MP_LT : MP_GT; 412 } 413 return (a->sign == MP_NEG) ? compare_magnitude(b, a) : compare_magnitude(a, b); 414 } 415 416 /* get the size for an unsigned equivalent */ 417 static int 418 mp_unsigned_bin_size(mp_int * a) 419 { 420 int size = mp_count_bits(a); 421 422 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 423 } 424 425 /* init a new mp_int */ 426 static int 427 mp_init(mp_int * a) 428 { 429 /* allocate memory required and clear it */ 430 a->dp = allocate(1, sizeof(*a->dp) * MP_PREC); 431 if (a->dp == NULL) { 432 return MP_MEM; 433 } 434 435 /* set the digits to zero */ 436 memset(a->dp, 0x0, MP_PREC * sizeof(*a->dp)); 437 438 /* set the used to zero, allocated digits to the default precision 439 * and sign to positive */ 440 a->used = 0; 441 a->alloc = MP_PREC; 442 a->sign = MP_ZPOS; 443 444 return MP_OKAY; 445 } 446 447 /* clear one (frees) */ 448 static void 449 mp_clear(mp_int * a) 450 { 451 /* only do anything if a hasn't been freed previously */ 452 if (a->dp != NULL) { 453 memset(a->dp, 0x0, a->used * sizeof(*a->dp)); 454 455 /* free ram */ 456 deallocate(a->dp, (size_t)a->alloc); 457 458 /* reset members to make debugging easier */ 459 a->dp = NULL; 460 a->alloc = a->used = 0; 461 a->sign = MP_ZPOS; 462 } 463 } 464 465 static int 466 mp_init_multi(mp_int *mp, ...) 467 { 468 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 469 int n = 0; /* Number of ok inits */ 470 mp_int* cur_arg = mp; 471 va_list args; 472 473 va_start(args, mp); /* init args to next argument from caller */ 474 while (cur_arg != NULL) { 475 if (mp_init(cur_arg) != MP_OKAY) { 476 /* Oops - error! Back-track and mp_clear what we already 477 succeeded in init-ing, then return error. 478 */ 479 va_list clean_args; 480 481 /* end the current list */ 482 va_end(args); 483 484 /* now start cleaning up */ 485 cur_arg = mp; 486 va_start(clean_args, mp); 487 while (n--) { 488 mp_clear(cur_arg); 489 cur_arg = va_arg(clean_args, mp_int*); 490 } 491 va_end(clean_args); 492 res = MP_MEM; 493 break; 494 } 495 n++; 496 cur_arg = va_arg(args, mp_int*); 497 } 498 va_end(args); 499 return res; /* Assumed ok, if error flagged above. */ 500 } 501 502 /* init an mp_init for a given size */ 503 static int 504 mp_init_size(mp_int * a, int size) 505 { 506 /* pad size so there are always extra digits */ 507 size += (MP_PREC * 2) - (size % MP_PREC); 508 509 /* alloc mem */ 510 a->dp = allocate(1, sizeof(*a->dp) * size); 511 if (a->dp == NULL) { 512 return MP_MEM; 513 } 514 515 /* set the members */ 516 a->used = 0; 517 a->alloc = size; 518 a->sign = MP_ZPOS; 519 520 /* zero the digits */ 521 memset(a->dp, 0x0, size * sizeof(*a->dp)); 522 return MP_OKAY; 523 } 524 525 /* creates "a" then copies b into it */ 526 static int 527 mp_init_copy(mp_int * a, mp_int * b) 528 { 529 int res; 530 531 if ((res = mp_init(a)) != MP_OKAY) { 532 return res; 533 } 534 return mp_copy(b, a); 535 } 536 537 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 538 static int 539 basic_add(mp_int * a, mp_int * b, mp_int * c) 540 { 541 mp_int *x; 542 int olduse, res, min, max; 543 544 /* find sizes, we let |a| <= |b| which means we have to sort 545 * them. "x" will point to the input with the most digits 546 */ 547 if (a->used > b->used) { 548 min = b->used; 549 max = a->used; 550 x = a; 551 } else { 552 min = a->used; 553 max = b->used; 554 x = b; 555 } 556 557 /* init result */ 558 if (c->alloc < max + 1) { 559 if ((res = mp_grow(c, max + 1)) != MP_OKAY) { 560 return res; 561 } 562 } 563 564 /* get old used digit count and set new one */ 565 olduse = c->used; 566 c->used = max + 1; 567 568 { 569 mp_digit carry, *tmpa, *tmpb, *tmpc; 570 int i; 571 572 /* alias for digit pointers */ 573 574 /* first input */ 575 tmpa = a->dp; 576 577 /* second input */ 578 tmpb = b->dp; 579 580 /* destination */ 581 tmpc = c->dp; 582 583 /* zero the carry */ 584 carry = 0; 585 for (i = 0; i < min; i++) { 586 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 587 *tmpc = *tmpa++ + *tmpb++ + carry; 588 589 /* U = carry bit of T[i] */ 590 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 591 592 /* take away carry bit from T[i] */ 593 *tmpc++ &= MP_MASK; 594 } 595 596 /* now copy higher words if any, that is in A+B 597 * if A or B has more digits add those in 598 */ 599 if (min != max) { 600 for (; i < max; i++) { 601 /* T[i] = X[i] + U */ 602 *tmpc = x->dp[i] + carry; 603 604 /* U = carry bit of T[i] */ 605 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 606 607 /* take away carry bit from T[i] */ 608 *tmpc++ &= MP_MASK; 609 } 610 } 611 612 /* add carry */ 613 *tmpc++ = carry; 614 615 /* clear digits above oldused */ 616 if (olduse > c->used) { 617 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*c->dp)); 618 } 619 } 620 621 trim_unused_digits(c); 622 return MP_OKAY; 623 } 624 625 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 626 static int 627 basic_subtract(mp_int * a, mp_int * b, mp_int * c) 628 { 629 int olduse, res, min, max; 630 631 /* find sizes */ 632 min = b->used; 633 max = a->used; 634 635 /* init result */ 636 if (c->alloc < max) { 637 if ((res = mp_grow(c, max)) != MP_OKAY) { 638 return res; 639 } 640 } 641 olduse = c->used; 642 c->used = max; 643 644 { 645 mp_digit carry, *tmpa, *tmpb, *tmpc; 646 int i; 647 648 /* alias for digit pointers */ 649 tmpa = a->dp; 650 tmpb = b->dp; 651 tmpc = c->dp; 652 653 /* set carry to zero */ 654 carry = 0; 655 for (i = 0; i < min; i++) { 656 /* T[i] = A[i] - B[i] - U */ 657 *tmpc = *tmpa++ - *tmpb++ - carry; 658 659 /* U = carry bit of T[i] 660 * Note this saves performing an AND operation since 661 * if a carry does occur it will propagate all the way to the 662 * MSB. As a result a single shift is enough to get the carry 663 */ 664 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 665 666 /* Clear carry from T[i] */ 667 *tmpc++ &= MP_MASK; 668 } 669 670 /* now copy higher words if any, e.g. if A has more digits than B */ 671 for (; i < max; i++) { 672 /* T[i] = A[i] - U */ 673 *tmpc = *tmpa++ - carry; 674 675 /* U = carry bit of T[i] */ 676 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 677 678 /* Clear carry from T[i] */ 679 *tmpc++ &= MP_MASK; 680 } 681 682 /* clear digits above used (since we may not have grown result above) */ 683 if (olduse > c->used) { 684 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*a->dp)); 685 } 686 } 687 688 trim_unused_digits(c); 689 return MP_OKAY; 690 } 691 692 /* high level subtraction (handles signs) */ 693 static int 694 signed_subtract(mp_int * a, mp_int * b, mp_int * c) 695 { 696 int sa, sb, res; 697 698 sa = a->sign; 699 sb = b->sign; 700 701 if (sa != sb) { 702 /* subtract a negative from a positive, OR */ 703 /* subtract a positive from a negative. */ 704 /* In either case, ADD their magnitudes, */ 705 /* and use the sign of the first number. */ 706 c->sign = sa; 707 res = basic_add(a, b, c); 708 } else { 709 /* subtract a positive from a positive, OR */ 710 /* subtract a negative from a negative. */ 711 /* First, take the difference between their */ 712 /* magnitudes, then... */ 713 if (compare_magnitude(a, b) != MP_LT) { 714 /* Copy the sign from the first */ 715 c->sign = sa; 716 /* The first has a larger or equal magnitude */ 717 res = basic_subtract(a, b, c); 718 } else { 719 /* The result has the *opposite* sign from */ 720 /* the first number. */ 721 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 722 /* The second has a larger magnitude */ 723 res = basic_subtract(b, a, c); 724 } 725 } 726 return res; 727 } 728 729 /* shift right a certain amount of digits */ 730 static int 731 rshift_digits(mp_int * a, int b) 732 { 733 /* if b <= 0 then ignore it */ 734 if (b <= 0) { 735 return 0; 736 } 737 738 /* if b > used then simply zero it and return */ 739 if (a->used <= b) { 740 mp_zero(a); 741 return 0; 742 } 743 744 /* this is implemented as a sliding window where 745 * the window is b-digits long and digits from 746 * the top of the window are copied to the bottom 747 * 748 * e.g. 749 750 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 751 /\ | ----> 752 \-------------------/ ----> 753 */ 754 memmove(a->dp, &a->dp[b], (a->used - b) * sizeof(*a->dp)); 755 memset(&a->dp[a->used - b], 0x0, b * sizeof(*a->dp)); 756 757 /* remove excess digits */ 758 a->used -= b; 759 return 1; 760 } 761 762 /* multiply by a digit */ 763 static int 764 multiply_digit(mp_int * a, mp_digit b, mp_int * c) 765 { 766 mp_digit carry, *tmpa, *tmpc; 767 mp_word r; 768 int ix, res, olduse; 769 770 /* make sure c is big enough to hold a*b */ 771 if (c->alloc < a->used + 1) { 772 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 773 return res; 774 } 775 } 776 777 /* get the original destinations used count */ 778 olduse = c->used; 779 780 /* set the sign */ 781 c->sign = a->sign; 782 783 /* alias for a->dp [source] */ 784 tmpa = a->dp; 785 786 /* alias for c->dp [dest] */ 787 tmpc = c->dp; 788 789 /* zero carry */ 790 carry = 0; 791 792 /* compute columns */ 793 for (ix = 0; ix < a->used; ix++) { 794 /* compute product and carry sum for this term */ 795 r = ((mp_word) carry) + ((mp_word)*tmpa++) * ((mp_word)b); 796 797 /* mask off higher bits to get a single digit */ 798 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 799 800 /* send carry into next iteration */ 801 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 802 } 803 804 /* store final carry [if any] and increment ix offset */ 805 *tmpc++ = carry; 806 ++ix; 807 if (olduse > ix) { 808 memset(tmpc, 0x0, (olduse - ix) * sizeof(*tmpc)); 809 } 810 811 /* set used count */ 812 c->used = a->used + 1; 813 trim_unused_digits(c); 814 815 return MP_OKAY; 816 } 817 818 /* high level addition (handles signs) */ 819 static int 820 signed_add(mp_int * a, mp_int * b, mp_int * c) 821 { 822 int asign, bsign, res; 823 824 /* get sign of both inputs */ 825 asign = a->sign; 826 bsign = b->sign; 827 828 /* handle two cases, not four */ 829 if (asign == bsign) { 830 /* both positive or both negative */ 831 /* add their magnitudes, copy the sign */ 832 c->sign = asign; 833 res = basic_add(a, b, c); 834 } else { 835 /* one positive, the other negative */ 836 /* subtract the one with the greater magnitude from */ 837 /* the one of the lesser magnitude. The result gets */ 838 /* the sign of the one with the greater magnitude. */ 839 if (compare_magnitude(a, b) == MP_LT) { 840 c->sign = bsign; 841 res = basic_subtract(b, a, c); 842 } else { 843 c->sign = asign; 844 res = basic_subtract(a, b, c); 845 } 846 } 847 return res; 848 } 849 850 /* swap the elements of two integers, for cases where you can't simply swap the 851 * mp_int pointers around 852 */ 853 static void 854 mp_exch(mp_int *a, mp_int *b) 855 { 856 mp_int t; 857 858 t = *a; 859 *a = *b; 860 *b = t; 861 } 862 863 /* calc a value mod 2**b */ 864 static int 865 modulo_2_to_power(mp_int * a, int b, mp_int * c) 866 { 867 int x, res; 868 869 /* if b is <= 0 then zero the int */ 870 if (b <= 0) { 871 mp_zero(c); 872 return MP_OKAY; 873 } 874 875 /* if the modulus is larger than the value than return */ 876 if (b >= (int) (a->used * DIGIT_BIT)) { 877 res = mp_copy(a, c); 878 return res; 879 } 880 881 /* copy */ 882 if ((res = mp_copy(a, c)) != MP_OKAY) { 883 return res; 884 } 885 886 /* zero digits above the last digit of the modulus */ 887 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 888 c->dp[x] = 0; 889 } 890 /* clear the digit that is not completely outside/inside the modulus */ 891 c->dp[b / DIGIT_BIT] &= 892 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 893 trim_unused_digits(c); 894 return MP_OKAY; 895 } 896 897 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 898 static int 899 rshift_bits(mp_int * a, int b, mp_int * c, mp_int * d) 900 { 901 mp_digit D, r, rr; 902 int x, res; 903 mp_int t; 904 905 906 /* if the shift count is <= 0 then we do no work */ 907 if (b <= 0) { 908 res = mp_copy(a, c); 909 if (d != NULL) { 910 mp_zero(d); 911 } 912 return res; 913 } 914 915 if ((res = mp_init(&t)) != MP_OKAY) { 916 return res; 917 } 918 919 /* get the remainder */ 920 if (d != NULL) { 921 if ((res = modulo_2_to_power(a, b, &t)) != MP_OKAY) { 922 mp_clear(&t); 923 return res; 924 } 925 } 926 927 /* copy */ 928 if ((res = mp_copy(a, c)) != MP_OKAY) { 929 mp_clear(&t); 930 return res; 931 } 932 933 /* shift by as many digits in the bit count */ 934 if (b >= (int)DIGIT_BIT) { 935 rshift_digits(c, b / DIGIT_BIT); 936 } 937 938 /* shift any bit count < DIGIT_BIT */ 939 D = (mp_digit) (b % DIGIT_BIT); 940 if (D != 0) { 941 mp_digit *tmpc, mask, shift; 942 943 /* mask */ 944 mask = (((mp_digit)1) << D) - 1; 945 946 /* shift for lsb */ 947 shift = DIGIT_BIT - D; 948 949 /* alias */ 950 tmpc = c->dp + (c->used - 1); 951 952 /* carry */ 953 r = 0; 954 for (x = c->used - 1; x >= 0; x--) { 955 /* get the lower bits of this word in a temp */ 956 rr = *tmpc & mask; 957 958 /* shift the current word and mix in the carry bits from the previous word */ 959 *tmpc = (*tmpc >> D) | (r << shift); 960 --tmpc; 961 962 /* set the carry to the carry bits of the current word found above */ 963 r = rr; 964 } 965 } 966 trim_unused_digits(c); 967 if (d != NULL) { 968 mp_exch(&t, d); 969 } 970 mp_clear(&t); 971 return MP_OKAY; 972 } 973 974 /* integer signed division. 975 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 976 * HAC pp.598 Algorithm 14.20 977 * 978 * Note that the description in HAC is horribly 979 * incomplete. For example, it doesn't consider 980 * the case where digits are removed from 'x' in 981 * the inner loop. It also doesn't consider the 982 * case that y has fewer than three digits, etc.. 983 * 984 * The overall algorithm is as described as 985 * 14.20 from HAC but fixed to treat these cases. 986 */ 987 static int 988 signed_divide(mp_int *c, mp_int *d, mp_int *a, mp_int *b) 989 { 990 mp_int q, x, y, t1, t2; 991 int res, n, t, i, norm, neg; 992 993 /* is divisor zero ? */ 994 if (MP_ISZERO(b) == MP_YES) { 995 return MP_VAL; 996 } 997 998 /* if a < b then q=0, r = a */ 999 if (compare_magnitude(a, b) == MP_LT) { 1000 if (d != NULL) { 1001 res = mp_copy(a, d); 1002 } else { 1003 res = MP_OKAY; 1004 } 1005 if (c != NULL) { 1006 mp_zero(c); 1007 } 1008 return res; 1009 } 1010 1011 if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) { 1012 return res; 1013 } 1014 q.used = a->used + 2; 1015 1016 if ((res = mp_init(&t1)) != MP_OKAY) { 1017 goto LBL_Q; 1018 } 1019 1020 if ((res = mp_init(&t2)) != MP_OKAY) { 1021 goto LBL_T1; 1022 } 1023 1024 if ((res = mp_init_copy(&x, a)) != MP_OKAY) { 1025 goto LBL_T2; 1026 } 1027 1028 if ((res = mp_init_copy(&y, b)) != MP_OKAY) { 1029 goto LBL_X; 1030 } 1031 1032 /* fix the sign */ 1033 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1034 x.sign = y.sign = MP_ZPOS; 1035 1036 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1037 norm = mp_count_bits(&y) % DIGIT_BIT; 1038 if (norm < (int)(DIGIT_BIT-1)) { 1039 norm = (DIGIT_BIT-1) - norm; 1040 if ((res = lshift_bits(&x, norm, &x)) != MP_OKAY) { 1041 goto LBL_Y; 1042 } 1043 if ((res = lshift_bits(&y, norm, &y)) != MP_OKAY) { 1044 goto LBL_Y; 1045 } 1046 } else { 1047 norm = 0; 1048 } 1049 1050 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1051 n = x.used - 1; 1052 t = y.used - 1; 1053 1054 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1055 if ((res = lshift_digits(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1056 goto LBL_Y; 1057 } 1058 1059 while (signed_compare(&x, &y) != MP_LT) { 1060 ++(q.dp[n - t]); 1061 if ((res = signed_subtract(&x, &y, &x)) != MP_OKAY) { 1062 goto LBL_Y; 1063 } 1064 } 1065 1066 /* reset y by shifting it back down */ 1067 rshift_digits(&y, n - t); 1068 1069 /* step 3. for i from n down to (t + 1) */ 1070 for (i = n; i >= (t + 1); i--) { 1071 if (i > x.used) { 1072 continue; 1073 } 1074 1075 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1076 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1077 if (x.dp[i] == y.dp[t]) { 1078 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1079 } else { 1080 mp_word tmp; 1081 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1082 tmp |= ((mp_word) x.dp[i - 1]); 1083 tmp /= ((mp_word) y.dp[t]); 1084 if (tmp > (mp_word) MP_MASK) { 1085 tmp = MP_MASK; 1086 } 1087 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1088 } 1089 1090 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1091 xi * b**2 + xi-1 * b + xi-2 1092 do q{i-t-1} -= 1; 1093 */ 1094 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1095 do { 1096 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1097 1098 /* find left hand */ 1099 mp_zero(&t1); 1100 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1101 t1.dp[1] = y.dp[t]; 1102 t1.used = 2; 1103 if ((res = multiply_digit(&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1104 goto LBL_Y; 1105 } 1106 1107 /* find right hand */ 1108 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1109 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1110 t2.dp[2] = x.dp[i]; 1111 t2.used = 3; 1112 } while (compare_magnitude(&t1, &t2) == MP_GT); 1113 1114 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1115 if ((res = multiply_digit(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1116 goto LBL_Y; 1117 } 1118 1119 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1120 goto LBL_Y; 1121 } 1122 1123 if ((res = signed_subtract(&x, &t1, &x)) != MP_OKAY) { 1124 goto LBL_Y; 1125 } 1126 1127 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1128 if (x.sign == MP_NEG) { 1129 if ((res = mp_copy(&y, &t1)) != MP_OKAY) { 1130 goto LBL_Y; 1131 } 1132 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1133 goto LBL_Y; 1134 } 1135 if ((res = signed_add(&x, &t1, &x)) != MP_OKAY) { 1136 goto LBL_Y; 1137 } 1138 1139 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1140 } 1141 } 1142 1143 /* now q is the quotient and x is the remainder 1144 * [which we have to normalize] 1145 */ 1146 1147 /* get sign before writing to c */ 1148 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1149 1150 if (c != NULL) { 1151 trim_unused_digits(&q); 1152 mp_exch(&q, c); 1153 c->sign = neg; 1154 } 1155 1156 if (d != NULL) { 1157 rshift_bits(&x, norm, &x, NULL); 1158 mp_exch(&x, d); 1159 } 1160 1161 res = MP_OKAY; 1162 1163 LBL_Y: 1164 mp_clear(&y); 1165 LBL_X: 1166 mp_clear(&x); 1167 LBL_T2: 1168 mp_clear(&t2); 1169 LBL_T1: 1170 mp_clear(&t1); 1171 LBL_Q: 1172 mp_clear(&q); 1173 return res; 1174 } 1175 1176 /* c = a mod b, 0 <= c < b */ 1177 static int 1178 modulo(mp_int * a, mp_int * b, mp_int * c) 1179 { 1180 mp_int t; 1181 int res; 1182 1183 if ((res = mp_init(&t)) != MP_OKAY) { 1184 return res; 1185 } 1186 1187 if ((res = signed_divide(NULL, &t, a, b)) != MP_OKAY) { 1188 mp_clear(&t); 1189 return res; 1190 } 1191 1192 if (t.sign != b->sign) { 1193 res = signed_add(b, &t, c); 1194 } else { 1195 res = MP_OKAY; 1196 mp_exch(&t, c); 1197 } 1198 1199 mp_clear(&t); 1200 return res; 1201 } 1202 1203 /* set to a digit */ 1204 static void 1205 set_word(mp_int * a, mp_digit b) 1206 { 1207 mp_zero(a); 1208 a->dp[0] = b & MP_MASK; 1209 a->used = (a->dp[0] != 0) ? 1 : 0; 1210 } 1211 1212 /* b = a/2 */ 1213 static int 1214 half(mp_int * a, mp_int * b) 1215 { 1216 int x, res, oldused; 1217 1218 /* copy */ 1219 if (b->alloc < a->used) { 1220 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 1221 return res; 1222 } 1223 } 1224 1225 oldused = b->used; 1226 b->used = a->used; 1227 { 1228 mp_digit r, rr, *tmpa, *tmpb; 1229 1230 /* source alias */ 1231 tmpa = a->dp + b->used - 1; 1232 1233 /* dest alias */ 1234 tmpb = b->dp + b->used - 1; 1235 1236 /* carry */ 1237 r = 0; 1238 for (x = b->used - 1; x >= 0; x--) { 1239 /* get the carry for the next iteration */ 1240 rr = *tmpa & 1; 1241 1242 /* shift the current digit, add in carry and store */ 1243 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1244 1245 /* forward carry to next iteration */ 1246 r = rr; 1247 } 1248 1249 /* zero excess digits */ 1250 tmpb = b->dp + b->used; 1251 for (x = b->used; x < oldused; x++) { 1252 *tmpb++ = 0; 1253 } 1254 } 1255 b->sign = a->sign; 1256 trim_unused_digits(b); 1257 return MP_OKAY; 1258 } 1259 1260 /* compare a digit */ 1261 static int 1262 compare_digit(mp_int * a, mp_digit b) 1263 { 1264 /* compare based on sign */ 1265 if (a->sign == MP_NEG) { 1266 return MP_LT; 1267 } 1268 1269 /* compare based on magnitude */ 1270 if (a->used > 1) { 1271 return MP_GT; 1272 } 1273 1274 /* compare the only digit of a to b */ 1275 if (a->dp[0] > b) { 1276 return MP_GT; 1277 } else if (a->dp[0] < b) { 1278 return MP_LT; 1279 } else { 1280 return MP_EQ; 1281 } 1282 } 1283 1284 static void 1285 mp_clear_multi(mp_int *mp, ...) 1286 { 1287 mp_int* next_mp = mp; 1288 va_list args; 1289 1290 va_start(args, mp); 1291 while (next_mp != NULL) { 1292 mp_clear(next_mp); 1293 next_mp = va_arg(args, mp_int*); 1294 } 1295 va_end(args); 1296 } 1297 1298 /* computes the modular inverse via binary extended euclidean algorithm, 1299 * that is c = 1/a mod b 1300 * 1301 * Based on slow invmod except this is optimized for the case where b is 1302 * odd as per HAC Note 14.64 on pp. 610 1303 */ 1304 static int 1305 fast_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1306 { 1307 mp_int x, y, u, v, B, D; 1308 int res, neg; 1309 1310 /* 2. [modified] b must be odd */ 1311 if (MP_ISZERO(b) == MP_YES) { 1312 return MP_VAL; 1313 } 1314 1315 /* init all our temps */ 1316 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1317 return res; 1318 } 1319 1320 /* x == modulus, y == value to invert */ 1321 if ((res = mp_copy(b, &x)) != MP_OKAY) { 1322 goto LBL_ERR; 1323 } 1324 1325 /* we need y = |a| */ 1326 if ((res = modulo(a, b, &y)) != MP_OKAY) { 1327 goto LBL_ERR; 1328 } 1329 1330 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1331 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1332 goto LBL_ERR; 1333 } 1334 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1335 goto LBL_ERR; 1336 } 1337 set_word(&D, 1); 1338 1339 top: 1340 /* 4. while u is even do */ 1341 while (PGPV_BN_is_even(&u) == 1) { 1342 /* 4.1 u = u/2 */ 1343 if ((res = half(&u, &u)) != MP_OKAY) { 1344 goto LBL_ERR; 1345 } 1346 /* 4.2 if B is odd then */ 1347 if (PGPV_BN_is_odd(&B) == 1) { 1348 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1349 goto LBL_ERR; 1350 } 1351 } 1352 /* B = B/2 */ 1353 if ((res = half(&B, &B)) != MP_OKAY) { 1354 goto LBL_ERR; 1355 } 1356 } 1357 1358 /* 5. while v is even do */ 1359 while (PGPV_BN_is_even(&v) == 1) { 1360 /* 5.1 v = v/2 */ 1361 if ((res = half(&v, &v)) != MP_OKAY) { 1362 goto LBL_ERR; 1363 } 1364 /* 5.2 if D is odd then */ 1365 if (PGPV_BN_is_odd(&D) == 1) { 1366 /* D = (D-x)/2 */ 1367 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1368 goto LBL_ERR; 1369 } 1370 } 1371 /* D = D/2 */ 1372 if ((res = half(&D, &D)) != MP_OKAY) { 1373 goto LBL_ERR; 1374 } 1375 } 1376 1377 /* 6. if u >= v then */ 1378 if (signed_compare(&u, &v) != MP_LT) { 1379 /* u = u - v, B = B - D */ 1380 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1381 goto LBL_ERR; 1382 } 1383 1384 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1385 goto LBL_ERR; 1386 } 1387 } else { 1388 /* v - v - u, D = D - B */ 1389 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1390 goto LBL_ERR; 1391 } 1392 1393 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1394 goto LBL_ERR; 1395 } 1396 } 1397 1398 /* if not zero goto step 4 */ 1399 if (MP_ISZERO(&u) == MP_NO) { 1400 goto top; 1401 } 1402 1403 /* now a = C, b = D, gcd == g*v */ 1404 1405 /* if v != 1 then there is no inverse */ 1406 if (compare_digit(&v, 1) != MP_EQ) { 1407 res = MP_VAL; 1408 goto LBL_ERR; 1409 } 1410 1411 /* b is now the inverse */ 1412 neg = a->sign; 1413 while (D.sign == MP_NEG) { 1414 if ((res = signed_add(&D, b, &D)) != MP_OKAY) { 1415 goto LBL_ERR; 1416 } 1417 } 1418 mp_exch(&D, c); 1419 c->sign = neg; 1420 res = MP_OKAY; 1421 1422 LBL_ERR: 1423 mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1424 return res; 1425 } 1426 1427 /* hac 14.61, pp608 */ 1428 static int 1429 slow_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1430 { 1431 mp_int x, y, u, v, A, B, C, D; 1432 int res; 1433 1434 /* b cannot be negative */ 1435 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1436 return MP_VAL; 1437 } 1438 1439 /* init temps */ 1440 if ((res = mp_init_multi(&x, &y, &u, &v, 1441 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1442 return res; 1443 } 1444 1445 /* x = a, y = b */ 1446 if ((res = modulo(a, b, &x)) != MP_OKAY) { 1447 goto LBL_ERR; 1448 } 1449 if ((res = mp_copy(b, &y)) != MP_OKAY) { 1450 goto LBL_ERR; 1451 } 1452 1453 /* 2. [modified] if x,y are both even then return an error! */ 1454 if (PGPV_BN_is_even(&x) == 1 && PGPV_BN_is_even(&y) == 1) { 1455 res = MP_VAL; 1456 goto LBL_ERR; 1457 } 1458 1459 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1460 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1461 goto LBL_ERR; 1462 } 1463 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1464 goto LBL_ERR; 1465 } 1466 set_word(&A, 1); 1467 set_word(&D, 1); 1468 1469 top: 1470 /* 4. while u is even do */ 1471 while (PGPV_BN_is_even(&u) == 1) { 1472 /* 4.1 u = u/2 */ 1473 if ((res = half(&u, &u)) != MP_OKAY) { 1474 goto LBL_ERR; 1475 } 1476 /* 4.2 if A or B is odd then */ 1477 if (PGPV_BN_is_odd(&A) == 1 || PGPV_BN_is_odd(&B) == 1) { 1478 /* A = (A+y)/2, B = (B-x)/2 */ 1479 if ((res = signed_add(&A, &y, &A)) != MP_OKAY) { 1480 goto LBL_ERR; 1481 } 1482 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1483 goto LBL_ERR; 1484 } 1485 } 1486 /* A = A/2, B = B/2 */ 1487 if ((res = half(&A, &A)) != MP_OKAY) { 1488 goto LBL_ERR; 1489 } 1490 if ((res = half(&B, &B)) != MP_OKAY) { 1491 goto LBL_ERR; 1492 } 1493 } 1494 1495 /* 5. while v is even do */ 1496 while (PGPV_BN_is_even(&v) == 1) { 1497 /* 5.1 v = v/2 */ 1498 if ((res = half(&v, &v)) != MP_OKAY) { 1499 goto LBL_ERR; 1500 } 1501 /* 5.2 if C or D is odd then */ 1502 if (PGPV_BN_is_odd(&C) == 1 || PGPV_BN_is_odd(&D) == 1) { 1503 /* C = (C+y)/2, D = (D-x)/2 */ 1504 if ((res = signed_add(&C, &y, &C)) != MP_OKAY) { 1505 goto LBL_ERR; 1506 } 1507 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1508 goto LBL_ERR; 1509 } 1510 } 1511 /* C = C/2, D = D/2 */ 1512 if ((res = half(&C, &C)) != MP_OKAY) { 1513 goto LBL_ERR; 1514 } 1515 if ((res = half(&D, &D)) != MP_OKAY) { 1516 goto LBL_ERR; 1517 } 1518 } 1519 1520 /* 6. if u >= v then */ 1521 if (signed_compare(&u, &v) != MP_LT) { 1522 /* u = u - v, A = A - C, B = B - D */ 1523 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1524 goto LBL_ERR; 1525 } 1526 1527 if ((res = signed_subtract(&A, &C, &A)) != MP_OKAY) { 1528 goto LBL_ERR; 1529 } 1530 1531 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1532 goto LBL_ERR; 1533 } 1534 } else { 1535 /* v - v - u, C = C - A, D = D - B */ 1536 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1537 goto LBL_ERR; 1538 } 1539 1540 if ((res = signed_subtract(&C, &A, &C)) != MP_OKAY) { 1541 goto LBL_ERR; 1542 } 1543 1544 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1545 goto LBL_ERR; 1546 } 1547 } 1548 1549 /* if not zero goto step 4 */ 1550 if (PGPV_BN_is_zero(&u) == 0) { 1551 goto top; 1552 } 1553 /* now a = C, b = D, gcd == g*v */ 1554 1555 /* if v != 1 then there is no inverse */ 1556 if (compare_digit(&v, 1) != MP_EQ) { 1557 res = MP_VAL; 1558 goto LBL_ERR; 1559 } 1560 1561 /* if its too low */ 1562 while (compare_digit(&C, 0) == MP_LT) { 1563 if ((res = signed_add(&C, b, &C)) != MP_OKAY) { 1564 goto LBL_ERR; 1565 } 1566 } 1567 1568 /* too big */ 1569 while (compare_magnitude(&C, b) != MP_LT) { 1570 if ((res = signed_subtract(&C, b, &C)) != MP_OKAY) { 1571 goto LBL_ERR; 1572 } 1573 } 1574 1575 /* C is now the inverse */ 1576 mp_exch(&C, c); 1577 res = MP_OKAY; 1578 LBL_ERR: 1579 mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1580 return res; 1581 } 1582 1583 static int 1584 modular_inverse(mp_int *c, mp_int *a, mp_int *b) 1585 { 1586 /* b cannot be negative */ 1587 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1588 return MP_VAL; 1589 } 1590 1591 /* if the modulus is odd we can use a faster routine instead */ 1592 if (PGPV_BN_is_odd(b) == 1) { 1593 return fast_modular_inverse(a, b, c); 1594 } 1595 return slow_modular_inverse(a, b, c); 1596 } 1597 1598 /* b = |a| 1599 * 1600 * Simple function copies the input and fixes the sign to positive 1601 */ 1602 static int 1603 absolute(mp_int * a, mp_int * b) 1604 { 1605 int res; 1606 1607 /* copy a to b */ 1608 if (a != b) { 1609 if ((res = mp_copy(a, b)) != MP_OKAY) { 1610 return res; 1611 } 1612 } 1613 1614 /* force the sign of b to positive */ 1615 b->sign = MP_ZPOS; 1616 1617 return MP_OKAY; 1618 } 1619 1620 /* determines if reduce_2k_l can be used */ 1621 static int 1622 mp_reduce_is_2k_l(mp_int *a) 1623 { 1624 int ix, iy; 1625 1626 if (a->used == 0) { 1627 return MP_NO; 1628 } else if (a->used == 1) { 1629 return MP_YES; 1630 } else if (a->used > 1) { 1631 /* if more than half of the digits are -1 we're sold */ 1632 for (iy = ix = 0; ix < a->used; ix++) { 1633 if (a->dp[ix] == MP_MASK) { 1634 ++iy; 1635 } 1636 } 1637 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1638 1639 } 1640 return MP_NO; 1641 } 1642 1643 /* computes a = 2**b 1644 * 1645 * Simple algorithm which zeroes the int, grows it then just sets one bit 1646 * as required. 1647 */ 1648 static int 1649 mp_2expt(mp_int * a, int b) 1650 { 1651 int res; 1652 1653 /* zero a as per default */ 1654 mp_zero(a); 1655 1656 /* grow a to accomodate the single bit */ 1657 if ((res = mp_grow(a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1658 return res; 1659 } 1660 1661 /* set the used count of where the bit will go */ 1662 a->used = b / DIGIT_BIT + 1; 1663 1664 /* put the single bit in its place */ 1665 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1666 1667 return MP_OKAY; 1668 } 1669 1670 /* pre-calculate the value required for Barrett reduction 1671 * For a given modulus "b" it calulates the value required in "a" 1672 */ 1673 static int 1674 mp_reduce_setup(mp_int * a, mp_int * b) 1675 { 1676 int res; 1677 1678 if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1679 return res; 1680 } 1681 return signed_divide(a, NULL, a, b); 1682 } 1683 1684 /* b = a*2 */ 1685 static int 1686 doubled(mp_int * a, mp_int * b) 1687 { 1688 int x, res, oldused; 1689 1690 /* grow to accomodate result */ 1691 if (b->alloc < a->used + 1) { 1692 if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) { 1693 return res; 1694 } 1695 } 1696 1697 oldused = b->used; 1698 b->used = a->used; 1699 1700 { 1701 mp_digit r, rr, *tmpa, *tmpb; 1702 1703 /* alias for source */ 1704 tmpa = a->dp; 1705 1706 /* alias for dest */ 1707 tmpb = b->dp; 1708 1709 /* carry */ 1710 r = 0; 1711 for (x = 0; x < a->used; x++) { 1712 1713 /* get what will be the *next* carry bit from the 1714 * MSB of the current digit 1715 */ 1716 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1717 1718 /* now shift up this digit, add in the carry [from the previous] */ 1719 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1720 1721 /* copy the carry that would be from the source 1722 * digit into the next iteration 1723 */ 1724 r = rr; 1725 } 1726 1727 /* new leading digit? */ 1728 if (r != 0) { 1729 /* add a MSB which is always 1 at this point */ 1730 *tmpb = 1; 1731 ++(b->used); 1732 } 1733 1734 /* now zero any excess digits on the destination 1735 * that we didn't write to 1736 */ 1737 tmpb = b->dp + b->used; 1738 for (x = b->used; x < oldused; x++) { 1739 *tmpb++ = 0; 1740 } 1741 } 1742 b->sign = a->sign; 1743 return MP_OKAY; 1744 } 1745 1746 /* divide by three (based on routine from MPI and the GMP manual) */ 1747 static int 1748 third(mp_int * a, mp_int *c, mp_digit * d) 1749 { 1750 mp_int q; 1751 mp_word w, t; 1752 mp_digit b; 1753 int res, ix; 1754 1755 /* b = 2**DIGIT_BIT / 3 */ 1756 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1757 1758 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1759 return res; 1760 } 1761 1762 q.used = a->used; 1763 q.sign = a->sign; 1764 w = 0; 1765 for (ix = a->used - 1; ix >= 0; ix--) { 1766 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1767 1768 if (w >= 3) { 1769 /* multiply w by [1/3] */ 1770 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1771 1772 /* now subtract 3 * [w/3] from w, to get the remainder */ 1773 w -= t+t+t; 1774 1775 /* fixup the remainder as required since 1776 * the optimization is not exact. 1777 */ 1778 while (w >= 3) { 1779 t += 1; 1780 w -= 3; 1781 } 1782 } else { 1783 t = 0; 1784 } 1785 q.dp[ix] = (mp_digit)t; 1786 } 1787 1788 /* [optional] store the remainder */ 1789 if (d != NULL) { 1790 *d = (mp_digit)w; 1791 } 1792 1793 /* [optional] store the quotient */ 1794 if (c != NULL) { 1795 trim_unused_digits(&q); 1796 mp_exch(&q, c); 1797 } 1798 mp_clear(&q); 1799 1800 return res; 1801 } 1802 1803 /* multiplication using the Toom-Cook 3-way algorithm 1804 * 1805 * Much more complicated than Karatsuba but has a lower 1806 * asymptotic running time of O(N**1.464). This algorithm is 1807 * only particularly useful on VERY large inputs 1808 * (we're talking 1000s of digits here...). 1809 */ 1810 static int 1811 toom_cook_multiply(mp_int *a, mp_int *b, mp_int *c) 1812 { 1813 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1814 int res, B; 1815 1816 /* init temps */ 1817 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1818 &a0, &a1, &a2, &b0, &b1, 1819 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1820 return res; 1821 } 1822 1823 /* B */ 1824 B = MIN(a->used, b->used) / 3; 1825 1826 /* a = a2 * B**2 + a1 * B + a0 */ 1827 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1828 goto ERR; 1829 } 1830 1831 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1832 goto ERR; 1833 } 1834 rshift_digits(&a1, B); 1835 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 1836 1837 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1838 goto ERR; 1839 } 1840 rshift_digits(&a2, B*2); 1841 1842 /* b = b2 * B**2 + b1 * B + b0 */ 1843 if ((res = modulo_2_to_power(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1844 goto ERR; 1845 } 1846 1847 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1848 goto ERR; 1849 } 1850 rshift_digits(&b1, B); 1851 modulo_2_to_power(&b1, DIGIT_BIT * B, &b1); 1852 1853 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1854 goto ERR; 1855 } 1856 rshift_digits(&b2, B*2); 1857 1858 /* w0 = a0*b0 */ 1859 if ((res = signed_multiply(&a0, &b0, &w0)) != MP_OKAY) { 1860 goto ERR; 1861 } 1862 1863 /* w4 = a2 * b2 */ 1864 if ((res = signed_multiply(&a2, &b2, &w4)) != MP_OKAY) { 1865 goto ERR; 1866 } 1867 1868 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1869 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 1870 goto ERR; 1871 } 1872 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1873 goto ERR; 1874 } 1875 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1876 goto ERR; 1877 } 1878 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1879 goto ERR; 1880 } 1881 1882 if ((res = doubled(&b0, &tmp2)) != MP_OKAY) { 1883 goto ERR; 1884 } 1885 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1886 goto ERR; 1887 } 1888 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1889 goto ERR; 1890 } 1891 if ((res = signed_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1892 goto ERR; 1893 } 1894 1895 if ((res = signed_multiply(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1896 goto ERR; 1897 } 1898 1899 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1900 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 1901 goto ERR; 1902 } 1903 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1904 goto ERR; 1905 } 1906 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1907 goto ERR; 1908 } 1909 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1910 goto ERR; 1911 } 1912 1913 if ((res = doubled(&b2, &tmp2)) != MP_OKAY) { 1914 goto ERR; 1915 } 1916 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1917 goto ERR; 1918 } 1919 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1920 goto ERR; 1921 } 1922 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1923 goto ERR; 1924 } 1925 1926 if ((res = signed_multiply(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1927 goto ERR; 1928 } 1929 1930 1931 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1932 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1933 goto ERR; 1934 } 1935 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1936 goto ERR; 1937 } 1938 if ((res = signed_add(&b2, &b1, &tmp2)) != MP_OKAY) { 1939 goto ERR; 1940 } 1941 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1942 goto ERR; 1943 } 1944 if ((res = signed_multiply(&tmp1, &tmp2, &w2)) != MP_OKAY) { 1945 goto ERR; 1946 } 1947 1948 /* now solve the matrix 1949 1950 0 0 0 0 1 1951 1 2 4 8 16 1952 1 1 1 1 1 1953 16 8 4 2 1 1954 1 0 0 0 0 1955 1956 using 12 subtractions, 4 shifts, 1957 2 small divisions and 1 small multiplication 1958 */ 1959 1960 /* r1 - r4 */ 1961 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 1962 goto ERR; 1963 } 1964 /* r3 - r0 */ 1965 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 1966 goto ERR; 1967 } 1968 /* r1/2 */ 1969 if ((res = half(&w1, &w1)) != MP_OKAY) { 1970 goto ERR; 1971 } 1972 /* r3/2 */ 1973 if ((res = half(&w3, &w3)) != MP_OKAY) { 1974 goto ERR; 1975 } 1976 /* r2 - r0 - r4 */ 1977 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 1978 goto ERR; 1979 } 1980 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 1981 goto ERR; 1982 } 1983 /* r1 - r2 */ 1984 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 1985 goto ERR; 1986 } 1987 /* r3 - r2 */ 1988 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 1989 goto ERR; 1990 } 1991 /* r1 - 8r0 */ 1992 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 1993 goto ERR; 1994 } 1995 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 1996 goto ERR; 1997 } 1998 /* r3 - 8r4 */ 1999 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 2000 goto ERR; 2001 } 2002 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 2003 goto ERR; 2004 } 2005 /* 3r2 - r1 - r3 */ 2006 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2007 goto ERR; 2008 } 2009 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2010 goto ERR; 2011 } 2012 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2013 goto ERR; 2014 } 2015 /* r1 - r2 */ 2016 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2017 goto ERR; 2018 } 2019 /* r3 - r2 */ 2020 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2021 goto ERR; 2022 } 2023 /* r1/3 */ 2024 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2025 goto ERR; 2026 } 2027 /* r3/3 */ 2028 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2029 goto ERR; 2030 } 2031 2032 /* at this point shift W[n] by B*n */ 2033 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2034 goto ERR; 2035 } 2036 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2037 goto ERR; 2038 } 2039 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2040 goto ERR; 2041 } 2042 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2043 goto ERR; 2044 } 2045 2046 if ((res = signed_add(&w0, &w1, c)) != MP_OKAY) { 2047 goto ERR; 2048 } 2049 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2050 goto ERR; 2051 } 2052 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2053 goto ERR; 2054 } 2055 if ((res = signed_add(&tmp1, c, c)) != MP_OKAY) { 2056 goto ERR; 2057 } 2058 2059 ERR: 2060 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2061 &a0, &a1, &a2, &b0, &b1, 2062 &b2, &tmp1, &tmp2, NULL); 2063 return res; 2064 } 2065 2066 #define TOOM_MUL_CUTOFF 350 2067 #define KARATSUBA_MUL_CUTOFF 80 2068 2069 /* c = |a| * |b| using Karatsuba Multiplication using 2070 * three half size multiplications 2071 * 2072 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2073 * let n represent half of the number of digits in 2074 * the min(a,b) 2075 * 2076 * a = a1 * B**n + a0 2077 * b = b1 * B**n + b0 2078 * 2079 * Then, a * b => 2080 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2081 * 2082 * Note that a1b1 and a0b0 are used twice and only need to be 2083 * computed once. So in total three half size (half # of 2084 * digit) multiplications are performed, a0b0, a1b1 and 2085 * (a1+b1)(a0+b0) 2086 * 2087 * Note that a multiplication of half the digits requires 2088 * 1/4th the number of single precision multiplications so in 2089 * total after one call 25% of the single precision multiplications 2090 * are saved. Note also that the call to signed_multiply can end up back 2091 * in this function if the a0, a1, b0, or b1 are above the threshold. 2092 * This is known as divide-and-conquer and leads to the famous 2093 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2094 * the standard O(N**2) that the baseline/comba methods use. 2095 * Generally though the overhead of this method doesn't pay off 2096 * until a certain size (N ~ 80) is reached. 2097 */ 2098 static int 2099 karatsuba_multiply(mp_int * a, mp_int * b, mp_int * c) 2100 { 2101 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2102 int B; 2103 int err; 2104 2105 /* default the return code to an error */ 2106 err = MP_MEM; 2107 2108 /* min # of digits */ 2109 B = MIN(a->used, b->used); 2110 2111 /* now divide in two */ 2112 B = (int)((unsigned)B >> 1); 2113 2114 /* init copy all the temps */ 2115 if (mp_init_size(&x0, B) != MP_OKAY) { 2116 goto ERR; 2117 } 2118 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2119 goto X0; 2120 } 2121 if (mp_init_size(&y0, B) != MP_OKAY) { 2122 goto X1; 2123 } 2124 if (mp_init_size(&y1, b->used - B) != MP_OKAY) { 2125 goto Y0; 2126 } 2127 /* init temps */ 2128 if (mp_init_size(&t1, B * 2) != MP_OKAY) { 2129 goto Y1; 2130 } 2131 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) { 2132 goto T1; 2133 } 2134 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) { 2135 goto X0Y0; 2136 } 2137 /* now shift the digits */ 2138 x0.used = y0.used = B; 2139 x1.used = a->used - B; 2140 y1.used = b->used - B; 2141 2142 { 2143 int x; 2144 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2145 2146 /* we copy the digits directly instead of using higher level functions 2147 * since we also need to shift the digits 2148 */ 2149 tmpa = a->dp; 2150 tmpb = b->dp; 2151 2152 tmpx = x0.dp; 2153 tmpy = y0.dp; 2154 for (x = 0; x < B; x++) { 2155 *tmpx++ = *tmpa++; 2156 *tmpy++ = *tmpb++; 2157 } 2158 2159 tmpx = x1.dp; 2160 for (x = B; x < a->used; x++) { 2161 *tmpx++ = *tmpa++; 2162 } 2163 2164 tmpy = y1.dp; 2165 for (x = B; x < b->used; x++) { 2166 *tmpy++ = *tmpb++; 2167 } 2168 } 2169 2170 /* only need to clamp the lower words since by definition the 2171 * upper words x1/y1 must have a known number of digits 2172 */ 2173 trim_unused_digits(&x0); 2174 trim_unused_digits(&y0); 2175 2176 /* now calc the products x0y0 and x1y1 */ 2177 /* after this x0 is no longer required, free temp [x0==t2]! */ 2178 if (signed_multiply(&x0, &y0, &x0y0) != MP_OKAY) { 2179 goto X1Y1; /* x0y0 = x0*y0 */ 2180 } 2181 if (signed_multiply(&x1, &y1, &x1y1) != MP_OKAY) { 2182 goto X1Y1; /* x1y1 = x1*y1 */ 2183 } 2184 /* now calc x1+x0 and y1+y0 */ 2185 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2186 goto X1Y1; /* t1 = x1 - x0 */ 2187 } 2188 if (basic_add(&y1, &y0, &x0) != MP_OKAY) { 2189 goto X1Y1; /* t2 = y1 - y0 */ 2190 } 2191 if (signed_multiply(&t1, &x0, &t1) != MP_OKAY) { 2192 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2193 } 2194 /* add x0y0 */ 2195 if (signed_add(&x0y0, &x1y1, &x0) != MP_OKAY) { 2196 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2197 } 2198 if (basic_subtract(&t1, &x0, &t1) != MP_OKAY) { 2199 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2200 } 2201 /* shift by B */ 2202 if (lshift_digits(&t1, B) != MP_OKAY) { 2203 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2204 } 2205 if (lshift_digits(&x1y1, B * 2) != MP_OKAY) { 2206 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2207 } 2208 if (signed_add(&x0y0, &t1, &t1) != MP_OKAY) { 2209 goto X1Y1; /* t1 = x0y0 + t1 */ 2210 } 2211 if (signed_add(&t1, &x1y1, c) != MP_OKAY) { 2212 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2213 } 2214 /* Algorithm succeeded set the return code to MP_OKAY */ 2215 err = MP_OKAY; 2216 2217 X1Y1: 2218 mp_clear(&x1y1); 2219 X0Y0: 2220 mp_clear(&x0y0); 2221 T1: 2222 mp_clear(&t1); 2223 Y1: 2224 mp_clear(&y1); 2225 Y0: 2226 mp_clear(&y0); 2227 X1: 2228 mp_clear(&x1); 2229 X0: 2230 mp_clear(&x0); 2231 ERR: 2232 return err; 2233 } 2234 2235 /* Fast (comba) multiplier 2236 * 2237 * This is the fast column-array [comba] multiplier. It is 2238 * designed to compute the columns of the product first 2239 * then handle the carries afterwards. This has the effect 2240 * of making the nested loops that compute the columns very 2241 * simple and schedulable on super-scalar processors. 2242 * 2243 * This has been modified to produce a variable number of 2244 * digits of output so if say only a half-product is required 2245 * you don't have to compute the upper half (a feature 2246 * required for fast Barrett reduction). 2247 * 2248 * Based on Algorithm 14.12 on pp.595 of HAC. 2249 * 2250 */ 2251 static int 2252 fast_col_array_multiply(mp_int * a, mp_int * b, mp_int * c, int digs) 2253 { 2254 int olduse, res, pa, ix, iz; 2255 /*LINTED*/ 2256 mp_digit W[MP_WARRAY]; 2257 mp_word _W; 2258 2259 /* grow the destination as required */ 2260 if (c->alloc < digs) { 2261 if ((res = mp_grow(c, digs)) != MP_OKAY) { 2262 return res; 2263 } 2264 } 2265 2266 /* number of output digits to produce */ 2267 pa = MIN(digs, a->used + b->used); 2268 2269 /* clear the carry */ 2270 _W = 0; 2271 for (ix = 0; ix < pa; ix++) { 2272 int tx, ty; 2273 int iy; 2274 mp_digit *tmpx, *tmpy; 2275 2276 /* get offsets into the two bignums */ 2277 ty = MIN(b->used-1, ix); 2278 tx = ix - ty; 2279 2280 /* setup temp aliases */ 2281 tmpx = a->dp + tx; 2282 tmpy = b->dp + ty; 2283 2284 /* this is the number of times the loop will iterrate, essentially 2285 while (tx++ < a->used && ty-- >= 0) { ... } 2286 */ 2287 iy = MIN(a->used-tx, ty+1); 2288 2289 /* execute loop */ 2290 for (iz = 0; iz < iy; ++iz) { 2291 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2292 2293 } 2294 2295 /* store term */ 2296 W[ix] = ((mp_digit)_W) & MP_MASK; 2297 2298 /* make next carry */ 2299 _W = _W >> ((mp_word)DIGIT_BIT); 2300 } 2301 2302 /* setup dest */ 2303 olduse = c->used; 2304 c->used = pa; 2305 2306 { 2307 mp_digit *tmpc; 2308 tmpc = c->dp; 2309 for (ix = 0; ix < pa+1; ix++) { 2310 /* now extract the previous digit [below the carry] */ 2311 *tmpc++ = (ix < pa) ? W[ix] : 0; 2312 } 2313 2314 /* clear unused digits [that existed in the old copy of c] */ 2315 for (; ix < olduse; ix++) { 2316 *tmpc++ = 0; 2317 } 2318 } 2319 trim_unused_digits(c); 2320 return MP_OKAY; 2321 } 2322 2323 /* return 1 if we can use fast column array multiply */ 2324 /* 2325 * The fast multiplier can be used if the output will 2326 * have less than MP_WARRAY digits and the number of 2327 * digits won't affect carry propagation 2328 */ 2329 static inline int 2330 can_use_fast_column_array(int ndigits, int used) 2331 { 2332 return (((unsigned)ndigits < MP_WARRAY) && 2333 used < (1 << (unsigned)((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT)))); 2334 } 2335 2336 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2337 /* Revision: 1.2 $ */ 2338 /* Date: 2011/03/18 16:22:09 $ */ 2339 2340 2341 /* multiplies |a| * |b| and only computes upto digs digits of result 2342 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2343 * many digits of output are created. 2344 */ 2345 static int 2346 basic_multiply_partial_lower(mp_int * a, mp_int * b, mp_int * c, int digs) 2347 { 2348 mp_int t; 2349 int res, pa, pb, ix, iy; 2350 mp_digit u; 2351 mp_word r; 2352 mp_digit tmpx, *tmpt, *tmpy; 2353 2354 /* can we use the fast multiplier? */ 2355 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2356 return fast_col_array_multiply(a, b, c, digs); 2357 } 2358 2359 if ((res = mp_init_size(&t, digs)) != MP_OKAY) { 2360 return res; 2361 } 2362 t.used = digs; 2363 2364 /* compute the digits of the product directly */ 2365 pa = a->used; 2366 for (ix = 0; ix < pa; ix++) { 2367 /* set the carry to zero */ 2368 u = 0; 2369 2370 /* limit ourselves to making digs digits of output */ 2371 pb = MIN(b->used, digs - ix); 2372 2373 /* setup some aliases */ 2374 /* copy of the digit from a used within the nested loop */ 2375 tmpx = a->dp[ix]; 2376 2377 /* an alias for the destination shifted ix places */ 2378 tmpt = t.dp + ix; 2379 2380 /* an alias for the digits of b */ 2381 tmpy = b->dp; 2382 2383 /* compute the columns of the output and propagate the carry */ 2384 for (iy = 0; iy < pb; iy++) { 2385 /* compute the column as a mp_word */ 2386 r = ((mp_word)*tmpt) + 2387 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2388 ((mp_word) u); 2389 2390 /* the new column is the lower part of the result */ 2391 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2392 2393 /* get the carry word from the result */ 2394 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2395 } 2396 /* set carry if it is placed below digs */ 2397 if (ix + iy < digs) { 2398 *tmpt = u; 2399 } 2400 } 2401 2402 trim_unused_digits(&t); 2403 mp_exch(&t, c); 2404 2405 mp_clear(&t); 2406 return MP_OKAY; 2407 } 2408 2409 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2410 /* Revision: 1.3 $ */ 2411 /* Date: 2011/03/18 16:43:04 $ */ 2412 2413 /* high level multiplication (handles sign) */ 2414 static int 2415 signed_multiply(mp_int * a, mp_int * b, mp_int * c) 2416 { 2417 int res, neg; 2418 2419 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2420 /* use Toom-Cook? */ 2421 if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) { 2422 res = toom_cook_multiply(a, b, c); 2423 } else if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2424 /* use Karatsuba? */ 2425 res = karatsuba_multiply(a, b, c); 2426 } else { 2427 /* can we use the fast multiplier? */ 2428 int digs = a->used + b->used + 1; 2429 2430 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2431 res = fast_col_array_multiply(a, b, c, digs); 2432 } else { 2433 res = basic_multiply_partial_lower(a, b, c, (a)->used + (b)->used + 1); 2434 } 2435 } 2436 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2437 return res; 2438 } 2439 2440 /* this is a modified version of fast_s_mul_digs that only produces 2441 * output digits *above* digs. See the comments for fast_s_mul_digs 2442 * to see how it works. 2443 * 2444 * This is used in the Barrett reduction since for one of the multiplications 2445 * only the higher digits were needed. This essentially halves the work. 2446 * 2447 * Based on Algorithm 14.12 on pp.595 of HAC. 2448 */ 2449 static int 2450 fast_basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2451 { 2452 int olduse, res, pa, ix, iz; 2453 mp_digit W[MP_WARRAY]; 2454 mp_word _W; 2455 2456 /* grow the destination as required */ 2457 pa = a->used + b->used; 2458 if (c->alloc < pa) { 2459 if ((res = mp_grow(c, pa)) != MP_OKAY) { 2460 return res; 2461 } 2462 } 2463 2464 /* number of output digits to produce */ 2465 pa = a->used + b->used; 2466 _W = 0; 2467 for (ix = digs; ix < pa; ix++) { 2468 int tx, ty, iy; 2469 mp_digit *tmpx, *tmpy; 2470 2471 /* get offsets into the two bignums */ 2472 ty = MIN(b->used-1, ix); 2473 tx = ix - ty; 2474 2475 /* setup temp aliases */ 2476 tmpx = a->dp + tx; 2477 tmpy = b->dp + ty; 2478 2479 /* this is the number of times the loop will iterrate, essentially its 2480 while (tx++ < a->used && ty-- >= 0) { ... } 2481 */ 2482 iy = MIN(a->used-tx, ty+1); 2483 2484 /* execute loop */ 2485 for (iz = 0; iz < iy; iz++) { 2486 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2487 } 2488 2489 /* store term */ 2490 W[ix] = ((mp_digit)_W) & MP_MASK; 2491 2492 /* make next carry */ 2493 _W = _W >> ((mp_word)DIGIT_BIT); 2494 } 2495 2496 /* setup dest */ 2497 olduse = c->used; 2498 c->used = pa; 2499 2500 { 2501 mp_digit *tmpc; 2502 2503 tmpc = c->dp + digs; 2504 for (ix = digs; ix < pa; ix++) { 2505 /* now extract the previous digit [below the carry] */ 2506 *tmpc++ = W[ix]; 2507 } 2508 2509 /* clear unused digits [that existed in the old copy of c] */ 2510 for (; ix < olduse; ix++) { 2511 *tmpc++ = 0; 2512 } 2513 } 2514 trim_unused_digits(c); 2515 return MP_OKAY; 2516 } 2517 2518 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2519 /* Revision: 1.1.1.1 $ */ 2520 /* Date: 2011/03/12 22:58:18 $ */ 2521 2522 /* multiplies |a| * |b| and does not compute the lower digs digits 2523 * [meant to get the higher part of the product] 2524 */ 2525 static int 2526 basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2527 { 2528 mp_int t; 2529 int res, pa, pb, ix, iy; 2530 mp_digit carry; 2531 mp_word r; 2532 mp_digit tmpx, *tmpt, *tmpy; 2533 2534 /* can we use the fast multiplier? */ 2535 if (can_use_fast_column_array(a->used + b->used + 1, MIN(a->used, b->used))) { 2536 return fast_basic_multiply_partial_upper(a, b, c, digs); 2537 } 2538 2539 if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) { 2540 return res; 2541 } 2542 t.used = a->used + b->used + 1; 2543 2544 pa = a->used; 2545 pb = b->used; 2546 for (ix = 0; ix < pa; ix++) { 2547 /* clear the carry */ 2548 carry = 0; 2549 2550 /* left hand side of A[ix] * B[iy] */ 2551 tmpx = a->dp[ix]; 2552 2553 /* alias to the address of where the digits will be stored */ 2554 tmpt = &(t.dp[digs]); 2555 2556 /* alias for where to read the right hand side from */ 2557 tmpy = b->dp + (digs - ix); 2558 2559 for (iy = digs - ix; iy < pb; iy++) { 2560 /* calculate the double precision result */ 2561 r = ((mp_word)*tmpt) + 2562 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2563 ((mp_word) carry); 2564 2565 /* get the lower part */ 2566 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2567 2568 /* carry the carry */ 2569 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2570 } 2571 *tmpt = carry; 2572 } 2573 trim_unused_digits(&t); 2574 mp_exch(&t, c); 2575 mp_clear(&t); 2576 return MP_OKAY; 2577 } 2578 2579 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2580 /* Revision: 1.3 $ */ 2581 /* Date: 2011/03/18 16:43:04 $ */ 2582 2583 /* reduces x mod m, assumes 0 < x < m**2, mu is 2584 * precomputed via mp_reduce_setup. 2585 * From HAC pp.604 Algorithm 14.42 2586 */ 2587 static int 2588 mp_reduce(mp_int * x, mp_int * m, mp_int * mu) 2589 { 2590 mp_int q; 2591 int res, um = m->used; 2592 2593 /* q = x */ 2594 if ((res = mp_init_copy(&q, x)) != MP_OKAY) { 2595 return res; 2596 } 2597 2598 /* q1 = x / b**(k-1) */ 2599 rshift_digits(&q, um - 1); 2600 2601 /* according to HAC this optimization is ok */ 2602 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2603 if ((res = signed_multiply(&q, mu, &q)) != MP_OKAY) { 2604 goto CLEANUP; 2605 } 2606 } else { 2607 if ((res = basic_multiply_partial_upper(&q, mu, &q, um)) != MP_OKAY) { 2608 goto CLEANUP; 2609 } 2610 } 2611 2612 /* q3 = q2 / b**(k+1) */ 2613 rshift_digits(&q, um + 1); 2614 2615 /* x = x mod b**(k+1), quick (no division) */ 2616 if ((res = modulo_2_to_power(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2617 goto CLEANUP; 2618 } 2619 2620 /* q = q * m mod b**(k+1), quick (no division) */ 2621 if ((res = basic_multiply_partial_lower(&q, m, &q, um + 1)) != MP_OKAY) { 2622 goto CLEANUP; 2623 } 2624 2625 /* x = x - q */ 2626 if ((res = signed_subtract(x, &q, x)) != MP_OKAY) { 2627 goto CLEANUP; 2628 } 2629 2630 /* If x < 0, add b**(k+1) to it */ 2631 if (compare_digit(x, 0) == MP_LT) { 2632 set_word(&q, 1); 2633 if ((res = lshift_digits(&q, um + 1)) != MP_OKAY) { 2634 goto CLEANUP; 2635 } 2636 if ((res = signed_add(x, &q, x)) != MP_OKAY) { 2637 goto CLEANUP; 2638 } 2639 } 2640 2641 /* Back off if it's too big */ 2642 while (signed_compare(x, m) != MP_LT) { 2643 if ((res = basic_subtract(x, m, x)) != MP_OKAY) { 2644 goto CLEANUP; 2645 } 2646 } 2647 2648 CLEANUP: 2649 mp_clear(&q); 2650 2651 return res; 2652 } 2653 2654 /* determines the setup value */ 2655 static int 2656 mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2657 { 2658 int res; 2659 mp_int tmp; 2660 2661 if ((res = mp_init(&tmp)) != MP_OKAY) { 2662 return res; 2663 } 2664 2665 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2666 goto ERR; 2667 } 2668 2669 if ((res = basic_subtract(&tmp, a, d)) != MP_OKAY) { 2670 goto ERR; 2671 } 2672 2673 ERR: 2674 mp_clear(&tmp); 2675 return res; 2676 } 2677 2678 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2679 /* Revision: 1.1.1.1 $ */ 2680 /* Date: 2011/03/12 22:58:18 $ */ 2681 2682 /* reduces a modulo n where n is of the form 2**p - d 2683 This differs from reduce_2k since "d" can be larger 2684 than a single digit. 2685 */ 2686 static int 2687 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2688 { 2689 mp_int q; 2690 int p, res; 2691 2692 if ((res = mp_init(&q)) != MP_OKAY) { 2693 return res; 2694 } 2695 2696 p = mp_count_bits(n); 2697 top: 2698 /* q = a/2**p, a = a mod 2**p */ 2699 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 2700 goto ERR; 2701 } 2702 2703 /* q = q * d */ 2704 if ((res = signed_multiply(&q, d, &q)) != MP_OKAY) { 2705 goto ERR; 2706 } 2707 2708 /* a = a + q */ 2709 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 2710 goto ERR; 2711 } 2712 2713 if (compare_magnitude(a, n) != MP_LT) { 2714 basic_subtract(a, n, a); 2715 goto top; 2716 } 2717 2718 ERR: 2719 mp_clear(&q); 2720 return res; 2721 } 2722 2723 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2724 /* Revision: 1.1.1.1 $ */ 2725 /* Date: 2011/03/12 22:58:18 $ */ 2726 2727 /* squaring using Toom-Cook 3-way algorithm */ 2728 static int 2729 toom_cook_square(mp_int *a, mp_int *b) 2730 { 2731 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2732 int res, B; 2733 2734 /* init temps */ 2735 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2736 return res; 2737 } 2738 2739 /* B */ 2740 B = a->used / 3; 2741 2742 /* a = a2 * B**2 + a1 * B + a0 */ 2743 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2744 goto ERR; 2745 } 2746 2747 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2748 goto ERR; 2749 } 2750 rshift_digits(&a1, B); 2751 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 2752 2753 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2754 goto ERR; 2755 } 2756 rshift_digits(&a2, B*2); 2757 2758 /* w0 = a0*a0 */ 2759 if ((res = square(&a0, &w0)) != MP_OKAY) { 2760 goto ERR; 2761 } 2762 2763 /* w4 = a2 * a2 */ 2764 if ((res = square(&a2, &w4)) != MP_OKAY) { 2765 goto ERR; 2766 } 2767 2768 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2769 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 2770 goto ERR; 2771 } 2772 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2773 goto ERR; 2774 } 2775 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2776 goto ERR; 2777 } 2778 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2779 goto ERR; 2780 } 2781 2782 if ((res = square(&tmp1, &w1)) != MP_OKAY) { 2783 goto ERR; 2784 } 2785 2786 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2787 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 2788 goto ERR; 2789 } 2790 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2791 goto ERR; 2792 } 2793 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2794 goto ERR; 2795 } 2796 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2797 goto ERR; 2798 } 2799 2800 if ((res = square(&tmp1, &w3)) != MP_OKAY) { 2801 goto ERR; 2802 } 2803 2804 2805 /* w2 = (a2 + a1 + a0)**2 */ 2806 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2807 goto ERR; 2808 } 2809 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2810 goto ERR; 2811 } 2812 if ((res = square(&tmp1, &w2)) != MP_OKAY) { 2813 goto ERR; 2814 } 2815 2816 /* now solve the matrix 2817 2818 0 0 0 0 1 2819 1 2 4 8 16 2820 1 1 1 1 1 2821 16 8 4 2 1 2822 1 0 0 0 0 2823 2824 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2825 */ 2826 2827 /* r1 - r4 */ 2828 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 2829 goto ERR; 2830 } 2831 /* r3 - r0 */ 2832 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 2833 goto ERR; 2834 } 2835 /* r1/2 */ 2836 if ((res = half(&w1, &w1)) != MP_OKAY) { 2837 goto ERR; 2838 } 2839 /* r3/2 */ 2840 if ((res = half(&w3, &w3)) != MP_OKAY) { 2841 goto ERR; 2842 } 2843 /* r2 - r0 - r4 */ 2844 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 2845 goto ERR; 2846 } 2847 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 2848 goto ERR; 2849 } 2850 /* r1 - r2 */ 2851 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2852 goto ERR; 2853 } 2854 /* r3 - r2 */ 2855 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2856 goto ERR; 2857 } 2858 /* r1 - 8r0 */ 2859 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 2860 goto ERR; 2861 } 2862 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 2863 goto ERR; 2864 } 2865 /* r3 - 8r4 */ 2866 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 2867 goto ERR; 2868 } 2869 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 2870 goto ERR; 2871 } 2872 /* 3r2 - r1 - r3 */ 2873 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2874 goto ERR; 2875 } 2876 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2877 goto ERR; 2878 } 2879 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2880 goto ERR; 2881 } 2882 /* r1 - r2 */ 2883 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2884 goto ERR; 2885 } 2886 /* r3 - r2 */ 2887 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2888 goto ERR; 2889 } 2890 /* r1/3 */ 2891 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2892 goto ERR; 2893 } 2894 /* r3/3 */ 2895 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2896 goto ERR; 2897 } 2898 2899 /* at this point shift W[n] by B*n */ 2900 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2901 goto ERR; 2902 } 2903 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2904 goto ERR; 2905 } 2906 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2907 goto ERR; 2908 } 2909 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2910 goto ERR; 2911 } 2912 2913 if ((res = signed_add(&w0, &w1, b)) != MP_OKAY) { 2914 goto ERR; 2915 } 2916 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2917 goto ERR; 2918 } 2919 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2920 goto ERR; 2921 } 2922 if ((res = signed_add(&tmp1, b, b)) != MP_OKAY) { 2923 goto ERR; 2924 } 2925 2926 ERR: 2927 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2928 return res; 2929 } 2930 2931 2932 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2933 /* Revision: 1.1.1.1 $ */ 2934 /* Date: 2011/03/12 22:58:18 $ */ 2935 2936 /* Karatsuba squaring, computes b = a*a using three 2937 * half size squarings 2938 * 2939 * See comments of karatsuba_mul for details. It 2940 * is essentially the same algorithm but merely 2941 * tuned to perform recursive squarings. 2942 */ 2943 static int 2944 karatsuba_square(mp_int * a, mp_int * b) 2945 { 2946 mp_int x0, x1, t1, t2, x0x0, x1x1; 2947 int B, err; 2948 2949 err = MP_MEM; 2950 2951 /* min # of digits */ 2952 B = a->used; 2953 2954 /* now divide in two */ 2955 B = (unsigned)B >> 1; 2956 2957 /* init copy all the temps */ 2958 if (mp_init_size(&x0, B) != MP_OKAY) { 2959 goto ERR; 2960 } 2961 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2962 goto X0; 2963 } 2964 /* init temps */ 2965 if (mp_init_size(&t1, a->used * 2) != MP_OKAY) { 2966 goto X1; 2967 } 2968 if (mp_init_size(&t2, a->used * 2) != MP_OKAY) { 2969 goto T1; 2970 } 2971 if (mp_init_size(&x0x0, B * 2) != MP_OKAY) { 2972 goto T2; 2973 } 2974 if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) { 2975 goto X0X0; 2976 } 2977 2978 memcpy(x0.dp, a->dp, B * sizeof(*x0.dp)); 2979 memcpy(x1.dp, &a->dp[B], (a->used - B) * sizeof(*x1.dp)); 2980 2981 x0.used = B; 2982 x1.used = a->used - B; 2983 2984 trim_unused_digits(&x0); 2985 2986 /* now calc the products x0*x0 and x1*x1 */ 2987 if (square(&x0, &x0x0) != MP_OKAY) { 2988 goto X1X1; /* x0x0 = x0*x0 */ 2989 } 2990 if (square(&x1, &x1x1) != MP_OKAY) { 2991 goto X1X1; /* x1x1 = x1*x1 */ 2992 } 2993 /* now calc (x1+x0)**2 */ 2994 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2995 goto X1X1; /* t1 = x1 - x0 */ 2996 } 2997 if (square(&t1, &t1) != MP_OKAY) { 2998 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 2999 } 3000 /* add x0y0 */ 3001 if (basic_add(&x0x0, &x1x1, &t2) != MP_OKAY) { 3002 goto X1X1; /* t2 = x0x0 + x1x1 */ 3003 } 3004 if (basic_subtract(&t1, &t2, &t1) != MP_OKAY) { 3005 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3006 } 3007 /* shift by B */ 3008 if (lshift_digits(&t1, B) != MP_OKAY) { 3009 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3010 } 3011 if (lshift_digits(&x1x1, B * 2) != MP_OKAY) { 3012 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3013 } 3014 if (signed_add(&x0x0, &t1, &t1) != MP_OKAY) { 3015 goto X1X1; /* t1 = x0x0 + t1 */ 3016 } 3017 if (signed_add(&t1, &x1x1, b) != MP_OKAY) { 3018 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3019 } 3020 err = MP_OKAY; 3021 3022 X1X1: 3023 mp_clear(&x1x1); 3024 X0X0: 3025 mp_clear(&x0x0); 3026 T2: 3027 mp_clear(&t2); 3028 T1: 3029 mp_clear(&t1); 3030 X1: 3031 mp_clear(&x1); 3032 X0: 3033 mp_clear(&x0); 3034 ERR: 3035 return err; 3036 } 3037 3038 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3039 /* Revision: 1.2 $ */ 3040 /* Date: 2011/03/12 23:43:54 $ */ 3041 3042 /* the jist of squaring... 3043 * you do like mult except the offset of the tmpx [one that 3044 * starts closer to zero] can't equal the offset of tmpy. 3045 * So basically you set up iy like before then you min it with 3046 * (ty-tx) so that it never happens. You double all those 3047 * you add in the inner loop 3048 3049 After that loop you do the squares and add them in. 3050 */ 3051 3052 static int 3053 fast_basic_square(mp_int * a, mp_int * b) 3054 { 3055 int olduse, res, pa, ix, iz; 3056 mp_digit W[MP_WARRAY], *tmpx; 3057 mp_word W1; 3058 3059 /* grow the destination as required */ 3060 pa = a->used + a->used; 3061 if (b->alloc < pa) { 3062 if ((res = mp_grow(b, pa)) != MP_OKAY) { 3063 return res; 3064 } 3065 } 3066 3067 /* number of output digits to produce */ 3068 W1 = 0; 3069 for (ix = 0; ix < pa; ix++) { 3070 int tx, ty, iy; 3071 mp_word _W; 3072 mp_digit *tmpy; 3073 3074 /* clear counter */ 3075 _W = 0; 3076 3077 /* get offsets into the two bignums */ 3078 ty = MIN(a->used-1, ix); 3079 tx = ix - ty; 3080 3081 /* setup temp aliases */ 3082 tmpx = a->dp + tx; 3083 tmpy = a->dp + ty; 3084 3085 /* this is the number of times the loop will iterrate, essentially 3086 while (tx++ < a->used && ty-- >= 0) { ... } 3087 */ 3088 iy = MIN(a->used-tx, ty+1); 3089 3090 /* now for squaring tx can never equal ty 3091 * we halve the distance since they approach at a rate of 2x 3092 * and we have to round because odd cases need to be executed 3093 */ 3094 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3095 3096 /* execute loop */ 3097 for (iz = 0; iz < iy; iz++) { 3098 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3099 } 3100 3101 /* double the inner product and add carry */ 3102 _W = _W + _W + W1; 3103 3104 /* even columns have the square term in them */ 3105 if ((ix&1) == 0) { 3106 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3107 } 3108 3109 /* store it */ 3110 W[ix] = (mp_digit)(_W & MP_MASK); 3111 3112 /* make next carry */ 3113 W1 = _W >> ((mp_word)DIGIT_BIT); 3114 } 3115 3116 /* setup dest */ 3117 olduse = b->used; 3118 b->used = a->used+a->used; 3119 3120 { 3121 mp_digit *tmpb; 3122 tmpb = b->dp; 3123 for (ix = 0; ix < pa; ix++) { 3124 *tmpb++ = W[ix] & MP_MASK; 3125 } 3126 3127 /* clear unused digits [that existed in the old copy of c] */ 3128 for (; ix < olduse; ix++) { 3129 *tmpb++ = 0; 3130 } 3131 } 3132 trim_unused_digits(b); 3133 return MP_OKAY; 3134 } 3135 3136 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3137 /* Revision: 1.3 $ */ 3138 /* Date: 2011/03/18 16:43:04 $ */ 3139 3140 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3141 static int 3142 basic_square(mp_int * a, mp_int * b) 3143 { 3144 mp_int t; 3145 int res, ix, iy, pa; 3146 mp_word r; 3147 mp_digit carry, tmpx, *tmpt; 3148 3149 pa = a->used; 3150 if ((res = mp_init_size(&t, 2*pa + 1)) != MP_OKAY) { 3151 return res; 3152 } 3153 3154 /* default used is maximum possible size */ 3155 t.used = 2*pa + 1; 3156 3157 for (ix = 0; ix < pa; ix++) { 3158 /* first calculate the digit at 2*ix */ 3159 /* calculate double precision result */ 3160 r = ((mp_word) t.dp[2*ix]) + 3161 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3162 3163 /* store lower part in result */ 3164 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3165 3166 /* get the carry */ 3167 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3168 3169 /* left hand side of A[ix] * A[iy] */ 3170 tmpx = a->dp[ix]; 3171 3172 /* alias for where to store the results */ 3173 tmpt = t.dp + (2*ix + 1); 3174 3175 for (iy = ix + 1; iy < pa; iy++) { 3176 /* first calculate the product */ 3177 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3178 3179 /* now calculate the double precision result, note we use 3180 * addition instead of *2 since it's easier to optimize 3181 */ 3182 r = ((mp_word) *tmpt) + r + r + ((mp_word) carry); 3183 3184 /* store lower part */ 3185 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3186 3187 /* get carry */ 3188 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3189 } 3190 /* propagate upwards */ 3191 while (carry != ((mp_digit) 0)) { 3192 r = ((mp_word) *tmpt) + ((mp_word) carry); 3193 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3194 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3195 } 3196 } 3197 3198 trim_unused_digits(&t); 3199 mp_exch(&t, b); 3200 mp_clear(&t); 3201 return MP_OKAY; 3202 } 3203 3204 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3205 /* Revision: 1.1.1.1 $ */ 3206 /* Date: 2011/03/12 22:58:18 $ */ 3207 3208 #define TOOM_SQR_CUTOFF 400 3209 #define KARATSUBA_SQR_CUTOFF 120 3210 3211 /* computes b = a*a */ 3212 static int 3213 square(mp_int * a, mp_int * b) 3214 { 3215 int res; 3216 3217 /* use Toom-Cook? */ 3218 if (a->used >= TOOM_SQR_CUTOFF) { 3219 res = toom_cook_square(a, b); 3220 /* Karatsuba? */ 3221 } else if (a->used >= KARATSUBA_SQR_CUTOFF) { 3222 res = karatsuba_square(a, b); 3223 } else { 3224 /* can we use the fast comba multiplier? */ 3225 if (can_use_fast_column_array(a->used + a->used + 1, a->used)) { 3226 res = fast_basic_square(a, b); 3227 } else { 3228 res = basic_square(a, b); 3229 } 3230 } 3231 b->sign = MP_ZPOS; 3232 return res; 3233 } 3234 3235 /* find window size */ 3236 static inline int 3237 find_window_size(mp_int *X) 3238 { 3239 int x; 3240 3241 x = mp_count_bits(X); 3242 return (x <= 7) ? 2 : (x <= 36) ? 3 : (x <= 140) ? 4 : (x <= 450) ? 5 : (x <= 1303) ? 6 : (x <= 3529) ? 7 : 8; 3243 } 3244 3245 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3246 /* Revision: 1.3 $ */ 3247 /* Date: 2011/03/18 16:43:04 $ */ 3248 3249 #define TAB_SIZE 256 3250 3251 static int 3252 basic_exponent_mod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3253 { 3254 mp_digit buf; 3255 mp_int M[TAB_SIZE], res, mu; 3256 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3257 int (*redux)(mp_int*,mp_int*,mp_int*); 3258 3259 winsize = find_window_size(X); 3260 3261 /* init M array */ 3262 /* init first cell */ 3263 if ((err = mp_init(&M[1])) != MP_OKAY) { 3264 return err; 3265 } 3266 3267 /* now init the second half of the array */ 3268 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3269 if ((err = mp_init(&M[x])) != MP_OKAY) { 3270 for (y = 1<<(winsize-1); y < x; y++) { 3271 mp_clear(&M[y]); 3272 } 3273 mp_clear(&M[1]); 3274 return err; 3275 } 3276 } 3277 3278 /* create mu, used for Barrett reduction */ 3279 if ((err = mp_init(&mu)) != MP_OKAY) { 3280 goto LBL_M; 3281 } 3282 3283 if (redmode == 0) { 3284 if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) { 3285 goto LBL_MU; 3286 } 3287 redux = mp_reduce; 3288 } else { 3289 if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) { 3290 goto LBL_MU; 3291 } 3292 redux = mp_reduce_2k_l; 3293 } 3294 3295 /* create M table 3296 * 3297 * The M table contains powers of the base, 3298 * e.g. M[x] = G**x mod P 3299 * 3300 * The first half of the table is not 3301 * computed though accept for M[0] and M[1] 3302 */ 3303 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 3304 goto LBL_MU; 3305 } 3306 3307 /* compute the value at M[1<<(winsize-1)] by squaring 3308 * M[1] (winsize-1) times 3309 */ 3310 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3311 goto LBL_MU; 3312 } 3313 3314 for (x = 0; x < (winsize - 1); x++) { 3315 /* square it */ 3316 if ((err = square(&M[1 << (winsize - 1)], 3317 &M[1 << (winsize - 1)])) != MP_OKAY) { 3318 goto LBL_MU; 3319 } 3320 3321 /* reduce modulo P */ 3322 if ((err = redux(&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3323 goto LBL_MU; 3324 } 3325 } 3326 3327 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3328 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3329 */ 3330 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3331 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3332 goto LBL_MU; 3333 } 3334 if ((err = redux(&M[x], P, &mu)) != MP_OKAY) { 3335 goto LBL_MU; 3336 } 3337 } 3338 3339 /* setup result */ 3340 if ((err = mp_init(&res)) != MP_OKAY) { 3341 goto LBL_MU; 3342 } 3343 set_word(&res, 1); 3344 3345 /* set initial mode and bit cnt */ 3346 mode = 0; 3347 bitcnt = 1; 3348 buf = 0; 3349 digidx = X->used - 1; 3350 bitcpy = 0; 3351 bitbuf = 0; 3352 3353 for (;;) { 3354 /* grab next digit as required */ 3355 if (--bitcnt == 0) { 3356 /* if digidx == -1 we are out of digits */ 3357 if (digidx == -1) { 3358 break; 3359 } 3360 /* read next digit and reset the bitcnt */ 3361 buf = X->dp[digidx--]; 3362 bitcnt = (int) DIGIT_BIT; 3363 } 3364 3365 /* grab the next msb from the exponent */ 3366 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3367 buf <<= (mp_digit)1; 3368 3369 /* if the bit is zero and mode == 0 then we ignore it 3370 * These represent the leading zero bits before the first 1 bit 3371 * in the exponent. Technically this opt is not required but it 3372 * does lower the # of trivial squaring/reductions used 3373 */ 3374 if (mode == 0 && y == 0) { 3375 continue; 3376 } 3377 3378 /* if the bit is zero and mode == 1 then we square */ 3379 if (mode == 1 && y == 0) { 3380 if ((err = square(&res, &res)) != MP_OKAY) { 3381 goto LBL_RES; 3382 } 3383 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3384 goto LBL_RES; 3385 } 3386 continue; 3387 } 3388 3389 /* else we add it to the window */ 3390 bitbuf |= (y << (winsize - ++bitcpy)); 3391 mode = 2; 3392 3393 if (bitcpy == winsize) { 3394 /* ok window is filled so square as required and multiply */ 3395 /* square first */ 3396 for (x = 0; x < winsize; x++) { 3397 if ((err = square(&res, &res)) != MP_OKAY) { 3398 goto LBL_RES; 3399 } 3400 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3401 goto LBL_RES; 3402 } 3403 } 3404 3405 /* then multiply */ 3406 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 3407 goto LBL_RES; 3408 } 3409 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3410 goto LBL_RES; 3411 } 3412 3413 /* empty window and reset */ 3414 bitcpy = 0; 3415 bitbuf = 0; 3416 mode = 1; 3417 } 3418 } 3419 3420 /* if bits remain then square/multiply */ 3421 if (mode == 2 && bitcpy > 0) { 3422 /* square then multiply if the bit is set */ 3423 for (x = 0; x < bitcpy; x++) { 3424 if ((err = square(&res, &res)) != MP_OKAY) { 3425 goto LBL_RES; 3426 } 3427 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3428 goto LBL_RES; 3429 } 3430 3431 bitbuf <<= 1; 3432 if ((bitbuf & (1 << winsize)) != 0) { 3433 /* then multiply */ 3434 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 3435 goto LBL_RES; 3436 } 3437 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3438 goto LBL_RES; 3439 } 3440 } 3441 } 3442 } 3443 3444 mp_exch(&res, Y); 3445 err = MP_OKAY; 3446 LBL_RES: 3447 mp_clear(&res); 3448 LBL_MU: 3449 mp_clear(&mu); 3450 LBL_M: 3451 mp_clear(&M[1]); 3452 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3453 mp_clear(&M[x]); 3454 } 3455 return err; 3456 } 3457 3458 /* determines if a number is a valid DR modulus */ 3459 static int 3460 is_diminished_radix_modulus(mp_int *a) 3461 { 3462 int ix; 3463 3464 /* must be at least two digits */ 3465 if (a->used < 2) { 3466 return 0; 3467 } 3468 3469 /* must be of the form b**k - a [a <= b] so all 3470 * but the first digit must be equal to -1 (mod b). 3471 */ 3472 for (ix = 1; ix < a->used; ix++) { 3473 if (a->dp[ix] != MP_MASK) { 3474 return 0; 3475 } 3476 } 3477 return 1; 3478 } 3479 3480 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3481 /* Revision: 1.1.1.1 $ */ 3482 /* Date: 2011/03/12 22:58:18 $ */ 3483 3484 /* determines if mp_reduce_2k can be used */ 3485 static int 3486 mp_reduce_is_2k(mp_int *a) 3487 { 3488 int ix, iy, iw; 3489 mp_digit iz; 3490 3491 if (a->used == 0) { 3492 return MP_NO; 3493 } 3494 if (a->used == 1) { 3495 return MP_YES; 3496 } 3497 if (a->used > 1) { 3498 iy = mp_count_bits(a); 3499 iz = 1; 3500 iw = 1; 3501 3502 /* Test every bit from the second digit up, must be 1 */ 3503 for (ix = DIGIT_BIT; ix < iy; ix++) { 3504 if ((a->dp[iw] & iz) == 0) { 3505 return MP_NO; 3506 } 3507 iz <<= 1; 3508 if (iz > (mp_digit)MP_MASK) { 3509 ++iw; 3510 iz = 1; 3511 } 3512 } 3513 } 3514 return MP_YES; 3515 } 3516 3517 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3518 /* Revision: 1.1.1.1 $ */ 3519 /* Date: 2011/03/12 22:58:18 $ */ 3520 3521 3522 /* d = a * b (mod c) */ 3523 static int 3524 multiply_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3525 { 3526 mp_int t; 3527 int res; 3528 3529 if ((res = mp_init(&t)) != MP_OKAY) { 3530 return res; 3531 } 3532 3533 if ((res = signed_multiply(a, b, &t)) != MP_OKAY) { 3534 mp_clear(&t); 3535 return res; 3536 } 3537 res = modulo(&t, c, d); 3538 mp_clear(&t); 3539 return res; 3540 } 3541 3542 /* d = a + b (mod c) */ 3543 static int 3544 add_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3545 { 3546 mp_int t; 3547 int res; 3548 3549 if ((res = mp_init(&t)) != MP_OKAY) { 3550 return res; 3551 } 3552 if ((res = signed_add(a, b, &t)) != MP_OKAY) { 3553 mp_clear(&t); 3554 return res; 3555 } 3556 res = modulo(&t, c, d); 3557 mp_clear(&t); 3558 return res; 3559 } 3560 3561 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3562 /* Revision: 1.1.1.1 $ */ 3563 /* Date: 2011/03/12 22:58:18 $ */ 3564 3565 /* setups the montgomery reduction stuff */ 3566 static int 3567 mp_montgomery_setup(mp_int * n, mp_digit * rho) 3568 { 3569 mp_digit x, b; 3570 3571 /* fast inversion mod 2**k 3572 * 3573 * Based on the fact that 3574 * 3575 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3576 * => 2*X*A - X*X*A*A = 1 3577 * => 2*(1) - (1) = 1 3578 */ 3579 b = n->dp[0]; 3580 3581 if ((b & 1) == 0) { 3582 return MP_VAL; 3583 } 3584 3585 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3586 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3587 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3588 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3589 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3590 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3591 } 3592 3593 /* rho = -1/m mod b */ 3594 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3595 3596 return MP_OKAY; 3597 } 3598 3599 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3600 /* Revision: 1.1.1.1 $ */ 3601 /* Date: 2011/03/12 22:58:18 $ */ 3602 3603 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3604 * 3605 * This is an optimized implementation of montgomery_reduce 3606 * which uses the comba method to quickly calculate the columns of the 3607 * reduction. 3608 * 3609 * Based on Algorithm 14.32 on pp.601 of HAC. 3610 */ 3611 static int 3612 fast_mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3613 { 3614 int ix, res, olduse; 3615 /*LINTED*/ 3616 mp_word W[MP_WARRAY]; 3617 3618 /* get old used count */ 3619 olduse = x->used; 3620 3621 /* grow a as required */ 3622 if (x->alloc < n->used + 1) { 3623 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) { 3624 return res; 3625 } 3626 } 3627 3628 /* first we have to get the digits of the input into 3629 * an array of double precision words W[...] 3630 */ 3631 { 3632 mp_word *_W; 3633 mp_digit *tmpx; 3634 3635 /* alias for the W[] array */ 3636 _W = W; 3637 3638 /* alias for the digits of x*/ 3639 tmpx = x->dp; 3640 3641 /* copy the digits of a into W[0..a->used-1] */ 3642 for (ix = 0; ix < x->used; ix++) { 3643 *_W++ = *tmpx++; 3644 } 3645 3646 /* zero the high words of W[a->used..m->used*2] */ 3647 for (; ix < n->used * 2 + 1; ix++) { 3648 *_W++ = 0; 3649 } 3650 } 3651 3652 /* now we proceed to zero successive digits 3653 * from the least significant upwards 3654 */ 3655 for (ix = 0; ix < n->used; ix++) { 3656 /* mu = ai * m' mod b 3657 * 3658 * We avoid a double precision multiplication (which isn't required) 3659 * by casting the value down to a mp_digit. Note this requires 3660 * that W[ix-1] have the carry cleared (see after the inner loop) 3661 */ 3662 mp_digit mu; 3663 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3664 3665 /* a = a + mu * m * b**i 3666 * 3667 * This is computed in place and on the fly. The multiplication 3668 * by b**i is handled by offseting which columns the results 3669 * are added to. 3670 * 3671 * Note the comba method normally doesn't handle carries in the 3672 * inner loop In this case we fix the carry from the previous 3673 * column since the Montgomery reduction requires digits of the 3674 * result (so far) [see above] to work. This is 3675 * handled by fixing up one carry after the inner loop. The 3676 * carry fixups are done in order so after these loops the 3677 * first m->used words of W[] have the carries fixed 3678 */ 3679 { 3680 int iy; 3681 mp_digit *tmpn; 3682 mp_word *_W; 3683 3684 /* alias for the digits of the modulus */ 3685 tmpn = n->dp; 3686 3687 /* Alias for the columns set by an offset of ix */ 3688 _W = W + ix; 3689 3690 /* inner loop */ 3691 for (iy = 0; iy < n->used; iy++) { 3692 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3693 } 3694 } 3695 3696 /* now fix carry for next digit, W[ix+1] */ 3697 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3698 } 3699 3700 /* now we have to propagate the carries and 3701 * shift the words downward [all those least 3702 * significant digits we zeroed]. 3703 */ 3704 { 3705 mp_digit *tmpx; 3706 mp_word *_W, *_W1; 3707 3708 /* nox fix rest of carries */ 3709 3710 /* alias for current word */ 3711 _W1 = W + ix; 3712 3713 /* alias for next word, where the carry goes */ 3714 _W = W + ++ix; 3715 3716 for (; ix <= n->used * 2 + 1; ix++) { 3717 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3718 } 3719 3720 /* copy out, A = A/b**n 3721 * 3722 * The result is A/b**n but instead of converting from an 3723 * array of mp_word to mp_digit than calling rshift_digits 3724 * we just copy them in the right order 3725 */ 3726 3727 /* alias for destination word */ 3728 tmpx = x->dp; 3729 3730 /* alias for shifted double precision result */ 3731 _W = W + n->used; 3732 3733 for (ix = 0; ix < n->used + 1; ix++) { 3734 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3735 } 3736 3737 /* zero oldused digits, if the input a was larger than 3738 * m->used+1 we'll have to clear the digits 3739 */ 3740 for (; ix < olduse; ix++) { 3741 *tmpx++ = 0; 3742 } 3743 } 3744 3745 /* set the max used and clamp */ 3746 x->used = n->used + 1; 3747 trim_unused_digits(x); 3748 3749 /* if A >= m then A = A - m */ 3750 if (compare_magnitude(x, n) != MP_LT) { 3751 return basic_subtract(x, n, x); 3752 } 3753 return MP_OKAY; 3754 } 3755 3756 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3757 /* Revision: 1.2 $ */ 3758 /* Date: 2011/03/18 16:22:09 $ */ 3759 3760 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3761 static int 3762 mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3763 { 3764 int ix, res, digs; 3765 mp_digit mu; 3766 3767 /* can the fast reduction [comba] method be used? 3768 * 3769 * Note that unlike in mul you're safely allowed *less* 3770 * than the available columns [255 per default] since carries 3771 * are fixed up in the inner loop. 3772 */ 3773 digs = n->used * 2 + 1; 3774 if (can_use_fast_column_array(digs, n->used)) { 3775 return fast_mp_montgomery_reduce(x, n, rho); 3776 } 3777 3778 /* grow the input as required */ 3779 if (x->alloc < digs) { 3780 if ((res = mp_grow(x, digs)) != MP_OKAY) { 3781 return res; 3782 } 3783 } 3784 x->used = digs; 3785 3786 for (ix = 0; ix < n->used; ix++) { 3787 /* mu = ai * rho mod b 3788 * 3789 * The value of rho must be precalculated via 3790 * montgomery_setup() such that 3791 * it equals -1/n0 mod b this allows the 3792 * following inner loop to reduce the 3793 * input one digit at a time 3794 */ 3795 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3796 3797 /* a = a + mu * m * b**i */ 3798 { 3799 int iy; 3800 mp_digit *tmpn, *tmpx, carry; 3801 mp_word r; 3802 3803 /* alias for digits of the modulus */ 3804 tmpn = n->dp; 3805 3806 /* alias for the digits of x [the input] */ 3807 tmpx = x->dp + ix; 3808 3809 /* set the carry to zero */ 3810 carry = 0; 3811 3812 /* Multiply and add in place */ 3813 for (iy = 0; iy < n->used; iy++) { 3814 /* compute product and sum */ 3815 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3816 ((mp_word) carry) + ((mp_word) * tmpx); 3817 3818 /* get carry */ 3819 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3820 3821 /* fix digit */ 3822 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3823 } 3824 /* At this point the ix'th digit of x should be zero */ 3825 3826 3827 /* propagate carries upwards as required*/ 3828 while (carry) { 3829 *tmpx += carry; 3830 carry = *tmpx >> DIGIT_BIT; 3831 *tmpx++ &= MP_MASK; 3832 } 3833 } 3834 } 3835 3836 /* at this point the n.used'th least 3837 * significant digits of x are all zero 3838 * which means we can shift x to the 3839 * right by n.used digits and the 3840 * residue is unchanged. 3841 */ 3842 3843 /* x = x/b**n.used */ 3844 trim_unused_digits(x); 3845 rshift_digits(x, n->used); 3846 3847 /* if x >= n then x = x - n */ 3848 if (compare_magnitude(x, n) != MP_LT) { 3849 return basic_subtract(x, n, x); 3850 } 3851 3852 return MP_OKAY; 3853 } 3854 3855 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3856 /* Revision: 1.3 $ */ 3857 /* Date: 2011/03/18 16:43:04 $ */ 3858 3859 /* determines the setup value */ 3860 static void 3861 diminished_radix_setup(mp_int *a, mp_digit *d) 3862 { 3863 /* the casts are required if DIGIT_BIT is one less than 3864 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3865 */ 3866 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3867 ((mp_word)a->dp[0])); 3868 } 3869 3870 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3871 /* Revision: 1.1.1.1 $ */ 3872 /* Date: 2011/03/12 22:58:18 $ */ 3873 3874 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3875 * 3876 * Based on algorithm from the paper 3877 * 3878 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3879 * Chae Hoon Lim, Pil Joong Lee, 3880 * POSTECH Information Research Laboratories 3881 * 3882 * The modulus must be of a special format [see manual] 3883 * 3884 * Has been modified to use algorithm 7.10 from the LTM book instead 3885 * 3886 * Input x must be in the range 0 <= x <= (n-1)**2 3887 */ 3888 static int 3889 diminished_radix_reduce(mp_int * x, mp_int * n, mp_digit k) 3890 { 3891 int err, i, m; 3892 mp_word r; 3893 mp_digit mu, *tmpx1, *tmpx2; 3894 3895 /* m = digits in modulus */ 3896 m = n->used; 3897 3898 /* ensure that "x" has at least 2m digits */ 3899 if (x->alloc < m + m) { 3900 if ((err = mp_grow(x, m + m)) != MP_OKAY) { 3901 return err; 3902 } 3903 } 3904 3905 /* top of loop, this is where the code resumes if 3906 * another reduction pass is required. 3907 */ 3908 top: 3909 /* aliases for digits */ 3910 /* alias for lower half of x */ 3911 tmpx1 = x->dp; 3912 3913 /* alias for upper half of x, or x/B**m */ 3914 tmpx2 = x->dp + m; 3915 3916 /* set carry to zero */ 3917 mu = 0; 3918 3919 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3920 for (i = 0; i < m; i++) { 3921 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3922 *tmpx1++ = (mp_digit)(r & MP_MASK); 3923 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3924 } 3925 3926 /* set final carry */ 3927 *tmpx1++ = mu; 3928 3929 /* zero words above m */ 3930 for (i = m + 1; i < x->used; i++) { 3931 *tmpx1++ = 0; 3932 } 3933 3934 /* clamp, sub and return */ 3935 trim_unused_digits(x); 3936 3937 /* if x >= n then subtract and reduce again 3938 * Each successive "recursion" makes the input smaller and smaller. 3939 */ 3940 if (compare_magnitude(x, n) != MP_LT) { 3941 basic_subtract(x, n, x); 3942 goto top; 3943 } 3944 return MP_OKAY; 3945 } 3946 3947 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3948 /* Revision: 1.1.1.1 $ */ 3949 /* Date: 2011/03/12 22:58:18 $ */ 3950 3951 /* determines the setup value */ 3952 static int 3953 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3954 { 3955 int res, p; 3956 mp_int tmp; 3957 3958 if ((res = mp_init(&tmp)) != MP_OKAY) { 3959 return res; 3960 } 3961 3962 p = mp_count_bits(a); 3963 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3964 mp_clear(&tmp); 3965 return res; 3966 } 3967 3968 if ((res = basic_subtract(&tmp, a, &tmp)) != MP_OKAY) { 3969 mp_clear(&tmp); 3970 return res; 3971 } 3972 3973 *d = tmp.dp[0]; 3974 mp_clear(&tmp); 3975 return MP_OKAY; 3976 } 3977 3978 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3979 /* Revision: 1.1.1.1 $ */ 3980 /* Date: 2011/03/12 22:58:18 $ */ 3981 3982 /* reduces a modulo n where n is of the form 2**p - d */ 3983 static int 3984 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 3985 { 3986 mp_int q; 3987 int p, res; 3988 3989 if ((res = mp_init(&q)) != MP_OKAY) { 3990 return res; 3991 } 3992 3993 p = mp_count_bits(n); 3994 top: 3995 /* q = a/2**p, a = a mod 2**p */ 3996 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 3997 goto ERR; 3998 } 3999 4000 if (d != 1) { 4001 /* q = q * d */ 4002 if ((res = multiply_digit(&q, d, &q)) != MP_OKAY) { 4003 goto ERR; 4004 } 4005 } 4006 4007 /* a = a + q */ 4008 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 4009 goto ERR; 4010 } 4011 4012 if (compare_magnitude(a, n) != MP_LT) { 4013 basic_subtract(a, n, a); 4014 goto top; 4015 } 4016 4017 ERR: 4018 mp_clear(&q); 4019 return res; 4020 } 4021 4022 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 4023 /* Revision: 1.1.1.1 $ */ 4024 /* Date: 2011/03/12 22:58:18 $ */ 4025 4026 /* 4027 * shifts with subtractions when the result is greater than b. 4028 * 4029 * The method is slightly modified to shift B unconditionally upto just under 4030 * the leading bit of b. This saves alot of multiple precision shifting. 4031 */ 4032 static int 4033 mp_montgomery_calc_normalization(mp_int * a, mp_int * b) 4034 { 4035 int x, bits, res; 4036 4037 /* how many bits of last digit does b use */ 4038 bits = mp_count_bits(b) % DIGIT_BIT; 4039 4040 if (b->used > 1) { 4041 if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4042 return res; 4043 } 4044 } else { 4045 set_word(a, 1); 4046 bits = 1; 4047 } 4048 4049 4050 /* now compute C = A * B mod b */ 4051 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4052 if ((res = doubled(a, a)) != MP_OKAY) { 4053 return res; 4054 } 4055 if (compare_magnitude(a, b) != MP_LT) { 4056 if ((res = basic_subtract(a, b, a)) != MP_OKAY) { 4057 return res; 4058 } 4059 } 4060 } 4061 4062 return MP_OKAY; 4063 } 4064 4065 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4066 /* Revision: 1.1.1.1 $ */ 4067 /* Date: 2011/03/12 22:58:18 $ */ 4068 4069 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4070 * 4071 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4072 * The value of k changes based on the size of the exponent. 4073 * 4074 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4075 */ 4076 4077 #define TAB_SIZE 256 4078 4079 static int 4080 fast_exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4081 { 4082 mp_int M[TAB_SIZE], res; 4083 mp_digit buf, mp; 4084 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4085 4086 /* use a pointer to the reduction algorithm. This allows us to use 4087 * one of many reduction algorithms without modding the guts of 4088 * the code with if statements everywhere. 4089 */ 4090 int (*redux)(mp_int*,mp_int*,mp_digit); 4091 4092 winsize = find_window_size(X); 4093 4094 /* init M array */ 4095 /* init first cell */ 4096 if ((err = mp_init(&M[1])) != MP_OKAY) { 4097 return err; 4098 } 4099 4100 /* now init the second half of the array */ 4101 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4102 if ((err = mp_init(&M[x])) != MP_OKAY) { 4103 for (y = 1<<(winsize-1); y < x; y++) { 4104 mp_clear(&M[y]); 4105 } 4106 mp_clear(&M[1]); 4107 return err; 4108 } 4109 } 4110 4111 /* determine and setup reduction code */ 4112 if (redmode == 0) { 4113 /* now setup montgomery */ 4114 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) { 4115 goto LBL_M; 4116 } 4117 4118 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4119 if (can_use_fast_column_array(P->used + P->used + 1, P->used)) { 4120 redux = fast_mp_montgomery_reduce; 4121 } else { 4122 /* use slower baseline Montgomery method */ 4123 redux = mp_montgomery_reduce; 4124 } 4125 } else if (redmode == 1) { 4126 /* setup DR reduction for moduli of the form B**k - b */ 4127 diminished_radix_setup(P, &mp); 4128 redux = diminished_radix_reduce; 4129 } else { 4130 /* setup DR reduction for moduli of the form 2**k - b */ 4131 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4132 goto LBL_M; 4133 } 4134 redux = mp_reduce_2k; 4135 } 4136 4137 /* setup result */ 4138 if ((err = mp_init(&res)) != MP_OKAY) { 4139 goto LBL_M; 4140 } 4141 4142 /* create M table 4143 * 4144 4145 * 4146 * The first half of the table is not computed though accept for M[0] and M[1] 4147 */ 4148 4149 if (redmode == 0) { 4150 /* now we need R mod m */ 4151 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) { 4152 goto LBL_RES; 4153 } 4154 4155 /* now set M[1] to G * R mod m */ 4156 if ((err = multiply_modulo(&M[1], G, &res, P)) != MP_OKAY) { 4157 goto LBL_RES; 4158 } 4159 } else { 4160 set_word(&res, 1); 4161 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 4162 goto LBL_RES; 4163 } 4164 } 4165 4166 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4167 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4168 goto LBL_RES; 4169 } 4170 4171 for (x = 0; x < (winsize - 1); x++) { 4172 if ((err = square(&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4173 goto LBL_RES; 4174 } 4175 if ((err = (*redux)(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4176 goto LBL_RES; 4177 } 4178 } 4179 4180 /* create upper table */ 4181 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4182 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4183 goto LBL_RES; 4184 } 4185 if ((err = (*redux)(&M[x], P, mp)) != MP_OKAY) { 4186 goto LBL_RES; 4187 } 4188 } 4189 4190 /* set initial mode and bit cnt */ 4191 mode = 0; 4192 bitcnt = 1; 4193 buf = 0; 4194 digidx = X->used - 1; 4195 bitcpy = 0; 4196 bitbuf = 0; 4197 4198 for (;;) { 4199 /* grab next digit as required */ 4200 if (--bitcnt == 0) { 4201 /* if digidx == -1 we are out of digits so break */ 4202 if (digidx == -1) { 4203 break; 4204 } 4205 /* read next digit and reset bitcnt */ 4206 buf = X->dp[digidx--]; 4207 bitcnt = (int)DIGIT_BIT; 4208 } 4209 4210 /* grab the next msb from the exponent */ 4211 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4212 buf <<= (mp_digit)1; 4213 4214 /* if the bit is zero and mode == 0 then we ignore it 4215 * These represent the leading zero bits before the first 1 bit 4216 * in the exponent. Technically this opt is not required but it 4217 * does lower the # of trivial squaring/reductions used 4218 */ 4219 if (mode == 0 && y == 0) { 4220 continue; 4221 } 4222 4223 /* if the bit is zero and mode == 1 then we square */ 4224 if (mode == 1 && y == 0) { 4225 if ((err = square(&res, &res)) != MP_OKAY) { 4226 goto LBL_RES; 4227 } 4228 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4229 goto LBL_RES; 4230 } 4231 continue; 4232 } 4233 4234 /* else we add it to the window */ 4235 bitbuf |= (y << (winsize - ++bitcpy)); 4236 mode = 2; 4237 4238 if (bitcpy == winsize) { 4239 /* ok window is filled so square as required and multiply */ 4240 /* square first */ 4241 for (x = 0; x < winsize; x++) { 4242 if ((err = square(&res, &res)) != MP_OKAY) { 4243 goto LBL_RES; 4244 } 4245 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4246 goto LBL_RES; 4247 } 4248 } 4249 4250 /* then multiply */ 4251 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 4252 goto LBL_RES; 4253 } 4254 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4255 goto LBL_RES; 4256 } 4257 4258 /* empty window and reset */ 4259 bitcpy = 0; 4260 bitbuf = 0; 4261 mode = 1; 4262 } 4263 } 4264 4265 /* if bits remain then square/multiply */ 4266 if (mode == 2 && bitcpy > 0) { 4267 /* square then multiply if the bit is set */ 4268 for (x = 0; x < bitcpy; x++) { 4269 if ((err = square(&res, &res)) != MP_OKAY) { 4270 goto LBL_RES; 4271 } 4272 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4273 goto LBL_RES; 4274 } 4275 4276 /* get next bit of the window */ 4277 bitbuf <<= 1; 4278 if ((bitbuf & (1 << winsize)) != 0) { 4279 /* then multiply */ 4280 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 4281 goto LBL_RES; 4282 } 4283 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4284 goto LBL_RES; 4285 } 4286 } 4287 } 4288 } 4289 4290 if (redmode == 0) { 4291 /* fixup result if Montgomery reduction is used 4292 * recall that any value in a Montgomery system is 4293 * actually multiplied by R mod n. So we have 4294 * to reduce one more time to cancel out the factor 4295 * of R. 4296 */ 4297 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4298 goto LBL_RES; 4299 } 4300 } 4301 4302 /* swap res with Y */ 4303 mp_exch(&res, Y); 4304 err = MP_OKAY; 4305 LBL_RES: 4306 mp_clear(&res); 4307 LBL_M: 4308 mp_clear(&M[1]); 4309 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4310 mp_clear(&M[x]); 4311 } 4312 return err; 4313 } 4314 4315 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_exponent_modulo.c,v $ */ 4316 /* Revision: 1.4 $ */ 4317 /* Date: 2011/03/18 16:43:04 $ */ 4318 4319 /* this is a shell function that calls either the normal or Montgomery 4320 * exptmod functions. Originally the call to the montgomery code was 4321 * embedded in the normal function but that wasted alot of stack space 4322 * for nothing (since 99% of the time the Montgomery code would be called) 4323 */ 4324 static int 4325 exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4326 { 4327 int diminished_radix; 4328 4329 /* modulus P must be positive */ 4330 if (P->sign == MP_NEG) { 4331 return MP_VAL; 4332 } 4333 4334 /* if exponent X is negative we have to recurse */ 4335 if (X->sign == MP_NEG) { 4336 mp_int tmpG, tmpX; 4337 int err; 4338 4339 /* first compute 1/G mod P */ 4340 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4341 return err; 4342 } 4343 if ((err = modular_inverse(&tmpG, G, P)) != MP_OKAY) { 4344 mp_clear(&tmpG); 4345 return err; 4346 } 4347 4348 /* now get |X| */ 4349 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4350 mp_clear(&tmpG); 4351 return err; 4352 } 4353 if ((err = absolute(X, &tmpX)) != MP_OKAY) { 4354 mp_clear_multi(&tmpG, &tmpX, NULL); 4355 return err; 4356 } 4357 4358 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4359 err = exponent_modulo(&tmpG, &tmpX, P, Y); 4360 mp_clear_multi(&tmpG, &tmpX, NULL); 4361 return err; 4362 } 4363 4364 /* modified diminished radix reduction */ 4365 if (mp_reduce_is_2k_l(P) == MP_YES) { 4366 return basic_exponent_mod(G, X, P, Y, 1); 4367 } 4368 4369 /* is it a DR modulus? */ 4370 diminished_radix = is_diminished_radix_modulus(P); 4371 4372 /* if not, is it a unrestricted DR modulus? */ 4373 if (!diminished_radix) { 4374 diminished_radix = mp_reduce_is_2k(P) << 1; 4375 } 4376 4377 /* if the modulus is odd or diminished_radix, use the montgomery method */ 4378 if (PGPV_BN_is_odd(P) == 1 || diminished_radix) { 4379 return fast_exponent_modulo(G, X, P, Y, diminished_radix); 4380 } 4381 /* otherwise use the generic Barrett reduction technique */ 4382 return basic_exponent_mod(G, X, P, Y, 0); 4383 } 4384 4385 /* reverse an array, used for radix code */ 4386 static void 4387 bn_reverse(unsigned char *s, int len) 4388 { 4389 int ix, iy; 4390 uint8_t t; 4391 4392 for (ix = 0, iy = len - 1; ix < iy ; ix++, --iy) { 4393 t = s[ix]; 4394 s[ix] = s[iy]; 4395 s[iy] = t; 4396 } 4397 } 4398 4399 static inline int 4400 is_power_of_two(mp_digit b, int *p) 4401 { 4402 int x; 4403 4404 /* fast return if no power of two */ 4405 if ((b==0) || (b & (b-1))) { 4406 return 0; 4407 } 4408 4409 for (x = 0; x < DIGIT_BIT; x++) { 4410 if (b == (((mp_digit)1)<<x)) { 4411 *p = x; 4412 return 1; 4413 } 4414 } 4415 return 0; 4416 } 4417 4418 /* single digit division (based on routine from MPI) */ 4419 static int 4420 signed_divide_word(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4421 { 4422 mp_int q; 4423 mp_word w; 4424 mp_digit t; 4425 int res, ix; 4426 4427 /* cannot divide by zero */ 4428 if (b == 0) { 4429 return MP_VAL; 4430 } 4431 4432 /* quick outs */ 4433 if (b == 1 || MP_ISZERO(a) == 1) { 4434 if (d != NULL) { 4435 *d = 0; 4436 } 4437 if (c != NULL) { 4438 return mp_copy(a, c); 4439 } 4440 return MP_OKAY; 4441 } 4442 4443 /* power of two ? */ 4444 if (is_power_of_two(b, &ix) == 1) { 4445 if (d != NULL) { 4446 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4447 } 4448 if (c != NULL) { 4449 return rshift_bits(a, ix, c, NULL); 4450 } 4451 return MP_OKAY; 4452 } 4453 4454 /* three? */ 4455 if (b == 3) { 4456 return third(a, c, d); 4457 } 4458 4459 /* no easy answer [c'est la vie]. Just division */ 4460 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4461 return res; 4462 } 4463 4464 q.used = a->used; 4465 q.sign = a->sign; 4466 w = 0; 4467 for (ix = a->used - 1; ix >= 0; ix--) { 4468 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4469 4470 if (w >= b) { 4471 t = (mp_digit)(w / b); 4472 w -= ((mp_word)t) * ((mp_word)b); 4473 } else { 4474 t = 0; 4475 } 4476 q.dp[ix] = (mp_digit)t; 4477 } 4478 4479 if (d != NULL) { 4480 *d = (mp_digit)w; 4481 } 4482 4483 if (c != NULL) { 4484 trim_unused_digits(&q); 4485 mp_exch(&q, c); 4486 } 4487 mp_clear(&q); 4488 4489 return res; 4490 } 4491 4492 static const mp_digit ltm_prime_tab[] = { 4493 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4494 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4495 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4496 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4497 #ifndef MP_8BIT 4498 0x0083, 4499 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4500 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4501 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4502 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4503 4504 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4505 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4506 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4507 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4508 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4509 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4510 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4511 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4512 4513 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4514 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4515 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4516 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4517 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4518 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4519 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4520 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4521 4522 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4523 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4524 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4525 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4526 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4527 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4528 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4529 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4530 #endif 4531 }; 4532 4533 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4534 4535 static inline int 4536 mp_prime_is_divisible(mp_int *a, int *result) 4537 { 4538 int err, ix; 4539 mp_digit res; 4540 4541 /* default to not */ 4542 *result = MP_NO; 4543 4544 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4545 /* what is a mod LBL_prime_tab[ix] */ 4546 if ((err = signed_divide_word(a, ltm_prime_tab[ix], NULL, &res)) != MP_OKAY) { 4547 return err; 4548 } 4549 4550 /* is the residue zero? */ 4551 if (res == 0) { 4552 *result = MP_YES; 4553 return MP_OKAY; 4554 } 4555 } 4556 4557 return MP_OKAY; 4558 } 4559 4560 /* single digit addition */ 4561 static int 4562 add_single_digit(mp_int *a, mp_digit b, mp_int *c) 4563 { 4564 int res, ix, oldused; 4565 mp_digit *tmpa, *tmpc, mu; 4566 4567 /* grow c as required */ 4568 if (c->alloc < a->used + 1) { 4569 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4570 return res; 4571 } 4572 } 4573 4574 /* if a is negative and |a| >= b, call c = |a| - b */ 4575 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4576 /* temporarily fix sign of a */ 4577 a->sign = MP_ZPOS; 4578 4579 /* c = |a| - b */ 4580 res = signed_subtract_word(a, b, c); 4581 4582 /* fix sign */ 4583 a->sign = c->sign = MP_NEG; 4584 4585 /* clamp */ 4586 trim_unused_digits(c); 4587 4588 return res; 4589 } 4590 4591 /* old number of used digits in c */ 4592 oldused = c->used; 4593 4594 /* sign always positive */ 4595 c->sign = MP_ZPOS; 4596 4597 /* source alias */ 4598 tmpa = a->dp; 4599 4600 /* destination alias */ 4601 tmpc = c->dp; 4602 4603 /* if a is positive */ 4604 if (a->sign == MP_ZPOS) { 4605 /* add digit, after this we're propagating 4606 * the carry. 4607 */ 4608 *tmpc = *tmpa++ + b; 4609 mu = *tmpc >> DIGIT_BIT; 4610 *tmpc++ &= MP_MASK; 4611 4612 /* now handle rest of the digits */ 4613 for (ix = 1; ix < a->used; ix++) { 4614 *tmpc = *tmpa++ + mu; 4615 mu = *tmpc >> DIGIT_BIT; 4616 *tmpc++ &= MP_MASK; 4617 } 4618 /* set final carry */ 4619 ix++; 4620 *tmpc++ = mu; 4621 4622 /* setup size */ 4623 c->used = a->used + 1; 4624 } else { 4625 /* a was negative and |a| < b */ 4626 c->used = 1; 4627 4628 /* the result is a single digit */ 4629 if (a->used == 1) { 4630 *tmpc++ = b - a->dp[0]; 4631 } else { 4632 *tmpc++ = b; 4633 } 4634 4635 /* setup count so the clearing of oldused 4636 * can fall through correctly 4637 */ 4638 ix = 1; 4639 } 4640 4641 /* now zero to oldused */ 4642 while (ix++ < oldused) { 4643 *tmpc++ = 0; 4644 } 4645 trim_unused_digits(c); 4646 4647 return MP_OKAY; 4648 } 4649 4650 /* single digit subtraction */ 4651 static int 4652 signed_subtract_word(mp_int *a, mp_digit b, mp_int *c) 4653 { 4654 mp_digit *tmpa, *tmpc, mu; 4655 int res, ix, oldused; 4656 4657 /* grow c as required */ 4658 if (c->alloc < a->used + 1) { 4659 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4660 return res; 4661 } 4662 } 4663 4664 /* if a is negative just do an unsigned 4665 * addition [with fudged signs] 4666 */ 4667 if (a->sign == MP_NEG) { 4668 a->sign = MP_ZPOS; 4669 res = add_single_digit(a, b, c); 4670 a->sign = c->sign = MP_NEG; 4671 4672 /* clamp */ 4673 trim_unused_digits(c); 4674 4675 return res; 4676 } 4677 4678 /* setup regs */ 4679 oldused = c->used; 4680 tmpa = a->dp; 4681 tmpc = c->dp; 4682 4683 /* if a <= b simply fix the single digit */ 4684 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4685 if (a->used == 1) { 4686 *tmpc++ = b - *tmpa; 4687 } else { 4688 *tmpc++ = b; 4689 } 4690 ix = 1; 4691 4692 /* negative/1digit */ 4693 c->sign = MP_NEG; 4694 c->used = 1; 4695 } else { 4696 /* positive/size */ 4697 c->sign = MP_ZPOS; 4698 c->used = a->used; 4699 4700 /* subtract first digit */ 4701 *tmpc = *tmpa++ - b; 4702 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4703 *tmpc++ &= MP_MASK; 4704 4705 /* handle rest of the digits */ 4706 for (ix = 1; ix < a->used; ix++) { 4707 *tmpc = *tmpa++ - mu; 4708 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4709 *tmpc++ &= MP_MASK; 4710 } 4711 } 4712 4713 /* zero excess digits */ 4714 while (ix++ < oldused) { 4715 *tmpc++ = 0; 4716 } 4717 trim_unused_digits(c); 4718 return MP_OKAY; 4719 } 4720 4721 static const int lnz[16] = { 4722 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4723 }; 4724 4725 /* Counts the number of lsbs which are zero before the first zero bit */ 4726 static int 4727 mp_cnt_lsb(mp_int *a) 4728 { 4729 int x; 4730 mp_digit q, qq; 4731 4732 /* easy out */ 4733 if (MP_ISZERO(a) == 1) { 4734 return 0; 4735 } 4736 4737 /* scan lower digits until non-zero */ 4738 for (x = 0; x < a->used && a->dp[x] == 0; x++) { 4739 } 4740 q = a->dp[x]; 4741 x *= DIGIT_BIT; 4742 4743 /* now scan this digit until a 1 is found */ 4744 if ((q & 1) == 0) { 4745 do { 4746 qq = q & 15; 4747 /* LINTED previous op ensures range of qq */ 4748 x += lnz[qq]; 4749 q >>= 4; 4750 } while (qq == 0); 4751 } 4752 return x; 4753 } 4754 4755 /* c = a * a (mod b) */ 4756 static int 4757 square_modulo(mp_int *a, mp_int *b, mp_int *c) 4758 { 4759 int res; 4760 mp_int t; 4761 4762 if ((res = mp_init(&t)) != MP_OKAY) { 4763 return res; 4764 } 4765 4766 if ((res = square(a, &t)) != MP_OKAY) { 4767 mp_clear(&t); 4768 return res; 4769 } 4770 res = modulo(&t, b, c); 4771 mp_clear(&t); 4772 return res; 4773 } 4774 4775 static int 4776 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4777 { 4778 mp_int n1, y, r; 4779 int s, j, err; 4780 4781 /* default */ 4782 *result = MP_NO; 4783 4784 /* ensure b > 1 */ 4785 if (compare_digit(b, 1) != MP_GT) { 4786 return MP_VAL; 4787 } 4788 4789 /* get n1 = a - 1 */ 4790 if ((err = mp_init_copy(&n1, a)) != MP_OKAY) { 4791 return err; 4792 } 4793 if ((err = signed_subtract_word(&n1, 1, &n1)) != MP_OKAY) { 4794 goto LBL_N1; 4795 } 4796 4797 /* set 2**s * r = n1 */ 4798 if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) { 4799 goto LBL_N1; 4800 } 4801 4802 /* count the number of least significant bits 4803 * which are zero 4804 */ 4805 s = mp_cnt_lsb(&r); 4806 4807 /* now divide n - 1 by 2**s */ 4808 if ((err = rshift_bits(&r, s, &r, NULL)) != MP_OKAY) { 4809 goto LBL_R; 4810 } 4811 4812 /* compute y = b**r mod a */ 4813 if ((err = mp_init(&y)) != MP_OKAY) { 4814 goto LBL_R; 4815 } 4816 if ((err = exponent_modulo(b, &r, a, &y)) != MP_OKAY) { 4817 goto LBL_Y; 4818 } 4819 4820 /* if y != 1 and y != n1 do */ 4821 if (compare_digit(&y, 1) != MP_EQ && signed_compare(&y, &n1) != MP_EQ) { 4822 j = 1; 4823 /* while j <= s-1 and y != n1 */ 4824 while ((j <= (s - 1)) && signed_compare(&y, &n1) != MP_EQ) { 4825 if ((err = square_modulo(&y, a, &y)) != MP_OKAY) { 4826 goto LBL_Y; 4827 } 4828 4829 /* if y == 1 then composite */ 4830 if (compare_digit(&y, 1) == MP_EQ) { 4831 goto LBL_Y; 4832 } 4833 4834 ++j; 4835 } 4836 4837 /* if y != n1 then composite */ 4838 if (signed_compare(&y, &n1) != MP_EQ) { 4839 goto LBL_Y; 4840 } 4841 } 4842 4843 /* probably prime now */ 4844 *result = MP_YES; 4845 LBL_Y: 4846 mp_clear(&y); 4847 LBL_R: 4848 mp_clear(&r); 4849 LBL_N1: 4850 mp_clear(&n1); 4851 return err; 4852 } 4853 4854 /* performs a variable number of rounds of Miller-Rabin 4855 * 4856 * Probability of error after t rounds is no more than 4857 4858 * 4859 * Sets result to 1 if probably prime, 0 otherwise 4860 */ 4861 static int 4862 mp_prime_is_prime(mp_int *a, int t, int *result) 4863 { 4864 mp_int b; 4865 int ix, err, res; 4866 4867 /* default to no */ 4868 *result = MP_NO; 4869 4870 /* valid value of t? */ 4871 if (t <= 0 || t > (int)PRIME_SIZE) { 4872 return MP_VAL; 4873 } 4874 4875 /* is the input equal to one of the primes in the table? */ 4876 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4877 if (compare_digit(a, ltm_prime_tab[ix]) == MP_EQ) { 4878 *result = 1; 4879 return MP_OKAY; 4880 } 4881 } 4882 4883 /* first perform trial division */ 4884 if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) { 4885 return err; 4886 } 4887 4888 /* return if it was trivially divisible */ 4889 if (res == MP_YES) { 4890 return MP_OKAY; 4891 } 4892 4893 /* now perform the miller-rabin rounds */ 4894 if ((err = mp_init(&b)) != MP_OKAY) { 4895 return err; 4896 } 4897 4898 for (ix = 0; ix < t; ix++) { 4899 /* set the prime */ 4900 set_word(&b, ltm_prime_tab[ix]); 4901 4902 if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) { 4903 goto LBL_B; 4904 } 4905 4906 if (res == MP_NO) { 4907 goto LBL_B; 4908 } 4909 } 4910 4911 /* passed the test */ 4912 *result = MP_YES; 4913 LBL_B: 4914 mp_clear(&b); 4915 return err; 4916 } 4917 4918 /* returns size of ASCII reprensentation */ 4919 static int 4920 mp_radix_size(mp_int *a, int radix, int *size) 4921 { 4922 int res, digs; 4923 mp_int t; 4924 mp_digit d; 4925 4926 *size = 0; 4927 4928 /* special case for binary */ 4929 if (radix == 2) { 4930 *size = mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4931 return MP_OKAY; 4932 } 4933 4934 /* make sure the radix is in range */ 4935 if (radix < 2 || radix > 64) { 4936 return MP_VAL; 4937 } 4938 4939 if (MP_ISZERO(a) == MP_YES) { 4940 *size = 2; 4941 return MP_OKAY; 4942 } 4943 4944 /* digs is the digit count */ 4945 digs = 0; 4946 4947 /* if it's negative add one for the sign */ 4948 if (a->sign == MP_NEG) { 4949 ++digs; 4950 } 4951 4952 /* init a copy of the input */ 4953 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4954 return res; 4955 } 4956 4957 /* force temp to positive */ 4958 t.sign = MP_ZPOS; 4959 4960 /* fetch out all of the digits */ 4961 while (MP_ISZERO(&t) == MP_NO) { 4962 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 4963 mp_clear(&t); 4964 return res; 4965 } 4966 ++digs; 4967 } 4968 mp_clear(&t); 4969 4970 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 4971 *size = digs + 1; 4972 return MP_OKAY; 4973 } 4974 4975 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 4976 4977 /* stores a bignum as a ASCII string in a given radix (2..64) 4978 * 4979 * Stores upto maxlen-1 chars and always a NULL byte 4980 */ 4981 static int 4982 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 4983 { 4984 int res, digs; 4985 mp_int t; 4986 mp_digit d; 4987 char *_s = str; 4988 4989 /* check range of the maxlen, radix */ 4990 if (maxlen < 2 || radix < 2 || radix > 64) { 4991 return MP_VAL; 4992 } 4993 4994 /* quick out if its zero */ 4995 if (MP_ISZERO(a) == MP_YES) { 4996 *str++ = '0'; 4997 *str = '\0'; 4998 return MP_OKAY; 4999 } 5000 5001 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 5002 return res; 5003 } 5004 5005 /* if it is negative output a - */ 5006 if (t.sign == MP_NEG) { 5007 /* we have to reverse our digits later... but not the - sign!! */ 5008 ++_s; 5009 5010 /* store the flag and mark the number as positive */ 5011 *str++ = '-'; 5012 t.sign = MP_ZPOS; 5013 5014 /* subtract a char */ 5015 --maxlen; 5016 } 5017 5018 digs = 0; 5019 while (MP_ISZERO(&t) == 0) { 5020 if (--maxlen < 1) { 5021 /* no more room */ 5022 break; 5023 } 5024 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5025 mp_clear(&t); 5026 return res; 5027 } 5028 /* LINTED -- radix' range is checked above, limits d's range */ 5029 *str++ = mp_s_rmap[d]; 5030 ++digs; 5031 } 5032 5033 /* reverse the digits of the string. In this case _s points 5034 * to the first digit [exluding the sign] of the number 5035 */ 5036 bn_reverse((unsigned char *)_s, digs); 5037 5038 /* append a NULL so the string is properly terminated */ 5039 *str = '\0'; 5040 5041 mp_clear(&t); 5042 return MP_OKAY; 5043 } 5044 5045 static char * 5046 formatbn(const PGPV_BIGNUM *a, const int radix) 5047 { 5048 char *s; 5049 int len; 5050 5051 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5052 return NULL; 5053 } 5054 if ((s = allocate(1, (size_t)len)) != NULL) { 5055 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5056 deallocate(s, (size_t)len); 5057 return NULL; 5058 } 5059 } 5060 return s; 5061 } 5062 5063 static int 5064 mp_getradix_num(mp_int *a, int radix, char *s) 5065 { 5066 int err, ch, neg, y; 5067 5068 /* clear a */ 5069 mp_zero(a); 5070 /* if first digit is - then set negative */ 5071 if ((ch = *s++) == '-') { 5072 neg = MP_NEG; 5073 ch = *s++; 5074 } else { 5075 neg = MP_ZPOS; 5076 } 5077 for (;;) { 5078 /* fold lower to upper case */ 5079 if (ch >= 'a' && ch <= 'z') { 5080 ch = (ch - 'a') + 'A'; 5081 } 5082 /* find index y in the radix map */ 5083 for (y = 0; y < radix; y++) { 5084 if (mp_s_rmap[y] == ch) { 5085 break; 5086 } 5087 } 5088 if (y == radix) { 5089 break; 5090 } 5091 /* shift up and add */ 5092 if ((err = multiply_digit(a, radix, a)) != MP_OKAY) { 5093 return err; 5094 } 5095 if ((err = add_single_digit(a, y, a)) != MP_OKAY) { 5096 return err; 5097 } 5098 ch = *s++; 5099 } 5100 if (compare_digit(a, 0) != MP_EQ) { 5101 a->sign = neg; 5102 } 5103 return MP_OKAY; 5104 } 5105 5106 static int 5107 getbn(PGPV_BIGNUM **a, const char *str, int radix) 5108 { 5109 int len; 5110 5111 if (a == NULL || str == NULL || (*a = PGPV_BN_new()) == NULL) { 5112 return 0; 5113 } 5114 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5115 return 0; 5116 } 5117 mp_radix_size(__UNCONST(*a), radix, &len); 5118 return len - 1; 5119 } 5120 5121 /* d = a - b (mod c) */ 5122 static int 5123 subtract_modulo(mp_int *a, mp_int *b, mp_int *c, mp_int *d) 5124 { 5125 int res; 5126 mp_int t; 5127 5128 5129 if ((res = mp_init(&t)) != MP_OKAY) { 5130 return res; 5131 } 5132 5133 if ((res = signed_subtract(a, b, &t)) != MP_OKAY) { 5134 mp_clear(&t); 5135 return res; 5136 } 5137 res = modulo(&t, c, d); 5138 mp_clear(&t); 5139 return res; 5140 } 5141 5142 /* bn_mp_gcd.c */ 5143 /* Greatest Common Divisor using the binary method */ 5144 static int 5145 mp_gcd(mp_int *a, mp_int *b, mp_int *c) 5146 { 5147 mp_int u, v; 5148 int k, u_lsb, v_lsb, res; 5149 5150 /* either zero than gcd is the largest */ 5151 if (PGPV_BN_is_zero(a) == MP_YES) { 5152 return absolute(b, c); 5153 } 5154 if (PGPV_BN_is_zero(b) == MP_YES) { 5155 return absolute(a, c); 5156 } 5157 5158 /* get copies of a and b we can modify */ 5159 if ((res = mp_init_copy(&u, a)) != MP_OKAY) { 5160 return res; 5161 } 5162 5163 if ((res = mp_init_copy(&v, b)) != MP_OKAY) { 5164 goto LBL_U; 5165 } 5166 5167 /* must be positive for the remainder of the algorithm */ 5168 u.sign = v.sign = MP_ZPOS; 5169 5170 /* B1. Find the common power of two for u and v */ 5171 u_lsb = mp_cnt_lsb(&u); 5172 v_lsb = mp_cnt_lsb(&v); 5173 k = MIN(u_lsb, v_lsb); 5174 5175 if (k > 0) { 5176 /* divide the power of two out */ 5177 if ((res = rshift_bits(&u, k, &u, NULL)) != MP_OKAY) { 5178 goto LBL_V; 5179 } 5180 5181 if ((res = rshift_bits(&v, k, &v, NULL)) != MP_OKAY) { 5182 goto LBL_V; 5183 } 5184 } 5185 5186 /* divide any remaining factors of two out */ 5187 if (u_lsb != k) { 5188 if ((res = rshift_bits(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { 5189 goto LBL_V; 5190 } 5191 } 5192 5193 if (v_lsb != k) { 5194 if ((res = rshift_bits(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { 5195 goto LBL_V; 5196 } 5197 } 5198 5199 while (PGPV_BN_is_zero(&v) == 0) { 5200 /* make sure v is the largest */ 5201 if (compare_magnitude(&u, &v) == MP_GT) { 5202 /* swap u and v to make sure v is >= u */ 5203 mp_exch(&u, &v); 5204 } 5205 5206 /* subtract smallest from largest */ 5207 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 5208 goto LBL_V; 5209 } 5210 5211 /* Divide out all factors of two */ 5212 if ((res = rshift_bits(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { 5213 goto LBL_V; 5214 } 5215 } 5216 5217 /* multiply by 2**k which we divided out at the beginning */ 5218 if ((res = lshift_bits(&u, k, c)) != MP_OKAY) { 5219 goto LBL_V; 5220 } 5221 c->sign = MP_ZPOS; 5222 res = MP_OKAY; 5223 LBL_V: 5224 mp_clear (&u); 5225 LBL_U: 5226 mp_clear (&v); 5227 return res; 5228 } 5229 5230 /**************************************************************************/ 5231 5232 /* PGPV_BIGNUM emulation layer */ 5233 5234 /* essentiually, these are just wrappers around the libtommath functions */ 5235 /* usually the order of args changes */ 5236 /* the PGPV_BIGNUM API tends to have more const poisoning */ 5237 /* these wrappers also check the arguments passed for sanity */ 5238 5239 PGPV_BIGNUM * 5240 PGPV_BN_bin2bn(const uint8_t *data, int len, PGPV_BIGNUM *ret) 5241 { 5242 if (data == NULL) { 5243 return PGPV_BN_new(); 5244 } 5245 if (ret == NULL) { 5246 ret = PGPV_BN_new(); 5247 } 5248 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5249 } 5250 5251 /* store in unsigned [big endian] format */ 5252 int 5253 PGPV_BN_bn2bin(const PGPV_BIGNUM *a, unsigned char *b) 5254 { 5255 PGPV_BIGNUM t; 5256 int x; 5257 5258 if (a == NULL || b == NULL) { 5259 return -1; 5260 } 5261 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5262 return -1; 5263 } 5264 for (x = 0; !PGPV_BN_is_zero(&t) ; ) { 5265 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5266 if (rshift_bits(&t, 8, &t, NULL) != MP_OKAY) { 5267 mp_clear(&t); 5268 return -1; 5269 } 5270 } 5271 bn_reverse(b, x); 5272 mp_clear(&t); 5273 return x; 5274 } 5275 5276 void 5277 PGPV_BN_init(PGPV_BIGNUM *a) 5278 { 5279 if (a != NULL) { 5280 mp_init(a); 5281 } 5282 } 5283 5284 PGPV_BIGNUM * 5285 PGPV_BN_new(void) 5286 { 5287 PGPV_BIGNUM *a; 5288 5289 if ((a = allocate(1, sizeof(*a))) != NULL) { 5290 mp_init(a); 5291 } 5292 return a; 5293 } 5294 5295 /* copy, b = a */ 5296 int 5297 PGPV_BN_copy(PGPV_BIGNUM *b, const PGPV_BIGNUM *a) 5298 { 5299 if (a == NULL || b == NULL) { 5300 return MP_VAL; 5301 } 5302 return mp_copy(__UNCONST(a), b); 5303 } 5304 5305 PGPV_BIGNUM * 5306 PGPV_BN_dup(const PGPV_BIGNUM *a) 5307 { 5308 PGPV_BIGNUM *ret; 5309 5310 if (a == NULL) { 5311 return NULL; 5312 } 5313 if ((ret = PGPV_BN_new()) != NULL) { 5314 PGPV_BN_copy(ret, a); 5315 } 5316 return ret; 5317 } 5318 5319 void 5320 PGPV_BN_swap(PGPV_BIGNUM *a, PGPV_BIGNUM *b) 5321 { 5322 if (a && b) { 5323 mp_exch(a, b); 5324 } 5325 } 5326 5327 int 5328 PGPV_BN_lshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n) 5329 { 5330 if (r == NULL || a == NULL || n < 0) { 5331 return 0; 5332 } 5333 PGPV_BN_copy(r, a); 5334 return lshift_digits(r, n) == MP_OKAY; 5335 } 5336 5337 int 5338 PGPV_BN_lshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a) 5339 { 5340 if (r == NULL || a == NULL) { 5341 return 0; 5342 } 5343 PGPV_BN_copy(r, a); 5344 return lshift_digits(r, 1) == MP_OKAY; 5345 } 5346 5347 int 5348 PGPV_BN_rshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n) 5349 { 5350 if (r == NULL || a == NULL || n < 0) { 5351 return MP_VAL; 5352 } 5353 PGPV_BN_copy(r, a); 5354 return rshift_digits(r, n) == MP_OKAY; 5355 } 5356 5357 int 5358 PGPV_BN_rshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a) 5359 { 5360 if (r == NULL || a == NULL) { 5361 return 0; 5362 } 5363 PGPV_BN_copy(r, a); 5364 return rshift_digits(r, 1) == MP_OKAY; 5365 } 5366 5367 int 5368 PGPV_BN_set_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w) 5369 { 5370 if (a == NULL) { 5371 return 0; 5372 } 5373 set_word(a, w); 5374 return 1; 5375 } 5376 5377 int 5378 PGPV_BN_add(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b) 5379 { 5380 if (a == NULL || b == NULL || r == NULL) { 5381 return 0; 5382 } 5383 return signed_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5384 } 5385 5386 int 5387 PGPV_BN_sub(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b) 5388 { 5389 if (a == NULL || b == NULL || r == NULL) { 5390 return 0; 5391 } 5392 return signed_subtract(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5393 } 5394 5395 int 5396 PGPV_BN_mul(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b, PGPV_BN_CTX *ctx) 5397 { 5398 if (a == NULL || b == NULL || r == NULL) { 5399 return 0; 5400 } 5401 USE_ARG(ctx); 5402 return signed_multiply(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5403 } 5404 5405 int 5406 PGPV_BN_div(PGPV_BIGNUM *dv, PGPV_BIGNUM *rem, const PGPV_BIGNUM *a, const PGPV_BIGNUM *d, PGPV_BN_CTX *ctx) 5407 { 5408 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5409 return 0; 5410 } 5411 USE_ARG(ctx); 5412 return signed_divide(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5413 } 5414 5415 /* perform a bit operation on the 2 bignums */ 5416 int 5417 PGPV_BN_bitop(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, char op, const PGPV_BIGNUM *b) 5418 { 5419 unsigned ndigits; 5420 mp_digit ad; 5421 mp_digit bd; 5422 int i; 5423 5424 if (a == NULL || b == NULL || r == NULL) { 5425 return 0; 5426 } 5427 if (PGPV_BN_cmp(__UNCONST(a), __UNCONST(b)) >= 0) { 5428 PGPV_BN_copy(r, a); 5429 ndigits = a->used; 5430 } else { 5431 PGPV_BN_copy(r, b); 5432 ndigits = b->used; 5433 } 5434 for (i = 0 ; i < (int)ndigits ; i++) { 5435 ad = (i > a->used) ? 0 : a->dp[i]; 5436 bd = (i > b->used) ? 0 : b->dp[i]; 5437 switch(op) { 5438 case '&': 5439 r->dp[i] = (ad & bd); 5440 break; 5441 case '|': 5442 r->dp[i] = (ad | bd); 5443 break; 5444 case '^': 5445 r->dp[i] = (ad ^ bd); 5446 break; 5447 default: 5448 break; 5449 } 5450 } 5451 return 1; 5452 } 5453 5454 void 5455 PGPV_BN_free(PGPV_BIGNUM *a) 5456 { 5457 if (a) { 5458 mp_clear(a); 5459 free(a); 5460 } 5461 } 5462 5463 void 5464 PGPV_BN_clear(PGPV_BIGNUM *a) 5465 { 5466 if (a) { 5467 mp_clear(a); 5468 } 5469 } 5470 5471 void 5472 PGPV_BN_clear_free(PGPV_BIGNUM *a) 5473 { 5474 PGPV_BN_clear(a); 5475 free(a); 5476 } 5477 5478 int 5479 PGPV_BN_num_bytes(const PGPV_BIGNUM *a) 5480 { 5481 if (a == NULL) { 5482 return MP_VAL; 5483 } 5484 return mp_unsigned_bin_size(__UNCONST(a)); 5485 } 5486 5487 int 5488 PGPV_BN_num_bits(const PGPV_BIGNUM *a) 5489 { 5490 if (a == NULL) { 5491 return 0; 5492 } 5493 return mp_count_bits(a); 5494 } 5495 5496 void 5497 PGPV_BN_set_negative(PGPV_BIGNUM *a, int n) 5498 { 5499 if (a) { 5500 a->sign = (n) ? MP_NEG : 0; 5501 } 5502 } 5503 5504 int 5505 PGPV_BN_cmp(PGPV_BIGNUM *a, PGPV_BIGNUM *b) 5506 { 5507 if (a == NULL || b == NULL) { 5508 return MP_VAL; 5509 } 5510 switch(signed_compare(a, b)) { 5511 case MP_LT: 5512 return -1; 5513 case MP_GT: 5514 return 1; 5515 case MP_EQ: 5516 default: 5517 return 0; 5518 } 5519 } 5520 5521 int 5522 PGPV_BN_mod_exp(PGPV_BIGNUM *Y, PGPV_BIGNUM *G, const PGPV_BIGNUM *X, const PGPV_BIGNUM *P, PGPV_BN_CTX *ctx) 5523 { 5524 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5525 return MP_VAL; 5526 } 5527 USE_ARG(ctx); 5528 return exponent_modulo(G, __UNCONST(X), __UNCONST(P), Y) == MP_OKAY; 5529 } 5530 5531 PGPV_BIGNUM * 5532 PGPV_BN_mod_inverse(PGPV_BIGNUM *r, PGPV_BIGNUM *a, const PGPV_BIGNUM *n, PGPV_BN_CTX *ctx) 5533 { 5534 USE_ARG(ctx); 5535 if (r == NULL || a == NULL || n == NULL) { 5536 return NULL; 5537 } 5538 return (modular_inverse(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5539 } 5540 5541 int 5542 PGPV_BN_mod_mul(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx) 5543 { 5544 USE_ARG(ctx); 5545 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5546 return 0; 5547 } 5548 return multiply_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY; 5549 } 5550 5551 int 5552 PGPV_BN_mod_add(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx) 5553 { 5554 USE_ARG(ctx); 5555 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5556 return 0; 5557 } 5558 return add_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY; 5559 } 5560 5561 PGPV_BN_CTX * 5562 PGPV_BN_CTX_new(void) 5563 { 5564 return allocate(1, sizeof(PGPV_BN_CTX)); 5565 } 5566 5567 void 5568 PGPV_BN_CTX_init(PGPV_BN_CTX *c) 5569 { 5570 if (c != NULL) { 5571 c->arraysize = 15; 5572 if ((c->v = allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5573 c->arraysize = 0; 5574 } 5575 } 5576 } 5577 5578 PGPV_BIGNUM * 5579 PGPV_BN_CTX_get(PGPV_BN_CTX *ctx) 5580 { 5581 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5582 return NULL; 5583 } 5584 return ctx->v[ctx->count++] = PGPV_BN_new(); 5585 } 5586 5587 void 5588 PGPV_BN_CTX_start(PGPV_BN_CTX *ctx) 5589 { 5590 PGPV_BN_CTX_init(ctx); 5591 } 5592 5593 void 5594 PGPV_BN_CTX_free(PGPV_BN_CTX *c) 5595 { 5596 unsigned i; 5597 5598 if (c != NULL && c->v != NULL) { 5599 for (i = 0 ; i < c->count ; i++) { 5600 PGPV_BN_clear_free(c->v[i]); 5601 } 5602 deallocate(c->v, sizeof(*c->v) * c->arraysize); 5603 } 5604 } 5605 5606 void 5607 PGPV_BN_CTX_end(PGPV_BN_CTX *ctx) 5608 { 5609 PGPV_BN_CTX_free(ctx); 5610 } 5611 5612 char * 5613 PGPV_BN_bn2hex(const PGPV_BIGNUM *a) 5614 { 5615 return (a == NULL) ? NULL : formatbn(a, 16); 5616 } 5617 5618 char * 5619 PGPV_BN_bn2dec(const PGPV_BIGNUM *a) 5620 { 5621 return (a == NULL) ? NULL : formatbn(a, 10); 5622 } 5623 5624 char * 5625 PGPV_BN_bn2radix(const PGPV_BIGNUM *a, unsigned radix) 5626 { 5627 return (a == NULL) ? NULL : formatbn(a, (int)radix); 5628 } 5629 5630 #ifndef _KERNEL 5631 int 5632 PGPV_BN_print_fp(FILE *fp, const PGPV_BIGNUM *a) 5633 { 5634 char *s; 5635 int ret; 5636 5637 if (fp == NULL || a == NULL) { 5638 return 0; 5639 } 5640 s = PGPV_BN_bn2hex(a); 5641 ret = fprintf(fp, "%s", s); 5642 deallocate(s, strlen(s) + 1); 5643 return ret; 5644 } 5645 #endif 5646 5647 #ifdef PGPV_BN_RAND_NEEDED 5648 int 5649 PGPV_BN_rand(PGPV_BIGNUM *rnd, int bits, int top, int bottom) 5650 { 5651 uint64_t r; 5652 int digits; 5653 int i; 5654 5655 if (rnd == NULL) { 5656 return 0; 5657 } 5658 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5659 for (i = 0 ; i < digits ; i++) { 5660 r = (uint64_t)arc4random(); 5661 r <<= 32; 5662 r |= arc4random(); 5663 rnd->dp[i] = (r & MP_MASK); 5664 rnd->used += 1; 5665 } 5666 if (top == 0) { 5667 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5668 } 5669 if (top == 1) { 5670 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5671 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5672 } 5673 if (bottom) { 5674 rnd->dp[0] |= 0x1; 5675 } 5676 return 1; 5677 } 5678 5679 int 5680 PGPV_BN_rand_range(PGPV_BIGNUM *rnd, PGPV_BIGNUM *range) 5681 { 5682 if (rnd == NULL || range == NULL || PGPV_BN_is_zero(range)) { 5683 return 0; 5684 } 5685 PGPV_BN_rand(rnd, PGPV_BN_num_bits(range), 1, 0); 5686 return modulo(rnd, range, rnd) == MP_OKAY; 5687 } 5688 #endif 5689 5690 int 5691 PGPV_BN_is_prime(const PGPV_BIGNUM *a, int checks, void (*callback)(int, int, void *), PGPV_BN_CTX *ctx, void *cb_arg) 5692 { 5693 int primality; 5694 5695 if (a == NULL) { 5696 return 0; 5697 } 5698 USE_ARG(ctx); 5699 USE_ARG(cb_arg); 5700 USE_ARG(callback); 5701 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5702 } 5703 5704 const PGPV_BIGNUM * 5705 PGPV_BN_value_one(void) 5706 { 5707 static mp_digit digit = 1UL; 5708 static const PGPV_BIGNUM one = { &digit, 1, 1, 0 }; 5709 5710 return &one; 5711 } 5712 5713 int 5714 PGPV_BN_hex2bn(PGPV_BIGNUM **a, const char *str) 5715 { 5716 return getbn(a, str, 16); 5717 } 5718 5719 int 5720 PGPV_BN_dec2bn(PGPV_BIGNUM **a, const char *str) 5721 { 5722 return getbn(a, str, 10); 5723 } 5724 5725 int 5726 PGPV_BN_radix2bn(PGPV_BIGNUM **a, const char *str, unsigned radix) 5727 { 5728 return getbn(a, str, (int)radix); 5729 } 5730 5731 int 5732 PGPV_BN_mod_sub(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx) 5733 { 5734 USE_ARG(ctx); 5735 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5736 return 0; 5737 } 5738 return subtract_modulo(a, b, __UNCONST(m), r) == MP_OKAY; 5739 } 5740 5741 int 5742 PGPV_BN_is_bit_set(const PGPV_BIGNUM *a, int n) 5743 { 5744 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5745 return 0; 5746 } 5747 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5748 } 5749 5750 /* raise 'a' to power of 'b' */ 5751 int 5752 PGPV_BN_raise(PGPV_BIGNUM *res, PGPV_BIGNUM *a, PGPV_BIGNUM *b) 5753 { 5754 uint64_t exponent; 5755 PGPV_BIGNUM *power; 5756 PGPV_BIGNUM *temp; 5757 char *t; 5758 5759 t = PGPV_BN_bn2dec(b); 5760 exponent = (uint64_t)strtoull(t, NULL, 10); 5761 free(t); 5762 if (exponent == 0) { 5763 PGPV_BN_copy(res, PGPV_BN_value_one()); 5764 } else { 5765 power = PGPV_BN_dup(a); 5766 for ( ; (exponent & 1) == 0 ; exponent >>= 1) { 5767 PGPV_BN_mul(power, power, power, NULL); 5768 } 5769 temp = PGPV_BN_dup(power); 5770 for (exponent >>= 1 ; exponent > 0 ; exponent >>= 1) { 5771 PGPV_BN_mul(power, power, power, NULL); 5772 if (exponent & 1) { 5773 PGPV_BN_mul(temp, power, temp, NULL); 5774 } 5775 } 5776 PGPV_BN_copy(res, temp); 5777 PGPV_BN_free(power); 5778 PGPV_BN_free(temp); 5779 } 5780 return 1; 5781 } 5782 5783 /* compute the factorial */ 5784 int 5785 PGPV_BN_factorial(PGPV_BIGNUM *res, PGPV_BIGNUM *f) 5786 { 5787 PGPV_BIGNUM *one; 5788 PGPV_BIGNUM *i; 5789 5790 i = PGPV_BN_dup(f); 5791 one = __UNCONST(PGPV_BN_value_one()); 5792 PGPV_BN_sub(i, i, one); 5793 PGPV_BN_copy(res, f); 5794 while (PGPV_BN_cmp(i, one) > 0) { 5795 PGPV_BN_mul(res, res, i, NULL); 5796 PGPV_BN_sub(i, i, one); 5797 } 5798 PGPV_BN_free(i); 5799 return 1; 5800 } 5801 5802 /* get greatest common divisor */ 5803 int 5804 PGPV_BN_gcd(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, PGPV_BN_CTX *ctx) 5805 { 5806 return mp_gcd(a, b, r); 5807 } 5808 5809 int 5810 PGPV_BN_sub_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w) 5811 { 5812 PGPV_BIGNUM *bnw; 5813 5814 bnw = PGPV_BN_new(); 5815 PGPV_BN_set_word(bnw, w); 5816 PGPV_BN_sub(a, a, bnw); 5817 PGPV_BN_free(bnw); 5818 return 1; 5819 } 5820 5821 int 5822 PGPV_BN_add_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w) 5823 { 5824 PGPV_BIGNUM *bnw; 5825 5826 bnw = PGPV_BN_new(); 5827 PGPV_BN_set_word(bnw, w); 5828 PGPV_BN_add(a, a, bnw); 5829 PGPV_BN_free(bnw); 5830 return 1; 5831 } 5832