1 /*- 2 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 /* LibTomMath, multiple-precision integer library -- Tom St Denis 26 * 27 * LibTomMath is a library that provides multiple-precision 28 * integer arithmetic as well as number theoretic functionality. 29 * 30 * The library was designed directly after the MPI library by 31 * Michael Fromberger but has been written from scratch with 32 * additional optimizations in place. 33 * 34 * The library is free for all purposes without any express 35 * guarantee it works. 36 * 37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 38 */ 39 #include "config.h" 40 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <sys/param.h> 44 45 #ifdef _KERNEL 46 # include <sys/kmem.h> 47 #else 48 # include <arpa/inet.h> 49 # include <ctype.h> 50 # include <inttypes.h> 51 # include <stdarg.h> 52 # include <stdio.h> 53 # include <stdlib.h> 54 # include <string.h> 55 # include <time.h> 56 # include <unistd.h> 57 #endif 58 59 #include "misc.h" 60 #include "bn.h" 61 #include "digest.h" 62 63 /**************************************************************************/ 64 65 /* LibTomMath, multiple-precision integer library -- Tom St Denis 66 * 67 * LibTomMath is a library that provides multiple-precision 68 * integer arithmetic as well as number theoretic functionality. 69 * 70 * The library was designed directly after the MPI library by 71 * Michael Fromberger but has been written from scratch with 72 * additional optimizations in place. 73 * 74 * The library is free for all purposes without any express 75 * guarantee it works. 76 * 77 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 78 */ 79 80 #define MP_PREC 32 81 #define DIGIT_BIT 28 82 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 83 84 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 85 86 #define MP_NO 0 87 #define MP_YES 1 88 89 #ifndef USE_ARG 90 #define USE_ARG(x) /*LINTED*/(void)&(x) 91 #endif 92 93 #ifndef __arraycount 94 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 95 #endif 96 97 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO) 98 99 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) 100 101 typedef int mp_err; 102 103 static int mp_mul(mp_int * a, mp_int * b, mp_int * c); 104 static int mp_sqr(mp_int * a, mp_int * b); 105 106 static int mp_sub_d(mp_int *a, mp_digit b, mp_int *c); 107 108 /* set to zero */ 109 static void 110 mp_zero(mp_int *a) 111 { 112 int n; 113 mp_digit *tmp; 114 115 a->sign = MP_ZPOS; 116 a->used = 0; 117 118 tmp = a->dp; 119 /* XXX - memset */ 120 for (n = 0; n < a->alloc; n++) { 121 *tmp++ = 0; 122 } 123 } 124 125 /* grow as required */ 126 static int 127 mp_grow(mp_int *a, int size) 128 { 129 int i; 130 mp_digit *tmp; 131 132 /* if the alloc size is smaller alloc more ram */ 133 if (a->alloc < size) { 134 /* ensure there are always at least MP_PREC digits extra on top */ 135 size += (MP_PREC * 2) - (size % MP_PREC); 136 137 /* reallocate the array a->dp 138 * 139 * We store the return in a temporary variable 140 * in case the operation failed we don't want 141 * to overwrite the dp member of a. 142 */ 143 tmp = realloc(a->dp, sizeof(*tmp) * size); 144 if (tmp == NULL) { 145 /* reallocation failed but "a" is still valid [can be freed] */ 146 return MP_MEM; 147 } 148 149 /* reallocation succeeded so set a->dp */ 150 a->dp = tmp; 151 152 /* zero excess digits */ 153 i = a->alloc; 154 a->alloc = size; 155 for (; i < a->alloc; i++) { 156 a->dp[i] = 0; 157 } 158 } 159 return MP_OKAY; 160 } 161 162 /* shift left a certain amount of digits */ 163 static int 164 mp_lshd (mp_int * a, int b) 165 { 166 int x, res; 167 168 /* if its less than zero return */ 169 if (b <= 0) { 170 return MP_OKAY; 171 } 172 173 /* grow to fit the new digits */ 174 if (a->alloc < a->used + b) { 175 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { 176 return res; 177 } 178 } 179 180 { 181 mp_digit *top, *bottom; 182 183 /* increment the used by the shift amount then copy upwards */ 184 a->used += b; 185 186 /* top */ 187 top = a->dp + a->used - 1; 188 189 /* base */ 190 bottom = a->dp + a->used - 1 - b; 191 192 /* much like mp_rshd this is implemented using a sliding window 193 * except the window goes the otherway around. Copying from 194 * the bottom to the top. see bn_mp_rshd.c for more info. 195 */ 196 for (x = a->used - 1; x >= b; x--) { 197 *top-- = *bottom--; 198 } 199 200 /* zero the lower digits */ 201 top = a->dp; 202 for (x = 0; x < b; x++) { 203 *top++ = 0; 204 } 205 } 206 return MP_OKAY; 207 } 208 209 /* trim unused digits 210 * 211 * This is used to ensure that leading zero digits are 212 * trimed and the leading "used" digit will be non-zero 213 * Typically very fast. Also fixes the sign if there 214 * are no more leading digits 215 */ 216 static void 217 mp_clamp (mp_int * a) 218 { 219 /* decrease used while the most significant digit is 220 * zero. 221 */ 222 while (a->used > 0 && a->dp[a->used - 1] == 0) { 223 --(a->used); 224 } 225 226 /* reset the sign flag if used == 0 */ 227 if (a->used == 0) { 228 a->sign = MP_ZPOS; 229 } 230 } 231 232 /* copy, b = a */ 233 static int 234 mp_copy(BIGNUM *a, BIGNUM *b) 235 { 236 int res, n; 237 238 /* if dst == src do nothing */ 239 if (a == b) { 240 return MP_OKAY; 241 } 242 if (a == NULL || b == NULL) { 243 return MP_VAL; 244 } 245 246 /* grow dest */ 247 if (b->alloc < a->used) { 248 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 249 return res; 250 } 251 } 252 253 /* zero b and copy the parameters over */ 254 { 255 mp_digit *tmpa, *tmpb; 256 257 /* pointer aliases */ 258 259 /* source */ 260 tmpa = a->dp; 261 262 /* destination */ 263 tmpb = b->dp; 264 265 /* copy all the digits */ 266 for (n = 0; n < a->used; n++) { 267 *tmpb++ = *tmpa++; 268 } 269 270 /* clear high digits */ 271 for (; n < b->used; n++) { 272 *tmpb++ = 0; 273 } 274 } 275 276 /* copy used count and sign */ 277 b->used = a->used; 278 b->sign = a->sign; 279 return MP_OKAY; 280 } 281 282 /* shift left by a certain bit count */ 283 static int 284 mp_mul_2d(mp_int *a, int b, mp_int *c) 285 { 286 mp_digit d; 287 int res; 288 289 /* copy */ 290 if (a != c) { 291 if ((res = mp_copy (a, c)) != MP_OKAY) { 292 return res; 293 } 294 } 295 296 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 297 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 298 return res; 299 } 300 } 301 302 /* shift by as many digits in the bit count */ 303 if (b >= (int)DIGIT_BIT) { 304 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { 305 return res; 306 } 307 } 308 309 /* shift any bit count < DIGIT_BIT */ 310 d = (mp_digit) (b % DIGIT_BIT); 311 if (d != 0) { 312 mp_digit *tmpc, shift, mask, r, rr; 313 int x; 314 315 /* bitmask for carries */ 316 mask = (((mp_digit)1) << d) - 1; 317 318 /* shift for msbs */ 319 shift = DIGIT_BIT - d; 320 321 /* alias */ 322 tmpc = c->dp; 323 324 /* carry */ 325 r = 0; 326 for (x = 0; x < c->used; x++) { 327 /* get the higher bits of the current word */ 328 rr = (*tmpc >> shift) & mask; 329 330 /* shift the current word and OR in the carry */ 331 *tmpc = ((*tmpc << d) | r) & MP_MASK; 332 ++tmpc; 333 334 /* set the carry to the carry bits of the current word */ 335 r = rr; 336 } 337 338 /* set final carry */ 339 if (r != 0) { 340 c->dp[(c->used)++] = r; 341 } 342 } 343 mp_clamp (c); 344 return MP_OKAY; 345 } 346 347 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 348 static int 349 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 350 { 351 int res; 352 353 /* make sure there are at least two digits */ 354 if (a->alloc < 2) { 355 if ((res = mp_grow(a, 2)) != MP_OKAY) { 356 return res; 357 } 358 } 359 360 /* zero the int */ 361 mp_zero (a); 362 363 /* read the bytes in */ 364 while (c-- > 0) { 365 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { 366 return res; 367 } 368 369 a->dp[0] |= *b++; 370 a->used += 1; 371 } 372 mp_clamp (a); 373 return MP_OKAY; 374 } 375 376 /* returns the number of bits in an int */ 377 static int 378 mp_count_bits(const mp_int *a) 379 { 380 int r; 381 mp_digit q; 382 383 /* shortcut */ 384 if (a->used == 0) { 385 return 0; 386 } 387 388 /* get number of digits and add that */ 389 r = (a->used - 1) * DIGIT_BIT; 390 391 /* take the last digit and count the bits in it */ 392 q = a->dp[a->used - 1]; 393 while (q > ((mp_digit) 0)) { 394 ++r; 395 q >>= ((mp_digit) 1); 396 } 397 return r; 398 } 399 400 /* compare maginitude of two ints (unsigned) */ 401 static int 402 mp_cmp_mag (mp_int * a, mp_int * b) 403 { 404 int n; 405 mp_digit *tmpa, *tmpb; 406 407 /* compare based on # of non-zero digits */ 408 if (a->used > b->used) { 409 return MP_GT; 410 } 411 412 if (a->used < b->used) { 413 return MP_LT; 414 } 415 416 /* alias for a */ 417 tmpa = a->dp + (a->used - 1); 418 419 /* alias for b */ 420 tmpb = b->dp + (a->used - 1); 421 422 /* compare based on digits */ 423 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 424 if (*tmpa > *tmpb) { 425 return MP_GT; 426 } 427 428 if (*tmpa < *tmpb) { 429 return MP_LT; 430 } 431 } 432 return MP_EQ; 433 } 434 435 /* compare two ints (signed)*/ 436 static int 437 mp_cmp (mp_int * a, mp_int * b) 438 { 439 /* compare based on sign */ 440 if (a->sign != b->sign) { 441 if (a->sign == MP_NEG) { 442 return MP_LT; 443 } else { 444 return MP_GT; 445 } 446 } 447 448 /* compare digits */ 449 if (a->sign == MP_NEG) { 450 /* if negative compare opposite direction */ 451 return mp_cmp_mag(b, a); 452 } else { 453 return mp_cmp_mag(a, b); 454 } 455 } 456 457 /* get the size for an unsigned equivalent */ 458 static int 459 mp_unsigned_bin_size (mp_int * a) 460 { 461 int size = mp_count_bits (a); 462 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 463 } 464 465 /* init a new mp_int */ 466 static int 467 mp_init (mp_int * a) 468 { 469 int i; 470 471 /* allocate memory required and clear it */ 472 a->dp = netpgp_allocate(1, sizeof (*a->dp) * MP_PREC); 473 if (a->dp == NULL) { 474 return MP_MEM; 475 } 476 477 /* set the digits to zero */ 478 for (i = 0; i < MP_PREC; i++) { 479 a->dp[i] = 0; 480 } 481 482 /* set the used to zero, allocated digits to the default precision 483 * and sign to positive */ 484 a->used = 0; 485 a->alloc = MP_PREC; 486 a->sign = MP_ZPOS; 487 488 return MP_OKAY; 489 } 490 491 /* clear one (frees) */ 492 static void 493 mp_clear (mp_int * a) 494 { 495 int i; 496 497 /* only do anything if a hasn't been freed previously */ 498 if (a->dp != NULL) { 499 /* first zero the digits */ 500 for (i = 0; i < a->used; i++) { 501 a->dp[i] = 0; 502 } 503 504 /* free ram */ 505 netpgp_deallocate(a->dp, (size_t)a->alloc); 506 507 /* reset members to make debugging easier */ 508 a->dp = NULL; 509 a->alloc = a->used = 0; 510 a->sign = MP_ZPOS; 511 } 512 } 513 514 static int 515 mp_init_multi(mp_int *mp, ...) 516 { 517 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 518 int n = 0; /* Number of ok inits */ 519 mp_int* cur_arg = mp; 520 va_list args; 521 522 va_start(args, mp); /* init args to next argument from caller */ 523 while (cur_arg != NULL) { 524 if (mp_init(cur_arg) != MP_OKAY) { 525 /* Oops - error! Back-track and mp_clear what we already 526 succeeded in init-ing, then return error. 527 */ 528 va_list clean_args; 529 530 /* end the current list */ 531 va_end(args); 532 533 /* now start cleaning up */ 534 cur_arg = mp; 535 va_start(clean_args, mp); 536 while (n--) { 537 mp_clear(cur_arg); 538 cur_arg = va_arg(clean_args, mp_int*); 539 } 540 va_end(clean_args); 541 res = MP_MEM; 542 break; 543 } 544 n++; 545 cur_arg = va_arg(args, mp_int*); 546 } 547 va_end(args); 548 return res; /* Assumed ok, if error flagged above. */ 549 } 550 551 /* init an mp_init for a given size */ 552 static int 553 mp_init_size (mp_int * a, int size) 554 { 555 int x; 556 557 /* pad size so there are always extra digits */ 558 size += (MP_PREC * 2) - (size % MP_PREC); 559 560 /* alloc mem */ 561 a->dp = netpgp_allocate (1, sizeof (*a->dp) * size); 562 if (a->dp == NULL) { 563 return MP_MEM; 564 } 565 566 /* set the members */ 567 a->used = 0; 568 a->alloc = size; 569 a->sign = MP_ZPOS; 570 571 /* zero the digits */ 572 for (x = 0; x < size; x++) { 573 a->dp[x] = 0; 574 } 575 576 return MP_OKAY; 577 } 578 579 /* creates "a" then copies b into it */ 580 static int mp_init_copy (mp_int * a, mp_int * b) 581 { 582 int res; 583 584 if ((res = mp_init (a)) != MP_OKAY) { 585 return res; 586 } 587 return mp_copy (b, a); 588 } 589 590 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 591 static int 592 s_mp_add (mp_int * a, mp_int * b, mp_int * c) 593 { 594 mp_int *x; 595 int olduse, res, min, max; 596 597 /* find sizes, we let |a| <= |b| which means we have to sort 598 * them. "x" will point to the input with the most digits 599 */ 600 if (a->used > b->used) { 601 min = b->used; 602 max = a->used; 603 x = a; 604 } else { 605 min = a->used; 606 max = b->used; 607 x = b; 608 } 609 610 /* init result */ 611 if (c->alloc < max + 1) { 612 if ((res = mp_grow (c, max + 1)) != MP_OKAY) { 613 return res; 614 } 615 } 616 617 /* get old used digit count and set new one */ 618 olduse = c->used; 619 c->used = max + 1; 620 621 { 622 mp_digit u, *tmpa, *tmpb, *tmpc; 623 int i; 624 625 /* alias for digit pointers */ 626 627 /* first input */ 628 tmpa = a->dp; 629 630 /* second input */ 631 tmpb = b->dp; 632 633 /* destination */ 634 tmpc = c->dp; 635 636 /* zero the carry */ 637 u = 0; 638 for (i = 0; i < min; i++) { 639 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 640 *tmpc = *tmpa++ + *tmpb++ + u; 641 642 /* U = carry bit of T[i] */ 643 u = *tmpc >> ((mp_digit)DIGIT_BIT); 644 645 /* take away carry bit from T[i] */ 646 *tmpc++ &= MP_MASK; 647 } 648 649 /* now copy higher words if any, that is in A+B 650 * if A or B has more digits add those in 651 */ 652 if (min != max) { 653 for (; i < max; i++) { 654 /* T[i] = X[i] + U */ 655 *tmpc = x->dp[i] + u; 656 657 /* U = carry bit of T[i] */ 658 u = *tmpc >> ((mp_digit)DIGIT_BIT); 659 660 /* take away carry bit from T[i] */ 661 *tmpc++ &= MP_MASK; 662 } 663 } 664 665 /* add carry */ 666 *tmpc++ = u; 667 668 /* clear digits above oldused */ 669 for (i = c->used; i < olduse; i++) { 670 *tmpc++ = 0; 671 } 672 } 673 674 mp_clamp (c); 675 return MP_OKAY; 676 } 677 678 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 679 static int 680 s_mp_sub (mp_int * a, mp_int * b, mp_int * c) 681 { 682 int olduse, res, min, max; 683 684 /* find sizes */ 685 min = b->used; 686 max = a->used; 687 688 /* init result */ 689 if (c->alloc < max) { 690 if ((res = mp_grow (c, max)) != MP_OKAY) { 691 return res; 692 } 693 } 694 olduse = c->used; 695 c->used = max; 696 697 { 698 mp_digit u, *tmpa, *tmpb, *tmpc; 699 int i; 700 701 /* alias for digit pointers */ 702 tmpa = a->dp; 703 tmpb = b->dp; 704 tmpc = c->dp; 705 706 /* set carry to zero */ 707 u = 0; 708 for (i = 0; i < min; i++) { 709 /* T[i] = A[i] - B[i] - U */ 710 *tmpc = *tmpa++ - *tmpb++ - u; 711 712 /* U = carry bit of T[i] 713 * Note this saves performing an AND operation since 714 * if a carry does occur it will propagate all the way to the 715 * MSB. As a result a single shift is enough to get the carry 716 */ 717 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 718 719 /* Clear carry from T[i] */ 720 *tmpc++ &= MP_MASK; 721 } 722 723 /* now copy higher words if any, e.g. if A has more digits than B */ 724 for (; i < max; i++) { 725 /* T[i] = A[i] - U */ 726 *tmpc = *tmpa++ - u; 727 728 /* U = carry bit of T[i] */ 729 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 730 731 /* Clear carry from T[i] */ 732 *tmpc++ &= MP_MASK; 733 } 734 735 /* clear digits above used (since we may not have grown result above) */ 736 for (i = c->used; i < olduse; i++) { 737 *tmpc++ = 0; 738 } 739 } 740 741 mp_clamp (c); 742 return MP_OKAY; 743 } 744 745 /* high level subtraction (handles signs) */ 746 static int 747 mp_sub (mp_int * a, mp_int * b, mp_int * c) 748 { 749 int sa, sb, res; 750 751 sa = a->sign; 752 sb = b->sign; 753 754 if (sa != sb) { 755 /* subtract a negative from a positive, OR */ 756 /* subtract a positive from a negative. */ 757 /* In either case, ADD their magnitudes, */ 758 /* and use the sign of the first number. */ 759 c->sign = sa; 760 res = s_mp_add (a, b, c); 761 } else { 762 /* subtract a positive from a positive, OR */ 763 /* subtract a negative from a negative. */ 764 /* First, take the difference between their */ 765 /* magnitudes, then... */ 766 if (mp_cmp_mag (a, b) != MP_LT) { 767 /* Copy the sign from the first */ 768 c->sign = sa; 769 /* The first has a larger or equal magnitude */ 770 res = s_mp_sub (a, b, c); 771 } else { 772 /* The result has the *opposite* sign from */ 773 /* the first number. */ 774 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 775 /* The second has a larger magnitude */ 776 res = s_mp_sub (b, a, c); 777 } 778 } 779 return res; 780 } 781 782 /* shift right a certain amount of digits */ 783 static int mp_rshd (mp_int * a, int b) 784 { 785 int x; 786 787 /* if b <= 0 then ignore it */ 788 if (b <= 0) { 789 return 0; 790 } 791 792 /* if b > used then simply zero it and return */ 793 if (a->used <= b) { 794 mp_zero (a); 795 return 0; 796 } 797 798 { 799 mp_digit *bottom, *top; 800 801 /* shift the digits down */ 802 803 /* bottom */ 804 bottom = a->dp; 805 806 /* top [offset into digits] */ 807 top = a->dp + b; 808 809 /* this is implemented as a sliding window where 810 * the window is b-digits long and digits from 811 * the top of the window are copied to the bottom 812 * 813 * e.g. 814 815 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 816 /\ | ----> 817 \-------------------/ ----> 818 */ 819 for (x = 0; x < (a->used - b); x++) { 820 *bottom++ = *top++; 821 } 822 823 /* zero the top digits */ 824 for (; x < a->used; x++) { 825 *bottom++ = 0; 826 } 827 } 828 829 /* remove excess digits */ 830 a->used -= b; 831 return 1; 832 } 833 834 /* multiply by a digit */ 835 static int 836 mp_mul_d (mp_int * a, mp_digit b, mp_int * c) 837 { 838 mp_digit u, *tmpa, *tmpc; 839 mp_word r; 840 int ix, res, olduse; 841 842 /* make sure c is big enough to hold a*b */ 843 if (c->alloc < a->used + 1) { 844 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { 845 return res; 846 } 847 } 848 849 /* get the original destinations used count */ 850 olduse = c->used; 851 852 /* set the sign */ 853 c->sign = a->sign; 854 855 /* alias for a->dp [source] */ 856 tmpa = a->dp; 857 858 /* alias for c->dp [dest] */ 859 tmpc = c->dp; 860 861 /* zero carry */ 862 u = 0; 863 864 /* compute columns */ 865 for (ix = 0; ix < a->used; ix++) { 866 /* compute product and carry sum for this term */ 867 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); 868 869 /* mask off higher bits to get a single digit */ 870 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 871 872 /* send carry into next iteration */ 873 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 874 } 875 876 /* store final carry [if any] and increment ix offset */ 877 *tmpc++ = u; 878 ++ix; 879 880 /* now zero digits above the top */ 881 while (ix++ < olduse) { 882 *tmpc++ = 0; 883 } 884 885 /* set used count */ 886 c->used = a->used + 1; 887 mp_clamp(c); 888 889 return MP_OKAY; 890 } 891 892 /* high level addition (handles signs) */ 893 static int mp_add (mp_int * a, mp_int * b, mp_int * c) 894 { 895 int sa, sb, res; 896 897 /* get sign of both inputs */ 898 sa = a->sign; 899 sb = b->sign; 900 901 /* handle two cases, not four */ 902 if (sa == sb) { 903 /* both positive or both negative */ 904 /* add their magnitudes, copy the sign */ 905 c->sign = sa; 906 res = s_mp_add (a, b, c); 907 } else { 908 /* one positive, the other negative */ 909 /* subtract the one with the greater magnitude from */ 910 /* the one of the lesser magnitude. The result gets */ 911 /* the sign of the one with the greater magnitude. */ 912 if (mp_cmp_mag (a, b) == MP_LT) { 913 c->sign = sb; 914 res = s_mp_sub (b, a, c); 915 } else { 916 c->sign = sa; 917 res = s_mp_sub (a, b, c); 918 } 919 } 920 return res; 921 } 922 923 /* swap the elements of two integers, for cases where you can't simply swap the 924 * mp_int pointers around 925 */ 926 static void 927 mp_exch(mp_int *a, mp_int *b) 928 { 929 mp_int t; 930 931 t = *a; 932 *a = *b; 933 *b = t; 934 } 935 936 /* calc a value mod 2**b */ 937 static int 938 mp_mod_2d (mp_int * a, int b, mp_int * c) 939 { 940 int x, res; 941 942 /* if b is <= 0 then zero the int */ 943 if (b <= 0) { 944 mp_zero (c); 945 return MP_OKAY; 946 } 947 948 /* if the modulus is larger than the value than return */ 949 if (b >= (int) (a->used * DIGIT_BIT)) { 950 res = mp_copy (a, c); 951 return res; 952 } 953 954 /* copy */ 955 if ((res = mp_copy (a, c)) != MP_OKAY) { 956 return res; 957 } 958 959 /* zero digits above the last digit of the modulus */ 960 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 961 c->dp[x] = 0; 962 } 963 /* clear the digit that is not completely outside/inside the modulus */ 964 c->dp[b / DIGIT_BIT] &= 965 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 966 mp_clamp (c); 967 return MP_OKAY; 968 } 969 970 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 971 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) 972 { 973 mp_digit D, r, rr; 974 int x, res; 975 mp_int t; 976 977 978 /* if the shift count is <= 0 then we do no work */ 979 if (b <= 0) { 980 res = mp_copy (a, c); 981 if (d != NULL) { 982 mp_zero (d); 983 } 984 return res; 985 } 986 987 if ((res = mp_init (&t)) != MP_OKAY) { 988 return res; 989 } 990 991 /* get the remainder */ 992 if (d != NULL) { 993 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { 994 mp_clear (&t); 995 return res; 996 } 997 } 998 999 /* copy */ 1000 if ((res = mp_copy (a, c)) != MP_OKAY) { 1001 mp_clear (&t); 1002 return res; 1003 } 1004 1005 /* shift by as many digits in the bit count */ 1006 if (b >= (int)DIGIT_BIT) { 1007 mp_rshd (c, b / DIGIT_BIT); 1008 } 1009 1010 /* shift any bit count < DIGIT_BIT */ 1011 D = (mp_digit) (b % DIGIT_BIT); 1012 if (D != 0) { 1013 mp_digit *tmpc, mask, shift; 1014 1015 /* mask */ 1016 mask = (((mp_digit)1) << D) - 1; 1017 1018 /* shift for lsb */ 1019 shift = DIGIT_BIT - D; 1020 1021 /* alias */ 1022 tmpc = c->dp + (c->used - 1); 1023 1024 /* carry */ 1025 r = 0; 1026 for (x = c->used - 1; x >= 0; x--) { 1027 /* get the lower bits of this word in a temp */ 1028 rr = *tmpc & mask; 1029 1030 /* shift the current word and mix in the carry bits from the previous word */ 1031 *tmpc = (*tmpc >> D) | (r << shift); 1032 --tmpc; 1033 1034 /* set the carry to the carry bits of the current word found above */ 1035 r = rr; 1036 } 1037 } 1038 mp_clamp (c); 1039 if (d != NULL) { 1040 mp_exch (&t, d); 1041 } 1042 mp_clear (&t); 1043 return MP_OKAY; 1044 } 1045 1046 /* integer signed division. 1047 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 1048 * HAC pp.598 Algorithm 14.20 1049 * 1050 * Note that the description in HAC is horribly 1051 * incomplete. For example, it doesn't consider 1052 * the case where digits are removed from 'x' in 1053 * the inner loop. It also doesn't consider the 1054 * case that y has fewer than three digits, etc.. 1055 * 1056 * The overall algorithm is as described as 1057 * 14.20 from HAC but fixed to treat these cases. 1058 */ 1059 static int 1060 mp_div(mp_int *c, mp_int *d, mp_int *a, mp_int *b) 1061 { 1062 mp_int q, x, y, t1, t2; 1063 int res, n, t, i, norm, neg; 1064 1065 /* is divisor zero ? */ 1066 if (BN_is_zero (b) == 1) { 1067 return MP_VAL; 1068 } 1069 1070 /* if a < b then q=0, r = a */ 1071 if (mp_cmp_mag (a, b) == MP_LT) { 1072 if (d != NULL) { 1073 res = mp_copy (a, d); 1074 } else { 1075 res = MP_OKAY; 1076 } 1077 if (c != NULL) { 1078 mp_zero (c); 1079 } 1080 return res; 1081 } 1082 1083 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { 1084 return res; 1085 } 1086 q.used = a->used + 2; 1087 1088 if ((res = mp_init (&t1)) != MP_OKAY) { 1089 goto LBL_Q; 1090 } 1091 1092 if ((res = mp_init (&t2)) != MP_OKAY) { 1093 goto LBL_T1; 1094 } 1095 1096 if ((res = mp_init_copy (&x, a)) != MP_OKAY) { 1097 goto LBL_T2; 1098 } 1099 1100 if ((res = mp_init_copy (&y, b)) != MP_OKAY) { 1101 goto LBL_X; 1102 } 1103 1104 /* fix the sign */ 1105 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1106 x.sign = y.sign = MP_ZPOS; 1107 1108 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1109 norm = mp_count_bits(&y) % DIGIT_BIT; 1110 if (norm < (int)(DIGIT_BIT-1)) { 1111 norm = (DIGIT_BIT-1) - norm; 1112 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { 1113 goto LBL_Y; 1114 } 1115 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { 1116 goto LBL_Y; 1117 } 1118 } else { 1119 norm = 0; 1120 } 1121 1122 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1123 n = x.used - 1; 1124 t = y.used - 1; 1125 1126 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1127 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1128 goto LBL_Y; 1129 } 1130 1131 while (mp_cmp (&x, &y) != MP_LT) { 1132 ++(q.dp[n - t]); 1133 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { 1134 goto LBL_Y; 1135 } 1136 } 1137 1138 /* reset y by shifting it back down */ 1139 mp_rshd (&y, n - t); 1140 1141 /* step 3. for i from n down to (t + 1) */ 1142 for (i = n; i >= (t + 1); i--) { 1143 if (i > x.used) { 1144 continue; 1145 } 1146 1147 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1148 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1149 if (x.dp[i] == y.dp[t]) { 1150 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1151 } else { 1152 mp_word tmp; 1153 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1154 tmp |= ((mp_word) x.dp[i - 1]); 1155 tmp /= ((mp_word) y.dp[t]); 1156 if (tmp > (mp_word) MP_MASK) 1157 tmp = MP_MASK; 1158 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1159 } 1160 1161 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1162 xi * b**2 + xi-1 * b + xi-2 1163 1164 do q{i-t-1} -= 1; 1165 */ 1166 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1167 do { 1168 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1169 1170 /* find left hand */ 1171 mp_zero (&t1); 1172 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1173 t1.dp[1] = y.dp[t]; 1174 t1.used = 2; 1175 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1176 goto LBL_Y; 1177 } 1178 1179 /* find right hand */ 1180 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1181 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1182 t2.dp[2] = x.dp[i]; 1183 t2.used = 3; 1184 } while (mp_cmp_mag(&t1, &t2) == MP_GT); 1185 1186 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1187 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1188 goto LBL_Y; 1189 } 1190 1191 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1192 goto LBL_Y; 1193 } 1194 1195 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { 1196 goto LBL_Y; 1197 } 1198 1199 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1200 if (x.sign == MP_NEG) { 1201 if ((res = mp_copy (&y, &t1)) != MP_OKAY) { 1202 goto LBL_Y; 1203 } 1204 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1205 goto LBL_Y; 1206 } 1207 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { 1208 goto LBL_Y; 1209 } 1210 1211 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1212 } 1213 } 1214 1215 /* now q is the quotient and x is the remainder 1216 * [which we have to normalize] 1217 */ 1218 1219 /* get sign before writing to c */ 1220 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1221 1222 if (c != NULL) { 1223 mp_clamp (&q); 1224 mp_exch (&q, c); 1225 c->sign = neg; 1226 } 1227 1228 if (d != NULL) { 1229 mp_div_2d (&x, norm, &x, NULL); 1230 mp_exch (&x, d); 1231 } 1232 1233 res = MP_OKAY; 1234 1235 LBL_Y:mp_clear (&y); 1236 LBL_X:mp_clear (&x); 1237 LBL_T2:mp_clear (&t2); 1238 LBL_T1:mp_clear (&t1); 1239 LBL_Q:mp_clear (&q); 1240 return res; 1241 } 1242 1243 /* c = a mod b, 0 <= c < b */ 1244 static int 1245 mp_mod (mp_int * a, mp_int * b, mp_int * c) 1246 { 1247 mp_int t; 1248 int res; 1249 1250 if ((res = mp_init (&t)) != MP_OKAY) { 1251 return res; 1252 } 1253 1254 if ((res = mp_div (NULL, &t, a, b)) != MP_OKAY) { 1255 mp_clear (&t); 1256 return res; 1257 } 1258 1259 if (t.sign != b->sign) { 1260 res = mp_add (b, &t, c); 1261 } else { 1262 res = MP_OKAY; 1263 mp_exch (&t, c); 1264 } 1265 1266 mp_clear (&t); 1267 return res; 1268 } 1269 1270 /* set to a digit */ 1271 static void mp_set (mp_int * a, mp_digit b) 1272 { 1273 mp_zero (a); 1274 a->dp[0] = b & MP_MASK; 1275 a->used = (a->dp[0] != 0) ? 1 : 0; 1276 } 1277 1278 /* b = a/2 */ 1279 static int mp_div_2(mp_int * a, mp_int * b) 1280 { 1281 int x, res, oldused; 1282 1283 /* copy */ 1284 if (b->alloc < a->used) { 1285 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 1286 return res; 1287 } 1288 } 1289 1290 oldused = b->used; 1291 b->used = a->used; 1292 { 1293 mp_digit r, rr, *tmpa, *tmpb; 1294 1295 /* source alias */ 1296 tmpa = a->dp + b->used - 1; 1297 1298 /* dest alias */ 1299 tmpb = b->dp + b->used - 1; 1300 1301 /* carry */ 1302 r = 0; 1303 for (x = b->used - 1; x >= 0; x--) { 1304 /* get the carry for the next iteration */ 1305 rr = *tmpa & 1; 1306 1307 /* shift the current digit, add in carry and store */ 1308 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1309 1310 /* forward carry to next iteration */ 1311 r = rr; 1312 } 1313 1314 /* zero excess digits */ 1315 tmpb = b->dp + b->used; 1316 for (x = b->used; x < oldused; x++) { 1317 *tmpb++ = 0; 1318 } 1319 } 1320 b->sign = a->sign; 1321 mp_clamp (b); 1322 return MP_OKAY; 1323 } 1324 1325 /* compare a digit */ 1326 static int mp_cmp_d(mp_int * a, mp_digit b) 1327 { 1328 /* compare based on sign */ 1329 if (a->sign == MP_NEG) { 1330 return MP_LT; 1331 } 1332 1333 /* compare based on magnitude */ 1334 if (a->used > 1) { 1335 return MP_GT; 1336 } 1337 1338 /* compare the only digit of a to b */ 1339 if (a->dp[0] > b) { 1340 return MP_GT; 1341 } else if (a->dp[0] < b) { 1342 return MP_LT; 1343 } else { 1344 return MP_EQ; 1345 } 1346 } 1347 1348 static void mp_clear_multi(mp_int *mp, ...) 1349 { 1350 mp_int* next_mp = mp; 1351 va_list args; 1352 va_start(args, mp); 1353 while (next_mp != NULL) { 1354 mp_clear(next_mp); 1355 next_mp = va_arg(args, mp_int*); 1356 } 1357 va_end(args); 1358 } 1359 1360 /* computes the modular inverse via binary extended euclidean algorithm, 1361 * that is c = 1/a mod b 1362 * 1363 * Based on slow invmod except this is optimized for the case where b is 1364 * odd as per HAC Note 14.64 on pp. 610 1365 */ 1366 static int 1367 fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) 1368 { 1369 mp_int x, y, u, v, B, D; 1370 int res, neg; 1371 1372 /* 2. [modified] b must be odd */ 1373 if (BN_is_even (b) == 1) { 1374 return MP_VAL; 1375 } 1376 1377 /* init all our temps */ 1378 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1379 return res; 1380 } 1381 1382 /* x == modulus, y == value to invert */ 1383 if ((res = mp_copy (b, &x)) != MP_OKAY) { 1384 goto LBL_ERR; 1385 } 1386 1387 /* we need y = |a| */ 1388 if ((res = mp_mod (a, b, &y)) != MP_OKAY) { 1389 goto LBL_ERR; 1390 } 1391 1392 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1393 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 1394 goto LBL_ERR; 1395 } 1396 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 1397 goto LBL_ERR; 1398 } 1399 mp_set (&D, 1); 1400 1401 top: 1402 /* 4. while u is even do */ 1403 while (BN_is_even (&u) == 1) { 1404 /* 4.1 u = u/2 */ 1405 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 1406 goto LBL_ERR; 1407 } 1408 /* 4.2 if B is odd then */ 1409 if (BN_is_odd (&B) == 1) { 1410 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 1411 goto LBL_ERR; 1412 } 1413 } 1414 /* B = B/2 */ 1415 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 1416 goto LBL_ERR; 1417 } 1418 } 1419 1420 /* 5. while v is even do */ 1421 while (BN_is_even (&v) == 1) { 1422 /* 5.1 v = v/2 */ 1423 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 1424 goto LBL_ERR; 1425 } 1426 /* 5.2 if D is odd then */ 1427 if (BN_is_odd (&D) == 1) { 1428 /* D = (D-x)/2 */ 1429 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 1430 goto LBL_ERR; 1431 } 1432 } 1433 /* D = D/2 */ 1434 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 1435 goto LBL_ERR; 1436 } 1437 } 1438 1439 /* 6. if u >= v then */ 1440 if (mp_cmp (&u, &v) != MP_LT) { 1441 /* u = u - v, B = B - D */ 1442 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 1443 goto LBL_ERR; 1444 } 1445 1446 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 1447 goto LBL_ERR; 1448 } 1449 } else { 1450 /* v - v - u, D = D - B */ 1451 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 1452 goto LBL_ERR; 1453 } 1454 1455 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 1456 goto LBL_ERR; 1457 } 1458 } 1459 1460 /* if not zero goto step 4 */ 1461 if (BN_is_zero (&u) == 0) { 1462 goto top; 1463 } 1464 1465 /* now a = C, b = D, gcd == g*v */ 1466 1467 /* if v != 1 then there is no inverse */ 1468 if (mp_cmp_d (&v, 1) != MP_EQ) { 1469 res = MP_VAL; 1470 goto LBL_ERR; 1471 } 1472 1473 /* b is now the inverse */ 1474 neg = a->sign; 1475 while (D.sign == MP_NEG) { 1476 if ((res = mp_add (&D, b, &D)) != MP_OKAY) { 1477 goto LBL_ERR; 1478 } 1479 } 1480 mp_exch (&D, c); 1481 c->sign = neg; 1482 res = MP_OKAY; 1483 1484 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1485 return res; 1486 } 1487 1488 /* hac 14.61, pp608 */ 1489 static int 1490 mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c) 1491 { 1492 mp_int x, y, u, v, A, B, C, D; 1493 int res; 1494 1495 /* b cannot be negative */ 1496 if (b->sign == MP_NEG || BN_is_zero(b) == 1) { 1497 return MP_VAL; 1498 } 1499 1500 /* init temps */ 1501 if ((res = mp_init_multi(&x, &y, &u, &v, 1502 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1503 return res; 1504 } 1505 1506 /* x = a, y = b */ 1507 if ((res = mp_mod(a, b, &x)) != MP_OKAY) { 1508 goto LBL_ERR; 1509 } 1510 if ((res = mp_copy (b, &y)) != MP_OKAY) { 1511 goto LBL_ERR; 1512 } 1513 1514 /* 2. [modified] if x,y are both even then return an error! */ 1515 if (BN_is_even (&x) == 1 && BN_is_even (&y) == 1) { 1516 res = MP_VAL; 1517 goto LBL_ERR; 1518 } 1519 1520 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1521 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 1522 goto LBL_ERR; 1523 } 1524 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 1525 goto LBL_ERR; 1526 } 1527 mp_set (&A, 1); 1528 mp_set (&D, 1); 1529 1530 top: 1531 /* 4. while u is even do */ 1532 while (BN_is_even (&u) == 1) { 1533 /* 4.1 u = u/2 */ 1534 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 1535 goto LBL_ERR; 1536 } 1537 /* 4.2 if A or B is odd then */ 1538 if (BN_is_odd (&A) == 1 || BN_is_odd (&B) == 1) { 1539 /* A = (A+y)/2, B = (B-x)/2 */ 1540 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { 1541 goto LBL_ERR; 1542 } 1543 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 1544 goto LBL_ERR; 1545 } 1546 } 1547 /* A = A/2, B = B/2 */ 1548 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { 1549 goto LBL_ERR; 1550 } 1551 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 1552 goto LBL_ERR; 1553 } 1554 } 1555 1556 /* 5. while v is even do */ 1557 while (BN_is_even (&v) == 1) { 1558 /* 5.1 v = v/2 */ 1559 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 1560 goto LBL_ERR; 1561 } 1562 /* 5.2 if C or D is odd then */ 1563 if (BN_is_odd (&C) == 1 || BN_is_odd (&D) == 1) { 1564 /* C = (C+y)/2, D = (D-x)/2 */ 1565 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { 1566 goto LBL_ERR; 1567 } 1568 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 1569 goto LBL_ERR; 1570 } 1571 } 1572 /* C = C/2, D = D/2 */ 1573 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { 1574 goto LBL_ERR; 1575 } 1576 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 1577 goto LBL_ERR; 1578 } 1579 } 1580 1581 /* 6. if u >= v then */ 1582 if (mp_cmp (&u, &v) != MP_LT) { 1583 /* u = u - v, A = A - C, B = B - D */ 1584 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 1585 goto LBL_ERR; 1586 } 1587 1588 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { 1589 goto LBL_ERR; 1590 } 1591 1592 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 1593 goto LBL_ERR; 1594 } 1595 } else { 1596 /* v - v - u, C = C - A, D = D - B */ 1597 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 1598 goto LBL_ERR; 1599 } 1600 1601 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { 1602 goto LBL_ERR; 1603 } 1604 1605 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 1606 goto LBL_ERR; 1607 } 1608 } 1609 1610 /* if not zero goto step 4 */ 1611 if (BN_is_zero (&u) == 0) 1612 goto top; 1613 1614 /* now a = C, b = D, gcd == g*v */ 1615 1616 /* if v != 1 then there is no inverse */ 1617 if (mp_cmp_d (&v, 1) != MP_EQ) { 1618 res = MP_VAL; 1619 goto LBL_ERR; 1620 } 1621 1622 /* if its too low */ 1623 while (mp_cmp_d(&C, 0) == MP_LT) { 1624 if ((res = mp_add(&C, b, &C)) != MP_OKAY) { 1625 goto LBL_ERR; 1626 } 1627 } 1628 1629 /* too big */ 1630 while (mp_cmp_mag(&C, b) != MP_LT) { 1631 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { 1632 goto LBL_ERR; 1633 } 1634 } 1635 1636 /* C is now the inverse */ 1637 mp_exch (&C, c); 1638 res = MP_OKAY; 1639 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1640 return res; 1641 } 1642 1643 static int 1644 mp_invmod(mp_int *c, mp_int *a, mp_int *b) 1645 { 1646 /* b cannot be negative */ 1647 if (b->sign == MP_NEG || BN_is_zero(b) == 1) { 1648 return MP_VAL; 1649 } 1650 1651 /* if the modulus is odd we can use a faster routine instead */ 1652 if (BN_is_odd (b) == 1) { 1653 return fast_mp_invmod(a, b, c); 1654 } 1655 1656 return mp_invmod_slow(a, b, c); 1657 1658 /*NOTREACHED*/ 1659 return MP_VAL; 1660 } 1661 1662 /* b = |a| 1663 * 1664 * Simple function copies the input and fixes the sign to positive 1665 */ 1666 static int 1667 mp_abs (mp_int * a, mp_int * b) 1668 { 1669 int res; 1670 1671 /* copy a to b */ 1672 if (a != b) { 1673 if ((res = mp_copy (a, b)) != MP_OKAY) { 1674 return res; 1675 } 1676 } 1677 1678 /* force the sign of b to positive */ 1679 b->sign = MP_ZPOS; 1680 1681 return MP_OKAY; 1682 } 1683 1684 /* determines if reduce_2k_l can be used */ 1685 static int mp_reduce_is_2k_l(mp_int *a) 1686 { 1687 int ix, iy; 1688 1689 if (a->used == 0) { 1690 return MP_NO; 1691 } else if (a->used == 1) { 1692 return MP_YES; 1693 } else if (a->used > 1) { 1694 /* if more than half of the digits are -1 we're sold */ 1695 for (iy = ix = 0; ix < a->used; ix++) { 1696 if (a->dp[ix] == MP_MASK) { 1697 ++iy; 1698 } 1699 } 1700 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1701 1702 } 1703 return MP_NO; 1704 } 1705 1706 /* computes a = 2**b 1707 * 1708 * Simple algorithm which zeroes the int, grows it then just sets one bit 1709 * as required. 1710 */ 1711 static int 1712 mp_2expt (mp_int * a, int b) 1713 { 1714 int res; 1715 1716 /* zero a as per default */ 1717 mp_zero (a); 1718 1719 /* grow a to accomodate the single bit */ 1720 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1721 return res; 1722 } 1723 1724 /* set the used count of where the bit will go */ 1725 a->used = b / DIGIT_BIT + 1; 1726 1727 /* put the single bit in its place */ 1728 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1729 1730 return MP_OKAY; 1731 } 1732 1733 /* pre-calculate the value required for Barrett reduction 1734 * For a given modulus "b" it calulates the value required in "a" 1735 */ 1736 static int mp_reduce_setup (mp_int * a, mp_int * b) 1737 { 1738 int res; 1739 1740 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1741 return res; 1742 } 1743 return mp_div (a, NULL, a, b); 1744 } 1745 1746 /* b = a*2 */ 1747 static int mp_mul_2(mp_int * a, mp_int * b) 1748 { 1749 int x, res, oldused; 1750 1751 /* grow to accomodate result */ 1752 if (b->alloc < a->used + 1) { 1753 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { 1754 return res; 1755 } 1756 } 1757 1758 oldused = b->used; 1759 b->used = a->used; 1760 1761 { 1762 mp_digit r, rr, *tmpa, *tmpb; 1763 1764 /* alias for source */ 1765 tmpa = a->dp; 1766 1767 /* alias for dest */ 1768 tmpb = b->dp; 1769 1770 /* carry */ 1771 r = 0; 1772 for (x = 0; x < a->used; x++) { 1773 1774 /* get what will be the *next* carry bit from the 1775 * MSB of the current digit 1776 */ 1777 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1778 1779 /* now shift up this digit, add in the carry [from the previous] */ 1780 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1781 1782 /* copy the carry that would be from the source 1783 * digit into the next iteration 1784 */ 1785 r = rr; 1786 } 1787 1788 /* new leading digit? */ 1789 if (r != 0) { 1790 /* add a MSB which is always 1 at this point */ 1791 *tmpb = 1; 1792 ++(b->used); 1793 } 1794 1795 /* now zero any excess digits on the destination 1796 * that we didn't write to 1797 */ 1798 tmpb = b->dp + b->used; 1799 for (x = b->used; x < oldused; x++) { 1800 *tmpb++ = 0; 1801 } 1802 } 1803 b->sign = a->sign; 1804 return MP_OKAY; 1805 } 1806 1807 /* divide by three (based on routine from MPI and the GMP manual) */ 1808 static int 1809 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) 1810 { 1811 mp_int q; 1812 mp_word w, t; 1813 mp_digit b; 1814 int res, ix; 1815 1816 /* b = 2**DIGIT_BIT / 3 */ 1817 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1818 1819 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1820 return res; 1821 } 1822 1823 q.used = a->used; 1824 q.sign = a->sign; 1825 w = 0; 1826 for (ix = a->used - 1; ix >= 0; ix--) { 1827 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1828 1829 if (w >= 3) { 1830 /* multiply w by [1/3] */ 1831 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1832 1833 /* now subtract 3 * [w/3] from w, to get the remainder */ 1834 w -= t+t+t; 1835 1836 /* fixup the remainder as required since 1837 * the optimization is not exact. 1838 */ 1839 while (w >= 3) { 1840 t += 1; 1841 w -= 3; 1842 } 1843 } else { 1844 t = 0; 1845 } 1846 q.dp[ix] = (mp_digit)t; 1847 } 1848 1849 /* [optional] store the remainder */ 1850 if (d != NULL) { 1851 *d = (mp_digit)w; 1852 } 1853 1854 /* [optional] store the quotient */ 1855 if (c != NULL) { 1856 mp_clamp(&q); 1857 mp_exch(&q, c); 1858 } 1859 mp_clear(&q); 1860 1861 return res; 1862 } 1863 1864 /* multiplication using the Toom-Cook 3-way algorithm 1865 * 1866 * Much more complicated than Karatsuba but has a lower 1867 * asymptotic running time of O(N**1.464). This algorithm is 1868 * only particularly useful on VERY large inputs 1869 * (we're talking 1000s of digits here...). 1870 */ 1871 static int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) 1872 { 1873 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1874 int res, B; 1875 1876 /* init temps */ 1877 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1878 &a0, &a1, &a2, &b0, &b1, 1879 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1880 return res; 1881 } 1882 1883 /* B */ 1884 B = MIN(a->used, b->used) / 3; 1885 1886 /* a = a2 * B**2 + a1 * B + a0 */ 1887 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1888 goto ERR; 1889 } 1890 1891 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1892 goto ERR; 1893 } 1894 mp_rshd(&a1, B); 1895 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 1896 1897 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1898 goto ERR; 1899 } 1900 mp_rshd(&a2, B*2); 1901 1902 /* b = b2 * B**2 + b1 * B + b0 */ 1903 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1904 goto ERR; 1905 } 1906 1907 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1908 goto ERR; 1909 } 1910 mp_rshd(&b1, B); 1911 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); 1912 1913 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1914 goto ERR; 1915 } 1916 mp_rshd(&b2, B*2); 1917 1918 /* w0 = a0*b0 */ 1919 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { 1920 goto ERR; 1921 } 1922 1923 /* w4 = a2 * b2 */ 1924 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { 1925 goto ERR; 1926 } 1927 1928 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1929 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 1930 goto ERR; 1931 } 1932 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1933 goto ERR; 1934 } 1935 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 1936 goto ERR; 1937 } 1938 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1939 goto ERR; 1940 } 1941 1942 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { 1943 goto ERR; 1944 } 1945 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1946 goto ERR; 1947 } 1948 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 1949 goto ERR; 1950 } 1951 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1952 goto ERR; 1953 } 1954 1955 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1956 goto ERR; 1957 } 1958 1959 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1960 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 1961 goto ERR; 1962 } 1963 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1964 goto ERR; 1965 } 1966 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 1967 goto ERR; 1968 } 1969 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1970 goto ERR; 1971 } 1972 1973 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { 1974 goto ERR; 1975 } 1976 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1977 goto ERR; 1978 } 1979 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 1980 goto ERR; 1981 } 1982 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1983 goto ERR; 1984 } 1985 1986 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1987 goto ERR; 1988 } 1989 1990 1991 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1992 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1993 goto ERR; 1994 } 1995 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1996 goto ERR; 1997 } 1998 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { 1999 goto ERR; 2000 } 2001 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 2002 goto ERR; 2003 } 2004 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { 2005 goto ERR; 2006 } 2007 2008 /* now solve the matrix 2009 2010 0 0 0 0 1 2011 1 2 4 8 16 2012 1 1 1 1 1 2013 16 8 4 2 1 2014 1 0 0 0 0 2015 2016 using 12 subtractions, 4 shifts, 2017 2 small divisions and 1 small multiplication 2018 */ 2019 2020 /* r1 - r4 */ 2021 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 2022 goto ERR; 2023 } 2024 /* r3 - r0 */ 2025 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 2026 goto ERR; 2027 } 2028 /* r1/2 */ 2029 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 2030 goto ERR; 2031 } 2032 /* r3/2 */ 2033 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 2034 goto ERR; 2035 } 2036 /* r2 - r0 - r4 */ 2037 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 2038 goto ERR; 2039 } 2040 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 2041 goto ERR; 2042 } 2043 /* r1 - r2 */ 2044 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2045 goto ERR; 2046 } 2047 /* r3 - r2 */ 2048 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2049 goto ERR; 2050 } 2051 /* r1 - 8r0 */ 2052 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 2053 goto ERR; 2054 } 2055 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 2056 goto ERR; 2057 } 2058 /* r3 - 8r4 */ 2059 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 2060 goto ERR; 2061 } 2062 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 2063 goto ERR; 2064 } 2065 /* 3r2 - r1 - r3 */ 2066 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 2067 goto ERR; 2068 } 2069 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 2070 goto ERR; 2071 } 2072 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 2073 goto ERR; 2074 } 2075 /* r1 - r2 */ 2076 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2077 goto ERR; 2078 } 2079 /* r3 - r2 */ 2080 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2081 goto ERR; 2082 } 2083 /* r1/3 */ 2084 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 2085 goto ERR; 2086 } 2087 /* r3/3 */ 2088 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 2089 goto ERR; 2090 } 2091 2092 /* at this point shift W[n] by B*n */ 2093 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 2094 goto ERR; 2095 } 2096 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 2097 goto ERR; 2098 } 2099 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 2100 goto ERR; 2101 } 2102 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 2103 goto ERR; 2104 } 2105 2106 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { 2107 goto ERR; 2108 } 2109 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2110 goto ERR; 2111 } 2112 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2113 goto ERR; 2114 } 2115 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { 2116 goto ERR; 2117 } 2118 2119 ERR: 2120 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2121 &a0, &a1, &a2, &b0, &b1, 2122 &b2, &tmp1, &tmp2, NULL); 2123 return res; 2124 } 2125 2126 #define TOOM_MUL_CUTOFF 350 2127 #define KARATSUBA_MUL_CUTOFF 80 2128 2129 /* c = |a| * |b| using Karatsuba Multiplication using 2130 * three half size multiplications 2131 * 2132 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2133 * let n represent half of the number of digits in 2134 * the min(a,b) 2135 * 2136 * a = a1 * B**n + a0 2137 * b = b1 * B**n + b0 2138 * 2139 * Then, a * b => 2140 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2141 * 2142 * Note that a1b1 and a0b0 are used twice and only need to be 2143 * computed once. So in total three half size (half # of 2144 * digit) multiplications are performed, a0b0, a1b1 and 2145 * (a1+b1)(a0+b0) 2146 * 2147 * Note that a multiplication of half the digits requires 2148 * 1/4th the number of single precision multiplications so in 2149 * total after one call 25% of the single precision multiplications 2150 * are saved. Note also that the call to mp_mul can end up back 2151 * in this function if the a0, a1, b0, or b1 are above the threshold. 2152 * This is known as divide-and-conquer and leads to the famous 2153 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2154 * the standard O(N**2) that the baseline/comba methods use. 2155 * Generally though the overhead of this method doesn't pay off 2156 * until a certain size (N ~ 80) is reached. 2157 */ 2158 static int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) 2159 { 2160 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2161 int B; 2162 int err; 2163 2164 /* default the return code to an error */ 2165 err = MP_MEM; 2166 2167 /* min # of digits */ 2168 B = MIN (a->used, b->used); 2169 2170 /* now divide in two */ 2171 B = (int)((unsigned)B >> 1); 2172 2173 /* init copy all the temps */ 2174 if (mp_init_size (&x0, B) != MP_OKAY) 2175 goto ERR; 2176 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 2177 goto X0; 2178 if (mp_init_size (&y0, B) != MP_OKAY) 2179 goto X1; 2180 if (mp_init_size (&y1, b->used - B) != MP_OKAY) 2181 goto Y0; 2182 2183 /* init temps */ 2184 if (mp_init_size (&t1, B * 2) != MP_OKAY) 2185 goto Y1; 2186 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) 2187 goto T1; 2188 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) 2189 goto X0Y0; 2190 2191 /* now shift the digits */ 2192 x0.used = y0.used = B; 2193 x1.used = a->used - B; 2194 y1.used = b->used - B; 2195 2196 { 2197 int x; 2198 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2199 2200 /* we copy the digits directly instead of using higher level functions 2201 * since we also need to shift the digits 2202 */ 2203 tmpa = a->dp; 2204 tmpb = b->dp; 2205 2206 tmpx = x0.dp; 2207 tmpy = y0.dp; 2208 for (x = 0; x < B; x++) { 2209 *tmpx++ = *tmpa++; 2210 *tmpy++ = *tmpb++; 2211 } 2212 2213 tmpx = x1.dp; 2214 for (x = B; x < a->used; x++) { 2215 *tmpx++ = *tmpa++; 2216 } 2217 2218 tmpy = y1.dp; 2219 for (x = B; x < b->used; x++) { 2220 *tmpy++ = *tmpb++; 2221 } 2222 } 2223 2224 /* only need to clamp the lower words since by definition the 2225 * upper words x1/y1 must have a known number of digits 2226 */ 2227 mp_clamp (&x0); 2228 mp_clamp (&y0); 2229 2230 /* now calc the products x0y0 and x1y1 */ 2231 /* after this x0 is no longer required, free temp [x0==t2]! */ 2232 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) 2233 goto X1Y1; /* x0y0 = x0*y0 */ 2234 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 2235 goto X1Y1; /* x1y1 = x1*y1 */ 2236 2237 /* now calc x1+x0 and y1+y0 */ 2238 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 2239 goto X1Y1; /* t1 = x1 - x0 */ 2240 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) 2241 goto X1Y1; /* t2 = y1 - y0 */ 2242 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) 2243 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2244 2245 /* add x0y0 */ 2246 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 2247 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2248 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) 2249 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2250 2251 /* shift by B */ 2252 if (mp_lshd (&t1, B) != MP_OKAY) 2253 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2254 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) 2255 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2256 2257 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) 2258 goto X1Y1; /* t1 = x0y0 + t1 */ 2259 if (mp_add (&t1, &x1y1, c) != MP_OKAY) 2260 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2261 2262 /* Algorithm succeeded set the return code to MP_OKAY */ 2263 err = MP_OKAY; 2264 2265 X1Y1:mp_clear (&x1y1); 2266 X0Y0:mp_clear (&x0y0); 2267 T1:mp_clear (&t1); 2268 Y1:mp_clear (&y1); 2269 Y0:mp_clear (&y0); 2270 X1:mp_clear (&x1); 2271 X0:mp_clear (&x0); 2272 ERR: 2273 return err; 2274 } 2275 2276 /* Fast (comba) multiplier 2277 * 2278 * This is the fast column-array [comba] multiplier. It is 2279 * designed to compute the columns of the product first 2280 * then handle the carries afterwards. This has the effect 2281 * of making the nested loops that compute the columns very 2282 * simple and schedulable on super-scalar processors. 2283 * 2284 * This has been modified to produce a variable number of 2285 * digits of output so if say only a half-product is required 2286 * you don't have to compute the upper half (a feature 2287 * required for fast Barrett reduction). 2288 * 2289 * Based on Algorithm 14.12 on pp.595 of HAC. 2290 * 2291 */ 2292 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2293 { 2294 int olduse, res, pa, ix, iz; 2295 /*LINTED*/ 2296 mp_digit W[MP_WARRAY]; 2297 mp_word _W; 2298 2299 /* grow the destination as required */ 2300 if (c->alloc < digs) { 2301 if ((res = mp_grow (c, digs)) != MP_OKAY) { 2302 return res; 2303 } 2304 } 2305 2306 /* number of output digits to produce */ 2307 pa = MIN(digs, a->used + b->used); 2308 2309 /* clear the carry */ 2310 _W = 0; 2311 for (ix = 0; ix < pa; ix++) { 2312 int tx, ty; 2313 int iy; 2314 mp_digit *tmpx, *tmpy; 2315 2316 /* get offsets into the two bignums */ 2317 ty = MIN(b->used-1, ix); 2318 tx = ix - ty; 2319 2320 /* setup temp aliases */ 2321 tmpx = a->dp + tx; 2322 tmpy = b->dp + ty; 2323 2324 /* this is the number of times the loop will iterrate, essentially 2325 while (tx++ < a->used && ty-- >= 0) { ... } 2326 */ 2327 iy = MIN(a->used-tx, ty+1); 2328 2329 /* execute loop */ 2330 for (iz = 0; iz < iy; ++iz) { 2331 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2332 2333 } 2334 2335 /* store term */ 2336 W[ix] = ((mp_digit)_W) & MP_MASK; 2337 2338 /* make next carry */ 2339 _W = _W >> ((mp_word)DIGIT_BIT); 2340 } 2341 2342 /* setup dest */ 2343 olduse = c->used; 2344 c->used = pa; 2345 2346 { 2347 mp_digit *tmpc; 2348 tmpc = c->dp; 2349 for (ix = 0; ix < pa+1; ix++) { 2350 /* now extract the previous digit [below the carry] */ 2351 *tmpc++ = W[ix]; 2352 } 2353 2354 /* clear unused digits [that existed in the old copy of c] */ 2355 for (; ix < olduse; ix++) { 2356 *tmpc++ = 0; 2357 } 2358 } 2359 mp_clamp (c); 2360 return MP_OKAY; 2361 } 2362 2363 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2364 /* Revision: 1.2 $ */ 2365 /* Date: 2011/03/18 16:22:09 $ */ 2366 2367 2368 /* multiplies |a| * |b| and only computes upto digs digits of result 2369 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2370 * many digits of output are created. 2371 */ 2372 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2373 { 2374 mp_int t; 2375 int res, pa, pb, ix, iy; 2376 mp_digit u; 2377 mp_word r; 2378 mp_digit tmpx, *tmpt, *tmpy; 2379 2380 /* can we use the fast multiplier? */ 2381 if (((unsigned)(digs) < MP_WARRAY) && 2382 MIN (a->used, b->used) < 2383 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2384 return fast_s_mp_mul_digs (a, b, c, digs); 2385 } 2386 2387 if ((res = mp_init_size (&t, digs)) != MP_OKAY) { 2388 return res; 2389 } 2390 t.used = digs; 2391 2392 /* compute the digits of the product directly */ 2393 pa = a->used; 2394 for (ix = 0; ix < pa; ix++) { 2395 /* set the carry to zero */ 2396 u = 0; 2397 2398 /* limit ourselves to making digs digits of output */ 2399 pb = MIN (b->used, digs - ix); 2400 2401 /* setup some aliases */ 2402 /* copy of the digit from a used within the nested loop */ 2403 tmpx = a->dp[ix]; 2404 2405 /* an alias for the destination shifted ix places */ 2406 tmpt = t.dp + ix; 2407 2408 /* an alias for the digits of b */ 2409 tmpy = b->dp; 2410 2411 /* compute the columns of the output and propagate the carry */ 2412 for (iy = 0; iy < pb; iy++) { 2413 /* compute the column as a mp_word */ 2414 r = ((mp_word)*tmpt) + 2415 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2416 ((mp_word) u); 2417 2418 /* the new column is the lower part of the result */ 2419 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2420 2421 /* get the carry word from the result */ 2422 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2423 } 2424 /* set carry if it is placed below digs */ 2425 if (ix + iy < digs) { 2426 *tmpt = u; 2427 } 2428 } 2429 2430 mp_clamp (&t); 2431 mp_exch (&t, c); 2432 2433 mp_clear (&t); 2434 return MP_OKAY; 2435 } 2436 2437 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2438 /* Revision: 1.3 $ */ 2439 /* Date: 2011/03/18 16:43:04 $ */ 2440 2441 /* high level multiplication (handles sign) */ 2442 static int 2443 mp_mul(mp_int * a, mp_int * b, mp_int * c) 2444 { 2445 int res, neg; 2446 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2447 2448 /* use Toom-Cook? */ 2449 if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) { 2450 res = mp_toom_mul(a, b, c); 2451 } else 2452 /* use Karatsuba? */ 2453 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2454 res = mp_karatsuba_mul (a, b, c); 2455 } else 2456 { 2457 /* can we use the fast multiplier? 2458 * 2459 * The fast multiplier can be used if the output will 2460 * have less than MP_WARRAY digits and the number of 2461 * digits won't affect carry propagation 2462 */ 2463 int digs = a->used + b->used + 1; 2464 2465 if (((unsigned)digs < MP_WARRAY) && 2466 MIN(a->used, b->used) <= 2467 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2468 res = fast_s_mp_mul_digs (a, b, c, digs); 2469 } else 2470 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ 2471 2472 } 2473 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2474 return res; 2475 } 2476 2477 /* this is a modified version of fast_s_mul_digs that only produces 2478 * output digits *above* digs. See the comments for fast_s_mul_digs 2479 * to see how it works. 2480 * 2481 * This is used in the Barrett reduction since for one of the multiplications 2482 * only the higher digits were needed. This essentially halves the work. 2483 * 2484 * Based on Algorithm 14.12 on pp.595 of HAC. 2485 */ 2486 static int 2487 fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2488 { 2489 int olduse, res, pa, ix, iz; 2490 mp_digit W[MP_WARRAY]; 2491 mp_word _W; 2492 2493 /* grow the destination as required */ 2494 pa = a->used + b->used; 2495 if (c->alloc < pa) { 2496 if ((res = mp_grow (c, pa)) != MP_OKAY) { 2497 return res; 2498 } 2499 } 2500 2501 /* number of output digits to produce */ 2502 pa = a->used + b->used; 2503 _W = 0; 2504 for (ix = digs; ix < pa; ix++) { 2505 int tx, ty, iy; 2506 mp_digit *tmpx, *tmpy; 2507 2508 /* get offsets into the two bignums */ 2509 ty = MIN(b->used-1, ix); 2510 tx = ix - ty; 2511 2512 /* setup temp aliases */ 2513 tmpx = a->dp + tx; 2514 tmpy = b->dp + ty; 2515 2516 /* this is the number of times the loop will iterrate, essentially its 2517 while (tx++ < a->used && ty-- >= 0) { ... } 2518 */ 2519 iy = MIN(a->used-tx, ty+1); 2520 2521 /* execute loop */ 2522 for (iz = 0; iz < iy; iz++) { 2523 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2524 } 2525 2526 /* store term */ 2527 W[ix] = ((mp_digit)_W) & MP_MASK; 2528 2529 /* make next carry */ 2530 _W = _W >> ((mp_word)DIGIT_BIT); 2531 } 2532 2533 /* setup dest */ 2534 olduse = c->used; 2535 c->used = pa; 2536 2537 { 2538 mp_digit *tmpc; 2539 2540 tmpc = c->dp + digs; 2541 for (ix = digs; ix < pa; ix++) { 2542 /* now extract the previous digit [below the carry] */ 2543 *tmpc++ = W[ix]; 2544 } 2545 2546 /* clear unused digits [that existed in the old copy of c] */ 2547 for (; ix < olduse; ix++) { 2548 *tmpc++ = 0; 2549 } 2550 } 2551 mp_clamp (c); 2552 return MP_OKAY; 2553 } 2554 2555 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2556 /* Revision: 1.1.1.1 $ */ 2557 /* Date: 2011/03/12 22:58:18 $ */ 2558 2559 /* multiplies |a| * |b| and does not compute the lower digs digits 2560 * [meant to get the higher part of the product] 2561 */ 2562 static int 2563 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2564 { 2565 mp_int t; 2566 int res, pa, pb, ix, iy; 2567 mp_digit u; 2568 mp_word r; 2569 mp_digit tmpx, *tmpt, *tmpy; 2570 2571 /* can we use the fast multiplier? */ 2572 if (((unsigned)(a->used + b->used + 1) < MP_WARRAY) 2573 && MIN (a->used, b->used) < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2574 return fast_s_mp_mul_high_digs (a, b, c, digs); 2575 } 2576 2577 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { 2578 return res; 2579 } 2580 t.used = a->used + b->used + 1; 2581 2582 pa = a->used; 2583 pb = b->used; 2584 for (ix = 0; ix < pa; ix++) { 2585 /* clear the carry */ 2586 u = 0; 2587 2588 /* left hand side of A[ix] * B[iy] */ 2589 tmpx = a->dp[ix]; 2590 2591 /* alias to the address of where the digits will be stored */ 2592 tmpt = &(t.dp[digs]); 2593 2594 /* alias for where to read the right hand side from */ 2595 tmpy = b->dp + (digs - ix); 2596 2597 for (iy = digs - ix; iy < pb; iy++) { 2598 /* calculate the double precision result */ 2599 r = ((mp_word)*tmpt) + 2600 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2601 ((mp_word) u); 2602 2603 /* get the lower part */ 2604 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2605 2606 /* carry the carry */ 2607 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2608 } 2609 *tmpt = u; 2610 } 2611 mp_clamp (&t); 2612 mp_exch (&t, c); 2613 mp_clear (&t); 2614 return MP_OKAY; 2615 } 2616 2617 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2618 /* Revision: 1.3 $ */ 2619 /* Date: 2011/03/18 16:43:04 $ */ 2620 2621 /* reduces x mod m, assumes 0 < x < m**2, mu is 2622 * precomputed via mp_reduce_setup. 2623 * From HAC pp.604 Algorithm 14.42 2624 */ 2625 static int 2626 mp_reduce (mp_int * x, mp_int * m, mp_int * mu) 2627 { 2628 mp_int q; 2629 int res, um = m->used; 2630 2631 /* q = x */ 2632 if ((res = mp_init_copy (&q, x)) != MP_OKAY) { 2633 return res; 2634 } 2635 2636 /* q1 = x / b**(k-1) */ 2637 mp_rshd (&q, um - 1); 2638 2639 /* according to HAC this optimization is ok */ 2640 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2641 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { 2642 goto CLEANUP; 2643 } 2644 } else { 2645 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 2646 goto CLEANUP; 2647 } 2648 } 2649 2650 /* q3 = q2 / b**(k+1) */ 2651 mp_rshd (&q, um + 1); 2652 2653 /* x = x mod b**(k+1), quick (no division) */ 2654 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2655 goto CLEANUP; 2656 } 2657 2658 /* q = q * m mod b**(k+1), quick (no division) */ 2659 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { 2660 goto CLEANUP; 2661 } 2662 2663 /* x = x - q */ 2664 if ((res = mp_sub (x, &q, x)) != MP_OKAY) { 2665 goto CLEANUP; 2666 } 2667 2668 /* If x < 0, add b**(k+1) to it */ 2669 if (mp_cmp_d (x, 0) == MP_LT) { 2670 mp_set (&q, 1); 2671 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) 2672 goto CLEANUP; 2673 if ((res = mp_add (x, &q, x)) != MP_OKAY) 2674 goto CLEANUP; 2675 } 2676 2677 /* Back off if it's too big */ 2678 while (mp_cmp (x, m) != MP_LT) { 2679 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { 2680 goto CLEANUP; 2681 } 2682 } 2683 2684 CLEANUP: 2685 mp_clear (&q); 2686 2687 return res; 2688 } 2689 2690 /* determines the setup value */ 2691 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2692 { 2693 int res; 2694 mp_int tmp; 2695 2696 if ((res = mp_init(&tmp)) != MP_OKAY) { 2697 return res; 2698 } 2699 2700 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2701 goto ERR; 2702 } 2703 2704 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { 2705 goto ERR; 2706 } 2707 2708 ERR: 2709 mp_clear(&tmp); 2710 return res; 2711 } 2712 2713 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2714 /* Revision: 1.1.1.1 $ */ 2715 /* Date: 2011/03/12 22:58:18 $ */ 2716 2717 /* reduces a modulo n where n is of the form 2**p - d 2718 This differs from reduce_2k since "d" can be larger 2719 than a single digit. 2720 */ 2721 static int 2722 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2723 { 2724 mp_int q; 2725 int p, res; 2726 2727 if ((res = mp_init(&q)) != MP_OKAY) { 2728 return res; 2729 } 2730 2731 p = mp_count_bits(n); 2732 top: 2733 /* q = a/2**p, a = a mod 2**p */ 2734 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 2735 goto ERR; 2736 } 2737 2738 /* q = q * d */ 2739 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 2740 goto ERR; 2741 } 2742 2743 /* a = a + q */ 2744 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 2745 goto ERR; 2746 } 2747 2748 if (mp_cmp_mag(a, n) != MP_LT) { 2749 s_mp_sub(a, n, a); 2750 goto top; 2751 } 2752 2753 ERR: 2754 mp_clear(&q); 2755 return res; 2756 } 2757 2758 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2759 /* Revision: 1.1.1.1 $ */ 2760 /* Date: 2011/03/12 22:58:18 $ */ 2761 2762 /* squaring using Toom-Cook 3-way algorithm */ 2763 static int 2764 mp_toom_sqr(mp_int *a, mp_int *b) 2765 { 2766 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2767 int res, B; 2768 2769 /* init temps */ 2770 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2771 return res; 2772 } 2773 2774 /* B */ 2775 B = a->used / 3; 2776 2777 /* a = a2 * B**2 + a1 * B + a0 */ 2778 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2779 goto ERR; 2780 } 2781 2782 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2783 goto ERR; 2784 } 2785 mp_rshd(&a1, B); 2786 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 2787 2788 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2789 goto ERR; 2790 } 2791 mp_rshd(&a2, B*2); 2792 2793 /* w0 = a0*a0 */ 2794 if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { 2795 goto ERR; 2796 } 2797 2798 /* w4 = a2 * a2 */ 2799 if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { 2800 goto ERR; 2801 } 2802 2803 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2804 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 2805 goto ERR; 2806 } 2807 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2808 goto ERR; 2809 } 2810 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 2811 goto ERR; 2812 } 2813 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2814 goto ERR; 2815 } 2816 2817 if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { 2818 goto ERR; 2819 } 2820 2821 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2822 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 2823 goto ERR; 2824 } 2825 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2826 goto ERR; 2827 } 2828 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 2829 goto ERR; 2830 } 2831 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2832 goto ERR; 2833 } 2834 2835 if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { 2836 goto ERR; 2837 } 2838 2839 2840 /* w2 = (a2 + a1 + a0)**2 */ 2841 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2842 goto ERR; 2843 } 2844 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2845 goto ERR; 2846 } 2847 if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { 2848 goto ERR; 2849 } 2850 2851 /* now solve the matrix 2852 2853 0 0 0 0 1 2854 1 2 4 8 16 2855 1 1 1 1 1 2856 16 8 4 2 1 2857 1 0 0 0 0 2858 2859 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2860 */ 2861 2862 /* r1 - r4 */ 2863 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 2864 goto ERR; 2865 } 2866 /* r3 - r0 */ 2867 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 2868 goto ERR; 2869 } 2870 /* r1/2 */ 2871 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 2872 goto ERR; 2873 } 2874 /* r3/2 */ 2875 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 2876 goto ERR; 2877 } 2878 /* r2 - r0 - r4 */ 2879 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 2880 goto ERR; 2881 } 2882 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 2883 goto ERR; 2884 } 2885 /* r1 - r2 */ 2886 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2887 goto ERR; 2888 } 2889 /* r3 - r2 */ 2890 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2891 goto ERR; 2892 } 2893 /* r1 - 8r0 */ 2894 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 2895 goto ERR; 2896 } 2897 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 2898 goto ERR; 2899 } 2900 /* r3 - 8r4 */ 2901 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 2902 goto ERR; 2903 } 2904 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 2905 goto ERR; 2906 } 2907 /* 3r2 - r1 - r3 */ 2908 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 2909 goto ERR; 2910 } 2911 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 2912 goto ERR; 2913 } 2914 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 2915 goto ERR; 2916 } 2917 /* r1 - r2 */ 2918 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2919 goto ERR; 2920 } 2921 /* r3 - r2 */ 2922 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2923 goto ERR; 2924 } 2925 /* r1/3 */ 2926 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 2927 goto ERR; 2928 } 2929 /* r3/3 */ 2930 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 2931 goto ERR; 2932 } 2933 2934 /* at this point shift W[n] by B*n */ 2935 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 2936 goto ERR; 2937 } 2938 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 2939 goto ERR; 2940 } 2941 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 2942 goto ERR; 2943 } 2944 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 2945 goto ERR; 2946 } 2947 2948 if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { 2949 goto ERR; 2950 } 2951 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2952 goto ERR; 2953 } 2954 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2955 goto ERR; 2956 } 2957 if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { 2958 goto ERR; 2959 } 2960 2961 ERR: 2962 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2963 return res; 2964 } 2965 2966 2967 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2968 /* Revision: 1.1.1.1 $ */ 2969 /* Date: 2011/03/12 22:58:18 $ */ 2970 2971 /* Karatsuba squaring, computes b = a*a using three 2972 * half size squarings 2973 * 2974 * See comments of karatsuba_mul for details. It 2975 * is essentially the same algorithm but merely 2976 * tuned to perform recursive squarings. 2977 */ 2978 static int mp_karatsuba_sqr (mp_int * a, mp_int * b) 2979 { 2980 mp_int x0, x1, t1, t2, x0x0, x1x1; 2981 int B, err; 2982 2983 err = MP_MEM; 2984 2985 /* min # of digits */ 2986 B = a->used; 2987 2988 /* now divide in two */ 2989 B = (unsigned)B >> 1; 2990 2991 /* init copy all the temps */ 2992 if (mp_init_size (&x0, B) != MP_OKAY) 2993 goto ERR; 2994 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 2995 goto X0; 2996 2997 /* init temps */ 2998 if (mp_init_size (&t1, a->used * 2) != MP_OKAY) 2999 goto X1; 3000 if (mp_init_size (&t2, a->used * 2) != MP_OKAY) 3001 goto T1; 3002 if (mp_init_size (&x0x0, B * 2) != MP_OKAY) 3003 goto T2; 3004 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) 3005 goto X0X0; 3006 3007 { 3008 int x; 3009 mp_digit *dst, *src; 3010 3011 src = a->dp; 3012 3013 /* now shift the digits */ 3014 dst = x0.dp; 3015 for (x = 0; x < B; x++) { 3016 *dst++ = *src++; 3017 } 3018 3019 dst = x1.dp; 3020 for (x = B; x < a->used; x++) { 3021 *dst++ = *src++; 3022 } 3023 } 3024 3025 x0.used = B; 3026 x1.used = a->used - B; 3027 3028 mp_clamp (&x0); 3029 3030 /* now calc the products x0*x0 and x1*x1 */ 3031 if (mp_sqr (&x0, &x0x0) != MP_OKAY) 3032 goto X1X1; /* x0x0 = x0*x0 */ 3033 if (mp_sqr (&x1, &x1x1) != MP_OKAY) 3034 goto X1X1; /* x1x1 = x1*x1 */ 3035 3036 /* now calc (x1+x0)**2 */ 3037 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 3038 goto X1X1; /* t1 = x1 - x0 */ 3039 if (mp_sqr (&t1, &t1) != MP_OKAY) 3040 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 3041 3042 /* add x0y0 */ 3043 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) 3044 goto X1X1; /* t2 = x0x0 + x1x1 */ 3045 if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) 3046 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3047 3048 /* shift by B */ 3049 if (mp_lshd (&t1, B) != MP_OKAY) 3050 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3051 if (mp_lshd (&x1x1, B * 2) != MP_OKAY) 3052 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3053 3054 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) 3055 goto X1X1; /* t1 = x0x0 + t1 */ 3056 if (mp_add (&t1, &x1x1, b) != MP_OKAY) 3057 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3058 3059 err = MP_OKAY; 3060 3061 X1X1:mp_clear (&x1x1); 3062 X0X0:mp_clear (&x0x0); 3063 T2:mp_clear (&t2); 3064 T1:mp_clear (&t1); 3065 X1:mp_clear (&x1); 3066 X0:mp_clear (&x0); 3067 ERR: 3068 return err; 3069 } 3070 3071 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3072 /* Revision: 1.2 $ */ 3073 /* Date: 2011/03/12 23:43:54 $ */ 3074 3075 /* the jist of squaring... 3076 * you do like mult except the offset of the tmpx [one that 3077 * starts closer to zero] can't equal the offset of tmpy. 3078 * So basically you set up iy like before then you min it with 3079 * (ty-tx) so that it never happens. You double all those 3080 * you add in the inner loop 3081 3082 After that loop you do the squares and add them in. 3083 */ 3084 3085 static int fast_s_mp_sqr (mp_int * a, mp_int * b) 3086 { 3087 int olduse, res, pa, ix, iz; 3088 mp_digit W[MP_WARRAY], *tmpx; 3089 mp_word W1; 3090 3091 /* grow the destination as required */ 3092 pa = a->used + a->used; 3093 if (b->alloc < pa) { 3094 if ((res = mp_grow (b, pa)) != MP_OKAY) { 3095 return res; 3096 } 3097 } 3098 3099 /* number of output digits to produce */ 3100 W1 = 0; 3101 for (ix = 0; ix < pa; ix++) { 3102 int tx, ty, iy; 3103 mp_word _W; 3104 mp_digit *tmpy; 3105 3106 /* clear counter */ 3107 _W = 0; 3108 3109 /* get offsets into the two bignums */ 3110 ty = MIN(a->used-1, ix); 3111 tx = ix - ty; 3112 3113 /* setup temp aliases */ 3114 tmpx = a->dp + tx; 3115 tmpy = a->dp + ty; 3116 3117 /* this is the number of times the loop will iterrate, essentially 3118 while (tx++ < a->used && ty-- >= 0) { ... } 3119 */ 3120 iy = MIN(a->used-tx, ty+1); 3121 3122 /* now for squaring tx can never equal ty 3123 * we halve the distance since they approach at a rate of 2x 3124 * and we have to round because odd cases need to be executed 3125 */ 3126 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3127 3128 /* execute loop */ 3129 for (iz = 0; iz < iy; iz++) { 3130 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3131 } 3132 3133 /* double the inner product and add carry */ 3134 _W = _W + _W + W1; 3135 3136 /* even columns have the square term in them */ 3137 if ((ix&1) == 0) { 3138 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3139 } 3140 3141 /* store it */ 3142 W[ix] = (mp_digit)(_W & MP_MASK); 3143 3144 /* make next carry */ 3145 W1 = _W >> ((mp_word)DIGIT_BIT); 3146 } 3147 3148 /* setup dest */ 3149 olduse = b->used; 3150 b->used = a->used+a->used; 3151 3152 { 3153 mp_digit *tmpb; 3154 tmpb = b->dp; 3155 for (ix = 0; ix < pa; ix++) { 3156 *tmpb++ = W[ix] & MP_MASK; 3157 } 3158 3159 /* clear unused digits [that existed in the old copy of c] */ 3160 for (; ix < olduse; ix++) { 3161 *tmpb++ = 0; 3162 } 3163 } 3164 mp_clamp (b); 3165 return MP_OKAY; 3166 } 3167 3168 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3169 /* Revision: 1.3 $ */ 3170 /* Date: 2011/03/18 16:43:04 $ */ 3171 3172 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3173 static int 3174 s_mp_sqr (mp_int * a, mp_int * b) 3175 { 3176 mp_int t; 3177 int res, ix, iy, pa; 3178 mp_word r; 3179 mp_digit u, tmpx, *tmpt; 3180 3181 pa = a->used; 3182 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { 3183 return res; 3184 } 3185 3186 /* default used is maximum possible size */ 3187 t.used = 2*pa + 1; 3188 3189 for (ix = 0; ix < pa; ix++) { 3190 /* first calculate the digit at 2*ix */ 3191 /* calculate double precision result */ 3192 r = ((mp_word) t.dp[2*ix]) + 3193 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3194 3195 /* store lower part in result */ 3196 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3197 3198 /* get the carry */ 3199 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3200 3201 /* left hand side of A[ix] * A[iy] */ 3202 tmpx = a->dp[ix]; 3203 3204 /* alias for where to store the results */ 3205 tmpt = t.dp + (2*ix + 1); 3206 3207 for (iy = ix + 1; iy < pa; iy++) { 3208 /* first calculate the product */ 3209 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3210 3211 /* now calculate the double precision result, note we use 3212 * addition instead of *2 since it's easier to optimize 3213 */ 3214 r = ((mp_word) *tmpt) + r + r + ((mp_word) u); 3215 3216 /* store lower part */ 3217 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3218 3219 /* get carry */ 3220 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3221 } 3222 /* propagate upwards */ 3223 while (u != ((mp_digit) 0)) { 3224 r = ((mp_word) *tmpt) + ((mp_word) u); 3225 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3226 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3227 } 3228 } 3229 3230 mp_clamp (&t); 3231 mp_exch (&t, b); 3232 mp_clear (&t); 3233 return MP_OKAY; 3234 } 3235 3236 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3237 /* Revision: 1.1.1.1 $ */ 3238 /* Date: 2011/03/12 22:58:18 $ */ 3239 3240 #define TOOM_SQR_CUTOFF 400 3241 #define KARATSUBA_SQR_CUTOFF 120 3242 3243 /* computes b = a*a */ 3244 static int 3245 mp_sqr (mp_int * a, mp_int * b) 3246 { 3247 int res; 3248 3249 /* use Toom-Cook? */ 3250 if (a->used >= TOOM_SQR_CUTOFF) { 3251 res = mp_toom_sqr(a, b); 3252 /* Karatsuba? */ 3253 } else 3254 if (a->used >= KARATSUBA_SQR_CUTOFF) { 3255 res = mp_karatsuba_sqr (a, b); 3256 } else 3257 { 3258 /* can we use the fast comba multiplier? */ 3259 if (((unsigned)a->used * 2 + 1) < MP_WARRAY && 3260 a->used < 3261 (1 << (unsigned)(sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { 3262 res = fast_s_mp_sqr (a, b); 3263 } else 3264 res = s_mp_sqr (a, b); 3265 } 3266 b->sign = MP_ZPOS; 3267 return res; 3268 } 3269 3270 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3271 /* Revision: 1.3 $ */ 3272 /* Date: 2011/03/18 16:43:04 $ */ 3273 3274 #define TAB_SIZE 256 3275 3276 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3277 { 3278 mp_int M[TAB_SIZE], res, mu; 3279 mp_digit buf; 3280 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3281 int (*redux)(mp_int*,mp_int*,mp_int*); 3282 3283 /* find window size */ 3284 x = mp_count_bits (X); 3285 if (x <= 7) { 3286 winsize = 2; 3287 } else if (x <= 36) { 3288 winsize = 3; 3289 } else if (x <= 140) { 3290 winsize = 4; 3291 } else if (x <= 450) { 3292 winsize = 5; 3293 } else if (x <= 1303) { 3294 winsize = 6; 3295 } else if (x <= 3529) { 3296 winsize = 7; 3297 } else { 3298 winsize = 8; 3299 } 3300 3301 /* init M array */ 3302 /* init first cell */ 3303 if ((err = mp_init(&M[1])) != MP_OKAY) { 3304 return err; 3305 } 3306 3307 /* now init the second half of the array */ 3308 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3309 if ((err = mp_init(&M[x])) != MP_OKAY) { 3310 for (y = 1<<(winsize-1); y < x; y++) { 3311 mp_clear (&M[y]); 3312 } 3313 mp_clear(&M[1]); 3314 return err; 3315 } 3316 } 3317 3318 /* create mu, used for Barrett reduction */ 3319 if ((err = mp_init (&mu)) != MP_OKAY) { 3320 goto LBL_M; 3321 } 3322 3323 if (redmode == 0) { 3324 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { 3325 goto LBL_MU; 3326 } 3327 redux = mp_reduce; 3328 } else { 3329 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { 3330 goto LBL_MU; 3331 } 3332 redux = mp_reduce_2k_l; 3333 } 3334 3335 /* create M table 3336 * 3337 * The M table contains powers of the base, 3338 * e.g. M[x] = G**x mod P 3339 * 3340 * The first half of the table is not 3341 * computed though accept for M[0] and M[1] 3342 */ 3343 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { 3344 goto LBL_MU; 3345 } 3346 3347 /* compute the value at M[1<<(winsize-1)] by squaring 3348 * M[1] (winsize-1) times 3349 */ 3350 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3351 goto LBL_MU; 3352 } 3353 3354 for (x = 0; x < (winsize - 1); x++) { 3355 /* square it */ 3356 if ((err = mp_sqr (&M[1 << (winsize - 1)], 3357 &M[1 << (winsize - 1)])) != MP_OKAY) { 3358 goto LBL_MU; 3359 } 3360 3361 /* reduce modulo P */ 3362 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3363 goto LBL_MU; 3364 } 3365 } 3366 3367 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3368 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3369 */ 3370 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3371 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3372 goto LBL_MU; 3373 } 3374 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { 3375 goto LBL_MU; 3376 } 3377 } 3378 3379 /* setup result */ 3380 if ((err = mp_init (&res)) != MP_OKAY) { 3381 goto LBL_MU; 3382 } 3383 mp_set (&res, 1); 3384 3385 /* set initial mode and bit cnt */ 3386 mode = 0; 3387 bitcnt = 1; 3388 buf = 0; 3389 digidx = X->used - 1; 3390 bitcpy = 0; 3391 bitbuf = 0; 3392 3393 for (;;) { 3394 /* grab next digit as required */ 3395 if (--bitcnt == 0) { 3396 /* if digidx == -1 we are out of digits */ 3397 if (digidx == -1) { 3398 break; 3399 } 3400 /* read next digit and reset the bitcnt */ 3401 buf = X->dp[digidx--]; 3402 bitcnt = (int) DIGIT_BIT; 3403 } 3404 3405 /* grab the next msb from the exponent */ 3406 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3407 buf <<= (mp_digit)1; 3408 3409 /* if the bit is zero and mode == 0 then we ignore it 3410 * These represent the leading zero bits before the first 1 bit 3411 * in the exponent. Technically this opt is not required but it 3412 * does lower the # of trivial squaring/reductions used 3413 */ 3414 if (mode == 0 && y == 0) { 3415 continue; 3416 } 3417 3418 /* if the bit is zero and mode == 1 then we square */ 3419 if (mode == 1 && y == 0) { 3420 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3421 goto LBL_RES; 3422 } 3423 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3424 goto LBL_RES; 3425 } 3426 continue; 3427 } 3428 3429 /* else we add it to the window */ 3430 bitbuf |= (y << (winsize - ++bitcpy)); 3431 mode = 2; 3432 3433 if (bitcpy == winsize) { 3434 /* ok window is filled so square as required and multiply */ 3435 /* square first */ 3436 for (x = 0; x < winsize; x++) { 3437 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3438 goto LBL_RES; 3439 } 3440 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3441 goto LBL_RES; 3442 } 3443 } 3444 3445 /* then multiply */ 3446 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 3447 goto LBL_RES; 3448 } 3449 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3450 goto LBL_RES; 3451 } 3452 3453 /* empty window and reset */ 3454 bitcpy = 0; 3455 bitbuf = 0; 3456 mode = 1; 3457 } 3458 } 3459 3460 /* if bits remain then square/multiply */ 3461 if (mode == 2 && bitcpy > 0) { 3462 /* square then multiply if the bit is set */ 3463 for (x = 0; x < bitcpy; x++) { 3464 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3465 goto LBL_RES; 3466 } 3467 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3468 goto LBL_RES; 3469 } 3470 3471 bitbuf <<= 1; 3472 if ((bitbuf & (1 << winsize)) != 0) { 3473 /* then multiply */ 3474 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 3475 goto LBL_RES; 3476 } 3477 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3478 goto LBL_RES; 3479 } 3480 } 3481 } 3482 } 3483 3484 mp_exch (&res, Y); 3485 err = MP_OKAY; 3486 LBL_RES:mp_clear (&res); 3487 LBL_MU:mp_clear (&mu); 3488 LBL_M: 3489 mp_clear(&M[1]); 3490 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3491 mp_clear (&M[x]); 3492 } 3493 return err; 3494 } 3495 3496 /* determines if a number is a valid DR modulus */ 3497 static int 3498 mp_dr_is_modulus(mp_int *a) 3499 { 3500 int ix; 3501 3502 /* must be at least two digits */ 3503 if (a->used < 2) { 3504 return 0; 3505 } 3506 3507 /* must be of the form b**k - a [a <= b] so all 3508 * but the first digit must be equal to -1 (mod b). 3509 */ 3510 for (ix = 1; ix < a->used; ix++) { 3511 if (a->dp[ix] != MP_MASK) { 3512 return 0; 3513 } 3514 } 3515 return 1; 3516 } 3517 3518 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3519 /* Revision: 1.1.1.1 $ */ 3520 /* Date: 2011/03/12 22:58:18 $ */ 3521 3522 /* determines if mp_reduce_2k can be used */ 3523 static int mp_reduce_is_2k(mp_int *a) 3524 { 3525 int ix, iy, iw; 3526 mp_digit iz; 3527 3528 if (a->used == 0) { 3529 return MP_NO; 3530 } else if (a->used == 1) { 3531 return MP_YES; 3532 } else if (a->used > 1) { 3533 iy = mp_count_bits(a); 3534 iz = 1; 3535 iw = 1; 3536 3537 /* Test every bit from the second digit up, must be 1 */ 3538 for (ix = DIGIT_BIT; ix < iy; ix++) { 3539 if ((a->dp[iw] & iz) == 0) { 3540 return MP_NO; 3541 } 3542 iz <<= 1; 3543 if (iz > (mp_digit)MP_MASK) { 3544 ++iw; 3545 iz = 1; 3546 } 3547 } 3548 } 3549 return MP_YES; 3550 } 3551 3552 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3553 /* Revision: 1.1.1.1 $ */ 3554 /* Date: 2011/03/12 22:58:18 $ */ 3555 3556 3557 /* d = a * b (mod c) */ 3558 static int 3559 mp_mulmod (mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3560 { 3561 int res; 3562 mp_int t; 3563 3564 if ((res = mp_init (&t)) != MP_OKAY) { 3565 return res; 3566 } 3567 3568 if ((res = mp_mul (a, b, &t)) != MP_OKAY) { 3569 mp_clear (&t); 3570 return res; 3571 } 3572 res = mp_mod (&t, c, d); 3573 mp_clear (&t); 3574 return res; 3575 } 3576 3577 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3578 /* Revision: 1.1.1.1 $ */ 3579 /* Date: 2011/03/12 22:58:18 $ */ 3580 3581 /* setups the montgomery reduction stuff */ 3582 static int 3583 mp_montgomery_setup (mp_int * n, mp_digit * rho) 3584 { 3585 mp_digit x, b; 3586 3587 /* fast inversion mod 2**k 3588 * 3589 * Based on the fact that 3590 * 3591 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3592 * => 2*X*A - X*X*A*A = 1 3593 * => 2*(1) - (1) = 1 3594 */ 3595 b = n->dp[0]; 3596 3597 if ((b & 1) == 0) { 3598 return MP_VAL; 3599 } 3600 3601 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3602 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3603 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3604 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3605 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3606 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3607 } 3608 3609 /* rho = -1/m mod b */ 3610 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3611 3612 return MP_OKAY; 3613 } 3614 3615 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3616 /* Revision: 1.1.1.1 $ */ 3617 /* Date: 2011/03/12 22:58:18 $ */ 3618 3619 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3620 * 3621 * This is an optimized implementation of montgomery_reduce 3622 * which uses the comba method to quickly calculate the columns of the 3623 * reduction. 3624 * 3625 * Based on Algorithm 14.32 on pp.601 of HAC. 3626 */ 3627 static int 3628 fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 3629 { 3630 int ix, res, olduse; 3631 /*LINTED*/ 3632 mp_word W[MP_WARRAY]; 3633 3634 /* get old used count */ 3635 olduse = x->used; 3636 3637 /* grow a as required */ 3638 if (x->alloc < n->used + 1) { 3639 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { 3640 return res; 3641 } 3642 } 3643 3644 /* first we have to get the digits of the input into 3645 * an array of double precision words W[...] 3646 */ 3647 { 3648 mp_word *_W; 3649 mp_digit *tmpx; 3650 3651 /* alias for the W[] array */ 3652 _W = W; 3653 3654 /* alias for the digits of x*/ 3655 tmpx = x->dp; 3656 3657 /* copy the digits of a into W[0..a->used-1] */ 3658 for (ix = 0; ix < x->used; ix++) { 3659 *_W++ = *tmpx++; 3660 } 3661 3662 /* zero the high words of W[a->used..m->used*2] */ 3663 for (; ix < n->used * 2 + 1; ix++) { 3664 *_W++ = 0; 3665 } 3666 } 3667 3668 /* now we proceed to zero successive digits 3669 * from the least significant upwards 3670 */ 3671 for (ix = 0; ix < n->used; ix++) { 3672 /* mu = ai * m' mod b 3673 * 3674 * We avoid a double precision multiplication (which isn't required) 3675 * by casting the value down to a mp_digit. Note this requires 3676 * that W[ix-1] have the carry cleared (see after the inner loop) 3677 */ 3678 mp_digit mu; 3679 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3680 3681 /* a = a + mu * m * b**i 3682 * 3683 * This is computed in place and on the fly. The multiplication 3684 * by b**i is handled by offseting which columns the results 3685 * are added to. 3686 * 3687 * Note the comba method normally doesn't handle carries in the 3688 * inner loop In this case we fix the carry from the previous 3689 * column since the Montgomery reduction requires digits of the 3690 * result (so far) [see above] to work. This is 3691 * handled by fixing up one carry after the inner loop. The 3692 * carry fixups are done in order so after these loops the 3693 * first m->used words of W[] have the carries fixed 3694 */ 3695 { 3696 int iy; 3697 mp_digit *tmpn; 3698 mp_word *_W; 3699 3700 /* alias for the digits of the modulus */ 3701 tmpn = n->dp; 3702 3703 /* Alias for the columns set by an offset of ix */ 3704 _W = W + ix; 3705 3706 /* inner loop */ 3707 for (iy = 0; iy < n->used; iy++) { 3708 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3709 } 3710 } 3711 3712 /* now fix carry for next digit, W[ix+1] */ 3713 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3714 } 3715 3716 /* now we have to propagate the carries and 3717 * shift the words downward [all those least 3718 * significant digits we zeroed]. 3719 */ 3720 { 3721 mp_digit *tmpx; 3722 mp_word *_W, *_W1; 3723 3724 /* nox fix rest of carries */ 3725 3726 /* alias for current word */ 3727 _W1 = W + ix; 3728 3729 /* alias for next word, where the carry goes */ 3730 _W = W + ++ix; 3731 3732 for (; ix <= n->used * 2 + 1; ix++) { 3733 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3734 } 3735 3736 /* copy out, A = A/b**n 3737 * 3738 * The result is A/b**n but instead of converting from an 3739 * array of mp_word to mp_digit than calling mp_rshd 3740 * we just copy them in the right order 3741 */ 3742 3743 /* alias for destination word */ 3744 tmpx = x->dp; 3745 3746 /* alias for shifted double precision result */ 3747 _W = W + n->used; 3748 3749 for (ix = 0; ix < n->used + 1; ix++) { 3750 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3751 } 3752 3753 /* zero oldused digits, if the input a was larger than 3754 * m->used+1 we'll have to clear the digits 3755 */ 3756 for (; ix < olduse; ix++) { 3757 *tmpx++ = 0; 3758 } 3759 } 3760 3761 /* set the max used and clamp */ 3762 x->used = n->used + 1; 3763 mp_clamp (x); 3764 3765 /* if A >= m then A = A - m */ 3766 if (mp_cmp_mag (x, n) != MP_LT) { 3767 return s_mp_sub (x, n, x); 3768 } 3769 return MP_OKAY; 3770 } 3771 3772 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3773 /* Revision: 1.2 $ */ 3774 /* Date: 2011/03/18 16:22:09 $ */ 3775 3776 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3777 static int 3778 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 3779 { 3780 int ix, res, digs; 3781 mp_digit mu; 3782 3783 /* can the fast reduction [comba] method be used? 3784 * 3785 * Note that unlike in mul you're safely allowed *less* 3786 * than the available columns [255 per default] since carries 3787 * are fixed up in the inner loop. 3788 */ 3789 digs = n->used * 2 + 1; 3790 if (((unsigned)digs < MP_WARRAY) && 3791 n->used < 3792 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 3793 return fast_mp_montgomery_reduce (x, n, rho); 3794 } 3795 3796 /* grow the input as required */ 3797 if (x->alloc < digs) { 3798 if ((res = mp_grow (x, digs)) != MP_OKAY) { 3799 return res; 3800 } 3801 } 3802 x->used = digs; 3803 3804 for (ix = 0; ix < n->used; ix++) { 3805 /* mu = ai * rho mod b 3806 * 3807 * The value of rho must be precalculated via 3808 * montgomery_setup() such that 3809 * it equals -1/n0 mod b this allows the 3810 * following inner loop to reduce the 3811 * input one digit at a time 3812 */ 3813 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3814 3815 /* a = a + mu * m * b**i */ 3816 { 3817 int iy; 3818 mp_digit *tmpn, *tmpx, u; 3819 mp_word r; 3820 3821 /* alias for digits of the modulus */ 3822 tmpn = n->dp; 3823 3824 /* alias for the digits of x [the input] */ 3825 tmpx = x->dp + ix; 3826 3827 /* set the carry to zero */ 3828 u = 0; 3829 3830 /* Multiply and add in place */ 3831 for (iy = 0; iy < n->used; iy++) { 3832 /* compute product and sum */ 3833 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3834 ((mp_word) u) + ((mp_word) * tmpx); 3835 3836 /* get carry */ 3837 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3838 3839 /* fix digit */ 3840 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3841 } 3842 /* At this point the ix'th digit of x should be zero */ 3843 3844 3845 /* propagate carries upwards as required*/ 3846 while (u) { 3847 *tmpx += u; 3848 u = *tmpx >> DIGIT_BIT; 3849 *tmpx++ &= MP_MASK; 3850 } 3851 } 3852 } 3853 3854 /* at this point the n.used'th least 3855 * significant digits of x are all zero 3856 * which means we can shift x to the 3857 * right by n.used digits and the 3858 * residue is unchanged. 3859 */ 3860 3861 /* x = x/b**n.used */ 3862 mp_clamp(x); 3863 mp_rshd (x, n->used); 3864 3865 /* if x >= n then x = x - n */ 3866 if (mp_cmp_mag (x, n) != MP_LT) { 3867 return s_mp_sub (x, n, x); 3868 } 3869 3870 return MP_OKAY; 3871 } 3872 3873 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3874 /* Revision: 1.3 $ */ 3875 /* Date: 2011/03/18 16:43:04 $ */ 3876 3877 /* determines the setup value */ 3878 static void 3879 mp_dr_setup(mp_int *a, mp_digit *d) 3880 { 3881 /* the casts are required if DIGIT_BIT is one less than 3882 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3883 */ 3884 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3885 ((mp_word)a->dp[0])); 3886 } 3887 3888 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3889 /* Revision: 1.1.1.1 $ */ 3890 /* Date: 2011/03/12 22:58:18 $ */ 3891 3892 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3893 * 3894 * Based on algorithm from the paper 3895 * 3896 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3897 * Chae Hoon Lim, Pil Joong Lee, 3898 * POSTECH Information Research Laboratories 3899 * 3900 * The modulus must be of a special format [see manual] 3901 * 3902 * Has been modified to use algorithm 7.10 from the LTM book instead 3903 * 3904 * Input x must be in the range 0 <= x <= (n-1)**2 3905 */ 3906 static int 3907 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) 3908 { 3909 int err, i, m; 3910 mp_word r; 3911 mp_digit mu, *tmpx1, *tmpx2; 3912 3913 /* m = digits in modulus */ 3914 m = n->used; 3915 3916 /* ensure that "x" has at least 2m digits */ 3917 if (x->alloc < m + m) { 3918 if ((err = mp_grow (x, m + m)) != MP_OKAY) { 3919 return err; 3920 } 3921 } 3922 3923 /* top of loop, this is where the code resumes if 3924 * another reduction pass is required. 3925 */ 3926 top: 3927 /* aliases for digits */ 3928 /* alias for lower half of x */ 3929 tmpx1 = x->dp; 3930 3931 /* alias for upper half of x, or x/B**m */ 3932 tmpx2 = x->dp + m; 3933 3934 /* set carry to zero */ 3935 mu = 0; 3936 3937 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3938 for (i = 0; i < m; i++) { 3939 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3940 *tmpx1++ = (mp_digit)(r & MP_MASK); 3941 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3942 } 3943 3944 /* set final carry */ 3945 *tmpx1++ = mu; 3946 3947 /* zero words above m */ 3948 for (i = m + 1; i < x->used; i++) { 3949 *tmpx1++ = 0; 3950 } 3951 3952 /* clamp, sub and return */ 3953 mp_clamp (x); 3954 3955 /* if x >= n then subtract and reduce again 3956 * Each successive "recursion" makes the input smaller and smaller. 3957 */ 3958 if (mp_cmp_mag (x, n) != MP_LT) { 3959 s_mp_sub(x, n, x); 3960 goto top; 3961 } 3962 return MP_OKAY; 3963 } 3964 3965 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3966 /* Revision: 1.1.1.1 $ */ 3967 /* Date: 2011/03/12 22:58:18 $ */ 3968 3969 /* determines the setup value */ 3970 static int 3971 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3972 { 3973 int res, p; 3974 mp_int tmp; 3975 3976 if ((res = mp_init(&tmp)) != MP_OKAY) { 3977 return res; 3978 } 3979 3980 p = mp_count_bits(a); 3981 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3982 mp_clear(&tmp); 3983 return res; 3984 } 3985 3986 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { 3987 mp_clear(&tmp); 3988 return res; 3989 } 3990 3991 *d = tmp.dp[0]; 3992 mp_clear(&tmp); 3993 return MP_OKAY; 3994 } 3995 3996 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3997 /* Revision: 1.1.1.1 $ */ 3998 /* Date: 2011/03/12 22:58:18 $ */ 3999 4000 /* reduces a modulo n where n is of the form 2**p - d */ 4001 static int 4002 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 4003 { 4004 mp_int q; 4005 int p, res; 4006 4007 if ((res = mp_init(&q)) != MP_OKAY) { 4008 return res; 4009 } 4010 4011 p = mp_count_bits(n); 4012 top: 4013 /* q = a/2**p, a = a mod 2**p */ 4014 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 4015 goto ERR; 4016 } 4017 4018 if (d != 1) { 4019 /* q = q * d */ 4020 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 4021 goto ERR; 4022 } 4023 } 4024 4025 /* a = a + q */ 4026 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 4027 goto ERR; 4028 } 4029 4030 if (mp_cmp_mag(a, n) != MP_LT) { 4031 s_mp_sub(a, n, a); 4032 goto top; 4033 } 4034 4035 ERR: 4036 mp_clear(&q); 4037 return res; 4038 } 4039 4040 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 4041 /* Revision: 1.1.1.1 $ */ 4042 /* Date: 2011/03/12 22:58:18 $ */ 4043 4044 /* 4045 * shifts with subtractions when the result is greater than b. 4046 * 4047 * The method is slightly modified to shift B unconditionally upto just under 4048 * the leading bit of b. This saves alot of multiple precision shifting. 4049 */ 4050 static int 4051 mp_montgomery_calc_normalization (mp_int * a, mp_int * b) 4052 { 4053 int x, bits, res; 4054 4055 /* how many bits of last digit does b use */ 4056 bits = mp_count_bits (b) % DIGIT_BIT; 4057 4058 if (b->used > 1) { 4059 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4060 return res; 4061 } 4062 } else { 4063 mp_set(a, 1); 4064 bits = 1; 4065 } 4066 4067 4068 /* now compute C = A * B mod b */ 4069 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4070 if ((res = mp_mul_2 (a, a)) != MP_OKAY) { 4071 return res; 4072 } 4073 if (mp_cmp_mag (a, b) != MP_LT) { 4074 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { 4075 return res; 4076 } 4077 } 4078 } 4079 4080 return MP_OKAY; 4081 } 4082 4083 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4084 /* Revision: 1.1.1.1 $ */ 4085 /* Date: 2011/03/12 22:58:18 $ */ 4086 4087 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4088 * 4089 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4090 * The value of k changes based on the size of the exponent. 4091 * 4092 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4093 */ 4094 4095 #define TAB_SIZE 256 4096 4097 static int 4098 mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4099 { 4100 mp_int M[TAB_SIZE], res; 4101 mp_digit buf, mp; 4102 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4103 4104 /* use a pointer to the reduction algorithm. This allows us to use 4105 * one of many reduction algorithms without modding the guts of 4106 * the code with if statements everywhere. 4107 */ 4108 int (*redux)(mp_int*,mp_int*,mp_digit); 4109 4110 /* find window size */ 4111 x = mp_count_bits (X); 4112 if (x <= 7) { 4113 winsize = 2; 4114 } else if (x <= 36) { 4115 winsize = 3; 4116 } else if (x <= 140) { 4117 winsize = 4; 4118 } else if (x <= 450) { 4119 winsize = 5; 4120 } else if (x <= 1303) { 4121 winsize = 6; 4122 } else if (x <= 3529) { 4123 winsize = 7; 4124 } else { 4125 winsize = 8; 4126 } 4127 4128 /* init M array */ 4129 /* init first cell */ 4130 if ((err = mp_init(&M[1])) != MP_OKAY) { 4131 return err; 4132 } 4133 4134 /* now init the second half of the array */ 4135 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4136 if ((err = mp_init(&M[x])) != MP_OKAY) { 4137 for (y = 1<<(winsize-1); y < x; y++) { 4138 mp_clear (&M[y]); 4139 } 4140 mp_clear(&M[1]); 4141 return err; 4142 } 4143 } 4144 4145 /* determine and setup reduction code */ 4146 if (redmode == 0) { 4147 /* now setup montgomery */ 4148 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { 4149 goto LBL_M; 4150 } 4151 4152 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4153 if (((unsigned)(P->used * 2 + 1) < MP_WARRAY) && 4154 P->used < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 4155 redux = fast_mp_montgomery_reduce; 4156 } else 4157 { 4158 /* use slower baseline Montgomery method */ 4159 redux = mp_montgomery_reduce; 4160 } 4161 } else if (redmode == 1) { 4162 /* setup DR reduction for moduli of the form B**k - b */ 4163 mp_dr_setup(P, &mp); 4164 redux = mp_dr_reduce; 4165 } else { 4166 /* setup DR reduction for moduli of the form 2**k - b */ 4167 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4168 goto LBL_M; 4169 } 4170 redux = mp_reduce_2k; 4171 } 4172 4173 /* setup result */ 4174 if ((err = mp_init (&res)) != MP_OKAY) { 4175 goto LBL_M; 4176 } 4177 4178 /* create M table 4179 * 4180 4181 * 4182 * The first half of the table is not computed though accept for M[0] and M[1] 4183 */ 4184 4185 if (redmode == 0) { 4186 /* now we need R mod m */ 4187 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { 4188 goto LBL_RES; 4189 } 4190 4191 /* now set M[1] to G * R mod m */ 4192 if ((err = mp_mulmod (&M[1], G, &res, P)) != MP_OKAY) { 4193 goto LBL_RES; 4194 } 4195 } else { 4196 mp_set(&res, 1); 4197 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { 4198 goto LBL_RES; 4199 } 4200 } 4201 4202 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4203 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4204 goto LBL_RES; 4205 } 4206 4207 for (x = 0; x < (winsize - 1); x++) { 4208 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4209 goto LBL_RES; 4210 } 4211 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4212 goto LBL_RES; 4213 } 4214 } 4215 4216 /* create upper table */ 4217 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4218 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4219 goto LBL_RES; 4220 } 4221 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { 4222 goto LBL_RES; 4223 } 4224 } 4225 4226 /* set initial mode and bit cnt */ 4227 mode = 0; 4228 bitcnt = 1; 4229 buf = 0; 4230 digidx = X->used - 1; 4231 bitcpy = 0; 4232 bitbuf = 0; 4233 4234 for (;;) { 4235 /* grab next digit as required */ 4236 if (--bitcnt == 0) { 4237 /* if digidx == -1 we are out of digits so break */ 4238 if (digidx == -1) { 4239 break; 4240 } 4241 /* read next digit and reset bitcnt */ 4242 buf = X->dp[digidx--]; 4243 bitcnt = (int)DIGIT_BIT; 4244 } 4245 4246 /* grab the next msb from the exponent */ 4247 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4248 buf <<= (mp_digit)1; 4249 4250 /* if the bit is zero and mode == 0 then we ignore it 4251 * These represent the leading zero bits before the first 1 bit 4252 * in the exponent. Technically this opt is not required but it 4253 * does lower the # of trivial squaring/reductions used 4254 */ 4255 if (mode == 0 && y == 0) { 4256 continue; 4257 } 4258 4259 /* if the bit is zero and mode == 1 then we square */ 4260 if (mode == 1 && y == 0) { 4261 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4262 goto LBL_RES; 4263 } 4264 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4265 goto LBL_RES; 4266 } 4267 continue; 4268 } 4269 4270 /* else we add it to the window */ 4271 bitbuf |= (y << (winsize - ++bitcpy)); 4272 mode = 2; 4273 4274 if (bitcpy == winsize) { 4275 /* ok window is filled so square as required and multiply */ 4276 /* square first */ 4277 for (x = 0; x < winsize; x++) { 4278 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4279 goto LBL_RES; 4280 } 4281 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4282 goto LBL_RES; 4283 } 4284 } 4285 4286 /* then multiply */ 4287 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 4288 goto LBL_RES; 4289 } 4290 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4291 goto LBL_RES; 4292 } 4293 4294 /* empty window and reset */ 4295 bitcpy = 0; 4296 bitbuf = 0; 4297 mode = 1; 4298 } 4299 } 4300 4301 /* if bits remain then square/multiply */ 4302 if (mode == 2 && bitcpy > 0) { 4303 /* square then multiply if the bit is set */ 4304 for (x = 0; x < bitcpy; x++) { 4305 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4306 goto LBL_RES; 4307 } 4308 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4309 goto LBL_RES; 4310 } 4311 4312 /* get next bit of the window */ 4313 bitbuf <<= 1; 4314 if ((bitbuf & (1 << winsize)) != 0) { 4315 /* then multiply */ 4316 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 4317 goto LBL_RES; 4318 } 4319 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4320 goto LBL_RES; 4321 } 4322 } 4323 } 4324 } 4325 4326 if (redmode == 0) { 4327 /* fixup result if Montgomery reduction is used 4328 * recall that any value in a Montgomery system is 4329 * actually multiplied by R mod n. So we have 4330 * to reduce one more time to cancel out the factor 4331 * of R. 4332 */ 4333 if ((err = redux(&res, P, mp)) != MP_OKAY) { 4334 goto LBL_RES; 4335 } 4336 } 4337 4338 /* swap res with Y */ 4339 mp_exch (&res, Y); 4340 err = MP_OKAY; 4341 LBL_RES:mp_clear (&res); 4342 LBL_M: 4343 mp_clear(&M[1]); 4344 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4345 mp_clear (&M[x]); 4346 } 4347 return err; 4348 } 4349 4350 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_exptmod_fast.c,v $ */ 4351 /* Revision: 1.4 $ */ 4352 /* Date: 2011/03/18 16:43:04 $ */ 4353 4354 /* this is a shell function that calls either the normal or Montgomery 4355 * exptmod functions. Originally the call to the montgomery code was 4356 * embedded in the normal function but that wasted alot of stack space 4357 * for nothing (since 99% of the time the Montgomery code would be called) 4358 */ 4359 static int 4360 mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4361 { 4362 int dr; 4363 4364 /* modulus P must be positive */ 4365 if (P->sign == MP_NEG) { 4366 return MP_VAL; 4367 } 4368 4369 /* if exponent X is negative we have to recurse */ 4370 if (X->sign == MP_NEG) { 4371 mp_int tmpG, tmpX; 4372 int err; 4373 4374 /* first compute 1/G mod P */ 4375 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4376 return err; 4377 } 4378 if ((err = mp_invmod(&tmpG, G, P)) != MP_OKAY) { 4379 mp_clear(&tmpG); 4380 return err; 4381 } 4382 4383 /* now get |X| */ 4384 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4385 mp_clear(&tmpG); 4386 return err; 4387 } 4388 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { 4389 mp_clear_multi(&tmpG, &tmpX, NULL); 4390 return err; 4391 } 4392 4393 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4394 err = mp_exptmod(&tmpG, &tmpX, P, Y); 4395 mp_clear_multi(&tmpG, &tmpX, NULL); 4396 return err; 4397 } 4398 4399 /* modified diminished radix reduction */ 4400 if (mp_reduce_is_2k_l(P) == MP_YES) { 4401 return s_mp_exptmod(G, X, P, Y, 1); 4402 } 4403 4404 /* is it a DR modulus? */ 4405 dr = mp_dr_is_modulus(P); 4406 4407 /* if not, is it a unrestricted DR modulus? */ 4408 if (dr == 0) { 4409 dr = mp_reduce_is_2k(P) << 1; 4410 } 4411 4412 /* if the modulus is odd or dr != 0 use the montgomery method */ 4413 if (BN_is_odd (P) == 1 || dr != 0) { 4414 return mp_exptmod_fast (G, X, P, Y, dr); 4415 } else { 4416 /* otherwise use the generic Barrett reduction technique */ 4417 return s_mp_exptmod (G, X, P, Y, 0); 4418 } 4419 } 4420 4421 /* reverse an array, used for radix code */ 4422 static void 4423 bn_reverse(unsigned char *s, int len) 4424 { 4425 int ix, iy; 4426 unsigned char t; 4427 4428 ix = 0; 4429 iy = len - 1; 4430 while (ix < iy) { 4431 t = s[ix]; 4432 s[ix] = s[iy]; 4433 s[iy] = t; 4434 ++ix; 4435 --iy; 4436 } 4437 } 4438 4439 static int 4440 s_is_power_of_two(mp_digit b, int *p) 4441 { 4442 int x; 4443 4444 /* fast return if no power of two */ 4445 if ((b==0) || (b & (b-1))) { 4446 return 0; 4447 } 4448 4449 for (x = 0; x < DIGIT_BIT; x++) { 4450 if (b == (((mp_digit)1)<<x)) { 4451 *p = x; 4452 return 1; 4453 } 4454 } 4455 return 0; 4456 } 4457 4458 /* single digit division (based on routine from MPI) */ 4459 static int 4460 mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4461 { 4462 mp_int q; 4463 mp_word w; 4464 mp_digit t; 4465 int res, ix; 4466 4467 /* cannot divide by zero */ 4468 if (b == 0) { 4469 return MP_VAL; 4470 } 4471 4472 /* quick outs */ 4473 if (b == 1 || mp_iszero(a) == 1) { 4474 if (d != NULL) { 4475 *d = 0; 4476 } 4477 if (c != NULL) { 4478 return mp_copy(a, c); 4479 } 4480 return MP_OKAY; 4481 } 4482 4483 /* power of two ? */ 4484 if (s_is_power_of_two(b, &ix) == 1) { 4485 if (d != NULL) { 4486 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4487 } 4488 if (c != NULL) { 4489 return mp_div_2d(a, ix, c, NULL); 4490 } 4491 return MP_OKAY; 4492 } 4493 4494 #ifdef BN_MP_DIV_3_C 4495 /* three? */ 4496 if (b == 3) { 4497 return mp_div_3(a, c, d); 4498 } 4499 #endif 4500 4501 /* no easy answer [c'est la vie]. Just division */ 4502 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4503 return res; 4504 } 4505 4506 q.used = a->used; 4507 q.sign = a->sign; 4508 w = 0; 4509 for (ix = a->used - 1; ix >= 0; ix--) { 4510 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4511 4512 if (w >= b) { 4513 t = (mp_digit)(w / b); 4514 w -= ((mp_word)t) * ((mp_word)b); 4515 } else { 4516 t = 0; 4517 } 4518 q.dp[ix] = (mp_digit)t; 4519 } 4520 4521 if (d != NULL) { 4522 *d = (mp_digit)w; 4523 } 4524 4525 if (c != NULL) { 4526 mp_clamp(&q); 4527 mp_exch(&q, c); 4528 } 4529 mp_clear(&q); 4530 4531 return res; 4532 } 4533 4534 static int 4535 mp_mod_d(mp_int *a, mp_digit b, mp_digit *c) 4536 { 4537 return mp_div_d(a, b, NULL, c); 4538 } 4539 4540 static const mp_digit ltm_prime_tab[] = { 4541 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4542 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4543 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4544 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4545 #ifndef MP_8BIT 4546 0x0083, 4547 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4548 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4549 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4550 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4551 4552 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4553 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4554 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4555 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4556 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4557 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4558 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4559 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4560 4561 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4562 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4563 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4564 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4565 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4566 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4567 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4568 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4569 4570 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4571 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4572 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4573 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4574 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4575 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4576 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4577 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4578 #endif 4579 }; 4580 4581 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4582 4583 static int 4584 mp_prime_is_divisible(mp_int *a, int *result) 4585 { 4586 int err, ix; 4587 mp_digit res; 4588 4589 /* default to not */ 4590 *result = MP_NO; 4591 4592 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4593 /* what is a mod LBL_prime_tab[ix] */ 4594 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) { 4595 return err; 4596 } 4597 4598 /* is the residue zero? */ 4599 if (res == 0) { 4600 *result = MP_YES; 4601 return MP_OKAY; 4602 } 4603 } 4604 4605 return MP_OKAY; 4606 } 4607 4608 /* single digit addition */ 4609 static int 4610 mp_add_d(mp_int *a, mp_digit b, mp_int *c) 4611 { 4612 int res, ix, oldused; 4613 mp_digit *tmpa, *tmpc, mu; 4614 4615 /* grow c as required */ 4616 if (c->alloc < a->used + 1) { 4617 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4618 return res; 4619 } 4620 } 4621 4622 /* if a is negative and |a| >= b, call c = |a| - b */ 4623 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4624 /* temporarily fix sign of a */ 4625 a->sign = MP_ZPOS; 4626 4627 /* c = |a| - b */ 4628 res = mp_sub_d(a, b, c); 4629 4630 /* fix sign */ 4631 a->sign = c->sign = MP_NEG; 4632 4633 /* clamp */ 4634 mp_clamp(c); 4635 4636 return res; 4637 } 4638 4639 /* old number of used digits in c */ 4640 oldused = c->used; 4641 4642 /* sign always positive */ 4643 c->sign = MP_ZPOS; 4644 4645 /* source alias */ 4646 tmpa = a->dp; 4647 4648 /* destination alias */ 4649 tmpc = c->dp; 4650 4651 /* if a is positive */ 4652 if (a->sign == MP_ZPOS) { 4653 /* add digit, after this we're propagating 4654 * the carry. 4655 */ 4656 *tmpc = *tmpa++ + b; 4657 mu = *tmpc >> DIGIT_BIT; 4658 *tmpc++ &= MP_MASK; 4659 4660 /* now handle rest of the digits */ 4661 for (ix = 1; ix < a->used; ix++) { 4662 *tmpc = *tmpa++ + mu; 4663 mu = *tmpc >> DIGIT_BIT; 4664 *tmpc++ &= MP_MASK; 4665 } 4666 /* set final carry */ 4667 ix++; 4668 *tmpc++ = mu; 4669 4670 /* setup size */ 4671 c->used = a->used + 1; 4672 } else { 4673 /* a was negative and |a| < b */ 4674 c->used = 1; 4675 4676 /* the result is a single digit */ 4677 if (a->used == 1) { 4678 *tmpc++ = b - a->dp[0]; 4679 } else { 4680 *tmpc++ = b; 4681 } 4682 4683 /* setup count so the clearing of oldused 4684 * can fall through correctly 4685 */ 4686 ix = 1; 4687 } 4688 4689 /* now zero to oldused */ 4690 while (ix++ < oldused) { 4691 *tmpc++ = 0; 4692 } 4693 mp_clamp(c); 4694 4695 return MP_OKAY; 4696 } 4697 4698 /* single digit subtraction */ 4699 static int 4700 mp_sub_d(mp_int *a, mp_digit b, mp_int *c) 4701 { 4702 mp_digit *tmpa, *tmpc, mu; 4703 int res, ix, oldused; 4704 4705 /* grow c as required */ 4706 if (c->alloc < a->used + 1) { 4707 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4708 return res; 4709 } 4710 } 4711 4712 /* if a is negative just do an unsigned 4713 * addition [with fudged signs] 4714 */ 4715 if (a->sign == MP_NEG) { 4716 a->sign = MP_ZPOS; 4717 res = mp_add_d(a, b, c); 4718 a->sign = c->sign = MP_NEG; 4719 4720 /* clamp */ 4721 mp_clamp(c); 4722 4723 return res; 4724 } 4725 4726 /* setup regs */ 4727 oldused = c->used; 4728 tmpa = a->dp; 4729 tmpc = c->dp; 4730 4731 /* if a <= b simply fix the single digit */ 4732 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4733 if (a->used == 1) { 4734 *tmpc++ = b - *tmpa; 4735 } else { 4736 *tmpc++ = b; 4737 } 4738 ix = 1; 4739 4740 /* negative/1digit */ 4741 c->sign = MP_NEG; 4742 c->used = 1; 4743 } else { 4744 /* positive/size */ 4745 c->sign = MP_ZPOS; 4746 c->used = a->used; 4747 4748 /* subtract first digit */ 4749 *tmpc = *tmpa++ - b; 4750 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4751 *tmpc++ &= MP_MASK; 4752 4753 /* handle rest of the digits */ 4754 for (ix = 1; ix < a->used; ix++) { 4755 *tmpc = *tmpa++ - mu; 4756 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4757 *tmpc++ &= MP_MASK; 4758 } 4759 } 4760 4761 /* zero excess digits */ 4762 while (ix++ < oldused) { 4763 *tmpc++ = 0; 4764 } 4765 mp_clamp(c); 4766 return MP_OKAY; 4767 } 4768 4769 static const int lnz[16] = { 4770 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4771 }; 4772 4773 /* Counts the number of lsbs which are zero before the first zero bit */ 4774 static int 4775 mp_cnt_lsb(mp_int *a) 4776 { 4777 int x; 4778 mp_digit q, qq; 4779 4780 /* easy out */ 4781 if (mp_iszero(a) == 1) { 4782 return 0; 4783 } 4784 4785 /* scan lower digits until non-zero */ 4786 for (x = 0; x < a->used && a->dp[x] == 0; x++); 4787 q = a->dp[x]; 4788 x *= DIGIT_BIT; 4789 4790 /* now scan this digit until a 1 is found */ 4791 if ((q & 1) == 0) { 4792 do { 4793 qq = q & 15; 4794 /* LINTED previous op ensures range of qq */ 4795 x += lnz[qq]; 4796 q >>= 4; 4797 } while (qq == 0); 4798 } 4799 return x; 4800 } 4801 4802 /* c = a * a (mod b) */ 4803 static int 4804 mp_sqrmod(mp_int *a, mp_int *b, mp_int *c) 4805 { 4806 int res; 4807 mp_int t; 4808 4809 if ((res = mp_init (&t)) != MP_OKAY) { 4810 return res; 4811 } 4812 4813 if ((res = mp_sqr (a, &t)) != MP_OKAY) { 4814 mp_clear (&t); 4815 return res; 4816 } 4817 res = mp_mod (&t, b, c); 4818 mp_clear (&t); 4819 return res; 4820 } 4821 static int 4822 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4823 { 4824 mp_int n1, y, r; 4825 int s, j, err; 4826 4827 /* default */ 4828 *result = MP_NO; 4829 4830 /* ensure b > 1 */ 4831 if (mp_cmp_d(b, 1) != MP_GT) { 4832 return MP_VAL; 4833 } 4834 4835 /* get n1 = a - 1 */ 4836 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) { 4837 return err; 4838 } 4839 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { 4840 goto LBL_N1; 4841 } 4842 4843 /* set 2**s * r = n1 */ 4844 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { 4845 goto LBL_N1; 4846 } 4847 4848 /* count the number of least significant bits 4849 * which are zero 4850 */ 4851 s = mp_cnt_lsb(&r); 4852 4853 /* now divide n - 1 by 2**s */ 4854 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { 4855 goto LBL_R; 4856 } 4857 4858 /* compute y = b**r mod a */ 4859 if ((err = mp_init (&y)) != MP_OKAY) { 4860 goto LBL_R; 4861 } 4862 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) { 4863 goto LBL_Y; 4864 } 4865 4866 /* if y != 1 and y != n1 do */ 4867 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) { 4868 j = 1; 4869 /* while j <= s-1 and y != n1 */ 4870 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { 4871 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { 4872 goto LBL_Y; 4873 } 4874 4875 /* if y == 1 then composite */ 4876 if (mp_cmp_d (&y, 1) == MP_EQ) { 4877 goto LBL_Y; 4878 } 4879 4880 ++j; 4881 } 4882 4883 /* if y != n1 then composite */ 4884 if (mp_cmp (&y, &n1) != MP_EQ) { 4885 goto LBL_Y; 4886 } 4887 } 4888 4889 /* probably prime now */ 4890 *result = MP_YES; 4891 LBL_Y:mp_clear (&y); 4892 LBL_R:mp_clear (&r); 4893 LBL_N1:mp_clear (&n1); 4894 return err; 4895 } 4896 4897 /* performs a variable number of rounds of Miller-Rabin 4898 * 4899 * Probability of error after t rounds is no more than 4900 4901 * 4902 * Sets result to 1 if probably prime, 0 otherwise 4903 */ 4904 static int 4905 mp_prime_is_prime(mp_int *a, int t, int *result) 4906 { 4907 mp_int b; 4908 int ix, err, res; 4909 4910 /* default to no */ 4911 *result = MP_NO; 4912 4913 /* valid value of t? */ 4914 if (t <= 0 || t > (int)PRIME_SIZE) { 4915 return MP_VAL; 4916 } 4917 4918 /* is the input equal to one of the primes in the table? */ 4919 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4920 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { 4921 *result = 1; 4922 return MP_OKAY; 4923 } 4924 } 4925 4926 /* first perform trial division */ 4927 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { 4928 return err; 4929 } 4930 4931 /* return if it was trivially divisible */ 4932 if (res == MP_YES) { 4933 return MP_OKAY; 4934 } 4935 4936 /* now perform the miller-rabin rounds */ 4937 if ((err = mp_init (&b)) != MP_OKAY) { 4938 return err; 4939 } 4940 4941 for (ix = 0; ix < t; ix++) { 4942 /* set the prime */ 4943 mp_set (&b, ltm_prime_tab[ix]); 4944 4945 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { 4946 goto LBL_B; 4947 } 4948 4949 if (res == MP_NO) { 4950 goto LBL_B; 4951 } 4952 } 4953 4954 /* passed the test */ 4955 *result = MP_YES; 4956 LBL_B:mp_clear (&b); 4957 return err; 4958 } 4959 4960 /* returns size of ASCII reprensentation */ 4961 static int 4962 mp_radix_size (mp_int *a, int radix, int *size) 4963 { 4964 int res, digs; 4965 mp_int t; 4966 mp_digit d; 4967 4968 *size = 0; 4969 4970 /* special case for binary */ 4971 if (radix == 2) { 4972 *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4973 return MP_OKAY; 4974 } 4975 4976 /* make sure the radix is in range */ 4977 if (radix < 2 || radix > 64) { 4978 return MP_VAL; 4979 } 4980 4981 if (mp_iszero(a) == MP_YES) { 4982 *size = 2; 4983 return MP_OKAY; 4984 } 4985 4986 /* digs is the digit count */ 4987 digs = 0; 4988 4989 /* if it's negative add one for the sign */ 4990 if (a->sign == MP_NEG) { 4991 ++digs; 4992 } 4993 4994 /* init a copy of the input */ 4995 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 4996 return res; 4997 } 4998 4999 /* force temp to positive */ 5000 t.sign = MP_ZPOS; 5001 5002 /* fetch out all of the digits */ 5003 while (mp_iszero (&t) == MP_NO) { 5004 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5005 mp_clear (&t); 5006 return res; 5007 } 5008 ++digs; 5009 } 5010 mp_clear (&t); 5011 5012 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 5013 *size = digs + 1; 5014 return MP_OKAY; 5015 } 5016 5017 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 5018 5019 /* stores a bignum as a ASCII string in a given radix (2..64) 5020 * 5021 * Stores upto maxlen-1 chars and always a NULL byte 5022 */ 5023 static int 5024 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 5025 { 5026 int res, digs; 5027 mp_int t; 5028 mp_digit d; 5029 char *_s = str; 5030 5031 /* check range of the maxlen, radix */ 5032 if (maxlen < 2 || radix < 2 || radix > 64) { 5033 return MP_VAL; 5034 } 5035 5036 /* quick out if its zero */ 5037 if (mp_iszero(a) == MP_YES) { 5038 *str++ = '0'; 5039 *str = '\0'; 5040 return MP_OKAY; 5041 } 5042 5043 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 5044 return res; 5045 } 5046 5047 /* if it is negative output a - */ 5048 if (t.sign == MP_NEG) { 5049 /* we have to reverse our digits later... but not the - sign!! */ 5050 ++_s; 5051 5052 /* store the flag and mark the number as positive */ 5053 *str++ = '-'; 5054 t.sign = MP_ZPOS; 5055 5056 /* subtract a char */ 5057 --maxlen; 5058 } 5059 5060 digs = 0; 5061 while (mp_iszero (&t) == 0) { 5062 if (--maxlen < 1) { 5063 /* no more room */ 5064 break; 5065 } 5066 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5067 mp_clear (&t); 5068 return res; 5069 } 5070 /* LINTED -- radix' range is checked above, limits d's range */ 5071 *str++ = mp_s_rmap[d]; 5072 ++digs; 5073 } 5074 5075 /* reverse the digits of the string. In this case _s points 5076 * to the first digit [exluding the sign] of the number 5077 */ 5078 bn_reverse ((unsigned char *)_s, digs); 5079 5080 /* append a NULL so the string is properly terminated */ 5081 *str = '\0'; 5082 5083 mp_clear (&t); 5084 return MP_OKAY; 5085 } 5086 5087 static char * 5088 formatbn(const BIGNUM *a, const int radix) 5089 { 5090 char *s; 5091 int len; 5092 5093 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5094 return NULL; 5095 } 5096 if ((s = netpgp_allocate(1, (size_t)len)) != NULL) { 5097 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5098 netpgp_deallocate(s, (size_t)len); 5099 return NULL; 5100 } 5101 } 5102 return s; 5103 } 5104 5105 static int 5106 mp_getradix_num(mp_int *a, int radix, char *s) 5107 { 5108 int err, ch, neg, y; 5109 5110 /* clear a */ 5111 mp_zero(a); 5112 5113 /* if first digit is - then set negative */ 5114 if ((ch = *s++) == '-') { 5115 neg = MP_NEG; 5116 ch = *s++; 5117 } else { 5118 neg = MP_ZPOS; 5119 } 5120 5121 for (;;) { 5122 /* find y in the radix map */ 5123 for (y = 0; y < radix; y++) { 5124 if (mp_s_rmap[y] == ch) { 5125 break; 5126 } 5127 } 5128 if (y == radix) { 5129 break; 5130 } 5131 5132 /* shift up and add */ 5133 if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) { 5134 return err; 5135 } 5136 if ((err = mp_add_d(a, y, a)) != MP_OKAY) { 5137 return err; 5138 } 5139 5140 ch = *s++; 5141 } 5142 if (mp_cmp_d(a, 0) != MP_EQ) { 5143 a->sign = neg; 5144 } 5145 5146 return MP_OKAY; 5147 } 5148 5149 static int 5150 getbn(BIGNUM **a, const char *str, int radix) 5151 { 5152 int len; 5153 5154 if (a == NULL || str == NULL || (*a = BN_new()) == NULL) { 5155 return 0; 5156 } 5157 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5158 return 0; 5159 } 5160 mp_radix_size(__UNCONST(a), radix, &len); 5161 return len - 1; 5162 } 5163 5164 /* d = a - b (mod c) */ 5165 static int 5166 mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d) 5167 { 5168 int res; 5169 mp_int t; 5170 5171 5172 if ((res = mp_init (&t)) != MP_OKAY) { 5173 return res; 5174 } 5175 5176 if ((res = mp_sub (a, b, &t)) != MP_OKAY) { 5177 mp_clear (&t); 5178 return res; 5179 } 5180 res = mp_mod (&t, c, d); 5181 mp_clear (&t); 5182 return res; 5183 } 5184 5185 /**************************************************************************/ 5186 5187 /* BIGNUM emulation layer */ 5188 5189 /* essentiually, these are just wrappers around the libtommath functions */ 5190 /* usually the order of args changes */ 5191 /* the BIGNUM API tends to have more const poisoning */ 5192 /* these wrappers also check the arguments passed for sanity */ 5193 5194 BIGNUM * 5195 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret) 5196 { 5197 if (data == NULL) { 5198 return BN_new(); 5199 } 5200 if (ret == NULL) { 5201 ret = BN_new(); 5202 } 5203 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5204 } 5205 5206 /* store in unsigned [big endian] format */ 5207 int 5208 BN_bn2bin(const BIGNUM *a, unsigned char *b) 5209 { 5210 BIGNUM t; 5211 int x; 5212 5213 if (a == NULL || b == NULL) { 5214 return -1; 5215 } 5216 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5217 return -1; 5218 } 5219 for (x = 0; !BN_is_zero(&t) ; ) { 5220 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5221 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY) { 5222 mp_clear(&t); 5223 return -1; 5224 } 5225 } 5226 bn_reverse(b, x); 5227 mp_clear(&t); 5228 return x; 5229 } 5230 5231 void 5232 BN_init(BIGNUM *a) 5233 { 5234 if (a != NULL) { 5235 mp_init(a); 5236 } 5237 } 5238 5239 BIGNUM * 5240 BN_new(void) 5241 { 5242 BIGNUM *a; 5243 5244 if ((a = netpgp_allocate(1, sizeof(*a))) != NULL) { 5245 mp_init(a); 5246 } 5247 return a; 5248 } 5249 5250 /* copy, b = a */ 5251 int 5252 BN_copy(BIGNUM *b, const BIGNUM *a) 5253 { 5254 if (a == NULL || b == NULL) { 5255 return MP_VAL; 5256 } 5257 return mp_copy(__UNCONST(a), b); 5258 } 5259 5260 BIGNUM * 5261 BN_dup(const BIGNUM *a) 5262 { 5263 BIGNUM *ret; 5264 5265 if (a == NULL) { 5266 return NULL; 5267 } 5268 if ((ret = BN_new()) != NULL) { 5269 BN_copy(ret, a); 5270 } 5271 return ret; 5272 } 5273 5274 void 5275 BN_swap(BIGNUM *a, BIGNUM *b) 5276 { 5277 if (a && b) { 5278 mp_exch(a, b); 5279 } 5280 } 5281 5282 int 5283 BN_lshift(BIGNUM *r, const BIGNUM *a, int n) 5284 { 5285 if (r == NULL || a == NULL || n < 0) { 5286 return 0; 5287 } 5288 BN_copy(r, a); 5289 return mp_lshd(r, n) == MP_OKAY; 5290 } 5291 5292 int 5293 BN_lshift1(BIGNUM *r, BIGNUM *a) 5294 { 5295 if (r == NULL || a == NULL) { 5296 return 0; 5297 } 5298 BN_copy(r, a); 5299 return mp_lshd(r, 1) == MP_OKAY; 5300 } 5301 5302 int 5303 BN_rshift(BIGNUM *r, const BIGNUM *a, int n) 5304 { 5305 if (r == NULL || a == NULL || n < 0) { 5306 return MP_VAL; 5307 } 5308 BN_copy(r, a); 5309 return mp_rshd(r, n) == MP_OKAY; 5310 } 5311 5312 int 5313 BN_rshift1(BIGNUM *r, BIGNUM *a) 5314 { 5315 if (r == NULL || a == NULL) { 5316 return 0; 5317 } 5318 BN_copy(r, a); 5319 return mp_rshd(r, 1) == MP_OKAY; 5320 } 5321 5322 int 5323 BN_set_word(BIGNUM *a, BN_ULONG w) 5324 { 5325 if (a == NULL) { 5326 return 0; 5327 } 5328 mp_set(a, w); 5329 return 1; 5330 } 5331 5332 int 5333 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5334 { 5335 if (a == NULL || b == NULL || r == NULL) { 5336 return 0; 5337 } 5338 return mp_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5339 } 5340 5341 int 5342 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5343 { 5344 if (a == NULL || b == NULL || r == NULL) { 5345 return 0; 5346 } 5347 return mp_sub(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5348 } 5349 5350 int 5351 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 5352 { 5353 if (a == NULL || b == NULL || r == NULL) { 5354 return 0; 5355 } 5356 USE_ARG(ctx); 5357 return mp_mul(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5358 } 5359 5360 int 5361 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx) 5362 { 5363 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5364 return 0; 5365 } 5366 USE_ARG(ctx); 5367 return mp_div(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5368 } 5369 5370 void 5371 BN_free(BIGNUM *a) 5372 { 5373 if (a) { 5374 mp_clear(a); 5375 } 5376 } 5377 5378 void 5379 BN_clear(BIGNUM *a) 5380 { 5381 if (a) { 5382 mp_clear(a); 5383 } 5384 } 5385 5386 void 5387 BN_clear_free(BIGNUM *a) 5388 { 5389 if (a) { 5390 mp_clear(a); 5391 } 5392 } 5393 5394 int 5395 BN_num_bytes(const BIGNUM *a) 5396 { 5397 if (a == NULL) { 5398 return MP_VAL; 5399 } 5400 return mp_unsigned_bin_size(__UNCONST(a)); 5401 } 5402 5403 int 5404 BN_num_bits(const BIGNUM *a) 5405 { 5406 if (a == NULL) { 5407 return 0; 5408 } 5409 return mp_count_bits(a); 5410 } 5411 5412 void 5413 BN_set_negative(BIGNUM *a, int n) 5414 { 5415 if (a) { 5416 a->sign = (n) ? MP_NEG : 0; 5417 } 5418 } 5419 5420 int 5421 BN_cmp(BIGNUM *a, BIGNUM *b) 5422 { 5423 if (a == NULL || b == NULL) { 5424 return MP_VAL; 5425 } 5426 switch(mp_cmp(a, b)) { 5427 case MP_LT: 5428 return -1; 5429 case MP_GT: 5430 return 1; 5431 case MP_EQ: 5432 default: 5433 return 0; 5434 } 5435 } 5436 5437 int 5438 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx) 5439 { 5440 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5441 return MP_VAL; 5442 } 5443 USE_ARG(ctx); 5444 return mp_exptmod(G, X, P, Y) == MP_OKAY; 5445 } 5446 5447 BIGNUM * 5448 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) 5449 { 5450 USE_ARG(ctx); 5451 if (r == NULL || a == NULL || n == NULL) { 5452 return NULL; 5453 } 5454 return (mp_invmod(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5455 } 5456 5457 int 5458 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5459 { 5460 USE_ARG(ctx); 5461 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5462 return 0; 5463 } 5464 return mp_mulmod(ret, a, b, __UNCONST(m)) == MP_OKAY; 5465 } 5466 5467 BN_CTX * 5468 BN_CTX_new(void) 5469 { 5470 return netpgp_allocate(1, sizeof(BN_CTX)); 5471 } 5472 5473 void 5474 BN_CTX_init(BN_CTX *c) 5475 { 5476 if (c != NULL) { 5477 c->arraysize = 15; 5478 if ((c->v = netpgp_allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5479 c->arraysize = 0; 5480 } 5481 } 5482 } 5483 5484 BIGNUM * 5485 BN_CTX_get(BN_CTX *ctx) 5486 { 5487 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5488 return NULL; 5489 } 5490 return ctx->v[ctx->count++] = BN_new(); 5491 } 5492 5493 void 5494 BN_CTX_start(BN_CTX *ctx) 5495 { 5496 BN_CTX_init(ctx); 5497 } 5498 5499 void 5500 BN_CTX_free(BN_CTX *c) 5501 { 5502 unsigned i; 5503 5504 if (c != NULL && c->v != NULL) { 5505 for (i = 0 ; i < c->count ; i++) { 5506 BN_clear_free(c->v[i]); 5507 } 5508 netpgp_deallocate(c->v, sizeof(*c->v) * c->arraysize); 5509 } 5510 } 5511 5512 void 5513 BN_CTX_end(BN_CTX *ctx) 5514 { 5515 BN_CTX_free(ctx); 5516 } 5517 5518 char * 5519 BN_bn2hex(const BIGNUM *a) 5520 { 5521 return (a == NULL) ? NULL : formatbn(a, 16); 5522 } 5523 5524 char * 5525 BN_bn2dec(const BIGNUM *a) 5526 { 5527 return (a == NULL) ? NULL : formatbn(a, 10); 5528 } 5529 5530 #ifndef _KERNEL 5531 int 5532 BN_print_fp(FILE *fp, const BIGNUM *a) 5533 { 5534 char *s; 5535 int ret; 5536 5537 if (fp == NULL || a == NULL) { 5538 return 0; 5539 } 5540 s = BN_bn2hex(a); 5541 ret = fprintf(fp, "%s", s); 5542 netpgp_deallocate(s, strlen(s) + 1); 5543 return ret; 5544 } 5545 #endif 5546 5547 #ifdef BN_RAND_NEEDED 5548 int 5549 BN_rand(BIGNUM *rnd, int bits, int top, int bottom) 5550 { 5551 uint64_t r; 5552 int digits; 5553 int i; 5554 5555 if (rnd == NULL) { 5556 return 0; 5557 } 5558 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5559 for (i = 0 ; i < digits ; i++) { 5560 r = (uint64_t)arc4random(); 5561 r <<= 32; 5562 r |= arc4random(); 5563 rnd->dp[i] = (r & MP_MASK); 5564 } 5565 if (top == 0) { 5566 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5567 } 5568 if (top == 1) { 5569 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5570 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5571 } 5572 if (bottom) { 5573 rnd->dp[0] |= 0x1; 5574 } 5575 return 1; 5576 } 5577 5578 int 5579 BN_rand_range(BIGNUM *rnd, BIGNUM *range) 5580 { 5581 if (rnd == NULL || range == NULL || BN_is_zero(range)) { 5582 return 0; 5583 } 5584 BN_rand(rnd, BN_num_bits(range), 1, 0); 5585 return mp_mod(rnd, range, rnd) == MP_OKAY; 5586 } 5587 #endif 5588 5589 int 5590 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg) 5591 { 5592 int primality; 5593 5594 if (a == NULL) { 5595 return 0; 5596 } 5597 USE_ARG(ctx); 5598 USE_ARG(cb_arg); 5599 USE_ARG(callback); 5600 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5601 } 5602 5603 const BIGNUM * 5604 BN_value_one(void) 5605 { 5606 static mp_digit digit = 1UL; 5607 static const BIGNUM one = { &digit, 1, 1, 0 }; 5608 5609 return &one; 5610 } 5611 5612 int 5613 BN_hex2bn(BIGNUM **a, const char *str) 5614 { 5615 return getbn(a, str, 16); 5616 } 5617 5618 int 5619 BN_dec2bn(BIGNUM **a, const char *str) 5620 { 5621 return getbn(a, str, 10); 5622 } 5623 5624 int 5625 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5626 { 5627 USE_ARG(ctx); 5628 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5629 return 0; 5630 } 5631 return mp_submod(a, b, __UNCONST(m), r) == MP_OKAY; 5632 } 5633 5634 int 5635 BN_is_bit_set(const BIGNUM *a, int n) 5636 { 5637 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5638 return 0; 5639 } 5640 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5641 } 5642