1 /*- 2 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 /* LibTomMath, multiple-precision integer library -- Tom St Denis 26 * 27 * LibTomMath is a library that provides multiple-precision 28 * integer arithmetic as well as number theoretic functionality. 29 * 30 * The library was designed directly after the MPI library by 31 * Michael Fromberger but has been written from scratch with 32 * additional optimizations in place. 33 * 34 * The library is free for all purposes without any express 35 * guarantee it works. 36 * 37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 38 */ 39 #include <sys/types.h> 40 #include <sys/stat.h> 41 #include <sys/param.h> 42 43 #ifdef _KERNEL 44 # include <sys/kmem.h> 45 #else 46 # include <arpa/inet.h> 47 # include <ctype.h> 48 # include <inttypes.h> 49 # include <stdarg.h> 50 # include <stdio.h> 51 # include <stdlib.h> 52 # include <string.h> 53 # include <time.h> 54 # include <unistd.h> 55 #endif 56 57 #include "misc.h" 58 #include "bn.h" 59 #include "digest.h" 60 61 /**************************************************************************/ 62 63 /* LibTomMath, multiple-precision integer library -- Tom St Denis 64 * 65 * LibTomMath is a library that provides multiple-precision 66 * integer arithmetic as well as number theoretic functionality. 67 * 68 * The library was designed directly after the MPI library by 69 * Michael Fromberger but has been written from scratch with 70 * additional optimizations in place. 71 * 72 * The library is free for all purposes without any express 73 * guarantee it works. 74 * 75 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 76 */ 77 78 #define MP_PREC 32 79 #define DIGIT_BIT 28 80 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 81 82 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 83 84 #define MP_NO 0 85 #define MP_YES 1 86 87 #ifndef USE_ARG 88 #define USE_ARG(x) /*LINTED*/(void)&(x) 89 #endif 90 91 #ifndef __arraycount 92 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 93 #endif 94 95 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO) 96 97 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) 98 99 typedef int mp_err; 100 101 static int mp_mul(mp_int * a, mp_int * b, mp_int * c); 102 static int mp_sqr(mp_int * a, mp_int * b); 103 104 static int mp_sub_d(mp_int *a, mp_digit b, mp_int *c); 105 106 /* set to zero */ 107 static void 108 mp_zero(mp_int *a) 109 { 110 int n; 111 mp_digit *tmp; 112 113 a->sign = MP_ZPOS; 114 a->used = 0; 115 116 tmp = a->dp; 117 /* XXX - memset */ 118 for (n = 0; n < a->alloc; n++) { 119 *tmp++ = 0; 120 } 121 } 122 123 /* grow as required */ 124 static int 125 mp_grow(mp_int *a, int size) 126 { 127 int i; 128 mp_digit *tmp; 129 130 /* if the alloc size is smaller alloc more ram */ 131 if (a->alloc < size) { 132 /* ensure there are always at least MP_PREC digits extra on top */ 133 size += (MP_PREC * 2) - (size % MP_PREC); 134 135 /* reallocate the array a->dp 136 * 137 * We store the return in a temporary variable 138 * in case the operation failed we don't want 139 * to overwrite the dp member of a. 140 */ 141 tmp = realloc(a->dp, sizeof(*tmp) * size); 142 if (tmp == NULL) { 143 /* reallocation failed but "a" is still valid [can be freed] */ 144 return MP_MEM; 145 } 146 147 /* reallocation succeeded so set a->dp */ 148 a->dp = tmp; 149 150 /* zero excess digits */ 151 i = a->alloc; 152 a->alloc = size; 153 for (; i < a->alloc; i++) { 154 a->dp[i] = 0; 155 } 156 } 157 return MP_OKAY; 158 } 159 160 /* shift left a certain amount of digits */ 161 static int 162 mp_lshd (mp_int * a, int b) 163 { 164 int x, res; 165 166 /* if its less than zero return */ 167 if (b <= 0) { 168 return MP_OKAY; 169 } 170 171 /* grow to fit the new digits */ 172 if (a->alloc < a->used + b) { 173 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { 174 return res; 175 } 176 } 177 178 { 179 mp_digit *top, *bottom; 180 181 /* increment the used by the shift amount then copy upwards */ 182 a->used += b; 183 184 /* top */ 185 top = a->dp + a->used - 1; 186 187 /* base */ 188 bottom = a->dp + a->used - 1 - b; 189 190 /* much like mp_rshd this is implemented using a sliding window 191 * except the window goes the otherway around. Copying from 192 * the bottom to the top. see bn_mp_rshd.c for more info. 193 */ 194 for (x = a->used - 1; x >= b; x--) { 195 *top-- = *bottom--; 196 } 197 198 /* zero the lower digits */ 199 top = a->dp; 200 for (x = 0; x < b; x++) { 201 *top++ = 0; 202 } 203 } 204 return MP_OKAY; 205 } 206 207 /* trim unused digits 208 * 209 * This is used to ensure that leading zero digits are 210 * trimed and the leading "used" digit will be non-zero 211 * Typically very fast. Also fixes the sign if there 212 * are no more leading digits 213 */ 214 static void 215 mp_clamp (mp_int * a) 216 { 217 /* decrease used while the most significant digit is 218 * zero. 219 */ 220 while (a->used > 0 && a->dp[a->used - 1] == 0) { 221 --(a->used); 222 } 223 224 /* reset the sign flag if used == 0 */ 225 if (a->used == 0) { 226 a->sign = MP_ZPOS; 227 } 228 } 229 230 /* copy, b = a */ 231 static int 232 mp_copy(BIGNUM *a, BIGNUM *b) 233 { 234 int res, n; 235 236 /* if dst == src do nothing */ 237 if (a == b) { 238 return MP_OKAY; 239 } 240 if (a == NULL || b == NULL) { 241 return MP_VAL; 242 } 243 244 /* grow dest */ 245 if (b->alloc < a->used) { 246 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 247 return res; 248 } 249 } 250 251 /* zero b and copy the parameters over */ 252 { 253 mp_digit *tmpa, *tmpb; 254 255 /* pointer aliases */ 256 257 /* source */ 258 tmpa = a->dp; 259 260 /* destination */ 261 tmpb = b->dp; 262 263 /* copy all the digits */ 264 for (n = 0; n < a->used; n++) { 265 *tmpb++ = *tmpa++; 266 } 267 268 /* clear high digits */ 269 for (; n < b->used; n++) { 270 *tmpb++ = 0; 271 } 272 } 273 274 /* copy used count and sign */ 275 b->used = a->used; 276 b->sign = a->sign; 277 return MP_OKAY; 278 } 279 280 /* shift left by a certain bit count */ 281 static int 282 mp_mul_2d(mp_int *a, int b, mp_int *c) 283 { 284 mp_digit d; 285 int res; 286 287 /* copy */ 288 if (a != c) { 289 if ((res = mp_copy (a, c)) != MP_OKAY) { 290 return res; 291 } 292 } 293 294 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 295 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 296 return res; 297 } 298 } 299 300 /* shift by as many digits in the bit count */ 301 if (b >= (int)DIGIT_BIT) { 302 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { 303 return res; 304 } 305 } 306 307 /* shift any bit count < DIGIT_BIT */ 308 d = (mp_digit) (b % DIGIT_BIT); 309 if (d != 0) { 310 mp_digit *tmpc, shift, mask, r, rr; 311 int x; 312 313 /* bitmask for carries */ 314 mask = (((mp_digit)1) << d) - 1; 315 316 /* shift for msbs */ 317 shift = DIGIT_BIT - d; 318 319 /* alias */ 320 tmpc = c->dp; 321 322 /* carry */ 323 r = 0; 324 for (x = 0; x < c->used; x++) { 325 /* get the higher bits of the current word */ 326 rr = (*tmpc >> shift) & mask; 327 328 /* shift the current word and OR in the carry */ 329 *tmpc = ((*tmpc << d) | r) & MP_MASK; 330 ++tmpc; 331 332 /* set the carry to the carry bits of the current word */ 333 r = rr; 334 } 335 336 /* set final carry */ 337 if (r != 0) { 338 c->dp[(c->used)++] = r; 339 } 340 } 341 mp_clamp (c); 342 return MP_OKAY; 343 } 344 345 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 346 static int 347 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 348 { 349 int res; 350 351 /* make sure there are at least two digits */ 352 if (a->alloc < 2) { 353 if ((res = mp_grow(a, 2)) != MP_OKAY) { 354 return res; 355 } 356 } 357 358 /* zero the int */ 359 mp_zero (a); 360 361 /* read the bytes in */ 362 while (c-- > 0) { 363 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { 364 return res; 365 } 366 367 a->dp[0] |= *b++; 368 a->used += 1; 369 } 370 mp_clamp (a); 371 return MP_OKAY; 372 } 373 374 /* returns the number of bits in an int */ 375 static int 376 mp_count_bits(const mp_int *a) 377 { 378 int r; 379 mp_digit q; 380 381 /* shortcut */ 382 if (a->used == 0) { 383 return 0; 384 } 385 386 /* get number of digits and add that */ 387 r = (a->used - 1) * DIGIT_BIT; 388 389 /* take the last digit and count the bits in it */ 390 q = a->dp[a->used - 1]; 391 while (q > ((mp_digit) 0)) { 392 ++r; 393 q >>= ((mp_digit) 1); 394 } 395 return r; 396 } 397 398 /* compare maginitude of two ints (unsigned) */ 399 static int 400 mp_cmp_mag (mp_int * a, mp_int * b) 401 { 402 int n; 403 mp_digit *tmpa, *tmpb; 404 405 /* compare based on # of non-zero digits */ 406 if (a->used > b->used) { 407 return MP_GT; 408 } 409 410 if (a->used < b->used) { 411 return MP_LT; 412 } 413 414 /* alias for a */ 415 tmpa = a->dp + (a->used - 1); 416 417 /* alias for b */ 418 tmpb = b->dp + (a->used - 1); 419 420 /* compare based on digits */ 421 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 422 if (*tmpa > *tmpb) { 423 return MP_GT; 424 } 425 426 if (*tmpa < *tmpb) { 427 return MP_LT; 428 } 429 } 430 return MP_EQ; 431 } 432 433 /* compare two ints (signed)*/ 434 static int 435 mp_cmp (mp_int * a, mp_int * b) 436 { 437 /* compare based on sign */ 438 if (a->sign != b->sign) { 439 if (a->sign == MP_NEG) { 440 return MP_LT; 441 } else { 442 return MP_GT; 443 } 444 } 445 446 /* compare digits */ 447 if (a->sign == MP_NEG) { 448 /* if negative compare opposite direction */ 449 return mp_cmp_mag(b, a); 450 } else { 451 return mp_cmp_mag(a, b); 452 } 453 } 454 455 /* get the size for an unsigned equivalent */ 456 static int 457 mp_unsigned_bin_size (mp_int * a) 458 { 459 int size = mp_count_bits (a); 460 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 461 } 462 463 /* init a new mp_int */ 464 static int 465 mp_init (mp_int * a) 466 { 467 int i; 468 469 /* allocate memory required and clear it */ 470 a->dp = netpgp_allocate(1, sizeof (*a->dp) * MP_PREC); 471 if (a->dp == NULL) { 472 return MP_MEM; 473 } 474 475 /* set the digits to zero */ 476 for (i = 0; i < MP_PREC; i++) { 477 a->dp[i] = 0; 478 } 479 480 /* set the used to zero, allocated digits to the default precision 481 * and sign to positive */ 482 a->used = 0; 483 a->alloc = MP_PREC; 484 a->sign = MP_ZPOS; 485 486 return MP_OKAY; 487 } 488 489 /* clear one (frees) */ 490 static void 491 mp_clear (mp_int * a) 492 { 493 int i; 494 495 /* only do anything if a hasn't been freed previously */ 496 if (a->dp != NULL) { 497 /* first zero the digits */ 498 for (i = 0; i < a->used; i++) { 499 a->dp[i] = 0; 500 } 501 502 /* free ram */ 503 netpgp_deallocate(a->dp, (size_t)a->alloc); 504 505 /* reset members to make debugging easier */ 506 a->dp = NULL; 507 a->alloc = a->used = 0; 508 a->sign = MP_ZPOS; 509 } 510 } 511 512 static int 513 mp_init_multi(mp_int *mp, ...) 514 { 515 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 516 int n = 0; /* Number of ok inits */ 517 mp_int* cur_arg = mp; 518 va_list args; 519 520 va_start(args, mp); /* init args to next argument from caller */ 521 while (cur_arg != NULL) { 522 if (mp_init(cur_arg) != MP_OKAY) { 523 /* Oops - error! Back-track and mp_clear what we already 524 succeeded in init-ing, then return error. 525 */ 526 va_list clean_args; 527 528 /* end the current list */ 529 va_end(args); 530 531 /* now start cleaning up */ 532 cur_arg = mp; 533 va_start(clean_args, mp); 534 while (n--) { 535 mp_clear(cur_arg); 536 cur_arg = va_arg(clean_args, mp_int*); 537 } 538 va_end(clean_args); 539 res = MP_MEM; 540 break; 541 } 542 n++; 543 cur_arg = va_arg(args, mp_int*); 544 } 545 va_end(args); 546 return res; /* Assumed ok, if error flagged above. */ 547 } 548 549 /* init an mp_init for a given size */ 550 static int 551 mp_init_size (mp_int * a, int size) 552 { 553 int x; 554 555 /* pad size so there are always extra digits */ 556 size += (MP_PREC * 2) - (size % MP_PREC); 557 558 /* alloc mem */ 559 a->dp = netpgp_allocate (1, sizeof (*a->dp) * size); 560 if (a->dp == NULL) { 561 return MP_MEM; 562 } 563 564 /* set the members */ 565 a->used = 0; 566 a->alloc = size; 567 a->sign = MP_ZPOS; 568 569 /* zero the digits */ 570 for (x = 0; x < size; x++) { 571 a->dp[x] = 0; 572 } 573 574 return MP_OKAY; 575 } 576 577 /* creates "a" then copies b into it */ 578 static int mp_init_copy (mp_int * a, const mp_int * b) 579 { 580 int res; 581 582 if ((res = mp_init (a)) != MP_OKAY) { 583 return res; 584 } 585 return mp_copy (b, a); 586 } 587 588 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 589 static int 590 s_mp_add (const mp_int * a, const mp_int * b, mp_int * c) 591 { 592 const mp_int *x; 593 int olduse, res, min, max; 594 595 /* find sizes, we let |a| <= |b| which means we have to sort 596 * them. "x" will point to the input with the most digits 597 */ 598 if (a->used > b->used) { 599 min = b->used; 600 max = a->used; 601 x = a; 602 } else { 603 min = a->used; 604 max = b->used; 605 x = b; 606 } 607 608 /* init result */ 609 if (c->alloc < max + 1) { 610 if ((res = mp_grow (c, max + 1)) != MP_OKAY) { 611 return res; 612 } 613 } 614 615 /* get old used digit count and set new one */ 616 olduse = c->used; 617 c->used = max + 1; 618 619 { 620 const mp_digit *tmpa, *tmpb; 621 mp_digit u, *tmpc; 622 int i; 623 624 /* alias for digit pointers */ 625 626 /* first input */ 627 tmpa = a->dp; 628 629 /* second input */ 630 tmpb = b->dp; 631 632 /* destination */ 633 tmpc = c->dp; 634 635 /* zero the carry */ 636 u = 0; 637 for (i = 0; i < min; i++) { 638 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 639 *tmpc = *tmpa++ + *tmpb++ + u; 640 641 /* U = carry bit of T[i] */ 642 u = *tmpc >> ((mp_digit)DIGIT_BIT); 643 644 /* take away carry bit from T[i] */ 645 *tmpc++ &= MP_MASK; 646 } 647 648 /* now copy higher words if any, that is in A+B 649 * if A or B has more digits add those in 650 */ 651 if (min != max) { 652 for (; i < max; i++) { 653 /* T[i] = X[i] + U */ 654 *tmpc = x->dp[i] + u; 655 656 /* U = carry bit of T[i] */ 657 u = *tmpc >> ((mp_digit)DIGIT_BIT); 658 659 /* take away carry bit from T[i] */ 660 *tmpc++ &= MP_MASK; 661 } 662 } 663 664 /* add carry */ 665 *tmpc++ = u; 666 667 /* clear digits above oldused */ 668 for (i = c->used; i < olduse; i++) { 669 *tmpc++ = 0; 670 } 671 } 672 673 mp_clamp (c); 674 return MP_OKAY; 675 } 676 677 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 678 static int 679 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c) 680 { 681 int olduse, res, min, max; 682 683 /* find sizes */ 684 min = b->used; 685 max = a->used; 686 687 /* init result */ 688 if (c->alloc < max) { 689 if ((res = mp_grow (c, max)) != MP_OKAY) { 690 return res; 691 } 692 } 693 olduse = c->used; 694 c->used = max; 695 696 { 697 const mp_digit *tmpa, *tmpb; 698 mp_digit u, *tmpc; 699 int i; 700 701 /* alias for digit pointers */ 702 tmpa = a->dp; 703 tmpb = b->dp; 704 tmpc = c->dp; 705 706 /* set carry to zero */ 707 u = 0; 708 for (i = 0; i < min; i++) { 709 /* T[i] = A[i] - B[i] - U */ 710 *tmpc = *tmpa++ - *tmpb++ - u; 711 712 /* U = carry bit of T[i] 713 * Note this saves performing an AND operation since 714 * if a carry does occur it will propagate all the way to the 715 * MSB. As a result a single shift is enough to get the carry 716 */ 717 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 718 719 /* Clear carry from T[i] */ 720 *tmpc++ &= MP_MASK; 721 } 722 723 /* now copy higher words if any, e.g. if A has more digits than B */ 724 for (; i < max; i++) { 725 /* T[i] = A[i] - U */ 726 *tmpc = *tmpa++ - u; 727 728 /* U = carry bit of T[i] */ 729 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 730 731 /* Clear carry from T[i] */ 732 *tmpc++ &= MP_MASK; 733 } 734 735 /* clear digits above used (since we may not have grown result above) */ 736 for (i = c->used; i < olduse; i++) { 737 *tmpc++ = 0; 738 } 739 } 740 741 mp_clamp (c); 742 return MP_OKAY; 743 } 744 745 /* high level subtraction (handles signs) */ 746 static int 747 mp_sub (const mp_int * a, const mp_int * b, mp_int * c) 748 { 749 int sa, sb, res; 750 751 sa = a->sign; 752 sb = b->sign; 753 754 if (sa != sb) { 755 /* subtract a negative from a positive, OR */ 756 /* subtract a positive from a negative. */ 757 /* In either case, ADD their magnitudes, */ 758 /* and use the sign of the first number. */ 759 c->sign = sa; 760 res = s_mp_add (a, b, c); 761 } else { 762 /* subtract a positive from a positive, OR */ 763 /* subtract a negative from a negative. */ 764 /* First, take the difference between their */ 765 /* magnitudes, then... */ 766 if (mp_cmp_mag (a, b) != MP_LT) { 767 /* Copy the sign from the first */ 768 c->sign = sa; 769 /* The first has a larger or equal magnitude */ 770 res = s_mp_sub (a, b, c); 771 } else { 772 /* The result has the *opposite* sign from */ 773 /* the first number. */ 774 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 775 /* The second has a larger magnitude */ 776 res = s_mp_sub (b, a, c); 777 } 778 } 779 return res; 780 } 781 782 /* shift right a certain amount of digits */ 783 static int mp_rshd (mp_int * a, int b) 784 { 785 int x; 786 787 /* if b <= 0 then ignore it */ 788 if (b <= 0) { 789 return 0; 790 } 791 792 /* if b > used then simply zero it and return */ 793 if (a->used <= b) { 794 mp_zero (a); 795 return 0; 796 } 797 798 { 799 mp_digit *bottom, *top; 800 801 /* shift the digits down */ 802 803 /* bottom */ 804 bottom = a->dp; 805 806 /* top [offset into digits] */ 807 top = a->dp + b; 808 809 /* this is implemented as a sliding window where 810 * the window is b-digits long and digits from 811 * the top of the window are copied to the bottom 812 * 813 * e.g. 814 815 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 816 /\ | ----> 817 \-------------------/ ----> 818 */ 819 for (x = 0; x < (a->used - b); x++) { 820 *bottom++ = *top++; 821 } 822 823 /* zero the top digits */ 824 for (; x < a->used; x++) { 825 *bottom++ = 0; 826 } 827 } 828 829 /* remove excess digits */ 830 a->used -= b; 831 return 1; 832 } 833 834 /* multiply by a digit */ 835 static int 836 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c) 837 { 838 const mp_digit *tmpa; 839 mp_digit u, *tmpc; 840 mp_word r; 841 int ix, res, olduse; 842 843 /* make sure c is big enough to hold a*b */ 844 if (c->alloc < a->used + 1) { 845 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { 846 return res; 847 } 848 } 849 850 /* get the original destinations used count */ 851 olduse = c->used; 852 853 /* set the sign */ 854 c->sign = a->sign; 855 856 /* alias for a->dp [source] */ 857 tmpa = a->dp; 858 859 /* alias for c->dp [dest] */ 860 tmpc = c->dp; 861 862 /* zero carry */ 863 u = 0; 864 865 /* compute columns */ 866 for (ix = 0; ix < a->used; ix++) { 867 /* compute product and carry sum for this term */ 868 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); 869 870 /* mask off higher bits to get a single digit */ 871 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 872 873 /* send carry into next iteration */ 874 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 875 } 876 877 /* store final carry [if any] and increment ix offset */ 878 *tmpc++ = u; 879 ++ix; 880 881 /* now zero digits above the top */ 882 while (ix++ < olduse) { 883 *tmpc++ = 0; 884 } 885 886 /* set used count */ 887 c->used = a->used + 1; 888 mp_clamp(c); 889 890 return MP_OKAY; 891 } 892 893 /* high level addition (handles signs) */ 894 static int mp_add (const mp_int * a, const mp_int * b, mp_int * c) 895 { 896 int sa, sb, res; 897 898 /* get sign of both inputs */ 899 sa = a->sign; 900 sb = b->sign; 901 902 /* handle two cases, not four */ 903 if (sa == sb) { 904 /* both positive or both negative */ 905 /* add their magnitudes, copy the sign */ 906 c->sign = sa; 907 res = s_mp_add (a, b, c); 908 } else { 909 /* one positive, the other negative */ 910 /* subtract the one with the greater magnitude from */ 911 /* the one of the lesser magnitude. The result gets */ 912 /* the sign of the one with the greater magnitude. */ 913 if (mp_cmp_mag (a, b) == MP_LT) { 914 c->sign = sb; 915 res = s_mp_sub (b, a, c); 916 } else { 917 c->sign = sa; 918 res = s_mp_sub (a, b, c); 919 } 920 } 921 return res; 922 } 923 924 /* swap the elements of two integers, for cases where you can't simply swap the 925 * mp_int pointers around 926 */ 927 static void 928 mp_exch(mp_int *a, mp_int *b) 929 { 930 mp_int t; 931 932 t = *a; 933 *a = *b; 934 *b = t; 935 } 936 937 /* calc a value mod 2**b */ 938 static int 939 mp_mod_2d (const mp_int * a, int b, mp_int * c) 940 { 941 int x, res; 942 943 /* if b is <= 0 then zero the int */ 944 if (b <= 0) { 945 mp_zero (c); 946 return MP_OKAY; 947 } 948 949 /* if the modulus is larger than the value than return */ 950 if (b >= (int) (a->used * DIGIT_BIT)) { 951 res = mp_copy (a, c); 952 return res; 953 } 954 955 /* copy */ 956 if ((res = mp_copy (a, c)) != MP_OKAY) { 957 return res; 958 } 959 960 /* zero digits above the last digit of the modulus */ 961 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 962 c->dp[x] = 0; 963 } 964 /* clear the digit that is not completely outside/inside the modulus */ 965 c->dp[b / DIGIT_BIT] &= 966 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 967 mp_clamp (c); 968 return MP_OKAY; 969 } 970 971 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 972 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d) 973 { 974 mp_digit D, r, rr; 975 int x, res; 976 mp_int t; 977 978 979 /* if the shift count is <= 0 then we do no work */ 980 if (b <= 0) { 981 res = mp_copy (a, c); 982 if (d != NULL) { 983 mp_zero (d); 984 } 985 return res; 986 } 987 988 if ((res = mp_init (&t)) != MP_OKAY) { 989 return res; 990 } 991 992 /* get the remainder */ 993 if (d != NULL) { 994 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { 995 mp_clear (&t); 996 return res; 997 } 998 } 999 1000 /* copy */ 1001 if ((res = mp_copy (a, c)) != MP_OKAY) { 1002 mp_clear (&t); 1003 return res; 1004 } 1005 1006 /* shift by as many digits in the bit count */ 1007 if (b >= (int)DIGIT_BIT) { 1008 mp_rshd (c, b / DIGIT_BIT); 1009 } 1010 1011 /* shift any bit count < DIGIT_BIT */ 1012 D = (mp_digit) (b % DIGIT_BIT); 1013 if (D != 0) { 1014 mp_digit *tmpc, mask, shift; 1015 1016 /* mask */ 1017 mask = (((mp_digit)1) << D) - 1; 1018 1019 /* shift for lsb */ 1020 shift = DIGIT_BIT - D; 1021 1022 /* alias */ 1023 tmpc = c->dp + (c->used - 1); 1024 1025 /* carry */ 1026 r = 0; 1027 for (x = c->used - 1; x >= 0; x--) { 1028 /* get the lower bits of this word in a temp */ 1029 rr = *tmpc & mask; 1030 1031 /* shift the current word and mix in the carry bits from the previous word */ 1032 *tmpc = (*tmpc >> D) | (r << shift); 1033 --tmpc; 1034 1035 /* set the carry to the carry bits of the current word found above */ 1036 r = rr; 1037 } 1038 } 1039 mp_clamp (c); 1040 if (d != NULL) { 1041 mp_exch (&t, d); 1042 } 1043 mp_clear (&t); 1044 return MP_OKAY; 1045 } 1046 1047 /* integer signed division. 1048 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 1049 * HAC pp.598 Algorithm 14.20 1050 * 1051 * Note that the description in HAC is horribly 1052 * incomplete. For example, it doesn't consider 1053 * the case where digits are removed from 'x' in 1054 * the inner loop. It also doesn't consider the 1055 * case that y has fewer than three digits, etc.. 1056 * 1057 * The overall algorithm is as described as 1058 * 14.20 from HAC but fixed to treat these cases. 1059 */ 1060 static int 1061 mp_div(mp_int *c, mp_int *d, const mp_int *a, const mp_int *b) 1062 { 1063 mp_int q, x, y, t1, t2; 1064 int res, n, t, i, norm, neg; 1065 1066 /* is divisor zero ? */ 1067 if (BN_is_zero (b) == 1) { 1068 return MP_VAL; 1069 } 1070 1071 /* if a < b then q=0, r = a */ 1072 if (mp_cmp_mag (a, b) == MP_LT) { 1073 if (d != NULL) { 1074 res = mp_copy (a, d); 1075 } else { 1076 res = MP_OKAY; 1077 } 1078 if (c != NULL) { 1079 mp_zero (c); 1080 } 1081 return res; 1082 } 1083 1084 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { 1085 return res; 1086 } 1087 q.used = a->used + 2; 1088 1089 if ((res = mp_init (&t1)) != MP_OKAY) { 1090 goto LBL_Q; 1091 } 1092 1093 if ((res = mp_init (&t2)) != MP_OKAY) { 1094 goto LBL_T1; 1095 } 1096 1097 if ((res = mp_init_copy (&x, a)) != MP_OKAY) { 1098 goto LBL_T2; 1099 } 1100 1101 if ((res = mp_init_copy (&y, b)) != MP_OKAY) { 1102 goto LBL_X; 1103 } 1104 1105 /* fix the sign */ 1106 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1107 x.sign = y.sign = MP_ZPOS; 1108 1109 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1110 norm = mp_count_bits(&y) % DIGIT_BIT; 1111 if (norm < (int)(DIGIT_BIT-1)) { 1112 norm = (DIGIT_BIT-1) - norm; 1113 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { 1114 goto LBL_Y; 1115 } 1116 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { 1117 goto LBL_Y; 1118 } 1119 } else { 1120 norm = 0; 1121 } 1122 1123 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1124 n = x.used - 1; 1125 t = y.used - 1; 1126 1127 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1128 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1129 goto LBL_Y; 1130 } 1131 1132 while (mp_cmp (&x, &y) != MP_LT) { 1133 ++(q.dp[n - t]); 1134 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { 1135 goto LBL_Y; 1136 } 1137 } 1138 1139 /* reset y by shifting it back down */ 1140 mp_rshd (&y, n - t); 1141 1142 /* step 3. for i from n down to (t + 1) */ 1143 for (i = n; i >= (t + 1); i--) { 1144 if (i > x.used) { 1145 continue; 1146 } 1147 1148 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1149 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1150 if (x.dp[i] == y.dp[t]) { 1151 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1152 } else { 1153 mp_word tmp; 1154 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1155 tmp |= ((mp_word) x.dp[i - 1]); 1156 tmp /= ((mp_word) y.dp[t]); 1157 if (tmp > (mp_word) MP_MASK) 1158 tmp = MP_MASK; 1159 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1160 } 1161 1162 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1163 xi * b**2 + xi-1 * b + xi-2 1164 1165 do q{i-t-1} -= 1; 1166 */ 1167 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1168 do { 1169 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1170 1171 /* find left hand */ 1172 mp_zero (&t1); 1173 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1174 t1.dp[1] = y.dp[t]; 1175 t1.used = 2; 1176 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1177 goto LBL_Y; 1178 } 1179 1180 /* find right hand */ 1181 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1182 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1183 t2.dp[2] = x.dp[i]; 1184 t2.used = 3; 1185 } while (mp_cmp_mag(&t1, &t2) == MP_GT); 1186 1187 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1188 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1189 goto LBL_Y; 1190 } 1191 1192 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1193 goto LBL_Y; 1194 } 1195 1196 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { 1197 goto LBL_Y; 1198 } 1199 1200 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1201 if (x.sign == MP_NEG) { 1202 if ((res = mp_copy (&y, &t1)) != MP_OKAY) { 1203 goto LBL_Y; 1204 } 1205 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1206 goto LBL_Y; 1207 } 1208 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { 1209 goto LBL_Y; 1210 } 1211 1212 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1213 } 1214 } 1215 1216 /* now q is the quotient and x is the remainder 1217 * [which we have to normalize] 1218 */ 1219 1220 /* get sign before writing to c */ 1221 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1222 1223 if (c != NULL) { 1224 mp_clamp (&q); 1225 mp_exch (&q, c); 1226 c->sign = neg; 1227 } 1228 1229 if (d != NULL) { 1230 mp_div_2d (&x, norm, &x, NULL); 1231 mp_exch (&x, d); 1232 } 1233 1234 res = MP_OKAY; 1235 1236 LBL_Y:mp_clear (&y); 1237 LBL_X:mp_clear (&x); 1238 LBL_T2:mp_clear (&t2); 1239 LBL_T1:mp_clear (&t1); 1240 LBL_Q:mp_clear (&q); 1241 return res; 1242 } 1243 1244 /* c = a mod b, 0 <= c < b */ 1245 static int 1246 mp_mod (const mp_int * a, const mp_int * b, mp_int * c) 1247 { 1248 mp_int t; 1249 int res; 1250 1251 if ((res = mp_init (&t)) != MP_OKAY) { 1252 return res; 1253 } 1254 1255 if ((res = mp_div (NULL, &t, a, b)) != MP_OKAY) { 1256 mp_clear (&t); 1257 return res; 1258 } 1259 1260 if (t.sign != b->sign) { 1261 res = mp_add (b, &t, c); 1262 } else { 1263 res = MP_OKAY; 1264 mp_exch (&t, c); 1265 } 1266 1267 mp_clear (&t); 1268 return res; 1269 } 1270 1271 /* set to a digit */ 1272 static void mp_set (mp_int * a, mp_digit b) 1273 { 1274 mp_zero (a); 1275 a->dp[0] = b & MP_MASK; 1276 a->used = (a->dp[0] != 0) ? 1 : 0; 1277 } 1278 1279 /* b = a/2 */ 1280 static int mp_div_2(const mp_int * a, mp_int * b) 1281 { 1282 int x, res, oldused; 1283 1284 /* copy */ 1285 if (b->alloc < a->used) { 1286 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 1287 return res; 1288 } 1289 } 1290 1291 oldused = b->used; 1292 b->used = a->used; 1293 { 1294 mp_digit r, rr, *tmpa, *tmpb; 1295 1296 /* source alias */ 1297 tmpa = a->dp + b->used - 1; 1298 1299 /* dest alias */ 1300 tmpb = b->dp + b->used - 1; 1301 1302 /* carry */ 1303 r = 0; 1304 for (x = b->used - 1; x >= 0; x--) { 1305 /* get the carry for the next iteration */ 1306 rr = *tmpa & 1; 1307 1308 /* shift the current digit, add in carry and store */ 1309 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1310 1311 /* forward carry to next iteration */ 1312 r = rr; 1313 } 1314 1315 /* zero excess digits */ 1316 tmpb = b->dp + b->used; 1317 for (x = b->used; x < oldused; x++) { 1318 *tmpb++ = 0; 1319 } 1320 } 1321 b->sign = a->sign; 1322 mp_clamp (b); 1323 return MP_OKAY; 1324 } 1325 1326 /* compare a digit */ 1327 static int mp_cmp_d(const mp_int * a, mp_digit b) 1328 { 1329 /* compare based on sign */ 1330 if (a->sign == MP_NEG) { 1331 return MP_LT; 1332 } 1333 1334 /* compare based on magnitude */ 1335 if (a->used > 1) { 1336 return MP_GT; 1337 } 1338 1339 /* compare the only digit of a to b */ 1340 if (a->dp[0] > b) { 1341 return MP_GT; 1342 } else if (a->dp[0] < b) { 1343 return MP_LT; 1344 } else { 1345 return MP_EQ; 1346 } 1347 } 1348 1349 static void mp_clear_multi(mp_int *mp, ...) 1350 { 1351 mp_int* next_mp = mp; 1352 va_list args; 1353 va_start(args, mp); 1354 while (next_mp != NULL) { 1355 mp_clear(next_mp); 1356 next_mp = va_arg(args, mp_int*); 1357 } 1358 va_end(args); 1359 } 1360 1361 /* computes the modular inverse via binary extended euclidean algorithm, 1362 * that is c = 1/a mod b 1363 * 1364 * Based on slow invmod except this is optimized for the case where b is 1365 * odd as per HAC Note 14.64 on pp. 610 1366 */ 1367 static int 1368 fast_mp_invmod (const mp_int * a, const mp_int * b, mp_int * c) 1369 { 1370 mp_int x, y, u, v, B, D; 1371 int res, neg; 1372 1373 /* 2. [modified] b must be odd */ 1374 if (BN_is_even (b) == 1) { 1375 return MP_VAL; 1376 } 1377 1378 /* init all our temps */ 1379 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1380 return res; 1381 } 1382 1383 /* x == modulus, y == value to invert */ 1384 if ((res = mp_copy (b, &x)) != MP_OKAY) { 1385 goto LBL_ERR; 1386 } 1387 1388 /* we need y = |a| */ 1389 if ((res = mp_mod (a, b, &y)) != MP_OKAY) { 1390 goto LBL_ERR; 1391 } 1392 1393 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1394 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 1395 goto LBL_ERR; 1396 } 1397 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 1398 goto LBL_ERR; 1399 } 1400 mp_set (&D, (unsigned long)1); 1401 1402 top: 1403 /* 4. while u is even do */ 1404 while (BN_is_even (&u) == 1) { 1405 /* 4.1 u = u/2 */ 1406 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 1407 goto LBL_ERR; 1408 } 1409 /* 4.2 if B is odd then */ 1410 if (BN_is_odd (&B) == 1) { 1411 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 1412 goto LBL_ERR; 1413 } 1414 } 1415 /* B = B/2 */ 1416 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 1417 goto LBL_ERR; 1418 } 1419 } 1420 1421 /* 5. while v is even do */ 1422 while (BN_is_even (&v) == 1) { 1423 /* 5.1 v = v/2 */ 1424 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 1425 goto LBL_ERR; 1426 } 1427 /* 5.2 if D is odd then */ 1428 if (BN_is_odd (&D) == 1) { 1429 /* D = (D-x)/2 */ 1430 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 1431 goto LBL_ERR; 1432 } 1433 } 1434 /* D = D/2 */ 1435 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 1436 goto LBL_ERR; 1437 } 1438 } 1439 1440 /* 6. if u >= v then */ 1441 if (mp_cmp (&u, &v) != MP_LT) { 1442 /* u = u - v, B = B - D */ 1443 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 1444 goto LBL_ERR; 1445 } 1446 1447 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 1448 goto LBL_ERR; 1449 } 1450 } else { 1451 /* v - v - u, D = D - B */ 1452 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 1453 goto LBL_ERR; 1454 } 1455 1456 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 1457 goto LBL_ERR; 1458 } 1459 } 1460 1461 /* if not zero goto step 4 */ 1462 if (BN_is_zero (&u) == 0) { 1463 goto top; 1464 } 1465 1466 /* now a = C, b = D, gcd == g*v */ 1467 1468 /* if v != 1 then there is no inverse */ 1469 if (mp_cmp_d (&v, (unsigned long)1) != MP_EQ) { 1470 res = MP_VAL; 1471 goto LBL_ERR; 1472 } 1473 1474 /* b is now the inverse */ 1475 neg = a->sign; 1476 while (D.sign == MP_NEG) { 1477 if ((res = mp_add (&D, b, &D)) != MP_OKAY) { 1478 goto LBL_ERR; 1479 } 1480 } 1481 mp_exch (&D, c); 1482 c->sign = neg; 1483 res = MP_OKAY; 1484 1485 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1486 return res; 1487 } 1488 1489 /* hac 14.61, pp608 */ 1490 static int 1491 mp_invmod_slow (const mp_int * a, const mp_int * b, mp_int * c) 1492 { 1493 mp_int x, y, u, v, A, B, C, D; 1494 int res; 1495 1496 /* b cannot be negative */ 1497 if (b->sign == MP_NEG || BN_is_zero(b) == 1) { 1498 return MP_VAL; 1499 } 1500 1501 /* init temps */ 1502 if ((res = mp_init_multi(&x, &y, &u, &v, 1503 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1504 return res; 1505 } 1506 1507 /* x = a, y = b */ 1508 if ((res = mp_mod(a, b, &x)) != MP_OKAY) { 1509 goto LBL_ERR; 1510 } 1511 if ((res = mp_copy (b, &y)) != MP_OKAY) { 1512 goto LBL_ERR; 1513 } 1514 1515 /* 2. [modified] if x,y are both even then return an error! */ 1516 if (BN_is_even (&x) == 1 && BN_is_even (&y) == 1) { 1517 res = MP_VAL; 1518 goto LBL_ERR; 1519 } 1520 1521 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1522 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 1523 goto LBL_ERR; 1524 } 1525 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 1526 goto LBL_ERR; 1527 } 1528 mp_set (&A, (unsigned long)1); 1529 mp_set (&D, (unsigned long)1); 1530 1531 top: 1532 /* 4. while u is even do */ 1533 while (BN_is_even (&u) == 1) { 1534 /* 4.1 u = u/2 */ 1535 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 1536 goto LBL_ERR; 1537 } 1538 /* 4.2 if A or B is odd then */ 1539 if (BN_is_odd (&A) == 1 || BN_is_odd (&B) == 1) { 1540 /* A = (A+y)/2, B = (B-x)/2 */ 1541 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { 1542 goto LBL_ERR; 1543 } 1544 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 1545 goto LBL_ERR; 1546 } 1547 } 1548 /* A = A/2, B = B/2 */ 1549 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { 1550 goto LBL_ERR; 1551 } 1552 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 1553 goto LBL_ERR; 1554 } 1555 } 1556 1557 /* 5. while v is even do */ 1558 while (BN_is_even (&v) == 1) { 1559 /* 5.1 v = v/2 */ 1560 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 1561 goto LBL_ERR; 1562 } 1563 /* 5.2 if C or D is odd then */ 1564 if (BN_is_odd (&C) == 1 || BN_is_odd (&D) == 1) { 1565 /* C = (C+y)/2, D = (D-x)/2 */ 1566 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { 1567 goto LBL_ERR; 1568 } 1569 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 1570 goto LBL_ERR; 1571 } 1572 } 1573 /* C = C/2, D = D/2 */ 1574 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { 1575 goto LBL_ERR; 1576 } 1577 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 1578 goto LBL_ERR; 1579 } 1580 } 1581 1582 /* 6. if u >= v then */ 1583 if (mp_cmp (&u, &v) != MP_LT) { 1584 /* u = u - v, A = A - C, B = B - D */ 1585 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 1586 goto LBL_ERR; 1587 } 1588 1589 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { 1590 goto LBL_ERR; 1591 } 1592 1593 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 1594 goto LBL_ERR; 1595 } 1596 } else { 1597 /* v - v - u, C = C - A, D = D - B */ 1598 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 1599 goto LBL_ERR; 1600 } 1601 1602 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { 1603 goto LBL_ERR; 1604 } 1605 1606 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 1607 goto LBL_ERR; 1608 } 1609 } 1610 1611 /* if not zero goto step 4 */ 1612 if (BN_is_zero (&u) == 0) 1613 goto top; 1614 1615 /* now a = C, b = D, gcd == g*v */ 1616 1617 /* if v != 1 then there is no inverse */ 1618 if (mp_cmp_d (&v, 1) != MP_EQ) { 1619 res = MP_VAL; 1620 goto LBL_ERR; 1621 } 1622 1623 /* if its too low */ 1624 while (mp_cmp_d(&C, 0) == MP_LT) { 1625 if ((res = mp_add(&C, b, &C)) != MP_OKAY) { 1626 goto LBL_ERR; 1627 } 1628 } 1629 1630 /* too big */ 1631 while (mp_cmp_mag(&C, b) != MP_LT) { 1632 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { 1633 goto LBL_ERR; 1634 } 1635 } 1636 1637 /* C is now the inverse */ 1638 mp_exch (&C, c); 1639 res = MP_OKAY; 1640 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1641 return res; 1642 } 1643 1644 static int 1645 mp_invmod(mp_int *c, const mp_int *a, const mp_int *b) 1646 { 1647 /* b cannot be negative */ 1648 if (b->sign == MP_NEG || BN_is_zero(b) == 1) { 1649 return MP_VAL; 1650 } 1651 1652 /* if the modulus is odd we can use a faster routine instead */ 1653 if (BN_is_odd (b) == 1) { 1654 return fast_mp_invmod(a, b, c); 1655 } 1656 1657 return mp_invmod_slow(a, b, c); 1658 1659 /*NOTREACHED*/ 1660 return MP_VAL; 1661 } 1662 1663 /* b = |a| 1664 * 1665 * Simple function copies the input and fixes the sign to positive 1666 */ 1667 static int 1668 mp_abs (const mp_int * a, mp_int * b) 1669 { 1670 int res; 1671 1672 /* copy a to b */ 1673 if (a != b) { 1674 if ((res = mp_copy (a, b)) != MP_OKAY) { 1675 return res; 1676 } 1677 } 1678 1679 /* force the sign of b to positive */ 1680 b->sign = MP_ZPOS; 1681 1682 return MP_OKAY; 1683 } 1684 1685 /* determines if reduce_2k_l can be used */ 1686 static int mp_reduce_is_2k_l(const mp_int *a) 1687 { 1688 int ix, iy; 1689 1690 if (a->used == 0) { 1691 return MP_NO; 1692 } else if (a->used == 1) { 1693 return MP_YES; 1694 } else if (a->used > 1) { 1695 /* if more than half of the digits are -1 we're sold */ 1696 for (iy = ix = 0; ix < a->used; ix++) { 1697 if (a->dp[ix] == MP_MASK) { 1698 ++iy; 1699 } 1700 } 1701 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1702 1703 } 1704 return MP_NO; 1705 } 1706 1707 /* computes a = 2**b 1708 * 1709 * Simple algorithm which zeroes the int, grows it then just sets one bit 1710 * as required. 1711 */ 1712 static int 1713 mp_2expt (mp_int * a, int b) 1714 { 1715 int res; 1716 1717 /* zero a as per default */ 1718 mp_zero (a); 1719 1720 /* grow a to accomodate the single bit */ 1721 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1722 return res; 1723 } 1724 1725 /* set the used count of where the bit will go */ 1726 a->used = b / DIGIT_BIT + 1; 1727 1728 /* put the single bit in its place */ 1729 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1730 1731 return MP_OKAY; 1732 } 1733 1734 /* pre-calculate the value required for Barrett reduction 1735 * For a given modulus "b" it calulates the value required in "a" 1736 */ 1737 static int mp_reduce_setup (mp_int * a, mp_int * b) 1738 { 1739 int res; 1740 1741 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1742 return res; 1743 } 1744 return mp_div (a, NULL, a, b); 1745 } 1746 1747 /* b = a*2 */ 1748 static int mp_mul_2(mp_int * a, mp_int * b) 1749 { 1750 int x, res, oldused; 1751 1752 /* grow to accomodate result */ 1753 if (b->alloc < a->used + 1) { 1754 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { 1755 return res; 1756 } 1757 } 1758 1759 oldused = b->used; 1760 b->used = a->used; 1761 1762 { 1763 mp_digit r, rr, *tmpa, *tmpb; 1764 1765 /* alias for source */ 1766 tmpa = a->dp; 1767 1768 /* alias for dest */ 1769 tmpb = b->dp; 1770 1771 /* carry */ 1772 r = 0; 1773 for (x = 0; x < a->used; x++) { 1774 1775 /* get what will be the *next* carry bit from the 1776 * MSB of the current digit 1777 */ 1778 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1779 1780 /* now shift up this digit, add in the carry [from the previous] */ 1781 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1782 1783 /* copy the carry that would be from the source 1784 * digit into the next iteration 1785 */ 1786 r = rr; 1787 } 1788 1789 /* new leading digit? */ 1790 if (r != 0) { 1791 /* add a MSB which is always 1 at this point */ 1792 *tmpb = 1; 1793 ++(b->used); 1794 } 1795 1796 /* now zero any excess digits on the destination 1797 * that we didn't write to 1798 */ 1799 tmpb = b->dp + b->used; 1800 for (x = b->used; x < oldused; x++) { 1801 *tmpb++ = 0; 1802 } 1803 } 1804 b->sign = a->sign; 1805 return MP_OKAY; 1806 } 1807 1808 /* divide by three (based on routine from MPI and the GMP manual) */ 1809 static int 1810 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) 1811 { 1812 mp_int q; 1813 mp_word w, t; 1814 mp_digit b; 1815 int res, ix; 1816 1817 /* b = 2**DIGIT_BIT / 3 */ 1818 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1819 1820 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1821 return res; 1822 } 1823 1824 q.used = a->used; 1825 q.sign = a->sign; 1826 w = 0; 1827 for (ix = a->used - 1; ix >= 0; ix--) { 1828 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1829 1830 if (w >= 3) { 1831 /* multiply w by [1/3] */ 1832 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1833 1834 /* now subtract 3 * [w/3] from w, to get the remainder */ 1835 w -= t+t+t; 1836 1837 /* fixup the remainder as required since 1838 * the optimization is not exact. 1839 */ 1840 while (w >= 3) { 1841 t += 1; 1842 w -= 3; 1843 } 1844 } else { 1845 t = 0; 1846 } 1847 q.dp[ix] = (mp_digit)t; 1848 } 1849 1850 /* [optional] store the remainder */ 1851 if (d != NULL) { 1852 *d = (mp_digit)w; 1853 } 1854 1855 /* [optional] store the quotient */ 1856 if (c != NULL) { 1857 mp_clamp(&q); 1858 mp_exch(&q, c); 1859 } 1860 mp_clear(&q); 1861 1862 return res; 1863 } 1864 1865 /* multiplication using the Toom-Cook 3-way algorithm 1866 * 1867 * Much more complicated than Karatsuba but has a lower 1868 * asymptotic running time of O(N**1.464). This algorithm is 1869 * only particularly useful on VERY large inputs 1870 * (we're talking 1000s of digits here...). 1871 */ 1872 static int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) 1873 { 1874 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1875 int res, B; 1876 1877 /* init temps */ 1878 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1879 &a0, &a1, &a2, &b0, &b1, 1880 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1881 return res; 1882 } 1883 1884 /* B */ 1885 B = MIN(a->used, b->used) / 3; 1886 1887 /* a = a2 * B**2 + a1 * B + a0 */ 1888 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1889 goto ERR; 1890 } 1891 1892 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1893 goto ERR; 1894 } 1895 mp_rshd(&a1, B); 1896 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 1897 1898 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1899 goto ERR; 1900 } 1901 mp_rshd(&a2, B*2); 1902 1903 /* b = b2 * B**2 + b1 * B + b0 */ 1904 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1905 goto ERR; 1906 } 1907 1908 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1909 goto ERR; 1910 } 1911 mp_rshd(&b1, B); 1912 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); 1913 1914 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1915 goto ERR; 1916 } 1917 mp_rshd(&b2, B*2); 1918 1919 /* w0 = a0*b0 */ 1920 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { 1921 goto ERR; 1922 } 1923 1924 /* w4 = a2 * b2 */ 1925 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { 1926 goto ERR; 1927 } 1928 1929 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1930 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 1931 goto ERR; 1932 } 1933 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1934 goto ERR; 1935 } 1936 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 1937 goto ERR; 1938 } 1939 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1940 goto ERR; 1941 } 1942 1943 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { 1944 goto ERR; 1945 } 1946 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1947 goto ERR; 1948 } 1949 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 1950 goto ERR; 1951 } 1952 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1953 goto ERR; 1954 } 1955 1956 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1957 goto ERR; 1958 } 1959 1960 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1961 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 1962 goto ERR; 1963 } 1964 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1965 goto ERR; 1966 } 1967 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 1968 goto ERR; 1969 } 1970 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1971 goto ERR; 1972 } 1973 1974 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { 1975 goto ERR; 1976 } 1977 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1978 goto ERR; 1979 } 1980 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 1981 goto ERR; 1982 } 1983 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1984 goto ERR; 1985 } 1986 1987 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1988 goto ERR; 1989 } 1990 1991 1992 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1993 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1994 goto ERR; 1995 } 1996 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1997 goto ERR; 1998 } 1999 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { 2000 goto ERR; 2001 } 2002 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 2003 goto ERR; 2004 } 2005 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { 2006 goto ERR; 2007 } 2008 2009 /* now solve the matrix 2010 2011 0 0 0 0 1 2012 1 2 4 8 16 2013 1 1 1 1 1 2014 16 8 4 2 1 2015 1 0 0 0 0 2016 2017 using 12 subtractions, 4 shifts, 2018 2 small divisions and 1 small multiplication 2019 */ 2020 2021 /* r1 - r4 */ 2022 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 2023 goto ERR; 2024 } 2025 /* r3 - r0 */ 2026 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 2027 goto ERR; 2028 } 2029 /* r1/2 */ 2030 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 2031 goto ERR; 2032 } 2033 /* r3/2 */ 2034 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 2035 goto ERR; 2036 } 2037 /* r2 - r0 - r4 */ 2038 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 2039 goto ERR; 2040 } 2041 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 2042 goto ERR; 2043 } 2044 /* r1 - r2 */ 2045 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2046 goto ERR; 2047 } 2048 /* r3 - r2 */ 2049 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2050 goto ERR; 2051 } 2052 /* r1 - 8r0 */ 2053 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 2054 goto ERR; 2055 } 2056 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 2057 goto ERR; 2058 } 2059 /* r3 - 8r4 */ 2060 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 2061 goto ERR; 2062 } 2063 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 2064 goto ERR; 2065 } 2066 /* 3r2 - r1 - r3 */ 2067 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 2068 goto ERR; 2069 } 2070 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 2071 goto ERR; 2072 } 2073 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 2074 goto ERR; 2075 } 2076 /* r1 - r2 */ 2077 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2078 goto ERR; 2079 } 2080 /* r3 - r2 */ 2081 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2082 goto ERR; 2083 } 2084 /* r1/3 */ 2085 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 2086 goto ERR; 2087 } 2088 /* r3/3 */ 2089 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 2090 goto ERR; 2091 } 2092 2093 /* at this point shift W[n] by B*n */ 2094 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 2095 goto ERR; 2096 } 2097 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 2098 goto ERR; 2099 } 2100 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 2101 goto ERR; 2102 } 2103 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 2104 goto ERR; 2105 } 2106 2107 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { 2108 goto ERR; 2109 } 2110 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2111 goto ERR; 2112 } 2113 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2114 goto ERR; 2115 } 2116 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { 2117 goto ERR; 2118 } 2119 2120 ERR: 2121 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2122 &a0, &a1, &a2, &b0, &b1, 2123 &b2, &tmp1, &tmp2, NULL); 2124 return res; 2125 } 2126 2127 #define TOOM_MUL_CUTOFF 350 2128 #define KARATSUBA_MUL_CUTOFF 80 2129 2130 /* c = |a| * |b| using Karatsuba Multiplication using 2131 * three half size multiplications 2132 * 2133 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2134 * let n represent half of the number of digits in 2135 * the min(a,b) 2136 * 2137 * a = a1 * B**n + a0 2138 * b = b1 * B**n + b0 2139 * 2140 * Then, a * b => 2141 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2142 * 2143 * Note that a1b1 and a0b0 are used twice and only need to be 2144 * computed once. So in total three half size (half # of 2145 * digit) multiplications are performed, a0b0, a1b1 and 2146 * (a1+b1)(a0+b0) 2147 * 2148 * Note that a multiplication of half the digits requires 2149 * 1/4th the number of single precision multiplications so in 2150 * total after one call 25% of the single precision multiplications 2151 * are saved. Note also that the call to mp_mul can end up back 2152 * in this function if the a0, a1, b0, or b1 are above the threshold. 2153 * This is known as divide-and-conquer and leads to the famous 2154 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2155 * the standard O(N**2) that the baseline/comba methods use. 2156 * Generally though the overhead of this method doesn't pay off 2157 * until a certain size (N ~ 80) is reached. 2158 */ 2159 static int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) 2160 { 2161 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2162 int B; 2163 int err; 2164 2165 /* default the return code to an error */ 2166 err = MP_MEM; 2167 2168 /* min # of digits */ 2169 B = MIN (a->used, b->used); 2170 2171 /* now divide in two */ 2172 B = (int)((unsigned)B >> 1); 2173 2174 /* init copy all the temps */ 2175 if (mp_init_size (&x0, B) != MP_OKAY) 2176 goto ERR; 2177 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 2178 goto X0; 2179 if (mp_init_size (&y0, B) != MP_OKAY) 2180 goto X1; 2181 if (mp_init_size (&y1, b->used - B) != MP_OKAY) 2182 goto Y0; 2183 2184 /* init temps */ 2185 if (mp_init_size (&t1, B * 2) != MP_OKAY) 2186 goto Y1; 2187 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) 2188 goto T1; 2189 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) 2190 goto X0Y0; 2191 2192 /* now shift the digits */ 2193 x0.used = y0.used = B; 2194 x1.used = a->used - B; 2195 y1.used = b->used - B; 2196 2197 { 2198 int x; 2199 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2200 2201 /* we copy the digits directly instead of using higher level functions 2202 * since we also need to shift the digits 2203 */ 2204 tmpa = a->dp; 2205 tmpb = b->dp; 2206 2207 tmpx = x0.dp; 2208 tmpy = y0.dp; 2209 for (x = 0; x < B; x++) { 2210 *tmpx++ = *tmpa++; 2211 *tmpy++ = *tmpb++; 2212 } 2213 2214 tmpx = x1.dp; 2215 for (x = B; x < a->used; x++) { 2216 *tmpx++ = *tmpa++; 2217 } 2218 2219 tmpy = y1.dp; 2220 for (x = B; x < b->used; x++) { 2221 *tmpy++ = *tmpb++; 2222 } 2223 } 2224 2225 /* only need to clamp the lower words since by definition the 2226 * upper words x1/y1 must have a known number of digits 2227 */ 2228 mp_clamp (&x0); 2229 mp_clamp (&y0); 2230 2231 /* now calc the products x0y0 and x1y1 */ 2232 /* after this x0 is no longer required, free temp [x0==t2]! */ 2233 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) 2234 goto X1Y1; /* x0y0 = x0*y0 */ 2235 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 2236 goto X1Y1; /* x1y1 = x1*y1 */ 2237 2238 /* now calc x1+x0 and y1+y0 */ 2239 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 2240 goto X1Y1; /* t1 = x1 - x0 */ 2241 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) 2242 goto X1Y1; /* t2 = y1 - y0 */ 2243 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) 2244 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2245 2246 /* add x0y0 */ 2247 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 2248 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2249 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) 2250 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2251 2252 /* shift by B */ 2253 if (mp_lshd (&t1, B) != MP_OKAY) 2254 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2255 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) 2256 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2257 2258 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) 2259 goto X1Y1; /* t1 = x0y0 + t1 */ 2260 if (mp_add (&t1, &x1y1, c) != MP_OKAY) 2261 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2262 2263 /* Algorithm succeeded set the return code to MP_OKAY */ 2264 err = MP_OKAY; 2265 2266 X1Y1:mp_clear (&x1y1); 2267 X0Y0:mp_clear (&x0y0); 2268 T1:mp_clear (&t1); 2269 Y1:mp_clear (&y1); 2270 Y0:mp_clear (&y0); 2271 X1:mp_clear (&x1); 2272 X0:mp_clear (&x0); 2273 ERR: 2274 return err; 2275 } 2276 2277 /* Fast (comba) multiplier 2278 * 2279 * This is the fast column-array [comba] multiplier. It is 2280 * designed to compute the columns of the product first 2281 * then handle the carries afterwards. This has the effect 2282 * of making the nested loops that compute the columns very 2283 * simple and schedulable on super-scalar processors. 2284 * 2285 * This has been modified to produce a variable number of 2286 * digits of output so if say only a half-product is required 2287 * you don't have to compute the upper half (a feature 2288 * required for fast Barrett reduction). 2289 * 2290 * Based on Algorithm 14.12 on pp.595 of HAC. 2291 * 2292 */ 2293 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2294 { 2295 int olduse, res, pa, ix, iz; 2296 /*LINTED*/ 2297 mp_digit W[MP_WARRAY]; 2298 mp_word _W; 2299 2300 /* grow the destination as required */ 2301 if (c->alloc < digs) { 2302 if ((res = mp_grow (c, digs)) != MP_OKAY) { 2303 return res; 2304 } 2305 } 2306 2307 /* number of output digits to produce */ 2308 pa = MIN(digs, a->used + b->used); 2309 2310 /* clear the carry */ 2311 _W = 0; 2312 for (ix = 0; ix < pa; ix++) { 2313 int tx, ty; 2314 int iy; 2315 mp_digit *tmpx, *tmpy; 2316 2317 /* get offsets into the two bignums */ 2318 ty = MIN(b->used-1, ix); 2319 tx = ix - ty; 2320 2321 /* setup temp aliases */ 2322 tmpx = a->dp + tx; 2323 tmpy = b->dp + ty; 2324 2325 /* this is the number of times the loop will iterrate, essentially 2326 while (tx++ < a->used && ty-- >= 0) { ... } 2327 */ 2328 iy = MIN(a->used-tx, ty+1); 2329 2330 /* execute loop */ 2331 for (iz = 0; iz < iy; ++iz) { 2332 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2333 2334 } 2335 2336 /* store term */ 2337 W[ix] = ((mp_digit)_W) & MP_MASK; 2338 2339 /* make next carry */ 2340 _W = _W >> ((mp_word)DIGIT_BIT); 2341 } 2342 2343 /* setup dest */ 2344 olduse = c->used; 2345 c->used = pa; 2346 2347 { 2348 mp_digit *tmpc; 2349 tmpc = c->dp; 2350 for (ix = 0; ix < pa+1; ix++) { 2351 /* now extract the previous digit [below the carry] */ 2352 *tmpc++ = W[ix]; 2353 } 2354 2355 /* clear unused digits [that existed in the old copy of c] */ 2356 for (; ix < olduse; ix++) { 2357 *tmpc++ = 0; 2358 } 2359 } 2360 mp_clamp (c); 2361 return MP_OKAY; 2362 } 2363 2364 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2365 /* Revision: 1.2 $ */ 2366 /* Date: 2011/03/18 16:22:09 $ */ 2367 2368 2369 /* multiplies |a| * |b| and only computes upto digs digits of result 2370 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2371 * many digits of output are created. 2372 */ 2373 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2374 { 2375 mp_int t; 2376 int res, pa, pb, ix, iy; 2377 mp_digit u; 2378 mp_word r; 2379 mp_digit tmpx, *tmpt, *tmpy; 2380 2381 /* can we use the fast multiplier? */ 2382 if (((unsigned)(digs) < MP_WARRAY) && 2383 MIN (a->used, b->used) < 2384 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2385 return fast_s_mp_mul_digs (a, b, c, digs); 2386 } 2387 2388 if ((res = mp_init_size (&t, digs)) != MP_OKAY) { 2389 return res; 2390 } 2391 t.used = digs; 2392 2393 /* compute the digits of the product directly */ 2394 pa = a->used; 2395 for (ix = 0; ix < pa; ix++) { 2396 /* set the carry to zero */ 2397 u = 0; 2398 2399 /* limit ourselves to making digs digits of output */ 2400 pb = MIN (b->used, digs - ix); 2401 2402 /* setup some aliases */ 2403 /* copy of the digit from a used within the nested loop */ 2404 tmpx = a->dp[ix]; 2405 2406 /* an alias for the destination shifted ix places */ 2407 tmpt = t.dp + ix; 2408 2409 /* an alias for the digits of b */ 2410 tmpy = b->dp; 2411 2412 /* compute the columns of the output and propagate the carry */ 2413 for (iy = 0; iy < pb; iy++) { 2414 /* compute the column as a mp_word */ 2415 r = ((mp_word)*tmpt) + 2416 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2417 ((mp_word) u); 2418 2419 /* the new column is the lower part of the result */ 2420 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2421 2422 /* get the carry word from the result */ 2423 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2424 } 2425 /* set carry if it is placed below digs */ 2426 if (ix + iy < digs) { 2427 *tmpt = u; 2428 } 2429 } 2430 2431 mp_clamp (&t); 2432 mp_exch (&t, c); 2433 2434 mp_clear (&t); 2435 return MP_OKAY; 2436 } 2437 2438 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2439 /* Revision: 1.3 $ */ 2440 /* Date: 2011/03/18 16:43:04 $ */ 2441 2442 /* high level multiplication (handles sign) */ 2443 static int 2444 mp_mul(mp_int * a, mp_int * b, mp_int * c) 2445 { 2446 int res, neg; 2447 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2448 2449 /* use Toom-Cook? */ 2450 if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) { 2451 res = mp_toom_mul(a, b, c); 2452 } else 2453 /* use Karatsuba? */ 2454 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2455 res = mp_karatsuba_mul (a, b, c); 2456 } else 2457 { 2458 /* can we use the fast multiplier? 2459 * 2460 * The fast multiplier can be used if the output will 2461 * have less than MP_WARRAY digits and the number of 2462 * digits won't affect carry propagation 2463 */ 2464 int digs = a->used + b->used + 1; 2465 2466 if (((unsigned)digs < MP_WARRAY) && 2467 MIN(a->used, b->used) <= 2468 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2469 res = fast_s_mp_mul_digs (a, b, c, digs); 2470 } else 2471 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ 2472 2473 } 2474 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2475 return res; 2476 } 2477 2478 /* this is a modified version of fast_s_mul_digs that only produces 2479 * output digits *above* digs. See the comments for fast_s_mul_digs 2480 * to see how it works. 2481 * 2482 * This is used in the Barrett reduction since for one of the multiplications 2483 * only the higher digits were needed. This essentially halves the work. 2484 * 2485 * Based on Algorithm 14.12 on pp.595 of HAC. 2486 */ 2487 static int 2488 fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2489 { 2490 int olduse, res, pa, ix, iz; 2491 mp_digit W[MP_WARRAY]; 2492 mp_word _W; 2493 2494 /* grow the destination as required */ 2495 pa = a->used + b->used; 2496 if (c->alloc < pa) { 2497 if ((res = mp_grow (c, pa)) != MP_OKAY) { 2498 return res; 2499 } 2500 } 2501 2502 /* number of output digits to produce */ 2503 pa = a->used + b->used; 2504 _W = 0; 2505 for (ix = digs; ix < pa; ix++) { 2506 int tx, ty, iy; 2507 mp_digit *tmpx, *tmpy; 2508 2509 /* get offsets into the two bignums */ 2510 ty = MIN(b->used-1, ix); 2511 tx = ix - ty; 2512 2513 /* setup temp aliases */ 2514 tmpx = a->dp + tx; 2515 tmpy = b->dp + ty; 2516 2517 /* this is the number of times the loop will iterrate, essentially its 2518 while (tx++ < a->used && ty-- >= 0) { ... } 2519 */ 2520 iy = MIN(a->used-tx, ty+1); 2521 2522 /* execute loop */ 2523 for (iz = 0; iz < iy; iz++) { 2524 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2525 } 2526 2527 /* store term */ 2528 W[ix] = ((mp_digit)_W) & MP_MASK; 2529 2530 /* make next carry */ 2531 _W = _W >> ((mp_word)DIGIT_BIT); 2532 } 2533 2534 /* setup dest */ 2535 olduse = c->used; 2536 c->used = pa; 2537 2538 { 2539 mp_digit *tmpc; 2540 2541 tmpc = c->dp + digs; 2542 for (ix = digs; ix < pa; ix++) { 2543 /* now extract the previous digit [below the carry] */ 2544 *tmpc++ = W[ix]; 2545 } 2546 2547 /* clear unused digits [that existed in the old copy of c] */ 2548 for (; ix < olduse; ix++) { 2549 *tmpc++ = 0; 2550 } 2551 } 2552 mp_clamp (c); 2553 return MP_OKAY; 2554 } 2555 2556 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2557 /* Revision: 1.1.1.1 $ */ 2558 /* Date: 2011/03/12 22:58:18 $ */ 2559 2560 /* multiplies |a| * |b| and does not compute the lower digs digits 2561 * [meant to get the higher part of the product] 2562 */ 2563 static int 2564 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2565 { 2566 mp_int t; 2567 int res, pa, pb, ix, iy; 2568 mp_digit u; 2569 mp_word r; 2570 mp_digit tmpx, *tmpt, *tmpy; 2571 2572 /* can we use the fast multiplier? */ 2573 if (((unsigned)(a->used + b->used + 1) < MP_WARRAY) 2574 && MIN (a->used, b->used) < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2575 return fast_s_mp_mul_high_digs (a, b, c, digs); 2576 } 2577 2578 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { 2579 return res; 2580 } 2581 t.used = a->used + b->used + 1; 2582 2583 pa = a->used; 2584 pb = b->used; 2585 for (ix = 0; ix < pa; ix++) { 2586 /* clear the carry */ 2587 u = 0; 2588 2589 /* left hand side of A[ix] * B[iy] */ 2590 tmpx = a->dp[ix]; 2591 2592 /* alias to the address of where the digits will be stored */ 2593 tmpt = &(t.dp[digs]); 2594 2595 /* alias for where to read the right hand side from */ 2596 tmpy = b->dp + (digs - ix); 2597 2598 for (iy = digs - ix; iy < pb; iy++) { 2599 /* calculate the double precision result */ 2600 r = ((mp_word)*tmpt) + 2601 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2602 ((mp_word) u); 2603 2604 /* get the lower part */ 2605 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2606 2607 /* carry the carry */ 2608 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2609 } 2610 *tmpt = u; 2611 } 2612 mp_clamp (&t); 2613 mp_exch (&t, c); 2614 mp_clear (&t); 2615 return MP_OKAY; 2616 } 2617 2618 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2619 /* Revision: 1.3 $ */ 2620 /* Date: 2011/03/18 16:43:04 $ */ 2621 2622 /* reduces x mod m, assumes 0 < x < m**2, mu is 2623 * precomputed via mp_reduce_setup. 2624 * From HAC pp.604 Algorithm 14.42 2625 */ 2626 static int 2627 mp_reduce (mp_int * x, mp_int * m, mp_int * mu) 2628 { 2629 mp_int q; 2630 int res, um = m->used; 2631 2632 /* q = x */ 2633 if ((res = mp_init_copy (&q, x)) != MP_OKAY) { 2634 return res; 2635 } 2636 2637 /* q1 = x / b**(k-1) */ 2638 mp_rshd (&q, um - 1); 2639 2640 /* according to HAC this optimization is ok */ 2641 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2642 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { 2643 goto CLEANUP; 2644 } 2645 } else { 2646 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 2647 goto CLEANUP; 2648 } 2649 } 2650 2651 /* q3 = q2 / b**(k+1) */ 2652 mp_rshd (&q, um + 1); 2653 2654 /* x = x mod b**(k+1), quick (no division) */ 2655 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2656 goto CLEANUP; 2657 } 2658 2659 /* q = q * m mod b**(k+1), quick (no division) */ 2660 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { 2661 goto CLEANUP; 2662 } 2663 2664 /* x = x - q */ 2665 if ((res = mp_sub (x, &q, x)) != MP_OKAY) { 2666 goto CLEANUP; 2667 } 2668 2669 /* If x < 0, add b**(k+1) to it */ 2670 if (mp_cmp_d (x, 0) == MP_LT) { 2671 mp_set (&q, 1); 2672 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) 2673 goto CLEANUP; 2674 if ((res = mp_add (x, &q, x)) != MP_OKAY) 2675 goto CLEANUP; 2676 } 2677 2678 /* Back off if it's too big */ 2679 while (mp_cmp (x, m) != MP_LT) { 2680 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { 2681 goto CLEANUP; 2682 } 2683 } 2684 2685 CLEANUP: 2686 mp_clear (&q); 2687 2688 return res; 2689 } 2690 2691 /* determines the setup value */ 2692 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2693 { 2694 int res; 2695 mp_int tmp; 2696 2697 if ((res = mp_init(&tmp)) != MP_OKAY) { 2698 return res; 2699 } 2700 2701 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2702 goto ERR; 2703 } 2704 2705 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { 2706 goto ERR; 2707 } 2708 2709 ERR: 2710 mp_clear(&tmp); 2711 return res; 2712 } 2713 2714 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2715 /* Revision: 1.1.1.1 $ */ 2716 /* Date: 2011/03/12 22:58:18 $ */ 2717 2718 /* reduces a modulo n where n is of the form 2**p - d 2719 This differs from reduce_2k since "d" can be larger 2720 than a single digit. 2721 */ 2722 static int 2723 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2724 { 2725 mp_int q; 2726 int p, res; 2727 2728 if ((res = mp_init(&q)) != MP_OKAY) { 2729 return res; 2730 } 2731 2732 p = mp_count_bits(n); 2733 top: 2734 /* q = a/2**p, a = a mod 2**p */ 2735 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 2736 goto ERR; 2737 } 2738 2739 /* q = q * d */ 2740 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 2741 goto ERR; 2742 } 2743 2744 /* a = a + q */ 2745 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 2746 goto ERR; 2747 } 2748 2749 if (mp_cmp_mag(a, n) != MP_LT) { 2750 s_mp_sub(a, n, a); 2751 goto top; 2752 } 2753 2754 ERR: 2755 mp_clear(&q); 2756 return res; 2757 } 2758 2759 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2760 /* Revision: 1.1.1.1 $ */ 2761 /* Date: 2011/03/12 22:58:18 $ */ 2762 2763 /* squaring using Toom-Cook 3-way algorithm */ 2764 static int 2765 mp_toom_sqr(mp_int *a, mp_int *b) 2766 { 2767 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2768 int res, B; 2769 2770 /* init temps */ 2771 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2772 return res; 2773 } 2774 2775 /* B */ 2776 B = a->used / 3; 2777 2778 /* a = a2 * B**2 + a1 * B + a0 */ 2779 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2780 goto ERR; 2781 } 2782 2783 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2784 goto ERR; 2785 } 2786 mp_rshd(&a1, B); 2787 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 2788 2789 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2790 goto ERR; 2791 } 2792 mp_rshd(&a2, B*2); 2793 2794 /* w0 = a0*a0 */ 2795 if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { 2796 goto ERR; 2797 } 2798 2799 /* w4 = a2 * a2 */ 2800 if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { 2801 goto ERR; 2802 } 2803 2804 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2805 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 2806 goto ERR; 2807 } 2808 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2809 goto ERR; 2810 } 2811 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 2812 goto ERR; 2813 } 2814 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2815 goto ERR; 2816 } 2817 2818 if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { 2819 goto ERR; 2820 } 2821 2822 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2823 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 2824 goto ERR; 2825 } 2826 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2827 goto ERR; 2828 } 2829 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 2830 goto ERR; 2831 } 2832 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2833 goto ERR; 2834 } 2835 2836 if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { 2837 goto ERR; 2838 } 2839 2840 2841 /* w2 = (a2 + a1 + a0)**2 */ 2842 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2843 goto ERR; 2844 } 2845 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2846 goto ERR; 2847 } 2848 if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { 2849 goto ERR; 2850 } 2851 2852 /* now solve the matrix 2853 2854 0 0 0 0 1 2855 1 2 4 8 16 2856 1 1 1 1 1 2857 16 8 4 2 1 2858 1 0 0 0 0 2859 2860 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2861 */ 2862 2863 /* r1 - r4 */ 2864 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 2865 goto ERR; 2866 } 2867 /* r3 - r0 */ 2868 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 2869 goto ERR; 2870 } 2871 /* r1/2 */ 2872 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 2873 goto ERR; 2874 } 2875 /* r3/2 */ 2876 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 2877 goto ERR; 2878 } 2879 /* r2 - r0 - r4 */ 2880 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 2881 goto ERR; 2882 } 2883 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 2884 goto ERR; 2885 } 2886 /* r1 - r2 */ 2887 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2888 goto ERR; 2889 } 2890 /* r3 - r2 */ 2891 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2892 goto ERR; 2893 } 2894 /* r1 - 8r0 */ 2895 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 2896 goto ERR; 2897 } 2898 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 2899 goto ERR; 2900 } 2901 /* r3 - 8r4 */ 2902 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 2903 goto ERR; 2904 } 2905 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 2906 goto ERR; 2907 } 2908 /* 3r2 - r1 - r3 */ 2909 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 2910 goto ERR; 2911 } 2912 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 2913 goto ERR; 2914 } 2915 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 2916 goto ERR; 2917 } 2918 /* r1 - r2 */ 2919 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2920 goto ERR; 2921 } 2922 /* r3 - r2 */ 2923 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2924 goto ERR; 2925 } 2926 /* r1/3 */ 2927 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 2928 goto ERR; 2929 } 2930 /* r3/3 */ 2931 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 2932 goto ERR; 2933 } 2934 2935 /* at this point shift W[n] by B*n */ 2936 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 2937 goto ERR; 2938 } 2939 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 2940 goto ERR; 2941 } 2942 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 2943 goto ERR; 2944 } 2945 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 2946 goto ERR; 2947 } 2948 2949 if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { 2950 goto ERR; 2951 } 2952 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2953 goto ERR; 2954 } 2955 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2956 goto ERR; 2957 } 2958 if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { 2959 goto ERR; 2960 } 2961 2962 ERR: 2963 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2964 return res; 2965 } 2966 2967 2968 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2969 /* Revision: 1.1.1.1 $ */ 2970 /* Date: 2011/03/12 22:58:18 $ */ 2971 2972 /* Karatsuba squaring, computes b = a*a using three 2973 * half size squarings 2974 * 2975 * See comments of karatsuba_mul for details. It 2976 * is essentially the same algorithm but merely 2977 * tuned to perform recursive squarings. 2978 */ 2979 static int mp_karatsuba_sqr (mp_int * a, mp_int * b) 2980 { 2981 mp_int x0, x1, t1, t2, x0x0, x1x1; 2982 int B, err; 2983 2984 err = MP_MEM; 2985 2986 /* min # of digits */ 2987 B = a->used; 2988 2989 /* now divide in two */ 2990 B = (unsigned)B >> 1; 2991 2992 /* init copy all the temps */ 2993 if (mp_init_size (&x0, B) != MP_OKAY) 2994 goto ERR; 2995 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 2996 goto X0; 2997 2998 /* init temps */ 2999 if (mp_init_size (&t1, a->used * 2) != MP_OKAY) 3000 goto X1; 3001 if (mp_init_size (&t2, a->used * 2) != MP_OKAY) 3002 goto T1; 3003 if (mp_init_size (&x0x0, B * 2) != MP_OKAY) 3004 goto T2; 3005 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) 3006 goto X0X0; 3007 3008 { 3009 int x; 3010 mp_digit *dst, *src; 3011 3012 src = a->dp; 3013 3014 /* now shift the digits */ 3015 dst = x0.dp; 3016 for (x = 0; x < B; x++) { 3017 *dst++ = *src++; 3018 } 3019 3020 dst = x1.dp; 3021 for (x = B; x < a->used; x++) { 3022 *dst++ = *src++; 3023 } 3024 } 3025 3026 x0.used = B; 3027 x1.used = a->used - B; 3028 3029 mp_clamp (&x0); 3030 3031 /* now calc the products x0*x0 and x1*x1 */ 3032 if (mp_sqr (&x0, &x0x0) != MP_OKAY) 3033 goto X1X1; /* x0x0 = x0*x0 */ 3034 if (mp_sqr (&x1, &x1x1) != MP_OKAY) 3035 goto X1X1; /* x1x1 = x1*x1 */ 3036 3037 /* now calc (x1+x0)**2 */ 3038 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 3039 goto X1X1; /* t1 = x1 - x0 */ 3040 if (mp_sqr (&t1, &t1) != MP_OKAY) 3041 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 3042 3043 /* add x0y0 */ 3044 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) 3045 goto X1X1; /* t2 = x0x0 + x1x1 */ 3046 if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) 3047 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3048 3049 /* shift by B */ 3050 if (mp_lshd (&t1, B) != MP_OKAY) 3051 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3052 if (mp_lshd (&x1x1, B * 2) != MP_OKAY) 3053 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3054 3055 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) 3056 goto X1X1; /* t1 = x0x0 + t1 */ 3057 if (mp_add (&t1, &x1x1, b) != MP_OKAY) 3058 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3059 3060 err = MP_OKAY; 3061 3062 X1X1:mp_clear (&x1x1); 3063 X0X0:mp_clear (&x0x0); 3064 T2:mp_clear (&t2); 3065 T1:mp_clear (&t1); 3066 X1:mp_clear (&x1); 3067 X0:mp_clear (&x0); 3068 ERR: 3069 return err; 3070 } 3071 3072 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3073 /* Revision: 1.2 $ */ 3074 /* Date: 2011/03/12 23:43:54 $ */ 3075 3076 /* the jist of squaring... 3077 * you do like mult except the offset of the tmpx [one that 3078 * starts closer to zero] can't equal the offset of tmpy. 3079 * So basically you set up iy like before then you min it with 3080 * (ty-tx) so that it never happens. You double all those 3081 * you add in the inner loop 3082 3083 After that loop you do the squares and add them in. 3084 */ 3085 3086 static int fast_s_mp_sqr (mp_int * a, mp_int * b) 3087 { 3088 int olduse, res, pa, ix, iz; 3089 mp_digit W[MP_WARRAY], *tmpx; 3090 mp_word W1; 3091 3092 /* grow the destination as required */ 3093 pa = a->used + a->used; 3094 if (b->alloc < pa) { 3095 if ((res = mp_grow (b, pa)) != MP_OKAY) { 3096 return res; 3097 } 3098 } 3099 3100 /* number of output digits to produce */ 3101 W1 = 0; 3102 for (ix = 0; ix < pa; ix++) { 3103 int tx, ty, iy; 3104 mp_word _W; 3105 mp_digit *tmpy; 3106 3107 /* clear counter */ 3108 _W = 0; 3109 3110 /* get offsets into the two bignums */ 3111 ty = MIN(a->used-1, ix); 3112 tx = ix - ty; 3113 3114 /* setup temp aliases */ 3115 tmpx = a->dp + tx; 3116 tmpy = a->dp + ty; 3117 3118 /* this is the number of times the loop will iterrate, essentially 3119 while (tx++ < a->used && ty-- >= 0) { ... } 3120 */ 3121 iy = MIN(a->used-tx, ty+1); 3122 3123 /* now for squaring tx can never equal ty 3124 * we halve the distance since they approach at a rate of 2x 3125 * and we have to round because odd cases need to be executed 3126 */ 3127 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3128 3129 /* execute loop */ 3130 for (iz = 0; iz < iy; iz++) { 3131 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3132 } 3133 3134 /* double the inner product and add carry */ 3135 _W = _W + _W + W1; 3136 3137 /* even columns have the square term in them */ 3138 if ((ix&1) == 0) { 3139 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3140 } 3141 3142 /* store it */ 3143 W[ix] = (mp_digit)(_W & MP_MASK); 3144 3145 /* make next carry */ 3146 W1 = _W >> ((mp_word)DIGIT_BIT); 3147 } 3148 3149 /* setup dest */ 3150 olduse = b->used; 3151 b->used = a->used+a->used; 3152 3153 { 3154 mp_digit *tmpb; 3155 tmpb = b->dp; 3156 for (ix = 0; ix < pa; ix++) { 3157 *tmpb++ = W[ix] & MP_MASK; 3158 } 3159 3160 /* clear unused digits [that existed in the old copy of c] */ 3161 for (; ix < olduse; ix++) { 3162 *tmpb++ = 0; 3163 } 3164 } 3165 mp_clamp (b); 3166 return MP_OKAY; 3167 } 3168 3169 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3170 /* Revision: 1.3 $ */ 3171 /* Date: 2011/03/18 16:43:04 $ */ 3172 3173 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3174 static int 3175 s_mp_sqr (mp_int * a, mp_int * b) 3176 { 3177 mp_int t; 3178 int res, ix, iy, pa; 3179 mp_word r; 3180 mp_digit u, tmpx, *tmpt; 3181 3182 pa = a->used; 3183 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { 3184 return res; 3185 } 3186 3187 /* default used is maximum possible size */ 3188 t.used = 2*pa + 1; 3189 3190 for (ix = 0; ix < pa; ix++) { 3191 /* first calculate the digit at 2*ix */ 3192 /* calculate double precision result */ 3193 r = ((mp_word) t.dp[2*ix]) + 3194 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3195 3196 /* store lower part in result */ 3197 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3198 3199 /* get the carry */ 3200 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3201 3202 /* left hand side of A[ix] * A[iy] */ 3203 tmpx = a->dp[ix]; 3204 3205 /* alias for where to store the results */ 3206 tmpt = t.dp + (2*ix + 1); 3207 3208 for (iy = ix + 1; iy < pa; iy++) { 3209 /* first calculate the product */ 3210 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3211 3212 /* now calculate the double precision result, note we use 3213 * addition instead of *2 since it's easier to optimize 3214 */ 3215 r = ((mp_word) *tmpt) + r + r + ((mp_word) u); 3216 3217 /* store lower part */ 3218 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3219 3220 /* get carry */ 3221 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3222 } 3223 /* propagate upwards */ 3224 while (u != ((mp_digit) 0)) { 3225 r = ((mp_word) *tmpt) + ((mp_word) u); 3226 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3227 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3228 } 3229 } 3230 3231 mp_clamp (&t); 3232 mp_exch (&t, b); 3233 mp_clear (&t); 3234 return MP_OKAY; 3235 } 3236 3237 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3238 /* Revision: 1.1.1.1 $ */ 3239 /* Date: 2011/03/12 22:58:18 $ */ 3240 3241 #define TOOM_SQR_CUTOFF 400 3242 #define KARATSUBA_SQR_CUTOFF 120 3243 3244 /* computes b = a*a */ 3245 static int 3246 mp_sqr (mp_int * a, mp_int * b) 3247 { 3248 int res; 3249 3250 /* use Toom-Cook? */ 3251 if (a->used >= TOOM_SQR_CUTOFF) { 3252 res = mp_toom_sqr(a, b); 3253 /* Karatsuba? */ 3254 } else 3255 if (a->used >= KARATSUBA_SQR_CUTOFF) { 3256 res = mp_karatsuba_sqr (a, b); 3257 } else 3258 { 3259 /* can we use the fast comba multiplier? */ 3260 if (((unsigned)a->used * 2 + 1) < MP_WARRAY && 3261 a->used < 3262 (1 << (unsigned)(sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { 3263 res = fast_s_mp_sqr (a, b); 3264 } else 3265 res = s_mp_sqr (a, b); 3266 } 3267 b->sign = MP_ZPOS; 3268 return res; 3269 } 3270 3271 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3272 /* Revision: 1.3 $ */ 3273 /* Date: 2011/03/18 16:43:04 $ */ 3274 3275 #define TAB_SIZE 256 3276 3277 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3278 { 3279 mp_int M[TAB_SIZE], res, mu; 3280 mp_digit buf; 3281 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3282 int (*redux)(mp_int*,mp_int*,mp_int*); 3283 3284 /* find window size */ 3285 x = mp_count_bits (X); 3286 if (x <= 7) { 3287 winsize = 2; 3288 } else if (x <= 36) { 3289 winsize = 3; 3290 } else if (x <= 140) { 3291 winsize = 4; 3292 } else if (x <= 450) { 3293 winsize = 5; 3294 } else if (x <= 1303) { 3295 winsize = 6; 3296 } else if (x <= 3529) { 3297 winsize = 7; 3298 } else { 3299 winsize = 8; 3300 } 3301 3302 /* init M array */ 3303 /* init first cell */ 3304 if ((err = mp_init(&M[1])) != MP_OKAY) { 3305 return err; 3306 } 3307 3308 /* now init the second half of the array */ 3309 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3310 if ((err = mp_init(&M[x])) != MP_OKAY) { 3311 for (y = 1<<(winsize-1); y < x; y++) { 3312 mp_clear (&M[y]); 3313 } 3314 mp_clear(&M[1]); 3315 return err; 3316 } 3317 } 3318 3319 /* create mu, used for Barrett reduction */ 3320 if ((err = mp_init (&mu)) != MP_OKAY) { 3321 goto LBL_M; 3322 } 3323 3324 if (redmode == 0) { 3325 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { 3326 goto LBL_MU; 3327 } 3328 redux = mp_reduce; 3329 } else { 3330 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { 3331 goto LBL_MU; 3332 } 3333 redux = mp_reduce_2k_l; 3334 } 3335 3336 /* create M table 3337 * 3338 * The M table contains powers of the base, 3339 * e.g. M[x] = G**x mod P 3340 * 3341 * The first half of the table is not 3342 * computed though accept for M[0] and M[1] 3343 */ 3344 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { 3345 goto LBL_MU; 3346 } 3347 3348 /* compute the value at M[1<<(winsize-1)] by squaring 3349 * M[1] (winsize-1) times 3350 */ 3351 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3352 goto LBL_MU; 3353 } 3354 3355 for (x = 0; x < (winsize - 1); x++) { 3356 /* square it */ 3357 if ((err = mp_sqr (&M[1 << (winsize - 1)], 3358 &M[1 << (winsize - 1)])) != MP_OKAY) { 3359 goto LBL_MU; 3360 } 3361 3362 /* reduce modulo P */ 3363 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3364 goto LBL_MU; 3365 } 3366 } 3367 3368 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3369 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3370 */ 3371 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3372 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3373 goto LBL_MU; 3374 } 3375 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { 3376 goto LBL_MU; 3377 } 3378 } 3379 3380 /* setup result */ 3381 if ((err = mp_init (&res)) != MP_OKAY) { 3382 goto LBL_MU; 3383 } 3384 mp_set (&res, 1); 3385 3386 /* set initial mode and bit cnt */ 3387 mode = 0; 3388 bitcnt = 1; 3389 buf = 0; 3390 digidx = X->used - 1; 3391 bitcpy = 0; 3392 bitbuf = 0; 3393 3394 for (;;) { 3395 /* grab next digit as required */ 3396 if (--bitcnt == 0) { 3397 /* if digidx == -1 we are out of digits */ 3398 if (digidx == -1) { 3399 break; 3400 } 3401 /* read next digit and reset the bitcnt */ 3402 buf = X->dp[digidx--]; 3403 bitcnt = (int) DIGIT_BIT; 3404 } 3405 3406 /* grab the next msb from the exponent */ 3407 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3408 buf <<= (mp_digit)1; 3409 3410 /* if the bit is zero and mode == 0 then we ignore it 3411 * These represent the leading zero bits before the first 1 bit 3412 * in the exponent. Technically this opt is not required but it 3413 * does lower the # of trivial squaring/reductions used 3414 */ 3415 if (mode == 0 && y == 0) { 3416 continue; 3417 } 3418 3419 /* if the bit is zero and mode == 1 then we square */ 3420 if (mode == 1 && y == 0) { 3421 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3422 goto LBL_RES; 3423 } 3424 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3425 goto LBL_RES; 3426 } 3427 continue; 3428 } 3429 3430 /* else we add it to the window */ 3431 bitbuf |= (y << (winsize - ++bitcpy)); 3432 mode = 2; 3433 3434 if (bitcpy == winsize) { 3435 /* ok window is filled so square as required and multiply */ 3436 /* square first */ 3437 for (x = 0; x < winsize; x++) { 3438 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3439 goto LBL_RES; 3440 } 3441 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3442 goto LBL_RES; 3443 } 3444 } 3445 3446 /* then multiply */ 3447 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 3448 goto LBL_RES; 3449 } 3450 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3451 goto LBL_RES; 3452 } 3453 3454 /* empty window and reset */ 3455 bitcpy = 0; 3456 bitbuf = 0; 3457 mode = 1; 3458 } 3459 } 3460 3461 /* if bits remain then square/multiply */ 3462 if (mode == 2 && bitcpy > 0) { 3463 /* square then multiply if the bit is set */ 3464 for (x = 0; x < bitcpy; x++) { 3465 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3466 goto LBL_RES; 3467 } 3468 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3469 goto LBL_RES; 3470 } 3471 3472 bitbuf <<= 1; 3473 if ((bitbuf & (1 << winsize)) != 0) { 3474 /* then multiply */ 3475 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 3476 goto LBL_RES; 3477 } 3478 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3479 goto LBL_RES; 3480 } 3481 } 3482 } 3483 } 3484 3485 mp_exch (&res, Y); 3486 err = MP_OKAY; 3487 LBL_RES:mp_clear (&res); 3488 LBL_MU:mp_clear (&mu); 3489 LBL_M: 3490 mp_clear(&M[1]); 3491 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3492 mp_clear (&M[x]); 3493 } 3494 return err; 3495 } 3496 3497 /* determines if a number is a valid DR modulus */ 3498 static int 3499 mp_dr_is_modulus(mp_int *a) 3500 { 3501 int ix; 3502 3503 /* must be at least two digits */ 3504 if (a->used < 2) { 3505 return 0; 3506 } 3507 3508 /* must be of the form b**k - a [a <= b] so all 3509 * but the first digit must be equal to -1 (mod b). 3510 */ 3511 for (ix = 1; ix < a->used; ix++) { 3512 if (a->dp[ix] != MP_MASK) { 3513 return 0; 3514 } 3515 } 3516 return 1; 3517 } 3518 3519 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3520 /* Revision: 1.1.1.1 $ */ 3521 /* Date: 2011/03/12 22:58:18 $ */ 3522 3523 /* determines if mp_reduce_2k can be used */ 3524 static int mp_reduce_is_2k(mp_int *a) 3525 { 3526 int ix, iy, iw; 3527 mp_digit iz; 3528 3529 if (a->used == 0) { 3530 return MP_NO; 3531 } else if (a->used == 1) { 3532 return MP_YES; 3533 } else if (a->used > 1) { 3534 iy = mp_count_bits(a); 3535 iz = 1; 3536 iw = 1; 3537 3538 /* Test every bit from the second digit up, must be 1 */ 3539 for (ix = DIGIT_BIT; ix < iy; ix++) { 3540 if ((a->dp[iw] & iz) == 0) { 3541 return MP_NO; 3542 } 3543 iz <<= 1; 3544 if (iz > (mp_digit)MP_MASK) { 3545 ++iw; 3546 iz = 1; 3547 } 3548 } 3549 } 3550 return MP_YES; 3551 } 3552 3553 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3554 /* Revision: 1.1.1.1 $ */ 3555 /* Date: 2011/03/12 22:58:18 $ */ 3556 3557 3558 /* d = a * b (mod c) */ 3559 static int 3560 mp_mulmod (mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3561 { 3562 int res; 3563 mp_int t; 3564 3565 if ((res = mp_init (&t)) != MP_OKAY) { 3566 return res; 3567 } 3568 3569 if ((res = mp_mul (a, b, &t)) != MP_OKAY) { 3570 mp_clear (&t); 3571 return res; 3572 } 3573 res = mp_mod (&t, c, d); 3574 mp_clear (&t); 3575 return res; 3576 } 3577 3578 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3579 /* Revision: 1.1.1.1 $ */ 3580 /* Date: 2011/03/12 22:58:18 $ */ 3581 3582 /* setups the montgomery reduction stuff */ 3583 static int 3584 mp_montgomery_setup (mp_int * n, mp_digit * rho) 3585 { 3586 mp_digit x, b; 3587 3588 /* fast inversion mod 2**k 3589 * 3590 * Based on the fact that 3591 * 3592 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3593 * => 2*X*A - X*X*A*A = 1 3594 * => 2*(1) - (1) = 1 3595 */ 3596 b = n->dp[0]; 3597 3598 if ((b & 1) == 0) { 3599 return MP_VAL; 3600 } 3601 3602 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3603 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3604 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3605 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3606 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3607 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3608 } 3609 3610 /* rho = -1/m mod b */ 3611 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3612 3613 return MP_OKAY; 3614 } 3615 3616 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3617 /* Revision: 1.1.1.1 $ */ 3618 /* Date: 2011/03/12 22:58:18 $ */ 3619 3620 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3621 * 3622 * This is an optimized implementation of montgomery_reduce 3623 * which uses the comba method to quickly calculate the columns of the 3624 * reduction. 3625 * 3626 * Based on Algorithm 14.32 on pp.601 of HAC. 3627 */ 3628 static int 3629 fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 3630 { 3631 int ix, res, olduse; 3632 /*LINTED*/ 3633 mp_word W[MP_WARRAY]; 3634 3635 /* get old used count */ 3636 olduse = x->used; 3637 3638 /* grow a as required */ 3639 if (x->alloc < n->used + 1) { 3640 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { 3641 return res; 3642 } 3643 } 3644 3645 /* first we have to get the digits of the input into 3646 * an array of double precision words W[...] 3647 */ 3648 { 3649 mp_word *_W; 3650 mp_digit *tmpx; 3651 3652 /* alias for the W[] array */ 3653 _W = W; 3654 3655 /* alias for the digits of x*/ 3656 tmpx = x->dp; 3657 3658 /* copy the digits of a into W[0..a->used-1] */ 3659 for (ix = 0; ix < x->used; ix++) { 3660 *_W++ = *tmpx++; 3661 } 3662 3663 /* zero the high words of W[a->used..m->used*2] */ 3664 for (; ix < n->used * 2 + 1; ix++) { 3665 *_W++ = 0; 3666 } 3667 } 3668 3669 /* now we proceed to zero successive digits 3670 * from the least significant upwards 3671 */ 3672 for (ix = 0; ix < n->used; ix++) { 3673 /* mu = ai * m' mod b 3674 * 3675 * We avoid a double precision multiplication (which isn't required) 3676 * by casting the value down to a mp_digit. Note this requires 3677 * that W[ix-1] have the carry cleared (see after the inner loop) 3678 */ 3679 mp_digit mu; 3680 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3681 3682 /* a = a + mu * m * b**i 3683 * 3684 * This is computed in place and on the fly. The multiplication 3685 * by b**i is handled by offseting which columns the results 3686 * are added to. 3687 * 3688 * Note the comba method normally doesn't handle carries in the 3689 * inner loop In this case we fix the carry from the previous 3690 * column since the Montgomery reduction requires digits of the 3691 * result (so far) [see above] to work. This is 3692 * handled by fixing up one carry after the inner loop. The 3693 * carry fixups are done in order so after these loops the 3694 * first m->used words of W[] have the carries fixed 3695 */ 3696 { 3697 int iy; 3698 mp_digit *tmpn; 3699 mp_word *_W; 3700 3701 /* alias for the digits of the modulus */ 3702 tmpn = n->dp; 3703 3704 /* Alias for the columns set by an offset of ix */ 3705 _W = W + ix; 3706 3707 /* inner loop */ 3708 for (iy = 0; iy < n->used; iy++) { 3709 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3710 } 3711 } 3712 3713 /* now fix carry for next digit, W[ix+1] */ 3714 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3715 } 3716 3717 /* now we have to propagate the carries and 3718 * shift the words downward [all those least 3719 * significant digits we zeroed]. 3720 */ 3721 { 3722 mp_digit *tmpx; 3723 mp_word *_W, *_W1; 3724 3725 /* nox fix rest of carries */ 3726 3727 /* alias for current word */ 3728 _W1 = W + ix; 3729 3730 /* alias for next word, where the carry goes */ 3731 _W = W + ++ix; 3732 3733 for (; ix <= n->used * 2 + 1; ix++) { 3734 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3735 } 3736 3737 /* copy out, A = A/b**n 3738 * 3739 * The result is A/b**n but instead of converting from an 3740 * array of mp_word to mp_digit than calling mp_rshd 3741 * we just copy them in the right order 3742 */ 3743 3744 /* alias for destination word */ 3745 tmpx = x->dp; 3746 3747 /* alias for shifted double precision result */ 3748 _W = W + n->used; 3749 3750 for (ix = 0; ix < n->used + 1; ix++) { 3751 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3752 } 3753 3754 /* zero oldused digits, if the input a was larger than 3755 * m->used+1 we'll have to clear the digits 3756 */ 3757 for (; ix < olduse; ix++) { 3758 *tmpx++ = 0; 3759 } 3760 } 3761 3762 /* set the max used and clamp */ 3763 x->used = n->used + 1; 3764 mp_clamp (x); 3765 3766 /* if A >= m then A = A - m */ 3767 if (mp_cmp_mag (x, n) != MP_LT) { 3768 return s_mp_sub (x, n, x); 3769 } 3770 return MP_OKAY; 3771 } 3772 3773 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3774 /* Revision: 1.2 $ */ 3775 /* Date: 2011/03/18 16:22:09 $ */ 3776 3777 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3778 static int 3779 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 3780 { 3781 int ix, res, digs; 3782 mp_digit mu; 3783 3784 /* can the fast reduction [comba] method be used? 3785 * 3786 * Note that unlike in mul you're safely allowed *less* 3787 * than the available columns [255 per default] since carries 3788 * are fixed up in the inner loop. 3789 */ 3790 digs = n->used * 2 + 1; 3791 if (((unsigned)digs < MP_WARRAY) && 3792 n->used < 3793 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 3794 return fast_mp_montgomery_reduce (x, n, rho); 3795 } 3796 3797 /* grow the input as required */ 3798 if (x->alloc < digs) { 3799 if ((res = mp_grow (x, digs)) != MP_OKAY) { 3800 return res; 3801 } 3802 } 3803 x->used = digs; 3804 3805 for (ix = 0; ix < n->used; ix++) { 3806 /* mu = ai * rho mod b 3807 * 3808 * The value of rho must be precalculated via 3809 * montgomery_setup() such that 3810 * it equals -1/n0 mod b this allows the 3811 * following inner loop to reduce the 3812 * input one digit at a time 3813 */ 3814 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3815 3816 /* a = a + mu * m * b**i */ 3817 { 3818 int iy; 3819 mp_digit *tmpn, *tmpx, u; 3820 mp_word r; 3821 3822 /* alias for digits of the modulus */ 3823 tmpn = n->dp; 3824 3825 /* alias for the digits of x [the input] */ 3826 tmpx = x->dp + ix; 3827 3828 /* set the carry to zero */ 3829 u = 0; 3830 3831 /* Multiply and add in place */ 3832 for (iy = 0; iy < n->used; iy++) { 3833 /* compute product and sum */ 3834 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3835 ((mp_word) u) + ((mp_word) * tmpx); 3836 3837 /* get carry */ 3838 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3839 3840 /* fix digit */ 3841 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3842 } 3843 /* At this point the ix'th digit of x should be zero */ 3844 3845 3846 /* propagate carries upwards as required*/ 3847 while (u) { 3848 *tmpx += u; 3849 u = *tmpx >> DIGIT_BIT; 3850 *tmpx++ &= MP_MASK; 3851 } 3852 } 3853 } 3854 3855 /* at this point the n.used'th least 3856 * significant digits of x are all zero 3857 * which means we can shift x to the 3858 * right by n.used digits and the 3859 * residue is unchanged. 3860 */ 3861 3862 /* x = x/b**n.used */ 3863 mp_clamp(x); 3864 mp_rshd (x, n->used); 3865 3866 /* if x >= n then x = x - n */ 3867 if (mp_cmp_mag (x, n) != MP_LT) { 3868 return s_mp_sub (x, n, x); 3869 } 3870 3871 return MP_OKAY; 3872 } 3873 3874 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3875 /* Revision: 1.3 $ */ 3876 /* Date: 2011/03/18 16:43:04 $ */ 3877 3878 /* determines the setup value */ 3879 static void 3880 mp_dr_setup(mp_int *a, mp_digit *d) 3881 { 3882 /* the casts are required if DIGIT_BIT is one less than 3883 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3884 */ 3885 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3886 ((mp_word)a->dp[0])); 3887 } 3888 3889 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3890 /* Revision: 1.1.1.1 $ */ 3891 /* Date: 2011/03/12 22:58:18 $ */ 3892 3893 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3894 * 3895 * Based on algorithm from the paper 3896 * 3897 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3898 * Chae Hoon Lim, Pil Joong Lee, 3899 * POSTECH Information Research Laboratories 3900 * 3901 * The modulus must be of a special format [see manual] 3902 * 3903 * Has been modified to use algorithm 7.10 from the LTM book instead 3904 * 3905 * Input x must be in the range 0 <= x <= (n-1)**2 3906 */ 3907 static int 3908 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) 3909 { 3910 int err, i, m; 3911 mp_word r; 3912 mp_digit mu, *tmpx1, *tmpx2; 3913 3914 /* m = digits in modulus */ 3915 m = n->used; 3916 3917 /* ensure that "x" has at least 2m digits */ 3918 if (x->alloc < m + m) { 3919 if ((err = mp_grow (x, m + m)) != MP_OKAY) { 3920 return err; 3921 } 3922 } 3923 3924 /* top of loop, this is where the code resumes if 3925 * another reduction pass is required. 3926 */ 3927 top: 3928 /* aliases for digits */ 3929 /* alias for lower half of x */ 3930 tmpx1 = x->dp; 3931 3932 /* alias for upper half of x, or x/B**m */ 3933 tmpx2 = x->dp + m; 3934 3935 /* set carry to zero */ 3936 mu = 0; 3937 3938 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3939 for (i = 0; i < m; i++) { 3940 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3941 *tmpx1++ = (mp_digit)(r & MP_MASK); 3942 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3943 } 3944 3945 /* set final carry */ 3946 *tmpx1++ = mu; 3947 3948 /* zero words above m */ 3949 for (i = m + 1; i < x->used; i++) { 3950 *tmpx1++ = 0; 3951 } 3952 3953 /* clamp, sub and return */ 3954 mp_clamp (x); 3955 3956 /* if x >= n then subtract and reduce again 3957 * Each successive "recursion" makes the input smaller and smaller. 3958 */ 3959 if (mp_cmp_mag (x, n) != MP_LT) { 3960 s_mp_sub(x, n, x); 3961 goto top; 3962 } 3963 return MP_OKAY; 3964 } 3965 3966 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3967 /* Revision: 1.1.1.1 $ */ 3968 /* Date: 2011/03/12 22:58:18 $ */ 3969 3970 /* determines the setup value */ 3971 static int 3972 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3973 { 3974 int res, p; 3975 mp_int tmp; 3976 3977 if ((res = mp_init(&tmp)) != MP_OKAY) { 3978 return res; 3979 } 3980 3981 p = mp_count_bits(a); 3982 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3983 mp_clear(&tmp); 3984 return res; 3985 } 3986 3987 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { 3988 mp_clear(&tmp); 3989 return res; 3990 } 3991 3992 *d = tmp.dp[0]; 3993 mp_clear(&tmp); 3994 return MP_OKAY; 3995 } 3996 3997 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3998 /* Revision: 1.1.1.1 $ */ 3999 /* Date: 2011/03/12 22:58:18 $ */ 4000 4001 /* reduces a modulo n where n is of the form 2**p - d */ 4002 static int 4003 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 4004 { 4005 mp_int q; 4006 int p, res; 4007 4008 if ((res = mp_init(&q)) != MP_OKAY) { 4009 return res; 4010 } 4011 4012 p = mp_count_bits(n); 4013 top: 4014 /* q = a/2**p, a = a mod 2**p */ 4015 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 4016 goto ERR; 4017 } 4018 4019 if (d != 1) { 4020 /* q = q * d */ 4021 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 4022 goto ERR; 4023 } 4024 } 4025 4026 /* a = a + q */ 4027 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 4028 goto ERR; 4029 } 4030 4031 if (mp_cmp_mag(a, n) != MP_LT) { 4032 s_mp_sub(a, n, a); 4033 goto top; 4034 } 4035 4036 ERR: 4037 mp_clear(&q); 4038 return res; 4039 } 4040 4041 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 4042 /* Revision: 1.1.1.1 $ */ 4043 /* Date: 2011/03/12 22:58:18 $ */ 4044 4045 /* 4046 * shifts with subtractions when the result is greater than b. 4047 * 4048 * The method is slightly modified to shift B unconditionally upto just under 4049 * the leading bit of b. This saves alot of multiple precision shifting. 4050 */ 4051 static int 4052 mp_montgomery_calc_normalization (mp_int * a, mp_int * b) 4053 { 4054 int x, bits, res; 4055 4056 /* how many bits of last digit does b use */ 4057 bits = mp_count_bits (b) % DIGIT_BIT; 4058 4059 if (b->used > 1) { 4060 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4061 return res; 4062 } 4063 } else { 4064 mp_set(a, 1); 4065 bits = 1; 4066 } 4067 4068 4069 /* now compute C = A * B mod b */ 4070 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4071 if ((res = mp_mul_2 (a, a)) != MP_OKAY) { 4072 return res; 4073 } 4074 if (mp_cmp_mag (a, b) != MP_LT) { 4075 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { 4076 return res; 4077 } 4078 } 4079 } 4080 4081 return MP_OKAY; 4082 } 4083 4084 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4085 /* Revision: 1.1.1.1 $ */ 4086 /* Date: 2011/03/12 22:58:18 $ */ 4087 4088 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4089 * 4090 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4091 * The value of k changes based on the size of the exponent. 4092 * 4093 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4094 */ 4095 4096 #define TAB_SIZE 256 4097 4098 static int 4099 mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4100 { 4101 mp_int M[TAB_SIZE], res; 4102 mp_digit buf, mp; 4103 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4104 4105 /* use a pointer to the reduction algorithm. This allows us to use 4106 * one of many reduction algorithms without modding the guts of 4107 * the code with if statements everywhere. 4108 */ 4109 int (*redux)(mp_int*,mp_int*,mp_digit); 4110 4111 /* find window size */ 4112 x = mp_count_bits (X); 4113 if (x <= 7) { 4114 winsize = 2; 4115 } else if (x <= 36) { 4116 winsize = 3; 4117 } else if (x <= 140) { 4118 winsize = 4; 4119 } else if (x <= 450) { 4120 winsize = 5; 4121 } else if (x <= 1303) { 4122 winsize = 6; 4123 } else if (x <= 3529) { 4124 winsize = 7; 4125 } else { 4126 winsize = 8; 4127 } 4128 4129 /* init M array */ 4130 /* init first cell */ 4131 if ((err = mp_init(&M[1])) != MP_OKAY) { 4132 return err; 4133 } 4134 4135 /* now init the second half of the array */ 4136 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4137 if ((err = mp_init(&M[x])) != MP_OKAY) { 4138 for (y = 1<<(winsize-1); y < x; y++) { 4139 mp_clear (&M[y]); 4140 } 4141 mp_clear(&M[1]); 4142 return err; 4143 } 4144 } 4145 4146 /* determine and setup reduction code */ 4147 if (redmode == 0) { 4148 /* now setup montgomery */ 4149 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { 4150 goto LBL_M; 4151 } 4152 4153 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4154 if (((unsigned)(P->used * 2 + 1) < MP_WARRAY) && 4155 P->used < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 4156 redux = fast_mp_montgomery_reduce; 4157 } else 4158 { 4159 /* use slower baseline Montgomery method */ 4160 redux = mp_montgomery_reduce; 4161 } 4162 } else if (redmode == 1) { 4163 /* setup DR reduction for moduli of the form B**k - b */ 4164 mp_dr_setup(P, &mp); 4165 redux = mp_dr_reduce; 4166 } else { 4167 /* setup DR reduction for moduli of the form 2**k - b */ 4168 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4169 goto LBL_M; 4170 } 4171 redux = mp_reduce_2k; 4172 } 4173 4174 /* setup result */ 4175 if ((err = mp_init (&res)) != MP_OKAY) { 4176 goto LBL_M; 4177 } 4178 4179 /* create M table 4180 * 4181 4182 * 4183 * The first half of the table is not computed though accept for M[0] and M[1] 4184 */ 4185 4186 if (redmode == 0) { 4187 /* now we need R mod m */ 4188 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { 4189 goto LBL_RES; 4190 } 4191 4192 /* now set M[1] to G * R mod m */ 4193 if ((err = mp_mulmod (&M[1], G, &res, P)) != MP_OKAY) { 4194 goto LBL_RES; 4195 } 4196 } else { 4197 mp_set(&res, 1); 4198 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { 4199 goto LBL_RES; 4200 } 4201 } 4202 4203 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4204 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4205 goto LBL_RES; 4206 } 4207 4208 for (x = 0; x < (winsize - 1); x++) { 4209 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4210 goto LBL_RES; 4211 } 4212 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4213 goto LBL_RES; 4214 } 4215 } 4216 4217 /* create upper table */ 4218 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4219 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4220 goto LBL_RES; 4221 } 4222 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { 4223 goto LBL_RES; 4224 } 4225 } 4226 4227 /* set initial mode and bit cnt */ 4228 mode = 0; 4229 bitcnt = 1; 4230 buf = 0; 4231 digidx = X->used - 1; 4232 bitcpy = 0; 4233 bitbuf = 0; 4234 4235 for (;;) { 4236 /* grab next digit as required */ 4237 if (--bitcnt == 0) { 4238 /* if digidx == -1 we are out of digits so break */ 4239 if (digidx == -1) { 4240 break; 4241 } 4242 /* read next digit and reset bitcnt */ 4243 buf = X->dp[digidx--]; 4244 bitcnt = (int)DIGIT_BIT; 4245 } 4246 4247 /* grab the next msb from the exponent */ 4248 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4249 buf <<= (mp_digit)1; 4250 4251 /* if the bit is zero and mode == 0 then we ignore it 4252 * These represent the leading zero bits before the first 1 bit 4253 * in the exponent. Technically this opt is not required but it 4254 * does lower the # of trivial squaring/reductions used 4255 */ 4256 if (mode == 0 && y == 0) { 4257 continue; 4258 } 4259 4260 /* if the bit is zero and mode == 1 then we square */ 4261 if (mode == 1 && y == 0) { 4262 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4263 goto LBL_RES; 4264 } 4265 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4266 goto LBL_RES; 4267 } 4268 continue; 4269 } 4270 4271 /* else we add it to the window */ 4272 bitbuf |= (y << (winsize - ++bitcpy)); 4273 mode = 2; 4274 4275 if (bitcpy == winsize) { 4276 /* ok window is filled so square as required and multiply */ 4277 /* square first */ 4278 for (x = 0; x < winsize; x++) { 4279 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4280 goto LBL_RES; 4281 } 4282 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4283 goto LBL_RES; 4284 } 4285 } 4286 4287 /* then multiply */ 4288 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 4289 goto LBL_RES; 4290 } 4291 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4292 goto LBL_RES; 4293 } 4294 4295 /* empty window and reset */ 4296 bitcpy = 0; 4297 bitbuf = 0; 4298 mode = 1; 4299 } 4300 } 4301 4302 /* if bits remain then square/multiply */ 4303 if (mode == 2 && bitcpy > 0) { 4304 /* square then multiply if the bit is set */ 4305 for (x = 0; x < bitcpy; x++) { 4306 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4307 goto LBL_RES; 4308 } 4309 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4310 goto LBL_RES; 4311 } 4312 4313 /* get next bit of the window */ 4314 bitbuf <<= 1; 4315 if ((bitbuf & (1 << winsize)) != 0) { 4316 /* then multiply */ 4317 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 4318 goto LBL_RES; 4319 } 4320 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4321 goto LBL_RES; 4322 } 4323 } 4324 } 4325 } 4326 4327 if (redmode == 0) { 4328 /* fixup result if Montgomery reduction is used 4329 * recall that any value in a Montgomery system is 4330 * actually multiplied by R mod n. So we have 4331 * to reduce one more time to cancel out the factor 4332 * of R. 4333 */ 4334 if ((err = redux(&res, P, mp)) != MP_OKAY) { 4335 goto LBL_RES; 4336 } 4337 } 4338 4339 /* swap res with Y */ 4340 mp_exch (&res, Y); 4341 err = MP_OKAY; 4342 LBL_RES:mp_clear (&res); 4343 LBL_M: 4344 mp_clear(&M[1]); 4345 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4346 mp_clear (&M[x]); 4347 } 4348 return err; 4349 } 4350 4351 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_exptmod_fast.c,v $ */ 4352 /* Revision: 1.4 $ */ 4353 /* Date: 2011/03/18 16:43:04 $ */ 4354 4355 /* this is a shell function that calls either the normal or Montgomery 4356 * exptmod functions. Originally the call to the montgomery code was 4357 * embedded in the normal function but that wasted alot of stack space 4358 * for nothing (since 99% of the time the Montgomery code would be called) 4359 */ 4360 static int 4361 mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4362 { 4363 int dr; 4364 4365 /* modulus P must be positive */ 4366 if (P->sign == MP_NEG) { 4367 return MP_VAL; 4368 } 4369 4370 /* if exponent X is negative we have to recurse */ 4371 if (X->sign == MP_NEG) { 4372 mp_int tmpG, tmpX; 4373 int err; 4374 4375 /* first compute 1/G mod P */ 4376 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4377 return err; 4378 } 4379 if ((err = mp_invmod(&tmpG, G, P)) != MP_OKAY) { 4380 mp_clear(&tmpG); 4381 return err; 4382 } 4383 4384 /* now get |X| */ 4385 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4386 mp_clear(&tmpG); 4387 return err; 4388 } 4389 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { 4390 mp_clear_multi(&tmpG, &tmpX, NULL); 4391 return err; 4392 } 4393 4394 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4395 err = mp_exptmod(&tmpG, &tmpX, P, Y); 4396 mp_clear_multi(&tmpG, &tmpX, NULL); 4397 return err; 4398 } 4399 4400 /* modified diminished radix reduction */ 4401 if (mp_reduce_is_2k_l(P) == MP_YES) { 4402 return s_mp_exptmod(G, X, P, Y, 1); 4403 } 4404 4405 /* is it a DR modulus? */ 4406 dr = mp_dr_is_modulus(P); 4407 4408 /* if not, is it a unrestricted DR modulus? */ 4409 if (dr == 0) { 4410 dr = mp_reduce_is_2k(P) << 1; 4411 } 4412 4413 /* if the modulus is odd or dr != 0 use the montgomery method */ 4414 if (BN_is_odd (P) == 1 || dr != 0) { 4415 return mp_exptmod_fast (G, X, P, Y, dr); 4416 } else { 4417 /* otherwise use the generic Barrett reduction technique */ 4418 return s_mp_exptmod (G, X, P, Y, 0); 4419 } 4420 } 4421 4422 /* reverse an array, used for radix code */ 4423 static void 4424 bn_reverse(unsigned char *s, int len) 4425 { 4426 int ix, iy; 4427 unsigned char t; 4428 4429 ix = 0; 4430 iy = len - 1; 4431 while (ix < iy) { 4432 t = s[ix]; 4433 s[ix] = s[iy]; 4434 s[iy] = t; 4435 ++ix; 4436 --iy; 4437 } 4438 } 4439 4440 static int 4441 s_is_power_of_two(mp_digit b, int *p) 4442 { 4443 int x; 4444 4445 /* fast return if no power of two */ 4446 if ((b==0) || (b & (b-1))) { 4447 return 0; 4448 } 4449 4450 for (x = 0; x < DIGIT_BIT; x++) { 4451 if (b == (((mp_digit)1)<<x)) { 4452 *p = x; 4453 return 1; 4454 } 4455 } 4456 return 0; 4457 } 4458 4459 /* single digit division (based on routine from MPI) */ 4460 static int 4461 mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4462 { 4463 mp_int q; 4464 mp_word w; 4465 mp_digit t; 4466 int res, ix; 4467 4468 /* cannot divide by zero */ 4469 if (b == 0) { 4470 return MP_VAL; 4471 } 4472 4473 /* quick outs */ 4474 if (b == 1 || mp_iszero(a) == 1) { 4475 if (d != NULL) { 4476 *d = 0; 4477 } 4478 if (c != NULL) { 4479 return mp_copy(a, c); 4480 } 4481 return MP_OKAY; 4482 } 4483 4484 /* power of two ? */ 4485 if (s_is_power_of_two(b, &ix) == 1) { 4486 if (d != NULL) { 4487 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4488 } 4489 if (c != NULL) { 4490 return mp_div_2d(a, ix, c, NULL); 4491 } 4492 return MP_OKAY; 4493 } 4494 4495 #ifdef BN_MP_DIV_3_C 4496 /* three? */ 4497 if (b == 3) { 4498 return mp_div_3(a, c, d); 4499 } 4500 #endif 4501 4502 /* no easy answer [c'est la vie]. Just division */ 4503 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4504 return res; 4505 } 4506 4507 q.used = a->used; 4508 q.sign = a->sign; 4509 w = 0; 4510 for (ix = a->used - 1; ix >= 0; ix--) { 4511 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4512 4513 if (w >= b) { 4514 t = (mp_digit)(w / b); 4515 w -= ((mp_word)t) * ((mp_word)b); 4516 } else { 4517 t = 0; 4518 } 4519 q.dp[ix] = (mp_digit)t; 4520 } 4521 4522 if (d != NULL) { 4523 *d = (mp_digit)w; 4524 } 4525 4526 if (c != NULL) { 4527 mp_clamp(&q); 4528 mp_exch(&q, c); 4529 } 4530 mp_clear(&q); 4531 4532 return res; 4533 } 4534 4535 static int 4536 mp_mod_d(mp_int *a, mp_digit b, mp_digit *c) 4537 { 4538 return mp_div_d(a, b, NULL, c); 4539 } 4540 4541 static const mp_digit ltm_prime_tab[] = { 4542 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4543 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4544 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4545 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4546 #ifndef MP_8BIT 4547 0x0083, 4548 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4549 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4550 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4551 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4552 4553 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4554 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4555 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4556 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4557 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4558 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4559 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4560 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4561 4562 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4563 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4564 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4565 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4566 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4567 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4568 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4569 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4570 4571 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4572 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4573 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4574 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4575 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4576 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4577 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4578 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4579 #endif 4580 }; 4581 4582 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4583 4584 static int 4585 mp_prime_is_divisible(mp_int *a, int *result) 4586 { 4587 int err, ix; 4588 mp_digit res; 4589 4590 /* default to not */ 4591 *result = MP_NO; 4592 4593 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4594 /* what is a mod LBL_prime_tab[ix] */ 4595 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) { 4596 return err; 4597 } 4598 4599 /* is the residue zero? */ 4600 if (res == 0) { 4601 *result = MP_YES; 4602 return MP_OKAY; 4603 } 4604 } 4605 4606 return MP_OKAY; 4607 } 4608 4609 /* single digit addition */ 4610 static int 4611 mp_add_d(mp_int *a, mp_digit b, mp_int *c) 4612 { 4613 int res, ix, oldused; 4614 mp_digit *tmpa, *tmpc, mu; 4615 4616 /* grow c as required */ 4617 if (c->alloc < a->used + 1) { 4618 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4619 return res; 4620 } 4621 } 4622 4623 /* if a is negative and |a| >= b, call c = |a| - b */ 4624 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4625 /* temporarily fix sign of a */ 4626 a->sign = MP_ZPOS; 4627 4628 /* c = |a| - b */ 4629 res = mp_sub_d(a, b, c); 4630 4631 /* fix sign */ 4632 a->sign = c->sign = MP_NEG; 4633 4634 /* clamp */ 4635 mp_clamp(c); 4636 4637 return res; 4638 } 4639 4640 /* old number of used digits in c */ 4641 oldused = c->used; 4642 4643 /* sign always positive */ 4644 c->sign = MP_ZPOS; 4645 4646 /* source alias */ 4647 tmpa = a->dp; 4648 4649 /* destination alias */ 4650 tmpc = c->dp; 4651 4652 /* if a is positive */ 4653 if (a->sign == MP_ZPOS) { 4654 /* add digit, after this we're propagating 4655 * the carry. 4656 */ 4657 *tmpc = *tmpa++ + b; 4658 mu = *tmpc >> DIGIT_BIT; 4659 *tmpc++ &= MP_MASK; 4660 4661 /* now handle rest of the digits */ 4662 for (ix = 1; ix < a->used; ix++) { 4663 *tmpc = *tmpa++ + mu; 4664 mu = *tmpc >> DIGIT_BIT; 4665 *tmpc++ &= MP_MASK; 4666 } 4667 /* set final carry */ 4668 ix++; 4669 *tmpc++ = mu; 4670 4671 /* setup size */ 4672 c->used = a->used + 1; 4673 } else { 4674 /* a was negative and |a| < b */ 4675 c->used = 1; 4676 4677 /* the result is a single digit */ 4678 if (a->used == 1) { 4679 *tmpc++ = b - a->dp[0]; 4680 } else { 4681 *tmpc++ = b; 4682 } 4683 4684 /* setup count so the clearing of oldused 4685 * can fall through correctly 4686 */ 4687 ix = 1; 4688 } 4689 4690 /* now zero to oldused */ 4691 while (ix++ < oldused) { 4692 *tmpc++ = 0; 4693 } 4694 mp_clamp(c); 4695 4696 return MP_OKAY; 4697 } 4698 4699 /* single digit subtraction */ 4700 static int 4701 mp_sub_d(mp_int *a, mp_digit b, mp_int *c) 4702 { 4703 mp_digit *tmpa, *tmpc, mu; 4704 int res, ix, oldused; 4705 4706 /* grow c as required */ 4707 if (c->alloc < a->used + 1) { 4708 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4709 return res; 4710 } 4711 } 4712 4713 /* if a is negative just do an unsigned 4714 * addition [with fudged signs] 4715 */ 4716 if (a->sign == MP_NEG) { 4717 a->sign = MP_ZPOS; 4718 res = mp_add_d(a, b, c); 4719 a->sign = c->sign = MP_NEG; 4720 4721 /* clamp */ 4722 mp_clamp(c); 4723 4724 return res; 4725 } 4726 4727 /* setup regs */ 4728 oldused = c->used; 4729 tmpa = a->dp; 4730 tmpc = c->dp; 4731 4732 /* if a <= b simply fix the single digit */ 4733 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4734 if (a->used == 1) { 4735 *tmpc++ = b - *tmpa; 4736 } else { 4737 *tmpc++ = b; 4738 } 4739 ix = 1; 4740 4741 /* negative/1digit */ 4742 c->sign = MP_NEG; 4743 c->used = 1; 4744 } else { 4745 /* positive/size */ 4746 c->sign = MP_ZPOS; 4747 c->used = a->used; 4748 4749 /* subtract first digit */ 4750 *tmpc = *tmpa++ - b; 4751 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4752 *tmpc++ &= MP_MASK; 4753 4754 /* handle rest of the digits */ 4755 for (ix = 1; ix < a->used; ix++) { 4756 *tmpc = *tmpa++ - mu; 4757 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4758 *tmpc++ &= MP_MASK; 4759 } 4760 } 4761 4762 /* zero excess digits */ 4763 while (ix++ < oldused) { 4764 *tmpc++ = 0; 4765 } 4766 mp_clamp(c); 4767 return MP_OKAY; 4768 } 4769 4770 static const int lnz[16] = { 4771 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4772 }; 4773 4774 /* Counts the number of lsbs which are zero before the first zero bit */ 4775 static int 4776 mp_cnt_lsb(mp_int *a) 4777 { 4778 int x; 4779 mp_digit q, qq; 4780 4781 /* easy out */ 4782 if (mp_iszero(a) == 1) { 4783 return 0; 4784 } 4785 4786 /* scan lower digits until non-zero */ 4787 for (x = 0; x < a->used && a->dp[x] == 0; x++); 4788 q = a->dp[x]; 4789 x *= DIGIT_BIT; 4790 4791 /* now scan this digit until a 1 is found */ 4792 if ((q & 1) == 0) { 4793 do { 4794 qq = q & 15; 4795 /* LINTED previous op ensures range of qq */ 4796 x += lnz[qq]; 4797 q >>= 4; 4798 } while (qq == 0); 4799 } 4800 return x; 4801 } 4802 4803 /* c = a * a (mod b) */ 4804 static int 4805 mp_sqrmod(mp_int *a, mp_int *b, mp_int *c) 4806 { 4807 int res; 4808 mp_int t; 4809 4810 if ((res = mp_init (&t)) != MP_OKAY) { 4811 return res; 4812 } 4813 4814 if ((res = mp_sqr (a, &t)) != MP_OKAY) { 4815 mp_clear (&t); 4816 return res; 4817 } 4818 res = mp_mod (&t, b, c); 4819 mp_clear (&t); 4820 return res; 4821 } 4822 static int 4823 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4824 { 4825 mp_int n1, y, r; 4826 int s, j, err; 4827 4828 /* default */ 4829 *result = MP_NO; 4830 4831 /* ensure b > 1 */ 4832 if (mp_cmp_d(b, 1) != MP_GT) { 4833 return MP_VAL; 4834 } 4835 4836 /* get n1 = a - 1 */ 4837 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) { 4838 return err; 4839 } 4840 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { 4841 goto LBL_N1; 4842 } 4843 4844 /* set 2**s * r = n1 */ 4845 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { 4846 goto LBL_N1; 4847 } 4848 4849 /* count the number of least significant bits 4850 * which are zero 4851 */ 4852 s = mp_cnt_lsb(&r); 4853 4854 /* now divide n - 1 by 2**s */ 4855 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { 4856 goto LBL_R; 4857 } 4858 4859 /* compute y = b**r mod a */ 4860 if ((err = mp_init (&y)) != MP_OKAY) { 4861 goto LBL_R; 4862 } 4863 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) { 4864 goto LBL_Y; 4865 } 4866 4867 /* if y != 1 and y != n1 do */ 4868 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) { 4869 j = 1; 4870 /* while j <= s-1 and y != n1 */ 4871 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { 4872 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { 4873 goto LBL_Y; 4874 } 4875 4876 /* if y == 1 then composite */ 4877 if (mp_cmp_d (&y, 1) == MP_EQ) { 4878 goto LBL_Y; 4879 } 4880 4881 ++j; 4882 } 4883 4884 /* if y != n1 then composite */ 4885 if (mp_cmp (&y, &n1) != MP_EQ) { 4886 goto LBL_Y; 4887 } 4888 } 4889 4890 /* probably prime now */ 4891 *result = MP_YES; 4892 LBL_Y:mp_clear (&y); 4893 LBL_R:mp_clear (&r); 4894 LBL_N1:mp_clear (&n1); 4895 return err; 4896 } 4897 4898 /* performs a variable number of rounds of Miller-Rabin 4899 * 4900 * Probability of error after t rounds is no more than 4901 4902 * 4903 * Sets result to 1 if probably prime, 0 otherwise 4904 */ 4905 static int 4906 mp_prime_is_prime(mp_int *a, int t, int *result) 4907 { 4908 mp_int b; 4909 int ix, err, res; 4910 4911 /* default to no */ 4912 *result = MP_NO; 4913 4914 /* valid value of t? */ 4915 if (t <= 0 || t > (int)PRIME_SIZE) { 4916 return MP_VAL; 4917 } 4918 4919 /* is the input equal to one of the primes in the table? */ 4920 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4921 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { 4922 *result = 1; 4923 return MP_OKAY; 4924 } 4925 } 4926 4927 /* first perform trial division */ 4928 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { 4929 return err; 4930 } 4931 4932 /* return if it was trivially divisible */ 4933 if (res == MP_YES) { 4934 return MP_OKAY; 4935 } 4936 4937 /* now perform the miller-rabin rounds */ 4938 if ((err = mp_init (&b)) != MP_OKAY) { 4939 return err; 4940 } 4941 4942 for (ix = 0; ix < t; ix++) { 4943 /* set the prime */ 4944 mp_set (&b, ltm_prime_tab[ix]); 4945 4946 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { 4947 goto LBL_B; 4948 } 4949 4950 if (res == MP_NO) { 4951 goto LBL_B; 4952 } 4953 } 4954 4955 /* passed the test */ 4956 *result = MP_YES; 4957 LBL_B:mp_clear (&b); 4958 return err; 4959 } 4960 4961 /* returns size of ASCII reprensentation */ 4962 static int 4963 mp_radix_size (mp_int *a, int radix, int *size) 4964 { 4965 int res, digs; 4966 mp_int t; 4967 mp_digit d; 4968 4969 *size = 0; 4970 4971 /* special case for binary */ 4972 if (radix == 2) { 4973 *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4974 return MP_OKAY; 4975 } 4976 4977 /* make sure the radix is in range */ 4978 if (radix < 2 || radix > 64) { 4979 return MP_VAL; 4980 } 4981 4982 if (mp_iszero(a) == MP_YES) { 4983 *size = 2; 4984 return MP_OKAY; 4985 } 4986 4987 /* digs is the digit count */ 4988 digs = 0; 4989 4990 /* if it's negative add one for the sign */ 4991 if (a->sign == MP_NEG) { 4992 ++digs; 4993 } 4994 4995 /* init a copy of the input */ 4996 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 4997 return res; 4998 } 4999 5000 /* force temp to positive */ 5001 t.sign = MP_ZPOS; 5002 5003 /* fetch out all of the digits */ 5004 while (mp_iszero (&t) == MP_NO) { 5005 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5006 mp_clear (&t); 5007 return res; 5008 } 5009 ++digs; 5010 } 5011 mp_clear (&t); 5012 5013 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 5014 *size = digs + 1; 5015 return MP_OKAY; 5016 } 5017 5018 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 5019 5020 /* stores a bignum as a ASCII string in a given radix (2..64) 5021 * 5022 * Stores upto maxlen-1 chars and always a NULL byte 5023 */ 5024 static int 5025 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 5026 { 5027 int res, digs; 5028 mp_int t; 5029 mp_digit d; 5030 char *_s = str; 5031 5032 /* check range of the maxlen, radix */ 5033 if (maxlen < 2 || radix < 2 || radix > 64) { 5034 return MP_VAL; 5035 } 5036 5037 /* quick out if its zero */ 5038 if (mp_iszero(a) == MP_YES) { 5039 *str++ = '0'; 5040 *str = '\0'; 5041 return MP_OKAY; 5042 } 5043 5044 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 5045 return res; 5046 } 5047 5048 /* if it is negative output a - */ 5049 if (t.sign == MP_NEG) { 5050 /* we have to reverse our digits later... but not the - sign!! */ 5051 ++_s; 5052 5053 /* store the flag and mark the number as positive */ 5054 *str++ = '-'; 5055 t.sign = MP_ZPOS; 5056 5057 /* subtract a char */ 5058 --maxlen; 5059 } 5060 5061 digs = 0; 5062 while (mp_iszero (&t) == 0) { 5063 if (--maxlen < 1) { 5064 /* no more room */ 5065 break; 5066 } 5067 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5068 mp_clear (&t); 5069 return res; 5070 } 5071 /* LINTED -- radix' range is checked above, limits d's range */ 5072 *str++ = mp_s_rmap[d]; 5073 ++digs; 5074 } 5075 5076 /* reverse the digits of the string. In this case _s points 5077 * to the first digit [exluding the sign] of the number 5078 */ 5079 bn_reverse ((unsigned char *)_s, digs); 5080 5081 /* append a NULL so the string is properly terminated */ 5082 *str = '\0'; 5083 5084 mp_clear (&t); 5085 return MP_OKAY; 5086 } 5087 5088 static char * 5089 formatbn(const BIGNUM *a, const int radix) 5090 { 5091 char *s; 5092 int len; 5093 5094 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5095 return NULL; 5096 } 5097 if ((s = netpgp_allocate(1, (size_t)len)) != NULL) { 5098 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5099 netpgp_deallocate(s, (size_t)len); 5100 return NULL; 5101 } 5102 } 5103 return s; 5104 } 5105 5106 static int 5107 mp_getradix_num(mp_int *a, int radix, char *s) 5108 { 5109 int err, ch, neg, y; 5110 5111 /* clear a */ 5112 mp_zero(a); 5113 5114 /* if first digit is - then set negative */ 5115 if ((ch = *s++) == '-') { 5116 neg = MP_NEG; 5117 ch = *s++; 5118 } else { 5119 neg = MP_ZPOS; 5120 } 5121 5122 for (;;) { 5123 /* find y in the radix map */ 5124 for (y = 0; y < radix; y++) { 5125 if (mp_s_rmap[y] == ch) { 5126 break; 5127 } 5128 } 5129 if (y == radix) { 5130 break; 5131 } 5132 5133 /* shift up and add */ 5134 if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) { 5135 return err; 5136 } 5137 if ((err = mp_add_d(a, y, a)) != MP_OKAY) { 5138 return err; 5139 } 5140 5141 ch = *s++; 5142 } 5143 if (mp_cmp_d(a, 0) != MP_EQ) { 5144 a->sign = neg; 5145 } 5146 5147 return MP_OKAY; 5148 } 5149 5150 static int 5151 getbn(BIGNUM **a, const char *str, int radix) 5152 { 5153 int len; 5154 5155 if (a == NULL || str == NULL || (*a = BN_new()) == NULL) { 5156 return 0; 5157 } 5158 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5159 return 0; 5160 } 5161 mp_radix_size(__UNCONST(a), radix, &len); 5162 return len - 1; 5163 } 5164 5165 /* d = a - b (mod c) */ 5166 static int 5167 mp_submod(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) 5168 { 5169 int res; 5170 mp_int t; 5171 5172 5173 if ((res = mp_init (&t)) != MP_OKAY) { 5174 return res; 5175 } 5176 5177 if ((res = mp_sub (a, b, &t)) != MP_OKAY) { 5178 mp_clear (&t); 5179 return res; 5180 } 5181 res = mp_mod (&t, c, d); 5182 mp_clear (&t); 5183 return res; 5184 } 5185 5186 /**************************************************************************/ 5187 5188 /* BIGNUM emulation layer */ 5189 5190 /* essentiually, these are just wrappers around the libtommath functions */ 5191 /* usually the order of args changes */ 5192 /* the BIGNUM API tends to have more const poisoning */ 5193 /* these wrappers also check the arguments passed for sanity */ 5194 5195 BIGNUM * 5196 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret) 5197 { 5198 if (data == NULL) { 5199 return BN_new(); 5200 } 5201 if (ret == NULL) { 5202 ret = BN_new(); 5203 } 5204 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5205 } 5206 5207 /* store in unsigned [big endian] format */ 5208 int 5209 BN_bn2bin(const BIGNUM *a, unsigned char *b) 5210 { 5211 BIGNUM t; 5212 int x; 5213 5214 if (a == NULL || b == NULL) { 5215 return -1; 5216 } 5217 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5218 return -1; 5219 } 5220 for (x = 0; !BN_is_zero(&t) ; ) { 5221 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5222 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY) { 5223 mp_clear(&t); 5224 return -1; 5225 } 5226 } 5227 bn_reverse(b, x); 5228 mp_clear(&t); 5229 return x; 5230 } 5231 5232 void 5233 BN_init(BIGNUM *a) 5234 { 5235 if (a != NULL) { 5236 mp_init(a); 5237 } 5238 } 5239 5240 BIGNUM * 5241 BN_new(void) 5242 { 5243 BIGNUM *a; 5244 5245 if ((a = netpgp_allocate(1, sizeof(*a))) != NULL) { 5246 mp_init(a); 5247 } 5248 return a; 5249 } 5250 5251 /* copy, b = a */ 5252 int 5253 BN_copy(BIGNUM *b, const BIGNUM *a) 5254 { 5255 if (a == NULL || b == NULL) { 5256 return MP_VAL; 5257 } 5258 return mp_copy(__UNCONST(a), b); 5259 } 5260 5261 BIGNUM * 5262 BN_dup(const BIGNUM *a) 5263 { 5264 BIGNUM *ret; 5265 5266 if (a == NULL) { 5267 return NULL; 5268 } 5269 if ((ret = BN_new()) != NULL) { 5270 BN_copy(ret, a); 5271 } 5272 return ret; 5273 } 5274 5275 void 5276 BN_swap(BIGNUM *a, BIGNUM *b) 5277 { 5278 if (a && b) { 5279 mp_exch(a, b); 5280 } 5281 } 5282 5283 int 5284 BN_lshift(BIGNUM *r, const BIGNUM *a, int n) 5285 { 5286 if (r == NULL || a == NULL || n < 0) { 5287 return 0; 5288 } 5289 BN_copy(r, a); 5290 return mp_lshd(r, n) == MP_OKAY; 5291 } 5292 5293 int 5294 BN_lshift1(BIGNUM *r, BIGNUM *a) 5295 { 5296 if (r == NULL || a == NULL) { 5297 return 0; 5298 } 5299 BN_copy(r, a); 5300 return mp_lshd(r, 1) == MP_OKAY; 5301 } 5302 5303 int 5304 BN_rshift(BIGNUM *r, const BIGNUM *a, int n) 5305 { 5306 if (r == NULL || a == NULL || n < 0) { 5307 return MP_VAL; 5308 } 5309 BN_copy(r, a); 5310 return mp_rshd(r, n) == MP_OKAY; 5311 } 5312 5313 int 5314 BN_rshift1(BIGNUM *r, BIGNUM *a) 5315 { 5316 if (r == NULL || a == NULL) { 5317 return 0; 5318 } 5319 BN_copy(r, a); 5320 return mp_rshd(r, 1) == MP_OKAY; 5321 } 5322 5323 int 5324 BN_set_word(BIGNUM *a, BN_ULONG w) 5325 { 5326 if (a == NULL) { 5327 return 0; 5328 } 5329 mp_set(a, w); 5330 return 1; 5331 } 5332 5333 int 5334 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5335 { 5336 if (a == NULL || b == NULL || r == NULL) { 5337 return 0; 5338 } 5339 return mp_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5340 } 5341 5342 int 5343 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5344 { 5345 if (a == NULL || b == NULL || r == NULL) { 5346 return 0; 5347 } 5348 return mp_sub(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5349 } 5350 5351 int 5352 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 5353 { 5354 if (a == NULL || b == NULL || r == NULL) { 5355 return 0; 5356 } 5357 USE_ARG(ctx); 5358 return mp_mul(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5359 } 5360 5361 int 5362 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx) 5363 { 5364 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5365 return 0; 5366 } 5367 USE_ARG(ctx); 5368 return mp_div(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5369 } 5370 5371 void 5372 BN_free(BIGNUM *a) 5373 { 5374 if (a) { 5375 mp_clear(a); 5376 } 5377 } 5378 5379 void 5380 BN_clear(BIGNUM *a) 5381 { 5382 if (a) { 5383 mp_clear(a); 5384 } 5385 } 5386 5387 void 5388 BN_clear_free(BIGNUM *a) 5389 { 5390 if (a) { 5391 mp_clear(a); 5392 } 5393 } 5394 5395 int 5396 BN_num_bytes(const BIGNUM *a) 5397 { 5398 if (a == NULL) { 5399 return MP_VAL; 5400 } 5401 return mp_unsigned_bin_size(__UNCONST(a)); 5402 } 5403 5404 int 5405 BN_num_bits(const BIGNUM *a) 5406 { 5407 if (a == NULL) { 5408 return 0; 5409 } 5410 return mp_count_bits(a); 5411 } 5412 5413 void 5414 BN_set_negative(BIGNUM *a, int n) 5415 { 5416 if (a) { 5417 a->sign = (n) ? MP_NEG : 0; 5418 } 5419 } 5420 5421 int 5422 BN_cmp(BIGNUM *a, BIGNUM *b) 5423 { 5424 if (a == NULL || b == NULL) { 5425 return MP_VAL; 5426 } 5427 switch(mp_cmp(a, b)) { 5428 case MP_LT: 5429 return -1; 5430 case MP_GT: 5431 return 1; 5432 case MP_EQ: 5433 default: 5434 return 0; 5435 } 5436 } 5437 5438 int 5439 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx) 5440 { 5441 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5442 return MP_VAL; 5443 } 5444 USE_ARG(ctx); 5445 return mp_exptmod(G, X, P, Y) == MP_OKAY; 5446 } 5447 5448 BIGNUM * 5449 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) 5450 { 5451 USE_ARG(ctx); 5452 if (r == NULL || a == NULL || n == NULL) { 5453 return NULL; 5454 } 5455 return (mp_invmod(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5456 } 5457 5458 int 5459 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5460 { 5461 USE_ARG(ctx); 5462 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5463 return 0; 5464 } 5465 return mp_mulmod(ret, a, b, __UNCONST(m)) == MP_OKAY; 5466 } 5467 5468 BN_CTX * 5469 BN_CTX_new(void) 5470 { 5471 return netpgp_allocate(1, sizeof(BN_CTX)); 5472 } 5473 5474 void 5475 BN_CTX_init(BN_CTX *c) 5476 { 5477 if (c != NULL) { 5478 c->arraysize = 15; 5479 if ((c->v = netpgp_allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5480 c->arraysize = 0; 5481 } 5482 } 5483 } 5484 5485 BIGNUM * 5486 BN_CTX_get(BN_CTX *ctx) 5487 { 5488 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5489 return NULL; 5490 } 5491 return ctx->v[ctx->count++] = BN_new(); 5492 } 5493 5494 void 5495 BN_CTX_start(BN_CTX *ctx) 5496 { 5497 BN_CTX_init(ctx); 5498 } 5499 5500 void 5501 BN_CTX_free(BN_CTX *c) 5502 { 5503 unsigned i; 5504 5505 if (c != NULL && c->v != NULL) { 5506 for (i = 0 ; i < c->count ; i++) { 5507 BN_clear_free(c->v[i]); 5508 } 5509 netpgp_deallocate(c->v, sizeof(*c->v) * c->arraysize); 5510 } 5511 } 5512 5513 void 5514 BN_CTX_end(BN_CTX *ctx) 5515 { 5516 BN_CTX_free(ctx); 5517 } 5518 5519 char * 5520 BN_bn2hex(const BIGNUM *a) 5521 { 5522 return (a == NULL) ? NULL : formatbn(a, 16); 5523 } 5524 5525 char * 5526 BN_bn2dec(const BIGNUM *a) 5527 { 5528 return (a == NULL) ? NULL : formatbn(a, 10); 5529 } 5530 5531 #ifndef _KERNEL 5532 int 5533 BN_print_fp(FILE *fp, const BIGNUM *a) 5534 { 5535 char *s; 5536 int ret; 5537 5538 if (fp == NULL || a == NULL) { 5539 return 0; 5540 } 5541 s = BN_bn2hex(a); 5542 ret = fprintf(fp, "%s", s); 5543 netpgp_deallocate(s, strlen(s) + 1); 5544 return ret; 5545 } 5546 #endif 5547 5548 int 5549 BN_rand(BIGNUM *rnd, int bits, int top, int bottom) 5550 { 5551 uint64_t r; 5552 int digits; 5553 int i; 5554 5555 if (rnd == NULL) { 5556 return 0; 5557 } 5558 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5559 for (i = 0 ; i < digits ; i++) { 5560 r = (uint64_t)arc4random(); 5561 r <<= 32; 5562 r |= arc4random(); 5563 rnd->dp[i] = (r & MP_MASK); 5564 } 5565 if (top == 0) { 5566 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5567 } 5568 if (top == 1) { 5569 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5570 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5571 } 5572 if (bottom) { 5573 rnd->dp[0] |= 0x1; 5574 } 5575 return 1; 5576 } 5577 5578 int 5579 BN_rand_range(BIGNUM *rnd, BIGNUM *range) 5580 { 5581 if (rnd == NULL || range == NULL || BN_is_zero(range)) { 5582 return 0; 5583 } 5584 BN_rand(rnd, BN_num_bits(range), 1, 0); 5585 return mp_mod(rnd, range, rnd) == MP_OKAY; 5586 } 5587 5588 int 5589 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg) 5590 { 5591 int primality; 5592 5593 if (a == NULL) { 5594 return 0; 5595 } 5596 USE_ARG(ctx); 5597 USE_ARG(cb_arg); 5598 USE_ARG(callback); 5599 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5600 } 5601 5602 const BIGNUM * 5603 BN_value_one(void) 5604 { 5605 static mp_digit digit = 1UL; 5606 static const BIGNUM one = { &digit, 1, 1, 0 }; 5607 5608 return &one; 5609 } 5610 5611 int 5612 BN_hex2bn(BIGNUM **a, const char *str) 5613 { 5614 return getbn(a, str, 16); 5615 } 5616 5617 int 5618 BN_dec2bn(BIGNUM **a, const char *str) 5619 { 5620 return getbn(a, str, 10); 5621 } 5622 5623 int 5624 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5625 { 5626 USE_ARG(ctx); 5627 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5628 return 0; 5629 } 5630 return mp_submod(a, b, __UNCONST(m), r) == MP_OKAY; 5631 } 5632 5633 int 5634 BN_is_bit_set(const BIGNUM *a, int n) 5635 { 5636 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5637 return 0; 5638 } 5639 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5640 } 5641