1 /*- 2 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 /* LibTomMath, multiple-precision integer library -- Tom St Denis 26 * 27 * LibTomMath is a library that provides multiple-precision 28 * integer arithmetic as well as number theoretic functionality. 29 * 30 * The library was designed directly after the MPI library by 31 * Michael Fromberger but has been written from scratch with 32 * additional optimizations in place. 33 * 34 * The library is free for all purposes without any express 35 * guarantee it works. 36 * 37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 38 */ 39 #include <sys/types.h> 40 #include <sys/stat.h> 41 #include <sys/param.h> 42 43 #ifdef _KERNEL 44 # include <sys/kmem.h> 45 #else 46 # include <arpa/inet.h> 47 # include <ctype.h> 48 # include <inttypes.h> 49 # include <stdarg.h> 50 # include <stdio.h> 51 # include <stdlib.h> 52 # include <string.h> 53 # include <time.h> 54 # include <unistd.h> 55 #endif 56 57 #include "misc.h" 58 #include "bn.h" 59 #include "digest.h" 60 61 /**************************************************************************/ 62 63 /* LibTomMath, multiple-precision integer library -- Tom St Denis 64 * 65 * LibTomMath is a library that provides multiple-precision 66 * integer arithmetic as well as number theoretic functionality. 67 * 68 * The library was designed directly after the MPI library by 69 * Michael Fromberger but has been written from scratch with 70 * additional optimizations in place. 71 * 72 * The library is free for all purposes without any express 73 * guarantee it works. 74 * 75 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 76 */ 77 78 #define MP_PREC 32 79 #define DIGIT_BIT 28 80 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 81 82 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 83 84 #define MP_NO 0 85 #define MP_YES 1 86 87 #ifndef USE_ARG 88 #define USE_ARG(x) /*LINTED*/(void)&(x) 89 #endif 90 91 #ifndef __arraycount 92 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 93 #endif 94 95 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO) 96 97 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) 98 99 typedef int mp_err; 100 101 static int mp_mul(mp_int * a, mp_int * b, mp_int * c); 102 static int mp_sqr(mp_int * a, mp_int * b); 103 104 static int mp_sub_d(mp_int *a, mp_digit b, mp_int *c); 105 106 /* set to zero */ 107 static void 108 mp_zero(mp_int *a) 109 { 110 int n; 111 mp_digit *tmp; 112 113 a->sign = MP_ZPOS; 114 a->used = 0; 115 116 tmp = a->dp; 117 /* XXX - memset */ 118 for (n = 0; n < a->alloc; n++) { 119 *tmp++ = 0; 120 } 121 } 122 123 /* grow as required */ 124 static int 125 mp_grow(mp_int *a, int size) 126 { 127 int i; 128 mp_digit *tmp; 129 130 /* if the alloc size is smaller alloc more ram */ 131 if (a->alloc < size) { 132 /* ensure there are always at least MP_PREC digits extra on top */ 133 size += (MP_PREC * 2) - (size % MP_PREC); 134 135 /* reallocate the array a->dp 136 * 137 * We store the return in a temporary variable 138 * in case the operation failed we don't want 139 * to overwrite the dp member of a. 140 */ 141 tmp = realloc(a->dp, sizeof(*tmp) * size); 142 if (tmp == NULL) { 143 /* reallocation failed but "a" is still valid [can be freed] */ 144 return MP_MEM; 145 } 146 147 /* reallocation succeeded so set a->dp */ 148 a->dp = tmp; 149 150 /* zero excess digits */ 151 i = a->alloc; 152 a->alloc = size; 153 for (; i < a->alloc; i++) { 154 a->dp[i] = 0; 155 } 156 } 157 return MP_OKAY; 158 } 159 160 /* shift left a certain amount of digits */ 161 static int 162 mp_lshd (mp_int * a, int b) 163 { 164 int x, res; 165 166 /* if its less than zero return */ 167 if (b <= 0) { 168 return MP_OKAY; 169 } 170 171 /* grow to fit the new digits */ 172 if (a->alloc < a->used + b) { 173 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { 174 return res; 175 } 176 } 177 178 { 179 mp_digit *top, *bottom; 180 181 /* increment the used by the shift amount then copy upwards */ 182 a->used += b; 183 184 /* top */ 185 top = a->dp + a->used - 1; 186 187 /* base */ 188 bottom = a->dp + a->used - 1 - b; 189 190 /* much like mp_rshd this is implemented using a sliding window 191 * except the window goes the otherway around. Copying from 192 * the bottom to the top. see bn_mp_rshd.c for more info. 193 */ 194 for (x = a->used - 1; x >= b; x--) { 195 *top-- = *bottom--; 196 } 197 198 /* zero the lower digits */ 199 top = a->dp; 200 for (x = 0; x < b; x++) { 201 *top++ = 0; 202 } 203 } 204 return MP_OKAY; 205 } 206 207 /* trim unused digits 208 * 209 * This is used to ensure that leading zero digits are 210 * trimed and the leading "used" digit will be non-zero 211 * Typically very fast. Also fixes the sign if there 212 * are no more leading digits 213 */ 214 static void 215 mp_clamp (mp_int * a) 216 { 217 /* decrease used while the most significant digit is 218 * zero. 219 */ 220 while (a->used > 0 && a->dp[a->used - 1] == 0) { 221 --(a->used); 222 } 223 224 /* reset the sign flag if used == 0 */ 225 if (a->used == 0) { 226 a->sign = MP_ZPOS; 227 } 228 } 229 230 /* copy, b = a */ 231 static int 232 mp_copy(BIGNUM *a, BIGNUM *b) 233 { 234 int res, n; 235 236 /* if dst == src do nothing */ 237 if (a == b) { 238 return MP_OKAY; 239 } 240 if (a == NULL || b == NULL) { 241 return MP_VAL; 242 } 243 244 /* grow dest */ 245 if (b->alloc < a->used) { 246 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 247 return res; 248 } 249 } 250 251 /* zero b and copy the parameters over */ 252 { 253 mp_digit *tmpa, *tmpb; 254 255 /* pointer aliases */ 256 257 /* source */ 258 tmpa = a->dp; 259 260 /* destination */ 261 tmpb = b->dp; 262 263 /* copy all the digits */ 264 for (n = 0; n < a->used; n++) { 265 *tmpb++ = *tmpa++; 266 } 267 268 /* clear high digits */ 269 for (; n < b->used; n++) { 270 *tmpb++ = 0; 271 } 272 } 273 274 /* copy used count and sign */ 275 b->used = a->used; 276 b->sign = a->sign; 277 return MP_OKAY; 278 } 279 280 /* shift left by a certain bit count */ 281 static int 282 mp_mul_2d(mp_int *a, int b, mp_int *c) 283 { 284 mp_digit d; 285 int res; 286 287 /* copy */ 288 if (a != c) { 289 if ((res = mp_copy (a, c)) != MP_OKAY) { 290 return res; 291 } 292 } 293 294 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 295 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 296 return res; 297 } 298 } 299 300 /* shift by as many digits in the bit count */ 301 if (b >= (int)DIGIT_BIT) { 302 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { 303 return res; 304 } 305 } 306 307 /* shift any bit count < DIGIT_BIT */ 308 d = (mp_digit) (b % DIGIT_BIT); 309 if (d != 0) { 310 mp_digit *tmpc, shift, mask, r, rr; 311 int x; 312 313 /* bitmask for carries */ 314 mask = (((mp_digit)1) << d) - 1; 315 316 /* shift for msbs */ 317 shift = DIGIT_BIT - d; 318 319 /* alias */ 320 tmpc = c->dp; 321 322 /* carry */ 323 r = 0; 324 for (x = 0; x < c->used; x++) { 325 /* get the higher bits of the current word */ 326 rr = (*tmpc >> shift) & mask; 327 328 /* shift the current word and OR in the carry */ 329 *tmpc = ((*tmpc << d) | r) & MP_MASK; 330 ++tmpc; 331 332 /* set the carry to the carry bits of the current word */ 333 r = rr; 334 } 335 336 /* set final carry */ 337 if (r != 0) { 338 c->dp[(c->used)++] = r; 339 } 340 } 341 mp_clamp (c); 342 return MP_OKAY; 343 } 344 345 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 346 static int 347 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 348 { 349 int res; 350 351 /* make sure there are at least two digits */ 352 if (a->alloc < 2) { 353 if ((res = mp_grow(a, 2)) != MP_OKAY) { 354 return res; 355 } 356 } 357 358 /* zero the int */ 359 mp_zero (a); 360 361 /* read the bytes in */ 362 while (c-- > 0) { 363 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { 364 return res; 365 } 366 367 a->dp[0] |= *b++; 368 a->used += 1; 369 } 370 mp_clamp (a); 371 return MP_OKAY; 372 } 373 374 /* returns the number of bits in an int */ 375 static int 376 mp_count_bits(const mp_int *a) 377 { 378 int r; 379 mp_digit q; 380 381 /* shortcut */ 382 if (a->used == 0) { 383 return 0; 384 } 385 386 /* get number of digits and add that */ 387 r = (a->used - 1) * DIGIT_BIT; 388 389 /* take the last digit and count the bits in it */ 390 q = a->dp[a->used - 1]; 391 while (q > ((mp_digit) 0)) { 392 ++r; 393 q >>= ((mp_digit) 1); 394 } 395 return r; 396 } 397 398 /* compare maginitude of two ints (unsigned) */ 399 static int 400 mp_cmp_mag (mp_int * a, mp_int * b) 401 { 402 int n; 403 mp_digit *tmpa, *tmpb; 404 405 /* compare based on # of non-zero digits */ 406 if (a->used > b->used) { 407 return MP_GT; 408 } 409 410 if (a->used < b->used) { 411 return MP_LT; 412 } 413 414 /* alias for a */ 415 tmpa = a->dp + (a->used - 1); 416 417 /* alias for b */ 418 tmpb = b->dp + (a->used - 1); 419 420 /* compare based on digits */ 421 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 422 if (*tmpa > *tmpb) { 423 return MP_GT; 424 } 425 426 if (*tmpa < *tmpb) { 427 return MP_LT; 428 } 429 } 430 return MP_EQ; 431 } 432 433 /* compare two ints (signed)*/ 434 static int 435 mp_cmp (mp_int * a, mp_int * b) 436 { 437 /* compare based on sign */ 438 if (a->sign != b->sign) { 439 if (a->sign == MP_NEG) { 440 return MP_LT; 441 } else { 442 return MP_GT; 443 } 444 } 445 446 /* compare digits */ 447 if (a->sign == MP_NEG) { 448 /* if negative compare opposite direction */ 449 return mp_cmp_mag(b, a); 450 } else { 451 return mp_cmp_mag(a, b); 452 } 453 } 454 455 /* get the size for an unsigned equivalent */ 456 static int 457 mp_unsigned_bin_size (mp_int * a) 458 { 459 int size = mp_count_bits (a); 460 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 461 } 462 463 /* init a new mp_int */ 464 static int 465 mp_init (mp_int * a) 466 { 467 int i; 468 469 /* allocate memory required and clear it */ 470 a->dp = netpgp_allocate(1, sizeof (*a->dp) * MP_PREC); 471 if (a->dp == NULL) { 472 return MP_MEM; 473 } 474 475 /* set the digits to zero */ 476 for (i = 0; i < MP_PREC; i++) { 477 a->dp[i] = 0; 478 } 479 480 /* set the used to zero, allocated digits to the default precision 481 * and sign to positive */ 482 a->used = 0; 483 a->alloc = MP_PREC; 484 a->sign = MP_ZPOS; 485 486 return MP_OKAY; 487 } 488 489 /* clear one (frees) */ 490 static void 491 mp_clear (mp_int * a) 492 { 493 int i; 494 495 /* only do anything if a hasn't been freed previously */ 496 if (a->dp != NULL) { 497 /* first zero the digits */ 498 for (i = 0; i < a->used; i++) { 499 a->dp[i] = 0; 500 } 501 502 /* free ram */ 503 netpgp_deallocate(a->dp, (size_t)a->alloc); 504 505 /* reset members to make debugging easier */ 506 a->dp = NULL; 507 a->alloc = a->used = 0; 508 a->sign = MP_ZPOS; 509 } 510 } 511 512 static int 513 mp_init_multi(mp_int *mp, ...) 514 { 515 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 516 int n = 0; /* Number of ok inits */ 517 mp_int* cur_arg = mp; 518 va_list args; 519 520 va_start(args, mp); /* init args to next argument from caller */ 521 while (cur_arg != NULL) { 522 if (mp_init(cur_arg) != MP_OKAY) { 523 /* Oops - error! Back-track and mp_clear what we already 524 succeeded in init-ing, then return error. 525 */ 526 va_list clean_args; 527 528 /* end the current list */ 529 va_end(args); 530 531 /* now start cleaning up */ 532 cur_arg = mp; 533 va_start(clean_args, mp); 534 while (n--) { 535 mp_clear(cur_arg); 536 cur_arg = va_arg(clean_args, mp_int*); 537 } 538 va_end(clean_args); 539 res = MP_MEM; 540 break; 541 } 542 n++; 543 cur_arg = va_arg(args, mp_int*); 544 } 545 va_end(args); 546 return res; /* Assumed ok, if error flagged above. */ 547 } 548 549 /* init an mp_init for a given size */ 550 static int 551 mp_init_size (mp_int * a, int size) 552 { 553 int x; 554 555 /* pad size so there are always extra digits */ 556 size += (MP_PREC * 2) - (size % MP_PREC); 557 558 /* alloc mem */ 559 a->dp = netpgp_allocate (1, sizeof (*a->dp) * size); 560 if (a->dp == NULL) { 561 return MP_MEM; 562 } 563 564 /* set the members */ 565 a->used = 0; 566 a->alloc = size; 567 a->sign = MP_ZPOS; 568 569 /* zero the digits */ 570 for (x = 0; x < size; x++) { 571 a->dp[x] = 0; 572 } 573 574 return MP_OKAY; 575 } 576 577 /* creates "a" then copies b into it */ 578 static int mp_init_copy (mp_int * a, mp_int * b) 579 { 580 int res; 581 582 if ((res = mp_init (a)) != MP_OKAY) { 583 return res; 584 } 585 return mp_copy (b, a); 586 } 587 588 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 589 static int 590 s_mp_add (mp_int * a, mp_int * b, mp_int * c) 591 { 592 mp_int *x; 593 int olduse, res, min, max; 594 595 /* find sizes, we let |a| <= |b| which means we have to sort 596 * them. "x" will point to the input with the most digits 597 */ 598 if (a->used > b->used) { 599 min = b->used; 600 max = a->used; 601 x = a; 602 } else { 603 min = a->used; 604 max = b->used; 605 x = b; 606 } 607 608 /* init result */ 609 if (c->alloc < max + 1) { 610 if ((res = mp_grow (c, max + 1)) != MP_OKAY) { 611 return res; 612 } 613 } 614 615 /* get old used digit count and set new one */ 616 olduse = c->used; 617 c->used = max + 1; 618 619 { 620 mp_digit u, *tmpa, *tmpb, *tmpc; 621 int i; 622 623 /* alias for digit pointers */ 624 625 /* first input */ 626 tmpa = a->dp; 627 628 /* second input */ 629 tmpb = b->dp; 630 631 /* destination */ 632 tmpc = c->dp; 633 634 /* zero the carry */ 635 u = 0; 636 for (i = 0; i < min; i++) { 637 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 638 *tmpc = *tmpa++ + *tmpb++ + u; 639 640 /* U = carry bit of T[i] */ 641 u = *tmpc >> ((mp_digit)DIGIT_BIT); 642 643 /* take away carry bit from T[i] */ 644 *tmpc++ &= MP_MASK; 645 } 646 647 /* now copy higher words if any, that is in A+B 648 * if A or B has more digits add those in 649 */ 650 if (min != max) { 651 for (; i < max; i++) { 652 /* T[i] = X[i] + U */ 653 *tmpc = x->dp[i] + u; 654 655 /* U = carry bit of T[i] */ 656 u = *tmpc >> ((mp_digit)DIGIT_BIT); 657 658 /* take away carry bit from T[i] */ 659 *tmpc++ &= MP_MASK; 660 } 661 } 662 663 /* add carry */ 664 *tmpc++ = u; 665 666 /* clear digits above oldused */ 667 for (i = c->used; i < olduse; i++) { 668 *tmpc++ = 0; 669 } 670 } 671 672 mp_clamp (c); 673 return MP_OKAY; 674 } 675 676 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 677 static int 678 s_mp_sub (mp_int * a, mp_int * b, mp_int * c) 679 { 680 int olduse, res, min, max; 681 682 /* find sizes */ 683 min = b->used; 684 max = a->used; 685 686 /* init result */ 687 if (c->alloc < max) { 688 if ((res = mp_grow (c, max)) != MP_OKAY) { 689 return res; 690 } 691 } 692 olduse = c->used; 693 c->used = max; 694 695 { 696 mp_digit u, *tmpa, *tmpb, *tmpc; 697 int i; 698 699 /* alias for digit pointers */ 700 tmpa = a->dp; 701 tmpb = b->dp; 702 tmpc = c->dp; 703 704 /* set carry to zero */ 705 u = 0; 706 for (i = 0; i < min; i++) { 707 /* T[i] = A[i] - B[i] - U */ 708 *tmpc = *tmpa++ - *tmpb++ - u; 709 710 /* U = carry bit of T[i] 711 * Note this saves performing an AND operation since 712 * if a carry does occur it will propagate all the way to the 713 * MSB. As a result a single shift is enough to get the carry 714 */ 715 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 716 717 /* Clear carry from T[i] */ 718 *tmpc++ &= MP_MASK; 719 } 720 721 /* now copy higher words if any, e.g. if A has more digits than B */ 722 for (; i < max; i++) { 723 /* T[i] = A[i] - U */ 724 *tmpc = *tmpa++ - u; 725 726 /* U = carry bit of T[i] */ 727 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 728 729 /* Clear carry from T[i] */ 730 *tmpc++ &= MP_MASK; 731 } 732 733 /* clear digits above used (since we may not have grown result above) */ 734 for (i = c->used; i < olduse; i++) { 735 *tmpc++ = 0; 736 } 737 } 738 739 mp_clamp (c); 740 return MP_OKAY; 741 } 742 743 /* high level subtraction (handles signs) */ 744 static int 745 mp_sub (mp_int * a, mp_int * b, mp_int * c) 746 { 747 int sa, sb, res; 748 749 sa = a->sign; 750 sb = b->sign; 751 752 if (sa != sb) { 753 /* subtract a negative from a positive, OR */ 754 /* subtract a positive from a negative. */ 755 /* In either case, ADD their magnitudes, */ 756 /* and use the sign of the first number. */ 757 c->sign = sa; 758 res = s_mp_add (a, b, c); 759 } else { 760 /* subtract a positive from a positive, OR */ 761 /* subtract a negative from a negative. */ 762 /* First, take the difference between their */ 763 /* magnitudes, then... */ 764 if (mp_cmp_mag (a, b) != MP_LT) { 765 /* Copy the sign from the first */ 766 c->sign = sa; 767 /* The first has a larger or equal magnitude */ 768 res = s_mp_sub (a, b, c); 769 } else { 770 /* The result has the *opposite* sign from */ 771 /* the first number. */ 772 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 773 /* The second has a larger magnitude */ 774 res = s_mp_sub (b, a, c); 775 } 776 } 777 return res; 778 } 779 780 /* shift right a certain amount of digits */ 781 static int mp_rshd (mp_int * a, int b) 782 { 783 int x; 784 785 /* if b <= 0 then ignore it */ 786 if (b <= 0) { 787 return 0; 788 } 789 790 /* if b > used then simply zero it and return */ 791 if (a->used <= b) { 792 mp_zero (a); 793 return 0; 794 } 795 796 { 797 mp_digit *bottom, *top; 798 799 /* shift the digits down */ 800 801 /* bottom */ 802 bottom = a->dp; 803 804 /* top [offset into digits] */ 805 top = a->dp + b; 806 807 /* this is implemented as a sliding window where 808 * the window is b-digits long and digits from 809 * the top of the window are copied to the bottom 810 * 811 * e.g. 812 813 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 814 /\ | ----> 815 \-------------------/ ----> 816 */ 817 for (x = 0; x < (a->used - b); x++) { 818 *bottom++ = *top++; 819 } 820 821 /* zero the top digits */ 822 for (; x < a->used; x++) { 823 *bottom++ = 0; 824 } 825 } 826 827 /* remove excess digits */ 828 a->used -= b; 829 return 1; 830 } 831 832 /* multiply by a digit */ 833 static int 834 mp_mul_d (mp_int * a, mp_digit b, mp_int * c) 835 { 836 mp_digit u, *tmpa, *tmpc; 837 mp_word r; 838 int ix, res, olduse; 839 840 /* make sure c is big enough to hold a*b */ 841 if (c->alloc < a->used + 1) { 842 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { 843 return res; 844 } 845 } 846 847 /* get the original destinations used count */ 848 olduse = c->used; 849 850 /* set the sign */ 851 c->sign = a->sign; 852 853 /* alias for a->dp [source] */ 854 tmpa = a->dp; 855 856 /* alias for c->dp [dest] */ 857 tmpc = c->dp; 858 859 /* zero carry */ 860 u = 0; 861 862 /* compute columns */ 863 for (ix = 0; ix < a->used; ix++) { 864 /* compute product and carry sum for this term */ 865 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); 866 867 /* mask off higher bits to get a single digit */ 868 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 869 870 /* send carry into next iteration */ 871 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 872 } 873 874 /* store final carry [if any] and increment ix offset */ 875 *tmpc++ = u; 876 ++ix; 877 878 /* now zero digits above the top */ 879 while (ix++ < olduse) { 880 *tmpc++ = 0; 881 } 882 883 /* set used count */ 884 c->used = a->used + 1; 885 mp_clamp(c); 886 887 return MP_OKAY; 888 } 889 890 /* high level addition (handles signs) */ 891 static int mp_add (mp_int * a, mp_int * b, mp_int * c) 892 { 893 int sa, sb, res; 894 895 /* get sign of both inputs */ 896 sa = a->sign; 897 sb = b->sign; 898 899 /* handle two cases, not four */ 900 if (sa == sb) { 901 /* both positive or both negative */ 902 /* add their magnitudes, copy the sign */ 903 c->sign = sa; 904 res = s_mp_add (a, b, c); 905 } else { 906 /* one positive, the other negative */ 907 /* subtract the one with the greater magnitude from */ 908 /* the one of the lesser magnitude. The result gets */ 909 /* the sign of the one with the greater magnitude. */ 910 if (mp_cmp_mag (a, b) == MP_LT) { 911 c->sign = sb; 912 res = s_mp_sub (b, a, c); 913 } else { 914 c->sign = sa; 915 res = s_mp_sub (a, b, c); 916 } 917 } 918 return res; 919 } 920 921 /* swap the elements of two integers, for cases where you can't simply swap the 922 * mp_int pointers around 923 */ 924 static void 925 mp_exch(mp_int *a, mp_int *b) 926 { 927 mp_int t; 928 929 t = *a; 930 *a = *b; 931 *b = t; 932 } 933 934 /* calc a value mod 2**b */ 935 static int 936 mp_mod_2d (mp_int * a, int b, mp_int * c) 937 { 938 int x, res; 939 940 /* if b is <= 0 then zero the int */ 941 if (b <= 0) { 942 mp_zero (c); 943 return MP_OKAY; 944 } 945 946 /* if the modulus is larger than the value than return */ 947 if (b >= (int) (a->used * DIGIT_BIT)) { 948 res = mp_copy (a, c); 949 return res; 950 } 951 952 /* copy */ 953 if ((res = mp_copy (a, c)) != MP_OKAY) { 954 return res; 955 } 956 957 /* zero digits above the last digit of the modulus */ 958 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 959 c->dp[x] = 0; 960 } 961 /* clear the digit that is not completely outside/inside the modulus */ 962 c->dp[b / DIGIT_BIT] &= 963 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 964 mp_clamp (c); 965 return MP_OKAY; 966 } 967 968 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 969 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) 970 { 971 mp_digit D, r, rr; 972 int x, res; 973 mp_int t; 974 975 976 /* if the shift count is <= 0 then we do no work */ 977 if (b <= 0) { 978 res = mp_copy (a, c); 979 if (d != NULL) { 980 mp_zero (d); 981 } 982 return res; 983 } 984 985 if ((res = mp_init (&t)) != MP_OKAY) { 986 return res; 987 } 988 989 /* get the remainder */ 990 if (d != NULL) { 991 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { 992 mp_clear (&t); 993 return res; 994 } 995 } 996 997 /* copy */ 998 if ((res = mp_copy (a, c)) != MP_OKAY) { 999 mp_clear (&t); 1000 return res; 1001 } 1002 1003 /* shift by as many digits in the bit count */ 1004 if (b >= (int)DIGIT_BIT) { 1005 mp_rshd (c, b / DIGIT_BIT); 1006 } 1007 1008 /* shift any bit count < DIGIT_BIT */ 1009 D = (mp_digit) (b % DIGIT_BIT); 1010 if (D != 0) { 1011 mp_digit *tmpc, mask, shift; 1012 1013 /* mask */ 1014 mask = (((mp_digit)1) << D) - 1; 1015 1016 /* shift for lsb */ 1017 shift = DIGIT_BIT - D; 1018 1019 /* alias */ 1020 tmpc = c->dp + (c->used - 1); 1021 1022 /* carry */ 1023 r = 0; 1024 for (x = c->used - 1; x >= 0; x--) { 1025 /* get the lower bits of this word in a temp */ 1026 rr = *tmpc & mask; 1027 1028 /* shift the current word and mix in the carry bits from the previous word */ 1029 *tmpc = (*tmpc >> D) | (r << shift); 1030 --tmpc; 1031 1032 /* set the carry to the carry bits of the current word found above */ 1033 r = rr; 1034 } 1035 } 1036 mp_clamp (c); 1037 if (d != NULL) { 1038 mp_exch (&t, d); 1039 } 1040 mp_clear (&t); 1041 return MP_OKAY; 1042 } 1043 1044 /* integer signed division. 1045 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 1046 * HAC pp.598 Algorithm 14.20 1047 * 1048 * Note that the description in HAC is horribly 1049 * incomplete. For example, it doesn't consider 1050 * the case where digits are removed from 'x' in 1051 * the inner loop. It also doesn't consider the 1052 * case that y has fewer than three digits, etc.. 1053 * 1054 * The overall algorithm is as described as 1055 * 14.20 from HAC but fixed to treat these cases. 1056 */ 1057 static int 1058 mp_div(mp_int *c, mp_int *d, mp_int *a, mp_int *b) 1059 { 1060 mp_int q, x, y, t1, t2; 1061 int res, n, t, i, norm, neg; 1062 1063 /* is divisor zero ? */ 1064 if (BN_is_zero (b) == 1) { 1065 return MP_VAL; 1066 } 1067 1068 /* if a < b then q=0, r = a */ 1069 if (mp_cmp_mag (a, b) == MP_LT) { 1070 if (d != NULL) { 1071 res = mp_copy (a, d); 1072 } else { 1073 res = MP_OKAY; 1074 } 1075 if (c != NULL) { 1076 mp_zero (c); 1077 } 1078 return res; 1079 } 1080 1081 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { 1082 return res; 1083 } 1084 q.used = a->used + 2; 1085 1086 if ((res = mp_init (&t1)) != MP_OKAY) { 1087 goto LBL_Q; 1088 } 1089 1090 if ((res = mp_init (&t2)) != MP_OKAY) { 1091 goto LBL_T1; 1092 } 1093 1094 if ((res = mp_init_copy (&x, a)) != MP_OKAY) { 1095 goto LBL_T2; 1096 } 1097 1098 if ((res = mp_init_copy (&y, b)) != MP_OKAY) { 1099 goto LBL_X; 1100 } 1101 1102 /* fix the sign */ 1103 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1104 x.sign = y.sign = MP_ZPOS; 1105 1106 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1107 norm = mp_count_bits(&y) % DIGIT_BIT; 1108 if (norm < (int)(DIGIT_BIT-1)) { 1109 norm = (DIGIT_BIT-1) - norm; 1110 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { 1111 goto LBL_Y; 1112 } 1113 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { 1114 goto LBL_Y; 1115 } 1116 } else { 1117 norm = 0; 1118 } 1119 1120 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1121 n = x.used - 1; 1122 t = y.used - 1; 1123 1124 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1125 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1126 goto LBL_Y; 1127 } 1128 1129 while (mp_cmp (&x, &y) != MP_LT) { 1130 ++(q.dp[n - t]); 1131 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { 1132 goto LBL_Y; 1133 } 1134 } 1135 1136 /* reset y by shifting it back down */ 1137 mp_rshd (&y, n - t); 1138 1139 /* step 3. for i from n down to (t + 1) */ 1140 for (i = n; i >= (t + 1); i--) { 1141 if (i > x.used) { 1142 continue; 1143 } 1144 1145 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1146 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1147 if (x.dp[i] == y.dp[t]) { 1148 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1149 } else { 1150 mp_word tmp; 1151 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1152 tmp |= ((mp_word) x.dp[i - 1]); 1153 tmp /= ((mp_word) y.dp[t]); 1154 if (tmp > (mp_word) MP_MASK) 1155 tmp = MP_MASK; 1156 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1157 } 1158 1159 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1160 xi * b**2 + xi-1 * b + xi-2 1161 1162 do q{i-t-1} -= 1; 1163 */ 1164 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1165 do { 1166 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1167 1168 /* find left hand */ 1169 mp_zero (&t1); 1170 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1171 t1.dp[1] = y.dp[t]; 1172 t1.used = 2; 1173 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1174 goto LBL_Y; 1175 } 1176 1177 /* find right hand */ 1178 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1179 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1180 t2.dp[2] = x.dp[i]; 1181 t2.used = 3; 1182 } while (mp_cmp_mag(&t1, &t2) == MP_GT); 1183 1184 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1185 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1186 goto LBL_Y; 1187 } 1188 1189 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1190 goto LBL_Y; 1191 } 1192 1193 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { 1194 goto LBL_Y; 1195 } 1196 1197 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1198 if (x.sign == MP_NEG) { 1199 if ((res = mp_copy (&y, &t1)) != MP_OKAY) { 1200 goto LBL_Y; 1201 } 1202 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1203 goto LBL_Y; 1204 } 1205 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { 1206 goto LBL_Y; 1207 } 1208 1209 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1210 } 1211 } 1212 1213 /* now q is the quotient and x is the remainder 1214 * [which we have to normalize] 1215 */ 1216 1217 /* get sign before writing to c */ 1218 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1219 1220 if (c != NULL) { 1221 mp_clamp (&q); 1222 mp_exch (&q, c); 1223 c->sign = neg; 1224 } 1225 1226 if (d != NULL) { 1227 mp_div_2d (&x, norm, &x, NULL); 1228 mp_exch (&x, d); 1229 } 1230 1231 res = MP_OKAY; 1232 1233 LBL_Y:mp_clear (&y); 1234 LBL_X:mp_clear (&x); 1235 LBL_T2:mp_clear (&t2); 1236 LBL_T1:mp_clear (&t1); 1237 LBL_Q:mp_clear (&q); 1238 return res; 1239 } 1240 1241 /* c = a mod b, 0 <= c < b */ 1242 static int 1243 mp_mod (mp_int * a, mp_int * b, mp_int * c) 1244 { 1245 mp_int t; 1246 int res; 1247 1248 if ((res = mp_init (&t)) != MP_OKAY) { 1249 return res; 1250 } 1251 1252 if ((res = mp_div (NULL, &t, a, b)) != MP_OKAY) { 1253 mp_clear (&t); 1254 return res; 1255 } 1256 1257 if (t.sign != b->sign) { 1258 res = mp_add (b, &t, c); 1259 } else { 1260 res = MP_OKAY; 1261 mp_exch (&t, c); 1262 } 1263 1264 mp_clear (&t); 1265 return res; 1266 } 1267 1268 /* set to a digit */ 1269 static void mp_set (mp_int * a, mp_digit b) 1270 { 1271 mp_zero (a); 1272 a->dp[0] = b & MP_MASK; 1273 a->used = (a->dp[0] != 0) ? 1 : 0; 1274 } 1275 1276 /* b = a/2 */ 1277 static int mp_div_2(mp_int * a, mp_int * b) 1278 { 1279 int x, res, oldused; 1280 1281 /* copy */ 1282 if (b->alloc < a->used) { 1283 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 1284 return res; 1285 } 1286 } 1287 1288 oldused = b->used; 1289 b->used = a->used; 1290 { 1291 mp_digit r, rr, *tmpa, *tmpb; 1292 1293 /* source alias */ 1294 tmpa = a->dp + b->used - 1; 1295 1296 /* dest alias */ 1297 tmpb = b->dp + b->used - 1; 1298 1299 /* carry */ 1300 r = 0; 1301 for (x = b->used - 1; x >= 0; x--) { 1302 /* get the carry for the next iteration */ 1303 rr = *tmpa & 1; 1304 1305 /* shift the current digit, add in carry and store */ 1306 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1307 1308 /* forward carry to next iteration */ 1309 r = rr; 1310 } 1311 1312 /* zero excess digits */ 1313 tmpb = b->dp + b->used; 1314 for (x = b->used; x < oldused; x++) { 1315 *tmpb++ = 0; 1316 } 1317 } 1318 b->sign = a->sign; 1319 mp_clamp (b); 1320 return MP_OKAY; 1321 } 1322 1323 /* compare a digit */ 1324 static int mp_cmp_d(mp_int * a, mp_digit b) 1325 { 1326 /* compare based on sign */ 1327 if (a->sign == MP_NEG) { 1328 return MP_LT; 1329 } 1330 1331 /* compare based on magnitude */ 1332 if (a->used > 1) { 1333 return MP_GT; 1334 } 1335 1336 /* compare the only digit of a to b */ 1337 if (a->dp[0] > b) { 1338 return MP_GT; 1339 } else if (a->dp[0] < b) { 1340 return MP_LT; 1341 } else { 1342 return MP_EQ; 1343 } 1344 } 1345 1346 static void mp_clear_multi(mp_int *mp, ...) 1347 { 1348 mp_int* next_mp = mp; 1349 va_list args; 1350 va_start(args, mp); 1351 while (next_mp != NULL) { 1352 mp_clear(next_mp); 1353 next_mp = va_arg(args, mp_int*); 1354 } 1355 va_end(args); 1356 } 1357 1358 /* computes the modular inverse via binary extended euclidean algorithm, 1359 * that is c = 1/a mod b 1360 * 1361 * Based on slow invmod except this is optimized for the case where b is 1362 * odd as per HAC Note 14.64 on pp. 610 1363 */ 1364 static int 1365 fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) 1366 { 1367 mp_int x, y, u, v, B, D; 1368 int res, neg; 1369 1370 /* 2. [modified] b must be odd */ 1371 if (BN_is_even (b) == 1) { 1372 return MP_VAL; 1373 } 1374 1375 /* init all our temps */ 1376 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1377 return res; 1378 } 1379 1380 /* x == modulus, y == value to invert */ 1381 if ((res = mp_copy (b, &x)) != MP_OKAY) { 1382 goto LBL_ERR; 1383 } 1384 1385 /* we need y = |a| */ 1386 if ((res = mp_mod (a, b, &y)) != MP_OKAY) { 1387 goto LBL_ERR; 1388 } 1389 1390 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1391 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 1392 goto LBL_ERR; 1393 } 1394 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 1395 goto LBL_ERR; 1396 } 1397 mp_set (&D, (unsigned long)1); 1398 1399 top: 1400 /* 4. while u is even do */ 1401 while (BN_is_even (&u) == 1) { 1402 /* 4.1 u = u/2 */ 1403 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 1404 goto LBL_ERR; 1405 } 1406 /* 4.2 if B is odd then */ 1407 if (BN_is_odd (&B) == 1) { 1408 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 1409 goto LBL_ERR; 1410 } 1411 } 1412 /* B = B/2 */ 1413 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 1414 goto LBL_ERR; 1415 } 1416 } 1417 1418 /* 5. while v is even do */ 1419 while (BN_is_even (&v) == 1) { 1420 /* 5.1 v = v/2 */ 1421 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 1422 goto LBL_ERR; 1423 } 1424 /* 5.2 if D is odd then */ 1425 if (BN_is_odd (&D) == 1) { 1426 /* D = (D-x)/2 */ 1427 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 1428 goto LBL_ERR; 1429 } 1430 } 1431 /* D = D/2 */ 1432 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 1433 goto LBL_ERR; 1434 } 1435 } 1436 1437 /* 6. if u >= v then */ 1438 if (mp_cmp (&u, &v) != MP_LT) { 1439 /* u = u - v, B = B - D */ 1440 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 1441 goto LBL_ERR; 1442 } 1443 1444 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 1445 goto LBL_ERR; 1446 } 1447 } else { 1448 /* v - v - u, D = D - B */ 1449 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 1450 goto LBL_ERR; 1451 } 1452 1453 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 1454 goto LBL_ERR; 1455 } 1456 } 1457 1458 /* if not zero goto step 4 */ 1459 if (BN_is_zero (&u) == 0) { 1460 goto top; 1461 } 1462 1463 /* now a = C, b = D, gcd == g*v */ 1464 1465 /* if v != 1 then there is no inverse */ 1466 if (mp_cmp_d (&v, (unsigned long)1) != MP_EQ) { 1467 res = MP_VAL; 1468 goto LBL_ERR; 1469 } 1470 1471 /* b is now the inverse */ 1472 neg = a->sign; 1473 while (D.sign == MP_NEG) { 1474 if ((res = mp_add (&D, b, &D)) != MP_OKAY) { 1475 goto LBL_ERR; 1476 } 1477 } 1478 mp_exch (&D, c); 1479 c->sign = neg; 1480 res = MP_OKAY; 1481 1482 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1483 return res; 1484 } 1485 1486 /* hac 14.61, pp608 */ 1487 static int 1488 mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c) 1489 { 1490 mp_int x, y, u, v, A, B, C, D; 1491 int res; 1492 1493 /* b cannot be negative */ 1494 if (b->sign == MP_NEG || BN_is_zero(b) == 1) { 1495 return MP_VAL; 1496 } 1497 1498 /* init temps */ 1499 if ((res = mp_init_multi(&x, &y, &u, &v, 1500 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1501 return res; 1502 } 1503 1504 /* x = a, y = b */ 1505 if ((res = mp_mod(a, b, &x)) != MP_OKAY) { 1506 goto LBL_ERR; 1507 } 1508 if ((res = mp_copy (b, &y)) != MP_OKAY) { 1509 goto LBL_ERR; 1510 } 1511 1512 /* 2. [modified] if x,y are both even then return an error! */ 1513 if (BN_is_even (&x) == 1 && BN_is_even (&y) == 1) { 1514 res = MP_VAL; 1515 goto LBL_ERR; 1516 } 1517 1518 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1519 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 1520 goto LBL_ERR; 1521 } 1522 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 1523 goto LBL_ERR; 1524 } 1525 mp_set (&A, (unsigned long)1); 1526 mp_set (&D, (unsigned long)1); 1527 1528 top: 1529 /* 4. while u is even do */ 1530 while (BN_is_even (&u) == 1) { 1531 /* 4.1 u = u/2 */ 1532 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 1533 goto LBL_ERR; 1534 } 1535 /* 4.2 if A or B is odd then */ 1536 if (BN_is_odd (&A) == 1 || BN_is_odd (&B) == 1) { 1537 /* A = (A+y)/2, B = (B-x)/2 */ 1538 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { 1539 goto LBL_ERR; 1540 } 1541 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 1542 goto LBL_ERR; 1543 } 1544 } 1545 /* A = A/2, B = B/2 */ 1546 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { 1547 goto LBL_ERR; 1548 } 1549 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 1550 goto LBL_ERR; 1551 } 1552 } 1553 1554 /* 5. while v is even do */ 1555 while (BN_is_even (&v) == 1) { 1556 /* 5.1 v = v/2 */ 1557 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 1558 goto LBL_ERR; 1559 } 1560 /* 5.2 if C or D is odd then */ 1561 if (BN_is_odd (&C) == 1 || BN_is_odd (&D) == 1) { 1562 /* C = (C+y)/2, D = (D-x)/2 */ 1563 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { 1564 goto LBL_ERR; 1565 } 1566 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 1567 goto LBL_ERR; 1568 } 1569 } 1570 /* C = C/2, D = D/2 */ 1571 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { 1572 goto LBL_ERR; 1573 } 1574 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 1575 goto LBL_ERR; 1576 } 1577 } 1578 1579 /* 6. if u >= v then */ 1580 if (mp_cmp (&u, &v) != MP_LT) { 1581 /* u = u - v, A = A - C, B = B - D */ 1582 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 1583 goto LBL_ERR; 1584 } 1585 1586 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { 1587 goto LBL_ERR; 1588 } 1589 1590 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 1591 goto LBL_ERR; 1592 } 1593 } else { 1594 /* v - v - u, C = C - A, D = D - B */ 1595 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 1596 goto LBL_ERR; 1597 } 1598 1599 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { 1600 goto LBL_ERR; 1601 } 1602 1603 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 1604 goto LBL_ERR; 1605 } 1606 } 1607 1608 /* if not zero goto step 4 */ 1609 if (BN_is_zero (&u) == 0) 1610 goto top; 1611 1612 /* now a = C, b = D, gcd == g*v */ 1613 1614 /* if v != 1 then there is no inverse */ 1615 if (mp_cmp_d (&v, 1) != MP_EQ) { 1616 res = MP_VAL; 1617 goto LBL_ERR; 1618 } 1619 1620 /* if its too low */ 1621 while (mp_cmp_d(&C, 0) == MP_LT) { 1622 if ((res = mp_add(&C, b, &C)) != MP_OKAY) { 1623 goto LBL_ERR; 1624 } 1625 } 1626 1627 /* too big */ 1628 while (mp_cmp_mag(&C, b) != MP_LT) { 1629 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { 1630 goto LBL_ERR; 1631 } 1632 } 1633 1634 /* C is now the inverse */ 1635 mp_exch (&C, c); 1636 res = MP_OKAY; 1637 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1638 return res; 1639 } 1640 1641 static int 1642 mp_invmod(mp_int *c, mp_int *a, mp_int *b) 1643 { 1644 /* b cannot be negative */ 1645 if (b->sign == MP_NEG || BN_is_zero(b) == 1) { 1646 return MP_VAL; 1647 } 1648 1649 /* if the modulus is odd we can use a faster routine instead */ 1650 if (BN_is_odd (b) == 1) { 1651 return fast_mp_invmod(a, b, c); 1652 } 1653 1654 return mp_invmod_slow(a, b, c); 1655 1656 /*NOTREACHED*/ 1657 return MP_VAL; 1658 } 1659 1660 /* b = |a| 1661 * 1662 * Simple function copies the input and fixes the sign to positive 1663 */ 1664 static int 1665 mp_abs (mp_int * a, mp_int * b) 1666 { 1667 int res; 1668 1669 /* copy a to b */ 1670 if (a != b) { 1671 if ((res = mp_copy (a, b)) != MP_OKAY) { 1672 return res; 1673 } 1674 } 1675 1676 /* force the sign of b to positive */ 1677 b->sign = MP_ZPOS; 1678 1679 return MP_OKAY; 1680 } 1681 1682 /* determines if reduce_2k_l can be used */ 1683 static int mp_reduce_is_2k_l(mp_int *a) 1684 { 1685 int ix, iy; 1686 1687 if (a->used == 0) { 1688 return MP_NO; 1689 } else if (a->used == 1) { 1690 return MP_YES; 1691 } else if (a->used > 1) { 1692 /* if more than half of the digits are -1 we're sold */ 1693 for (iy = ix = 0; ix < a->used; ix++) { 1694 if (a->dp[ix] == MP_MASK) { 1695 ++iy; 1696 } 1697 } 1698 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1699 1700 } 1701 return MP_NO; 1702 } 1703 1704 /* computes a = 2**b 1705 * 1706 * Simple algorithm which zeroes the int, grows it then just sets one bit 1707 * as required. 1708 */ 1709 static int 1710 mp_2expt (mp_int * a, int b) 1711 { 1712 int res; 1713 1714 /* zero a as per default */ 1715 mp_zero (a); 1716 1717 /* grow a to accomodate the single bit */ 1718 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1719 return res; 1720 } 1721 1722 /* set the used count of where the bit will go */ 1723 a->used = b / DIGIT_BIT + 1; 1724 1725 /* put the single bit in its place */ 1726 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1727 1728 return MP_OKAY; 1729 } 1730 1731 /* pre-calculate the value required for Barrett reduction 1732 * For a given modulus "b" it calulates the value required in "a" 1733 */ 1734 static int mp_reduce_setup (mp_int * a, mp_int * b) 1735 { 1736 int res; 1737 1738 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1739 return res; 1740 } 1741 return mp_div (a, NULL, a, b); 1742 } 1743 1744 /* b = a*2 */ 1745 static int mp_mul_2(mp_int * a, mp_int * b) 1746 { 1747 int x, res, oldused; 1748 1749 /* grow to accomodate result */ 1750 if (b->alloc < a->used + 1) { 1751 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { 1752 return res; 1753 } 1754 } 1755 1756 oldused = b->used; 1757 b->used = a->used; 1758 1759 { 1760 mp_digit r, rr, *tmpa, *tmpb; 1761 1762 /* alias for source */ 1763 tmpa = a->dp; 1764 1765 /* alias for dest */ 1766 tmpb = b->dp; 1767 1768 /* carry */ 1769 r = 0; 1770 for (x = 0; x < a->used; x++) { 1771 1772 /* get what will be the *next* carry bit from the 1773 * MSB of the current digit 1774 */ 1775 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1776 1777 /* now shift up this digit, add in the carry [from the previous] */ 1778 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1779 1780 /* copy the carry that would be from the source 1781 * digit into the next iteration 1782 */ 1783 r = rr; 1784 } 1785 1786 /* new leading digit? */ 1787 if (r != 0) { 1788 /* add a MSB which is always 1 at this point */ 1789 *tmpb = 1; 1790 ++(b->used); 1791 } 1792 1793 /* now zero any excess digits on the destination 1794 * that we didn't write to 1795 */ 1796 tmpb = b->dp + b->used; 1797 for (x = b->used; x < oldused; x++) { 1798 *tmpb++ = 0; 1799 } 1800 } 1801 b->sign = a->sign; 1802 return MP_OKAY; 1803 } 1804 1805 /* divide by three (based on routine from MPI and the GMP manual) */ 1806 static int 1807 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) 1808 { 1809 mp_int q; 1810 mp_word w, t; 1811 mp_digit b; 1812 int res, ix; 1813 1814 /* b = 2**DIGIT_BIT / 3 */ 1815 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1816 1817 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1818 return res; 1819 } 1820 1821 q.used = a->used; 1822 q.sign = a->sign; 1823 w = 0; 1824 for (ix = a->used - 1; ix >= 0; ix--) { 1825 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1826 1827 if (w >= 3) { 1828 /* multiply w by [1/3] */ 1829 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1830 1831 /* now subtract 3 * [w/3] from w, to get the remainder */ 1832 w -= t+t+t; 1833 1834 /* fixup the remainder as required since 1835 * the optimization is not exact. 1836 */ 1837 while (w >= 3) { 1838 t += 1; 1839 w -= 3; 1840 } 1841 } else { 1842 t = 0; 1843 } 1844 q.dp[ix] = (mp_digit)t; 1845 } 1846 1847 /* [optional] store the remainder */ 1848 if (d != NULL) { 1849 *d = (mp_digit)w; 1850 } 1851 1852 /* [optional] store the quotient */ 1853 if (c != NULL) { 1854 mp_clamp(&q); 1855 mp_exch(&q, c); 1856 } 1857 mp_clear(&q); 1858 1859 return res; 1860 } 1861 1862 /* multiplication using the Toom-Cook 3-way algorithm 1863 * 1864 * Much more complicated than Karatsuba but has a lower 1865 * asymptotic running time of O(N**1.464). This algorithm is 1866 * only particularly useful on VERY large inputs 1867 * (we're talking 1000s of digits here...). 1868 */ 1869 static int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) 1870 { 1871 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1872 int res, B; 1873 1874 /* init temps */ 1875 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1876 &a0, &a1, &a2, &b0, &b1, 1877 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1878 return res; 1879 } 1880 1881 /* B */ 1882 B = MIN(a->used, b->used) / 3; 1883 1884 /* a = a2 * B**2 + a1 * B + a0 */ 1885 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1886 goto ERR; 1887 } 1888 1889 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1890 goto ERR; 1891 } 1892 mp_rshd(&a1, B); 1893 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 1894 1895 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1896 goto ERR; 1897 } 1898 mp_rshd(&a2, B*2); 1899 1900 /* b = b2 * B**2 + b1 * B + b0 */ 1901 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1902 goto ERR; 1903 } 1904 1905 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1906 goto ERR; 1907 } 1908 mp_rshd(&b1, B); 1909 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); 1910 1911 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1912 goto ERR; 1913 } 1914 mp_rshd(&b2, B*2); 1915 1916 /* w0 = a0*b0 */ 1917 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { 1918 goto ERR; 1919 } 1920 1921 /* w4 = a2 * b2 */ 1922 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { 1923 goto ERR; 1924 } 1925 1926 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1927 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 1928 goto ERR; 1929 } 1930 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1931 goto ERR; 1932 } 1933 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 1934 goto ERR; 1935 } 1936 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1937 goto ERR; 1938 } 1939 1940 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { 1941 goto ERR; 1942 } 1943 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1944 goto ERR; 1945 } 1946 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 1947 goto ERR; 1948 } 1949 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1950 goto ERR; 1951 } 1952 1953 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1954 goto ERR; 1955 } 1956 1957 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1958 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 1959 goto ERR; 1960 } 1961 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1962 goto ERR; 1963 } 1964 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 1965 goto ERR; 1966 } 1967 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1968 goto ERR; 1969 } 1970 1971 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { 1972 goto ERR; 1973 } 1974 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1975 goto ERR; 1976 } 1977 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 1978 goto ERR; 1979 } 1980 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1981 goto ERR; 1982 } 1983 1984 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1985 goto ERR; 1986 } 1987 1988 1989 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1990 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1991 goto ERR; 1992 } 1993 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1994 goto ERR; 1995 } 1996 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { 1997 goto ERR; 1998 } 1999 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 2000 goto ERR; 2001 } 2002 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { 2003 goto ERR; 2004 } 2005 2006 /* now solve the matrix 2007 2008 0 0 0 0 1 2009 1 2 4 8 16 2010 1 1 1 1 1 2011 16 8 4 2 1 2012 1 0 0 0 0 2013 2014 using 12 subtractions, 4 shifts, 2015 2 small divisions and 1 small multiplication 2016 */ 2017 2018 /* r1 - r4 */ 2019 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 2020 goto ERR; 2021 } 2022 /* r3 - r0 */ 2023 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 2024 goto ERR; 2025 } 2026 /* r1/2 */ 2027 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 2028 goto ERR; 2029 } 2030 /* r3/2 */ 2031 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 2032 goto ERR; 2033 } 2034 /* r2 - r0 - r4 */ 2035 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 2036 goto ERR; 2037 } 2038 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 2039 goto ERR; 2040 } 2041 /* r1 - r2 */ 2042 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2043 goto ERR; 2044 } 2045 /* r3 - r2 */ 2046 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2047 goto ERR; 2048 } 2049 /* r1 - 8r0 */ 2050 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 2051 goto ERR; 2052 } 2053 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 2054 goto ERR; 2055 } 2056 /* r3 - 8r4 */ 2057 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 2058 goto ERR; 2059 } 2060 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 2061 goto ERR; 2062 } 2063 /* 3r2 - r1 - r3 */ 2064 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 2065 goto ERR; 2066 } 2067 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 2068 goto ERR; 2069 } 2070 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 2071 goto ERR; 2072 } 2073 /* r1 - r2 */ 2074 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2075 goto ERR; 2076 } 2077 /* r3 - r2 */ 2078 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2079 goto ERR; 2080 } 2081 /* r1/3 */ 2082 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 2083 goto ERR; 2084 } 2085 /* r3/3 */ 2086 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 2087 goto ERR; 2088 } 2089 2090 /* at this point shift W[n] by B*n */ 2091 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 2092 goto ERR; 2093 } 2094 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 2095 goto ERR; 2096 } 2097 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 2098 goto ERR; 2099 } 2100 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 2101 goto ERR; 2102 } 2103 2104 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { 2105 goto ERR; 2106 } 2107 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2108 goto ERR; 2109 } 2110 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2111 goto ERR; 2112 } 2113 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { 2114 goto ERR; 2115 } 2116 2117 ERR: 2118 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2119 &a0, &a1, &a2, &b0, &b1, 2120 &b2, &tmp1, &tmp2, NULL); 2121 return res; 2122 } 2123 2124 #define TOOM_MUL_CUTOFF 350 2125 #define KARATSUBA_MUL_CUTOFF 80 2126 2127 /* c = |a| * |b| using Karatsuba Multiplication using 2128 * three half size multiplications 2129 * 2130 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2131 * let n represent half of the number of digits in 2132 * the min(a,b) 2133 * 2134 * a = a1 * B**n + a0 2135 * b = b1 * B**n + b0 2136 * 2137 * Then, a * b => 2138 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2139 * 2140 * Note that a1b1 and a0b0 are used twice and only need to be 2141 * computed once. So in total three half size (half # of 2142 * digit) multiplications are performed, a0b0, a1b1 and 2143 * (a1+b1)(a0+b0) 2144 * 2145 * Note that a multiplication of half the digits requires 2146 * 1/4th the number of single precision multiplications so in 2147 * total after one call 25% of the single precision multiplications 2148 * are saved. Note also that the call to mp_mul can end up back 2149 * in this function if the a0, a1, b0, or b1 are above the threshold. 2150 * This is known as divide-and-conquer and leads to the famous 2151 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2152 * the standard O(N**2) that the baseline/comba methods use. 2153 * Generally though the overhead of this method doesn't pay off 2154 * until a certain size (N ~ 80) is reached. 2155 */ 2156 static int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) 2157 { 2158 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2159 int B; 2160 int err; 2161 2162 /* default the return code to an error */ 2163 err = MP_MEM; 2164 2165 /* min # of digits */ 2166 B = MIN (a->used, b->used); 2167 2168 /* now divide in two */ 2169 B = (int)((unsigned)B >> 1); 2170 2171 /* init copy all the temps */ 2172 if (mp_init_size (&x0, B) != MP_OKAY) 2173 goto ERR; 2174 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 2175 goto X0; 2176 if (mp_init_size (&y0, B) != MP_OKAY) 2177 goto X1; 2178 if (mp_init_size (&y1, b->used - B) != MP_OKAY) 2179 goto Y0; 2180 2181 /* init temps */ 2182 if (mp_init_size (&t1, B * 2) != MP_OKAY) 2183 goto Y1; 2184 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) 2185 goto T1; 2186 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) 2187 goto X0Y0; 2188 2189 /* now shift the digits */ 2190 x0.used = y0.used = B; 2191 x1.used = a->used - B; 2192 y1.used = b->used - B; 2193 2194 { 2195 int x; 2196 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2197 2198 /* we copy the digits directly instead of using higher level functions 2199 * since we also need to shift the digits 2200 */ 2201 tmpa = a->dp; 2202 tmpb = b->dp; 2203 2204 tmpx = x0.dp; 2205 tmpy = y0.dp; 2206 for (x = 0; x < B; x++) { 2207 *tmpx++ = *tmpa++; 2208 *tmpy++ = *tmpb++; 2209 } 2210 2211 tmpx = x1.dp; 2212 for (x = B; x < a->used; x++) { 2213 *tmpx++ = *tmpa++; 2214 } 2215 2216 tmpy = y1.dp; 2217 for (x = B; x < b->used; x++) { 2218 *tmpy++ = *tmpb++; 2219 } 2220 } 2221 2222 /* only need to clamp the lower words since by definition the 2223 * upper words x1/y1 must have a known number of digits 2224 */ 2225 mp_clamp (&x0); 2226 mp_clamp (&y0); 2227 2228 /* now calc the products x0y0 and x1y1 */ 2229 /* after this x0 is no longer required, free temp [x0==t2]! */ 2230 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) 2231 goto X1Y1; /* x0y0 = x0*y0 */ 2232 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 2233 goto X1Y1; /* x1y1 = x1*y1 */ 2234 2235 /* now calc x1+x0 and y1+y0 */ 2236 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 2237 goto X1Y1; /* t1 = x1 - x0 */ 2238 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) 2239 goto X1Y1; /* t2 = y1 - y0 */ 2240 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) 2241 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2242 2243 /* add x0y0 */ 2244 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 2245 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2246 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) 2247 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2248 2249 /* shift by B */ 2250 if (mp_lshd (&t1, B) != MP_OKAY) 2251 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2252 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) 2253 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2254 2255 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) 2256 goto X1Y1; /* t1 = x0y0 + t1 */ 2257 if (mp_add (&t1, &x1y1, c) != MP_OKAY) 2258 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2259 2260 /* Algorithm succeeded set the return code to MP_OKAY */ 2261 err = MP_OKAY; 2262 2263 X1Y1:mp_clear (&x1y1); 2264 X0Y0:mp_clear (&x0y0); 2265 T1:mp_clear (&t1); 2266 Y1:mp_clear (&y1); 2267 Y0:mp_clear (&y0); 2268 X1:mp_clear (&x1); 2269 X0:mp_clear (&x0); 2270 ERR: 2271 return err; 2272 } 2273 2274 /* Fast (comba) multiplier 2275 * 2276 * This is the fast column-array [comba] multiplier. It is 2277 * designed to compute the columns of the product first 2278 * then handle the carries afterwards. This has the effect 2279 * of making the nested loops that compute the columns very 2280 * simple and schedulable on super-scalar processors. 2281 * 2282 * This has been modified to produce a variable number of 2283 * digits of output so if say only a half-product is required 2284 * you don't have to compute the upper half (a feature 2285 * required for fast Barrett reduction). 2286 * 2287 * Based on Algorithm 14.12 on pp.595 of HAC. 2288 * 2289 */ 2290 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2291 { 2292 int olduse, res, pa, ix, iz; 2293 /*LINTED*/ 2294 mp_digit W[MP_WARRAY]; 2295 mp_word _W; 2296 2297 /* grow the destination as required */ 2298 if (c->alloc < digs) { 2299 if ((res = mp_grow (c, digs)) != MP_OKAY) { 2300 return res; 2301 } 2302 } 2303 2304 /* number of output digits to produce */ 2305 pa = MIN(digs, a->used + b->used); 2306 2307 /* clear the carry */ 2308 _W = 0; 2309 for (ix = 0; ix < pa; ix++) { 2310 int tx, ty; 2311 int iy; 2312 mp_digit *tmpx, *tmpy; 2313 2314 /* get offsets into the two bignums */ 2315 ty = MIN(b->used-1, ix); 2316 tx = ix - ty; 2317 2318 /* setup temp aliases */ 2319 tmpx = a->dp + tx; 2320 tmpy = b->dp + ty; 2321 2322 /* this is the number of times the loop will iterrate, essentially 2323 while (tx++ < a->used && ty-- >= 0) { ... } 2324 */ 2325 iy = MIN(a->used-tx, ty+1); 2326 2327 /* execute loop */ 2328 for (iz = 0; iz < iy; ++iz) { 2329 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2330 2331 } 2332 2333 /* store term */ 2334 W[ix] = ((mp_digit)_W) & MP_MASK; 2335 2336 /* make next carry */ 2337 _W = _W >> ((mp_word)DIGIT_BIT); 2338 } 2339 2340 /* setup dest */ 2341 olduse = c->used; 2342 c->used = pa; 2343 2344 { 2345 mp_digit *tmpc; 2346 tmpc = c->dp; 2347 for (ix = 0; ix < pa+1; ix++) { 2348 /* now extract the previous digit [below the carry] */ 2349 *tmpc++ = W[ix]; 2350 } 2351 2352 /* clear unused digits [that existed in the old copy of c] */ 2353 for (; ix < olduse; ix++) { 2354 *tmpc++ = 0; 2355 } 2356 } 2357 mp_clamp (c); 2358 return MP_OKAY; 2359 } 2360 2361 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2362 /* Revision: 1.2 $ */ 2363 /* Date: 2011/03/18 16:22:09 $ */ 2364 2365 2366 /* multiplies |a| * |b| and only computes upto digs digits of result 2367 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2368 * many digits of output are created. 2369 */ 2370 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2371 { 2372 mp_int t; 2373 int res, pa, pb, ix, iy; 2374 mp_digit u; 2375 mp_word r; 2376 mp_digit tmpx, *tmpt, *tmpy; 2377 2378 /* can we use the fast multiplier? */ 2379 if (((unsigned)(digs) < MP_WARRAY) && 2380 MIN (a->used, b->used) < 2381 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2382 return fast_s_mp_mul_digs (a, b, c, digs); 2383 } 2384 2385 if ((res = mp_init_size (&t, digs)) != MP_OKAY) { 2386 return res; 2387 } 2388 t.used = digs; 2389 2390 /* compute the digits of the product directly */ 2391 pa = a->used; 2392 for (ix = 0; ix < pa; ix++) { 2393 /* set the carry to zero */ 2394 u = 0; 2395 2396 /* limit ourselves to making digs digits of output */ 2397 pb = MIN (b->used, digs - ix); 2398 2399 /* setup some aliases */ 2400 /* copy of the digit from a used within the nested loop */ 2401 tmpx = a->dp[ix]; 2402 2403 /* an alias for the destination shifted ix places */ 2404 tmpt = t.dp + ix; 2405 2406 /* an alias for the digits of b */ 2407 tmpy = b->dp; 2408 2409 /* compute the columns of the output and propagate the carry */ 2410 for (iy = 0; iy < pb; iy++) { 2411 /* compute the column as a mp_word */ 2412 r = ((mp_word)*tmpt) + 2413 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2414 ((mp_word) u); 2415 2416 /* the new column is the lower part of the result */ 2417 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2418 2419 /* get the carry word from the result */ 2420 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2421 } 2422 /* set carry if it is placed below digs */ 2423 if (ix + iy < digs) { 2424 *tmpt = u; 2425 } 2426 } 2427 2428 mp_clamp (&t); 2429 mp_exch (&t, c); 2430 2431 mp_clear (&t); 2432 return MP_OKAY; 2433 } 2434 2435 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2436 /* Revision: 1.3 $ */ 2437 /* Date: 2011/03/18 16:43:04 $ */ 2438 2439 /* high level multiplication (handles sign) */ 2440 static int 2441 mp_mul(mp_int * a, mp_int * b, mp_int * c) 2442 { 2443 int res, neg; 2444 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2445 2446 /* use Toom-Cook? */ 2447 if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) { 2448 res = mp_toom_mul(a, b, c); 2449 } else 2450 /* use Karatsuba? */ 2451 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2452 res = mp_karatsuba_mul (a, b, c); 2453 } else 2454 { 2455 /* can we use the fast multiplier? 2456 * 2457 * The fast multiplier can be used if the output will 2458 * have less than MP_WARRAY digits and the number of 2459 * digits won't affect carry propagation 2460 */ 2461 int digs = a->used + b->used + 1; 2462 2463 if (((unsigned)digs < MP_WARRAY) && 2464 MIN(a->used, b->used) <= 2465 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2466 res = fast_s_mp_mul_digs (a, b, c, digs); 2467 } else 2468 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ 2469 2470 } 2471 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2472 return res; 2473 } 2474 2475 /* this is a modified version of fast_s_mul_digs that only produces 2476 * output digits *above* digs. See the comments for fast_s_mul_digs 2477 * to see how it works. 2478 * 2479 * This is used in the Barrett reduction since for one of the multiplications 2480 * only the higher digits were needed. This essentially halves the work. 2481 * 2482 * Based on Algorithm 14.12 on pp.595 of HAC. 2483 */ 2484 static int 2485 fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2486 { 2487 int olduse, res, pa, ix, iz; 2488 mp_digit W[MP_WARRAY]; 2489 mp_word _W; 2490 2491 /* grow the destination as required */ 2492 pa = a->used + b->used; 2493 if (c->alloc < pa) { 2494 if ((res = mp_grow (c, pa)) != MP_OKAY) { 2495 return res; 2496 } 2497 } 2498 2499 /* number of output digits to produce */ 2500 pa = a->used + b->used; 2501 _W = 0; 2502 for (ix = digs; ix < pa; ix++) { 2503 int tx, ty, iy; 2504 mp_digit *tmpx, *tmpy; 2505 2506 /* get offsets into the two bignums */ 2507 ty = MIN(b->used-1, ix); 2508 tx = ix - ty; 2509 2510 /* setup temp aliases */ 2511 tmpx = a->dp + tx; 2512 tmpy = b->dp + ty; 2513 2514 /* this is the number of times the loop will iterrate, essentially its 2515 while (tx++ < a->used && ty-- >= 0) { ... } 2516 */ 2517 iy = MIN(a->used-tx, ty+1); 2518 2519 /* execute loop */ 2520 for (iz = 0; iz < iy; iz++) { 2521 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2522 } 2523 2524 /* store term */ 2525 W[ix] = ((mp_digit)_W) & MP_MASK; 2526 2527 /* make next carry */ 2528 _W = _W >> ((mp_word)DIGIT_BIT); 2529 } 2530 2531 /* setup dest */ 2532 olduse = c->used; 2533 c->used = pa; 2534 2535 { 2536 mp_digit *tmpc; 2537 2538 tmpc = c->dp + digs; 2539 for (ix = digs; ix < pa; ix++) { 2540 /* now extract the previous digit [below the carry] */ 2541 *tmpc++ = W[ix]; 2542 } 2543 2544 /* clear unused digits [that existed in the old copy of c] */ 2545 for (; ix < olduse; ix++) { 2546 *tmpc++ = 0; 2547 } 2548 } 2549 mp_clamp (c); 2550 return MP_OKAY; 2551 } 2552 2553 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2554 /* Revision: 1.1.1.1 $ */ 2555 /* Date: 2011/03/12 22:58:18 $ */ 2556 2557 /* multiplies |a| * |b| and does not compute the lower digs digits 2558 * [meant to get the higher part of the product] 2559 */ 2560 static int 2561 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2562 { 2563 mp_int t; 2564 int res, pa, pb, ix, iy; 2565 mp_digit u; 2566 mp_word r; 2567 mp_digit tmpx, *tmpt, *tmpy; 2568 2569 /* can we use the fast multiplier? */ 2570 if (((unsigned)(a->used + b->used + 1) < MP_WARRAY) 2571 && MIN (a->used, b->used) < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2572 return fast_s_mp_mul_high_digs (a, b, c, digs); 2573 } 2574 2575 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { 2576 return res; 2577 } 2578 t.used = a->used + b->used + 1; 2579 2580 pa = a->used; 2581 pb = b->used; 2582 for (ix = 0; ix < pa; ix++) { 2583 /* clear the carry */ 2584 u = 0; 2585 2586 /* left hand side of A[ix] * B[iy] */ 2587 tmpx = a->dp[ix]; 2588 2589 /* alias to the address of where the digits will be stored */ 2590 tmpt = &(t.dp[digs]); 2591 2592 /* alias for where to read the right hand side from */ 2593 tmpy = b->dp + (digs - ix); 2594 2595 for (iy = digs - ix; iy < pb; iy++) { 2596 /* calculate the double precision result */ 2597 r = ((mp_word)*tmpt) + 2598 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2599 ((mp_word) u); 2600 2601 /* get the lower part */ 2602 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2603 2604 /* carry the carry */ 2605 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2606 } 2607 *tmpt = u; 2608 } 2609 mp_clamp (&t); 2610 mp_exch (&t, c); 2611 mp_clear (&t); 2612 return MP_OKAY; 2613 } 2614 2615 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2616 /* Revision: 1.3 $ */ 2617 /* Date: 2011/03/18 16:43:04 $ */ 2618 2619 /* reduces x mod m, assumes 0 < x < m**2, mu is 2620 * precomputed via mp_reduce_setup. 2621 * From HAC pp.604 Algorithm 14.42 2622 */ 2623 static int 2624 mp_reduce (mp_int * x, mp_int * m, mp_int * mu) 2625 { 2626 mp_int q; 2627 int res, um = m->used; 2628 2629 /* q = x */ 2630 if ((res = mp_init_copy (&q, x)) != MP_OKAY) { 2631 return res; 2632 } 2633 2634 /* q1 = x / b**(k-1) */ 2635 mp_rshd (&q, um - 1); 2636 2637 /* according to HAC this optimization is ok */ 2638 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2639 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { 2640 goto CLEANUP; 2641 } 2642 } else { 2643 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 2644 goto CLEANUP; 2645 } 2646 } 2647 2648 /* q3 = q2 / b**(k+1) */ 2649 mp_rshd (&q, um + 1); 2650 2651 /* x = x mod b**(k+1), quick (no division) */ 2652 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2653 goto CLEANUP; 2654 } 2655 2656 /* q = q * m mod b**(k+1), quick (no division) */ 2657 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { 2658 goto CLEANUP; 2659 } 2660 2661 /* x = x - q */ 2662 if ((res = mp_sub (x, &q, x)) != MP_OKAY) { 2663 goto CLEANUP; 2664 } 2665 2666 /* If x < 0, add b**(k+1) to it */ 2667 if (mp_cmp_d (x, 0) == MP_LT) { 2668 mp_set (&q, 1); 2669 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) 2670 goto CLEANUP; 2671 if ((res = mp_add (x, &q, x)) != MP_OKAY) 2672 goto CLEANUP; 2673 } 2674 2675 /* Back off if it's too big */ 2676 while (mp_cmp (x, m) != MP_LT) { 2677 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { 2678 goto CLEANUP; 2679 } 2680 } 2681 2682 CLEANUP: 2683 mp_clear (&q); 2684 2685 return res; 2686 } 2687 2688 /* determines the setup value */ 2689 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2690 { 2691 int res; 2692 mp_int tmp; 2693 2694 if ((res = mp_init(&tmp)) != MP_OKAY) { 2695 return res; 2696 } 2697 2698 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2699 goto ERR; 2700 } 2701 2702 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { 2703 goto ERR; 2704 } 2705 2706 ERR: 2707 mp_clear(&tmp); 2708 return res; 2709 } 2710 2711 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2712 /* Revision: 1.1.1.1 $ */ 2713 /* Date: 2011/03/12 22:58:18 $ */ 2714 2715 /* reduces a modulo n where n is of the form 2**p - d 2716 This differs from reduce_2k since "d" can be larger 2717 than a single digit. 2718 */ 2719 static int 2720 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2721 { 2722 mp_int q; 2723 int p, res; 2724 2725 if ((res = mp_init(&q)) != MP_OKAY) { 2726 return res; 2727 } 2728 2729 p = mp_count_bits(n); 2730 top: 2731 /* q = a/2**p, a = a mod 2**p */ 2732 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 2733 goto ERR; 2734 } 2735 2736 /* q = q * d */ 2737 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 2738 goto ERR; 2739 } 2740 2741 /* a = a + q */ 2742 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 2743 goto ERR; 2744 } 2745 2746 if (mp_cmp_mag(a, n) != MP_LT) { 2747 s_mp_sub(a, n, a); 2748 goto top; 2749 } 2750 2751 ERR: 2752 mp_clear(&q); 2753 return res; 2754 } 2755 2756 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2757 /* Revision: 1.1.1.1 $ */ 2758 /* Date: 2011/03/12 22:58:18 $ */ 2759 2760 /* squaring using Toom-Cook 3-way algorithm */ 2761 static int 2762 mp_toom_sqr(mp_int *a, mp_int *b) 2763 { 2764 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2765 int res, B; 2766 2767 /* init temps */ 2768 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2769 return res; 2770 } 2771 2772 /* B */ 2773 B = a->used / 3; 2774 2775 /* a = a2 * B**2 + a1 * B + a0 */ 2776 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2777 goto ERR; 2778 } 2779 2780 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2781 goto ERR; 2782 } 2783 mp_rshd(&a1, B); 2784 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 2785 2786 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2787 goto ERR; 2788 } 2789 mp_rshd(&a2, B*2); 2790 2791 /* w0 = a0*a0 */ 2792 if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { 2793 goto ERR; 2794 } 2795 2796 /* w4 = a2 * a2 */ 2797 if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { 2798 goto ERR; 2799 } 2800 2801 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2802 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 2803 goto ERR; 2804 } 2805 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2806 goto ERR; 2807 } 2808 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 2809 goto ERR; 2810 } 2811 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2812 goto ERR; 2813 } 2814 2815 if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { 2816 goto ERR; 2817 } 2818 2819 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2820 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 2821 goto ERR; 2822 } 2823 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2824 goto ERR; 2825 } 2826 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 2827 goto ERR; 2828 } 2829 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2830 goto ERR; 2831 } 2832 2833 if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { 2834 goto ERR; 2835 } 2836 2837 2838 /* w2 = (a2 + a1 + a0)**2 */ 2839 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2840 goto ERR; 2841 } 2842 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2843 goto ERR; 2844 } 2845 if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { 2846 goto ERR; 2847 } 2848 2849 /* now solve the matrix 2850 2851 0 0 0 0 1 2852 1 2 4 8 16 2853 1 1 1 1 1 2854 16 8 4 2 1 2855 1 0 0 0 0 2856 2857 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2858 */ 2859 2860 /* r1 - r4 */ 2861 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 2862 goto ERR; 2863 } 2864 /* r3 - r0 */ 2865 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 2866 goto ERR; 2867 } 2868 /* r1/2 */ 2869 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 2870 goto ERR; 2871 } 2872 /* r3/2 */ 2873 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 2874 goto ERR; 2875 } 2876 /* r2 - r0 - r4 */ 2877 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 2878 goto ERR; 2879 } 2880 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 2881 goto ERR; 2882 } 2883 /* r1 - r2 */ 2884 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2885 goto ERR; 2886 } 2887 /* r3 - r2 */ 2888 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2889 goto ERR; 2890 } 2891 /* r1 - 8r0 */ 2892 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 2893 goto ERR; 2894 } 2895 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 2896 goto ERR; 2897 } 2898 /* r3 - 8r4 */ 2899 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 2900 goto ERR; 2901 } 2902 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 2903 goto ERR; 2904 } 2905 /* 3r2 - r1 - r3 */ 2906 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 2907 goto ERR; 2908 } 2909 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 2910 goto ERR; 2911 } 2912 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 2913 goto ERR; 2914 } 2915 /* r1 - r2 */ 2916 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 2917 goto ERR; 2918 } 2919 /* r3 - r2 */ 2920 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 2921 goto ERR; 2922 } 2923 /* r1/3 */ 2924 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 2925 goto ERR; 2926 } 2927 /* r3/3 */ 2928 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 2929 goto ERR; 2930 } 2931 2932 /* at this point shift W[n] by B*n */ 2933 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 2934 goto ERR; 2935 } 2936 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 2937 goto ERR; 2938 } 2939 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 2940 goto ERR; 2941 } 2942 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 2943 goto ERR; 2944 } 2945 2946 if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { 2947 goto ERR; 2948 } 2949 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2950 goto ERR; 2951 } 2952 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2953 goto ERR; 2954 } 2955 if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { 2956 goto ERR; 2957 } 2958 2959 ERR: 2960 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2961 return res; 2962 } 2963 2964 2965 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2966 /* Revision: 1.1.1.1 $ */ 2967 /* Date: 2011/03/12 22:58:18 $ */ 2968 2969 /* Karatsuba squaring, computes b = a*a using three 2970 * half size squarings 2971 * 2972 * See comments of karatsuba_mul for details. It 2973 * is essentially the same algorithm but merely 2974 * tuned to perform recursive squarings. 2975 */ 2976 static int mp_karatsuba_sqr (mp_int * a, mp_int * b) 2977 { 2978 mp_int x0, x1, t1, t2, x0x0, x1x1; 2979 int B, err; 2980 2981 err = MP_MEM; 2982 2983 /* min # of digits */ 2984 B = a->used; 2985 2986 /* now divide in two */ 2987 B = (unsigned)B >> 1; 2988 2989 /* init copy all the temps */ 2990 if (mp_init_size (&x0, B) != MP_OKAY) 2991 goto ERR; 2992 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 2993 goto X0; 2994 2995 /* init temps */ 2996 if (mp_init_size (&t1, a->used * 2) != MP_OKAY) 2997 goto X1; 2998 if (mp_init_size (&t2, a->used * 2) != MP_OKAY) 2999 goto T1; 3000 if (mp_init_size (&x0x0, B * 2) != MP_OKAY) 3001 goto T2; 3002 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) 3003 goto X0X0; 3004 3005 { 3006 int x; 3007 mp_digit *dst, *src; 3008 3009 src = a->dp; 3010 3011 /* now shift the digits */ 3012 dst = x0.dp; 3013 for (x = 0; x < B; x++) { 3014 *dst++ = *src++; 3015 } 3016 3017 dst = x1.dp; 3018 for (x = B; x < a->used; x++) { 3019 *dst++ = *src++; 3020 } 3021 } 3022 3023 x0.used = B; 3024 x1.used = a->used - B; 3025 3026 mp_clamp (&x0); 3027 3028 /* now calc the products x0*x0 and x1*x1 */ 3029 if (mp_sqr (&x0, &x0x0) != MP_OKAY) 3030 goto X1X1; /* x0x0 = x0*x0 */ 3031 if (mp_sqr (&x1, &x1x1) != MP_OKAY) 3032 goto X1X1; /* x1x1 = x1*x1 */ 3033 3034 /* now calc (x1+x0)**2 */ 3035 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 3036 goto X1X1; /* t1 = x1 - x0 */ 3037 if (mp_sqr (&t1, &t1) != MP_OKAY) 3038 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 3039 3040 /* add x0y0 */ 3041 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) 3042 goto X1X1; /* t2 = x0x0 + x1x1 */ 3043 if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) 3044 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3045 3046 /* shift by B */ 3047 if (mp_lshd (&t1, B) != MP_OKAY) 3048 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3049 if (mp_lshd (&x1x1, B * 2) != MP_OKAY) 3050 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3051 3052 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) 3053 goto X1X1; /* t1 = x0x0 + t1 */ 3054 if (mp_add (&t1, &x1x1, b) != MP_OKAY) 3055 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3056 3057 err = MP_OKAY; 3058 3059 X1X1:mp_clear (&x1x1); 3060 X0X0:mp_clear (&x0x0); 3061 T2:mp_clear (&t2); 3062 T1:mp_clear (&t1); 3063 X1:mp_clear (&x1); 3064 X0:mp_clear (&x0); 3065 ERR: 3066 return err; 3067 } 3068 3069 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3070 /* Revision: 1.2 $ */ 3071 /* Date: 2011/03/12 23:43:54 $ */ 3072 3073 /* the jist of squaring... 3074 * you do like mult except the offset of the tmpx [one that 3075 * starts closer to zero] can't equal the offset of tmpy. 3076 * So basically you set up iy like before then you min it with 3077 * (ty-tx) so that it never happens. You double all those 3078 * you add in the inner loop 3079 3080 After that loop you do the squares and add them in. 3081 */ 3082 3083 static int fast_s_mp_sqr (mp_int * a, mp_int * b) 3084 { 3085 int olduse, res, pa, ix, iz; 3086 mp_digit W[MP_WARRAY], *tmpx; 3087 mp_word W1; 3088 3089 /* grow the destination as required */ 3090 pa = a->used + a->used; 3091 if (b->alloc < pa) { 3092 if ((res = mp_grow (b, pa)) != MP_OKAY) { 3093 return res; 3094 } 3095 } 3096 3097 /* number of output digits to produce */ 3098 W1 = 0; 3099 for (ix = 0; ix < pa; ix++) { 3100 int tx, ty, iy; 3101 mp_word _W; 3102 mp_digit *tmpy; 3103 3104 /* clear counter */ 3105 _W = 0; 3106 3107 /* get offsets into the two bignums */ 3108 ty = MIN(a->used-1, ix); 3109 tx = ix - ty; 3110 3111 /* setup temp aliases */ 3112 tmpx = a->dp + tx; 3113 tmpy = a->dp + ty; 3114 3115 /* this is the number of times the loop will iterrate, essentially 3116 while (tx++ < a->used && ty-- >= 0) { ... } 3117 */ 3118 iy = MIN(a->used-tx, ty+1); 3119 3120 /* now for squaring tx can never equal ty 3121 * we halve the distance since they approach at a rate of 2x 3122 * and we have to round because odd cases need to be executed 3123 */ 3124 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3125 3126 /* execute loop */ 3127 for (iz = 0; iz < iy; iz++) { 3128 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3129 } 3130 3131 /* double the inner product and add carry */ 3132 _W = _W + _W + W1; 3133 3134 /* even columns have the square term in them */ 3135 if ((ix&1) == 0) { 3136 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3137 } 3138 3139 /* store it */ 3140 W[ix] = (mp_digit)(_W & MP_MASK); 3141 3142 /* make next carry */ 3143 W1 = _W >> ((mp_word)DIGIT_BIT); 3144 } 3145 3146 /* setup dest */ 3147 olduse = b->used; 3148 b->used = a->used+a->used; 3149 3150 { 3151 mp_digit *tmpb; 3152 tmpb = b->dp; 3153 for (ix = 0; ix < pa; ix++) { 3154 *tmpb++ = W[ix] & MP_MASK; 3155 } 3156 3157 /* clear unused digits [that existed in the old copy of c] */ 3158 for (; ix < olduse; ix++) { 3159 *tmpb++ = 0; 3160 } 3161 } 3162 mp_clamp (b); 3163 return MP_OKAY; 3164 } 3165 3166 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3167 /* Revision: 1.3 $ */ 3168 /* Date: 2011/03/18 16:43:04 $ */ 3169 3170 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3171 static int 3172 s_mp_sqr (mp_int * a, mp_int * b) 3173 { 3174 mp_int t; 3175 int res, ix, iy, pa; 3176 mp_word r; 3177 mp_digit u, tmpx, *tmpt; 3178 3179 pa = a->used; 3180 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { 3181 return res; 3182 } 3183 3184 /* default used is maximum possible size */ 3185 t.used = 2*pa + 1; 3186 3187 for (ix = 0; ix < pa; ix++) { 3188 /* first calculate the digit at 2*ix */ 3189 /* calculate double precision result */ 3190 r = ((mp_word) t.dp[2*ix]) + 3191 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3192 3193 /* store lower part in result */ 3194 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3195 3196 /* get the carry */ 3197 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3198 3199 /* left hand side of A[ix] * A[iy] */ 3200 tmpx = a->dp[ix]; 3201 3202 /* alias for where to store the results */ 3203 tmpt = t.dp + (2*ix + 1); 3204 3205 for (iy = ix + 1; iy < pa; iy++) { 3206 /* first calculate the product */ 3207 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3208 3209 /* now calculate the double precision result, note we use 3210 * addition instead of *2 since it's easier to optimize 3211 */ 3212 r = ((mp_word) *tmpt) + r + r + ((mp_word) u); 3213 3214 /* store lower part */ 3215 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3216 3217 /* get carry */ 3218 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3219 } 3220 /* propagate upwards */ 3221 while (u != ((mp_digit) 0)) { 3222 r = ((mp_word) *tmpt) + ((mp_word) u); 3223 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3224 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3225 } 3226 } 3227 3228 mp_clamp (&t); 3229 mp_exch (&t, b); 3230 mp_clear (&t); 3231 return MP_OKAY; 3232 } 3233 3234 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3235 /* Revision: 1.1.1.1 $ */ 3236 /* Date: 2011/03/12 22:58:18 $ */ 3237 3238 #define TOOM_SQR_CUTOFF 400 3239 #define KARATSUBA_SQR_CUTOFF 120 3240 3241 /* computes b = a*a */ 3242 static int 3243 mp_sqr (mp_int * a, mp_int * b) 3244 { 3245 int res; 3246 3247 /* use Toom-Cook? */ 3248 if (a->used >= TOOM_SQR_CUTOFF) { 3249 res = mp_toom_sqr(a, b); 3250 /* Karatsuba? */ 3251 } else 3252 if (a->used >= KARATSUBA_SQR_CUTOFF) { 3253 res = mp_karatsuba_sqr (a, b); 3254 } else 3255 { 3256 /* can we use the fast comba multiplier? */ 3257 if (((unsigned)a->used * 2 + 1) < MP_WARRAY && 3258 a->used < 3259 (1 << (unsigned)(sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { 3260 res = fast_s_mp_sqr (a, b); 3261 } else 3262 res = s_mp_sqr (a, b); 3263 } 3264 b->sign = MP_ZPOS; 3265 return res; 3266 } 3267 3268 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3269 /* Revision: 1.3 $ */ 3270 /* Date: 2011/03/18 16:43:04 $ */ 3271 3272 #define TAB_SIZE 256 3273 3274 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3275 { 3276 mp_int M[TAB_SIZE], res, mu; 3277 mp_digit buf; 3278 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3279 int (*redux)(mp_int*,mp_int*,mp_int*); 3280 3281 /* find window size */ 3282 x = mp_count_bits (X); 3283 if (x <= 7) { 3284 winsize = 2; 3285 } else if (x <= 36) { 3286 winsize = 3; 3287 } else if (x <= 140) { 3288 winsize = 4; 3289 } else if (x <= 450) { 3290 winsize = 5; 3291 } else if (x <= 1303) { 3292 winsize = 6; 3293 } else if (x <= 3529) { 3294 winsize = 7; 3295 } else { 3296 winsize = 8; 3297 } 3298 3299 /* init M array */ 3300 /* init first cell */ 3301 if ((err = mp_init(&M[1])) != MP_OKAY) { 3302 return err; 3303 } 3304 3305 /* now init the second half of the array */ 3306 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3307 if ((err = mp_init(&M[x])) != MP_OKAY) { 3308 for (y = 1<<(winsize-1); y < x; y++) { 3309 mp_clear (&M[y]); 3310 } 3311 mp_clear(&M[1]); 3312 return err; 3313 } 3314 } 3315 3316 /* create mu, used for Barrett reduction */ 3317 if ((err = mp_init (&mu)) != MP_OKAY) { 3318 goto LBL_M; 3319 } 3320 3321 if (redmode == 0) { 3322 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { 3323 goto LBL_MU; 3324 } 3325 redux = mp_reduce; 3326 } else { 3327 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { 3328 goto LBL_MU; 3329 } 3330 redux = mp_reduce_2k_l; 3331 } 3332 3333 /* create M table 3334 * 3335 * The M table contains powers of the base, 3336 * e.g. M[x] = G**x mod P 3337 * 3338 * The first half of the table is not 3339 * computed though accept for M[0] and M[1] 3340 */ 3341 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { 3342 goto LBL_MU; 3343 } 3344 3345 /* compute the value at M[1<<(winsize-1)] by squaring 3346 * M[1] (winsize-1) times 3347 */ 3348 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3349 goto LBL_MU; 3350 } 3351 3352 for (x = 0; x < (winsize - 1); x++) { 3353 /* square it */ 3354 if ((err = mp_sqr (&M[1 << (winsize - 1)], 3355 &M[1 << (winsize - 1)])) != MP_OKAY) { 3356 goto LBL_MU; 3357 } 3358 3359 /* reduce modulo P */ 3360 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3361 goto LBL_MU; 3362 } 3363 } 3364 3365 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3366 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3367 */ 3368 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3369 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3370 goto LBL_MU; 3371 } 3372 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { 3373 goto LBL_MU; 3374 } 3375 } 3376 3377 /* setup result */ 3378 if ((err = mp_init (&res)) != MP_OKAY) { 3379 goto LBL_MU; 3380 } 3381 mp_set (&res, 1); 3382 3383 /* set initial mode and bit cnt */ 3384 mode = 0; 3385 bitcnt = 1; 3386 buf = 0; 3387 digidx = X->used - 1; 3388 bitcpy = 0; 3389 bitbuf = 0; 3390 3391 for (;;) { 3392 /* grab next digit as required */ 3393 if (--bitcnt == 0) { 3394 /* if digidx == -1 we are out of digits */ 3395 if (digidx == -1) { 3396 break; 3397 } 3398 /* read next digit and reset the bitcnt */ 3399 buf = X->dp[digidx--]; 3400 bitcnt = (int) DIGIT_BIT; 3401 } 3402 3403 /* grab the next msb from the exponent */ 3404 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3405 buf <<= (mp_digit)1; 3406 3407 /* if the bit is zero and mode == 0 then we ignore it 3408 * These represent the leading zero bits before the first 1 bit 3409 * in the exponent. Technically this opt is not required but it 3410 * does lower the # of trivial squaring/reductions used 3411 */ 3412 if (mode == 0 && y == 0) { 3413 continue; 3414 } 3415 3416 /* if the bit is zero and mode == 1 then we square */ 3417 if (mode == 1 && y == 0) { 3418 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3419 goto LBL_RES; 3420 } 3421 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3422 goto LBL_RES; 3423 } 3424 continue; 3425 } 3426 3427 /* else we add it to the window */ 3428 bitbuf |= (y << (winsize - ++bitcpy)); 3429 mode = 2; 3430 3431 if (bitcpy == winsize) { 3432 /* ok window is filled so square as required and multiply */ 3433 /* square first */ 3434 for (x = 0; x < winsize; x++) { 3435 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3436 goto LBL_RES; 3437 } 3438 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3439 goto LBL_RES; 3440 } 3441 } 3442 3443 /* then multiply */ 3444 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 3445 goto LBL_RES; 3446 } 3447 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3448 goto LBL_RES; 3449 } 3450 3451 /* empty window and reset */ 3452 bitcpy = 0; 3453 bitbuf = 0; 3454 mode = 1; 3455 } 3456 } 3457 3458 /* if bits remain then square/multiply */ 3459 if (mode == 2 && bitcpy > 0) { 3460 /* square then multiply if the bit is set */ 3461 for (x = 0; x < bitcpy; x++) { 3462 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3463 goto LBL_RES; 3464 } 3465 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3466 goto LBL_RES; 3467 } 3468 3469 bitbuf <<= 1; 3470 if ((bitbuf & (1 << winsize)) != 0) { 3471 /* then multiply */ 3472 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 3473 goto LBL_RES; 3474 } 3475 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 3476 goto LBL_RES; 3477 } 3478 } 3479 } 3480 } 3481 3482 mp_exch (&res, Y); 3483 err = MP_OKAY; 3484 LBL_RES:mp_clear (&res); 3485 LBL_MU:mp_clear (&mu); 3486 LBL_M: 3487 mp_clear(&M[1]); 3488 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3489 mp_clear (&M[x]); 3490 } 3491 return err; 3492 } 3493 3494 /* determines if a number is a valid DR modulus */ 3495 static int 3496 mp_dr_is_modulus(mp_int *a) 3497 { 3498 int ix; 3499 3500 /* must be at least two digits */ 3501 if (a->used < 2) { 3502 return 0; 3503 } 3504 3505 /* must be of the form b**k - a [a <= b] so all 3506 * but the first digit must be equal to -1 (mod b). 3507 */ 3508 for (ix = 1; ix < a->used; ix++) { 3509 if (a->dp[ix] != MP_MASK) { 3510 return 0; 3511 } 3512 } 3513 return 1; 3514 } 3515 3516 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3517 /* Revision: 1.1.1.1 $ */ 3518 /* Date: 2011/03/12 22:58:18 $ */ 3519 3520 /* determines if mp_reduce_2k can be used */ 3521 static int mp_reduce_is_2k(mp_int *a) 3522 { 3523 int ix, iy, iw; 3524 mp_digit iz; 3525 3526 if (a->used == 0) { 3527 return MP_NO; 3528 } else if (a->used == 1) { 3529 return MP_YES; 3530 } else if (a->used > 1) { 3531 iy = mp_count_bits(a); 3532 iz = 1; 3533 iw = 1; 3534 3535 /* Test every bit from the second digit up, must be 1 */ 3536 for (ix = DIGIT_BIT; ix < iy; ix++) { 3537 if ((a->dp[iw] & iz) == 0) { 3538 return MP_NO; 3539 } 3540 iz <<= 1; 3541 if (iz > (mp_digit)MP_MASK) { 3542 ++iw; 3543 iz = 1; 3544 } 3545 } 3546 } 3547 return MP_YES; 3548 } 3549 3550 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3551 /* Revision: 1.1.1.1 $ */ 3552 /* Date: 2011/03/12 22:58:18 $ */ 3553 3554 3555 /* d = a * b (mod c) */ 3556 static int 3557 mp_mulmod (mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3558 { 3559 int res; 3560 mp_int t; 3561 3562 if ((res = mp_init (&t)) != MP_OKAY) { 3563 return res; 3564 } 3565 3566 if ((res = mp_mul (a, b, &t)) != MP_OKAY) { 3567 mp_clear (&t); 3568 return res; 3569 } 3570 res = mp_mod (&t, c, d); 3571 mp_clear (&t); 3572 return res; 3573 } 3574 3575 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3576 /* Revision: 1.1.1.1 $ */ 3577 /* Date: 2011/03/12 22:58:18 $ */ 3578 3579 /* setups the montgomery reduction stuff */ 3580 static int 3581 mp_montgomery_setup (mp_int * n, mp_digit * rho) 3582 { 3583 mp_digit x, b; 3584 3585 /* fast inversion mod 2**k 3586 * 3587 * Based on the fact that 3588 * 3589 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3590 * => 2*X*A - X*X*A*A = 1 3591 * => 2*(1) - (1) = 1 3592 */ 3593 b = n->dp[0]; 3594 3595 if ((b & 1) == 0) { 3596 return MP_VAL; 3597 } 3598 3599 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3600 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3601 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3602 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3603 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3604 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3605 } 3606 3607 /* rho = -1/m mod b */ 3608 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3609 3610 return MP_OKAY; 3611 } 3612 3613 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3614 /* Revision: 1.1.1.1 $ */ 3615 /* Date: 2011/03/12 22:58:18 $ */ 3616 3617 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3618 * 3619 * This is an optimized implementation of montgomery_reduce 3620 * which uses the comba method to quickly calculate the columns of the 3621 * reduction. 3622 * 3623 * Based on Algorithm 14.32 on pp.601 of HAC. 3624 */ 3625 static int 3626 fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 3627 { 3628 int ix, res, olduse; 3629 /*LINTED*/ 3630 mp_word W[MP_WARRAY]; 3631 3632 /* get old used count */ 3633 olduse = x->used; 3634 3635 /* grow a as required */ 3636 if (x->alloc < n->used + 1) { 3637 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { 3638 return res; 3639 } 3640 } 3641 3642 /* first we have to get the digits of the input into 3643 * an array of double precision words W[...] 3644 */ 3645 { 3646 mp_word *_W; 3647 mp_digit *tmpx; 3648 3649 /* alias for the W[] array */ 3650 _W = W; 3651 3652 /* alias for the digits of x*/ 3653 tmpx = x->dp; 3654 3655 /* copy the digits of a into W[0..a->used-1] */ 3656 for (ix = 0; ix < x->used; ix++) { 3657 *_W++ = *tmpx++; 3658 } 3659 3660 /* zero the high words of W[a->used..m->used*2] */ 3661 for (; ix < n->used * 2 + 1; ix++) { 3662 *_W++ = 0; 3663 } 3664 } 3665 3666 /* now we proceed to zero successive digits 3667 * from the least significant upwards 3668 */ 3669 for (ix = 0; ix < n->used; ix++) { 3670 /* mu = ai * m' mod b 3671 * 3672 * We avoid a double precision multiplication (which isn't required) 3673 * by casting the value down to a mp_digit. Note this requires 3674 * that W[ix-1] have the carry cleared (see after the inner loop) 3675 */ 3676 mp_digit mu; 3677 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3678 3679 /* a = a + mu * m * b**i 3680 * 3681 * This is computed in place and on the fly. The multiplication 3682 * by b**i is handled by offseting which columns the results 3683 * are added to. 3684 * 3685 * Note the comba method normally doesn't handle carries in the 3686 * inner loop In this case we fix the carry from the previous 3687 * column since the Montgomery reduction requires digits of the 3688 * result (so far) [see above] to work. This is 3689 * handled by fixing up one carry after the inner loop. The 3690 * carry fixups are done in order so after these loops the 3691 * first m->used words of W[] have the carries fixed 3692 */ 3693 { 3694 int iy; 3695 mp_digit *tmpn; 3696 mp_word *_W; 3697 3698 /* alias for the digits of the modulus */ 3699 tmpn = n->dp; 3700 3701 /* Alias for the columns set by an offset of ix */ 3702 _W = W + ix; 3703 3704 /* inner loop */ 3705 for (iy = 0; iy < n->used; iy++) { 3706 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3707 } 3708 } 3709 3710 /* now fix carry for next digit, W[ix+1] */ 3711 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3712 } 3713 3714 /* now we have to propagate the carries and 3715 * shift the words downward [all those least 3716 * significant digits we zeroed]. 3717 */ 3718 { 3719 mp_digit *tmpx; 3720 mp_word *_W, *_W1; 3721 3722 /* nox fix rest of carries */ 3723 3724 /* alias for current word */ 3725 _W1 = W + ix; 3726 3727 /* alias for next word, where the carry goes */ 3728 _W = W + ++ix; 3729 3730 for (; ix <= n->used * 2 + 1; ix++) { 3731 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3732 } 3733 3734 /* copy out, A = A/b**n 3735 * 3736 * The result is A/b**n but instead of converting from an 3737 * array of mp_word to mp_digit than calling mp_rshd 3738 * we just copy them in the right order 3739 */ 3740 3741 /* alias for destination word */ 3742 tmpx = x->dp; 3743 3744 /* alias for shifted double precision result */ 3745 _W = W + n->used; 3746 3747 for (ix = 0; ix < n->used + 1; ix++) { 3748 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3749 } 3750 3751 /* zero oldused digits, if the input a was larger than 3752 * m->used+1 we'll have to clear the digits 3753 */ 3754 for (; ix < olduse; ix++) { 3755 *tmpx++ = 0; 3756 } 3757 } 3758 3759 /* set the max used and clamp */ 3760 x->used = n->used + 1; 3761 mp_clamp (x); 3762 3763 /* if A >= m then A = A - m */ 3764 if (mp_cmp_mag (x, n) != MP_LT) { 3765 return s_mp_sub (x, n, x); 3766 } 3767 return MP_OKAY; 3768 } 3769 3770 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3771 /* Revision: 1.2 $ */ 3772 /* Date: 2011/03/18 16:22:09 $ */ 3773 3774 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3775 static int 3776 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 3777 { 3778 int ix, res, digs; 3779 mp_digit mu; 3780 3781 /* can the fast reduction [comba] method be used? 3782 * 3783 * Note that unlike in mul you're safely allowed *less* 3784 * than the available columns [255 per default] since carries 3785 * are fixed up in the inner loop. 3786 */ 3787 digs = n->used * 2 + 1; 3788 if (((unsigned)digs < MP_WARRAY) && 3789 n->used < 3790 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 3791 return fast_mp_montgomery_reduce (x, n, rho); 3792 } 3793 3794 /* grow the input as required */ 3795 if (x->alloc < digs) { 3796 if ((res = mp_grow (x, digs)) != MP_OKAY) { 3797 return res; 3798 } 3799 } 3800 x->used = digs; 3801 3802 for (ix = 0; ix < n->used; ix++) { 3803 /* mu = ai * rho mod b 3804 * 3805 * The value of rho must be precalculated via 3806 * montgomery_setup() such that 3807 * it equals -1/n0 mod b this allows the 3808 * following inner loop to reduce the 3809 * input one digit at a time 3810 */ 3811 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3812 3813 /* a = a + mu * m * b**i */ 3814 { 3815 int iy; 3816 mp_digit *tmpn, *tmpx, u; 3817 mp_word r; 3818 3819 /* alias for digits of the modulus */ 3820 tmpn = n->dp; 3821 3822 /* alias for the digits of x [the input] */ 3823 tmpx = x->dp + ix; 3824 3825 /* set the carry to zero */ 3826 u = 0; 3827 3828 /* Multiply and add in place */ 3829 for (iy = 0; iy < n->used; iy++) { 3830 /* compute product and sum */ 3831 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3832 ((mp_word) u) + ((mp_word) * tmpx); 3833 3834 /* get carry */ 3835 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3836 3837 /* fix digit */ 3838 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3839 } 3840 /* At this point the ix'th digit of x should be zero */ 3841 3842 3843 /* propagate carries upwards as required*/ 3844 while (u) { 3845 *tmpx += u; 3846 u = *tmpx >> DIGIT_BIT; 3847 *tmpx++ &= MP_MASK; 3848 } 3849 } 3850 } 3851 3852 /* at this point the n.used'th least 3853 * significant digits of x are all zero 3854 * which means we can shift x to the 3855 * right by n.used digits and the 3856 * residue is unchanged. 3857 */ 3858 3859 /* x = x/b**n.used */ 3860 mp_clamp(x); 3861 mp_rshd (x, n->used); 3862 3863 /* if x >= n then x = x - n */ 3864 if (mp_cmp_mag (x, n) != MP_LT) { 3865 return s_mp_sub (x, n, x); 3866 } 3867 3868 return MP_OKAY; 3869 } 3870 3871 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3872 /* Revision: 1.3 $ */ 3873 /* Date: 2011/03/18 16:43:04 $ */ 3874 3875 /* determines the setup value */ 3876 static void 3877 mp_dr_setup(mp_int *a, mp_digit *d) 3878 { 3879 /* the casts are required if DIGIT_BIT is one less than 3880 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3881 */ 3882 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3883 ((mp_word)a->dp[0])); 3884 } 3885 3886 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3887 /* Revision: 1.1.1.1 $ */ 3888 /* Date: 2011/03/12 22:58:18 $ */ 3889 3890 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3891 * 3892 * Based on algorithm from the paper 3893 * 3894 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3895 * Chae Hoon Lim, Pil Joong Lee, 3896 * POSTECH Information Research Laboratories 3897 * 3898 * The modulus must be of a special format [see manual] 3899 * 3900 * Has been modified to use algorithm 7.10 from the LTM book instead 3901 * 3902 * Input x must be in the range 0 <= x <= (n-1)**2 3903 */ 3904 static int 3905 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) 3906 { 3907 int err, i, m; 3908 mp_word r; 3909 mp_digit mu, *tmpx1, *tmpx2; 3910 3911 /* m = digits in modulus */ 3912 m = n->used; 3913 3914 /* ensure that "x" has at least 2m digits */ 3915 if (x->alloc < m + m) { 3916 if ((err = mp_grow (x, m + m)) != MP_OKAY) { 3917 return err; 3918 } 3919 } 3920 3921 /* top of loop, this is where the code resumes if 3922 * another reduction pass is required. 3923 */ 3924 top: 3925 /* aliases for digits */ 3926 /* alias for lower half of x */ 3927 tmpx1 = x->dp; 3928 3929 /* alias for upper half of x, or x/B**m */ 3930 tmpx2 = x->dp + m; 3931 3932 /* set carry to zero */ 3933 mu = 0; 3934 3935 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3936 for (i = 0; i < m; i++) { 3937 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3938 *tmpx1++ = (mp_digit)(r & MP_MASK); 3939 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3940 } 3941 3942 /* set final carry */ 3943 *tmpx1++ = mu; 3944 3945 /* zero words above m */ 3946 for (i = m + 1; i < x->used; i++) { 3947 *tmpx1++ = 0; 3948 } 3949 3950 /* clamp, sub and return */ 3951 mp_clamp (x); 3952 3953 /* if x >= n then subtract and reduce again 3954 * Each successive "recursion" makes the input smaller and smaller. 3955 */ 3956 if (mp_cmp_mag (x, n) != MP_LT) { 3957 s_mp_sub(x, n, x); 3958 goto top; 3959 } 3960 return MP_OKAY; 3961 } 3962 3963 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3964 /* Revision: 1.1.1.1 $ */ 3965 /* Date: 2011/03/12 22:58:18 $ */ 3966 3967 /* determines the setup value */ 3968 static int 3969 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3970 { 3971 int res, p; 3972 mp_int tmp; 3973 3974 if ((res = mp_init(&tmp)) != MP_OKAY) { 3975 return res; 3976 } 3977 3978 p = mp_count_bits(a); 3979 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3980 mp_clear(&tmp); 3981 return res; 3982 } 3983 3984 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { 3985 mp_clear(&tmp); 3986 return res; 3987 } 3988 3989 *d = tmp.dp[0]; 3990 mp_clear(&tmp); 3991 return MP_OKAY; 3992 } 3993 3994 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3995 /* Revision: 1.1.1.1 $ */ 3996 /* Date: 2011/03/12 22:58:18 $ */ 3997 3998 /* reduces a modulo n where n is of the form 2**p - d */ 3999 static int 4000 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 4001 { 4002 mp_int q; 4003 int p, res; 4004 4005 if ((res = mp_init(&q)) != MP_OKAY) { 4006 return res; 4007 } 4008 4009 p = mp_count_bits(n); 4010 top: 4011 /* q = a/2**p, a = a mod 2**p */ 4012 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 4013 goto ERR; 4014 } 4015 4016 if (d != 1) { 4017 /* q = q * d */ 4018 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 4019 goto ERR; 4020 } 4021 } 4022 4023 /* a = a + q */ 4024 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 4025 goto ERR; 4026 } 4027 4028 if (mp_cmp_mag(a, n) != MP_LT) { 4029 s_mp_sub(a, n, a); 4030 goto top; 4031 } 4032 4033 ERR: 4034 mp_clear(&q); 4035 return res; 4036 } 4037 4038 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 4039 /* Revision: 1.1.1.1 $ */ 4040 /* Date: 2011/03/12 22:58:18 $ */ 4041 4042 /* 4043 * shifts with subtractions when the result is greater than b. 4044 * 4045 * The method is slightly modified to shift B unconditionally upto just under 4046 * the leading bit of b. This saves alot of multiple precision shifting. 4047 */ 4048 static int 4049 mp_montgomery_calc_normalization (mp_int * a, mp_int * b) 4050 { 4051 int x, bits, res; 4052 4053 /* how many bits of last digit does b use */ 4054 bits = mp_count_bits (b) % DIGIT_BIT; 4055 4056 if (b->used > 1) { 4057 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4058 return res; 4059 } 4060 } else { 4061 mp_set(a, 1); 4062 bits = 1; 4063 } 4064 4065 4066 /* now compute C = A * B mod b */ 4067 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4068 if ((res = mp_mul_2 (a, a)) != MP_OKAY) { 4069 return res; 4070 } 4071 if (mp_cmp_mag (a, b) != MP_LT) { 4072 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { 4073 return res; 4074 } 4075 } 4076 } 4077 4078 return MP_OKAY; 4079 } 4080 4081 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4082 /* Revision: 1.1.1.1 $ */ 4083 /* Date: 2011/03/12 22:58:18 $ */ 4084 4085 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4086 * 4087 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4088 * The value of k changes based on the size of the exponent. 4089 * 4090 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4091 */ 4092 4093 #define TAB_SIZE 256 4094 4095 static int 4096 mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4097 { 4098 mp_int M[TAB_SIZE], res; 4099 mp_digit buf, mp; 4100 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4101 4102 /* use a pointer to the reduction algorithm. This allows us to use 4103 * one of many reduction algorithms without modding the guts of 4104 * the code with if statements everywhere. 4105 */ 4106 int (*redux)(mp_int*,mp_int*,mp_digit); 4107 4108 /* find window size */ 4109 x = mp_count_bits (X); 4110 if (x <= 7) { 4111 winsize = 2; 4112 } else if (x <= 36) { 4113 winsize = 3; 4114 } else if (x <= 140) { 4115 winsize = 4; 4116 } else if (x <= 450) { 4117 winsize = 5; 4118 } else if (x <= 1303) { 4119 winsize = 6; 4120 } else if (x <= 3529) { 4121 winsize = 7; 4122 } else { 4123 winsize = 8; 4124 } 4125 4126 /* init M array */ 4127 /* init first cell */ 4128 if ((err = mp_init(&M[1])) != MP_OKAY) { 4129 return err; 4130 } 4131 4132 /* now init the second half of the array */ 4133 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4134 if ((err = mp_init(&M[x])) != MP_OKAY) { 4135 for (y = 1<<(winsize-1); y < x; y++) { 4136 mp_clear (&M[y]); 4137 } 4138 mp_clear(&M[1]); 4139 return err; 4140 } 4141 } 4142 4143 /* determine and setup reduction code */ 4144 if (redmode == 0) { 4145 /* now setup montgomery */ 4146 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { 4147 goto LBL_M; 4148 } 4149 4150 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4151 if (((unsigned)(P->used * 2 + 1) < MP_WARRAY) && 4152 P->used < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 4153 redux = fast_mp_montgomery_reduce; 4154 } else 4155 { 4156 /* use slower baseline Montgomery method */ 4157 redux = mp_montgomery_reduce; 4158 } 4159 } else if (redmode == 1) { 4160 /* setup DR reduction for moduli of the form B**k - b */ 4161 mp_dr_setup(P, &mp); 4162 redux = mp_dr_reduce; 4163 } else { 4164 /* setup DR reduction for moduli of the form 2**k - b */ 4165 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4166 goto LBL_M; 4167 } 4168 redux = mp_reduce_2k; 4169 } 4170 4171 /* setup result */ 4172 if ((err = mp_init (&res)) != MP_OKAY) { 4173 goto LBL_M; 4174 } 4175 4176 /* create M table 4177 * 4178 4179 * 4180 * The first half of the table is not computed though accept for M[0] and M[1] 4181 */ 4182 4183 if (redmode == 0) { 4184 /* now we need R mod m */ 4185 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { 4186 goto LBL_RES; 4187 } 4188 4189 /* now set M[1] to G * R mod m */ 4190 if ((err = mp_mulmod (&M[1], G, &res, P)) != MP_OKAY) { 4191 goto LBL_RES; 4192 } 4193 } else { 4194 mp_set(&res, 1); 4195 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { 4196 goto LBL_RES; 4197 } 4198 } 4199 4200 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4201 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4202 goto LBL_RES; 4203 } 4204 4205 for (x = 0; x < (winsize - 1); x++) { 4206 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4207 goto LBL_RES; 4208 } 4209 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4210 goto LBL_RES; 4211 } 4212 } 4213 4214 /* create upper table */ 4215 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4216 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4217 goto LBL_RES; 4218 } 4219 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { 4220 goto LBL_RES; 4221 } 4222 } 4223 4224 /* set initial mode and bit cnt */ 4225 mode = 0; 4226 bitcnt = 1; 4227 buf = 0; 4228 digidx = X->used - 1; 4229 bitcpy = 0; 4230 bitbuf = 0; 4231 4232 for (;;) { 4233 /* grab next digit as required */ 4234 if (--bitcnt == 0) { 4235 /* if digidx == -1 we are out of digits so break */ 4236 if (digidx == -1) { 4237 break; 4238 } 4239 /* read next digit and reset bitcnt */ 4240 buf = X->dp[digidx--]; 4241 bitcnt = (int)DIGIT_BIT; 4242 } 4243 4244 /* grab the next msb from the exponent */ 4245 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4246 buf <<= (mp_digit)1; 4247 4248 /* if the bit is zero and mode == 0 then we ignore it 4249 * These represent the leading zero bits before the first 1 bit 4250 * in the exponent. Technically this opt is not required but it 4251 * does lower the # of trivial squaring/reductions used 4252 */ 4253 if (mode == 0 && y == 0) { 4254 continue; 4255 } 4256 4257 /* if the bit is zero and mode == 1 then we square */ 4258 if (mode == 1 && y == 0) { 4259 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4260 goto LBL_RES; 4261 } 4262 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4263 goto LBL_RES; 4264 } 4265 continue; 4266 } 4267 4268 /* else we add it to the window */ 4269 bitbuf |= (y << (winsize - ++bitcpy)); 4270 mode = 2; 4271 4272 if (bitcpy == winsize) { 4273 /* ok window is filled so square as required and multiply */ 4274 /* square first */ 4275 for (x = 0; x < winsize; x++) { 4276 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4277 goto LBL_RES; 4278 } 4279 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4280 goto LBL_RES; 4281 } 4282 } 4283 4284 /* then multiply */ 4285 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 4286 goto LBL_RES; 4287 } 4288 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4289 goto LBL_RES; 4290 } 4291 4292 /* empty window and reset */ 4293 bitcpy = 0; 4294 bitbuf = 0; 4295 mode = 1; 4296 } 4297 } 4298 4299 /* if bits remain then square/multiply */ 4300 if (mode == 2 && bitcpy > 0) { 4301 /* square then multiply if the bit is set */ 4302 for (x = 0; x < bitcpy; x++) { 4303 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 4304 goto LBL_RES; 4305 } 4306 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4307 goto LBL_RES; 4308 } 4309 4310 /* get next bit of the window */ 4311 bitbuf <<= 1; 4312 if ((bitbuf & (1 << winsize)) != 0) { 4313 /* then multiply */ 4314 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 4315 goto LBL_RES; 4316 } 4317 if ((err = redux (&res, P, mp)) != MP_OKAY) { 4318 goto LBL_RES; 4319 } 4320 } 4321 } 4322 } 4323 4324 if (redmode == 0) { 4325 /* fixup result if Montgomery reduction is used 4326 * recall that any value in a Montgomery system is 4327 * actually multiplied by R mod n. So we have 4328 * to reduce one more time to cancel out the factor 4329 * of R. 4330 */ 4331 if ((err = redux(&res, P, mp)) != MP_OKAY) { 4332 goto LBL_RES; 4333 } 4334 } 4335 4336 /* swap res with Y */ 4337 mp_exch (&res, Y); 4338 err = MP_OKAY; 4339 LBL_RES:mp_clear (&res); 4340 LBL_M: 4341 mp_clear(&M[1]); 4342 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4343 mp_clear (&M[x]); 4344 } 4345 return err; 4346 } 4347 4348 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_exptmod_fast.c,v $ */ 4349 /* Revision: 1.4 $ */ 4350 /* Date: 2011/03/18 16:43:04 $ */ 4351 4352 /* this is a shell function that calls either the normal or Montgomery 4353 * exptmod functions. Originally the call to the montgomery code was 4354 * embedded in the normal function but that wasted alot of stack space 4355 * for nothing (since 99% of the time the Montgomery code would be called) 4356 */ 4357 static int 4358 mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4359 { 4360 int dr; 4361 4362 /* modulus P must be positive */ 4363 if (P->sign == MP_NEG) { 4364 return MP_VAL; 4365 } 4366 4367 /* if exponent X is negative we have to recurse */ 4368 if (X->sign == MP_NEG) { 4369 mp_int tmpG, tmpX; 4370 int err; 4371 4372 /* first compute 1/G mod P */ 4373 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4374 return err; 4375 } 4376 if ((err = mp_invmod(&tmpG, G, P)) != MP_OKAY) { 4377 mp_clear(&tmpG); 4378 return err; 4379 } 4380 4381 /* now get |X| */ 4382 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4383 mp_clear(&tmpG); 4384 return err; 4385 } 4386 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { 4387 mp_clear_multi(&tmpG, &tmpX, NULL); 4388 return err; 4389 } 4390 4391 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4392 err = mp_exptmod(&tmpG, &tmpX, P, Y); 4393 mp_clear_multi(&tmpG, &tmpX, NULL); 4394 return err; 4395 } 4396 4397 /* modified diminished radix reduction */ 4398 if (mp_reduce_is_2k_l(P) == MP_YES) { 4399 return s_mp_exptmod(G, X, P, Y, 1); 4400 } 4401 4402 /* is it a DR modulus? */ 4403 dr = mp_dr_is_modulus(P); 4404 4405 /* if not, is it a unrestricted DR modulus? */ 4406 if (dr == 0) { 4407 dr = mp_reduce_is_2k(P) << 1; 4408 } 4409 4410 /* if the modulus is odd or dr != 0 use the montgomery method */ 4411 if (BN_is_odd (P) == 1 || dr != 0) { 4412 return mp_exptmod_fast (G, X, P, Y, dr); 4413 } else { 4414 /* otherwise use the generic Barrett reduction technique */ 4415 return s_mp_exptmod (G, X, P, Y, 0); 4416 } 4417 } 4418 4419 /* reverse an array, used for radix code */ 4420 static void 4421 bn_reverse(unsigned char *s, int len) 4422 { 4423 int ix, iy; 4424 unsigned char t; 4425 4426 ix = 0; 4427 iy = len - 1; 4428 while (ix < iy) { 4429 t = s[ix]; 4430 s[ix] = s[iy]; 4431 s[iy] = t; 4432 ++ix; 4433 --iy; 4434 } 4435 } 4436 4437 static int 4438 s_is_power_of_two(mp_digit b, int *p) 4439 { 4440 int x; 4441 4442 /* fast return if no power of two */ 4443 if ((b==0) || (b & (b-1))) { 4444 return 0; 4445 } 4446 4447 for (x = 0; x < DIGIT_BIT; x++) { 4448 if (b == (((mp_digit)1)<<x)) { 4449 *p = x; 4450 return 1; 4451 } 4452 } 4453 return 0; 4454 } 4455 4456 /* single digit division (based on routine from MPI) */ 4457 static int 4458 mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4459 { 4460 mp_int q; 4461 mp_word w; 4462 mp_digit t; 4463 int res, ix; 4464 4465 /* cannot divide by zero */ 4466 if (b == 0) { 4467 return MP_VAL; 4468 } 4469 4470 /* quick outs */ 4471 if (b == 1 || mp_iszero(a) == 1) { 4472 if (d != NULL) { 4473 *d = 0; 4474 } 4475 if (c != NULL) { 4476 return mp_copy(a, c); 4477 } 4478 return MP_OKAY; 4479 } 4480 4481 /* power of two ? */ 4482 if (s_is_power_of_two(b, &ix) == 1) { 4483 if (d != NULL) { 4484 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4485 } 4486 if (c != NULL) { 4487 return mp_div_2d(a, ix, c, NULL); 4488 } 4489 return MP_OKAY; 4490 } 4491 4492 #ifdef BN_MP_DIV_3_C 4493 /* three? */ 4494 if (b == 3) { 4495 return mp_div_3(a, c, d); 4496 } 4497 #endif 4498 4499 /* no easy answer [c'est la vie]. Just division */ 4500 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4501 return res; 4502 } 4503 4504 q.used = a->used; 4505 q.sign = a->sign; 4506 w = 0; 4507 for (ix = a->used - 1; ix >= 0; ix--) { 4508 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4509 4510 if (w >= b) { 4511 t = (mp_digit)(w / b); 4512 w -= ((mp_word)t) * ((mp_word)b); 4513 } else { 4514 t = 0; 4515 } 4516 q.dp[ix] = (mp_digit)t; 4517 } 4518 4519 if (d != NULL) { 4520 *d = (mp_digit)w; 4521 } 4522 4523 if (c != NULL) { 4524 mp_clamp(&q); 4525 mp_exch(&q, c); 4526 } 4527 mp_clear(&q); 4528 4529 return res; 4530 } 4531 4532 static int 4533 mp_mod_d(mp_int *a, mp_digit b, mp_digit *c) 4534 { 4535 return mp_div_d(a, b, NULL, c); 4536 } 4537 4538 static const mp_digit ltm_prime_tab[] = { 4539 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4540 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4541 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4542 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4543 #ifndef MP_8BIT 4544 0x0083, 4545 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4546 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4547 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4548 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4549 4550 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4551 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4552 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4553 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4554 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4555 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4556 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4557 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4558 4559 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4560 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4561 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4562 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4563 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4564 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4565 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4566 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4567 4568 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4569 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4570 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4571 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4572 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4573 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4574 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4575 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4576 #endif 4577 }; 4578 4579 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4580 4581 static int 4582 mp_prime_is_divisible(mp_int *a, int *result) 4583 { 4584 int err, ix; 4585 mp_digit res; 4586 4587 /* default to not */ 4588 *result = MP_NO; 4589 4590 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4591 /* what is a mod LBL_prime_tab[ix] */ 4592 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) { 4593 return err; 4594 } 4595 4596 /* is the residue zero? */ 4597 if (res == 0) { 4598 *result = MP_YES; 4599 return MP_OKAY; 4600 } 4601 } 4602 4603 return MP_OKAY; 4604 } 4605 4606 /* single digit addition */ 4607 static int 4608 mp_add_d(mp_int *a, mp_digit b, mp_int *c) 4609 { 4610 int res, ix, oldused; 4611 mp_digit *tmpa, *tmpc, mu; 4612 4613 /* grow c as required */ 4614 if (c->alloc < a->used + 1) { 4615 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4616 return res; 4617 } 4618 } 4619 4620 /* if a is negative and |a| >= b, call c = |a| - b */ 4621 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4622 /* temporarily fix sign of a */ 4623 a->sign = MP_ZPOS; 4624 4625 /* c = |a| - b */ 4626 res = mp_sub_d(a, b, c); 4627 4628 /* fix sign */ 4629 a->sign = c->sign = MP_NEG; 4630 4631 /* clamp */ 4632 mp_clamp(c); 4633 4634 return res; 4635 } 4636 4637 /* old number of used digits in c */ 4638 oldused = c->used; 4639 4640 /* sign always positive */ 4641 c->sign = MP_ZPOS; 4642 4643 /* source alias */ 4644 tmpa = a->dp; 4645 4646 /* destination alias */ 4647 tmpc = c->dp; 4648 4649 /* if a is positive */ 4650 if (a->sign == MP_ZPOS) { 4651 /* add digit, after this we're propagating 4652 * the carry. 4653 */ 4654 *tmpc = *tmpa++ + b; 4655 mu = *tmpc >> DIGIT_BIT; 4656 *tmpc++ &= MP_MASK; 4657 4658 /* now handle rest of the digits */ 4659 for (ix = 1; ix < a->used; ix++) { 4660 *tmpc = *tmpa++ + mu; 4661 mu = *tmpc >> DIGIT_BIT; 4662 *tmpc++ &= MP_MASK; 4663 } 4664 /* set final carry */ 4665 ix++; 4666 *tmpc++ = mu; 4667 4668 /* setup size */ 4669 c->used = a->used + 1; 4670 } else { 4671 /* a was negative and |a| < b */ 4672 c->used = 1; 4673 4674 /* the result is a single digit */ 4675 if (a->used == 1) { 4676 *tmpc++ = b - a->dp[0]; 4677 } else { 4678 *tmpc++ = b; 4679 } 4680 4681 /* setup count so the clearing of oldused 4682 * can fall through correctly 4683 */ 4684 ix = 1; 4685 } 4686 4687 /* now zero to oldused */ 4688 while (ix++ < oldused) { 4689 *tmpc++ = 0; 4690 } 4691 mp_clamp(c); 4692 4693 return MP_OKAY; 4694 } 4695 4696 /* single digit subtraction */ 4697 static int 4698 mp_sub_d(mp_int *a, mp_digit b, mp_int *c) 4699 { 4700 mp_digit *tmpa, *tmpc, mu; 4701 int res, ix, oldused; 4702 4703 /* grow c as required */ 4704 if (c->alloc < a->used + 1) { 4705 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4706 return res; 4707 } 4708 } 4709 4710 /* if a is negative just do an unsigned 4711 * addition [with fudged signs] 4712 */ 4713 if (a->sign == MP_NEG) { 4714 a->sign = MP_ZPOS; 4715 res = mp_add_d(a, b, c); 4716 a->sign = c->sign = MP_NEG; 4717 4718 /* clamp */ 4719 mp_clamp(c); 4720 4721 return res; 4722 } 4723 4724 /* setup regs */ 4725 oldused = c->used; 4726 tmpa = a->dp; 4727 tmpc = c->dp; 4728 4729 /* if a <= b simply fix the single digit */ 4730 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4731 if (a->used == 1) { 4732 *tmpc++ = b - *tmpa; 4733 } else { 4734 *tmpc++ = b; 4735 } 4736 ix = 1; 4737 4738 /* negative/1digit */ 4739 c->sign = MP_NEG; 4740 c->used = 1; 4741 } else { 4742 /* positive/size */ 4743 c->sign = MP_ZPOS; 4744 c->used = a->used; 4745 4746 /* subtract first digit */ 4747 *tmpc = *tmpa++ - b; 4748 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4749 *tmpc++ &= MP_MASK; 4750 4751 /* handle rest of the digits */ 4752 for (ix = 1; ix < a->used; ix++) { 4753 *tmpc = *tmpa++ - mu; 4754 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4755 *tmpc++ &= MP_MASK; 4756 } 4757 } 4758 4759 /* zero excess digits */ 4760 while (ix++ < oldused) { 4761 *tmpc++ = 0; 4762 } 4763 mp_clamp(c); 4764 return MP_OKAY; 4765 } 4766 4767 static const int lnz[16] = { 4768 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4769 }; 4770 4771 /* Counts the number of lsbs which are zero before the first zero bit */ 4772 static int 4773 mp_cnt_lsb(mp_int *a) 4774 { 4775 int x; 4776 mp_digit q, qq; 4777 4778 /* easy out */ 4779 if (mp_iszero(a) == 1) { 4780 return 0; 4781 } 4782 4783 /* scan lower digits until non-zero */ 4784 for (x = 0; x < a->used && a->dp[x] == 0; x++); 4785 q = a->dp[x]; 4786 x *= DIGIT_BIT; 4787 4788 /* now scan this digit until a 1 is found */ 4789 if ((q & 1) == 0) { 4790 do { 4791 qq = q & 15; 4792 /* LINTED previous op ensures range of qq */ 4793 x += lnz[qq]; 4794 q >>= 4; 4795 } while (qq == 0); 4796 } 4797 return x; 4798 } 4799 4800 /* c = a * a (mod b) */ 4801 static int 4802 mp_sqrmod(mp_int *a, mp_int *b, mp_int *c) 4803 { 4804 int res; 4805 mp_int t; 4806 4807 if ((res = mp_init (&t)) != MP_OKAY) { 4808 return res; 4809 } 4810 4811 if ((res = mp_sqr (a, &t)) != MP_OKAY) { 4812 mp_clear (&t); 4813 return res; 4814 } 4815 res = mp_mod (&t, b, c); 4816 mp_clear (&t); 4817 return res; 4818 } 4819 static int 4820 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4821 { 4822 mp_int n1, y, r; 4823 int s, j, err; 4824 4825 /* default */ 4826 *result = MP_NO; 4827 4828 /* ensure b > 1 */ 4829 if (mp_cmp_d(b, 1) != MP_GT) { 4830 return MP_VAL; 4831 } 4832 4833 /* get n1 = a - 1 */ 4834 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) { 4835 return err; 4836 } 4837 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { 4838 goto LBL_N1; 4839 } 4840 4841 /* set 2**s * r = n1 */ 4842 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { 4843 goto LBL_N1; 4844 } 4845 4846 /* count the number of least significant bits 4847 * which are zero 4848 */ 4849 s = mp_cnt_lsb(&r); 4850 4851 /* now divide n - 1 by 2**s */ 4852 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { 4853 goto LBL_R; 4854 } 4855 4856 /* compute y = b**r mod a */ 4857 if ((err = mp_init (&y)) != MP_OKAY) { 4858 goto LBL_R; 4859 } 4860 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) { 4861 goto LBL_Y; 4862 } 4863 4864 /* if y != 1 and y != n1 do */ 4865 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) { 4866 j = 1; 4867 /* while j <= s-1 and y != n1 */ 4868 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { 4869 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { 4870 goto LBL_Y; 4871 } 4872 4873 /* if y == 1 then composite */ 4874 if (mp_cmp_d (&y, 1) == MP_EQ) { 4875 goto LBL_Y; 4876 } 4877 4878 ++j; 4879 } 4880 4881 /* if y != n1 then composite */ 4882 if (mp_cmp (&y, &n1) != MP_EQ) { 4883 goto LBL_Y; 4884 } 4885 } 4886 4887 /* probably prime now */ 4888 *result = MP_YES; 4889 LBL_Y:mp_clear (&y); 4890 LBL_R:mp_clear (&r); 4891 LBL_N1:mp_clear (&n1); 4892 return err; 4893 } 4894 4895 /* performs a variable number of rounds of Miller-Rabin 4896 * 4897 * Probability of error after t rounds is no more than 4898 4899 * 4900 * Sets result to 1 if probably prime, 0 otherwise 4901 */ 4902 static int 4903 mp_prime_is_prime(mp_int *a, int t, int *result) 4904 { 4905 mp_int b; 4906 int ix, err, res; 4907 4908 /* default to no */ 4909 *result = MP_NO; 4910 4911 /* valid value of t? */ 4912 if (t <= 0 || t > (int)PRIME_SIZE) { 4913 return MP_VAL; 4914 } 4915 4916 /* is the input equal to one of the primes in the table? */ 4917 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4918 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { 4919 *result = 1; 4920 return MP_OKAY; 4921 } 4922 } 4923 4924 /* first perform trial division */ 4925 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { 4926 return err; 4927 } 4928 4929 /* return if it was trivially divisible */ 4930 if (res == MP_YES) { 4931 return MP_OKAY; 4932 } 4933 4934 /* now perform the miller-rabin rounds */ 4935 if ((err = mp_init (&b)) != MP_OKAY) { 4936 return err; 4937 } 4938 4939 for (ix = 0; ix < t; ix++) { 4940 /* set the prime */ 4941 mp_set (&b, ltm_prime_tab[ix]); 4942 4943 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { 4944 goto LBL_B; 4945 } 4946 4947 if (res == MP_NO) { 4948 goto LBL_B; 4949 } 4950 } 4951 4952 /* passed the test */ 4953 *result = MP_YES; 4954 LBL_B:mp_clear (&b); 4955 return err; 4956 } 4957 4958 /* returns size of ASCII reprensentation */ 4959 static int 4960 mp_radix_size (mp_int *a, int radix, int *size) 4961 { 4962 int res, digs; 4963 mp_int t; 4964 mp_digit d; 4965 4966 *size = 0; 4967 4968 /* special case for binary */ 4969 if (radix == 2) { 4970 *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4971 return MP_OKAY; 4972 } 4973 4974 /* make sure the radix is in range */ 4975 if (radix < 2 || radix > 64) { 4976 return MP_VAL; 4977 } 4978 4979 if (mp_iszero(a) == MP_YES) { 4980 *size = 2; 4981 return MP_OKAY; 4982 } 4983 4984 /* digs is the digit count */ 4985 digs = 0; 4986 4987 /* if it's negative add one for the sign */ 4988 if (a->sign == MP_NEG) { 4989 ++digs; 4990 } 4991 4992 /* init a copy of the input */ 4993 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 4994 return res; 4995 } 4996 4997 /* force temp to positive */ 4998 t.sign = MP_ZPOS; 4999 5000 /* fetch out all of the digits */ 5001 while (mp_iszero (&t) == MP_NO) { 5002 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5003 mp_clear (&t); 5004 return res; 5005 } 5006 ++digs; 5007 } 5008 mp_clear (&t); 5009 5010 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 5011 *size = digs + 1; 5012 return MP_OKAY; 5013 } 5014 5015 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 5016 5017 /* stores a bignum as a ASCII string in a given radix (2..64) 5018 * 5019 * Stores upto maxlen-1 chars and always a NULL byte 5020 */ 5021 static int 5022 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 5023 { 5024 int res, digs; 5025 mp_int t; 5026 mp_digit d; 5027 char *_s = str; 5028 5029 /* check range of the maxlen, radix */ 5030 if (maxlen < 2 || radix < 2 || radix > 64) { 5031 return MP_VAL; 5032 } 5033 5034 /* quick out if its zero */ 5035 if (mp_iszero(a) == MP_YES) { 5036 *str++ = '0'; 5037 *str = '\0'; 5038 return MP_OKAY; 5039 } 5040 5041 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 5042 return res; 5043 } 5044 5045 /* if it is negative output a - */ 5046 if (t.sign == MP_NEG) { 5047 /* we have to reverse our digits later... but not the - sign!! */ 5048 ++_s; 5049 5050 /* store the flag and mark the number as positive */ 5051 *str++ = '-'; 5052 t.sign = MP_ZPOS; 5053 5054 /* subtract a char */ 5055 --maxlen; 5056 } 5057 5058 digs = 0; 5059 while (mp_iszero (&t) == 0) { 5060 if (--maxlen < 1) { 5061 /* no more room */ 5062 break; 5063 } 5064 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5065 mp_clear (&t); 5066 return res; 5067 } 5068 /* LINTED -- radix' range is checked above, limits d's range */ 5069 *str++ = mp_s_rmap[d]; 5070 ++digs; 5071 } 5072 5073 /* reverse the digits of the string. In this case _s points 5074 * to the first digit [exluding the sign] of the number 5075 */ 5076 bn_reverse ((unsigned char *)_s, digs); 5077 5078 /* append a NULL so the string is properly terminated */ 5079 *str = '\0'; 5080 5081 mp_clear (&t); 5082 return MP_OKAY; 5083 } 5084 5085 static char * 5086 formatbn(const BIGNUM *a, const int radix) 5087 { 5088 char *s; 5089 int len; 5090 5091 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5092 return NULL; 5093 } 5094 if ((s = netpgp_allocate(1, (size_t)len)) != NULL) { 5095 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5096 netpgp_deallocate(s, (size_t)len); 5097 return NULL; 5098 } 5099 } 5100 return s; 5101 } 5102 5103 static int 5104 mp_getradix_num(mp_int *a, int radix, char *s) 5105 { 5106 int err, ch, neg, y; 5107 5108 /* clear a */ 5109 mp_zero(a); 5110 5111 /* if first digit is - then set negative */ 5112 if ((ch = *s++) == '-') { 5113 neg = MP_NEG; 5114 ch = *s++; 5115 } else { 5116 neg = MP_ZPOS; 5117 } 5118 5119 for (;;) { 5120 /* find y in the radix map */ 5121 for (y = 0; y < radix; y++) { 5122 if (mp_s_rmap[y] == ch) { 5123 break; 5124 } 5125 } 5126 if (y == radix) { 5127 break; 5128 } 5129 5130 /* shift up and add */ 5131 if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) { 5132 return err; 5133 } 5134 if ((err = mp_add_d(a, y, a)) != MP_OKAY) { 5135 return err; 5136 } 5137 5138 ch = *s++; 5139 } 5140 if (mp_cmp_d(a, 0) != MP_EQ) { 5141 a->sign = neg; 5142 } 5143 5144 return MP_OKAY; 5145 } 5146 5147 static int 5148 getbn(BIGNUM **a, const char *str, int radix) 5149 { 5150 int len; 5151 5152 if (a == NULL || str == NULL || (*a = BN_new()) == NULL) { 5153 return 0; 5154 } 5155 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5156 return 0; 5157 } 5158 mp_radix_size(__UNCONST(a), radix, &len); 5159 return len - 1; 5160 } 5161 5162 /* d = a - b (mod c) */ 5163 static int 5164 mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d) 5165 { 5166 int res; 5167 mp_int t; 5168 5169 5170 if ((res = mp_init (&t)) != MP_OKAY) { 5171 return res; 5172 } 5173 5174 if ((res = mp_sub (a, b, &t)) != MP_OKAY) { 5175 mp_clear (&t); 5176 return res; 5177 } 5178 res = mp_mod (&t, c, d); 5179 mp_clear (&t); 5180 return res; 5181 } 5182 5183 /**************************************************************************/ 5184 5185 /* BIGNUM emulation layer */ 5186 5187 /* essentiually, these are just wrappers around the libtommath functions */ 5188 /* usually the order of args changes */ 5189 /* the BIGNUM API tends to have more const poisoning */ 5190 /* these wrappers also check the arguments passed for sanity */ 5191 5192 BIGNUM * 5193 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret) 5194 { 5195 if (data == NULL) { 5196 return BN_new(); 5197 } 5198 if (ret == NULL) { 5199 ret = BN_new(); 5200 } 5201 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5202 } 5203 5204 /* store in unsigned [big endian] format */ 5205 int 5206 BN_bn2bin(const BIGNUM *a, unsigned char *b) 5207 { 5208 BIGNUM t; 5209 int x; 5210 5211 if (a == NULL || b == NULL) { 5212 return -1; 5213 } 5214 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5215 return -1; 5216 } 5217 for (x = 0; !BN_is_zero(&t) ; ) { 5218 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5219 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY) { 5220 mp_clear(&t); 5221 return -1; 5222 } 5223 } 5224 bn_reverse(b, x); 5225 mp_clear(&t); 5226 return x; 5227 } 5228 5229 void 5230 BN_init(BIGNUM *a) 5231 { 5232 if (a != NULL) { 5233 mp_init(a); 5234 } 5235 } 5236 5237 BIGNUM * 5238 BN_new(void) 5239 { 5240 BIGNUM *a; 5241 5242 if ((a = netpgp_allocate(1, sizeof(*a))) != NULL) { 5243 mp_init(a); 5244 } 5245 return a; 5246 } 5247 5248 /* copy, b = a */ 5249 int 5250 BN_copy(BIGNUM *b, const BIGNUM *a) 5251 { 5252 if (a == NULL || b == NULL) { 5253 return MP_VAL; 5254 } 5255 return mp_copy(__UNCONST(a), b); 5256 } 5257 5258 BIGNUM * 5259 BN_dup(const BIGNUM *a) 5260 { 5261 BIGNUM *ret; 5262 5263 if (a == NULL) { 5264 return NULL; 5265 } 5266 if ((ret = BN_new()) != NULL) { 5267 BN_copy(ret, a); 5268 } 5269 return ret; 5270 } 5271 5272 void 5273 BN_swap(BIGNUM *a, BIGNUM *b) 5274 { 5275 if (a && b) { 5276 mp_exch(a, b); 5277 } 5278 } 5279 5280 int 5281 BN_lshift(BIGNUM *r, const BIGNUM *a, int n) 5282 { 5283 if (r == NULL || a == NULL || n < 0) { 5284 return 0; 5285 } 5286 BN_copy(r, a); 5287 return mp_lshd(r, n) == MP_OKAY; 5288 } 5289 5290 int 5291 BN_lshift1(BIGNUM *r, BIGNUM *a) 5292 { 5293 if (r == NULL || a == NULL) { 5294 return 0; 5295 } 5296 BN_copy(r, a); 5297 return mp_lshd(r, 1) == MP_OKAY; 5298 } 5299 5300 int 5301 BN_rshift(BIGNUM *r, const BIGNUM *a, int n) 5302 { 5303 if (r == NULL || a == NULL || n < 0) { 5304 return MP_VAL; 5305 } 5306 BN_copy(r, a); 5307 return mp_rshd(r, n) == MP_OKAY; 5308 } 5309 5310 int 5311 BN_rshift1(BIGNUM *r, BIGNUM *a) 5312 { 5313 if (r == NULL || a == NULL) { 5314 return 0; 5315 } 5316 BN_copy(r, a); 5317 return mp_rshd(r, 1) == MP_OKAY; 5318 } 5319 5320 int 5321 BN_set_word(BIGNUM *a, BN_ULONG w) 5322 { 5323 if (a == NULL) { 5324 return 0; 5325 } 5326 mp_set(a, w); 5327 return 1; 5328 } 5329 5330 int 5331 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5332 { 5333 if (a == NULL || b == NULL || r == NULL) { 5334 return 0; 5335 } 5336 return mp_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5337 } 5338 5339 int 5340 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5341 { 5342 if (a == NULL || b == NULL || r == NULL) { 5343 return 0; 5344 } 5345 return mp_sub(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5346 } 5347 5348 int 5349 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 5350 { 5351 if (a == NULL || b == NULL || r == NULL) { 5352 return 0; 5353 } 5354 USE_ARG(ctx); 5355 return mp_mul(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5356 } 5357 5358 int 5359 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx) 5360 { 5361 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5362 return 0; 5363 } 5364 USE_ARG(ctx); 5365 return mp_div(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5366 } 5367 5368 void 5369 BN_free(BIGNUM *a) 5370 { 5371 if (a) { 5372 mp_clear(a); 5373 } 5374 } 5375 5376 void 5377 BN_clear(BIGNUM *a) 5378 { 5379 if (a) { 5380 mp_clear(a); 5381 } 5382 } 5383 5384 void 5385 BN_clear_free(BIGNUM *a) 5386 { 5387 if (a) { 5388 mp_clear(a); 5389 } 5390 } 5391 5392 int 5393 BN_num_bytes(const BIGNUM *a) 5394 { 5395 if (a == NULL) { 5396 return MP_VAL; 5397 } 5398 return mp_unsigned_bin_size(__UNCONST(a)); 5399 } 5400 5401 int 5402 BN_num_bits(const BIGNUM *a) 5403 { 5404 if (a == NULL) { 5405 return 0; 5406 } 5407 return mp_count_bits(a); 5408 } 5409 5410 void 5411 BN_set_negative(BIGNUM *a, int n) 5412 { 5413 if (a) { 5414 a->sign = (n) ? MP_NEG : 0; 5415 } 5416 } 5417 5418 int 5419 BN_cmp(BIGNUM *a, BIGNUM *b) 5420 { 5421 if (a == NULL || b == NULL) { 5422 return MP_VAL; 5423 } 5424 switch(mp_cmp(a, b)) { 5425 case MP_LT: 5426 return -1; 5427 case MP_GT: 5428 return 1; 5429 case MP_EQ: 5430 default: 5431 return 0; 5432 } 5433 } 5434 5435 int 5436 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx) 5437 { 5438 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5439 return MP_VAL; 5440 } 5441 USE_ARG(ctx); 5442 return mp_exptmod(G, X, P, Y) == MP_OKAY; 5443 } 5444 5445 BIGNUM * 5446 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) 5447 { 5448 USE_ARG(ctx); 5449 if (r == NULL || a == NULL || n == NULL) { 5450 return NULL; 5451 } 5452 return (mp_invmod(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5453 } 5454 5455 int 5456 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5457 { 5458 USE_ARG(ctx); 5459 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5460 return 0; 5461 } 5462 return mp_mulmod(ret, a, b, __UNCONST(m)) == MP_OKAY; 5463 } 5464 5465 BN_CTX * 5466 BN_CTX_new(void) 5467 { 5468 return netpgp_allocate(1, sizeof(BN_CTX)); 5469 } 5470 5471 void 5472 BN_CTX_init(BN_CTX *c) 5473 { 5474 if (c != NULL) { 5475 c->arraysize = 15; 5476 if ((c->v = netpgp_allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5477 c->arraysize = 0; 5478 } 5479 } 5480 } 5481 5482 BIGNUM * 5483 BN_CTX_get(BN_CTX *ctx) 5484 { 5485 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5486 return NULL; 5487 } 5488 return ctx->v[ctx->count++] = BN_new(); 5489 } 5490 5491 void 5492 BN_CTX_start(BN_CTX *ctx) 5493 { 5494 BN_CTX_init(ctx); 5495 } 5496 5497 void 5498 BN_CTX_free(BN_CTX *c) 5499 { 5500 unsigned i; 5501 5502 if (c != NULL && c->v != NULL) { 5503 for (i = 0 ; i < c->count ; i++) { 5504 BN_clear_free(c->v[i]); 5505 } 5506 netpgp_deallocate(c->v, sizeof(*c->v) * c->arraysize); 5507 } 5508 } 5509 5510 void 5511 BN_CTX_end(BN_CTX *ctx) 5512 { 5513 BN_CTX_free(ctx); 5514 } 5515 5516 char * 5517 BN_bn2hex(const BIGNUM *a) 5518 { 5519 return (a == NULL) ? NULL : formatbn(a, 16); 5520 } 5521 5522 char * 5523 BN_bn2dec(const BIGNUM *a) 5524 { 5525 return (a == NULL) ? NULL : formatbn(a, 10); 5526 } 5527 5528 #ifndef _KERNEL 5529 int 5530 BN_print_fp(FILE *fp, const BIGNUM *a) 5531 { 5532 char *s; 5533 int ret; 5534 5535 if (fp == NULL || a == NULL) { 5536 return 0; 5537 } 5538 s = BN_bn2hex(a); 5539 ret = fprintf(fp, "%s", s); 5540 netpgp_deallocate(s, strlen(s) + 1); 5541 return ret; 5542 } 5543 #endif 5544 5545 int 5546 BN_rand(BIGNUM *rnd, int bits, int top, int bottom) 5547 { 5548 uint64_t r; 5549 int digits; 5550 int i; 5551 5552 if (rnd == NULL) { 5553 return 0; 5554 } 5555 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5556 for (i = 0 ; i < digits ; i++) { 5557 r = (uint64_t)arc4random(); 5558 r <<= 32; 5559 r |= arc4random(); 5560 rnd->dp[i] = (r & MP_MASK); 5561 } 5562 if (top == 0) { 5563 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5564 } 5565 if (top == 1) { 5566 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5567 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5568 } 5569 if (bottom) { 5570 rnd->dp[0] |= 0x1; 5571 } 5572 return 1; 5573 } 5574 5575 int 5576 BN_rand_range(BIGNUM *rnd, BIGNUM *range) 5577 { 5578 if (rnd == NULL || range == NULL || BN_is_zero(range)) { 5579 return 0; 5580 } 5581 BN_rand(rnd, BN_num_bits(range), 1, 0); 5582 return mp_mod(rnd, range, rnd) == MP_OKAY; 5583 } 5584 5585 int 5586 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg) 5587 { 5588 int primality; 5589 5590 if (a == NULL) { 5591 return 0; 5592 } 5593 USE_ARG(ctx); 5594 USE_ARG(cb_arg); 5595 USE_ARG(callback); 5596 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5597 } 5598 5599 const BIGNUM * 5600 BN_value_one(void) 5601 { 5602 static mp_digit digit = 1UL; 5603 static const BIGNUM one = { &digit, 1, 1, 0 }; 5604 5605 return &one; 5606 } 5607 5608 int 5609 BN_hex2bn(BIGNUM **a, const char *str) 5610 { 5611 return getbn(a, str, 16); 5612 } 5613 5614 int 5615 BN_dec2bn(BIGNUM **a, const char *str) 5616 { 5617 return getbn(a, str, 10); 5618 } 5619 5620 int 5621 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5622 { 5623 USE_ARG(ctx); 5624 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5625 return 0; 5626 } 5627 return mp_submod(a, b, __UNCONST(m), r) == MP_OKAY; 5628 } 5629 5630 int 5631 BN_is_bit_set(const BIGNUM *a, int n) 5632 { 5633 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5634 return 0; 5635 } 5636 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5637 } 5638