1 /* $NetBSD: bignum.c,v 1.4 2021/11/10 16:02:14 msaitoh Exp $ */ 2 3 /*- 4 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 /* LibTomMath, multiple-precision integer library -- Tom St Denis 28 * 29 * LibTomMath is a library that provides multiple-precision 30 * integer arithmetic as well as number theoretic functionality. 31 * 32 * The library was designed directly after the MPI library by 33 * Michael Fromberger but has been written from scratch with 34 * additional optimizations in place. 35 * 36 * The library is free for all purposes without any express 37 * guarantee it works. 38 * 39 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 40 */ 41 42 #include <sys/types.h> 43 #include <sys/param.h> 44 45 #include <limits.h> 46 #include <stdarg.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 52 #include "bn.h" 53 54 /**************************************************************************/ 55 56 /* LibTomMath, multiple-precision integer library -- Tom St Denis 57 * 58 * LibTomMath is a library that provides multiple-precision 59 * integer arithmetic as well as number theoretic functionality. 60 * 61 * The library was designed directly after the MPI library by 62 * Michael Fromberger but has been written from scratch with 63 * additional optimizations in place. 64 * 65 * The library is free for all purposes without any express 66 * guarantee it works. 67 * 68 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 69 */ 70 71 #define MP_PREC 32 72 #define DIGIT_BIT 28 73 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 74 75 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 76 77 #define MP_NO 0 78 #define MP_YES 1 79 80 #ifndef USE_ARG 81 #define USE_ARG(x) /*LINTED*/(void)&(x) 82 #endif 83 84 #ifndef __arraycount 85 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 86 #endif 87 88 #ifndef MIN 89 #define MIN(a,b) (((a)<(b))?(a):(b)) 90 #endif 91 92 #define MP_ISZERO(a) (((a)->used == 0) ? MP_YES : MP_NO) 93 94 typedef int mp_err; 95 96 static int signed_multiply(mp_int * a, mp_int * b, mp_int * c); 97 static int square(mp_int * a, mp_int * b); 98 99 static int signed_subtract_word(mp_int *a, mp_digit b, mp_int *c); 100 101 static inline void * 102 allocate(size_t n, size_t m) 103 { 104 return calloc(n, m); 105 } 106 107 static inline void 108 deallocate(void *v, size_t sz) 109 { 110 USE_ARG(sz); 111 free(v); 112 } 113 114 /* set to zero */ 115 static inline void 116 mp_zero(mp_int *a) 117 { 118 a->sign = MP_ZPOS; 119 a->used = 0; 120 memset(a->dp, 0x0, a->alloc * sizeof(*a->dp)); 121 } 122 123 /* grow as required */ 124 static int 125 mp_grow(mp_int *a, int size) 126 { 127 mp_digit *tmp; 128 129 /* if the alloc size is smaller alloc more ram */ 130 if (a->alloc < size) { 131 /* ensure there are always at least MP_PREC digits extra on top */ 132 size += (MP_PREC * 2) - (size % MP_PREC); 133 134 /* reallocate the array a->dp 135 * 136 * We store the return in a temporary variable 137 * in case the operation failed we don't want 138 * to overwrite the dp member of a. 139 */ 140 tmp = realloc(a->dp, sizeof(*tmp) * size); 141 if (tmp == NULL) { 142 /* reallocation failed but "a" is still valid [can be freed] */ 143 return MP_MEM; 144 } 145 146 /* reallocation succeeded so set a->dp */ 147 a->dp = tmp; 148 /* zero excess digits */ 149 memset(&a->dp[a->alloc], 0x0, (size - a->alloc) * sizeof(*a->dp)); 150 a->alloc = size; 151 } 152 return MP_OKAY; 153 } 154 155 /* shift left a certain amount of digits */ 156 static int 157 lshift_digits(mp_int * a, int b) 158 { 159 mp_digit *top, *bottom; 160 int x, res; 161 162 /* if its less than zero return */ 163 if (b <= 0) { 164 return MP_OKAY; 165 } 166 167 /* grow to fit the new digits */ 168 if (a->alloc < a->used + b) { 169 if ((res = mp_grow(a, a->used + b)) != MP_OKAY) { 170 return res; 171 } 172 } 173 174 /* increment the used by the shift amount then copy upwards */ 175 a->used += b; 176 177 /* top */ 178 top = a->dp + a->used - 1; 179 180 /* base */ 181 bottom = a->dp + a->used - 1 - b; 182 183 /* much like rshift_digits this is implemented using a sliding window 184 * except the window goes the otherway around. Copying from 185 * the bottom to the top. 186 */ 187 for (x = a->used - 1; x >= b; x--) { 188 *top-- = *bottom--; 189 } 190 191 /* zero the lower digits */ 192 memset(a->dp, 0x0, b * sizeof(*a->dp)); 193 return MP_OKAY; 194 } 195 196 /* trim unused digits 197 * 198 * This is used to ensure that leading zero digits are 199 * trimed and the leading "used" digit will be non-zero 200 * Typically very fast. Also fixes the sign if there 201 * are no more leading digits 202 */ 203 static void 204 trim_unused_digits(mp_int * a) 205 { 206 /* decrease used while the most significant digit is 207 * zero. 208 */ 209 while (a->used > 0 && a->dp[a->used - 1] == 0) { 210 a->used -= 1; 211 } 212 /* reset the sign flag if used == 0 */ 213 if (a->used == 0) { 214 a->sign = MP_ZPOS; 215 } 216 } 217 218 /* copy, b = a */ 219 static int 220 mp_copy(BIGNUM *a, BIGNUM *b) 221 { 222 int res; 223 224 /* if dst == src do nothing */ 225 if (a == b) { 226 return MP_OKAY; 227 } 228 if (a == NULL || b == NULL) { 229 return MP_VAL; 230 } 231 232 /* grow dest */ 233 if (b->alloc < a->used) { 234 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 235 return res; 236 } 237 } 238 239 memcpy(b->dp, a->dp, a->used * sizeof(*b->dp)); 240 if (b->used > a->used) { 241 memset(&b->dp[a->used], 0x0, (b->used - a->used) * sizeof(*b->dp)); 242 } 243 244 /* copy used count and sign */ 245 b->used = a->used; 246 b->sign = a->sign; 247 return MP_OKAY; 248 } 249 250 /* shift left by a certain bit count */ 251 static int 252 lshift_bits(mp_int *a, int b, mp_int *c) 253 { 254 mp_digit d; 255 int res; 256 257 /* copy */ 258 if (a != c) { 259 if ((res = mp_copy(a, c)) != MP_OKAY) { 260 return res; 261 } 262 } 263 264 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 265 if ((res = mp_grow(c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 266 return res; 267 } 268 } 269 270 /* shift by as many digits in the bit count */ 271 if (b >= (int)DIGIT_BIT) { 272 if ((res = lshift_digits(c, b / DIGIT_BIT)) != MP_OKAY) { 273 return res; 274 } 275 } 276 277 /* shift any bit count < DIGIT_BIT */ 278 d = (mp_digit) (b % DIGIT_BIT); 279 if (d != 0) { 280 mp_digit *tmpc, shift, mask, carry, rr; 281 int x; 282 283 /* bitmask for carries */ 284 mask = (((mp_digit)1) << d) - 1; 285 286 /* shift for msbs */ 287 shift = DIGIT_BIT - d; 288 289 /* alias */ 290 tmpc = c->dp; 291 292 /* carry */ 293 carry = 0; 294 for (x = 0; x < c->used; x++) { 295 /* get the higher bits of the current word */ 296 rr = (*tmpc >> shift) & mask; 297 298 /* shift the current word and OR in the carry */ 299 *tmpc = ((*tmpc << d) | carry) & MP_MASK; 300 ++tmpc; 301 302 /* set the carry to the carry bits of the current word */ 303 carry = rr; 304 } 305 306 /* set final carry */ 307 if (carry != 0) { 308 c->dp[c->used++] = carry; 309 } 310 } 311 trim_unused_digits(c); 312 return MP_OKAY; 313 } 314 315 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 316 static int 317 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 318 { 319 int res; 320 321 /* make sure there are at least two digits */ 322 if (a->alloc < 2) { 323 if ((res = mp_grow(a, 2)) != MP_OKAY) { 324 return res; 325 } 326 } 327 328 /* zero the int */ 329 mp_zero(a); 330 331 /* read the bytes in */ 332 while (c-- > 0) { 333 if ((res = lshift_bits(a, 8, a)) != MP_OKAY) { 334 return res; 335 } 336 337 a->dp[0] |= *b++; 338 a->used += 1; 339 } 340 trim_unused_digits(a); 341 return MP_OKAY; 342 } 343 344 /* returns the number of bits in an mpi */ 345 static int 346 mp_count_bits(const mp_int *a) 347 { 348 int r; 349 mp_digit q; 350 351 /* shortcut */ 352 if (a->used == 0) { 353 return 0; 354 } 355 356 /* get number of digits and add that */ 357 r = (a->used - 1) * DIGIT_BIT; 358 359 /* take the last digit and count the bits in it */ 360 for (q = a->dp[a->used - 1]; q > ((mp_digit) 0) ; r++) { 361 q >>= ((mp_digit) 1); 362 } 363 return r; 364 } 365 366 /* compare maginitude of two ints (unsigned) */ 367 static int 368 compare_magnitude(mp_int * a, mp_int * b) 369 { 370 int n; 371 mp_digit *tmpa, *tmpb; 372 373 /* compare based on # of non-zero digits */ 374 if (a->used > b->used) { 375 return MP_GT; 376 } 377 378 if (a->used < b->used) { 379 return MP_LT; 380 } 381 382 /* alias for a */ 383 tmpa = a->dp + (a->used - 1); 384 385 /* alias for b */ 386 tmpb = b->dp + (a->used - 1); 387 388 /* compare based on digits */ 389 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 390 if (*tmpa > *tmpb) { 391 return MP_GT; 392 } 393 394 if (*tmpa < *tmpb) { 395 return MP_LT; 396 } 397 } 398 return MP_EQ; 399 } 400 401 /* compare two ints (signed)*/ 402 static int 403 signed_compare(mp_int * a, mp_int * b) 404 { 405 /* compare based on sign */ 406 if (a->sign != b->sign) { 407 return (a->sign == MP_NEG) ? MP_LT : MP_GT; 408 } 409 return (a->sign == MP_NEG) ? compare_magnitude(b, a) : compare_magnitude(a, b); 410 } 411 412 /* get the size for an unsigned equivalent */ 413 static int 414 mp_unsigned_bin_size(mp_int * a) 415 { 416 int size = mp_count_bits(a); 417 418 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 419 } 420 421 /* init a new mp_int */ 422 static int 423 mp_init(mp_int * a) 424 { 425 /* allocate memory required and clear it */ 426 a->dp = allocate(1, sizeof(*a->dp) * MP_PREC); 427 if (a->dp == NULL) { 428 return MP_MEM; 429 } 430 431 /* set the digits to zero */ 432 memset(a->dp, 0x0, MP_PREC * sizeof(*a->dp)); 433 434 /* set the used to zero, allocated digits to the default precision 435 * and sign to positive */ 436 a->used = 0; 437 a->alloc = MP_PREC; 438 a->sign = MP_ZPOS; 439 440 return MP_OKAY; 441 } 442 443 /* clear one (frees) */ 444 static void 445 mp_clear(mp_int * a) 446 { 447 /* only do anything if a hasn't been freed previously */ 448 if (a->dp != NULL) { 449 memset(a->dp, 0x0, a->used * sizeof(*a->dp)); 450 451 /* free ram */ 452 deallocate(a->dp, (size_t)a->alloc); 453 454 /* reset members to make debugging easier */ 455 a->dp = NULL; 456 a->alloc = a->used = 0; 457 a->sign = MP_ZPOS; 458 } 459 } 460 461 static int 462 mp_init_multi(mp_int *mp, ...) 463 { 464 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 465 int n = 0; /* Number of ok inits */ 466 mp_int* cur_arg = mp; 467 va_list args; 468 469 va_start(args, mp); /* init args to next argument from caller */ 470 while (cur_arg != NULL) { 471 if (mp_init(cur_arg) != MP_OKAY) { 472 /* Oops - error! Back-track and mp_clear what we already 473 succeeded in init-ing, then return error. 474 */ 475 va_list clean_args; 476 477 /* end the current list */ 478 va_end(args); 479 480 /* now start cleaning up */ 481 cur_arg = mp; 482 va_start(clean_args, mp); 483 while (n--) { 484 mp_clear(cur_arg); 485 cur_arg = va_arg(clean_args, mp_int*); 486 } 487 va_end(clean_args); 488 res = MP_MEM; 489 break; 490 } 491 n++; 492 cur_arg = va_arg(args, mp_int*); 493 } 494 va_end(args); 495 return res; /* Assumed ok, if error flagged above. */ 496 } 497 498 /* init an mp_init for a given size */ 499 static int 500 mp_init_size(mp_int * a, int size) 501 { 502 /* pad size so there are always extra digits */ 503 size += (MP_PREC * 2) - (size % MP_PREC); 504 505 /* alloc mem */ 506 a->dp = allocate(1, sizeof(*a->dp) * size); 507 if (a->dp == NULL) { 508 return MP_MEM; 509 } 510 511 /* set the members */ 512 a->used = 0; 513 a->alloc = size; 514 a->sign = MP_ZPOS; 515 516 /* zero the digits */ 517 memset(a->dp, 0x0, size * sizeof(*a->dp)); 518 return MP_OKAY; 519 } 520 521 /* creates "a" then copies b into it */ 522 static int 523 mp_init_copy(mp_int * a, mp_int * b) 524 { 525 int res; 526 527 if ((res = mp_init(a)) != MP_OKAY) { 528 return res; 529 } 530 return mp_copy(b, a); 531 } 532 533 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 534 static int 535 basic_add(mp_int * a, mp_int * b, mp_int * c) 536 { 537 mp_int *x; 538 int olduse, res, min, max; 539 540 /* find sizes, we let |a| <= |b| which means we have to sort 541 * them. "x" will point to the input with the most digits 542 */ 543 if (a->used > b->used) { 544 min = b->used; 545 max = a->used; 546 x = a; 547 } else { 548 min = a->used; 549 max = b->used; 550 x = b; 551 } 552 553 /* init result */ 554 if (c->alloc < max + 1) { 555 if ((res = mp_grow(c, max + 1)) != MP_OKAY) { 556 return res; 557 } 558 } 559 560 /* get old used digit count and set new one */ 561 olduse = c->used; 562 c->used = max + 1; 563 564 { 565 mp_digit carry, *tmpa, *tmpb, *tmpc; 566 int i; 567 568 /* alias for digit pointers */ 569 570 /* first input */ 571 tmpa = a->dp; 572 573 /* second input */ 574 tmpb = b->dp; 575 576 /* destination */ 577 tmpc = c->dp; 578 579 /* zero the carry */ 580 carry = 0; 581 for (i = 0; i < min; i++) { 582 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 583 *tmpc = *tmpa++ + *tmpb++ + carry; 584 585 /* U = carry bit of T[i] */ 586 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 587 588 /* take away carry bit from T[i] */ 589 *tmpc++ &= MP_MASK; 590 } 591 592 /* now copy higher words if any, that is in A+B 593 * if A or B has more digits add those in 594 */ 595 if (min != max) { 596 for (; i < max; i++) { 597 /* T[i] = X[i] + U */ 598 *tmpc = x->dp[i] + carry; 599 600 /* U = carry bit of T[i] */ 601 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 602 603 /* take away carry bit from T[i] */ 604 *tmpc++ &= MP_MASK; 605 } 606 } 607 608 /* add carry */ 609 *tmpc++ = carry; 610 611 /* clear digits above oldused */ 612 if (olduse > c->used) { 613 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*c->dp)); 614 } 615 } 616 617 trim_unused_digits(c); 618 return MP_OKAY; 619 } 620 621 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 622 static int 623 basic_subtract(mp_int * a, mp_int * b, mp_int * c) 624 { 625 int olduse, res, min, max; 626 627 /* find sizes */ 628 min = b->used; 629 max = a->used; 630 631 /* init result */ 632 if (c->alloc < max) { 633 if ((res = mp_grow(c, max)) != MP_OKAY) { 634 return res; 635 } 636 } 637 olduse = c->used; 638 c->used = max; 639 640 { 641 mp_digit carry, *tmpa, *tmpb, *tmpc; 642 int i; 643 644 /* alias for digit pointers */ 645 tmpa = a->dp; 646 tmpb = b->dp; 647 tmpc = c->dp; 648 649 /* set carry to zero */ 650 carry = 0; 651 for (i = 0; i < min; i++) { 652 /* T[i] = A[i] - B[i] - U */ 653 *tmpc = *tmpa++ - *tmpb++ - carry; 654 655 /* U = carry bit of T[i] 656 * Note this saves performing an AND operation since 657 * if a carry does occur it will propagate all the way to the 658 * MSB. As a result a single shift is enough to get the carry 659 */ 660 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 661 662 /* Clear carry from T[i] */ 663 *tmpc++ &= MP_MASK; 664 } 665 666 /* now copy higher words if any, e.g. if A has more digits than B */ 667 for (; i < max; i++) { 668 /* T[i] = A[i] - U */ 669 *tmpc = *tmpa++ - carry; 670 671 /* U = carry bit of T[i] */ 672 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 673 674 /* Clear carry from T[i] */ 675 *tmpc++ &= MP_MASK; 676 } 677 678 /* clear digits above used (since we may not have grown result above) */ 679 if (olduse > c->used) { 680 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*a->dp)); 681 } 682 } 683 684 trim_unused_digits(c); 685 return MP_OKAY; 686 } 687 688 /* high level subtraction (handles signs) */ 689 static int 690 signed_subtract(mp_int * a, mp_int * b, mp_int * c) 691 { 692 int sa, sb, res; 693 694 sa = a->sign; 695 sb = b->sign; 696 697 if (sa != sb) { 698 /* subtract a negative from a positive, OR */ 699 /* subtract a positive from a negative. */ 700 /* In either case, ADD their magnitudes, */ 701 /* and use the sign of the first number. */ 702 c->sign = sa; 703 res = basic_add(a, b, c); 704 } else { 705 /* subtract a positive from a positive, OR */ 706 /* subtract a negative from a negative. */ 707 /* First, take the difference between their */ 708 /* magnitudes, then... */ 709 if (compare_magnitude(a, b) != MP_LT) { 710 /* Copy the sign from the first */ 711 c->sign = sa; 712 /* The first has a larger or equal magnitude */ 713 res = basic_subtract(a, b, c); 714 } else { 715 /* The result has the *opposite* sign from */ 716 /* the first number. */ 717 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 718 /* The second has a larger magnitude */ 719 res = basic_subtract(b, a, c); 720 } 721 } 722 return res; 723 } 724 725 /* shift right a certain amount of digits */ 726 static int 727 rshift_digits(mp_int * a, int b) 728 { 729 /* if b <= 0 then ignore it */ 730 if (b <= 0) { 731 return 0; 732 } 733 734 /* if b > used then simply zero it and return */ 735 if (a->used <= b) { 736 mp_zero(a); 737 return 0; 738 } 739 740 /* this is implemented as a sliding window where 741 * the window is b-digits long and digits from 742 * the top of the window are copied to the bottom 743 * 744 * e.g. 745 746 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 747 /\ | ----> 748 \-------------------/ ----> 749 */ 750 memmove(a->dp, &a->dp[b], (a->used - b) * sizeof(*a->dp)); 751 memset(&a->dp[a->used - b], 0x0, b * sizeof(*a->dp)); 752 753 /* remove excess digits */ 754 a->used -= b; 755 return 1; 756 } 757 758 /* multiply by a digit */ 759 static int 760 multiply_digit(mp_int * a, mp_digit b, mp_int * c) 761 { 762 mp_digit carry, *tmpa, *tmpc; 763 mp_word r; 764 int ix, res, olduse; 765 766 /* make sure c is big enough to hold a*b */ 767 if (c->alloc < a->used + 1) { 768 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 769 return res; 770 } 771 } 772 773 /* get the original destinations used count */ 774 olduse = c->used; 775 776 /* set the sign */ 777 c->sign = a->sign; 778 779 /* alias for a->dp [source] */ 780 tmpa = a->dp; 781 782 /* alias for c->dp [dest] */ 783 tmpc = c->dp; 784 785 /* zero carry */ 786 carry = 0; 787 788 /* compute columns */ 789 for (ix = 0; ix < a->used; ix++) { 790 /* compute product and carry sum for this term */ 791 r = ((mp_word) carry) + ((mp_word)*tmpa++) * ((mp_word)b); 792 793 /* mask off higher bits to get a single digit */ 794 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 795 796 /* send carry into next iteration */ 797 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 798 } 799 800 /* store final carry [if any] and increment ix offset */ 801 *tmpc++ = carry; 802 ++ix; 803 if (olduse > ix) { 804 memset(tmpc, 0x0, (olduse - ix) * sizeof(*tmpc)); 805 } 806 807 /* set used count */ 808 c->used = a->used + 1; 809 trim_unused_digits(c); 810 811 return MP_OKAY; 812 } 813 814 /* high level addition (handles signs) */ 815 static int 816 signed_add(mp_int * a, mp_int * b, mp_int * c) 817 { 818 int asign, bsign, res; 819 820 /* get sign of both inputs */ 821 asign = a->sign; 822 bsign = b->sign; 823 824 /* handle two cases, not four */ 825 if (asign == bsign) { 826 /* both positive or both negative */ 827 /* add their magnitudes, copy the sign */ 828 c->sign = asign; 829 res = basic_add(a, b, c); 830 } else { 831 /* one positive, the other negative */ 832 /* subtract the one with the greater magnitude from */ 833 /* the one of the lesser magnitude. The result gets */ 834 /* the sign of the one with the greater magnitude. */ 835 if (compare_magnitude(a, b) == MP_LT) { 836 c->sign = bsign; 837 res = basic_subtract(b, a, c); 838 } else { 839 c->sign = asign; 840 res = basic_subtract(a, b, c); 841 } 842 } 843 return res; 844 } 845 846 /* swap the elements of two integers, for cases where you can't simply swap the 847 * mp_int pointers around 848 */ 849 static void 850 mp_exch(mp_int *a, mp_int *b) 851 { 852 mp_int t; 853 854 t = *a; 855 *a = *b; 856 *b = t; 857 } 858 859 /* calc a value mod 2**b */ 860 static int 861 modulo_2_to_power(mp_int * a, int b, mp_int * c) 862 { 863 int x, res; 864 865 /* if b is <= 0 then zero the int */ 866 if (b <= 0) { 867 mp_zero(c); 868 return MP_OKAY; 869 } 870 871 /* if the modulus is larger than the value than return */ 872 if (b >= (int) (a->used * DIGIT_BIT)) { 873 res = mp_copy(a, c); 874 return res; 875 } 876 877 /* copy */ 878 if ((res = mp_copy(a, c)) != MP_OKAY) { 879 return res; 880 } 881 882 /* zero digits above the last digit of the modulus */ 883 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 884 c->dp[x] = 0; 885 } 886 /* clear the digit that is not completely outside/inside the modulus */ 887 c->dp[b / DIGIT_BIT] &= 888 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 889 trim_unused_digits(c); 890 return MP_OKAY; 891 } 892 893 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 894 static int 895 rshift_bits(mp_int * a, int b, mp_int * c, mp_int * d) 896 { 897 mp_digit D, r, rr; 898 int x, res; 899 mp_int t; 900 901 902 /* if the shift count is <= 0 then we do no work */ 903 if (b <= 0) { 904 res = mp_copy(a, c); 905 if (d != NULL) { 906 mp_zero(d); 907 } 908 return res; 909 } 910 911 if ((res = mp_init(&t)) != MP_OKAY) { 912 return res; 913 } 914 915 /* get the remainder */ 916 if (d != NULL) { 917 if ((res = modulo_2_to_power(a, b, &t)) != MP_OKAY) { 918 mp_clear(&t); 919 return res; 920 } 921 } 922 923 /* copy */ 924 if ((res = mp_copy(a, c)) != MP_OKAY) { 925 mp_clear(&t); 926 return res; 927 } 928 929 /* shift by as many digits in the bit count */ 930 if (b >= (int)DIGIT_BIT) { 931 rshift_digits(c, b / DIGIT_BIT); 932 } 933 934 /* shift any bit count < DIGIT_BIT */ 935 D = (mp_digit) (b % DIGIT_BIT); 936 if (D != 0) { 937 mp_digit *tmpc, mask, shift; 938 939 /* mask */ 940 mask = (((mp_digit)1) << D) - 1; 941 942 /* shift for lsb */ 943 shift = DIGIT_BIT - D; 944 945 /* alias */ 946 tmpc = c->dp + (c->used - 1); 947 948 /* carry */ 949 r = 0; 950 for (x = c->used - 1; x >= 0; x--) { 951 /* get the lower bits of this word in a temp */ 952 rr = *tmpc & mask; 953 954 /* shift the current word and mix in the carry bits from the previous word */ 955 *tmpc = (*tmpc >> D) | (r << shift); 956 --tmpc; 957 958 /* set the carry to the carry bits of the current word found above */ 959 r = rr; 960 } 961 } 962 trim_unused_digits(c); 963 if (d != NULL) { 964 mp_exch(&t, d); 965 } 966 mp_clear(&t); 967 return MP_OKAY; 968 } 969 970 /* integer signed division. 971 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 972 * HAC pp.598 Algorithm 14.20 973 * 974 * Note that the description in HAC is horribly 975 * incomplete. For example, it doesn't consider 976 * the case where digits are removed from 'x' in 977 * the inner loop. It also doesn't consider the 978 * case that y has fewer than three digits, etc.. 979 * 980 * The overall algorithm is as described as 981 * 14.20 from HAC but fixed to treat these cases. 982 */ 983 static int 984 signed_divide(mp_int *c, mp_int *d, mp_int *a, mp_int *b) 985 { 986 mp_int q, x, y, t1, t2; 987 int res, n, t, i, norm, neg; 988 989 /* is divisor zero ? */ 990 if (MP_ISZERO(b) == MP_YES) { 991 return MP_VAL; 992 } 993 994 /* if a < b then q=0, r = a */ 995 if (compare_magnitude(a, b) == MP_LT) { 996 if (d != NULL) { 997 res = mp_copy(a, d); 998 } else { 999 res = MP_OKAY; 1000 } 1001 if (c != NULL) { 1002 mp_zero(c); 1003 } 1004 return res; 1005 } 1006 1007 if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) { 1008 return res; 1009 } 1010 q.used = a->used + 2; 1011 1012 if ((res = mp_init(&t1)) != MP_OKAY) { 1013 goto LBL_Q; 1014 } 1015 1016 if ((res = mp_init(&t2)) != MP_OKAY) { 1017 goto LBL_T1; 1018 } 1019 1020 if ((res = mp_init_copy(&x, a)) != MP_OKAY) { 1021 goto LBL_T2; 1022 } 1023 1024 if ((res = mp_init_copy(&y, b)) != MP_OKAY) { 1025 goto LBL_X; 1026 } 1027 1028 /* fix the sign */ 1029 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1030 x.sign = y.sign = MP_ZPOS; 1031 1032 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1033 norm = mp_count_bits(&y) % DIGIT_BIT; 1034 if (norm < (int)(DIGIT_BIT-1)) { 1035 norm = (DIGIT_BIT-1) - norm; 1036 if ((res = lshift_bits(&x, norm, &x)) != MP_OKAY) { 1037 goto LBL_Y; 1038 } 1039 if ((res = lshift_bits(&y, norm, &y)) != MP_OKAY) { 1040 goto LBL_Y; 1041 } 1042 } else { 1043 norm = 0; 1044 } 1045 1046 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1047 n = x.used - 1; 1048 t = y.used - 1; 1049 1050 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1051 if ((res = lshift_digits(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1052 goto LBL_Y; 1053 } 1054 1055 while (signed_compare(&x, &y) != MP_LT) { 1056 ++(q.dp[n - t]); 1057 if ((res = signed_subtract(&x, &y, &x)) != MP_OKAY) { 1058 goto LBL_Y; 1059 } 1060 } 1061 1062 /* reset y by shifting it back down */ 1063 rshift_digits(&y, n - t); 1064 1065 /* step 3. for i from n down to (t + 1) */ 1066 for (i = n; i >= (t + 1); i--) { 1067 if (i > x.used) { 1068 continue; 1069 } 1070 1071 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1072 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1073 if (x.dp[i] == y.dp[t]) { 1074 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1075 } else { 1076 mp_word tmp; 1077 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1078 tmp |= ((mp_word) x.dp[i - 1]); 1079 tmp /= ((mp_word) y.dp[t]); 1080 if (tmp > (mp_word) MP_MASK) { 1081 tmp = MP_MASK; 1082 } 1083 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1084 } 1085 1086 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1087 xi * b**2 + xi-1 * b + xi-2 1088 do q{i-t-1} -= 1; 1089 */ 1090 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1091 do { 1092 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1093 1094 /* find left hand */ 1095 mp_zero(&t1); 1096 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1097 t1.dp[1] = y.dp[t]; 1098 t1.used = 2; 1099 if ((res = multiply_digit(&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1100 goto LBL_Y; 1101 } 1102 1103 /* find right hand */ 1104 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1105 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1106 t2.dp[2] = x.dp[i]; 1107 t2.used = 3; 1108 } while (compare_magnitude(&t1, &t2) == MP_GT); 1109 1110 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1111 if ((res = multiply_digit(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1112 goto LBL_Y; 1113 } 1114 1115 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1116 goto LBL_Y; 1117 } 1118 1119 if ((res = signed_subtract(&x, &t1, &x)) != MP_OKAY) { 1120 goto LBL_Y; 1121 } 1122 1123 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1124 if (x.sign == MP_NEG) { 1125 if ((res = mp_copy(&y, &t1)) != MP_OKAY) { 1126 goto LBL_Y; 1127 } 1128 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1129 goto LBL_Y; 1130 } 1131 if ((res = signed_add(&x, &t1, &x)) != MP_OKAY) { 1132 goto LBL_Y; 1133 } 1134 1135 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1136 } 1137 } 1138 1139 /* now q is the quotient and x is the remainder 1140 * [which we have to normalize] 1141 */ 1142 1143 /* get sign before writing to c */ 1144 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1145 1146 if (c != NULL) { 1147 trim_unused_digits(&q); 1148 mp_exch(&q, c); 1149 c->sign = neg; 1150 } 1151 1152 if (d != NULL) { 1153 rshift_bits(&x, norm, &x, NULL); 1154 mp_exch(&x, d); 1155 } 1156 1157 res = MP_OKAY; 1158 1159 LBL_Y: 1160 mp_clear(&y); 1161 LBL_X: 1162 mp_clear(&x); 1163 LBL_T2: 1164 mp_clear(&t2); 1165 LBL_T1: 1166 mp_clear(&t1); 1167 LBL_Q: 1168 mp_clear(&q); 1169 return res; 1170 } 1171 1172 /* c = a mod b, 0 <= c < b */ 1173 static int 1174 modulo(mp_int * a, mp_int * b, mp_int * c) 1175 { 1176 mp_int t; 1177 int res; 1178 1179 if ((res = mp_init(&t)) != MP_OKAY) { 1180 return res; 1181 } 1182 1183 if ((res = signed_divide(NULL, &t, a, b)) != MP_OKAY) { 1184 mp_clear(&t); 1185 return res; 1186 } 1187 1188 if (t.sign != b->sign) { 1189 res = signed_add(b, &t, c); 1190 } else { 1191 res = MP_OKAY; 1192 mp_exch(&t, c); 1193 } 1194 1195 mp_clear(&t); 1196 return res; 1197 } 1198 1199 /* set to a digit */ 1200 static void 1201 set_word(mp_int * a, mp_digit b) 1202 { 1203 mp_zero(a); 1204 a->dp[0] = b & MP_MASK; 1205 a->used = (a->dp[0] != 0) ? 1 : 0; 1206 } 1207 1208 /* b = a/2 */ 1209 static int 1210 half(mp_int * a, mp_int * b) 1211 { 1212 int x, res, oldused; 1213 1214 /* copy */ 1215 if (b->alloc < a->used) { 1216 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 1217 return res; 1218 } 1219 } 1220 1221 oldused = b->used; 1222 b->used = a->used; 1223 { 1224 mp_digit r, rr, *tmpa, *tmpb; 1225 1226 /* source alias */ 1227 tmpa = a->dp + b->used - 1; 1228 1229 /* dest alias */ 1230 tmpb = b->dp + b->used - 1; 1231 1232 /* carry */ 1233 r = 0; 1234 for (x = b->used - 1; x >= 0; x--) { 1235 /* get the carry for the next iteration */ 1236 rr = *tmpa & 1; 1237 1238 /* shift the current digit, add in carry and store */ 1239 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1240 1241 /* forward carry to next iteration */ 1242 r = rr; 1243 } 1244 1245 /* zero excess digits */ 1246 tmpb = b->dp + b->used; 1247 for (x = b->used; x < oldused; x++) { 1248 *tmpb++ = 0; 1249 } 1250 } 1251 b->sign = a->sign; 1252 trim_unused_digits(b); 1253 return MP_OKAY; 1254 } 1255 1256 /* compare a digit */ 1257 static int 1258 compare_digit(mp_int * a, mp_digit b) 1259 { 1260 /* compare based on sign */ 1261 if (a->sign == MP_NEG) { 1262 return MP_LT; 1263 } 1264 1265 /* compare based on magnitude */ 1266 if (a->used > 1) { 1267 return MP_GT; 1268 } 1269 1270 /* compare the only digit of a to b */ 1271 if (a->dp[0] > b) { 1272 return MP_GT; 1273 } else if (a->dp[0] < b) { 1274 return MP_LT; 1275 } else { 1276 return MP_EQ; 1277 } 1278 } 1279 1280 static void 1281 mp_clear_multi(mp_int *mp, ...) 1282 { 1283 mp_int* next_mp = mp; 1284 va_list args; 1285 1286 va_start(args, mp); 1287 while (next_mp != NULL) { 1288 mp_clear(next_mp); 1289 next_mp = va_arg(args, mp_int*); 1290 } 1291 va_end(args); 1292 } 1293 1294 /* computes the modular inverse via binary extended euclidean algorithm, 1295 * that is c = 1/a mod b 1296 * 1297 * Based on slow invmod except this is optimized for the case where b is 1298 * odd as per HAC Note 14.64 on pp. 610 1299 */ 1300 static int 1301 fast_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1302 { 1303 mp_int x, y, u, v, B, D; 1304 int res, neg; 1305 1306 /* 2. [modified] b must be odd */ 1307 if (MP_ISZERO(b) == MP_YES) { 1308 return MP_VAL; 1309 } 1310 1311 /* init all our temps */ 1312 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1313 return res; 1314 } 1315 1316 /* x == modulus, y == value to invert */ 1317 if ((res = mp_copy(b, &x)) != MP_OKAY) { 1318 goto LBL_ERR; 1319 } 1320 1321 /* we need y = |a| */ 1322 if ((res = modulo(a, b, &y)) != MP_OKAY) { 1323 goto LBL_ERR; 1324 } 1325 1326 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1327 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1328 goto LBL_ERR; 1329 } 1330 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1331 goto LBL_ERR; 1332 } 1333 set_word(&D, 1); 1334 1335 top: 1336 /* 4. while u is even do */ 1337 while (BN_is_even(&u) == 1) { 1338 /* 4.1 u = u/2 */ 1339 if ((res = half(&u, &u)) != MP_OKAY) { 1340 goto LBL_ERR; 1341 } 1342 /* 4.2 if B is odd then */ 1343 if (BN_is_odd(&B) == 1) { 1344 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1345 goto LBL_ERR; 1346 } 1347 } 1348 /* B = B/2 */ 1349 if ((res = half(&B, &B)) != MP_OKAY) { 1350 goto LBL_ERR; 1351 } 1352 } 1353 1354 /* 5. while v is even do */ 1355 while (BN_is_even(&v) == 1) { 1356 /* 5.1 v = v/2 */ 1357 if ((res = half(&v, &v)) != MP_OKAY) { 1358 goto LBL_ERR; 1359 } 1360 /* 5.2 if D is odd then */ 1361 if (BN_is_odd(&D) == 1) { 1362 /* D = (D-x)/2 */ 1363 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1364 goto LBL_ERR; 1365 } 1366 } 1367 /* D = D/2 */ 1368 if ((res = half(&D, &D)) != MP_OKAY) { 1369 goto LBL_ERR; 1370 } 1371 } 1372 1373 /* 6. if u >= v then */ 1374 if (signed_compare(&u, &v) != MP_LT) { 1375 /* u = u - v, B = B - D */ 1376 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1377 goto LBL_ERR; 1378 } 1379 1380 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1381 goto LBL_ERR; 1382 } 1383 } else { 1384 /* v - v - u, D = D - B */ 1385 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1386 goto LBL_ERR; 1387 } 1388 1389 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1390 goto LBL_ERR; 1391 } 1392 } 1393 1394 /* if not zero goto step 4 */ 1395 if (MP_ISZERO(&u) == MP_NO) { 1396 goto top; 1397 } 1398 1399 /* now a = C, b = D, gcd == g*v */ 1400 1401 /* if v != 1 then there is no inverse */ 1402 if (compare_digit(&v, 1) != MP_EQ) { 1403 res = MP_VAL; 1404 goto LBL_ERR; 1405 } 1406 1407 /* b is now the inverse */ 1408 neg = a->sign; 1409 while (D.sign == MP_NEG) { 1410 if ((res = signed_add(&D, b, &D)) != MP_OKAY) { 1411 goto LBL_ERR; 1412 } 1413 } 1414 mp_exch(&D, c); 1415 c->sign = neg; 1416 res = MP_OKAY; 1417 1418 LBL_ERR: 1419 mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1420 return res; 1421 } 1422 1423 /* hac 14.61, pp608 */ 1424 static int 1425 slow_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1426 { 1427 mp_int x, y, u, v, A, B, C, D; 1428 int res; 1429 1430 /* b cannot be negative */ 1431 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1432 return MP_VAL; 1433 } 1434 1435 /* init temps */ 1436 if ((res = mp_init_multi(&x, &y, &u, &v, 1437 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1438 return res; 1439 } 1440 1441 /* x = a, y = b */ 1442 if ((res = modulo(a, b, &x)) != MP_OKAY) { 1443 goto LBL_ERR; 1444 } 1445 if ((res = mp_copy(b, &y)) != MP_OKAY) { 1446 goto LBL_ERR; 1447 } 1448 1449 /* 2. [modified] if x,y are both even then return an error! */ 1450 if (BN_is_even(&x) == 1 && BN_is_even(&y) == 1) { 1451 res = MP_VAL; 1452 goto LBL_ERR; 1453 } 1454 1455 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1456 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1457 goto LBL_ERR; 1458 } 1459 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1460 goto LBL_ERR; 1461 } 1462 set_word(&A, 1); 1463 set_word(&D, 1); 1464 1465 top: 1466 /* 4. while u is even do */ 1467 while (BN_is_even(&u) == 1) { 1468 /* 4.1 u = u/2 */ 1469 if ((res = half(&u, &u)) != MP_OKAY) { 1470 goto LBL_ERR; 1471 } 1472 /* 4.2 if A or B is odd then */ 1473 if (BN_is_odd(&A) == 1 || BN_is_odd(&B) == 1) { 1474 /* A = (A+y)/2, B = (B-x)/2 */ 1475 if ((res = signed_add(&A, &y, &A)) != MP_OKAY) { 1476 goto LBL_ERR; 1477 } 1478 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1479 goto LBL_ERR; 1480 } 1481 } 1482 /* A = A/2, B = B/2 */ 1483 if ((res = half(&A, &A)) != MP_OKAY) { 1484 goto LBL_ERR; 1485 } 1486 if ((res = half(&B, &B)) != MP_OKAY) { 1487 goto LBL_ERR; 1488 } 1489 } 1490 1491 /* 5. while v is even do */ 1492 while (BN_is_even(&v) == 1) { 1493 /* 5.1 v = v/2 */ 1494 if ((res = half(&v, &v)) != MP_OKAY) { 1495 goto LBL_ERR; 1496 } 1497 /* 5.2 if C or D is odd then */ 1498 if (BN_is_odd(&C) == 1 || BN_is_odd(&D) == 1) { 1499 /* C = (C+y)/2, D = (D-x)/2 */ 1500 if ((res = signed_add(&C, &y, &C)) != MP_OKAY) { 1501 goto LBL_ERR; 1502 } 1503 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1504 goto LBL_ERR; 1505 } 1506 } 1507 /* C = C/2, D = D/2 */ 1508 if ((res = half(&C, &C)) != MP_OKAY) { 1509 goto LBL_ERR; 1510 } 1511 if ((res = half(&D, &D)) != MP_OKAY) { 1512 goto LBL_ERR; 1513 } 1514 } 1515 1516 /* 6. if u >= v then */ 1517 if (signed_compare(&u, &v) != MP_LT) { 1518 /* u = u - v, A = A - C, B = B - D */ 1519 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1520 goto LBL_ERR; 1521 } 1522 1523 if ((res = signed_subtract(&A, &C, &A)) != MP_OKAY) { 1524 goto LBL_ERR; 1525 } 1526 1527 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1528 goto LBL_ERR; 1529 } 1530 } else { 1531 /* v - v - u, C = C - A, D = D - B */ 1532 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1533 goto LBL_ERR; 1534 } 1535 1536 if ((res = signed_subtract(&C, &A, &C)) != MP_OKAY) { 1537 goto LBL_ERR; 1538 } 1539 1540 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1541 goto LBL_ERR; 1542 } 1543 } 1544 1545 /* if not zero goto step 4 */ 1546 if (BN_is_zero(&u) == 0) { 1547 goto top; 1548 } 1549 /* now a = C, b = D, gcd == g*v */ 1550 1551 /* if v != 1 then there is no inverse */ 1552 if (compare_digit(&v, 1) != MP_EQ) { 1553 res = MP_VAL; 1554 goto LBL_ERR; 1555 } 1556 1557 /* if its too low */ 1558 while (compare_digit(&C, 0) == MP_LT) { 1559 if ((res = signed_add(&C, b, &C)) != MP_OKAY) { 1560 goto LBL_ERR; 1561 } 1562 } 1563 1564 /* too big */ 1565 while (compare_magnitude(&C, b) != MP_LT) { 1566 if ((res = signed_subtract(&C, b, &C)) != MP_OKAY) { 1567 goto LBL_ERR; 1568 } 1569 } 1570 1571 /* C is now the inverse */ 1572 mp_exch(&C, c); 1573 res = MP_OKAY; 1574 LBL_ERR: 1575 mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1576 return res; 1577 } 1578 1579 static int 1580 modular_inverse(mp_int *c, mp_int *a, mp_int *b) 1581 { 1582 /* b cannot be negative */ 1583 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1584 return MP_VAL; 1585 } 1586 1587 /* if the modulus is odd we can use a faster routine instead */ 1588 if (BN_is_odd(b) == 1) { 1589 return fast_modular_inverse(a, b, c); 1590 } 1591 return slow_modular_inverse(a, b, c); 1592 } 1593 1594 /* b = |a| 1595 * 1596 * Simple function copies the input and fixes the sign to positive 1597 */ 1598 static int 1599 absolute(mp_int * a, mp_int * b) 1600 { 1601 int res; 1602 1603 /* copy a to b */ 1604 if (a != b) { 1605 if ((res = mp_copy(a, b)) != MP_OKAY) { 1606 return res; 1607 } 1608 } 1609 1610 /* force the sign of b to positive */ 1611 b->sign = MP_ZPOS; 1612 1613 return MP_OKAY; 1614 } 1615 1616 /* determines if reduce_2k_l can be used */ 1617 static int 1618 mp_reduce_is_2k_l(mp_int *a) 1619 { 1620 int ix, iy; 1621 1622 if (a->used == 0) { 1623 return MP_NO; 1624 } else if (a->used == 1) { 1625 return MP_YES; 1626 } else if (a->used > 1) { 1627 /* if more than half of the digits are -1 we're sold */ 1628 for (iy = ix = 0; ix < a->used; ix++) { 1629 if (a->dp[ix] == MP_MASK) { 1630 ++iy; 1631 } 1632 } 1633 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1634 1635 } 1636 return MP_NO; 1637 } 1638 1639 /* computes a = 2**b 1640 * 1641 * Simple algorithm which zeroes the int, grows it then just sets one bit 1642 * as required. 1643 */ 1644 static int 1645 mp_2expt(mp_int * a, int b) 1646 { 1647 int res; 1648 1649 /* zero a as per default */ 1650 mp_zero(a); 1651 1652 /* grow a to accommodate the single bit */ 1653 if ((res = mp_grow(a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1654 return res; 1655 } 1656 1657 /* set the used count of where the bit will go */ 1658 a->used = b / DIGIT_BIT + 1; 1659 1660 /* put the single bit in its place */ 1661 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1662 1663 return MP_OKAY; 1664 } 1665 1666 /* pre-calculate the value required for Barrett reduction 1667 * For a given modulus "b" it calulates the value required in "a" 1668 */ 1669 static int 1670 mp_reduce_setup(mp_int * a, mp_int * b) 1671 { 1672 int res; 1673 1674 if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1675 return res; 1676 } 1677 return signed_divide(a, NULL, a, b); 1678 } 1679 1680 /* b = a*2 */ 1681 static int 1682 doubled(mp_int * a, mp_int * b) 1683 { 1684 int x, res, oldused; 1685 1686 /* grow to accommodate result */ 1687 if (b->alloc < a->used + 1) { 1688 if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) { 1689 return res; 1690 } 1691 } 1692 1693 oldused = b->used; 1694 b->used = a->used; 1695 1696 { 1697 mp_digit r, rr, *tmpa, *tmpb; 1698 1699 /* alias for source */ 1700 tmpa = a->dp; 1701 1702 /* alias for dest */ 1703 tmpb = b->dp; 1704 1705 /* carry */ 1706 r = 0; 1707 for (x = 0; x < a->used; x++) { 1708 1709 /* get what will be the *next* carry bit from the 1710 * MSB of the current digit 1711 */ 1712 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1713 1714 /* now shift up this digit, add in the carry [from the previous] */ 1715 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1716 1717 /* copy the carry that would be from the source 1718 * digit into the next iteration 1719 */ 1720 r = rr; 1721 } 1722 1723 /* new leading digit? */ 1724 if (r != 0) { 1725 /* add a MSB which is always 1 at this point */ 1726 *tmpb = 1; 1727 ++(b->used); 1728 } 1729 1730 /* now zero any excess digits on the destination 1731 * that we didn't write to 1732 */ 1733 tmpb = b->dp + b->used; 1734 for (x = b->used; x < oldused; x++) { 1735 *tmpb++ = 0; 1736 } 1737 } 1738 b->sign = a->sign; 1739 return MP_OKAY; 1740 } 1741 1742 /* divide by three (based on routine from MPI and the GMP manual) */ 1743 static int 1744 third(mp_int * a, mp_int *c, mp_digit * d) 1745 { 1746 mp_int q; 1747 mp_word w, t; 1748 mp_digit b; 1749 int res, ix; 1750 1751 /* b = 2**DIGIT_BIT / 3 */ 1752 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1753 1754 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1755 return res; 1756 } 1757 1758 q.used = a->used; 1759 q.sign = a->sign; 1760 w = 0; 1761 for (ix = a->used - 1; ix >= 0; ix--) { 1762 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1763 1764 if (w >= 3) { 1765 /* multiply w by [1/3] */ 1766 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1767 1768 /* now subtract 3 * [w/3] from w, to get the remainder */ 1769 w -= t+t+t; 1770 1771 /* fixup the remainder as required since 1772 * the optimization is not exact. 1773 */ 1774 while (w >= 3) { 1775 t += 1; 1776 w -= 3; 1777 } 1778 } else { 1779 t = 0; 1780 } 1781 q.dp[ix] = (mp_digit)t; 1782 } 1783 1784 /* [optional] store the remainder */ 1785 if (d != NULL) { 1786 *d = (mp_digit)w; 1787 } 1788 1789 /* [optional] store the quotient */ 1790 if (c != NULL) { 1791 trim_unused_digits(&q); 1792 mp_exch(&q, c); 1793 } 1794 mp_clear(&q); 1795 1796 return res; 1797 } 1798 1799 /* multiplication using the Toom-Cook 3-way algorithm 1800 * 1801 * Much more complicated than Karatsuba but has a lower 1802 * asymptotic running time of O(N**1.464). This algorithm is 1803 * only particularly useful on VERY large inputs 1804 * (we're talking 1000s of digits here...). 1805 */ 1806 static int 1807 toom_cook_multiply(mp_int *a, mp_int *b, mp_int *c) 1808 { 1809 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1810 int res, B; 1811 1812 /* init temps */ 1813 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1814 &a0, &a1, &a2, &b0, &b1, 1815 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1816 return res; 1817 } 1818 1819 /* B */ 1820 B = MIN(a->used, b->used) / 3; 1821 1822 /* a = a2 * B**2 + a1 * B + a0 */ 1823 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1824 goto ERR; 1825 } 1826 1827 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1828 goto ERR; 1829 } 1830 rshift_digits(&a1, B); 1831 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 1832 1833 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1834 goto ERR; 1835 } 1836 rshift_digits(&a2, B*2); 1837 1838 /* b = b2 * B**2 + b1 * B + b0 */ 1839 if ((res = modulo_2_to_power(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1840 goto ERR; 1841 } 1842 1843 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1844 goto ERR; 1845 } 1846 rshift_digits(&b1, B); 1847 modulo_2_to_power(&b1, DIGIT_BIT * B, &b1); 1848 1849 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1850 goto ERR; 1851 } 1852 rshift_digits(&b2, B*2); 1853 1854 /* w0 = a0*b0 */ 1855 if ((res = signed_multiply(&a0, &b0, &w0)) != MP_OKAY) { 1856 goto ERR; 1857 } 1858 1859 /* w4 = a2 * b2 */ 1860 if ((res = signed_multiply(&a2, &b2, &w4)) != MP_OKAY) { 1861 goto ERR; 1862 } 1863 1864 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1865 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 1866 goto ERR; 1867 } 1868 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1869 goto ERR; 1870 } 1871 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1872 goto ERR; 1873 } 1874 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1875 goto ERR; 1876 } 1877 1878 if ((res = doubled(&b0, &tmp2)) != MP_OKAY) { 1879 goto ERR; 1880 } 1881 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1882 goto ERR; 1883 } 1884 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1885 goto ERR; 1886 } 1887 if ((res = signed_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1888 goto ERR; 1889 } 1890 1891 if ((res = signed_multiply(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1892 goto ERR; 1893 } 1894 1895 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1896 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 1897 goto ERR; 1898 } 1899 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1900 goto ERR; 1901 } 1902 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1903 goto ERR; 1904 } 1905 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1906 goto ERR; 1907 } 1908 1909 if ((res = doubled(&b2, &tmp2)) != MP_OKAY) { 1910 goto ERR; 1911 } 1912 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1913 goto ERR; 1914 } 1915 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1916 goto ERR; 1917 } 1918 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1919 goto ERR; 1920 } 1921 1922 if ((res = signed_multiply(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1923 goto ERR; 1924 } 1925 1926 1927 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1928 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1929 goto ERR; 1930 } 1931 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1932 goto ERR; 1933 } 1934 if ((res = signed_add(&b2, &b1, &tmp2)) != MP_OKAY) { 1935 goto ERR; 1936 } 1937 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1938 goto ERR; 1939 } 1940 if ((res = signed_multiply(&tmp1, &tmp2, &w2)) != MP_OKAY) { 1941 goto ERR; 1942 } 1943 1944 /* now solve the matrix 1945 1946 0 0 0 0 1 1947 1 2 4 8 16 1948 1 1 1 1 1 1949 16 8 4 2 1 1950 1 0 0 0 0 1951 1952 using 12 subtractions, 4 shifts, 1953 2 small divisions and 1 small multiplication 1954 */ 1955 1956 /* r1 - r4 */ 1957 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 1958 goto ERR; 1959 } 1960 /* r3 - r0 */ 1961 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 1962 goto ERR; 1963 } 1964 /* r1/2 */ 1965 if ((res = half(&w1, &w1)) != MP_OKAY) { 1966 goto ERR; 1967 } 1968 /* r3/2 */ 1969 if ((res = half(&w3, &w3)) != MP_OKAY) { 1970 goto ERR; 1971 } 1972 /* r2 - r0 - r4 */ 1973 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 1974 goto ERR; 1975 } 1976 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 1977 goto ERR; 1978 } 1979 /* r1 - r2 */ 1980 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 1981 goto ERR; 1982 } 1983 /* r3 - r2 */ 1984 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 1985 goto ERR; 1986 } 1987 /* r1 - 8r0 */ 1988 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 1989 goto ERR; 1990 } 1991 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 1992 goto ERR; 1993 } 1994 /* r3 - 8r4 */ 1995 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 1996 goto ERR; 1997 } 1998 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 1999 goto ERR; 2000 } 2001 /* 3r2 - r1 - r3 */ 2002 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2003 goto ERR; 2004 } 2005 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2006 goto ERR; 2007 } 2008 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2009 goto ERR; 2010 } 2011 /* r1 - r2 */ 2012 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2013 goto ERR; 2014 } 2015 /* r3 - r2 */ 2016 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2017 goto ERR; 2018 } 2019 /* r1/3 */ 2020 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2021 goto ERR; 2022 } 2023 /* r3/3 */ 2024 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2025 goto ERR; 2026 } 2027 2028 /* at this point shift W[n] by B*n */ 2029 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2030 goto ERR; 2031 } 2032 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2033 goto ERR; 2034 } 2035 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2036 goto ERR; 2037 } 2038 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2039 goto ERR; 2040 } 2041 2042 if ((res = signed_add(&w0, &w1, c)) != MP_OKAY) { 2043 goto ERR; 2044 } 2045 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2046 goto ERR; 2047 } 2048 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2049 goto ERR; 2050 } 2051 if ((res = signed_add(&tmp1, c, c)) != MP_OKAY) { 2052 goto ERR; 2053 } 2054 2055 ERR: 2056 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2057 &a0, &a1, &a2, &b0, &b1, 2058 &b2, &tmp1, &tmp2, NULL); 2059 return res; 2060 } 2061 2062 #define TOOM_MUL_CUTOFF 350 2063 #define KARATSUBA_MUL_CUTOFF 80 2064 2065 /* c = |a| * |b| using Karatsuba Multiplication using 2066 * three half size multiplications 2067 * 2068 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2069 * let n represent half of the number of digits in 2070 * the min(a,b) 2071 * 2072 * a = a1 * B**n + a0 2073 * b = b1 * B**n + b0 2074 * 2075 * Then, a * b => 2076 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2077 * 2078 * Note that a1b1 and a0b0 are used twice and only need to be 2079 * computed once. So in total three half size (half # of 2080 * digit) multiplications are performed, a0b0, a1b1 and 2081 * (a1+b1)(a0+b0) 2082 * 2083 * Note that a multiplication of half the digits requires 2084 * 1/4th the number of single precision multiplications so in 2085 * total after one call 25% of the single precision multiplications 2086 * are saved. Note also that the call to signed_multiply can end up back 2087 * in this function if the a0, a1, b0, or b1 are above the threshold. 2088 * This is known as divide-and-conquer and leads to the famous 2089 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2090 * the standard O(N**2) that the baseline/comba methods use. 2091 * Generally though the overhead of this method doesn't pay off 2092 * until a certain size (N ~ 80) is reached. 2093 */ 2094 static int 2095 karatsuba_multiply(mp_int * a, mp_int * b, mp_int * c) 2096 { 2097 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2098 int B; 2099 int err; 2100 2101 /* default the return code to an error */ 2102 err = MP_MEM; 2103 2104 /* min # of digits */ 2105 B = MIN(a->used, b->used); 2106 2107 /* now divide in two */ 2108 B = (int)((unsigned)B >> 1); 2109 2110 /* init copy all the temps */ 2111 if (mp_init_size(&x0, B) != MP_OKAY) { 2112 goto ERR; 2113 } 2114 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2115 goto X0; 2116 } 2117 if (mp_init_size(&y0, B) != MP_OKAY) { 2118 goto X1; 2119 } 2120 if (mp_init_size(&y1, b->used - B) != MP_OKAY) { 2121 goto Y0; 2122 } 2123 /* init temps */ 2124 if (mp_init_size(&t1, B * 2) != MP_OKAY) { 2125 goto Y1; 2126 } 2127 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) { 2128 goto T1; 2129 } 2130 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) { 2131 goto X0Y0; 2132 } 2133 /* now shift the digits */ 2134 x0.used = y0.used = B; 2135 x1.used = a->used - B; 2136 y1.used = b->used - B; 2137 2138 { 2139 int x; 2140 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2141 2142 /* we copy the digits directly instead of using higher level functions 2143 * since we also need to shift the digits 2144 */ 2145 tmpa = a->dp; 2146 tmpb = b->dp; 2147 2148 tmpx = x0.dp; 2149 tmpy = y0.dp; 2150 for (x = 0; x < B; x++) { 2151 *tmpx++ = *tmpa++; 2152 *tmpy++ = *tmpb++; 2153 } 2154 2155 tmpx = x1.dp; 2156 for (x = B; x < a->used; x++) { 2157 *tmpx++ = *tmpa++; 2158 } 2159 2160 tmpy = y1.dp; 2161 for (x = B; x < b->used; x++) { 2162 *tmpy++ = *tmpb++; 2163 } 2164 } 2165 2166 /* only need to clamp the lower words since by definition the 2167 * upper words x1/y1 must have a known number of digits 2168 */ 2169 trim_unused_digits(&x0); 2170 trim_unused_digits(&y0); 2171 2172 /* now calc the products x0y0 and x1y1 */ 2173 /* after this x0 is no longer required, free temp [x0==t2]! */ 2174 if (signed_multiply(&x0, &y0, &x0y0) != MP_OKAY) { 2175 goto X1Y1; /* x0y0 = x0*y0 */ 2176 } 2177 if (signed_multiply(&x1, &y1, &x1y1) != MP_OKAY) { 2178 goto X1Y1; /* x1y1 = x1*y1 */ 2179 } 2180 /* now calc x1+x0 and y1+y0 */ 2181 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2182 goto X1Y1; /* t1 = x1 - x0 */ 2183 } 2184 if (basic_add(&y1, &y0, &x0) != MP_OKAY) { 2185 goto X1Y1; /* t2 = y1 - y0 */ 2186 } 2187 if (signed_multiply(&t1, &x0, &t1) != MP_OKAY) { 2188 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2189 } 2190 /* add x0y0 */ 2191 if (signed_add(&x0y0, &x1y1, &x0) != MP_OKAY) { 2192 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2193 } 2194 if (basic_subtract(&t1, &x0, &t1) != MP_OKAY) { 2195 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2196 } 2197 /* shift by B */ 2198 if (lshift_digits(&t1, B) != MP_OKAY) { 2199 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2200 } 2201 if (lshift_digits(&x1y1, B * 2) != MP_OKAY) { 2202 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2203 } 2204 if (signed_add(&x0y0, &t1, &t1) != MP_OKAY) { 2205 goto X1Y1; /* t1 = x0y0 + t1 */ 2206 } 2207 if (signed_add(&t1, &x1y1, c) != MP_OKAY) { 2208 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2209 } 2210 /* Algorithm succeeded set the return code to MP_OKAY */ 2211 err = MP_OKAY; 2212 2213 X1Y1: 2214 mp_clear(&x1y1); 2215 X0Y0: 2216 mp_clear(&x0y0); 2217 T1: 2218 mp_clear(&t1); 2219 Y1: 2220 mp_clear(&y1); 2221 Y0: 2222 mp_clear(&y0); 2223 X1: 2224 mp_clear(&x1); 2225 X0: 2226 mp_clear(&x0); 2227 ERR: 2228 return err; 2229 } 2230 2231 /* Fast (comba) multiplier 2232 * 2233 * This is the fast column-array [comba] multiplier. It is 2234 * designed to compute the columns of the product first 2235 * then handle the carries afterwards. This has the effect 2236 * of making the nested loops that compute the columns very 2237 * simple and schedulable on super-scalar processors. 2238 * 2239 * This has been modified to produce a variable number of 2240 * digits of output so if say only a half-product is required 2241 * you don't have to compute the upper half (a feature 2242 * required for fast Barrett reduction). 2243 * 2244 * Based on Algorithm 14.12 on pp.595 of HAC. 2245 * 2246 */ 2247 static int 2248 fast_col_array_multiply(mp_int * a, mp_int * b, mp_int * c, int digs) 2249 { 2250 int olduse, res, pa, ix, iz; 2251 /*LINTED*/ 2252 mp_digit W[MP_WARRAY]; 2253 mp_word _W; 2254 2255 /* grow the destination as required */ 2256 if (c->alloc < digs) { 2257 if ((res = mp_grow(c, digs)) != MP_OKAY) { 2258 return res; 2259 } 2260 } 2261 2262 /* number of output digits to produce */ 2263 pa = MIN(digs, a->used + b->used); 2264 2265 /* clear the carry */ 2266 _W = 0; 2267 for (ix = 0; ix < pa; ix++) { 2268 int tx, ty; 2269 int iy; 2270 mp_digit *tmpx, *tmpy; 2271 2272 /* get offsets into the two bignums */ 2273 ty = MIN(b->used-1, ix); 2274 tx = ix - ty; 2275 2276 /* setup temp aliases */ 2277 tmpx = a->dp + tx; 2278 tmpy = b->dp + ty; 2279 2280 /* this is the number of times the loop will iterrate, essentially 2281 while (tx++ < a->used && ty-- >= 0) { ... } 2282 */ 2283 iy = MIN(a->used-tx, ty+1); 2284 2285 /* execute loop */ 2286 for (iz = 0; iz < iy; ++iz) { 2287 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2288 2289 } 2290 2291 /* store term */ 2292 W[ix] = ((mp_digit)_W) & MP_MASK; 2293 2294 /* make next carry */ 2295 _W = _W >> ((mp_word)DIGIT_BIT); 2296 } 2297 2298 /* setup dest */ 2299 olduse = c->used; 2300 c->used = pa; 2301 2302 { 2303 mp_digit *tmpc; 2304 tmpc = c->dp; 2305 for (ix = 0; ix < pa+1; ix++) { 2306 /* now extract the previous digit [below the carry] */ 2307 *tmpc++ = (ix < pa) ? W[ix] : 0; 2308 } 2309 2310 /* clear unused digits [that existed in the old copy of c] */ 2311 for (; ix < olduse; ix++) { 2312 *tmpc++ = 0; 2313 } 2314 } 2315 trim_unused_digits(c); 2316 return MP_OKAY; 2317 } 2318 2319 /* return 1 if we can use fast column array multiply */ 2320 /* 2321 * The fast multiplier can be used if the output will 2322 * have less than MP_WARRAY digits and the number of 2323 * digits won't affect carry propagation 2324 */ 2325 static inline int 2326 can_use_fast_column_array(int ndigits, int used) 2327 { 2328 return (((unsigned)ndigits < MP_WARRAY) && 2329 used < (1 << (unsigned)((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT)))); 2330 } 2331 2332 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2333 /* Revision: 1.2 $ */ 2334 /* Date: 2011/03/18 16:22:09 $ */ 2335 2336 2337 /* multiplies |a| * |b| and only computes upto digs digits of result 2338 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2339 * many digits of output are created. 2340 */ 2341 static int 2342 basic_multiply_partial_lower(mp_int * a, mp_int * b, mp_int * c, int digs) 2343 { 2344 mp_int t; 2345 int res, pa, pb, ix, iy; 2346 mp_digit u; 2347 mp_word r; 2348 mp_digit tmpx, *tmpt, *tmpy; 2349 2350 /* can we use the fast multiplier? */ 2351 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2352 return fast_col_array_multiply(a, b, c, digs); 2353 } 2354 2355 if ((res = mp_init_size(&t, digs)) != MP_OKAY) { 2356 return res; 2357 } 2358 t.used = digs; 2359 2360 /* compute the digits of the product directly */ 2361 pa = a->used; 2362 for (ix = 0; ix < pa; ix++) { 2363 /* set the carry to zero */ 2364 u = 0; 2365 2366 /* limit ourselves to making digs digits of output */ 2367 pb = MIN(b->used, digs - ix); 2368 2369 /* setup some aliases */ 2370 /* copy of the digit from a used within the nested loop */ 2371 tmpx = a->dp[ix]; 2372 2373 /* an alias for the destination shifted ix places */ 2374 tmpt = t.dp + ix; 2375 2376 /* an alias for the digits of b */ 2377 tmpy = b->dp; 2378 2379 /* compute the columns of the output and propagate the carry */ 2380 for (iy = 0; iy < pb; iy++) { 2381 /* compute the column as a mp_word */ 2382 r = ((mp_word)*tmpt) + 2383 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2384 ((mp_word) u); 2385 2386 /* the new column is the lower part of the result */ 2387 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2388 2389 /* get the carry word from the result */ 2390 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2391 } 2392 /* set carry if it is placed below digs */ 2393 if (ix + iy < digs) { 2394 *tmpt = u; 2395 } 2396 } 2397 2398 trim_unused_digits(&t); 2399 mp_exch(&t, c); 2400 2401 mp_clear(&t); 2402 return MP_OKAY; 2403 } 2404 2405 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2406 /* Revision: 1.3 $ */ 2407 /* Date: 2011/03/18 16:43:04 $ */ 2408 2409 /* high level multiplication (handles sign) */ 2410 static int 2411 signed_multiply(mp_int * a, mp_int * b, mp_int * c) 2412 { 2413 int res, neg; 2414 2415 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2416 /* use Toom-Cook? */ 2417 if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) { 2418 res = toom_cook_multiply(a, b, c); 2419 } else if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2420 /* use Karatsuba? */ 2421 res = karatsuba_multiply(a, b, c); 2422 } else { 2423 /* can we use the fast multiplier? */ 2424 int digs = a->used + b->used + 1; 2425 2426 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2427 res = fast_col_array_multiply(a, b, c, digs); 2428 } else { 2429 res = basic_multiply_partial_lower(a, b, c, (a)->used + (b)->used + 1); 2430 } 2431 } 2432 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2433 return res; 2434 } 2435 2436 /* this is a modified version of fast_s_mul_digs that only produces 2437 * output digits *above* digs. See the comments for fast_s_mul_digs 2438 * to see how it works. 2439 * 2440 * This is used in the Barrett reduction since for one of the multiplications 2441 * only the higher digits were needed. This essentially halves the work. 2442 * 2443 * Based on Algorithm 14.12 on pp.595 of HAC. 2444 */ 2445 static int 2446 fast_basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2447 { 2448 int olduse, res, pa, ix, iz; 2449 mp_digit W[MP_WARRAY]; 2450 mp_word _W; 2451 2452 /* grow the destination as required */ 2453 pa = a->used + b->used; 2454 if (c->alloc < pa) { 2455 if ((res = mp_grow(c, pa)) != MP_OKAY) { 2456 return res; 2457 } 2458 } 2459 2460 /* number of output digits to produce */ 2461 pa = a->used + b->used; 2462 _W = 0; 2463 for (ix = digs; ix < pa; ix++) { 2464 int tx, ty, iy; 2465 mp_digit *tmpx, *tmpy; 2466 2467 /* get offsets into the two bignums */ 2468 ty = MIN(b->used-1, ix); 2469 tx = ix - ty; 2470 2471 /* setup temp aliases */ 2472 tmpx = a->dp + tx; 2473 tmpy = b->dp + ty; 2474 2475 /* this is the number of times the loop will iterrate, essentially its 2476 while (tx++ < a->used && ty-- >= 0) { ... } 2477 */ 2478 iy = MIN(a->used-tx, ty+1); 2479 2480 /* execute loop */ 2481 for (iz = 0; iz < iy; iz++) { 2482 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2483 } 2484 2485 /* store term */ 2486 W[ix] = ((mp_digit)_W) & MP_MASK; 2487 2488 /* make next carry */ 2489 _W = _W >> ((mp_word)DIGIT_BIT); 2490 } 2491 2492 /* setup dest */ 2493 olduse = c->used; 2494 c->used = pa; 2495 2496 { 2497 mp_digit *tmpc; 2498 2499 tmpc = c->dp + digs; 2500 for (ix = digs; ix < pa; ix++) { 2501 /* now extract the previous digit [below the carry] */ 2502 *tmpc++ = W[ix]; 2503 } 2504 2505 /* clear unused digits [that existed in the old copy of c] */ 2506 for (; ix < olduse; ix++) { 2507 *tmpc++ = 0; 2508 } 2509 } 2510 trim_unused_digits(c); 2511 return MP_OKAY; 2512 } 2513 2514 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2515 /* Revision: 1.1.1.1 $ */ 2516 /* Date: 2011/03/12 22:58:18 $ */ 2517 2518 /* multiplies |a| * |b| and does not compute the lower digs digits 2519 * [meant to get the higher part of the product] 2520 */ 2521 static int 2522 basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2523 { 2524 mp_int t; 2525 int res, pa, pb, ix, iy; 2526 mp_digit carry; 2527 mp_word r; 2528 mp_digit tmpx, *tmpt, *tmpy; 2529 2530 /* can we use the fast multiplier? */ 2531 if (can_use_fast_column_array(a->used + b->used + 1, MIN(a->used, b->used))) { 2532 return fast_basic_multiply_partial_upper(a, b, c, digs); 2533 } 2534 2535 if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) { 2536 return res; 2537 } 2538 t.used = a->used + b->used + 1; 2539 2540 pa = a->used; 2541 pb = b->used; 2542 for (ix = 0; ix < pa; ix++) { 2543 /* clear the carry */ 2544 carry = 0; 2545 2546 /* left hand side of A[ix] * B[iy] */ 2547 tmpx = a->dp[ix]; 2548 2549 /* alias to the address of where the digits will be stored */ 2550 tmpt = &(t.dp[digs]); 2551 2552 /* alias for where to read the right hand side from */ 2553 tmpy = b->dp + (digs - ix); 2554 2555 for (iy = digs - ix; iy < pb; iy++) { 2556 /* calculate the double precision result */ 2557 r = ((mp_word)*tmpt) + 2558 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2559 ((mp_word) carry); 2560 2561 /* get the lower part */ 2562 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2563 2564 /* carry the carry */ 2565 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2566 } 2567 *tmpt = carry; 2568 } 2569 trim_unused_digits(&t); 2570 mp_exch(&t, c); 2571 mp_clear(&t); 2572 return MP_OKAY; 2573 } 2574 2575 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2576 /* Revision: 1.3 $ */ 2577 /* Date: 2011/03/18 16:43:04 $ */ 2578 2579 /* reduces x mod m, assumes 0 < x < m**2, mu is 2580 * precomputed via mp_reduce_setup. 2581 * From HAC pp.604 Algorithm 14.42 2582 */ 2583 static int 2584 mp_reduce(mp_int * x, mp_int * m, mp_int * mu) 2585 { 2586 mp_int q; 2587 int res, um = m->used; 2588 2589 /* q = x */ 2590 if ((res = mp_init_copy(&q, x)) != MP_OKAY) { 2591 return res; 2592 } 2593 2594 /* q1 = x / b**(k-1) */ 2595 rshift_digits(&q, um - 1); 2596 2597 /* according to HAC this optimization is ok */ 2598 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2599 if ((res = signed_multiply(&q, mu, &q)) != MP_OKAY) { 2600 goto CLEANUP; 2601 } 2602 } else { 2603 if ((res = basic_multiply_partial_upper(&q, mu, &q, um)) != MP_OKAY) { 2604 goto CLEANUP; 2605 } 2606 } 2607 2608 /* q3 = q2 / b**(k+1) */ 2609 rshift_digits(&q, um + 1); 2610 2611 /* x = x mod b**(k+1), quick (no division) */ 2612 if ((res = modulo_2_to_power(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2613 goto CLEANUP; 2614 } 2615 2616 /* q = q * m mod b**(k+1), quick (no division) */ 2617 if ((res = basic_multiply_partial_lower(&q, m, &q, um + 1)) != MP_OKAY) { 2618 goto CLEANUP; 2619 } 2620 2621 /* x = x - q */ 2622 if ((res = signed_subtract(x, &q, x)) != MP_OKAY) { 2623 goto CLEANUP; 2624 } 2625 2626 /* If x < 0, add b**(k+1) to it */ 2627 if (compare_digit(x, 0) == MP_LT) { 2628 set_word(&q, 1); 2629 if ((res = lshift_digits(&q, um + 1)) != MP_OKAY) { 2630 goto CLEANUP; 2631 } 2632 if ((res = signed_add(x, &q, x)) != MP_OKAY) { 2633 goto CLEANUP; 2634 } 2635 } 2636 2637 /* Back off if it's too big */ 2638 while (signed_compare(x, m) != MP_LT) { 2639 if ((res = basic_subtract(x, m, x)) != MP_OKAY) { 2640 goto CLEANUP; 2641 } 2642 } 2643 2644 CLEANUP: 2645 mp_clear(&q); 2646 2647 return res; 2648 } 2649 2650 /* determines the setup value */ 2651 static int 2652 mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2653 { 2654 int res; 2655 mp_int tmp; 2656 2657 if ((res = mp_init(&tmp)) != MP_OKAY) { 2658 return res; 2659 } 2660 2661 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2662 goto ERR; 2663 } 2664 2665 if ((res = basic_subtract(&tmp, a, d)) != MP_OKAY) { 2666 goto ERR; 2667 } 2668 2669 ERR: 2670 mp_clear(&tmp); 2671 return res; 2672 } 2673 2674 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2675 /* Revision: 1.1.1.1 $ */ 2676 /* Date: 2011/03/12 22:58:18 $ */ 2677 2678 /* reduces a modulo n where n is of the form 2**p - d 2679 This differs from reduce_2k since "d" can be larger 2680 than a single digit. 2681 */ 2682 static int 2683 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2684 { 2685 mp_int q; 2686 int p, res; 2687 2688 if ((res = mp_init(&q)) != MP_OKAY) { 2689 return res; 2690 } 2691 2692 p = mp_count_bits(n); 2693 top: 2694 /* q = a/2**p, a = a mod 2**p */ 2695 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 2696 goto ERR; 2697 } 2698 2699 /* q = q * d */ 2700 if ((res = signed_multiply(&q, d, &q)) != MP_OKAY) { 2701 goto ERR; 2702 } 2703 2704 /* a = a + q */ 2705 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 2706 goto ERR; 2707 } 2708 2709 if (compare_magnitude(a, n) != MP_LT) { 2710 basic_subtract(a, n, a); 2711 goto top; 2712 } 2713 2714 ERR: 2715 mp_clear(&q); 2716 return res; 2717 } 2718 2719 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2720 /* Revision: 1.1.1.1 $ */ 2721 /* Date: 2011/03/12 22:58:18 $ */ 2722 2723 /* squaring using Toom-Cook 3-way algorithm */ 2724 static int 2725 toom_cook_square(mp_int *a, mp_int *b) 2726 { 2727 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2728 int res, B; 2729 2730 /* init temps */ 2731 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2732 return res; 2733 } 2734 2735 /* B */ 2736 B = a->used / 3; 2737 2738 /* a = a2 * B**2 + a1 * B + a0 */ 2739 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2740 goto ERR; 2741 } 2742 2743 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2744 goto ERR; 2745 } 2746 rshift_digits(&a1, B); 2747 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 2748 2749 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2750 goto ERR; 2751 } 2752 rshift_digits(&a2, B*2); 2753 2754 /* w0 = a0*a0 */ 2755 if ((res = square(&a0, &w0)) != MP_OKAY) { 2756 goto ERR; 2757 } 2758 2759 /* w4 = a2 * a2 */ 2760 if ((res = square(&a2, &w4)) != MP_OKAY) { 2761 goto ERR; 2762 } 2763 2764 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2765 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 2766 goto ERR; 2767 } 2768 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2769 goto ERR; 2770 } 2771 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2772 goto ERR; 2773 } 2774 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2775 goto ERR; 2776 } 2777 2778 if ((res = square(&tmp1, &w1)) != MP_OKAY) { 2779 goto ERR; 2780 } 2781 2782 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2783 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 2784 goto ERR; 2785 } 2786 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2787 goto ERR; 2788 } 2789 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2790 goto ERR; 2791 } 2792 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2793 goto ERR; 2794 } 2795 2796 if ((res = square(&tmp1, &w3)) != MP_OKAY) { 2797 goto ERR; 2798 } 2799 2800 2801 /* w2 = (a2 + a1 + a0)**2 */ 2802 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2803 goto ERR; 2804 } 2805 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2806 goto ERR; 2807 } 2808 if ((res = square(&tmp1, &w2)) != MP_OKAY) { 2809 goto ERR; 2810 } 2811 2812 /* now solve the matrix 2813 2814 0 0 0 0 1 2815 1 2 4 8 16 2816 1 1 1 1 1 2817 16 8 4 2 1 2818 1 0 0 0 0 2819 2820 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2821 */ 2822 2823 /* r1 - r4 */ 2824 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 2825 goto ERR; 2826 } 2827 /* r3 - r0 */ 2828 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 2829 goto ERR; 2830 } 2831 /* r1/2 */ 2832 if ((res = half(&w1, &w1)) != MP_OKAY) { 2833 goto ERR; 2834 } 2835 /* r3/2 */ 2836 if ((res = half(&w3, &w3)) != MP_OKAY) { 2837 goto ERR; 2838 } 2839 /* r2 - r0 - r4 */ 2840 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 2841 goto ERR; 2842 } 2843 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 2844 goto ERR; 2845 } 2846 /* r1 - r2 */ 2847 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2848 goto ERR; 2849 } 2850 /* r3 - r2 */ 2851 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2852 goto ERR; 2853 } 2854 /* r1 - 8r0 */ 2855 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 2856 goto ERR; 2857 } 2858 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 2859 goto ERR; 2860 } 2861 /* r3 - 8r4 */ 2862 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 2863 goto ERR; 2864 } 2865 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 2866 goto ERR; 2867 } 2868 /* 3r2 - r1 - r3 */ 2869 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2870 goto ERR; 2871 } 2872 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2873 goto ERR; 2874 } 2875 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2876 goto ERR; 2877 } 2878 /* r1 - r2 */ 2879 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2880 goto ERR; 2881 } 2882 /* r3 - r2 */ 2883 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2884 goto ERR; 2885 } 2886 /* r1/3 */ 2887 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2888 goto ERR; 2889 } 2890 /* r3/3 */ 2891 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2892 goto ERR; 2893 } 2894 2895 /* at this point shift W[n] by B*n */ 2896 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2897 goto ERR; 2898 } 2899 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2900 goto ERR; 2901 } 2902 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2903 goto ERR; 2904 } 2905 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2906 goto ERR; 2907 } 2908 2909 if ((res = signed_add(&w0, &w1, b)) != MP_OKAY) { 2910 goto ERR; 2911 } 2912 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2913 goto ERR; 2914 } 2915 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2916 goto ERR; 2917 } 2918 if ((res = signed_add(&tmp1, b, b)) != MP_OKAY) { 2919 goto ERR; 2920 } 2921 2922 ERR: 2923 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2924 return res; 2925 } 2926 2927 2928 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2929 /* Revision: 1.1.1.1 $ */ 2930 /* Date: 2011/03/12 22:58:18 $ */ 2931 2932 /* Karatsuba squaring, computes b = a*a using three 2933 * half size squarings 2934 * 2935 * See comments of karatsuba_mul for details. It 2936 * is essentially the same algorithm but merely 2937 * tuned to perform recursive squarings. 2938 */ 2939 static int 2940 karatsuba_square(mp_int * a, mp_int * b) 2941 { 2942 mp_int x0, x1, t1, t2, x0x0, x1x1; 2943 int B, err; 2944 2945 err = MP_MEM; 2946 2947 /* min # of digits */ 2948 B = a->used; 2949 2950 /* now divide in two */ 2951 B = (unsigned)B >> 1; 2952 2953 /* init copy all the temps */ 2954 if (mp_init_size(&x0, B) != MP_OKAY) { 2955 goto ERR; 2956 } 2957 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2958 goto X0; 2959 } 2960 /* init temps */ 2961 if (mp_init_size(&t1, a->used * 2) != MP_OKAY) { 2962 goto X1; 2963 } 2964 if (mp_init_size(&t2, a->used * 2) != MP_OKAY) { 2965 goto T1; 2966 } 2967 if (mp_init_size(&x0x0, B * 2) != MP_OKAY) { 2968 goto T2; 2969 } 2970 if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) { 2971 goto X0X0; 2972 } 2973 2974 memcpy(x0.dp, a->dp, B * sizeof(*x0.dp)); 2975 memcpy(x1.dp, &a->dp[B], (a->used - B) * sizeof(*x1.dp)); 2976 2977 x0.used = B; 2978 x1.used = a->used - B; 2979 2980 trim_unused_digits(&x0); 2981 2982 /* now calc the products x0*x0 and x1*x1 */ 2983 if (square(&x0, &x0x0) != MP_OKAY) { 2984 goto X1X1; /* x0x0 = x0*x0 */ 2985 } 2986 if (square(&x1, &x1x1) != MP_OKAY) { 2987 goto X1X1; /* x1x1 = x1*x1 */ 2988 } 2989 /* now calc (x1+x0)**2 */ 2990 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2991 goto X1X1; /* t1 = x1 - x0 */ 2992 } 2993 if (square(&t1, &t1) != MP_OKAY) { 2994 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 2995 } 2996 /* add x0y0 */ 2997 if (basic_add(&x0x0, &x1x1, &t2) != MP_OKAY) { 2998 goto X1X1; /* t2 = x0x0 + x1x1 */ 2999 } 3000 if (basic_subtract(&t1, &t2, &t1) != MP_OKAY) { 3001 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3002 } 3003 /* shift by B */ 3004 if (lshift_digits(&t1, B) != MP_OKAY) { 3005 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3006 } 3007 if (lshift_digits(&x1x1, B * 2) != MP_OKAY) { 3008 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3009 } 3010 if (signed_add(&x0x0, &t1, &t1) != MP_OKAY) { 3011 goto X1X1; /* t1 = x0x0 + t1 */ 3012 } 3013 if (signed_add(&t1, &x1x1, b) != MP_OKAY) { 3014 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3015 } 3016 err = MP_OKAY; 3017 3018 X1X1: 3019 mp_clear(&x1x1); 3020 X0X0: 3021 mp_clear(&x0x0); 3022 T2: 3023 mp_clear(&t2); 3024 T1: 3025 mp_clear(&t1); 3026 X1: 3027 mp_clear(&x1); 3028 X0: 3029 mp_clear(&x0); 3030 ERR: 3031 return err; 3032 } 3033 3034 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3035 /* Revision: 1.2 $ */ 3036 /* Date: 2011/03/12 23:43:54 $ */ 3037 3038 /* the jist of squaring... 3039 * you do like mult except the offset of the tmpx [one that 3040 * starts closer to zero] can't equal the offset of tmpy. 3041 * So basically you set up iy like before then you min it with 3042 * (ty-tx) so that it never happens. You double all those 3043 * you add in the inner loop 3044 3045 After that loop you do the squares and add them in. 3046 */ 3047 3048 static int 3049 fast_basic_square(mp_int * a, mp_int * b) 3050 { 3051 int olduse, res, pa, ix, iz; 3052 mp_digit W[MP_WARRAY], *tmpx; 3053 mp_word W1; 3054 3055 /* grow the destination as required */ 3056 pa = a->used + a->used; 3057 if (b->alloc < pa) { 3058 if ((res = mp_grow(b, pa)) != MP_OKAY) { 3059 return res; 3060 } 3061 } 3062 3063 /* number of output digits to produce */ 3064 W1 = 0; 3065 for (ix = 0; ix < pa; ix++) { 3066 int tx, ty, iy; 3067 mp_word _W; 3068 mp_digit *tmpy; 3069 3070 /* clear counter */ 3071 _W = 0; 3072 3073 /* get offsets into the two bignums */ 3074 ty = MIN(a->used-1, ix); 3075 tx = ix - ty; 3076 3077 /* setup temp aliases */ 3078 tmpx = a->dp + tx; 3079 tmpy = a->dp + ty; 3080 3081 /* this is the number of times the loop will iterrate, essentially 3082 while (tx++ < a->used && ty-- >= 0) { ... } 3083 */ 3084 iy = MIN(a->used-tx, ty+1); 3085 3086 /* now for squaring tx can never equal ty 3087 * we halve the distance since they approach at a rate of 2x 3088 * and we have to round because odd cases need to be executed 3089 */ 3090 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3091 3092 /* execute loop */ 3093 for (iz = 0; iz < iy; iz++) { 3094 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3095 } 3096 3097 /* double the inner product and add carry */ 3098 _W = _W + _W + W1; 3099 3100 /* even columns have the square term in them */ 3101 if ((ix&1) == 0) { 3102 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3103 } 3104 3105 /* store it */ 3106 W[ix] = (mp_digit)(_W & MP_MASK); 3107 3108 /* make next carry */ 3109 W1 = _W >> ((mp_word)DIGIT_BIT); 3110 } 3111 3112 /* setup dest */ 3113 olduse = b->used; 3114 b->used = a->used+a->used; 3115 3116 { 3117 mp_digit *tmpb; 3118 tmpb = b->dp; 3119 for (ix = 0; ix < pa; ix++) { 3120 *tmpb++ = W[ix] & MP_MASK; 3121 } 3122 3123 /* clear unused digits [that existed in the old copy of c] */ 3124 for (; ix < olduse; ix++) { 3125 *tmpb++ = 0; 3126 } 3127 } 3128 trim_unused_digits(b); 3129 return MP_OKAY; 3130 } 3131 3132 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3133 /* Revision: 1.3 $ */ 3134 /* Date: 2011/03/18 16:43:04 $ */ 3135 3136 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3137 static int 3138 basic_square(mp_int * a, mp_int * b) 3139 { 3140 mp_int t; 3141 int res, ix, iy, pa; 3142 mp_word r; 3143 mp_digit carry, tmpx, *tmpt; 3144 3145 pa = a->used; 3146 if ((res = mp_init_size(&t, 2*pa + 1)) != MP_OKAY) { 3147 return res; 3148 } 3149 3150 /* default used is maximum possible size */ 3151 t.used = 2*pa + 1; 3152 3153 for (ix = 0; ix < pa; ix++) { 3154 /* first calculate the digit at 2*ix */ 3155 /* calculate double precision result */ 3156 r = ((mp_word) t.dp[2*ix]) + 3157 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3158 3159 /* store lower part in result */ 3160 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3161 3162 /* get the carry */ 3163 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3164 3165 /* left hand side of A[ix] * A[iy] */ 3166 tmpx = a->dp[ix]; 3167 3168 /* alias for where to store the results */ 3169 tmpt = t.dp + (2*ix + 1); 3170 3171 for (iy = ix + 1; iy < pa; iy++) { 3172 /* first calculate the product */ 3173 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3174 3175 /* now calculate the double precision result, note we use 3176 * addition instead of *2 since it's easier to optimize 3177 */ 3178 r = ((mp_word) *tmpt) + r + r + ((mp_word) carry); 3179 3180 /* store lower part */ 3181 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3182 3183 /* get carry */ 3184 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3185 } 3186 /* propagate upwards */ 3187 while (carry != ((mp_digit) 0)) { 3188 r = ((mp_word) *tmpt) + ((mp_word) carry); 3189 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3190 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3191 } 3192 } 3193 3194 trim_unused_digits(&t); 3195 mp_exch(&t, b); 3196 mp_clear(&t); 3197 return MP_OKAY; 3198 } 3199 3200 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3201 /* Revision: 1.1.1.1 $ */ 3202 /* Date: 2011/03/12 22:58:18 $ */ 3203 3204 #define TOOM_SQR_CUTOFF 400 3205 #define KARATSUBA_SQR_CUTOFF 120 3206 3207 /* computes b = a*a */ 3208 static int 3209 square(mp_int * a, mp_int * b) 3210 { 3211 int res; 3212 3213 /* use Toom-Cook? */ 3214 if (a->used >= TOOM_SQR_CUTOFF) { 3215 res = toom_cook_square(a, b); 3216 /* Karatsuba? */ 3217 } else if (a->used >= KARATSUBA_SQR_CUTOFF) { 3218 res = karatsuba_square(a, b); 3219 } else { 3220 /* can we use the fast comba multiplier? */ 3221 if (can_use_fast_column_array(a->used + a->used + 1, a->used)) { 3222 res = fast_basic_square(a, b); 3223 } else { 3224 res = basic_square(a, b); 3225 } 3226 } 3227 b->sign = MP_ZPOS; 3228 return res; 3229 } 3230 3231 /* find window size */ 3232 static inline int 3233 find_window_size(mp_int *X) 3234 { 3235 int x; 3236 3237 x = mp_count_bits(X); 3238 return (x <= 7) ? 2 : (x <= 36) ? 3 : (x <= 140) ? 4 : (x <= 450) ? 5 : (x <= 1303) ? 6 : (x <= 3529) ? 7 : 8; 3239 } 3240 3241 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3242 /* Revision: 1.3 $ */ 3243 /* Date: 2011/03/18 16:43:04 $ */ 3244 3245 #define TAB_SIZE 256 3246 3247 static int 3248 basic_exponent_mod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3249 { 3250 mp_digit buf; 3251 mp_int M[TAB_SIZE], res, mu; 3252 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3253 int (*redux)(mp_int*,mp_int*,mp_int*); 3254 3255 winsize = find_window_size(X); 3256 3257 /* init M array */ 3258 /* init first cell */ 3259 if ((err = mp_init(&M[1])) != MP_OKAY) { 3260 return err; 3261 } 3262 3263 /* now init the second half of the array */ 3264 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3265 if ((err = mp_init(&M[x])) != MP_OKAY) { 3266 for (y = 1<<(winsize-1); y < x; y++) { 3267 mp_clear(&M[y]); 3268 } 3269 mp_clear(&M[1]); 3270 return err; 3271 } 3272 } 3273 3274 /* create mu, used for Barrett reduction */ 3275 if ((err = mp_init(&mu)) != MP_OKAY) { 3276 goto LBL_M; 3277 } 3278 3279 if (redmode == 0) { 3280 if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) { 3281 goto LBL_MU; 3282 } 3283 redux = mp_reduce; 3284 } else { 3285 if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) { 3286 goto LBL_MU; 3287 } 3288 redux = mp_reduce_2k_l; 3289 } 3290 3291 /* create M table 3292 * 3293 * The M table contains powers of the base, 3294 * e.g. M[x] = G**x mod P 3295 * 3296 * The first half of the table is not 3297 * computed though accept for M[0] and M[1] 3298 */ 3299 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 3300 goto LBL_MU; 3301 } 3302 3303 /* compute the value at M[1<<(winsize-1)] by squaring 3304 * M[1] (winsize-1) times 3305 */ 3306 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3307 goto LBL_MU; 3308 } 3309 3310 for (x = 0; x < (winsize - 1); x++) { 3311 /* square it */ 3312 if ((err = square(&M[1 << (winsize - 1)], 3313 &M[1 << (winsize - 1)])) != MP_OKAY) { 3314 goto LBL_MU; 3315 } 3316 3317 /* reduce modulo P */ 3318 if ((err = redux(&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3319 goto LBL_MU; 3320 } 3321 } 3322 3323 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3324 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3325 */ 3326 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3327 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3328 goto LBL_MU; 3329 } 3330 if ((err = redux(&M[x], P, &mu)) != MP_OKAY) { 3331 goto LBL_MU; 3332 } 3333 } 3334 3335 /* setup result */ 3336 if ((err = mp_init(&res)) != MP_OKAY) { 3337 goto LBL_MU; 3338 } 3339 set_word(&res, 1); 3340 3341 /* set initial mode and bit cnt */ 3342 mode = 0; 3343 bitcnt = 1; 3344 buf = 0; 3345 digidx = X->used - 1; 3346 bitcpy = 0; 3347 bitbuf = 0; 3348 3349 for (;;) { 3350 /* grab next digit as required */ 3351 if (--bitcnt == 0) { 3352 /* if digidx == -1 we are out of digits */ 3353 if (digidx == -1) { 3354 break; 3355 } 3356 /* read next digit and reset the bitcnt */ 3357 buf = X->dp[digidx--]; 3358 bitcnt = (int) DIGIT_BIT; 3359 } 3360 3361 /* grab the next msb from the exponent */ 3362 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3363 buf <<= (mp_digit)1; 3364 3365 /* if the bit is zero and mode == 0 then we ignore it 3366 * These represent the leading zero bits before the first 1 bit 3367 * in the exponent. Technically this opt is not required but it 3368 * does lower the # of trivial squaring/reductions used 3369 */ 3370 if (mode == 0 && y == 0) { 3371 continue; 3372 } 3373 3374 /* if the bit is zero and mode == 1 then we square */ 3375 if (mode == 1 && y == 0) { 3376 if ((err = square(&res, &res)) != MP_OKAY) { 3377 goto LBL_RES; 3378 } 3379 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3380 goto LBL_RES; 3381 } 3382 continue; 3383 } 3384 3385 /* else we add it to the window */ 3386 bitbuf |= (y << (winsize - ++bitcpy)); 3387 mode = 2; 3388 3389 if (bitcpy == winsize) { 3390 /* ok window is filled so square as required and multiply */ 3391 /* square first */ 3392 for (x = 0; x < winsize; x++) { 3393 if ((err = square(&res, &res)) != MP_OKAY) { 3394 goto LBL_RES; 3395 } 3396 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3397 goto LBL_RES; 3398 } 3399 } 3400 3401 /* then multiply */ 3402 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 3403 goto LBL_RES; 3404 } 3405 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3406 goto LBL_RES; 3407 } 3408 3409 /* empty window and reset */ 3410 bitcpy = 0; 3411 bitbuf = 0; 3412 mode = 1; 3413 } 3414 } 3415 3416 /* if bits remain then square/multiply */ 3417 if (mode == 2 && bitcpy > 0) { 3418 /* square then multiply if the bit is set */ 3419 for (x = 0; x < bitcpy; x++) { 3420 if ((err = square(&res, &res)) != MP_OKAY) { 3421 goto LBL_RES; 3422 } 3423 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3424 goto LBL_RES; 3425 } 3426 3427 bitbuf <<= 1; 3428 if ((bitbuf & (1 << winsize)) != 0) { 3429 /* then multiply */ 3430 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 3431 goto LBL_RES; 3432 } 3433 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3434 goto LBL_RES; 3435 } 3436 } 3437 } 3438 } 3439 3440 mp_exch(&res, Y); 3441 err = MP_OKAY; 3442 LBL_RES: 3443 mp_clear(&res); 3444 LBL_MU: 3445 mp_clear(&mu); 3446 LBL_M: 3447 mp_clear(&M[1]); 3448 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3449 mp_clear(&M[x]); 3450 } 3451 return err; 3452 } 3453 3454 /* determines if a number is a valid DR modulus */ 3455 static int 3456 is_diminished_radix_modulus(mp_int *a) 3457 { 3458 int ix; 3459 3460 /* must be at least two digits */ 3461 if (a->used < 2) { 3462 return 0; 3463 } 3464 3465 /* must be of the form b**k - a [a <= b] so all 3466 * but the first digit must be equal to -1 (mod b). 3467 */ 3468 for (ix = 1; ix < a->used; ix++) { 3469 if (a->dp[ix] != MP_MASK) { 3470 return 0; 3471 } 3472 } 3473 return 1; 3474 } 3475 3476 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3477 /* Revision: 1.1.1.1 $ */ 3478 /* Date: 2011/03/12 22:58:18 $ */ 3479 3480 /* determines if mp_reduce_2k can be used */ 3481 static int 3482 mp_reduce_is_2k(mp_int *a) 3483 { 3484 int ix, iy, iw; 3485 mp_digit iz; 3486 3487 if (a->used == 0) { 3488 return MP_NO; 3489 } 3490 if (a->used == 1) { 3491 return MP_YES; 3492 } 3493 if (a->used > 1) { 3494 iy = mp_count_bits(a); 3495 iz = 1; 3496 iw = 1; 3497 3498 /* Test every bit from the second digit up, must be 1 */ 3499 for (ix = DIGIT_BIT; ix < iy; ix++) { 3500 if ((a->dp[iw] & iz) == 0) { 3501 return MP_NO; 3502 } 3503 iz <<= 1; 3504 if (iz > (mp_digit)MP_MASK) { 3505 ++iw; 3506 iz = 1; 3507 } 3508 } 3509 } 3510 return MP_YES; 3511 } 3512 3513 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3514 /* Revision: 1.1.1.1 $ */ 3515 /* Date: 2011/03/12 22:58:18 $ */ 3516 3517 3518 /* d = a * b (mod c) */ 3519 static int 3520 multiply_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3521 { 3522 mp_int t; 3523 int res; 3524 3525 if ((res = mp_init(&t)) != MP_OKAY) { 3526 return res; 3527 } 3528 3529 if ((res = signed_multiply(a, b, &t)) != MP_OKAY) { 3530 mp_clear(&t); 3531 return res; 3532 } 3533 res = modulo(&t, c, d); 3534 mp_clear(&t); 3535 return res; 3536 } 3537 3538 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3539 /* Revision: 1.1.1.1 $ */ 3540 /* Date: 2011/03/12 22:58:18 $ */ 3541 3542 /* setups the montgomery reduction stuff */ 3543 static int 3544 mp_montgomery_setup(mp_int * n, mp_digit * rho) 3545 { 3546 mp_digit x, b; 3547 3548 /* fast inversion mod 2**k 3549 * 3550 * Based on the fact that 3551 * 3552 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3553 * => 2*X*A - X*X*A*A = 1 3554 * => 2*(1) - (1) = 1 3555 */ 3556 b = n->dp[0]; 3557 3558 if ((b & 1) == 0) { 3559 return MP_VAL; 3560 } 3561 3562 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3563 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3564 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3565 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3566 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3567 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3568 } 3569 3570 /* rho = -1/m mod b */ 3571 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3572 3573 return MP_OKAY; 3574 } 3575 3576 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3577 /* Revision: 1.1.1.1 $ */ 3578 /* Date: 2011/03/12 22:58:18 $ */ 3579 3580 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3581 * 3582 * This is an optimized implementation of montgomery_reduce 3583 * which uses the comba method to quickly calculate the columns of the 3584 * reduction. 3585 * 3586 * Based on Algorithm 14.32 on pp.601 of HAC. 3587 */ 3588 static int 3589 fast_mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3590 { 3591 int ix, res, olduse; 3592 /*LINTED*/ 3593 mp_word W[MP_WARRAY]; 3594 3595 /* get old used count */ 3596 olduse = x->used; 3597 3598 /* grow a as required */ 3599 if (x->alloc < n->used + 1) { 3600 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) { 3601 return res; 3602 } 3603 } 3604 3605 /* first we have to get the digits of the input into 3606 * an array of double precision words W[...] 3607 */ 3608 { 3609 mp_word *_W; 3610 mp_digit *tmpx; 3611 3612 /* alias for the W[] array */ 3613 _W = W; 3614 3615 /* alias for the digits of x*/ 3616 tmpx = x->dp; 3617 3618 /* copy the digits of a into W[0..a->used-1] */ 3619 for (ix = 0; ix < x->used; ix++) { 3620 *_W++ = *tmpx++; 3621 } 3622 3623 /* zero the high words of W[a->used..m->used*2] */ 3624 for (; ix < n->used * 2 + 1; ix++) { 3625 *_W++ = 0; 3626 } 3627 } 3628 3629 /* now we proceed to zero successive digits 3630 * from the least significant upwards 3631 */ 3632 for (ix = 0; ix < n->used; ix++) { 3633 /* mu = ai * m' mod b 3634 * 3635 * We avoid a double precision multiplication (which isn't required) 3636 * by casting the value down to a mp_digit. Note this requires 3637 * that W[ix-1] have the carry cleared (see after the inner loop) 3638 */ 3639 mp_digit mu; 3640 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3641 3642 /* a = a + mu * m * b**i 3643 * 3644 * This is computed in place and on the fly. The multiplication 3645 * by b**i is handled by offsetting which columns the results 3646 * are added to. 3647 * 3648 * Note the comba method normally doesn't handle carries in the 3649 * inner loop In this case we fix the carry from the previous 3650 * column since the Montgomery reduction requires digits of the 3651 * result (so far) [see above] to work. This is 3652 * handled by fixing up one carry after the inner loop. The 3653 * carry fixups are done in order so after these loops the 3654 * first m->used words of W[] have the carries fixed 3655 */ 3656 { 3657 int iy; 3658 mp_digit *tmpn; 3659 mp_word *_W; 3660 3661 /* alias for the digits of the modulus */ 3662 tmpn = n->dp; 3663 3664 /* Alias for the columns set by an offset of ix */ 3665 _W = W + ix; 3666 3667 /* inner loop */ 3668 for (iy = 0; iy < n->used; iy++) { 3669 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3670 } 3671 } 3672 3673 /* now fix carry for next digit, W[ix+1] */ 3674 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3675 } 3676 3677 /* now we have to propagate the carries and 3678 * shift the words downward [all those least 3679 * significant digits we zeroed]. 3680 */ 3681 { 3682 mp_digit *tmpx; 3683 mp_word *_W, *_W1; 3684 3685 /* nox fix rest of carries */ 3686 3687 /* alias for current word */ 3688 _W1 = W + ix; 3689 3690 /* alias for next word, where the carry goes */ 3691 _W = W + ++ix; 3692 3693 for (; ix <= n->used * 2 + 1; ix++) { 3694 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3695 } 3696 3697 /* copy out, A = A/b**n 3698 * 3699 * The result is A/b**n but instead of converting from an 3700 * array of mp_word to mp_digit than calling rshift_digits 3701 * we just copy them in the right order 3702 */ 3703 3704 /* alias for destination word */ 3705 tmpx = x->dp; 3706 3707 /* alias for shifted double precision result */ 3708 _W = W + n->used; 3709 3710 for (ix = 0; ix < n->used + 1; ix++) { 3711 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3712 } 3713 3714 /* zero oldused digits, if the input a was larger than 3715 * m->used+1 we'll have to clear the digits 3716 */ 3717 for (; ix < olduse; ix++) { 3718 *tmpx++ = 0; 3719 } 3720 } 3721 3722 /* set the max used and clamp */ 3723 x->used = n->used + 1; 3724 trim_unused_digits(x); 3725 3726 /* if A >= m then A = A - m */ 3727 if (compare_magnitude(x, n) != MP_LT) { 3728 return basic_subtract(x, n, x); 3729 } 3730 return MP_OKAY; 3731 } 3732 3733 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3734 /* Revision: 1.2 $ */ 3735 /* Date: 2011/03/18 16:22:09 $ */ 3736 3737 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3738 static int 3739 mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3740 { 3741 int ix, res, digs; 3742 mp_digit mu; 3743 3744 /* can the fast reduction [comba] method be used? 3745 * 3746 * Note that unlike in mul you're safely allowed *less* 3747 * than the available columns [255 per default] since carries 3748 * are fixed up in the inner loop. 3749 */ 3750 digs = n->used * 2 + 1; 3751 if (can_use_fast_column_array(digs, n->used)) { 3752 return fast_mp_montgomery_reduce(x, n, rho); 3753 } 3754 3755 /* grow the input as required */ 3756 if (x->alloc < digs) { 3757 if ((res = mp_grow(x, digs)) != MP_OKAY) { 3758 return res; 3759 } 3760 } 3761 x->used = digs; 3762 3763 for (ix = 0; ix < n->used; ix++) { 3764 /* mu = ai * rho mod b 3765 * 3766 * The value of rho must be precalculated via 3767 * montgomery_setup() such that 3768 * it equals -1/n0 mod b this allows the 3769 * following inner loop to reduce the 3770 * input one digit at a time 3771 */ 3772 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3773 3774 /* a = a + mu * m * b**i */ 3775 { 3776 int iy; 3777 mp_digit *tmpn, *tmpx, carry; 3778 mp_word r; 3779 3780 /* alias for digits of the modulus */ 3781 tmpn = n->dp; 3782 3783 /* alias for the digits of x [the input] */ 3784 tmpx = x->dp + ix; 3785 3786 /* set the carry to zero */ 3787 carry = 0; 3788 3789 /* Multiply and add in place */ 3790 for (iy = 0; iy < n->used; iy++) { 3791 /* compute product and sum */ 3792 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3793 ((mp_word) carry) + ((mp_word) * tmpx); 3794 3795 /* get carry */ 3796 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3797 3798 /* fix digit */ 3799 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3800 } 3801 /* At this point the ix'th digit of x should be zero */ 3802 3803 3804 /* propagate carries upwards as required*/ 3805 while (carry) { 3806 *tmpx += carry; 3807 carry = *tmpx >> DIGIT_BIT; 3808 *tmpx++ &= MP_MASK; 3809 } 3810 } 3811 } 3812 3813 /* at this point the n.used'th least 3814 * significant digits of x are all zero 3815 * which means we can shift x to the 3816 * right by n.used digits and the 3817 * residue is unchanged. 3818 */ 3819 3820 /* x = x/b**n.used */ 3821 trim_unused_digits(x); 3822 rshift_digits(x, n->used); 3823 3824 /* if x >= n then x = x - n */ 3825 if (compare_magnitude(x, n) != MP_LT) { 3826 return basic_subtract(x, n, x); 3827 } 3828 3829 return MP_OKAY; 3830 } 3831 3832 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3833 /* Revision: 1.3 $ */ 3834 /* Date: 2011/03/18 16:43:04 $ */ 3835 3836 /* determines the setup value */ 3837 static void 3838 diminished_radix_setup(mp_int *a, mp_digit *d) 3839 { 3840 /* the casts are required if DIGIT_BIT is one less than 3841 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3842 */ 3843 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3844 ((mp_word)a->dp[0])); 3845 } 3846 3847 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3848 /* Revision: 1.1.1.1 $ */ 3849 /* Date: 2011/03/12 22:58:18 $ */ 3850 3851 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3852 * 3853 * Based on algorithm from the paper 3854 * 3855 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3856 * Chae Hoon Lim, Pil Joong Lee, 3857 * POSTECH Information Research Laboratories 3858 * 3859 * The modulus must be of a special format [see manual] 3860 * 3861 * Has been modified to use algorithm 7.10 from the LTM book instead 3862 * 3863 * Input x must be in the range 0 <= x <= (n-1)**2 3864 */ 3865 static int 3866 diminished_radix_reduce(mp_int * x, mp_int * n, mp_digit k) 3867 { 3868 int err, i, m; 3869 mp_word r; 3870 mp_digit mu, *tmpx1, *tmpx2; 3871 3872 /* m = digits in modulus */ 3873 m = n->used; 3874 3875 /* ensure that "x" has at least 2m digits */ 3876 if (x->alloc < m + m) { 3877 if ((err = mp_grow(x, m + m)) != MP_OKAY) { 3878 return err; 3879 } 3880 } 3881 3882 /* top of loop, this is where the code resumes if 3883 * another reduction pass is required. 3884 */ 3885 top: 3886 /* aliases for digits */ 3887 /* alias for lower half of x */ 3888 tmpx1 = x->dp; 3889 3890 /* alias for upper half of x, or x/B**m */ 3891 tmpx2 = x->dp + m; 3892 3893 /* set carry to zero */ 3894 mu = 0; 3895 3896 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3897 for (i = 0; i < m; i++) { 3898 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3899 *tmpx1++ = (mp_digit)(r & MP_MASK); 3900 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3901 } 3902 3903 /* set final carry */ 3904 *tmpx1++ = mu; 3905 3906 /* zero words above m */ 3907 for (i = m + 1; i < x->used; i++) { 3908 *tmpx1++ = 0; 3909 } 3910 3911 /* clamp, sub and return */ 3912 trim_unused_digits(x); 3913 3914 /* if x >= n then subtract and reduce again 3915 * Each successive "recursion" makes the input smaller and smaller. 3916 */ 3917 if (compare_magnitude(x, n) != MP_LT) { 3918 basic_subtract(x, n, x); 3919 goto top; 3920 } 3921 return MP_OKAY; 3922 } 3923 3924 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3925 /* Revision: 1.1.1.1 $ */ 3926 /* Date: 2011/03/12 22:58:18 $ */ 3927 3928 /* determines the setup value */ 3929 static int 3930 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3931 { 3932 int res, p; 3933 mp_int tmp; 3934 3935 if ((res = mp_init(&tmp)) != MP_OKAY) { 3936 return res; 3937 } 3938 3939 p = mp_count_bits(a); 3940 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3941 mp_clear(&tmp); 3942 return res; 3943 } 3944 3945 if ((res = basic_subtract(&tmp, a, &tmp)) != MP_OKAY) { 3946 mp_clear(&tmp); 3947 return res; 3948 } 3949 3950 *d = tmp.dp[0]; 3951 mp_clear(&tmp); 3952 return MP_OKAY; 3953 } 3954 3955 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3956 /* Revision: 1.1.1.1 $ */ 3957 /* Date: 2011/03/12 22:58:18 $ */ 3958 3959 /* reduces a modulo n where n is of the form 2**p - d */ 3960 static int 3961 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 3962 { 3963 mp_int q; 3964 int p, res; 3965 3966 if ((res = mp_init(&q)) != MP_OKAY) { 3967 return res; 3968 } 3969 3970 p = mp_count_bits(n); 3971 top: 3972 /* q = a/2**p, a = a mod 2**p */ 3973 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 3974 goto ERR; 3975 } 3976 3977 if (d != 1) { 3978 /* q = q * d */ 3979 if ((res = multiply_digit(&q, d, &q)) != MP_OKAY) { 3980 goto ERR; 3981 } 3982 } 3983 3984 /* a = a + q */ 3985 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 3986 goto ERR; 3987 } 3988 3989 if (compare_magnitude(a, n) != MP_LT) { 3990 basic_subtract(a, n, a); 3991 goto top; 3992 } 3993 3994 ERR: 3995 mp_clear(&q); 3996 return res; 3997 } 3998 3999 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 4000 /* Revision: 1.1.1.1 $ */ 4001 /* Date: 2011/03/12 22:58:18 $ */ 4002 4003 /* 4004 * shifts with subtractions when the result is greater than b. 4005 * 4006 * The method is slightly modified to shift B unconditionally upto just under 4007 * the leading bit of b. This saves alot of multiple precision shifting. 4008 */ 4009 static int 4010 mp_montgomery_calc_normalization(mp_int * a, mp_int * b) 4011 { 4012 int x, bits, res; 4013 4014 /* how many bits of last digit does b use */ 4015 bits = mp_count_bits(b) % DIGIT_BIT; 4016 4017 if (b->used > 1) { 4018 if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4019 return res; 4020 } 4021 } else { 4022 set_word(a, 1); 4023 bits = 1; 4024 } 4025 4026 4027 /* now compute C = A * B mod b */ 4028 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4029 if ((res = doubled(a, a)) != MP_OKAY) { 4030 return res; 4031 } 4032 if (compare_magnitude(a, b) != MP_LT) { 4033 if ((res = basic_subtract(a, b, a)) != MP_OKAY) { 4034 return res; 4035 } 4036 } 4037 } 4038 4039 return MP_OKAY; 4040 } 4041 4042 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4043 /* Revision: 1.1.1.1 $ */ 4044 /* Date: 2011/03/12 22:58:18 $ */ 4045 4046 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4047 * 4048 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4049 * The value of k changes based on the size of the exponent. 4050 * 4051 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4052 */ 4053 4054 #define TAB_SIZE 256 4055 4056 static int 4057 fast_exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4058 { 4059 mp_int M[TAB_SIZE], res; 4060 mp_digit buf, mp; 4061 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4062 4063 /* use a pointer to the reduction algorithm. This allows us to use 4064 * one of many reduction algorithms without modding the guts of 4065 * the code with if statements everywhere. 4066 */ 4067 int (*redux)(mp_int*,mp_int*,mp_digit); 4068 4069 winsize = find_window_size(X); 4070 4071 /* init M array */ 4072 /* init first cell */ 4073 if ((err = mp_init(&M[1])) != MP_OKAY) { 4074 return err; 4075 } 4076 4077 /* now init the second half of the array */ 4078 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4079 if ((err = mp_init(&M[x])) != MP_OKAY) { 4080 for (y = 1<<(winsize-1); y < x; y++) { 4081 mp_clear(&M[y]); 4082 } 4083 mp_clear(&M[1]); 4084 return err; 4085 } 4086 } 4087 4088 /* determine and setup reduction code */ 4089 if (redmode == 0) { 4090 /* now setup montgomery */ 4091 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) { 4092 goto LBL_M; 4093 } 4094 4095 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4096 if (can_use_fast_column_array(P->used + P->used + 1, P->used)) { 4097 redux = fast_mp_montgomery_reduce; 4098 } else { 4099 /* use slower baseline Montgomery method */ 4100 redux = mp_montgomery_reduce; 4101 } 4102 } else if (redmode == 1) { 4103 /* setup DR reduction for moduli of the form B**k - b */ 4104 diminished_radix_setup(P, &mp); 4105 redux = diminished_radix_reduce; 4106 } else { 4107 /* setup DR reduction for moduli of the form 2**k - b */ 4108 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4109 goto LBL_M; 4110 } 4111 redux = mp_reduce_2k; 4112 } 4113 4114 /* setup result */ 4115 if ((err = mp_init(&res)) != MP_OKAY) { 4116 goto LBL_M; 4117 } 4118 4119 /* create M table 4120 * 4121 4122 * 4123 * The first half of the table is not computed though accept for M[0] and M[1] 4124 */ 4125 4126 if (redmode == 0) { 4127 /* now we need R mod m */ 4128 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) { 4129 goto LBL_RES; 4130 } 4131 4132 /* now set M[1] to G * R mod m */ 4133 if ((err = multiply_modulo(&M[1], G, &res, P)) != MP_OKAY) { 4134 goto LBL_RES; 4135 } 4136 } else { 4137 set_word(&res, 1); 4138 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 4139 goto LBL_RES; 4140 } 4141 } 4142 4143 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4144 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4145 goto LBL_RES; 4146 } 4147 4148 for (x = 0; x < (winsize - 1); x++) { 4149 if ((err = square(&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4150 goto LBL_RES; 4151 } 4152 if ((err = (*redux)(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4153 goto LBL_RES; 4154 } 4155 } 4156 4157 /* create upper table */ 4158 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4159 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4160 goto LBL_RES; 4161 } 4162 if ((err = (*redux)(&M[x], P, mp)) != MP_OKAY) { 4163 goto LBL_RES; 4164 } 4165 } 4166 4167 /* set initial mode and bit cnt */ 4168 mode = 0; 4169 bitcnt = 1; 4170 buf = 0; 4171 digidx = X->used - 1; 4172 bitcpy = 0; 4173 bitbuf = 0; 4174 4175 for (;;) { 4176 /* grab next digit as required */ 4177 if (--bitcnt == 0) { 4178 /* if digidx == -1 we are out of digits so break */ 4179 if (digidx == -1) { 4180 break; 4181 } 4182 /* read next digit and reset bitcnt */ 4183 buf = X->dp[digidx--]; 4184 bitcnt = (int)DIGIT_BIT; 4185 } 4186 4187 /* grab the next msb from the exponent */ 4188 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4189 buf <<= (mp_digit)1; 4190 4191 /* if the bit is zero and mode == 0 then we ignore it 4192 * These represent the leading zero bits before the first 1 bit 4193 * in the exponent. Technically this opt is not required but it 4194 * does lower the # of trivial squaring/reductions used 4195 */ 4196 if (mode == 0 && y == 0) { 4197 continue; 4198 } 4199 4200 /* if the bit is zero and mode == 1 then we square */ 4201 if (mode == 1 && y == 0) { 4202 if ((err = square(&res, &res)) != MP_OKAY) { 4203 goto LBL_RES; 4204 } 4205 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4206 goto LBL_RES; 4207 } 4208 continue; 4209 } 4210 4211 /* else we add it to the window */ 4212 bitbuf |= (y << (winsize - ++bitcpy)); 4213 mode = 2; 4214 4215 if (bitcpy == winsize) { 4216 /* ok window is filled so square as required and multiply */ 4217 /* square first */ 4218 for (x = 0; x < winsize; x++) { 4219 if ((err = square(&res, &res)) != MP_OKAY) { 4220 goto LBL_RES; 4221 } 4222 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4223 goto LBL_RES; 4224 } 4225 } 4226 4227 /* then multiply */ 4228 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 4229 goto LBL_RES; 4230 } 4231 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4232 goto LBL_RES; 4233 } 4234 4235 /* empty window and reset */ 4236 bitcpy = 0; 4237 bitbuf = 0; 4238 mode = 1; 4239 } 4240 } 4241 4242 /* if bits remain then square/multiply */ 4243 if (mode == 2 && bitcpy > 0) { 4244 /* square then multiply if the bit is set */ 4245 for (x = 0; x < bitcpy; x++) { 4246 if ((err = square(&res, &res)) != MP_OKAY) { 4247 goto LBL_RES; 4248 } 4249 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4250 goto LBL_RES; 4251 } 4252 4253 /* get next bit of the window */ 4254 bitbuf <<= 1; 4255 if ((bitbuf & (1 << winsize)) != 0) { 4256 /* then multiply */ 4257 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 4258 goto LBL_RES; 4259 } 4260 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4261 goto LBL_RES; 4262 } 4263 } 4264 } 4265 } 4266 4267 if (redmode == 0) { 4268 /* fixup result if Montgomery reduction is used 4269 * recall that any value in a Montgomery system is 4270 * actually multiplied by R mod n. So we have 4271 * to reduce one more time to cancel out the factor 4272 * of R. 4273 */ 4274 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4275 goto LBL_RES; 4276 } 4277 } 4278 4279 /* swap res with Y */ 4280 mp_exch(&res, Y); 4281 err = MP_OKAY; 4282 LBL_RES: 4283 mp_clear(&res); 4284 LBL_M: 4285 mp_clear(&M[1]); 4286 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4287 mp_clear(&M[x]); 4288 } 4289 return err; 4290 } 4291 4292 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_exponent_modulo.c,v $ */ 4293 /* Revision: 1.4 $ */ 4294 /* Date: 2011/03/18 16:43:04 $ */ 4295 4296 /* this is a shell function that calls either the normal or Montgomery 4297 * exptmod functions. Originally the call to the montgomery code was 4298 * embedded in the normal function but that wasted alot of stack space 4299 * for nothing (since 99% of the time the Montgomery code would be called) 4300 */ 4301 static int 4302 exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4303 { 4304 int diminished_radix; 4305 4306 /* modulus P must be positive */ 4307 if (P->sign == MP_NEG) { 4308 return MP_VAL; 4309 } 4310 4311 /* if exponent X is negative we have to recurse */ 4312 if (X->sign == MP_NEG) { 4313 mp_int tmpG, tmpX; 4314 int err; 4315 4316 /* first compute 1/G mod P */ 4317 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4318 return err; 4319 } 4320 if ((err = modular_inverse(&tmpG, G, P)) != MP_OKAY) { 4321 mp_clear(&tmpG); 4322 return err; 4323 } 4324 4325 /* now get |X| */ 4326 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4327 mp_clear(&tmpG); 4328 return err; 4329 } 4330 if ((err = absolute(X, &tmpX)) != MP_OKAY) { 4331 mp_clear_multi(&tmpG, &tmpX, NULL); 4332 return err; 4333 } 4334 4335 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4336 err = exponent_modulo(&tmpG, &tmpX, P, Y); 4337 mp_clear_multi(&tmpG, &tmpX, NULL); 4338 return err; 4339 } 4340 4341 /* modified diminished radix reduction */ 4342 if (mp_reduce_is_2k_l(P) == MP_YES) { 4343 return basic_exponent_mod(G, X, P, Y, 1); 4344 } 4345 4346 /* is it a DR modulus? */ 4347 diminished_radix = is_diminished_radix_modulus(P); 4348 4349 /* if not, is it a unrestricted DR modulus? */ 4350 if (!diminished_radix) { 4351 diminished_radix = mp_reduce_is_2k(P) << 1; 4352 } 4353 4354 /* if the modulus is odd or diminished_radix, use the montgomery method */ 4355 if (BN_is_odd(P) == 1 || diminished_radix) { 4356 return fast_exponent_modulo(G, X, P, Y, diminished_radix); 4357 } 4358 /* otherwise use the generic Barrett reduction technique */ 4359 return basic_exponent_mod(G, X, P, Y, 0); 4360 } 4361 4362 /* reverse an array, used for radix code */ 4363 static void 4364 bn_reverse(unsigned char *s, int len) 4365 { 4366 int ix, iy; 4367 uint8_t t; 4368 4369 for (ix = 0, iy = len - 1; ix < iy ; ix++, --iy) { 4370 t = s[ix]; 4371 s[ix] = s[iy]; 4372 s[iy] = t; 4373 } 4374 } 4375 4376 static inline int 4377 is_power_of_two(mp_digit b, int *p) 4378 { 4379 int x; 4380 4381 /* fast return if no power of two */ 4382 if ((b==0) || (b & (b-1))) { 4383 return 0; 4384 } 4385 4386 for (x = 0; x < DIGIT_BIT; x++) { 4387 if (b == (((mp_digit)1)<<x)) { 4388 *p = x; 4389 return 1; 4390 } 4391 } 4392 return 0; 4393 } 4394 4395 /* single digit division (based on routine from MPI) */ 4396 static int 4397 signed_divide_word(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4398 { 4399 mp_int q; 4400 mp_word w; 4401 mp_digit t; 4402 int res, ix; 4403 4404 /* cannot divide by zero */ 4405 if (b == 0) { 4406 return MP_VAL; 4407 } 4408 4409 /* quick outs */ 4410 if (b == 1 || MP_ISZERO(a) == 1) { 4411 if (d != NULL) { 4412 *d = 0; 4413 } 4414 if (c != NULL) { 4415 return mp_copy(a, c); 4416 } 4417 return MP_OKAY; 4418 } 4419 4420 /* power of two ? */ 4421 if (is_power_of_two(b, &ix) == 1) { 4422 if (d != NULL) { 4423 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4424 } 4425 if (c != NULL) { 4426 return rshift_bits(a, ix, c, NULL); 4427 } 4428 return MP_OKAY; 4429 } 4430 4431 /* three? */ 4432 if (b == 3) { 4433 return third(a, c, d); 4434 } 4435 4436 /* no easy answer [c'est la vie]. Just division */ 4437 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4438 return res; 4439 } 4440 4441 q.used = a->used; 4442 q.sign = a->sign; 4443 w = 0; 4444 for (ix = a->used - 1; ix >= 0; ix--) { 4445 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4446 4447 if (w >= b) { 4448 t = (mp_digit)(w / b); 4449 w -= ((mp_word)t) * ((mp_word)b); 4450 } else { 4451 t = 0; 4452 } 4453 q.dp[ix] = (mp_digit)t; 4454 } 4455 4456 if (d != NULL) { 4457 *d = (mp_digit)w; 4458 } 4459 4460 if (c != NULL) { 4461 trim_unused_digits(&q); 4462 mp_exch(&q, c); 4463 } 4464 mp_clear(&q); 4465 4466 return res; 4467 } 4468 4469 static const mp_digit ltm_prime_tab[] = { 4470 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4471 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4472 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4473 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4474 #ifndef MP_8BIT 4475 0x0083, 4476 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4477 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4478 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4479 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4480 4481 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4482 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4483 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4484 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4485 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4486 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4487 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4488 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4489 4490 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4491 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4492 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4493 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4494 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4495 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4496 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4497 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4498 4499 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4500 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4501 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4502 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4503 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4504 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4505 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4506 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4507 #endif 4508 }; 4509 4510 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4511 4512 static inline int 4513 mp_prime_is_divisible(mp_int *a, int *result) 4514 { 4515 int err, ix; 4516 mp_digit res; 4517 4518 /* default to not */ 4519 *result = MP_NO; 4520 4521 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4522 /* what is a mod LBL_prime_tab[ix] */ 4523 if ((err = signed_divide_word(a, ltm_prime_tab[ix], NULL, &res)) != MP_OKAY) { 4524 return err; 4525 } 4526 4527 /* is the residue zero? */ 4528 if (res == 0) { 4529 *result = MP_YES; 4530 return MP_OKAY; 4531 } 4532 } 4533 4534 return MP_OKAY; 4535 } 4536 4537 /* single digit addition */ 4538 static int 4539 add_single_digit(mp_int *a, mp_digit b, mp_int *c) 4540 { 4541 int res, ix, oldused; 4542 mp_digit *tmpa, *tmpc, mu; 4543 4544 /* grow c as required */ 4545 if (c->alloc < a->used + 1) { 4546 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4547 return res; 4548 } 4549 } 4550 4551 /* if a is negative and |a| >= b, call c = |a| - b */ 4552 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4553 /* temporarily fix sign of a */ 4554 a->sign = MP_ZPOS; 4555 4556 /* c = |a| - b */ 4557 res = signed_subtract_word(a, b, c); 4558 4559 /* fix sign */ 4560 a->sign = c->sign = MP_NEG; 4561 4562 /* clamp */ 4563 trim_unused_digits(c); 4564 4565 return res; 4566 } 4567 4568 /* old number of used digits in c */ 4569 oldused = c->used; 4570 4571 /* sign always positive */ 4572 c->sign = MP_ZPOS; 4573 4574 /* source alias */ 4575 tmpa = a->dp; 4576 4577 /* destination alias */ 4578 tmpc = c->dp; 4579 4580 /* if a is positive */ 4581 if (a->sign == MP_ZPOS) { 4582 /* add digit, after this we're propagating 4583 * the carry. 4584 */ 4585 *tmpc = *tmpa++ + b; 4586 mu = *tmpc >> DIGIT_BIT; 4587 *tmpc++ &= MP_MASK; 4588 4589 /* now handle rest of the digits */ 4590 for (ix = 1; ix < a->used; ix++) { 4591 *tmpc = *tmpa++ + mu; 4592 mu = *tmpc >> DIGIT_BIT; 4593 *tmpc++ &= MP_MASK; 4594 } 4595 /* set final carry */ 4596 ix++; 4597 *tmpc++ = mu; 4598 4599 /* setup size */ 4600 c->used = a->used + 1; 4601 } else { 4602 /* a was negative and |a| < b */ 4603 c->used = 1; 4604 4605 /* the result is a single digit */ 4606 if (a->used == 1) { 4607 *tmpc++ = b - a->dp[0]; 4608 } else { 4609 *tmpc++ = b; 4610 } 4611 4612 /* setup count so the clearing of oldused 4613 * can fall through correctly 4614 */ 4615 ix = 1; 4616 } 4617 4618 /* now zero to oldused */ 4619 while (ix++ < oldused) { 4620 *tmpc++ = 0; 4621 } 4622 trim_unused_digits(c); 4623 4624 return MP_OKAY; 4625 } 4626 4627 /* single digit subtraction */ 4628 static int 4629 signed_subtract_word(mp_int *a, mp_digit b, mp_int *c) 4630 { 4631 mp_digit *tmpa, *tmpc, mu; 4632 int res, ix, oldused; 4633 4634 /* grow c as required */ 4635 if (c->alloc < a->used + 1) { 4636 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4637 return res; 4638 } 4639 } 4640 4641 /* if a is negative just do an unsigned 4642 * addition [with fudged signs] 4643 */ 4644 if (a->sign == MP_NEG) { 4645 a->sign = MP_ZPOS; 4646 res = add_single_digit(a, b, c); 4647 a->sign = c->sign = MP_NEG; 4648 4649 /* clamp */ 4650 trim_unused_digits(c); 4651 4652 return res; 4653 } 4654 4655 /* setup regs */ 4656 oldused = c->used; 4657 tmpa = a->dp; 4658 tmpc = c->dp; 4659 4660 /* if a <= b simply fix the single digit */ 4661 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4662 if (a->used == 1) { 4663 *tmpc++ = b - *tmpa; 4664 } else { 4665 *tmpc++ = b; 4666 } 4667 ix = 1; 4668 4669 /* negative/1digit */ 4670 c->sign = MP_NEG; 4671 c->used = 1; 4672 } else { 4673 /* positive/size */ 4674 c->sign = MP_ZPOS; 4675 c->used = a->used; 4676 4677 /* subtract first digit */ 4678 *tmpc = *tmpa++ - b; 4679 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4680 *tmpc++ &= MP_MASK; 4681 4682 /* handle rest of the digits */ 4683 for (ix = 1; ix < a->used; ix++) { 4684 *tmpc = *tmpa++ - mu; 4685 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4686 *tmpc++ &= MP_MASK; 4687 } 4688 } 4689 4690 /* zero excess digits */ 4691 while (ix++ < oldused) { 4692 *tmpc++ = 0; 4693 } 4694 trim_unused_digits(c); 4695 return MP_OKAY; 4696 } 4697 4698 static const int lnz[16] = { 4699 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4700 }; 4701 4702 /* Counts the number of lsbs which are zero before the first zero bit */ 4703 static int 4704 mp_cnt_lsb(mp_int *a) 4705 { 4706 int x; 4707 mp_digit q, qq; 4708 4709 /* easy out */ 4710 if (MP_ISZERO(a) == 1) { 4711 return 0; 4712 } 4713 4714 /* scan lower digits until non-zero */ 4715 for (x = 0; x < a->used && a->dp[x] == 0; x++) { 4716 } 4717 q = a->dp[x]; 4718 x *= DIGIT_BIT; 4719 4720 /* now scan this digit until a 1 is found */ 4721 if ((q & 1) == 0) { 4722 do { 4723 qq = q & 15; 4724 /* LINTED previous op ensures range of qq */ 4725 x += lnz[qq]; 4726 q >>= 4; 4727 } while (qq == 0); 4728 } 4729 return x; 4730 } 4731 4732 /* c = a * a (mod b) */ 4733 static int 4734 square_modulo(mp_int *a, mp_int *b, mp_int *c) 4735 { 4736 int res; 4737 mp_int t; 4738 4739 if ((res = mp_init(&t)) != MP_OKAY) { 4740 return res; 4741 } 4742 4743 if ((res = square(a, &t)) != MP_OKAY) { 4744 mp_clear(&t); 4745 return res; 4746 } 4747 res = modulo(&t, b, c); 4748 mp_clear(&t); 4749 return res; 4750 } 4751 4752 static int 4753 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4754 { 4755 mp_int n1, y, r; 4756 int s, j, err; 4757 4758 /* default */ 4759 *result = MP_NO; 4760 4761 /* ensure b > 1 */ 4762 if (compare_digit(b, 1) != MP_GT) { 4763 return MP_VAL; 4764 } 4765 4766 /* get n1 = a - 1 */ 4767 if ((err = mp_init_copy(&n1, a)) != MP_OKAY) { 4768 return err; 4769 } 4770 if ((err = signed_subtract_word(&n1, 1, &n1)) != MP_OKAY) { 4771 goto LBL_N1; 4772 } 4773 4774 /* set 2**s * r = n1 */ 4775 if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) { 4776 goto LBL_N1; 4777 } 4778 4779 /* count the number of least significant bits 4780 * which are zero 4781 */ 4782 s = mp_cnt_lsb(&r); 4783 4784 /* now divide n - 1 by 2**s */ 4785 if ((err = rshift_bits(&r, s, &r, NULL)) != MP_OKAY) { 4786 goto LBL_R; 4787 } 4788 4789 /* compute y = b**r mod a */ 4790 if ((err = mp_init(&y)) != MP_OKAY) { 4791 goto LBL_R; 4792 } 4793 if ((err = exponent_modulo(b, &r, a, &y)) != MP_OKAY) { 4794 goto LBL_Y; 4795 } 4796 4797 /* if y != 1 and y != n1 do */ 4798 if (compare_digit(&y, 1) != MP_EQ && signed_compare(&y, &n1) != MP_EQ) { 4799 j = 1; 4800 /* while j <= s-1 and y != n1 */ 4801 while ((j <= (s - 1)) && signed_compare(&y, &n1) != MP_EQ) { 4802 if ((err = square_modulo(&y, a, &y)) != MP_OKAY) { 4803 goto LBL_Y; 4804 } 4805 4806 /* if y == 1 then composite */ 4807 if (compare_digit(&y, 1) == MP_EQ) { 4808 goto LBL_Y; 4809 } 4810 4811 ++j; 4812 } 4813 4814 /* if y != n1 then composite */ 4815 if (signed_compare(&y, &n1) != MP_EQ) { 4816 goto LBL_Y; 4817 } 4818 } 4819 4820 /* probably prime now */ 4821 *result = MP_YES; 4822 LBL_Y: 4823 mp_clear(&y); 4824 LBL_R: 4825 mp_clear(&r); 4826 LBL_N1: 4827 mp_clear(&n1); 4828 return err; 4829 } 4830 4831 /* performs a variable number of rounds of Miller-Rabin 4832 * 4833 * Probability of error after t rounds is no more than 4834 4835 * 4836 * Sets result to 1 if probably prime, 0 otherwise 4837 */ 4838 static int 4839 mp_prime_is_prime(mp_int *a, int t, int *result) 4840 { 4841 mp_int b; 4842 int ix, err, res; 4843 4844 /* default to no */ 4845 *result = MP_NO; 4846 4847 /* valid value of t? */ 4848 if (t <= 0 || t > (int)PRIME_SIZE) { 4849 return MP_VAL; 4850 } 4851 4852 /* is the input equal to one of the primes in the table? */ 4853 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4854 if (compare_digit(a, ltm_prime_tab[ix]) == MP_EQ) { 4855 *result = 1; 4856 return MP_OKAY; 4857 } 4858 } 4859 4860 /* first perform trial division */ 4861 if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) { 4862 return err; 4863 } 4864 4865 /* return if it was trivially divisible */ 4866 if (res == MP_YES) { 4867 return MP_OKAY; 4868 } 4869 4870 /* now perform the miller-rabin rounds */ 4871 if ((err = mp_init(&b)) != MP_OKAY) { 4872 return err; 4873 } 4874 4875 for (ix = 0; ix < t; ix++) { 4876 /* set the prime */ 4877 set_word(&b, ltm_prime_tab[ix]); 4878 4879 if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) { 4880 goto LBL_B; 4881 } 4882 4883 if (res == MP_NO) { 4884 goto LBL_B; 4885 } 4886 } 4887 4888 /* passed the test */ 4889 *result = MP_YES; 4890 LBL_B: 4891 mp_clear(&b); 4892 return err; 4893 } 4894 4895 /* returns size of ASCII reprensentation */ 4896 static int 4897 mp_radix_size(mp_int *a, int radix, int *size) 4898 { 4899 int res, digs; 4900 mp_int t; 4901 mp_digit d; 4902 4903 *size = 0; 4904 4905 /* special case for binary */ 4906 if (radix == 2) { 4907 *size = mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4908 return MP_OKAY; 4909 } 4910 4911 /* make sure the radix is in range */ 4912 if (radix < 2 || radix > 64) { 4913 return MP_VAL; 4914 } 4915 4916 if (MP_ISZERO(a) == MP_YES) { 4917 *size = 2; 4918 return MP_OKAY; 4919 } 4920 4921 /* digs is the digit count */ 4922 digs = 0; 4923 4924 /* if it's negative add one for the sign */ 4925 if (a->sign == MP_NEG) { 4926 ++digs; 4927 } 4928 4929 /* init a copy of the input */ 4930 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4931 return res; 4932 } 4933 4934 /* force temp to positive */ 4935 t.sign = MP_ZPOS; 4936 4937 /* fetch out all of the digits */ 4938 while (MP_ISZERO(&t) == MP_NO) { 4939 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 4940 mp_clear(&t); 4941 return res; 4942 } 4943 ++digs; 4944 } 4945 mp_clear(&t); 4946 4947 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 4948 *size = digs + 1; 4949 return MP_OKAY; 4950 } 4951 4952 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 4953 4954 /* stores a bignum as a ASCII string in a given radix (2..64) 4955 * 4956 * Stores upto maxlen-1 chars and always a NULL byte 4957 */ 4958 static int 4959 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 4960 { 4961 int res, digs; 4962 mp_int t; 4963 mp_digit d; 4964 char *_s = str; 4965 4966 /* check range of the maxlen, radix */ 4967 if (maxlen < 2 || radix < 2 || radix > 64) { 4968 return MP_VAL; 4969 } 4970 4971 /* quick out if its zero */ 4972 if (MP_ISZERO(a) == MP_YES) { 4973 *str++ = '0'; 4974 *str = '\0'; 4975 return MP_OKAY; 4976 } 4977 4978 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4979 return res; 4980 } 4981 4982 /* if it is negative output a - */ 4983 if (t.sign == MP_NEG) { 4984 /* we have to reverse our digits later... but not the - sign!! */ 4985 ++_s; 4986 4987 /* store the flag and mark the number as positive */ 4988 *str++ = '-'; 4989 t.sign = MP_ZPOS; 4990 4991 /* subtract a char */ 4992 --maxlen; 4993 } 4994 4995 digs = 0; 4996 while (MP_ISZERO(&t) == 0) { 4997 if (--maxlen < 1) { 4998 /* no more room */ 4999 break; 5000 } 5001 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5002 mp_clear(&t); 5003 return res; 5004 } 5005 /* LINTED -- radix' range is checked above, limits d's range */ 5006 *str++ = mp_s_rmap[d]; 5007 ++digs; 5008 } 5009 5010 /* reverse the digits of the string. In this case _s points 5011 * to the first digit [exluding the sign] of the number 5012 */ 5013 bn_reverse((unsigned char *)_s, digs); 5014 5015 /* append a NULL so the string is properly terminated */ 5016 *str = '\0'; 5017 5018 mp_clear(&t); 5019 return MP_OKAY; 5020 } 5021 5022 static char * 5023 formatbn(const BIGNUM *a, const int radix) 5024 { 5025 char *s; 5026 int len; 5027 5028 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5029 return NULL; 5030 } 5031 if ((s = allocate(1, (size_t)len)) != NULL) { 5032 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5033 deallocate(s, (size_t)len); 5034 return NULL; 5035 } 5036 } 5037 return s; 5038 } 5039 5040 static int 5041 mp_getradix_num(mp_int *a, int radix, char *s) 5042 { 5043 int err, ch, neg, y; 5044 5045 /* clear a */ 5046 mp_zero(a); 5047 5048 /* if first digit is - then set negative */ 5049 if ((ch = *s++) == '-') { 5050 neg = MP_NEG; 5051 ch = *s++; 5052 } else { 5053 neg = MP_ZPOS; 5054 } 5055 5056 for (;;) { 5057 /* find y in the radix map */ 5058 for (y = 0; y < radix; y++) { 5059 if (mp_s_rmap[y] == ch) { 5060 break; 5061 } 5062 } 5063 if (y == radix) { 5064 break; 5065 } 5066 5067 /* shift up and add */ 5068 if ((err = multiply_digit(a, radix, a)) != MP_OKAY) { 5069 return err; 5070 } 5071 if ((err = add_single_digit(a, y, a)) != MP_OKAY) { 5072 return err; 5073 } 5074 5075 ch = *s++; 5076 } 5077 if (compare_digit(a, 0) != MP_EQ) { 5078 a->sign = neg; 5079 } 5080 5081 return MP_OKAY; 5082 } 5083 5084 static int 5085 getbn(BIGNUM **a, const char *str, int radix) 5086 { 5087 int len; 5088 5089 if (a == NULL || str == NULL || (*a = BN_new()) == NULL) { 5090 return 0; 5091 } 5092 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5093 return 0; 5094 } 5095 mp_radix_size(__UNCONST(*a), radix, &len); 5096 return len - 1; 5097 } 5098 5099 /* d = a - b (mod c) */ 5100 static int 5101 subtract_modulo(mp_int *a, mp_int *b, mp_int *c, mp_int *d) 5102 { 5103 int res; 5104 mp_int t; 5105 5106 5107 if ((res = mp_init(&t)) != MP_OKAY) { 5108 return res; 5109 } 5110 5111 if ((res = signed_subtract(a, b, &t)) != MP_OKAY) { 5112 mp_clear(&t); 5113 return res; 5114 } 5115 res = modulo(&t, c, d); 5116 mp_clear(&t); 5117 return res; 5118 } 5119 5120 /* bn_mp_gcd.c */ 5121 /* Greatest Common Divisor using the binary method */ 5122 static int 5123 mp_gcd(mp_int *a, mp_int *b, mp_int *c) 5124 { 5125 mp_int u, v; 5126 int k, u_lsb, v_lsb, res; 5127 5128 /* either zero than gcd is the largest */ 5129 if (BN_is_zero(a) == MP_YES) { 5130 return absolute(b, c); 5131 } 5132 if (BN_is_zero(b) == MP_YES) { 5133 return absolute(a, c); 5134 } 5135 5136 /* get copies of a and b we can modify */ 5137 if ((res = mp_init_copy(&u, a)) != MP_OKAY) { 5138 return res; 5139 } 5140 5141 if ((res = mp_init_copy(&v, b)) != MP_OKAY) { 5142 goto LBL_U; 5143 } 5144 5145 /* must be positive for the remainder of the algorithm */ 5146 u.sign = v.sign = MP_ZPOS; 5147 5148 /* B1. Find the common power of two for u and v */ 5149 u_lsb = mp_cnt_lsb(&u); 5150 v_lsb = mp_cnt_lsb(&v); 5151 k = MIN(u_lsb, v_lsb); 5152 5153 if (k > 0) { 5154 /* divide the power of two out */ 5155 if ((res = rshift_bits(&u, k, &u, NULL)) != MP_OKAY) { 5156 goto LBL_V; 5157 } 5158 5159 if ((res = rshift_bits(&v, k, &v, NULL)) != MP_OKAY) { 5160 goto LBL_V; 5161 } 5162 } 5163 5164 /* divide any remaining factors of two out */ 5165 if (u_lsb != k) { 5166 if ((res = rshift_bits(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { 5167 goto LBL_V; 5168 } 5169 } 5170 5171 if (v_lsb != k) { 5172 if ((res = rshift_bits(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { 5173 goto LBL_V; 5174 } 5175 } 5176 5177 while (BN_is_zero(&v) == 0) { 5178 /* make sure v is the largest */ 5179 if (compare_magnitude(&u, &v) == MP_GT) { 5180 /* swap u and v to make sure v is >= u */ 5181 mp_exch(&u, &v); 5182 } 5183 5184 /* subtract smallest from largest */ 5185 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 5186 goto LBL_V; 5187 } 5188 5189 /* Divide out all factors of two */ 5190 if ((res = rshift_bits(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { 5191 goto LBL_V; 5192 } 5193 } 5194 5195 /* multiply by 2**k which we divided out at the beginning */ 5196 if ((res = lshift_bits(&u, k, c)) != MP_OKAY) { 5197 goto LBL_V; 5198 } 5199 c->sign = MP_ZPOS; 5200 res = MP_OKAY; 5201 LBL_V: 5202 mp_clear (&u); 5203 LBL_U: 5204 mp_clear (&v); 5205 return res; 5206 } 5207 5208 /**************************************************************************/ 5209 5210 /* BIGNUM emulation layer */ 5211 5212 /* essentiually, these are just wrappers around the libtommath functions */ 5213 /* usually the order of args changes */ 5214 /* the BIGNUM API tends to have more const poisoning */ 5215 /* these wrappers also check the arguments passed for sanity */ 5216 5217 BIGNUM * 5218 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret) 5219 { 5220 if (data == NULL) { 5221 return BN_new(); 5222 } 5223 if (ret == NULL) { 5224 ret = BN_new(); 5225 } 5226 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5227 } 5228 5229 /* store in unsigned [big endian] format */ 5230 int 5231 BN_bn2bin(const BIGNUM *a, unsigned char *b) 5232 { 5233 BIGNUM t; 5234 int x; 5235 5236 if (a == NULL || b == NULL) { 5237 return -1; 5238 } 5239 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5240 return -1; 5241 } 5242 for (x = 0; !BN_is_zero(&t) ; ) { 5243 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5244 if (rshift_bits(&t, 8, &t, NULL) != MP_OKAY) { 5245 mp_clear(&t); 5246 return -1; 5247 } 5248 } 5249 bn_reverse(b, x); 5250 mp_clear(&t); 5251 return x; 5252 } 5253 5254 void 5255 BN_init(BIGNUM *a) 5256 { 5257 if (a != NULL) { 5258 mp_init(a); 5259 } 5260 } 5261 5262 BIGNUM * 5263 BN_new(void) 5264 { 5265 BIGNUM *a; 5266 5267 if ((a = allocate(1, sizeof(*a))) != NULL) { 5268 mp_init(a); 5269 } 5270 return a; 5271 } 5272 5273 /* copy, b = a */ 5274 int 5275 BN_copy(BIGNUM *b, const BIGNUM *a) 5276 { 5277 if (a == NULL || b == NULL) { 5278 return MP_VAL; 5279 } 5280 return mp_copy(__UNCONST(a), b); 5281 } 5282 5283 BIGNUM * 5284 BN_dup(const BIGNUM *a) 5285 { 5286 BIGNUM *ret; 5287 5288 if (a == NULL) { 5289 return NULL; 5290 } 5291 if ((ret = BN_new()) != NULL) { 5292 BN_copy(ret, a); 5293 } 5294 return ret; 5295 } 5296 5297 void 5298 BN_swap(BIGNUM *a, BIGNUM *b) 5299 { 5300 if (a && b) { 5301 mp_exch(a, b); 5302 } 5303 } 5304 5305 int 5306 BN_lshift(BIGNUM *r, const BIGNUM *a, int n) 5307 { 5308 if (r == NULL || a == NULL || n < 0) { 5309 return 0; 5310 } 5311 BN_copy(r, a); 5312 return lshift_digits(r, n) == MP_OKAY; 5313 } 5314 5315 int 5316 BN_lshift1(BIGNUM *r, BIGNUM *a) 5317 { 5318 if (r == NULL || a == NULL) { 5319 return 0; 5320 } 5321 BN_copy(r, a); 5322 return lshift_digits(r, 1) == MP_OKAY; 5323 } 5324 5325 int 5326 BN_rshift(BIGNUM *r, const BIGNUM *a, int n) 5327 { 5328 if (r == NULL || a == NULL || n < 0) { 5329 return MP_VAL; 5330 } 5331 BN_copy(r, a); 5332 return rshift_digits(r, n) == MP_OKAY; 5333 } 5334 5335 int 5336 BN_rshift1(BIGNUM *r, BIGNUM *a) 5337 { 5338 if (r == NULL || a == NULL) { 5339 return 0; 5340 } 5341 BN_copy(r, a); 5342 return rshift_digits(r, 1) == MP_OKAY; 5343 } 5344 5345 int 5346 BN_set_word(BIGNUM *a, BN_ULONG w) 5347 { 5348 if (a == NULL) { 5349 return 0; 5350 } 5351 set_word(a, w); 5352 return 1; 5353 } 5354 5355 int 5356 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5357 { 5358 if (a == NULL || b == NULL || r == NULL) { 5359 return 0; 5360 } 5361 return signed_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5362 } 5363 5364 int 5365 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5366 { 5367 if (a == NULL || b == NULL || r == NULL) { 5368 return 0; 5369 } 5370 return signed_subtract(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5371 } 5372 5373 int 5374 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 5375 { 5376 if (a == NULL || b == NULL || r == NULL) { 5377 return 0; 5378 } 5379 USE_ARG(ctx); 5380 return signed_multiply(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5381 } 5382 5383 int 5384 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx) 5385 { 5386 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5387 return 0; 5388 } 5389 USE_ARG(ctx); 5390 return signed_divide(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5391 } 5392 5393 /* perform a bit operation on the 2 bignums */ 5394 int 5395 BN_bitop(BIGNUM *r, const BIGNUM *a, char op, const BIGNUM *b) 5396 { 5397 unsigned ndigits; 5398 mp_digit ad; 5399 mp_digit bd; 5400 int i; 5401 5402 if (a == NULL || b == NULL || r == NULL) { 5403 return 0; 5404 } 5405 if (BN_cmp(__UNCONST(a), __UNCONST(b)) >= 0) { 5406 BN_copy(r, a); 5407 ndigits = a->used; 5408 } else { 5409 BN_copy(r, b); 5410 ndigits = b->used; 5411 } 5412 for (i = 0 ; i < (int)ndigits ; i++) { 5413 ad = (i > a->used) ? 0 : a->dp[i]; 5414 bd = (i > b->used) ? 0 : b->dp[i]; 5415 switch(op) { 5416 case '&': 5417 r->dp[i] = (ad & bd); 5418 break; 5419 case '|': 5420 r->dp[i] = (ad | bd); 5421 break; 5422 case '^': 5423 r->dp[i] = (ad ^ bd); 5424 break; 5425 default: 5426 break; 5427 } 5428 } 5429 return 1; 5430 } 5431 5432 void 5433 BN_free(BIGNUM *a) 5434 { 5435 if (a) { 5436 mp_clear(a); 5437 free(a); 5438 } 5439 } 5440 5441 void 5442 BN_clear(BIGNUM *a) 5443 { 5444 if (a) { 5445 mp_clear(a); 5446 } 5447 } 5448 5449 void 5450 BN_clear_free(BIGNUM *a) 5451 { 5452 BN_clear(a); 5453 free(a); 5454 } 5455 5456 int 5457 BN_num_bytes(const BIGNUM *a) 5458 { 5459 if (a == NULL) { 5460 return MP_VAL; 5461 } 5462 return mp_unsigned_bin_size(__UNCONST(a)); 5463 } 5464 5465 int 5466 BN_num_bits(const BIGNUM *a) 5467 { 5468 if (a == NULL) { 5469 return 0; 5470 } 5471 return mp_count_bits(a); 5472 } 5473 5474 void 5475 BN_set_negative(BIGNUM *a, int n) 5476 { 5477 if (a) { 5478 a->sign = (n) ? MP_NEG : 0; 5479 } 5480 } 5481 5482 int 5483 BN_cmp(BIGNUM *a, BIGNUM *b) 5484 { 5485 if (a == NULL || b == NULL) { 5486 return MP_VAL; 5487 } 5488 switch(signed_compare(a, b)) { 5489 case MP_LT: 5490 return -1; 5491 case MP_GT: 5492 return 1; 5493 case MP_EQ: 5494 default: 5495 return 0; 5496 } 5497 } 5498 5499 int 5500 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx) 5501 { 5502 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5503 return MP_VAL; 5504 } 5505 USE_ARG(ctx); 5506 return exponent_modulo(G, X, P, Y) == MP_OKAY; 5507 } 5508 5509 BIGNUM * 5510 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) 5511 { 5512 USE_ARG(ctx); 5513 if (r == NULL || a == NULL || n == NULL) { 5514 return NULL; 5515 } 5516 return (modular_inverse(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5517 } 5518 5519 int 5520 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5521 { 5522 USE_ARG(ctx); 5523 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5524 return 0; 5525 } 5526 return multiply_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY; 5527 } 5528 5529 BN_CTX * 5530 BN_CTX_new(void) 5531 { 5532 return allocate(1, sizeof(BN_CTX)); 5533 } 5534 5535 void 5536 BN_CTX_init(BN_CTX *c) 5537 { 5538 if (c != NULL) { 5539 c->arraysize = 15; 5540 if ((c->v = allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5541 c->arraysize = 0; 5542 } 5543 } 5544 } 5545 5546 BIGNUM * 5547 BN_CTX_get(BN_CTX *ctx) 5548 { 5549 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5550 return NULL; 5551 } 5552 return ctx->v[ctx->count++] = BN_new(); 5553 } 5554 5555 void 5556 BN_CTX_start(BN_CTX *ctx) 5557 { 5558 BN_CTX_init(ctx); 5559 } 5560 5561 void 5562 BN_CTX_free(BN_CTX *c) 5563 { 5564 unsigned i; 5565 5566 if (c != NULL && c->v != NULL) { 5567 for (i = 0 ; i < c->count ; i++) { 5568 BN_clear_free(c->v[i]); 5569 } 5570 deallocate(c->v, sizeof(*c->v) * c->arraysize); 5571 } 5572 } 5573 5574 void 5575 BN_CTX_end(BN_CTX *ctx) 5576 { 5577 BN_CTX_free(ctx); 5578 } 5579 5580 char * 5581 BN_bn2hex(const BIGNUM *a) 5582 { 5583 return (a == NULL) ? NULL : formatbn(a, 16); 5584 } 5585 5586 char * 5587 BN_bn2dec(const BIGNUM *a) 5588 { 5589 return (a == NULL) ? NULL : formatbn(a, 10); 5590 } 5591 5592 char * 5593 BN_bn2radix(const BIGNUM *a, unsigned radix) 5594 { 5595 return (a == NULL) ? NULL : formatbn(a, (int)radix); 5596 } 5597 5598 int 5599 BN_print_fp(FILE *fp, const BIGNUM *a) 5600 { 5601 char *s; 5602 int ret; 5603 5604 if (fp == NULL || a == NULL) { 5605 return 0; 5606 } 5607 s = BN_bn2hex(a); 5608 ret = fprintf(fp, "%s", s); 5609 deallocate(s, strlen(s) + 1); 5610 return ret; 5611 } 5612 5613 #ifdef BN_RAND_NEEDED 5614 int 5615 BN_rand(BIGNUM *rnd, int bits, int top, int bottom) 5616 { 5617 uint64_t r; 5618 int digits; 5619 int i; 5620 5621 if (rnd == NULL) { 5622 return 0; 5623 } 5624 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5625 for (i = 0 ; i < digits ; i++) { 5626 r = (uint64_t)arc4random(); 5627 r <<= 32; 5628 r |= arc4random(); 5629 rnd->dp[i] = (r & MP_MASK); 5630 rnd->used += 1; 5631 } 5632 if (top == 0) { 5633 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5634 } 5635 if (top == 1) { 5636 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5637 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5638 } 5639 if (bottom) { 5640 rnd->dp[0] |= 0x1; 5641 } 5642 return 1; 5643 } 5644 5645 int 5646 BN_rand_range(BIGNUM *rnd, BIGNUM *range) 5647 { 5648 if (rnd == NULL || range == NULL || BN_is_zero(range)) { 5649 return 0; 5650 } 5651 BN_rand(rnd, BN_num_bits(range), 1, 0); 5652 return modulo(rnd, range, rnd) == MP_OKAY; 5653 } 5654 #endif 5655 5656 int 5657 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg) 5658 { 5659 int primality; 5660 5661 if (a == NULL) { 5662 return 0; 5663 } 5664 USE_ARG(ctx); 5665 USE_ARG(cb_arg); 5666 USE_ARG(callback); 5667 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5668 } 5669 5670 const BIGNUM * 5671 BN_value_one(void) 5672 { 5673 static mp_digit digit = 1UL; 5674 static const BIGNUM one = { &digit, 1, 1, 0 }; 5675 5676 return &one; 5677 } 5678 5679 int 5680 BN_hex2bn(BIGNUM **a, const char *str) 5681 { 5682 return getbn(a, str, 16); 5683 } 5684 5685 int 5686 BN_dec2bn(BIGNUM **a, const char *str) 5687 { 5688 return getbn(a, str, 10); 5689 } 5690 5691 int 5692 BN_radix2bn(BIGNUM **a, const char *str, unsigned radix) 5693 { 5694 return getbn(a, str, (int)radix); 5695 } 5696 5697 int 5698 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5699 { 5700 USE_ARG(ctx); 5701 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5702 return 0; 5703 } 5704 return subtract_modulo(a, b, __UNCONST(m), r) == MP_OKAY; 5705 } 5706 5707 int 5708 BN_is_bit_set(const BIGNUM *a, int n) 5709 { 5710 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5711 return 0; 5712 } 5713 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5714 } 5715 5716 /* raise 'a' to power of 'b' */ 5717 int 5718 BN_raise(BIGNUM *res, BIGNUM *a, BIGNUM *b) 5719 { 5720 uint64_t exponent; 5721 BIGNUM *power; 5722 BIGNUM *temp; 5723 char *t; 5724 5725 t = BN_bn2dec(b); 5726 exponent = (uint64_t)strtoull(t, NULL, 10); 5727 free(t); 5728 if (exponent == 0) { 5729 BN_copy(res, BN_value_one()); 5730 } else { 5731 power = BN_dup(a); 5732 for ( ; (exponent & 1) == 0 ; exponent >>= 1) { 5733 BN_mul(power, power, power, NULL); 5734 } 5735 temp = BN_dup(power); 5736 for (exponent >>= 1 ; exponent > 0 ; exponent >>= 1) { 5737 BN_mul(power, power, power, NULL); 5738 if (exponent & 1) { 5739 BN_mul(temp, power, temp, NULL); 5740 } 5741 } 5742 BN_copy(res, temp); 5743 BN_free(power); 5744 BN_free(temp); 5745 } 5746 return 1; 5747 } 5748 5749 /* compute the factorial */ 5750 int 5751 BN_factorial(BIGNUM *res, BIGNUM *f) 5752 { 5753 BIGNUM *one; 5754 BIGNUM *i; 5755 5756 i = BN_dup(f); 5757 one = __UNCONST(BN_value_one()); 5758 BN_sub(i, i, one); 5759 BN_copy(res, f); 5760 while (BN_cmp(i, one) > 0) { 5761 BN_mul(res, res, i, NULL); 5762 BN_sub(i, i, one); 5763 } 5764 BN_free(i); 5765 return 1; 5766 } 5767 5768 /* get greatest common divisor */ 5769 int 5770 BN_gcd(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) 5771 { 5772 return mp_gcd(a, b, r); 5773 } 5774