1 /*- 2 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 /* LibTomMath, multiple-precision integer library -- Tom St Denis 26 * 27 * LibTomMath is a library that provides multiple-precision 28 * integer arithmetic as well as number theoretic functionality. 29 * 30 * The library was designed directly after the MPI library by 31 * Michael Fromberger but has been written from scratch with 32 * additional optimizations in place. 33 * 34 * The library is free for all purposes without any express 35 * guarantee it works. 36 * 37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 38 */ 39 #include "config.h" 40 41 #include <sys/types.h> 42 #include <sys/param.h> 43 44 #ifdef _KERNEL 45 # include <sys/kmem.h> 46 #else 47 # include <arpa/inet.h> 48 # include <limits.h> 49 # include <stdarg.h> 50 # include <stdio.h> 51 # include <stdlib.h> 52 # include <string.h> 53 # include <unistd.h> 54 #endif 55 56 #include "bn.h" 57 58 /**************************************************************************/ 59 60 /* LibTomMath, multiple-precision integer library -- Tom St Denis 61 * 62 * LibTomMath is a library that provides multiple-precision 63 * integer arithmetic as well as number theoretic functionality. 64 * 65 * The library was designed directly after the MPI library by 66 * Michael Fromberger but has been written from scratch with 67 * additional optimizations in place. 68 * 69 * The library is free for all purposes without any express 70 * guarantee it works. 71 * 72 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 73 */ 74 75 #define MP_PREC 32 76 #define DIGIT_BIT 28 77 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 78 79 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 80 81 #define MP_NO 0 82 #define MP_YES 1 83 84 #ifndef USE_ARG 85 #define USE_ARG(x) /*LINTED*/(void)&(x) 86 #endif 87 88 #ifndef __arraycount 89 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 90 #endif 91 92 #ifndef MIN 93 #define MIN(a,b) (((a)<(b))?(a):(b)) 94 #endif 95 96 #define MP_ISZERO(a) (((a)->used == 0) ? MP_YES : MP_NO) 97 98 typedef int mp_err; 99 100 static int signed_multiply(mp_int * a, mp_int * b, mp_int * c); 101 static int square(mp_int * a, mp_int * b); 102 103 static int signed_subtract_word(mp_int *a, mp_digit b, mp_int *c); 104 105 static inline void * 106 allocate(size_t n, size_t m) 107 { 108 return calloc(n, m); 109 } 110 111 static inline void 112 deallocate(void *v, size_t sz) 113 { 114 USE_ARG(sz); 115 free(v); 116 } 117 118 /* set to zero */ 119 static inline void 120 mp_zero(mp_int *a) 121 { 122 a->sign = MP_ZPOS; 123 a->used = 0; 124 memset(a->dp, 0x0, a->alloc * sizeof(*a->dp)); 125 } 126 127 /* grow as required */ 128 static int 129 mp_grow(mp_int *a, int size) 130 { 131 mp_digit *tmp; 132 133 /* if the alloc size is smaller alloc more ram */ 134 if (a->alloc < size) { 135 /* ensure there are always at least MP_PREC digits extra on top */ 136 size += (MP_PREC * 2) - (size % MP_PREC); 137 138 /* reallocate the array a->dp 139 * 140 * We store the return in a temporary variable 141 * in case the operation failed we don't want 142 * to overwrite the dp member of a. 143 */ 144 tmp = realloc(a->dp, sizeof(*tmp) * size); 145 if (tmp == NULL) { 146 /* reallocation failed but "a" is still valid [can be freed] */ 147 return MP_MEM; 148 } 149 150 /* reallocation succeeded so set a->dp */ 151 a->dp = tmp; 152 /* zero excess digits */ 153 memset(&a->dp[a->alloc], 0x0, (size - a->alloc) * sizeof(*a->dp)); 154 a->alloc = size; 155 } 156 return MP_OKAY; 157 } 158 159 /* shift left a certain amount of digits */ 160 static int 161 lshift_digits(mp_int * a, int b) 162 { 163 mp_digit *top, *bottom; 164 int x, res; 165 166 /* if its less than zero return */ 167 if (b <= 0) { 168 return MP_OKAY; 169 } 170 171 /* grow to fit the new digits */ 172 if (a->alloc < a->used + b) { 173 if ((res = mp_grow(a, a->used + b)) != MP_OKAY) { 174 return res; 175 } 176 } 177 178 /* increment the used by the shift amount then copy upwards */ 179 a->used += b; 180 181 /* top */ 182 top = a->dp + a->used - 1; 183 184 /* base */ 185 bottom = a->dp + a->used - 1 - b; 186 187 /* much like rshift_digits this is implemented using a sliding window 188 * except the window goes the otherway around. Copying from 189 * the bottom to the top. 190 */ 191 for (x = a->used - 1; x >= b; x--) { 192 *top-- = *bottom--; 193 } 194 195 /* zero the lower digits */ 196 memset(a->dp, 0x0, b * sizeof(*a->dp)); 197 return MP_OKAY; 198 } 199 200 /* trim unused digits 201 * 202 * This is used to ensure that leading zero digits are 203 * trimed and the leading "used" digit will be non-zero 204 * Typically very fast. Also fixes the sign if there 205 * are no more leading digits 206 */ 207 static void 208 trim_unused_digits(mp_int * a) 209 { 210 /* decrease used while the most significant digit is 211 * zero. 212 */ 213 while (a->used > 0 && a->dp[a->used - 1] == 0) { 214 a->used -= 1; 215 } 216 /* reset the sign flag if used == 0 */ 217 if (a->used == 0) { 218 a->sign = MP_ZPOS; 219 } 220 } 221 222 /* copy, b = a */ 223 static int 224 mp_copy(PGPV_BIGNUM *a, PGPV_BIGNUM *b) 225 { 226 int res; 227 228 /* if dst == src do nothing */ 229 if (a == b) { 230 return MP_OKAY; 231 } 232 if (a == NULL || b == NULL) { 233 return MP_VAL; 234 } 235 236 /* grow dest */ 237 if (b->alloc < a->used) { 238 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 239 return res; 240 } 241 } 242 243 memcpy(b->dp, a->dp, a->used * sizeof(*b->dp)); 244 if (b->used > a->used) { 245 memset(&b->dp[a->used], 0x0, (b->used - a->used) * sizeof(*b->dp)); 246 } 247 248 /* copy used count and sign */ 249 b->used = a->used; 250 b->sign = a->sign; 251 return MP_OKAY; 252 } 253 254 /* shift left by a certain bit count */ 255 static int 256 lshift_bits(mp_int *a, int b, mp_int *c) 257 { 258 mp_digit d; 259 int res; 260 261 /* copy */ 262 if (a != c) { 263 if ((res = mp_copy(a, c)) != MP_OKAY) { 264 return res; 265 } 266 } 267 268 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 269 if ((res = mp_grow(c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 270 return res; 271 } 272 } 273 274 /* shift by as many digits in the bit count */ 275 if (b >= (int)DIGIT_BIT) { 276 if ((res = lshift_digits(c, b / DIGIT_BIT)) != MP_OKAY) { 277 return res; 278 } 279 } 280 281 /* shift any bit count < DIGIT_BIT */ 282 d = (mp_digit) (b % DIGIT_BIT); 283 if (d != 0) { 284 mp_digit *tmpc, shift, mask, carry, rr; 285 int x; 286 287 /* bitmask for carries */ 288 mask = (((mp_digit)1) << d) - 1; 289 290 /* shift for msbs */ 291 shift = DIGIT_BIT - d; 292 293 /* alias */ 294 tmpc = c->dp; 295 296 /* carry */ 297 carry = 0; 298 for (x = 0; x < c->used; x++) { 299 /* get the higher bits of the current word */ 300 rr = (*tmpc >> shift) & mask; 301 302 /* shift the current word and OR in the carry */ 303 *tmpc = ((*tmpc << d) | carry) & MP_MASK; 304 ++tmpc; 305 306 /* set the carry to the carry bits of the current word */ 307 carry = rr; 308 } 309 310 /* set final carry */ 311 if (carry != 0) { 312 c->dp[c->used++] = carry; 313 } 314 } 315 trim_unused_digits(c); 316 return MP_OKAY; 317 } 318 319 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 320 static int 321 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 322 { 323 int res; 324 325 /* make sure there are at least two digits */ 326 if (a->alloc < 2) { 327 if ((res = mp_grow(a, 2)) != MP_OKAY) { 328 return res; 329 } 330 } 331 332 /* zero the int */ 333 mp_zero(a); 334 335 /* read the bytes in */ 336 while (c-- > 0) { 337 if ((res = lshift_bits(a, 8, a)) != MP_OKAY) { 338 return res; 339 } 340 341 a->dp[0] |= *b++; 342 a->used += 1; 343 } 344 trim_unused_digits(a); 345 return MP_OKAY; 346 } 347 348 /* returns the number of bits in an mpi */ 349 static int 350 mp_count_bits(const mp_int *a) 351 { 352 int r; 353 mp_digit q; 354 355 /* shortcut */ 356 if (a->used == 0) { 357 return 0; 358 } 359 360 /* get number of digits and add that */ 361 r = (a->used - 1) * DIGIT_BIT; 362 363 /* take the last digit and count the bits in it */ 364 for (q = a->dp[a->used - 1]; q > ((mp_digit) 0) ; r++) { 365 q >>= ((mp_digit) 1); 366 } 367 return r; 368 } 369 370 /* compare maginitude of two ints (unsigned) */ 371 static int 372 compare_magnitude(mp_int * a, mp_int * b) 373 { 374 int n; 375 mp_digit *tmpa, *tmpb; 376 377 /* compare based on # of non-zero digits */ 378 if (a->used > b->used) { 379 return MP_GT; 380 } 381 382 if (a->used < b->used) { 383 return MP_LT; 384 } 385 386 /* alias for a */ 387 tmpa = a->dp + (a->used - 1); 388 389 /* alias for b */ 390 tmpb = b->dp + (a->used - 1); 391 392 /* compare based on digits */ 393 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 394 if (*tmpa > *tmpb) { 395 return MP_GT; 396 } 397 398 if (*tmpa < *tmpb) { 399 return MP_LT; 400 } 401 } 402 return MP_EQ; 403 } 404 405 /* compare two ints (signed)*/ 406 static int 407 signed_compare(mp_int * a, mp_int * b) 408 { 409 /* compare based on sign */ 410 if (a->sign != b->sign) { 411 return (a->sign == MP_NEG) ? MP_LT : MP_GT; 412 } 413 return (a->sign == MP_NEG) ? compare_magnitude(b, a) : compare_magnitude(a, b); 414 } 415 416 /* get the size for an unsigned equivalent */ 417 static int 418 mp_unsigned_bin_size(mp_int * a) 419 { 420 int size = mp_count_bits(a); 421 422 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 423 } 424 425 /* init a new mp_int */ 426 static int 427 mp_init(mp_int * a) 428 { 429 /* allocate memory required and clear it */ 430 a->dp = allocate(1, sizeof(*a->dp) * MP_PREC); 431 if (a->dp == NULL) { 432 return MP_MEM; 433 } 434 435 /* set the digits to zero */ 436 memset(a->dp, 0x0, MP_PREC * sizeof(*a->dp)); 437 438 /* set the used to zero, allocated digits to the default precision 439 * and sign to positive */ 440 a->used = 0; 441 a->alloc = MP_PREC; 442 a->sign = MP_ZPOS; 443 444 return MP_OKAY; 445 } 446 447 /* clear one (frees) */ 448 static void 449 mp_clear(mp_int * a) 450 { 451 /* only do anything if a hasn't been freed previously */ 452 if (a->dp != NULL) { 453 memset(a->dp, 0x0, a->used * sizeof(*a->dp)); 454 455 /* free ram */ 456 deallocate(a->dp, (size_t)a->alloc); 457 458 /* reset members to make debugging easier */ 459 a->dp = NULL; 460 a->alloc = a->used = 0; 461 a->sign = MP_ZPOS; 462 } 463 } 464 465 static int 466 mp_init_multi(mp_int *mp, ...) 467 { 468 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 469 int n = 0; /* Number of ok inits */ 470 mp_int* cur_arg = mp; 471 va_list args; 472 473 va_start(args, mp); /* init args to next argument from caller */ 474 while (cur_arg != NULL) { 475 if (mp_init(cur_arg) != MP_OKAY) { 476 /* Oops - error! Back-track and mp_clear what we already 477 succeeded in init-ing, then return error. 478 */ 479 va_list clean_args; 480 481 /* end the current list */ 482 va_end(args); 483 484 /* now start cleaning up */ 485 cur_arg = mp; 486 va_start(clean_args, mp); 487 while (n--) { 488 mp_clear(cur_arg); 489 cur_arg = va_arg(clean_args, mp_int*); 490 } 491 va_end(clean_args); 492 res = MP_MEM; 493 break; 494 } 495 n++; 496 cur_arg = va_arg(args, mp_int*); 497 } 498 va_end(args); 499 return res; /* Assumed ok, if error flagged above. */ 500 } 501 502 /* init an mp_init for a given size */ 503 static int 504 mp_init_size(mp_int * a, int size) 505 { 506 /* pad size so there are always extra digits */ 507 size += (MP_PREC * 2) - (size % MP_PREC); 508 509 /* alloc mem */ 510 a->dp = allocate(1, sizeof(*a->dp) * size); 511 if (a->dp == NULL) { 512 return MP_MEM; 513 } 514 515 /* set the members */ 516 a->used = 0; 517 a->alloc = size; 518 a->sign = MP_ZPOS; 519 520 /* zero the digits */ 521 memset(a->dp, 0x0, size * sizeof(*a->dp)); 522 return MP_OKAY; 523 } 524 525 /* creates "a" then copies b into it */ 526 static int 527 mp_init_copy(mp_int * a, mp_int * b) 528 { 529 int res; 530 531 if ((res = mp_init(a)) != MP_OKAY) { 532 return res; 533 } 534 return mp_copy(b, a); 535 } 536 537 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 538 static int 539 basic_add(mp_int * a, mp_int * b, mp_int * c) 540 { 541 mp_int *x; 542 int olduse, res, min, max; 543 544 /* find sizes, we let |a| <= |b| which means we have to sort 545 * them. "x" will point to the input with the most digits 546 */ 547 if (a->used > b->used) { 548 min = b->used; 549 max = a->used; 550 x = a; 551 } else { 552 min = a->used; 553 max = b->used; 554 x = b; 555 } 556 557 /* init result */ 558 if (c->alloc < max + 1) { 559 if ((res = mp_grow(c, max + 1)) != MP_OKAY) { 560 return res; 561 } 562 } 563 564 /* get old used digit count and set new one */ 565 olduse = c->used; 566 c->used = max + 1; 567 568 { 569 mp_digit carry, *tmpa, *tmpb, *tmpc; 570 int i; 571 572 /* alias for digit pointers */ 573 574 /* first input */ 575 tmpa = a->dp; 576 577 /* second input */ 578 tmpb = b->dp; 579 580 /* destination */ 581 tmpc = c->dp; 582 583 /* zero the carry */ 584 carry = 0; 585 for (i = 0; i < min; i++) { 586 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 587 *tmpc = *tmpa++ + *tmpb++ + carry; 588 589 /* U = carry bit of T[i] */ 590 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 591 592 /* take away carry bit from T[i] */ 593 *tmpc++ &= MP_MASK; 594 } 595 596 /* now copy higher words if any, that is in A+B 597 * if A or B has more digits add those in 598 */ 599 if (min != max) { 600 for (; i < max; i++) { 601 /* T[i] = X[i] + U */ 602 *tmpc = x->dp[i] + carry; 603 604 /* U = carry bit of T[i] */ 605 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 606 607 /* take away carry bit from T[i] */ 608 *tmpc++ &= MP_MASK; 609 } 610 } 611 612 /* add carry */ 613 *tmpc++ = carry; 614 615 /* clear digits above oldused */ 616 if (olduse > c->used) { 617 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*c->dp)); 618 } 619 } 620 621 trim_unused_digits(c); 622 return MP_OKAY; 623 } 624 625 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 626 static int 627 basic_subtract(mp_int * a, mp_int * b, mp_int * c) 628 { 629 int olduse, res, min, max; 630 631 /* find sizes */ 632 min = b->used; 633 max = a->used; 634 635 /* init result */ 636 if (c->alloc < max) { 637 if ((res = mp_grow(c, max)) != MP_OKAY) { 638 return res; 639 } 640 } 641 olduse = c->used; 642 c->used = max; 643 644 { 645 mp_digit carry, *tmpa, *tmpb, *tmpc; 646 int i; 647 648 /* alias for digit pointers */ 649 tmpa = a->dp; 650 tmpb = b->dp; 651 tmpc = c->dp; 652 653 /* set carry to zero */ 654 carry = 0; 655 for (i = 0; i < min; i++) { 656 /* T[i] = A[i] - B[i] - U */ 657 *tmpc = *tmpa++ - *tmpb++ - carry; 658 659 /* U = carry bit of T[i] 660 * Note this saves performing an AND operation since 661 * if a carry does occur it will propagate all the way to the 662 * MSB. As a result a single shift is enough to get the carry 663 */ 664 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 665 666 /* Clear carry from T[i] */ 667 *tmpc++ &= MP_MASK; 668 } 669 670 /* now copy higher words if any, e.g. if A has more digits than B */ 671 for (; i < max; i++) { 672 /* T[i] = A[i] - U */ 673 *tmpc = *tmpa++ - carry; 674 675 /* U = carry bit of T[i] */ 676 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 677 678 /* Clear carry from T[i] */ 679 *tmpc++ &= MP_MASK; 680 } 681 682 /* clear digits above used (since we may not have grown result above) */ 683 if (olduse > c->used) { 684 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*a->dp)); 685 } 686 } 687 688 trim_unused_digits(c); 689 return MP_OKAY; 690 } 691 692 /* high level subtraction (handles signs) */ 693 static int 694 signed_subtract(mp_int * a, mp_int * b, mp_int * c) 695 { 696 int sa, sb, res; 697 698 sa = a->sign; 699 sb = b->sign; 700 701 if (sa != sb) { 702 /* subtract a negative from a positive, OR */ 703 /* subtract a positive from a negative. */ 704 /* In either case, ADD their magnitudes, */ 705 /* and use the sign of the first number. */ 706 c->sign = sa; 707 res = basic_add(a, b, c); 708 } else { 709 /* subtract a positive from a positive, OR */ 710 /* subtract a negative from a negative. */ 711 /* First, take the difference between their */ 712 /* magnitudes, then... */ 713 if (compare_magnitude(a, b) != MP_LT) { 714 /* Copy the sign from the first */ 715 c->sign = sa; 716 /* The first has a larger or equal magnitude */ 717 res = basic_subtract(a, b, c); 718 } else { 719 /* The result has the *opposite* sign from */ 720 /* the first number. */ 721 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 722 /* The second has a larger magnitude */ 723 res = basic_subtract(b, a, c); 724 } 725 } 726 return res; 727 } 728 729 /* shift right a certain amount of digits */ 730 static int 731 rshift_digits(mp_int * a, int b) 732 { 733 /* if b <= 0 then ignore it */ 734 if (b <= 0) { 735 return 0; 736 } 737 738 /* if b > used then simply zero it and return */ 739 if (a->used <= b) { 740 mp_zero(a); 741 return 0; 742 } 743 744 /* this is implemented as a sliding window where 745 * the window is b-digits long and digits from 746 * the top of the window are copied to the bottom 747 * 748 * e.g. 749 750 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 751 /\ | ----> 752 \-------------------/ ----> 753 */ 754 memmove(a->dp, &a->dp[b], (a->used - b) * sizeof(*a->dp)); 755 memset(&a->dp[a->used - b], 0x0, b * sizeof(*a->dp)); 756 757 /* remove excess digits */ 758 a->used -= b; 759 return 1; 760 } 761 762 /* multiply by a digit */ 763 static int 764 multiply_digit(mp_int * a, mp_digit b, mp_int * c) 765 { 766 mp_digit carry, *tmpa, *tmpc; 767 mp_word r; 768 int ix, res, olduse; 769 770 /* make sure c is big enough to hold a*b */ 771 if (c->alloc < a->used + 1) { 772 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 773 return res; 774 } 775 } 776 777 /* get the original destinations used count */ 778 olduse = c->used; 779 780 /* set the sign */ 781 c->sign = a->sign; 782 783 /* alias for a->dp [source] */ 784 tmpa = a->dp; 785 786 /* alias for c->dp [dest] */ 787 tmpc = c->dp; 788 789 /* zero carry */ 790 carry = 0; 791 792 /* compute columns */ 793 for (ix = 0; ix < a->used; ix++) { 794 /* compute product and carry sum for this term */ 795 r = ((mp_word) carry) + ((mp_word)*tmpa++) * ((mp_word)b); 796 797 /* mask off higher bits to get a single digit */ 798 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 799 800 /* send carry into next iteration */ 801 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 802 } 803 804 /* store final carry [if any] and increment ix offset */ 805 *tmpc++ = carry; 806 ++ix; 807 if (olduse > ix) { 808 memset(tmpc, 0x0, (olduse - ix) * sizeof(*tmpc)); 809 } 810 811 /* set used count */ 812 c->used = a->used + 1; 813 trim_unused_digits(c); 814 815 return MP_OKAY; 816 } 817 818 /* high level addition (handles signs) */ 819 static int 820 signed_add(mp_int * a, mp_int * b, mp_int * c) 821 { 822 int asign, bsign, res; 823 824 /* get sign of both inputs */ 825 asign = a->sign; 826 bsign = b->sign; 827 828 /* handle two cases, not four */ 829 if (asign == bsign) { 830 /* both positive or both negative */ 831 /* add their magnitudes, copy the sign */ 832 c->sign = asign; 833 res = basic_add(a, b, c); 834 } else { 835 /* one positive, the other negative */ 836 /* subtract the one with the greater magnitude from */ 837 /* the one of the lesser magnitude. The result gets */ 838 /* the sign of the one with the greater magnitude. */ 839 if (compare_magnitude(a, b) == MP_LT) { 840 c->sign = bsign; 841 res = basic_subtract(b, a, c); 842 } else { 843 c->sign = asign; 844 res = basic_subtract(a, b, c); 845 } 846 } 847 return res; 848 } 849 850 /* swap the elements of two integers, for cases where you can't simply swap the 851 * mp_int pointers around 852 */ 853 static void 854 mp_exch(mp_int *a, mp_int *b) 855 { 856 mp_int t; 857 858 t = *a; 859 *a = *b; 860 *b = t; 861 } 862 863 /* calc a value mod 2**b */ 864 static int 865 modulo_2_to_power(mp_int * a, int b, mp_int * c) 866 { 867 int x, res; 868 869 /* if b is <= 0 then zero the int */ 870 if (b <= 0) { 871 mp_zero(c); 872 return MP_OKAY; 873 } 874 875 /* if the modulus is larger than the value than return */ 876 if (b >= (int) (a->used * DIGIT_BIT)) { 877 res = mp_copy(a, c); 878 return res; 879 } 880 881 /* copy */ 882 if ((res = mp_copy(a, c)) != MP_OKAY) { 883 return res; 884 } 885 886 /* zero digits above the last digit of the modulus */ 887 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 888 c->dp[x] = 0; 889 } 890 /* clear the digit that is not completely outside/inside the modulus */ 891 c->dp[b / DIGIT_BIT] &= 892 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 893 trim_unused_digits(c); 894 return MP_OKAY; 895 } 896 897 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 898 static int 899 rshift_bits(mp_int * a, int b, mp_int * c, mp_int * d) 900 { 901 mp_digit D, r, rr; 902 int x, res; 903 mp_int t; 904 905 906 /* if the shift count is <= 0 then we do no work */ 907 if (b <= 0) { 908 res = mp_copy(a, c); 909 if (d != NULL) { 910 mp_zero(d); 911 } 912 return res; 913 } 914 915 if ((res = mp_init(&t)) != MP_OKAY) { 916 return res; 917 } 918 919 /* get the remainder */ 920 if (d != NULL) { 921 if ((res = modulo_2_to_power(a, b, &t)) != MP_OKAY) { 922 mp_clear(&t); 923 return res; 924 } 925 } 926 927 /* copy */ 928 if ((res = mp_copy(a, c)) != MP_OKAY) { 929 mp_clear(&t); 930 return res; 931 } 932 933 /* shift by as many digits in the bit count */ 934 if (b >= (int)DIGIT_BIT) { 935 rshift_digits(c, b / DIGIT_BIT); 936 } 937 938 /* shift any bit count < DIGIT_BIT */ 939 D = (mp_digit) (b % DIGIT_BIT); 940 if (D != 0) { 941 mp_digit *tmpc, mask, shift; 942 943 /* mask */ 944 mask = (((mp_digit)1) << D) - 1; 945 946 /* shift for lsb */ 947 shift = DIGIT_BIT - D; 948 949 /* alias */ 950 tmpc = c->dp + (c->used - 1); 951 952 /* carry */ 953 r = 0; 954 for (x = c->used - 1; x >= 0; x--) { 955 /* get the lower bits of this word in a temp */ 956 rr = *tmpc & mask; 957 958 /* shift the current word and mix in the carry bits from the previous word */ 959 *tmpc = (*tmpc >> D) | (r << shift); 960 --tmpc; 961 962 /* set the carry to the carry bits of the current word found above */ 963 r = rr; 964 } 965 } 966 trim_unused_digits(c); 967 if (d != NULL) { 968 mp_exch(&t, d); 969 } 970 mp_clear(&t); 971 return MP_OKAY; 972 } 973 974 /* integer signed division. 975 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 976 * HAC pp.598 Algorithm 14.20 977 * 978 * Note that the description in HAC is horribly 979 * incomplete. For example, it doesn't consider 980 * the case where digits are removed from 'x' in 981 * the inner loop. It also doesn't consider the 982 * case that y has fewer than three digits, etc.. 983 * 984 * The overall algorithm is as described as 985 * 14.20 from HAC but fixed to treat these cases. 986 */ 987 static int 988 signed_divide(mp_int *c, mp_int *d, mp_int *a, mp_int *b) 989 { 990 mp_int q, x, y, t1, t2; 991 int res, n, t, i, norm, neg; 992 993 /* is divisor zero ? */ 994 if (MP_ISZERO(b) == MP_YES) { 995 return MP_VAL; 996 } 997 998 /* if a < b then q=0, r = a */ 999 if (compare_magnitude(a, b) == MP_LT) { 1000 if (d != NULL) { 1001 res = mp_copy(a, d); 1002 } else { 1003 res = MP_OKAY; 1004 } 1005 if (c != NULL) { 1006 mp_zero(c); 1007 } 1008 return res; 1009 } 1010 1011 if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) { 1012 return res; 1013 } 1014 q.used = a->used + 2; 1015 1016 if ((res = mp_init(&t1)) != MP_OKAY) { 1017 goto LBL_Q; 1018 } 1019 1020 if ((res = mp_init(&t2)) != MP_OKAY) { 1021 goto LBL_T1; 1022 } 1023 1024 if ((res = mp_init_copy(&x, a)) != MP_OKAY) { 1025 goto LBL_T2; 1026 } 1027 1028 if ((res = mp_init_copy(&y, b)) != MP_OKAY) { 1029 goto LBL_X; 1030 } 1031 1032 /* fix the sign */ 1033 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1034 x.sign = y.sign = MP_ZPOS; 1035 1036 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1037 norm = mp_count_bits(&y) % DIGIT_BIT; 1038 if (norm < (int)(DIGIT_BIT-1)) { 1039 norm = (DIGIT_BIT-1) - norm; 1040 if ((res = lshift_bits(&x, norm, &x)) != MP_OKAY) { 1041 goto LBL_Y; 1042 } 1043 if ((res = lshift_bits(&y, norm, &y)) != MP_OKAY) { 1044 goto LBL_Y; 1045 } 1046 } else { 1047 norm = 0; 1048 } 1049 1050 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1051 n = x.used - 1; 1052 t = y.used - 1; 1053 1054 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1055 if ((res = lshift_digits(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1056 goto LBL_Y; 1057 } 1058 1059 while (signed_compare(&x, &y) != MP_LT) { 1060 ++(q.dp[n - t]); 1061 if ((res = signed_subtract(&x, &y, &x)) != MP_OKAY) { 1062 goto LBL_Y; 1063 } 1064 } 1065 1066 /* reset y by shifting it back down */ 1067 rshift_digits(&y, n - t); 1068 1069 /* step 3. for i from n down to (t + 1) */ 1070 for (i = n; i >= (t + 1); i--) { 1071 if (i > x.used) { 1072 continue; 1073 } 1074 1075 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1076 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1077 if (x.dp[i] == y.dp[t]) { 1078 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1079 } else { 1080 mp_word tmp; 1081 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1082 tmp |= ((mp_word) x.dp[i - 1]); 1083 tmp /= ((mp_word) y.dp[t]); 1084 if (tmp > (mp_word) MP_MASK) { 1085 tmp = MP_MASK; 1086 } 1087 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1088 } 1089 1090 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1091 xi * b**2 + xi-1 * b + xi-2 1092 do q{i-t-1} -= 1; 1093 */ 1094 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1095 do { 1096 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1097 1098 /* find left hand */ 1099 mp_zero(&t1); 1100 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1101 t1.dp[1] = y.dp[t]; 1102 t1.used = 2; 1103 if ((res = multiply_digit(&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1104 goto LBL_Y; 1105 } 1106 1107 /* find right hand */ 1108 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1109 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1110 t2.dp[2] = x.dp[i]; 1111 t2.used = 3; 1112 } while (compare_magnitude(&t1, &t2) == MP_GT); 1113 1114 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1115 if ((res = multiply_digit(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1116 goto LBL_Y; 1117 } 1118 1119 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1120 goto LBL_Y; 1121 } 1122 1123 if ((res = signed_subtract(&x, &t1, &x)) != MP_OKAY) { 1124 goto LBL_Y; 1125 } 1126 1127 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1128 if (x.sign == MP_NEG) { 1129 if ((res = mp_copy(&y, &t1)) != MP_OKAY) { 1130 goto LBL_Y; 1131 } 1132 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1133 goto LBL_Y; 1134 } 1135 if ((res = signed_add(&x, &t1, &x)) != MP_OKAY) { 1136 goto LBL_Y; 1137 } 1138 1139 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1140 } 1141 } 1142 1143 /* now q is the quotient and x is the remainder 1144 * [which we have to normalize] 1145 */ 1146 1147 /* get sign before writing to c */ 1148 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1149 1150 if (c != NULL) { 1151 trim_unused_digits(&q); 1152 mp_exch(&q, c); 1153 c->sign = neg; 1154 } 1155 1156 if (d != NULL) { 1157 rshift_bits(&x, norm, &x, NULL); 1158 mp_exch(&x, d); 1159 } 1160 1161 res = MP_OKAY; 1162 1163 LBL_Y: 1164 mp_clear(&y); 1165 LBL_X: 1166 mp_clear(&x); 1167 LBL_T2: 1168 mp_clear(&t2); 1169 LBL_T1: 1170 mp_clear(&t1); 1171 LBL_Q: 1172 mp_clear(&q); 1173 return res; 1174 } 1175 1176 /* c = a mod b, 0 <= c < b */ 1177 static int 1178 modulo(mp_int * a, mp_int * b, mp_int * c) 1179 { 1180 mp_int t; 1181 int res; 1182 1183 if ((res = mp_init(&t)) != MP_OKAY) { 1184 return res; 1185 } 1186 1187 if ((res = signed_divide(NULL, &t, a, b)) != MP_OKAY) { 1188 mp_clear(&t); 1189 return res; 1190 } 1191 1192 if (t.sign != b->sign) { 1193 res = signed_add(b, &t, c); 1194 } else { 1195 res = MP_OKAY; 1196 mp_exch(&t, c); 1197 } 1198 1199 mp_clear(&t); 1200 return res; 1201 } 1202 1203 /* set to a digit */ 1204 static void 1205 set_word(mp_int * a, mp_digit b) 1206 { 1207 mp_zero(a); 1208 a->dp[0] = b & MP_MASK; 1209 a->used = (a->dp[0] != 0) ? 1 : 0; 1210 } 1211 1212 /* b = a/2 */ 1213 static int 1214 half(mp_int * a, mp_int * b) 1215 { 1216 int x, res, oldused; 1217 1218 /* copy */ 1219 if (b->alloc < a->used) { 1220 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 1221 return res; 1222 } 1223 } 1224 1225 oldused = b->used; 1226 b->used = a->used; 1227 { 1228 mp_digit r, rr, *tmpa, *tmpb; 1229 1230 /* source alias */ 1231 tmpa = a->dp + b->used - 1; 1232 1233 /* dest alias */ 1234 tmpb = b->dp + b->used - 1; 1235 1236 /* carry */ 1237 r = 0; 1238 for (x = b->used - 1; x >= 0; x--) { 1239 /* get the carry for the next iteration */ 1240 rr = *tmpa & 1; 1241 1242 /* shift the current digit, add in carry and store */ 1243 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1244 1245 /* forward carry to next iteration */ 1246 r = rr; 1247 } 1248 1249 /* zero excess digits */ 1250 tmpb = b->dp + b->used; 1251 for (x = b->used; x < oldused; x++) { 1252 *tmpb++ = 0; 1253 } 1254 } 1255 b->sign = a->sign; 1256 trim_unused_digits(b); 1257 return MP_OKAY; 1258 } 1259 1260 /* compare a digit */ 1261 static int 1262 compare_digit(mp_int * a, mp_digit b) 1263 { 1264 /* compare based on sign */ 1265 if (a->sign == MP_NEG) { 1266 return MP_LT; 1267 } 1268 1269 /* compare based on magnitude */ 1270 if (a->used > 1) { 1271 return MP_GT; 1272 } 1273 1274 /* compare the only digit of a to b */ 1275 if (a->dp[0] > b) { 1276 return MP_GT; 1277 } else if (a->dp[0] < b) { 1278 return MP_LT; 1279 } else { 1280 return MP_EQ; 1281 } 1282 } 1283 1284 static void 1285 mp_clear_multi(mp_int *mp, ...) 1286 { 1287 mp_int* next_mp = mp; 1288 va_list args; 1289 1290 va_start(args, mp); 1291 while (next_mp != NULL) { 1292 mp_clear(next_mp); 1293 next_mp = va_arg(args, mp_int*); 1294 } 1295 va_end(args); 1296 } 1297 1298 /* computes the modular inverse via binary extended euclidean algorithm, 1299 * that is c = 1/a mod b 1300 * 1301 * Based on slow invmod except this is optimized for the case where b is 1302 * odd as per HAC Note 14.64 on pp. 610 1303 */ 1304 static int 1305 fast_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1306 { 1307 mp_int x, y, u, v, B, D; 1308 int res, neg; 1309 1310 /* 2. [modified] b must be odd */ 1311 if (MP_ISZERO(b) == MP_YES) { 1312 return MP_VAL; 1313 } 1314 1315 /* init all our temps */ 1316 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1317 return res; 1318 } 1319 1320 /* x == modulus, y == value to invert */ 1321 if ((res = mp_copy(b, &x)) != MP_OKAY) { 1322 goto LBL_ERR; 1323 } 1324 1325 /* we need y = |a| */ 1326 if ((res = modulo(a, b, &y)) != MP_OKAY) { 1327 goto LBL_ERR; 1328 } 1329 1330 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1331 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1332 goto LBL_ERR; 1333 } 1334 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1335 goto LBL_ERR; 1336 } 1337 set_word(&D, 1); 1338 1339 top: 1340 /* 4. while u is even do */ 1341 while (PGPV_BN_is_even(&u) == 1) { 1342 /* 4.1 u = u/2 */ 1343 if ((res = half(&u, &u)) != MP_OKAY) { 1344 goto LBL_ERR; 1345 } 1346 /* 4.2 if B is odd then */ 1347 if (PGPV_BN_is_odd(&B) == 1) { 1348 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1349 goto LBL_ERR; 1350 } 1351 } 1352 /* B = B/2 */ 1353 if ((res = half(&B, &B)) != MP_OKAY) { 1354 goto LBL_ERR; 1355 } 1356 } 1357 1358 /* 5. while v is even do */ 1359 while (PGPV_BN_is_even(&v) == 1) { 1360 /* 5.1 v = v/2 */ 1361 if ((res = half(&v, &v)) != MP_OKAY) { 1362 goto LBL_ERR; 1363 } 1364 /* 5.2 if D is odd then */ 1365 if (PGPV_BN_is_odd(&D) == 1) { 1366 /* D = (D-x)/2 */ 1367 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1368 goto LBL_ERR; 1369 } 1370 } 1371 /* D = D/2 */ 1372 if ((res = half(&D, &D)) != MP_OKAY) { 1373 goto LBL_ERR; 1374 } 1375 } 1376 1377 /* 6. if u >= v then */ 1378 if (signed_compare(&u, &v) != MP_LT) { 1379 /* u = u - v, B = B - D */ 1380 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1381 goto LBL_ERR; 1382 } 1383 1384 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1385 goto LBL_ERR; 1386 } 1387 } else { 1388 /* v - v - u, D = D - B */ 1389 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1390 goto LBL_ERR; 1391 } 1392 1393 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1394 goto LBL_ERR; 1395 } 1396 } 1397 1398 /* if not zero goto step 4 */ 1399 if (MP_ISZERO(&u) == MP_NO) { 1400 goto top; 1401 } 1402 1403 /* now a = C, b = D, gcd == g*v */ 1404 1405 /* if v != 1 then there is no inverse */ 1406 if (compare_digit(&v, 1) != MP_EQ) { 1407 res = MP_VAL; 1408 goto LBL_ERR; 1409 } 1410 1411 /* b is now the inverse */ 1412 neg = a->sign; 1413 while (D.sign == MP_NEG) { 1414 if ((res = signed_add(&D, b, &D)) != MP_OKAY) { 1415 goto LBL_ERR; 1416 } 1417 } 1418 mp_exch(&D, c); 1419 c->sign = neg; 1420 res = MP_OKAY; 1421 1422 LBL_ERR: 1423 mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1424 return res; 1425 } 1426 1427 /* hac 14.61, pp608 */ 1428 static int 1429 slow_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1430 { 1431 mp_int x, y, u, v, A, B, C, D; 1432 int res; 1433 1434 /* b cannot be negative */ 1435 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1436 return MP_VAL; 1437 } 1438 1439 /* init temps */ 1440 if ((res = mp_init_multi(&x, &y, &u, &v, 1441 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1442 return res; 1443 } 1444 1445 /* x = a, y = b */ 1446 if ((res = modulo(a, b, &x)) != MP_OKAY) { 1447 goto LBL_ERR; 1448 } 1449 if ((res = mp_copy(b, &y)) != MP_OKAY) { 1450 goto LBL_ERR; 1451 } 1452 1453 /* 2. [modified] if x,y are both even then return an error! */ 1454 if (PGPV_BN_is_even(&x) == 1 && PGPV_BN_is_even(&y) == 1) { 1455 res = MP_VAL; 1456 goto LBL_ERR; 1457 } 1458 1459 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1460 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1461 goto LBL_ERR; 1462 } 1463 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1464 goto LBL_ERR; 1465 } 1466 set_word(&A, 1); 1467 set_word(&D, 1); 1468 1469 top: 1470 /* 4. while u is even do */ 1471 while (PGPV_BN_is_even(&u) == 1) { 1472 /* 4.1 u = u/2 */ 1473 if ((res = half(&u, &u)) != MP_OKAY) { 1474 goto LBL_ERR; 1475 } 1476 /* 4.2 if A or B is odd then */ 1477 if (PGPV_BN_is_odd(&A) == 1 || PGPV_BN_is_odd(&B) == 1) { 1478 /* A = (A+y)/2, B = (B-x)/2 */ 1479 if ((res = signed_add(&A, &y, &A)) != MP_OKAY) { 1480 goto LBL_ERR; 1481 } 1482 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1483 goto LBL_ERR; 1484 } 1485 } 1486 /* A = A/2, B = B/2 */ 1487 if ((res = half(&A, &A)) != MP_OKAY) { 1488 goto LBL_ERR; 1489 } 1490 if ((res = half(&B, &B)) != MP_OKAY) { 1491 goto LBL_ERR; 1492 } 1493 } 1494 1495 /* 5. while v is even do */ 1496 while (PGPV_BN_is_even(&v) == 1) { 1497 /* 5.1 v = v/2 */ 1498 if ((res = half(&v, &v)) != MP_OKAY) { 1499 goto LBL_ERR; 1500 } 1501 /* 5.2 if C or D is odd then */ 1502 if (PGPV_BN_is_odd(&C) == 1 || PGPV_BN_is_odd(&D) == 1) { 1503 /* C = (C+y)/2, D = (D-x)/2 */ 1504 if ((res = signed_add(&C, &y, &C)) != MP_OKAY) { 1505 goto LBL_ERR; 1506 } 1507 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1508 goto LBL_ERR; 1509 } 1510 } 1511 /* C = C/2, D = D/2 */ 1512 if ((res = half(&C, &C)) != MP_OKAY) { 1513 goto LBL_ERR; 1514 } 1515 if ((res = half(&D, &D)) != MP_OKAY) { 1516 goto LBL_ERR; 1517 } 1518 } 1519 1520 /* 6. if u >= v then */ 1521 if (signed_compare(&u, &v) != MP_LT) { 1522 /* u = u - v, A = A - C, B = B - D */ 1523 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1524 goto LBL_ERR; 1525 } 1526 1527 if ((res = signed_subtract(&A, &C, &A)) != MP_OKAY) { 1528 goto LBL_ERR; 1529 } 1530 1531 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1532 goto LBL_ERR; 1533 } 1534 } else { 1535 /* v - v - u, C = C - A, D = D - B */ 1536 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1537 goto LBL_ERR; 1538 } 1539 1540 if ((res = signed_subtract(&C, &A, &C)) != MP_OKAY) { 1541 goto LBL_ERR; 1542 } 1543 1544 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1545 goto LBL_ERR; 1546 } 1547 } 1548 1549 /* if not zero goto step 4 */ 1550 if (PGPV_BN_is_zero(&u) == 0) { 1551 goto top; 1552 } 1553 /* now a = C, b = D, gcd == g*v */ 1554 1555 /* if v != 1 then there is no inverse */ 1556 if (compare_digit(&v, 1) != MP_EQ) { 1557 res = MP_VAL; 1558 goto LBL_ERR; 1559 } 1560 1561 /* if its too low */ 1562 while (compare_digit(&C, 0) == MP_LT) { 1563 if ((res = signed_add(&C, b, &C)) != MP_OKAY) { 1564 goto LBL_ERR; 1565 } 1566 } 1567 1568 /* too big */ 1569 while (compare_magnitude(&C, b) != MP_LT) { 1570 if ((res = signed_subtract(&C, b, &C)) != MP_OKAY) { 1571 goto LBL_ERR; 1572 } 1573 } 1574 1575 /* C is now the inverse */ 1576 mp_exch(&C, c); 1577 res = MP_OKAY; 1578 LBL_ERR: 1579 mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1580 return res; 1581 } 1582 1583 static int 1584 modular_inverse(mp_int *c, mp_int *a, mp_int *b) 1585 { 1586 /* b cannot be negative */ 1587 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1588 return MP_VAL; 1589 } 1590 1591 /* if the modulus is odd we can use a faster routine instead */ 1592 if (PGPV_BN_is_odd(b) == 1) { 1593 return fast_modular_inverse(a, b, c); 1594 } 1595 return slow_modular_inverse(a, b, c); 1596 } 1597 1598 /* b = |a| 1599 * 1600 * Simple function copies the input and fixes the sign to positive 1601 */ 1602 static int 1603 absolute(mp_int * a, mp_int * b) 1604 { 1605 int res; 1606 1607 /* copy a to b */ 1608 if (a != b) { 1609 if ((res = mp_copy(a, b)) != MP_OKAY) { 1610 return res; 1611 } 1612 } 1613 1614 /* force the sign of b to positive */ 1615 b->sign = MP_ZPOS; 1616 1617 return MP_OKAY; 1618 } 1619 1620 /* determines if reduce_2k_l can be used */ 1621 static int 1622 mp_reduce_is_2k_l(mp_int *a) 1623 { 1624 int ix, iy; 1625 1626 if (a->used == 0) { 1627 return MP_NO; 1628 } else if (a->used == 1) { 1629 return MP_YES; 1630 } else if (a->used > 1) { 1631 /* if more than half of the digits are -1 we're sold */ 1632 for (iy = ix = 0; ix < a->used; ix++) { 1633 if (a->dp[ix] == MP_MASK) { 1634 ++iy; 1635 } 1636 } 1637 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1638 1639 } 1640 return MP_NO; 1641 } 1642 1643 /* computes a = 2**b 1644 * 1645 * Simple algorithm which zeroes the int, grows it then just sets one bit 1646 * as required. 1647 */ 1648 static int 1649 mp_2expt(mp_int * a, int b) 1650 { 1651 int res; 1652 1653 /* zero a as per default */ 1654 mp_zero(a); 1655 1656 /* grow a to accomodate the single bit */ 1657 if ((res = mp_grow(a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1658 return res; 1659 } 1660 1661 /* set the used count of where the bit will go */ 1662 a->used = b / DIGIT_BIT + 1; 1663 1664 /* put the single bit in its place */ 1665 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1666 1667 return MP_OKAY; 1668 } 1669 1670 /* pre-calculate the value required for Barrett reduction 1671 * For a given modulus "b" it calulates the value required in "a" 1672 */ 1673 static int 1674 mp_reduce_setup(mp_int * a, mp_int * b) 1675 { 1676 int res; 1677 1678 if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1679 return res; 1680 } 1681 return signed_divide(a, NULL, a, b); 1682 } 1683 1684 /* b = a*2 */ 1685 static int 1686 doubled(mp_int * a, mp_int * b) 1687 { 1688 int x, res, oldused; 1689 1690 /* grow to accomodate result */ 1691 if (b->alloc < a->used + 1) { 1692 if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) { 1693 return res; 1694 } 1695 } 1696 1697 oldused = b->used; 1698 b->used = a->used; 1699 1700 { 1701 mp_digit r, rr, *tmpa, *tmpb; 1702 1703 /* alias for source */ 1704 tmpa = a->dp; 1705 1706 /* alias for dest */ 1707 tmpb = b->dp; 1708 1709 /* carry */ 1710 r = 0; 1711 for (x = 0; x < a->used; x++) { 1712 1713 /* get what will be the *next* carry bit from the 1714 * MSB of the current digit 1715 */ 1716 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1717 1718 /* now shift up this digit, add in the carry [from the previous] */ 1719 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1720 1721 /* copy the carry that would be from the source 1722 * digit into the next iteration 1723 */ 1724 r = rr; 1725 } 1726 1727 /* new leading digit? */ 1728 if (r != 0) { 1729 /* add a MSB which is always 1 at this point */ 1730 *tmpb = 1; 1731 ++(b->used); 1732 } 1733 1734 /* now zero any excess digits on the destination 1735 * that we didn't write to 1736 */ 1737 tmpb = b->dp + b->used; 1738 for (x = b->used; x < oldused; x++) { 1739 *tmpb++ = 0; 1740 } 1741 } 1742 b->sign = a->sign; 1743 return MP_OKAY; 1744 } 1745 1746 /* divide by three (based on routine from MPI and the GMP manual) */ 1747 static int 1748 third(mp_int * a, mp_int *c, mp_digit * d) 1749 { 1750 mp_int q; 1751 mp_word w, t; 1752 mp_digit b; 1753 int res, ix; 1754 1755 /* b = 2**DIGIT_BIT / 3 */ 1756 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1757 1758 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1759 return res; 1760 } 1761 1762 q.used = a->used; 1763 q.sign = a->sign; 1764 w = 0; 1765 for (ix = a->used - 1; ix >= 0; ix--) { 1766 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1767 1768 if (w >= 3) { 1769 /* multiply w by [1/3] */ 1770 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1771 1772 /* now subtract 3 * [w/3] from w, to get the remainder */ 1773 w -= t+t+t; 1774 1775 /* fixup the remainder as required since 1776 * the optimization is not exact. 1777 */ 1778 while (w >= 3) { 1779 t += 1; 1780 w -= 3; 1781 } 1782 } else { 1783 t = 0; 1784 } 1785 q.dp[ix] = (mp_digit)t; 1786 } 1787 1788 /* [optional] store the remainder */ 1789 if (d != NULL) { 1790 *d = (mp_digit)w; 1791 } 1792 1793 /* [optional] store the quotient */ 1794 if (c != NULL) { 1795 trim_unused_digits(&q); 1796 mp_exch(&q, c); 1797 } 1798 mp_clear(&q); 1799 1800 return res; 1801 } 1802 1803 /* multiplication using the Toom-Cook 3-way algorithm 1804 * 1805 * Much more complicated than Karatsuba but has a lower 1806 * asymptotic running time of O(N**1.464). This algorithm is 1807 * only particularly useful on VERY large inputs 1808 * (we're talking 1000s of digits here...). 1809 */ 1810 static int 1811 toom_cook_multiply(mp_int *a, mp_int *b, mp_int *c) 1812 { 1813 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1814 int res, B; 1815 1816 /* init temps */ 1817 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1818 &a0, &a1, &a2, &b0, &b1, 1819 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1820 return res; 1821 } 1822 1823 /* B */ 1824 B = MIN(a->used, b->used) / 3; 1825 1826 /* a = a2 * B**2 + a1 * B + a0 */ 1827 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1828 goto ERR; 1829 } 1830 1831 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1832 goto ERR; 1833 } 1834 rshift_digits(&a1, B); 1835 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 1836 1837 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1838 goto ERR; 1839 } 1840 rshift_digits(&a2, B*2); 1841 1842 /* b = b2 * B**2 + b1 * B + b0 */ 1843 if ((res = modulo_2_to_power(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1844 goto ERR; 1845 } 1846 1847 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1848 goto ERR; 1849 } 1850 rshift_digits(&b1, B); 1851 modulo_2_to_power(&b1, DIGIT_BIT * B, &b1); 1852 1853 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1854 goto ERR; 1855 } 1856 rshift_digits(&b2, B*2); 1857 1858 /* w0 = a0*b0 */ 1859 if ((res = signed_multiply(&a0, &b0, &w0)) != MP_OKAY) { 1860 goto ERR; 1861 } 1862 1863 /* w4 = a2 * b2 */ 1864 if ((res = signed_multiply(&a2, &b2, &w4)) != MP_OKAY) { 1865 goto ERR; 1866 } 1867 1868 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1869 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 1870 goto ERR; 1871 } 1872 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1873 goto ERR; 1874 } 1875 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1876 goto ERR; 1877 } 1878 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1879 goto ERR; 1880 } 1881 1882 if ((res = doubled(&b0, &tmp2)) != MP_OKAY) { 1883 goto ERR; 1884 } 1885 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1886 goto ERR; 1887 } 1888 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1889 goto ERR; 1890 } 1891 if ((res = signed_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1892 goto ERR; 1893 } 1894 1895 if ((res = signed_multiply(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1896 goto ERR; 1897 } 1898 1899 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1900 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 1901 goto ERR; 1902 } 1903 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1904 goto ERR; 1905 } 1906 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1907 goto ERR; 1908 } 1909 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1910 goto ERR; 1911 } 1912 1913 if ((res = doubled(&b2, &tmp2)) != MP_OKAY) { 1914 goto ERR; 1915 } 1916 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1917 goto ERR; 1918 } 1919 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1920 goto ERR; 1921 } 1922 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1923 goto ERR; 1924 } 1925 1926 if ((res = signed_multiply(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1927 goto ERR; 1928 } 1929 1930 1931 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1932 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1933 goto ERR; 1934 } 1935 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1936 goto ERR; 1937 } 1938 if ((res = signed_add(&b2, &b1, &tmp2)) != MP_OKAY) { 1939 goto ERR; 1940 } 1941 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1942 goto ERR; 1943 } 1944 if ((res = signed_multiply(&tmp1, &tmp2, &w2)) != MP_OKAY) { 1945 goto ERR; 1946 } 1947 1948 /* now solve the matrix 1949 1950 0 0 0 0 1 1951 1 2 4 8 16 1952 1 1 1 1 1 1953 16 8 4 2 1 1954 1 0 0 0 0 1955 1956 using 12 subtractions, 4 shifts, 1957 2 small divisions and 1 small multiplication 1958 */ 1959 1960 /* r1 - r4 */ 1961 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 1962 goto ERR; 1963 } 1964 /* r3 - r0 */ 1965 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 1966 goto ERR; 1967 } 1968 /* r1/2 */ 1969 if ((res = half(&w1, &w1)) != MP_OKAY) { 1970 goto ERR; 1971 } 1972 /* r3/2 */ 1973 if ((res = half(&w3, &w3)) != MP_OKAY) { 1974 goto ERR; 1975 } 1976 /* r2 - r0 - r4 */ 1977 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 1978 goto ERR; 1979 } 1980 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 1981 goto ERR; 1982 } 1983 /* r1 - r2 */ 1984 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 1985 goto ERR; 1986 } 1987 /* r3 - r2 */ 1988 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 1989 goto ERR; 1990 } 1991 /* r1 - 8r0 */ 1992 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 1993 goto ERR; 1994 } 1995 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 1996 goto ERR; 1997 } 1998 /* r3 - 8r4 */ 1999 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 2000 goto ERR; 2001 } 2002 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 2003 goto ERR; 2004 } 2005 /* 3r2 - r1 - r3 */ 2006 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2007 goto ERR; 2008 } 2009 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2010 goto ERR; 2011 } 2012 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2013 goto ERR; 2014 } 2015 /* r1 - r2 */ 2016 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2017 goto ERR; 2018 } 2019 /* r3 - r2 */ 2020 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2021 goto ERR; 2022 } 2023 /* r1/3 */ 2024 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2025 goto ERR; 2026 } 2027 /* r3/3 */ 2028 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2029 goto ERR; 2030 } 2031 2032 /* at this point shift W[n] by B*n */ 2033 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2034 goto ERR; 2035 } 2036 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2037 goto ERR; 2038 } 2039 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2040 goto ERR; 2041 } 2042 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2043 goto ERR; 2044 } 2045 2046 if ((res = signed_add(&w0, &w1, c)) != MP_OKAY) { 2047 goto ERR; 2048 } 2049 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2050 goto ERR; 2051 } 2052 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2053 goto ERR; 2054 } 2055 if ((res = signed_add(&tmp1, c, c)) != MP_OKAY) { 2056 goto ERR; 2057 } 2058 2059 ERR: 2060 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2061 &a0, &a1, &a2, &b0, &b1, 2062 &b2, &tmp1, &tmp2, NULL); 2063 return res; 2064 } 2065 2066 #define TOOM_MUL_CUTOFF 350 2067 #define KARATSUBA_MUL_CUTOFF 80 2068 2069 /* c = |a| * |b| using Karatsuba Multiplication using 2070 * three half size multiplications 2071 * 2072 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2073 * let n represent half of the number of digits in 2074 * the min(a,b) 2075 * 2076 * a = a1 * B**n + a0 2077 * b = b1 * B**n + b0 2078 * 2079 * Then, a * b => 2080 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2081 * 2082 * Note that a1b1 and a0b0 are used twice and only need to be 2083 * computed once. So in total three half size (half # of 2084 * digit) multiplications are performed, a0b0, a1b1 and 2085 * (a1+b1)(a0+b0) 2086 * 2087 * Note that a multiplication of half the digits requires 2088 * 1/4th the number of single precision multiplications so in 2089 * total after one call 25% of the single precision multiplications 2090 * are saved. Note also that the call to signed_multiply can end up back 2091 * in this function if the a0, a1, b0, or b1 are above the threshold. 2092 * This is known as divide-and-conquer and leads to the famous 2093 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2094 * the standard O(N**2) that the baseline/comba methods use. 2095 * Generally though the overhead of this method doesn't pay off 2096 * until a certain size (N ~ 80) is reached. 2097 */ 2098 static int 2099 karatsuba_multiply(mp_int * a, mp_int * b, mp_int * c) 2100 { 2101 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2102 int B; 2103 int err; 2104 2105 /* default the return code to an error */ 2106 err = MP_MEM; 2107 2108 /* min # of digits */ 2109 B = MIN(a->used, b->used); 2110 2111 /* now divide in two */ 2112 B = (int)((unsigned)B >> 1); 2113 2114 /* init copy all the temps */ 2115 if (mp_init_size(&x0, B) != MP_OKAY) { 2116 goto ERR; 2117 } 2118 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2119 goto X0; 2120 } 2121 if (mp_init_size(&y0, B) != MP_OKAY) { 2122 goto X1; 2123 } 2124 if (mp_init_size(&y1, b->used - B) != MP_OKAY) { 2125 goto Y0; 2126 } 2127 /* init temps */ 2128 if (mp_init_size(&t1, B * 2) != MP_OKAY) { 2129 goto Y1; 2130 } 2131 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) { 2132 goto T1; 2133 } 2134 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) { 2135 goto X0Y0; 2136 } 2137 /* now shift the digits */ 2138 x0.used = y0.used = B; 2139 x1.used = a->used - B; 2140 y1.used = b->used - B; 2141 2142 { 2143 int x; 2144 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2145 2146 /* we copy the digits directly instead of using higher level functions 2147 * since we also need to shift the digits 2148 */ 2149 tmpa = a->dp; 2150 tmpb = b->dp; 2151 2152 tmpx = x0.dp; 2153 tmpy = y0.dp; 2154 for (x = 0; x < B; x++) { 2155 *tmpx++ = *tmpa++; 2156 *tmpy++ = *tmpb++; 2157 } 2158 2159 tmpx = x1.dp; 2160 for (x = B; x < a->used; x++) { 2161 *tmpx++ = *tmpa++; 2162 } 2163 2164 tmpy = y1.dp; 2165 for (x = B; x < b->used; x++) { 2166 *tmpy++ = *tmpb++; 2167 } 2168 } 2169 2170 /* only need to clamp the lower words since by definition the 2171 * upper words x1/y1 must have a known number of digits 2172 */ 2173 trim_unused_digits(&x0); 2174 trim_unused_digits(&y0); 2175 2176 /* now calc the products x0y0 and x1y1 */ 2177 /* after this x0 is no longer required, free temp [x0==t2]! */ 2178 if (signed_multiply(&x0, &y0, &x0y0) != MP_OKAY) { 2179 goto X1Y1; /* x0y0 = x0*y0 */ 2180 } 2181 if (signed_multiply(&x1, &y1, &x1y1) != MP_OKAY) { 2182 goto X1Y1; /* x1y1 = x1*y1 */ 2183 } 2184 /* now calc x1+x0 and y1+y0 */ 2185 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2186 goto X1Y1; /* t1 = x1 - x0 */ 2187 } 2188 if (basic_add(&y1, &y0, &x0) != MP_OKAY) { 2189 goto X1Y1; /* t2 = y1 - y0 */ 2190 } 2191 if (signed_multiply(&t1, &x0, &t1) != MP_OKAY) { 2192 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2193 } 2194 /* add x0y0 */ 2195 if (signed_add(&x0y0, &x1y1, &x0) != MP_OKAY) { 2196 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2197 } 2198 if (basic_subtract(&t1, &x0, &t1) != MP_OKAY) { 2199 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2200 } 2201 /* shift by B */ 2202 if (lshift_digits(&t1, B) != MP_OKAY) { 2203 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2204 } 2205 if (lshift_digits(&x1y1, B * 2) != MP_OKAY) { 2206 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2207 } 2208 if (signed_add(&x0y0, &t1, &t1) != MP_OKAY) { 2209 goto X1Y1; /* t1 = x0y0 + t1 */ 2210 } 2211 if (signed_add(&t1, &x1y1, c) != MP_OKAY) { 2212 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2213 } 2214 /* Algorithm succeeded set the return code to MP_OKAY */ 2215 err = MP_OKAY; 2216 2217 X1Y1: 2218 mp_clear(&x1y1); 2219 X0Y0: 2220 mp_clear(&x0y0); 2221 T1: 2222 mp_clear(&t1); 2223 Y1: 2224 mp_clear(&y1); 2225 Y0: 2226 mp_clear(&y0); 2227 X1: 2228 mp_clear(&x1); 2229 X0: 2230 mp_clear(&x0); 2231 ERR: 2232 return err; 2233 } 2234 2235 /* Fast (comba) multiplier 2236 * 2237 * This is the fast column-array [comba] multiplier. It is 2238 * designed to compute the columns of the product first 2239 * then handle the carries afterwards. This has the effect 2240 * of making the nested loops that compute the columns very 2241 * simple and schedulable on super-scalar processors. 2242 * 2243 * This has been modified to produce a variable number of 2244 * digits of output so if say only a half-product is required 2245 * you don't have to compute the upper half (a feature 2246 * required for fast Barrett reduction). 2247 * 2248 * Based on Algorithm 14.12 on pp.595 of HAC. 2249 * 2250 */ 2251 static int 2252 fast_col_array_multiply(mp_int * a, mp_int * b, mp_int * c, int digs) 2253 { 2254 int olduse, res, pa, ix, iz; 2255 /*LINTED*/ 2256 mp_digit W[MP_WARRAY]; 2257 mp_word _W; 2258 2259 /* grow the destination as required */ 2260 if (c->alloc < digs) { 2261 if ((res = mp_grow(c, digs)) != MP_OKAY) { 2262 return res; 2263 } 2264 } 2265 2266 /* number of output digits to produce */ 2267 pa = MIN(digs, a->used + b->used); 2268 2269 /* clear the carry */ 2270 _W = 0; 2271 for (ix = 0; ix < pa; ix++) { 2272 int tx, ty; 2273 int iy; 2274 mp_digit *tmpx, *tmpy; 2275 2276 /* get offsets into the two bignums */ 2277 ty = MIN(b->used-1, ix); 2278 tx = ix - ty; 2279 2280 /* setup temp aliases */ 2281 tmpx = a->dp + tx; 2282 tmpy = b->dp + ty; 2283 2284 /* this is the number of times the loop will iterrate, essentially 2285 while (tx++ < a->used && ty-- >= 0) { ... } 2286 */ 2287 iy = MIN(a->used-tx, ty+1); 2288 2289 /* execute loop */ 2290 for (iz = 0; iz < iy; ++iz) { 2291 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2292 2293 } 2294 2295 /* store term */ 2296 W[ix] = ((mp_digit)_W) & MP_MASK; 2297 2298 /* make next carry */ 2299 _W = _W >> ((mp_word)DIGIT_BIT); 2300 } 2301 2302 /* setup dest */ 2303 olduse = c->used; 2304 c->used = pa; 2305 2306 { 2307 mp_digit *tmpc; 2308 tmpc = c->dp; 2309 for (ix = 0; ix < pa+1; ix++) { 2310 /* now extract the previous digit [below the carry] */ 2311 *tmpc++ = W[ix]; 2312 } 2313 2314 /* clear unused digits [that existed in the old copy of c] */ 2315 for (; ix < olduse; ix++) { 2316 *tmpc++ = 0; 2317 } 2318 } 2319 trim_unused_digits(c); 2320 return MP_OKAY; 2321 } 2322 2323 /* return 1 if we can use fast column array multiply */ 2324 /* 2325 * The fast multiplier can be used if the output will 2326 * have less than MP_WARRAY digits and the number of 2327 * digits won't affect carry propagation 2328 */ 2329 static inline int 2330 can_use_fast_column_array(int ndigits, int used) 2331 { 2332 return (((unsigned)ndigits < MP_WARRAY) && 2333 used < (1 << (unsigned)((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT)))); 2334 } 2335 2336 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2337 /* Revision: 1.2 $ */ 2338 /* Date: 2011/03/18 16:22:09 $ */ 2339 2340 2341 /* multiplies |a| * |b| and only computes upto digs digits of result 2342 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2343 * many digits of output are created. 2344 */ 2345 static int 2346 basic_multiply_partial_lower(mp_int * a, mp_int * b, mp_int * c, int digs) 2347 { 2348 mp_int t; 2349 int res, pa, pb, ix, iy; 2350 mp_digit u; 2351 mp_word r; 2352 mp_digit tmpx, *tmpt, *tmpy; 2353 2354 /* can we use the fast multiplier? */ 2355 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2356 return fast_col_array_multiply(a, b, c, digs); 2357 } 2358 2359 if ((res = mp_init_size(&t, digs)) != MP_OKAY) { 2360 return res; 2361 } 2362 t.used = digs; 2363 2364 /* compute the digits of the product directly */ 2365 pa = a->used; 2366 for (ix = 0; ix < pa; ix++) { 2367 /* set the carry to zero */ 2368 u = 0; 2369 2370 /* limit ourselves to making digs digits of output */ 2371 pb = MIN(b->used, digs - ix); 2372 2373 /* setup some aliases */ 2374 /* copy of the digit from a used within the nested loop */ 2375 tmpx = a->dp[ix]; 2376 2377 /* an alias for the destination shifted ix places */ 2378 tmpt = t.dp + ix; 2379 2380 /* an alias for the digits of b */ 2381 tmpy = b->dp; 2382 2383 /* compute the columns of the output and propagate the carry */ 2384 for (iy = 0; iy < pb; iy++) { 2385 /* compute the column as a mp_word */ 2386 r = ((mp_word)*tmpt) + 2387 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2388 ((mp_word) u); 2389 2390 /* the new column is the lower part of the result */ 2391 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2392 2393 /* get the carry word from the result */ 2394 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2395 } 2396 /* set carry if it is placed below digs */ 2397 if (ix + iy < digs) { 2398 *tmpt = u; 2399 } 2400 } 2401 2402 trim_unused_digits(&t); 2403 mp_exch(&t, c); 2404 2405 mp_clear(&t); 2406 return MP_OKAY; 2407 } 2408 2409 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2410 /* Revision: 1.3 $ */ 2411 /* Date: 2011/03/18 16:43:04 $ */ 2412 2413 /* high level multiplication (handles sign) */ 2414 static int 2415 signed_multiply(mp_int * a, mp_int * b, mp_int * c) 2416 { 2417 int res, neg; 2418 2419 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2420 /* use Toom-Cook? */ 2421 if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) { 2422 res = toom_cook_multiply(a, b, c); 2423 } else if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2424 /* use Karatsuba? */ 2425 res = karatsuba_multiply(a, b, c); 2426 } else { 2427 /* can we use the fast multiplier? */ 2428 int digs = a->used + b->used + 1; 2429 2430 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2431 res = fast_col_array_multiply(a, b, c, digs); 2432 } else { 2433 res = basic_multiply_partial_lower(a, b, c, (a)->used + (b)->used + 1); 2434 } 2435 } 2436 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2437 return res; 2438 } 2439 2440 /* this is a modified version of fast_s_mul_digs that only produces 2441 * output digits *above* digs. See the comments for fast_s_mul_digs 2442 * to see how it works. 2443 * 2444 * This is used in the Barrett reduction since for one of the multiplications 2445 * only the higher digits were needed. This essentially halves the work. 2446 * 2447 * Based on Algorithm 14.12 on pp.595 of HAC. 2448 */ 2449 static int 2450 fast_basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2451 { 2452 int olduse, res, pa, ix, iz; 2453 mp_digit W[MP_WARRAY]; 2454 mp_word _W; 2455 2456 /* grow the destination as required */ 2457 pa = a->used + b->used; 2458 if (c->alloc < pa) { 2459 if ((res = mp_grow(c, pa)) != MP_OKAY) { 2460 return res; 2461 } 2462 } 2463 2464 /* number of output digits to produce */ 2465 pa = a->used + b->used; 2466 _W = 0; 2467 for (ix = digs; ix < pa; ix++) { 2468 int tx, ty, iy; 2469 mp_digit *tmpx, *tmpy; 2470 2471 /* get offsets into the two bignums */ 2472 ty = MIN(b->used-1, ix); 2473 tx = ix - ty; 2474 2475 /* setup temp aliases */ 2476 tmpx = a->dp + tx; 2477 tmpy = b->dp + ty; 2478 2479 /* this is the number of times the loop will iterrate, essentially its 2480 while (tx++ < a->used && ty-- >= 0) { ... } 2481 */ 2482 iy = MIN(a->used-tx, ty+1); 2483 2484 /* execute loop */ 2485 for (iz = 0; iz < iy; iz++) { 2486 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2487 } 2488 2489 /* store term */ 2490 W[ix] = ((mp_digit)_W) & MP_MASK; 2491 2492 /* make next carry */ 2493 _W = _W >> ((mp_word)DIGIT_BIT); 2494 } 2495 2496 /* setup dest */ 2497 olduse = c->used; 2498 c->used = pa; 2499 2500 { 2501 mp_digit *tmpc; 2502 2503 tmpc = c->dp + digs; 2504 for (ix = digs; ix < pa; ix++) { 2505 /* now extract the previous digit [below the carry] */ 2506 *tmpc++ = W[ix]; 2507 } 2508 2509 /* clear unused digits [that existed in the old copy of c] */ 2510 for (; ix < olduse; ix++) { 2511 *tmpc++ = 0; 2512 } 2513 } 2514 trim_unused_digits(c); 2515 return MP_OKAY; 2516 } 2517 2518 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2519 /* Revision: 1.1.1.1 $ */ 2520 /* Date: 2011/03/12 22:58:18 $ */ 2521 2522 /* multiplies |a| * |b| and does not compute the lower digs digits 2523 * [meant to get the higher part of the product] 2524 */ 2525 static int 2526 basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2527 { 2528 mp_int t; 2529 int res, pa, pb, ix, iy; 2530 mp_digit carry; 2531 mp_word r; 2532 mp_digit tmpx, *tmpt, *tmpy; 2533 2534 /* can we use the fast multiplier? */ 2535 if (can_use_fast_column_array(a->used + b->used + 1, MIN(a->used, b->used))) { 2536 return fast_basic_multiply_partial_upper(a, b, c, digs); 2537 } 2538 2539 if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) { 2540 return res; 2541 } 2542 t.used = a->used + b->used + 1; 2543 2544 pa = a->used; 2545 pb = b->used; 2546 for (ix = 0; ix < pa; ix++) { 2547 /* clear the carry */ 2548 carry = 0; 2549 2550 /* left hand side of A[ix] * B[iy] */ 2551 tmpx = a->dp[ix]; 2552 2553 /* alias to the address of where the digits will be stored */ 2554 tmpt = &(t.dp[digs]); 2555 2556 /* alias for where to read the right hand side from */ 2557 tmpy = b->dp + (digs - ix); 2558 2559 for (iy = digs - ix; iy < pb; iy++) { 2560 /* calculate the double precision result */ 2561 r = ((mp_word)*tmpt) + 2562 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2563 ((mp_word) carry); 2564 2565 /* get the lower part */ 2566 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2567 2568 /* carry the carry */ 2569 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2570 } 2571 *tmpt = carry; 2572 } 2573 trim_unused_digits(&t); 2574 mp_exch(&t, c); 2575 mp_clear(&t); 2576 return MP_OKAY; 2577 } 2578 2579 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2580 /* Revision: 1.3 $ */ 2581 /* Date: 2011/03/18 16:43:04 $ */ 2582 2583 /* reduces x mod m, assumes 0 < x < m**2, mu is 2584 * precomputed via mp_reduce_setup. 2585 * From HAC pp.604 Algorithm 14.42 2586 */ 2587 static int 2588 mp_reduce(mp_int * x, mp_int * m, mp_int * mu) 2589 { 2590 mp_int q; 2591 int res, um = m->used; 2592 2593 /* q = x */ 2594 if ((res = mp_init_copy(&q, x)) != MP_OKAY) { 2595 return res; 2596 } 2597 2598 /* q1 = x / b**(k-1) */ 2599 rshift_digits(&q, um - 1); 2600 2601 /* according to HAC this optimization is ok */ 2602 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2603 if ((res = signed_multiply(&q, mu, &q)) != MP_OKAY) { 2604 goto CLEANUP; 2605 } 2606 } else { 2607 if ((res = basic_multiply_partial_upper(&q, mu, &q, um)) != MP_OKAY) { 2608 goto CLEANUP; 2609 } 2610 } 2611 2612 /* q3 = q2 / b**(k+1) */ 2613 rshift_digits(&q, um + 1); 2614 2615 /* x = x mod b**(k+1), quick (no division) */ 2616 if ((res = modulo_2_to_power(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2617 goto CLEANUP; 2618 } 2619 2620 /* q = q * m mod b**(k+1), quick (no division) */ 2621 if ((res = basic_multiply_partial_lower(&q, m, &q, um + 1)) != MP_OKAY) { 2622 goto CLEANUP; 2623 } 2624 2625 /* x = x - q */ 2626 if ((res = signed_subtract(x, &q, x)) != MP_OKAY) { 2627 goto CLEANUP; 2628 } 2629 2630 /* If x < 0, add b**(k+1) to it */ 2631 if (compare_digit(x, 0) == MP_LT) { 2632 set_word(&q, 1); 2633 if ((res = lshift_digits(&q, um + 1)) != MP_OKAY) { 2634 goto CLEANUP; 2635 } 2636 if ((res = signed_add(x, &q, x)) != MP_OKAY) { 2637 goto CLEANUP; 2638 } 2639 } 2640 2641 /* Back off if it's too big */ 2642 while (signed_compare(x, m) != MP_LT) { 2643 if ((res = basic_subtract(x, m, x)) != MP_OKAY) { 2644 goto CLEANUP; 2645 } 2646 } 2647 2648 CLEANUP: 2649 mp_clear(&q); 2650 2651 return res; 2652 } 2653 2654 /* determines the setup value */ 2655 static int 2656 mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2657 { 2658 int res; 2659 mp_int tmp; 2660 2661 if ((res = mp_init(&tmp)) != MP_OKAY) { 2662 return res; 2663 } 2664 2665 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2666 goto ERR; 2667 } 2668 2669 if ((res = basic_subtract(&tmp, a, d)) != MP_OKAY) { 2670 goto ERR; 2671 } 2672 2673 ERR: 2674 mp_clear(&tmp); 2675 return res; 2676 } 2677 2678 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2679 /* Revision: 1.1.1.1 $ */ 2680 /* Date: 2011/03/12 22:58:18 $ */ 2681 2682 /* reduces a modulo n where n is of the form 2**p - d 2683 This differs from reduce_2k since "d" can be larger 2684 than a single digit. 2685 */ 2686 static int 2687 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2688 { 2689 mp_int q; 2690 int p, res; 2691 2692 if ((res = mp_init(&q)) != MP_OKAY) { 2693 return res; 2694 } 2695 2696 p = mp_count_bits(n); 2697 top: 2698 /* q = a/2**p, a = a mod 2**p */ 2699 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 2700 goto ERR; 2701 } 2702 2703 /* q = q * d */ 2704 if ((res = signed_multiply(&q, d, &q)) != MP_OKAY) { 2705 goto ERR; 2706 } 2707 2708 /* a = a + q */ 2709 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 2710 goto ERR; 2711 } 2712 2713 if (compare_magnitude(a, n) != MP_LT) { 2714 basic_subtract(a, n, a); 2715 goto top; 2716 } 2717 2718 ERR: 2719 mp_clear(&q); 2720 return res; 2721 } 2722 2723 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2724 /* Revision: 1.1.1.1 $ */ 2725 /* Date: 2011/03/12 22:58:18 $ */ 2726 2727 /* squaring using Toom-Cook 3-way algorithm */ 2728 static int 2729 toom_cook_square(mp_int *a, mp_int *b) 2730 { 2731 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2732 int res, B; 2733 2734 /* init temps */ 2735 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2736 return res; 2737 } 2738 2739 /* B */ 2740 B = a->used / 3; 2741 2742 /* a = a2 * B**2 + a1 * B + a0 */ 2743 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2744 goto ERR; 2745 } 2746 2747 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2748 goto ERR; 2749 } 2750 rshift_digits(&a1, B); 2751 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 2752 2753 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2754 goto ERR; 2755 } 2756 rshift_digits(&a2, B*2); 2757 2758 /* w0 = a0*a0 */ 2759 if ((res = square(&a0, &w0)) != MP_OKAY) { 2760 goto ERR; 2761 } 2762 2763 /* w4 = a2 * a2 */ 2764 if ((res = square(&a2, &w4)) != MP_OKAY) { 2765 goto ERR; 2766 } 2767 2768 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2769 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 2770 goto ERR; 2771 } 2772 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2773 goto ERR; 2774 } 2775 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2776 goto ERR; 2777 } 2778 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2779 goto ERR; 2780 } 2781 2782 if ((res = square(&tmp1, &w1)) != MP_OKAY) { 2783 goto ERR; 2784 } 2785 2786 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2787 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 2788 goto ERR; 2789 } 2790 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2791 goto ERR; 2792 } 2793 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2794 goto ERR; 2795 } 2796 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2797 goto ERR; 2798 } 2799 2800 if ((res = square(&tmp1, &w3)) != MP_OKAY) { 2801 goto ERR; 2802 } 2803 2804 2805 /* w2 = (a2 + a1 + a0)**2 */ 2806 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2807 goto ERR; 2808 } 2809 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2810 goto ERR; 2811 } 2812 if ((res = square(&tmp1, &w2)) != MP_OKAY) { 2813 goto ERR; 2814 } 2815 2816 /* now solve the matrix 2817 2818 0 0 0 0 1 2819 1 2 4 8 16 2820 1 1 1 1 1 2821 16 8 4 2 1 2822 1 0 0 0 0 2823 2824 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2825 */ 2826 2827 /* r1 - r4 */ 2828 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 2829 goto ERR; 2830 } 2831 /* r3 - r0 */ 2832 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 2833 goto ERR; 2834 } 2835 /* r1/2 */ 2836 if ((res = half(&w1, &w1)) != MP_OKAY) { 2837 goto ERR; 2838 } 2839 /* r3/2 */ 2840 if ((res = half(&w3, &w3)) != MP_OKAY) { 2841 goto ERR; 2842 } 2843 /* r2 - r0 - r4 */ 2844 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 2845 goto ERR; 2846 } 2847 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 2848 goto ERR; 2849 } 2850 /* r1 - r2 */ 2851 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2852 goto ERR; 2853 } 2854 /* r3 - r2 */ 2855 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2856 goto ERR; 2857 } 2858 /* r1 - 8r0 */ 2859 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 2860 goto ERR; 2861 } 2862 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 2863 goto ERR; 2864 } 2865 /* r3 - 8r4 */ 2866 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 2867 goto ERR; 2868 } 2869 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 2870 goto ERR; 2871 } 2872 /* 3r2 - r1 - r3 */ 2873 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2874 goto ERR; 2875 } 2876 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2877 goto ERR; 2878 } 2879 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2880 goto ERR; 2881 } 2882 /* r1 - r2 */ 2883 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2884 goto ERR; 2885 } 2886 /* r3 - r2 */ 2887 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2888 goto ERR; 2889 } 2890 /* r1/3 */ 2891 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2892 goto ERR; 2893 } 2894 /* r3/3 */ 2895 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2896 goto ERR; 2897 } 2898 2899 /* at this point shift W[n] by B*n */ 2900 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2901 goto ERR; 2902 } 2903 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2904 goto ERR; 2905 } 2906 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2907 goto ERR; 2908 } 2909 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2910 goto ERR; 2911 } 2912 2913 if ((res = signed_add(&w0, &w1, b)) != MP_OKAY) { 2914 goto ERR; 2915 } 2916 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2917 goto ERR; 2918 } 2919 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2920 goto ERR; 2921 } 2922 if ((res = signed_add(&tmp1, b, b)) != MP_OKAY) { 2923 goto ERR; 2924 } 2925 2926 ERR: 2927 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2928 return res; 2929 } 2930 2931 2932 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2933 /* Revision: 1.1.1.1 $ */ 2934 /* Date: 2011/03/12 22:58:18 $ */ 2935 2936 /* Karatsuba squaring, computes b = a*a using three 2937 * half size squarings 2938 * 2939 * See comments of karatsuba_mul for details. It 2940 * is essentially the same algorithm but merely 2941 * tuned to perform recursive squarings. 2942 */ 2943 static int 2944 karatsuba_square(mp_int * a, mp_int * b) 2945 { 2946 mp_int x0, x1, t1, t2, x0x0, x1x1; 2947 int B, err; 2948 2949 err = MP_MEM; 2950 2951 /* min # of digits */ 2952 B = a->used; 2953 2954 /* now divide in two */ 2955 B = (unsigned)B >> 1; 2956 2957 /* init copy all the temps */ 2958 if (mp_init_size(&x0, B) != MP_OKAY) { 2959 goto ERR; 2960 } 2961 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2962 goto X0; 2963 } 2964 /* init temps */ 2965 if (mp_init_size(&t1, a->used * 2) != MP_OKAY) { 2966 goto X1; 2967 } 2968 if (mp_init_size(&t2, a->used * 2) != MP_OKAY) { 2969 goto T1; 2970 } 2971 if (mp_init_size(&x0x0, B * 2) != MP_OKAY) { 2972 goto T2; 2973 } 2974 if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) { 2975 goto X0X0; 2976 } 2977 2978 memcpy(x0.dp, a->dp, B * sizeof(*x0.dp)); 2979 memcpy(x1.dp, &a->dp[B], (a->used - B) * sizeof(*x1.dp)); 2980 2981 x0.used = B; 2982 x1.used = a->used - B; 2983 2984 trim_unused_digits(&x0); 2985 2986 /* now calc the products x0*x0 and x1*x1 */ 2987 if (square(&x0, &x0x0) != MP_OKAY) { 2988 goto X1X1; /* x0x0 = x0*x0 */ 2989 } 2990 if (square(&x1, &x1x1) != MP_OKAY) { 2991 goto X1X1; /* x1x1 = x1*x1 */ 2992 } 2993 /* now calc (x1+x0)**2 */ 2994 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2995 goto X1X1; /* t1 = x1 - x0 */ 2996 } 2997 if (square(&t1, &t1) != MP_OKAY) { 2998 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 2999 } 3000 /* add x0y0 */ 3001 if (basic_add(&x0x0, &x1x1, &t2) != MP_OKAY) { 3002 goto X1X1; /* t2 = x0x0 + x1x1 */ 3003 } 3004 if (basic_subtract(&t1, &t2, &t1) != MP_OKAY) { 3005 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3006 } 3007 /* shift by B */ 3008 if (lshift_digits(&t1, B) != MP_OKAY) { 3009 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3010 } 3011 if (lshift_digits(&x1x1, B * 2) != MP_OKAY) { 3012 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3013 } 3014 if (signed_add(&x0x0, &t1, &t1) != MP_OKAY) { 3015 goto X1X1; /* t1 = x0x0 + t1 */ 3016 } 3017 if (signed_add(&t1, &x1x1, b) != MP_OKAY) { 3018 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3019 } 3020 err = MP_OKAY; 3021 3022 X1X1: 3023 mp_clear(&x1x1); 3024 X0X0: 3025 mp_clear(&x0x0); 3026 T2: 3027 mp_clear(&t2); 3028 T1: 3029 mp_clear(&t1); 3030 X1: 3031 mp_clear(&x1); 3032 X0: 3033 mp_clear(&x0); 3034 ERR: 3035 return err; 3036 } 3037 3038 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3039 /* Revision: 1.2 $ */ 3040 /* Date: 2011/03/12 23:43:54 $ */ 3041 3042 /* the jist of squaring... 3043 * you do like mult except the offset of the tmpx [one that 3044 * starts closer to zero] can't equal the offset of tmpy. 3045 * So basically you set up iy like before then you min it with 3046 * (ty-tx) so that it never happens. You double all those 3047 * you add in the inner loop 3048 3049 After that loop you do the squares and add them in. 3050 */ 3051 3052 static int 3053 fast_basic_square(mp_int * a, mp_int * b) 3054 { 3055 int olduse, res, pa, ix, iz; 3056 mp_digit W[MP_WARRAY], *tmpx; 3057 mp_word W1; 3058 3059 /* grow the destination as required */ 3060 pa = a->used + a->used; 3061 if (b->alloc < pa) { 3062 if ((res = mp_grow(b, pa)) != MP_OKAY) { 3063 return res; 3064 } 3065 } 3066 3067 /* number of output digits to produce */ 3068 W1 = 0; 3069 for (ix = 0; ix < pa; ix++) { 3070 int tx, ty, iy; 3071 mp_word _W; 3072 mp_digit *tmpy; 3073 3074 /* clear counter */ 3075 _W = 0; 3076 3077 /* get offsets into the two bignums */ 3078 ty = MIN(a->used-1, ix); 3079 tx = ix - ty; 3080 3081 /* setup temp aliases */ 3082 tmpx = a->dp + tx; 3083 tmpy = a->dp + ty; 3084 3085 /* this is the number of times the loop will iterrate, essentially 3086 while (tx++ < a->used && ty-- >= 0) { ... } 3087 */ 3088 iy = MIN(a->used-tx, ty+1); 3089 3090 /* now for squaring tx can never equal ty 3091 * we halve the distance since they approach at a rate of 2x 3092 * and we have to round because odd cases need to be executed 3093 */ 3094 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3095 3096 /* execute loop */ 3097 for (iz = 0; iz < iy; iz++) { 3098 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3099 } 3100 3101 /* double the inner product and add carry */ 3102 _W = _W + _W + W1; 3103 3104 /* even columns have the square term in them */ 3105 if ((ix&1) == 0) { 3106 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3107 } 3108 3109 /* store it */ 3110 W[ix] = (mp_digit)(_W & MP_MASK); 3111 3112 /* make next carry */ 3113 W1 = _W >> ((mp_word)DIGIT_BIT); 3114 } 3115 3116 /* setup dest */ 3117 olduse = b->used; 3118 b->used = a->used+a->used; 3119 3120 { 3121 mp_digit *tmpb; 3122 tmpb = b->dp; 3123 for (ix = 0; ix < pa; ix++) { 3124 *tmpb++ = W[ix] & MP_MASK; 3125 } 3126 3127 /* clear unused digits [that existed in the old copy of c] */ 3128 for (; ix < olduse; ix++) { 3129 *tmpb++ = 0; 3130 } 3131 } 3132 trim_unused_digits(b); 3133 return MP_OKAY; 3134 } 3135 3136 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3137 /* Revision: 1.3 $ */ 3138 /* Date: 2011/03/18 16:43:04 $ */ 3139 3140 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3141 static int 3142 basic_square(mp_int * a, mp_int * b) 3143 { 3144 mp_int t; 3145 int res, ix, iy, pa; 3146 mp_word r; 3147 mp_digit carry, tmpx, *tmpt; 3148 3149 pa = a->used; 3150 if ((res = mp_init_size(&t, 2*pa + 1)) != MP_OKAY) { 3151 return res; 3152 } 3153 3154 /* default used is maximum possible size */ 3155 t.used = 2*pa + 1; 3156 3157 for (ix = 0; ix < pa; ix++) { 3158 /* first calculate the digit at 2*ix */ 3159 /* calculate double precision result */ 3160 r = ((mp_word) t.dp[2*ix]) + 3161 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3162 3163 /* store lower part in result */ 3164 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3165 3166 /* get the carry */ 3167 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3168 3169 /* left hand side of A[ix] * A[iy] */ 3170 tmpx = a->dp[ix]; 3171 3172 /* alias for where to store the results */ 3173 tmpt = t.dp + (2*ix + 1); 3174 3175 for (iy = ix + 1; iy < pa; iy++) { 3176 /* first calculate the product */ 3177 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3178 3179 /* now calculate the double precision result, note we use 3180 * addition instead of *2 since it's easier to optimize 3181 */ 3182 r = ((mp_word) *tmpt) + r + r + ((mp_word) carry); 3183 3184 /* store lower part */ 3185 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3186 3187 /* get carry */ 3188 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3189 } 3190 /* propagate upwards */ 3191 while (carry != ((mp_digit) 0)) { 3192 r = ((mp_word) *tmpt) + ((mp_word) carry); 3193 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3194 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3195 } 3196 } 3197 3198 trim_unused_digits(&t); 3199 mp_exch(&t, b); 3200 mp_clear(&t); 3201 return MP_OKAY; 3202 } 3203 3204 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3205 /* Revision: 1.1.1.1 $ */ 3206 /* Date: 2011/03/12 22:58:18 $ */ 3207 3208 #define TOOM_SQR_CUTOFF 400 3209 #define KARATSUBA_SQR_CUTOFF 120 3210 3211 /* computes b = a*a */ 3212 static int 3213 square(mp_int * a, mp_int * b) 3214 { 3215 int res; 3216 3217 /* use Toom-Cook? */ 3218 if (a->used >= TOOM_SQR_CUTOFF) { 3219 res = toom_cook_square(a, b); 3220 /* Karatsuba? */ 3221 } else if (a->used >= KARATSUBA_SQR_CUTOFF) { 3222 res = karatsuba_square(a, b); 3223 } else { 3224 /* can we use the fast comba multiplier? */ 3225 if (can_use_fast_column_array(a->used + a->used + 1, a->used)) { 3226 res = fast_basic_square(a, b); 3227 } else { 3228 res = basic_square(a, b); 3229 } 3230 } 3231 b->sign = MP_ZPOS; 3232 return res; 3233 } 3234 3235 /* find window size */ 3236 static inline int 3237 find_window_size(mp_int *X) 3238 { 3239 int x; 3240 3241 x = mp_count_bits(X); 3242 return (x <= 7) ? 2 : (x <= 36) ? 3 : (x <= 140) ? 4 : (x <= 450) ? 5 : (x <= 1303) ? 6 : (x <= 3529) ? 7 : 8; 3243 } 3244 3245 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3246 /* Revision: 1.3 $ */ 3247 /* Date: 2011/03/18 16:43:04 $ */ 3248 3249 #define TAB_SIZE 256 3250 3251 static int 3252 basic_exponent_mod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3253 { 3254 mp_digit buf; 3255 mp_int M[TAB_SIZE], res, mu; 3256 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3257 int (*redux)(mp_int*,mp_int*,mp_int*); 3258 3259 winsize = find_window_size(X); 3260 3261 /* init M array */ 3262 /* init first cell */ 3263 if ((err = mp_init(&M[1])) != MP_OKAY) { 3264 return err; 3265 } 3266 3267 /* now init the second half of the array */ 3268 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3269 if ((err = mp_init(&M[x])) != MP_OKAY) { 3270 for (y = 1<<(winsize-1); y < x; y++) { 3271 mp_clear(&M[y]); 3272 } 3273 mp_clear(&M[1]); 3274 return err; 3275 } 3276 } 3277 3278 /* create mu, used for Barrett reduction */ 3279 if ((err = mp_init(&mu)) != MP_OKAY) { 3280 goto LBL_M; 3281 } 3282 3283 if (redmode == 0) { 3284 if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) { 3285 goto LBL_MU; 3286 } 3287 redux = mp_reduce; 3288 } else { 3289 if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) { 3290 goto LBL_MU; 3291 } 3292 redux = mp_reduce_2k_l; 3293 } 3294 3295 /* create M table 3296 * 3297 * The M table contains powers of the base, 3298 * e.g. M[x] = G**x mod P 3299 * 3300 * The first half of the table is not 3301 * computed though accept for M[0] and M[1] 3302 */ 3303 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 3304 goto LBL_MU; 3305 } 3306 3307 /* compute the value at M[1<<(winsize-1)] by squaring 3308 * M[1] (winsize-1) times 3309 */ 3310 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3311 goto LBL_MU; 3312 } 3313 3314 for (x = 0; x < (winsize - 1); x++) { 3315 /* square it */ 3316 if ((err = square(&M[1 << (winsize - 1)], 3317 &M[1 << (winsize - 1)])) != MP_OKAY) { 3318 goto LBL_MU; 3319 } 3320 3321 /* reduce modulo P */ 3322 if ((err = redux(&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3323 goto LBL_MU; 3324 } 3325 } 3326 3327 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3328 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3329 */ 3330 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3331 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3332 goto LBL_MU; 3333 } 3334 if ((err = redux(&M[x], P, &mu)) != MP_OKAY) { 3335 goto LBL_MU; 3336 } 3337 } 3338 3339 /* setup result */ 3340 if ((err = mp_init(&res)) != MP_OKAY) { 3341 goto LBL_MU; 3342 } 3343 set_word(&res, 1); 3344 3345 /* set initial mode and bit cnt */ 3346 mode = 0; 3347 bitcnt = 1; 3348 buf = 0; 3349 digidx = X->used - 1; 3350 bitcpy = 0; 3351 bitbuf = 0; 3352 3353 for (;;) { 3354 /* grab next digit as required */ 3355 if (--bitcnt == 0) { 3356 /* if digidx == -1 we are out of digits */ 3357 if (digidx == -1) { 3358 break; 3359 } 3360 /* read next digit and reset the bitcnt */ 3361 buf = X->dp[digidx--]; 3362 bitcnt = (int) DIGIT_BIT; 3363 } 3364 3365 /* grab the next msb from the exponent */ 3366 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3367 buf <<= (mp_digit)1; 3368 3369 /* if the bit is zero and mode == 0 then we ignore it 3370 * These represent the leading zero bits before the first 1 bit 3371 * in the exponent. Technically this opt is not required but it 3372 * does lower the # of trivial squaring/reductions used 3373 */ 3374 if (mode == 0 && y == 0) { 3375 continue; 3376 } 3377 3378 /* if the bit is zero and mode == 1 then we square */ 3379 if (mode == 1 && y == 0) { 3380 if ((err = square(&res, &res)) != MP_OKAY) { 3381 goto LBL_RES; 3382 } 3383 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3384 goto LBL_RES; 3385 } 3386 continue; 3387 } 3388 3389 /* else we add it to the window */ 3390 bitbuf |= (y << (winsize - ++bitcpy)); 3391 mode = 2; 3392 3393 if (bitcpy == winsize) { 3394 /* ok window is filled so square as required and multiply */ 3395 /* square first */ 3396 for (x = 0; x < winsize; x++) { 3397 if ((err = square(&res, &res)) != MP_OKAY) { 3398 goto LBL_RES; 3399 } 3400 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3401 goto LBL_RES; 3402 } 3403 } 3404 3405 /* then multiply */ 3406 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 3407 goto LBL_RES; 3408 } 3409 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3410 goto LBL_RES; 3411 } 3412 3413 /* empty window and reset */ 3414 bitcpy = 0; 3415 bitbuf = 0; 3416 mode = 1; 3417 } 3418 } 3419 3420 /* if bits remain then square/multiply */ 3421 if (mode == 2 && bitcpy > 0) { 3422 /* square then multiply if the bit is set */ 3423 for (x = 0; x < bitcpy; x++) { 3424 if ((err = square(&res, &res)) != MP_OKAY) { 3425 goto LBL_RES; 3426 } 3427 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3428 goto LBL_RES; 3429 } 3430 3431 bitbuf <<= 1; 3432 if ((bitbuf & (1 << winsize)) != 0) { 3433 /* then multiply */ 3434 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 3435 goto LBL_RES; 3436 } 3437 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3438 goto LBL_RES; 3439 } 3440 } 3441 } 3442 } 3443 3444 mp_exch(&res, Y); 3445 err = MP_OKAY; 3446 LBL_RES: 3447 mp_clear(&res); 3448 LBL_MU: 3449 mp_clear(&mu); 3450 LBL_M: 3451 mp_clear(&M[1]); 3452 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3453 mp_clear(&M[x]); 3454 } 3455 return err; 3456 } 3457 3458 /* determines if a number is a valid DR modulus */ 3459 static int 3460 is_diminished_radix_modulus(mp_int *a) 3461 { 3462 int ix; 3463 3464 /* must be at least two digits */ 3465 if (a->used < 2) { 3466 return 0; 3467 } 3468 3469 /* must be of the form b**k - a [a <= b] so all 3470 * but the first digit must be equal to -1 (mod b). 3471 */ 3472 for (ix = 1; ix < a->used; ix++) { 3473 if (a->dp[ix] != MP_MASK) { 3474 return 0; 3475 } 3476 } 3477 return 1; 3478 } 3479 3480 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3481 /* Revision: 1.1.1.1 $ */ 3482 /* Date: 2011/03/12 22:58:18 $ */ 3483 3484 /* determines if mp_reduce_2k can be used */ 3485 static int 3486 mp_reduce_is_2k(mp_int *a) 3487 { 3488 int ix, iy, iw; 3489 mp_digit iz; 3490 3491 if (a->used == 0) { 3492 return MP_NO; 3493 } 3494 if (a->used == 1) { 3495 return MP_YES; 3496 } 3497 if (a->used > 1) { 3498 iy = mp_count_bits(a); 3499 iz = 1; 3500 iw = 1; 3501 3502 /* Test every bit from the second digit up, must be 1 */ 3503 for (ix = DIGIT_BIT; ix < iy; ix++) { 3504 if ((a->dp[iw] & iz) == 0) { 3505 return MP_NO; 3506 } 3507 iz <<= 1; 3508 if (iz > (mp_digit)MP_MASK) { 3509 ++iw; 3510 iz = 1; 3511 } 3512 } 3513 } 3514 return MP_YES; 3515 } 3516 3517 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3518 /* Revision: 1.1.1.1 $ */ 3519 /* Date: 2011/03/12 22:58:18 $ */ 3520 3521 3522 /* d = a * b (mod c) */ 3523 static int 3524 multiply_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3525 { 3526 mp_int t; 3527 int res; 3528 3529 if ((res = mp_init(&t)) != MP_OKAY) { 3530 return res; 3531 } 3532 3533 if ((res = signed_multiply(a, b, &t)) != MP_OKAY) { 3534 mp_clear(&t); 3535 return res; 3536 } 3537 res = modulo(&t, c, d); 3538 mp_clear(&t); 3539 return res; 3540 } 3541 3542 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3543 /* Revision: 1.1.1.1 $ */ 3544 /* Date: 2011/03/12 22:58:18 $ */ 3545 3546 /* setups the montgomery reduction stuff */ 3547 static int 3548 mp_montgomery_setup(mp_int * n, mp_digit * rho) 3549 { 3550 mp_digit x, b; 3551 3552 /* fast inversion mod 2**k 3553 * 3554 * Based on the fact that 3555 * 3556 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3557 * => 2*X*A - X*X*A*A = 1 3558 * => 2*(1) - (1) = 1 3559 */ 3560 b = n->dp[0]; 3561 3562 if ((b & 1) == 0) { 3563 return MP_VAL; 3564 } 3565 3566 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3567 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3568 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3569 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3570 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3571 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3572 } 3573 3574 /* rho = -1/m mod b */ 3575 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3576 3577 return MP_OKAY; 3578 } 3579 3580 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3581 /* Revision: 1.1.1.1 $ */ 3582 /* Date: 2011/03/12 22:58:18 $ */ 3583 3584 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3585 * 3586 * This is an optimized implementation of montgomery_reduce 3587 * which uses the comba method to quickly calculate the columns of the 3588 * reduction. 3589 * 3590 * Based on Algorithm 14.32 on pp.601 of HAC. 3591 */ 3592 static int 3593 fast_mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3594 { 3595 int ix, res, olduse; 3596 /*LINTED*/ 3597 mp_word W[MP_WARRAY]; 3598 3599 /* get old used count */ 3600 olduse = x->used; 3601 3602 /* grow a as required */ 3603 if (x->alloc < n->used + 1) { 3604 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) { 3605 return res; 3606 } 3607 } 3608 3609 /* first we have to get the digits of the input into 3610 * an array of double precision words W[...] 3611 */ 3612 { 3613 mp_word *_W; 3614 mp_digit *tmpx; 3615 3616 /* alias for the W[] array */ 3617 _W = W; 3618 3619 /* alias for the digits of x*/ 3620 tmpx = x->dp; 3621 3622 /* copy the digits of a into W[0..a->used-1] */ 3623 for (ix = 0; ix < x->used; ix++) { 3624 *_W++ = *tmpx++; 3625 } 3626 3627 /* zero the high words of W[a->used..m->used*2] */ 3628 for (; ix < n->used * 2 + 1; ix++) { 3629 *_W++ = 0; 3630 } 3631 } 3632 3633 /* now we proceed to zero successive digits 3634 * from the least significant upwards 3635 */ 3636 for (ix = 0; ix < n->used; ix++) { 3637 /* mu = ai * m' mod b 3638 * 3639 * We avoid a double precision multiplication (which isn't required) 3640 * by casting the value down to a mp_digit. Note this requires 3641 * that W[ix-1] have the carry cleared (see after the inner loop) 3642 */ 3643 mp_digit mu; 3644 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3645 3646 /* a = a + mu * m * b**i 3647 * 3648 * This is computed in place and on the fly. The multiplication 3649 * by b**i is handled by offseting which columns the results 3650 * are added to. 3651 * 3652 * Note the comba method normally doesn't handle carries in the 3653 * inner loop In this case we fix the carry from the previous 3654 * column since the Montgomery reduction requires digits of the 3655 * result (so far) [see above] to work. This is 3656 * handled by fixing up one carry after the inner loop. The 3657 * carry fixups are done in order so after these loops the 3658 * first m->used words of W[] have the carries fixed 3659 */ 3660 { 3661 int iy; 3662 mp_digit *tmpn; 3663 mp_word *_W; 3664 3665 /* alias for the digits of the modulus */ 3666 tmpn = n->dp; 3667 3668 /* Alias for the columns set by an offset of ix */ 3669 _W = W + ix; 3670 3671 /* inner loop */ 3672 for (iy = 0; iy < n->used; iy++) { 3673 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3674 } 3675 } 3676 3677 /* now fix carry for next digit, W[ix+1] */ 3678 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3679 } 3680 3681 /* now we have to propagate the carries and 3682 * shift the words downward [all those least 3683 * significant digits we zeroed]. 3684 */ 3685 { 3686 mp_digit *tmpx; 3687 mp_word *_W, *_W1; 3688 3689 /* nox fix rest of carries */ 3690 3691 /* alias for current word */ 3692 _W1 = W + ix; 3693 3694 /* alias for next word, where the carry goes */ 3695 _W = W + ++ix; 3696 3697 for (; ix <= n->used * 2 + 1; ix++) { 3698 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3699 } 3700 3701 /* copy out, A = A/b**n 3702 * 3703 * The result is A/b**n but instead of converting from an 3704 * array of mp_word to mp_digit than calling rshift_digits 3705 * we just copy them in the right order 3706 */ 3707 3708 /* alias for destination word */ 3709 tmpx = x->dp; 3710 3711 /* alias for shifted double precision result */ 3712 _W = W + n->used; 3713 3714 for (ix = 0; ix < n->used + 1; ix++) { 3715 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3716 } 3717 3718 /* zero oldused digits, if the input a was larger than 3719 * m->used+1 we'll have to clear the digits 3720 */ 3721 for (; ix < olduse; ix++) { 3722 *tmpx++ = 0; 3723 } 3724 } 3725 3726 /* set the max used and clamp */ 3727 x->used = n->used + 1; 3728 trim_unused_digits(x); 3729 3730 /* if A >= m then A = A - m */ 3731 if (compare_magnitude(x, n) != MP_LT) { 3732 return basic_subtract(x, n, x); 3733 } 3734 return MP_OKAY; 3735 } 3736 3737 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3738 /* Revision: 1.2 $ */ 3739 /* Date: 2011/03/18 16:22:09 $ */ 3740 3741 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3742 static int 3743 mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3744 { 3745 int ix, res, digs; 3746 mp_digit mu; 3747 3748 /* can the fast reduction [comba] method be used? 3749 * 3750 * Note that unlike in mul you're safely allowed *less* 3751 * than the available columns [255 per default] since carries 3752 * are fixed up in the inner loop. 3753 */ 3754 digs = n->used * 2 + 1; 3755 if (can_use_fast_column_array(digs, n->used)) { 3756 return fast_mp_montgomery_reduce(x, n, rho); 3757 } 3758 3759 /* grow the input as required */ 3760 if (x->alloc < digs) { 3761 if ((res = mp_grow(x, digs)) != MP_OKAY) { 3762 return res; 3763 } 3764 } 3765 x->used = digs; 3766 3767 for (ix = 0; ix < n->used; ix++) { 3768 /* mu = ai * rho mod b 3769 * 3770 * The value of rho must be precalculated via 3771 * montgomery_setup() such that 3772 * it equals -1/n0 mod b this allows the 3773 * following inner loop to reduce the 3774 * input one digit at a time 3775 */ 3776 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3777 3778 /* a = a + mu * m * b**i */ 3779 { 3780 int iy; 3781 mp_digit *tmpn, *tmpx, carry; 3782 mp_word r; 3783 3784 /* alias for digits of the modulus */ 3785 tmpn = n->dp; 3786 3787 /* alias for the digits of x [the input] */ 3788 tmpx = x->dp + ix; 3789 3790 /* set the carry to zero */ 3791 carry = 0; 3792 3793 /* Multiply and add in place */ 3794 for (iy = 0; iy < n->used; iy++) { 3795 /* compute product and sum */ 3796 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3797 ((mp_word) carry) + ((mp_word) * tmpx); 3798 3799 /* get carry */ 3800 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3801 3802 /* fix digit */ 3803 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3804 } 3805 /* At this point the ix'th digit of x should be zero */ 3806 3807 3808 /* propagate carries upwards as required*/ 3809 while (carry) { 3810 *tmpx += carry; 3811 carry = *tmpx >> DIGIT_BIT; 3812 *tmpx++ &= MP_MASK; 3813 } 3814 } 3815 } 3816 3817 /* at this point the n.used'th least 3818 * significant digits of x are all zero 3819 * which means we can shift x to the 3820 * right by n.used digits and the 3821 * residue is unchanged. 3822 */ 3823 3824 /* x = x/b**n.used */ 3825 trim_unused_digits(x); 3826 rshift_digits(x, n->used); 3827 3828 /* if x >= n then x = x - n */ 3829 if (compare_magnitude(x, n) != MP_LT) { 3830 return basic_subtract(x, n, x); 3831 } 3832 3833 return MP_OKAY; 3834 } 3835 3836 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3837 /* Revision: 1.3 $ */ 3838 /* Date: 2011/03/18 16:43:04 $ */ 3839 3840 /* determines the setup value */ 3841 static void 3842 diminished_radix_setup(mp_int *a, mp_digit *d) 3843 { 3844 /* the casts are required if DIGIT_BIT is one less than 3845 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3846 */ 3847 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3848 ((mp_word)a->dp[0])); 3849 } 3850 3851 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3852 /* Revision: 1.1.1.1 $ */ 3853 /* Date: 2011/03/12 22:58:18 $ */ 3854 3855 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3856 * 3857 * Based on algorithm from the paper 3858 * 3859 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3860 * Chae Hoon Lim, Pil Joong Lee, 3861 * POSTECH Information Research Laboratories 3862 * 3863 * The modulus must be of a special format [see manual] 3864 * 3865 * Has been modified to use algorithm 7.10 from the LTM book instead 3866 * 3867 * Input x must be in the range 0 <= x <= (n-1)**2 3868 */ 3869 static int 3870 diminished_radix_reduce(mp_int * x, mp_int * n, mp_digit k) 3871 { 3872 int err, i, m; 3873 mp_word r; 3874 mp_digit mu, *tmpx1, *tmpx2; 3875 3876 /* m = digits in modulus */ 3877 m = n->used; 3878 3879 /* ensure that "x" has at least 2m digits */ 3880 if (x->alloc < m + m) { 3881 if ((err = mp_grow(x, m + m)) != MP_OKAY) { 3882 return err; 3883 } 3884 } 3885 3886 /* top of loop, this is where the code resumes if 3887 * another reduction pass is required. 3888 */ 3889 top: 3890 /* aliases for digits */ 3891 /* alias for lower half of x */ 3892 tmpx1 = x->dp; 3893 3894 /* alias for upper half of x, or x/B**m */ 3895 tmpx2 = x->dp + m; 3896 3897 /* set carry to zero */ 3898 mu = 0; 3899 3900 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3901 for (i = 0; i < m; i++) { 3902 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3903 *tmpx1++ = (mp_digit)(r & MP_MASK); 3904 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3905 } 3906 3907 /* set final carry */ 3908 *tmpx1++ = mu; 3909 3910 /* zero words above m */ 3911 for (i = m + 1; i < x->used; i++) { 3912 *tmpx1++ = 0; 3913 } 3914 3915 /* clamp, sub and return */ 3916 trim_unused_digits(x); 3917 3918 /* if x >= n then subtract and reduce again 3919 * Each successive "recursion" makes the input smaller and smaller. 3920 */ 3921 if (compare_magnitude(x, n) != MP_LT) { 3922 basic_subtract(x, n, x); 3923 goto top; 3924 } 3925 return MP_OKAY; 3926 } 3927 3928 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3929 /* Revision: 1.1.1.1 $ */ 3930 /* Date: 2011/03/12 22:58:18 $ */ 3931 3932 /* determines the setup value */ 3933 static int 3934 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3935 { 3936 int res, p; 3937 mp_int tmp; 3938 3939 if ((res = mp_init(&tmp)) != MP_OKAY) { 3940 return res; 3941 } 3942 3943 p = mp_count_bits(a); 3944 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3945 mp_clear(&tmp); 3946 return res; 3947 } 3948 3949 if ((res = basic_subtract(&tmp, a, &tmp)) != MP_OKAY) { 3950 mp_clear(&tmp); 3951 return res; 3952 } 3953 3954 *d = tmp.dp[0]; 3955 mp_clear(&tmp); 3956 return MP_OKAY; 3957 } 3958 3959 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3960 /* Revision: 1.1.1.1 $ */ 3961 /* Date: 2011/03/12 22:58:18 $ */ 3962 3963 /* reduces a modulo n where n is of the form 2**p - d */ 3964 static int 3965 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 3966 { 3967 mp_int q; 3968 int p, res; 3969 3970 if ((res = mp_init(&q)) != MP_OKAY) { 3971 return res; 3972 } 3973 3974 p = mp_count_bits(n); 3975 top: 3976 /* q = a/2**p, a = a mod 2**p */ 3977 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 3978 goto ERR; 3979 } 3980 3981 if (d != 1) { 3982 /* q = q * d */ 3983 if ((res = multiply_digit(&q, d, &q)) != MP_OKAY) { 3984 goto ERR; 3985 } 3986 } 3987 3988 /* a = a + q */ 3989 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 3990 goto ERR; 3991 } 3992 3993 if (compare_magnitude(a, n) != MP_LT) { 3994 basic_subtract(a, n, a); 3995 goto top; 3996 } 3997 3998 ERR: 3999 mp_clear(&q); 4000 return res; 4001 } 4002 4003 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 4004 /* Revision: 1.1.1.1 $ */ 4005 /* Date: 2011/03/12 22:58:18 $ */ 4006 4007 /* 4008 * shifts with subtractions when the result is greater than b. 4009 * 4010 * The method is slightly modified to shift B unconditionally upto just under 4011 * the leading bit of b. This saves alot of multiple precision shifting. 4012 */ 4013 static int 4014 mp_montgomery_calc_normalization(mp_int * a, mp_int * b) 4015 { 4016 int x, bits, res; 4017 4018 /* how many bits of last digit does b use */ 4019 bits = mp_count_bits(b) % DIGIT_BIT; 4020 4021 if (b->used > 1) { 4022 if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4023 return res; 4024 } 4025 } else { 4026 set_word(a, 1); 4027 bits = 1; 4028 } 4029 4030 4031 /* now compute C = A * B mod b */ 4032 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4033 if ((res = doubled(a, a)) != MP_OKAY) { 4034 return res; 4035 } 4036 if (compare_magnitude(a, b) != MP_LT) { 4037 if ((res = basic_subtract(a, b, a)) != MP_OKAY) { 4038 return res; 4039 } 4040 } 4041 } 4042 4043 return MP_OKAY; 4044 } 4045 4046 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4047 /* Revision: 1.1.1.1 $ */ 4048 /* Date: 2011/03/12 22:58:18 $ */ 4049 4050 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4051 * 4052 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4053 * The value of k changes based on the size of the exponent. 4054 * 4055 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4056 */ 4057 4058 #define TAB_SIZE 256 4059 4060 static int 4061 fast_exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4062 { 4063 mp_int M[TAB_SIZE], res; 4064 mp_digit buf, mp; 4065 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4066 4067 /* use a pointer to the reduction algorithm. This allows us to use 4068 * one of many reduction algorithms without modding the guts of 4069 * the code with if statements everywhere. 4070 */ 4071 int (*redux)(mp_int*,mp_int*,mp_digit); 4072 4073 winsize = find_window_size(X); 4074 4075 /* init M array */ 4076 /* init first cell */ 4077 if ((err = mp_init(&M[1])) != MP_OKAY) { 4078 return err; 4079 } 4080 4081 /* now init the second half of the array */ 4082 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4083 if ((err = mp_init(&M[x])) != MP_OKAY) { 4084 for (y = 1<<(winsize-1); y < x; y++) { 4085 mp_clear(&M[y]); 4086 } 4087 mp_clear(&M[1]); 4088 return err; 4089 } 4090 } 4091 4092 /* determine and setup reduction code */ 4093 if (redmode == 0) { 4094 /* now setup montgomery */ 4095 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) { 4096 goto LBL_M; 4097 } 4098 4099 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4100 if (can_use_fast_column_array(P->used + P->used + 1, P->used)) { 4101 redux = fast_mp_montgomery_reduce; 4102 } else { 4103 /* use slower baseline Montgomery method */ 4104 redux = mp_montgomery_reduce; 4105 } 4106 } else if (redmode == 1) { 4107 /* setup DR reduction for moduli of the form B**k - b */ 4108 diminished_radix_setup(P, &mp); 4109 redux = diminished_radix_reduce; 4110 } else { 4111 /* setup DR reduction for moduli of the form 2**k - b */ 4112 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4113 goto LBL_M; 4114 } 4115 redux = mp_reduce_2k; 4116 } 4117 4118 /* setup result */ 4119 if ((err = mp_init(&res)) != MP_OKAY) { 4120 goto LBL_M; 4121 } 4122 4123 /* create M table 4124 * 4125 4126 * 4127 * The first half of the table is not computed though accept for M[0] and M[1] 4128 */ 4129 4130 if (redmode == 0) { 4131 /* now we need R mod m */ 4132 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) { 4133 goto LBL_RES; 4134 } 4135 4136 /* now set M[1] to G * R mod m */ 4137 if ((err = multiply_modulo(&M[1], G, &res, P)) != MP_OKAY) { 4138 goto LBL_RES; 4139 } 4140 } else { 4141 set_word(&res, 1); 4142 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 4143 goto LBL_RES; 4144 } 4145 } 4146 4147 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4148 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4149 goto LBL_RES; 4150 } 4151 4152 for (x = 0; x < (winsize - 1); x++) { 4153 if ((err = square(&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4154 goto LBL_RES; 4155 } 4156 if ((err = (*redux)(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4157 goto LBL_RES; 4158 } 4159 } 4160 4161 /* create upper table */ 4162 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4163 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4164 goto LBL_RES; 4165 } 4166 if ((err = (*redux)(&M[x], P, mp)) != MP_OKAY) { 4167 goto LBL_RES; 4168 } 4169 } 4170 4171 /* set initial mode and bit cnt */ 4172 mode = 0; 4173 bitcnt = 1; 4174 buf = 0; 4175 digidx = X->used - 1; 4176 bitcpy = 0; 4177 bitbuf = 0; 4178 4179 for (;;) { 4180 /* grab next digit as required */ 4181 if (--bitcnt == 0) { 4182 /* if digidx == -1 we are out of digits so break */ 4183 if (digidx == -1) { 4184 break; 4185 } 4186 /* read next digit and reset bitcnt */ 4187 buf = X->dp[digidx--]; 4188 bitcnt = (int)DIGIT_BIT; 4189 } 4190 4191 /* grab the next msb from the exponent */ 4192 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4193 buf <<= (mp_digit)1; 4194 4195 /* if the bit is zero and mode == 0 then we ignore it 4196 * These represent the leading zero bits before the first 1 bit 4197 * in the exponent. Technically this opt is not required but it 4198 * does lower the # of trivial squaring/reductions used 4199 */ 4200 if (mode == 0 && y == 0) { 4201 continue; 4202 } 4203 4204 /* if the bit is zero and mode == 1 then we square */ 4205 if (mode == 1 && y == 0) { 4206 if ((err = square(&res, &res)) != MP_OKAY) { 4207 goto LBL_RES; 4208 } 4209 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4210 goto LBL_RES; 4211 } 4212 continue; 4213 } 4214 4215 /* else we add it to the window */ 4216 bitbuf |= (y << (winsize - ++bitcpy)); 4217 mode = 2; 4218 4219 if (bitcpy == winsize) { 4220 /* ok window is filled so square as required and multiply */ 4221 /* square first */ 4222 for (x = 0; x < winsize; x++) { 4223 if ((err = square(&res, &res)) != MP_OKAY) { 4224 goto LBL_RES; 4225 } 4226 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4227 goto LBL_RES; 4228 } 4229 } 4230 4231 /* then multiply */ 4232 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 4233 goto LBL_RES; 4234 } 4235 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4236 goto LBL_RES; 4237 } 4238 4239 /* empty window and reset */ 4240 bitcpy = 0; 4241 bitbuf = 0; 4242 mode = 1; 4243 } 4244 } 4245 4246 /* if bits remain then square/multiply */ 4247 if (mode == 2 && bitcpy > 0) { 4248 /* square then multiply if the bit is set */ 4249 for (x = 0; x < bitcpy; x++) { 4250 if ((err = square(&res, &res)) != MP_OKAY) { 4251 goto LBL_RES; 4252 } 4253 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4254 goto LBL_RES; 4255 } 4256 4257 /* get next bit of the window */ 4258 bitbuf <<= 1; 4259 if ((bitbuf & (1 << winsize)) != 0) { 4260 /* then multiply */ 4261 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 4262 goto LBL_RES; 4263 } 4264 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4265 goto LBL_RES; 4266 } 4267 } 4268 } 4269 } 4270 4271 if (redmode == 0) { 4272 /* fixup result if Montgomery reduction is used 4273 * recall that any value in a Montgomery system is 4274 * actually multiplied by R mod n. So we have 4275 * to reduce one more time to cancel out the factor 4276 * of R. 4277 */ 4278 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4279 goto LBL_RES; 4280 } 4281 } 4282 4283 /* swap res with Y */ 4284 mp_exch(&res, Y); 4285 err = MP_OKAY; 4286 LBL_RES: 4287 mp_clear(&res); 4288 LBL_M: 4289 mp_clear(&M[1]); 4290 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4291 mp_clear(&M[x]); 4292 } 4293 return err; 4294 } 4295 4296 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_exponent_modulo.c,v $ */ 4297 /* Revision: 1.4 $ */ 4298 /* Date: 2011/03/18 16:43:04 $ */ 4299 4300 /* this is a shell function that calls either the normal or Montgomery 4301 * exptmod functions. Originally the call to the montgomery code was 4302 * embedded in the normal function but that wasted alot of stack space 4303 * for nothing (since 99% of the time the Montgomery code would be called) 4304 */ 4305 static int 4306 exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4307 { 4308 int diminished_radix; 4309 4310 /* modulus P must be positive */ 4311 if (P->sign == MP_NEG) { 4312 return MP_VAL; 4313 } 4314 4315 /* if exponent X is negative we have to recurse */ 4316 if (X->sign == MP_NEG) { 4317 mp_int tmpG, tmpX; 4318 int err; 4319 4320 /* first compute 1/G mod P */ 4321 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4322 return err; 4323 } 4324 if ((err = modular_inverse(&tmpG, G, P)) != MP_OKAY) { 4325 mp_clear(&tmpG); 4326 return err; 4327 } 4328 4329 /* now get |X| */ 4330 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4331 mp_clear(&tmpG); 4332 return err; 4333 } 4334 if ((err = absolute(X, &tmpX)) != MP_OKAY) { 4335 mp_clear_multi(&tmpG, &tmpX, NULL); 4336 return err; 4337 } 4338 4339 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4340 err = exponent_modulo(&tmpG, &tmpX, P, Y); 4341 mp_clear_multi(&tmpG, &tmpX, NULL); 4342 return err; 4343 } 4344 4345 /* modified diminished radix reduction */ 4346 if (mp_reduce_is_2k_l(P) == MP_YES) { 4347 return basic_exponent_mod(G, X, P, Y, 1); 4348 } 4349 4350 /* is it a DR modulus? */ 4351 diminished_radix = is_diminished_radix_modulus(P); 4352 4353 /* if not, is it a unrestricted DR modulus? */ 4354 if (!diminished_radix) { 4355 diminished_radix = mp_reduce_is_2k(P) << 1; 4356 } 4357 4358 /* if the modulus is odd or diminished_radix, use the montgomery method */ 4359 if (PGPV_BN_is_odd(P) == 1 || diminished_radix) { 4360 return fast_exponent_modulo(G, X, P, Y, diminished_radix); 4361 } 4362 /* otherwise use the generic Barrett reduction technique */ 4363 return basic_exponent_mod(G, X, P, Y, 0); 4364 } 4365 4366 /* reverse an array, used for radix code */ 4367 static void 4368 bn_reverse(unsigned char *s, int len) 4369 { 4370 int ix, iy; 4371 uint8_t t; 4372 4373 for (ix = 0, iy = len - 1; ix < iy ; ix++, --iy) { 4374 t = s[ix]; 4375 s[ix] = s[iy]; 4376 s[iy] = t; 4377 } 4378 } 4379 4380 static inline int 4381 is_power_of_two(mp_digit b, int *p) 4382 { 4383 int x; 4384 4385 /* fast return if no power of two */ 4386 if ((b==0) || (b & (b-1))) { 4387 return 0; 4388 } 4389 4390 for (x = 0; x < DIGIT_BIT; x++) { 4391 if (b == (((mp_digit)1)<<x)) { 4392 *p = x; 4393 return 1; 4394 } 4395 } 4396 return 0; 4397 } 4398 4399 /* single digit division (based on routine from MPI) */ 4400 static int 4401 signed_divide_word(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4402 { 4403 mp_int q; 4404 mp_word w; 4405 mp_digit t; 4406 int res, ix; 4407 4408 /* cannot divide by zero */ 4409 if (b == 0) { 4410 return MP_VAL; 4411 } 4412 4413 /* quick outs */ 4414 if (b == 1 || MP_ISZERO(a) == 1) { 4415 if (d != NULL) { 4416 *d = 0; 4417 } 4418 if (c != NULL) { 4419 return mp_copy(a, c); 4420 } 4421 return MP_OKAY; 4422 } 4423 4424 /* power of two ? */ 4425 if (is_power_of_two(b, &ix) == 1) { 4426 if (d != NULL) { 4427 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4428 } 4429 if (c != NULL) { 4430 return rshift_bits(a, ix, c, NULL); 4431 } 4432 return MP_OKAY; 4433 } 4434 4435 /* three? */ 4436 if (b == 3) { 4437 return third(a, c, d); 4438 } 4439 4440 /* no easy answer [c'est la vie]. Just division */ 4441 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4442 return res; 4443 } 4444 4445 q.used = a->used; 4446 q.sign = a->sign; 4447 w = 0; 4448 for (ix = a->used - 1; ix >= 0; ix--) { 4449 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4450 4451 if (w >= b) { 4452 t = (mp_digit)(w / b); 4453 w -= ((mp_word)t) * ((mp_word)b); 4454 } else { 4455 t = 0; 4456 } 4457 q.dp[ix] = (mp_digit)t; 4458 } 4459 4460 if (d != NULL) { 4461 *d = (mp_digit)w; 4462 } 4463 4464 if (c != NULL) { 4465 trim_unused_digits(&q); 4466 mp_exch(&q, c); 4467 } 4468 mp_clear(&q); 4469 4470 return res; 4471 } 4472 4473 static const mp_digit ltm_prime_tab[] = { 4474 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4475 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4476 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4477 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4478 #ifndef MP_8BIT 4479 0x0083, 4480 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4481 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4482 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4483 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4484 4485 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4486 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4487 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4488 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4489 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4490 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4491 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4492 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4493 4494 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4495 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4496 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4497 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4498 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4499 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4500 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4501 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4502 4503 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4504 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4505 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4506 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4507 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4508 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4509 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4510 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4511 #endif 4512 }; 4513 4514 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4515 4516 static inline int 4517 mp_prime_is_divisible(mp_int *a, int *result) 4518 { 4519 int err, ix; 4520 mp_digit res; 4521 4522 /* default to not */ 4523 *result = MP_NO; 4524 4525 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4526 /* what is a mod LBL_prime_tab[ix] */ 4527 if ((err = signed_divide_word(a, ltm_prime_tab[ix], NULL, &res)) != MP_OKAY) { 4528 return err; 4529 } 4530 4531 /* is the residue zero? */ 4532 if (res == 0) { 4533 *result = MP_YES; 4534 return MP_OKAY; 4535 } 4536 } 4537 4538 return MP_OKAY; 4539 } 4540 4541 /* single digit addition */ 4542 static int 4543 add_single_digit(mp_int *a, mp_digit b, mp_int *c) 4544 { 4545 int res, ix, oldused; 4546 mp_digit *tmpa, *tmpc, mu; 4547 4548 /* grow c as required */ 4549 if (c->alloc < a->used + 1) { 4550 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4551 return res; 4552 } 4553 } 4554 4555 /* if a is negative and |a| >= b, call c = |a| - b */ 4556 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4557 /* temporarily fix sign of a */ 4558 a->sign = MP_ZPOS; 4559 4560 /* c = |a| - b */ 4561 res = signed_subtract_word(a, b, c); 4562 4563 /* fix sign */ 4564 a->sign = c->sign = MP_NEG; 4565 4566 /* clamp */ 4567 trim_unused_digits(c); 4568 4569 return res; 4570 } 4571 4572 /* old number of used digits in c */ 4573 oldused = c->used; 4574 4575 /* sign always positive */ 4576 c->sign = MP_ZPOS; 4577 4578 /* source alias */ 4579 tmpa = a->dp; 4580 4581 /* destination alias */ 4582 tmpc = c->dp; 4583 4584 /* if a is positive */ 4585 if (a->sign == MP_ZPOS) { 4586 /* add digit, after this we're propagating 4587 * the carry. 4588 */ 4589 *tmpc = *tmpa++ + b; 4590 mu = *tmpc >> DIGIT_BIT; 4591 *tmpc++ &= MP_MASK; 4592 4593 /* now handle rest of the digits */ 4594 for (ix = 1; ix < a->used; ix++) { 4595 *tmpc = *tmpa++ + mu; 4596 mu = *tmpc >> DIGIT_BIT; 4597 *tmpc++ &= MP_MASK; 4598 } 4599 /* set final carry */ 4600 ix++; 4601 *tmpc++ = mu; 4602 4603 /* setup size */ 4604 c->used = a->used + 1; 4605 } else { 4606 /* a was negative and |a| < b */ 4607 c->used = 1; 4608 4609 /* the result is a single digit */ 4610 if (a->used == 1) { 4611 *tmpc++ = b - a->dp[0]; 4612 } else { 4613 *tmpc++ = b; 4614 } 4615 4616 /* setup count so the clearing of oldused 4617 * can fall through correctly 4618 */ 4619 ix = 1; 4620 } 4621 4622 /* now zero to oldused */ 4623 while (ix++ < oldused) { 4624 *tmpc++ = 0; 4625 } 4626 trim_unused_digits(c); 4627 4628 return MP_OKAY; 4629 } 4630 4631 /* single digit subtraction */ 4632 static int 4633 signed_subtract_word(mp_int *a, mp_digit b, mp_int *c) 4634 { 4635 mp_digit *tmpa, *tmpc, mu; 4636 int res, ix, oldused; 4637 4638 /* grow c as required */ 4639 if (c->alloc < a->used + 1) { 4640 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4641 return res; 4642 } 4643 } 4644 4645 /* if a is negative just do an unsigned 4646 * addition [with fudged signs] 4647 */ 4648 if (a->sign == MP_NEG) { 4649 a->sign = MP_ZPOS; 4650 res = add_single_digit(a, b, c); 4651 a->sign = c->sign = MP_NEG; 4652 4653 /* clamp */ 4654 trim_unused_digits(c); 4655 4656 return res; 4657 } 4658 4659 /* setup regs */ 4660 oldused = c->used; 4661 tmpa = a->dp; 4662 tmpc = c->dp; 4663 4664 /* if a <= b simply fix the single digit */ 4665 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4666 if (a->used == 1) { 4667 *tmpc++ = b - *tmpa; 4668 } else { 4669 *tmpc++ = b; 4670 } 4671 ix = 1; 4672 4673 /* negative/1digit */ 4674 c->sign = MP_NEG; 4675 c->used = 1; 4676 } else { 4677 /* positive/size */ 4678 c->sign = MP_ZPOS; 4679 c->used = a->used; 4680 4681 /* subtract first digit */ 4682 *tmpc = *tmpa++ - b; 4683 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4684 *tmpc++ &= MP_MASK; 4685 4686 /* handle rest of the digits */ 4687 for (ix = 1; ix < a->used; ix++) { 4688 *tmpc = *tmpa++ - mu; 4689 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4690 *tmpc++ &= MP_MASK; 4691 } 4692 } 4693 4694 /* zero excess digits */ 4695 while (ix++ < oldused) { 4696 *tmpc++ = 0; 4697 } 4698 trim_unused_digits(c); 4699 return MP_OKAY; 4700 } 4701 4702 static const int lnz[16] = { 4703 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4704 }; 4705 4706 /* Counts the number of lsbs which are zero before the first zero bit */ 4707 static int 4708 mp_cnt_lsb(mp_int *a) 4709 { 4710 int x; 4711 mp_digit q, qq; 4712 4713 /* easy out */ 4714 if (MP_ISZERO(a) == 1) { 4715 return 0; 4716 } 4717 4718 /* scan lower digits until non-zero */ 4719 for (x = 0; x < a->used && a->dp[x] == 0; x++) { 4720 } 4721 q = a->dp[x]; 4722 x *= DIGIT_BIT; 4723 4724 /* now scan this digit until a 1 is found */ 4725 if ((q & 1) == 0) { 4726 do { 4727 qq = q & 15; 4728 /* LINTED previous op ensures range of qq */ 4729 x += lnz[qq]; 4730 q >>= 4; 4731 } while (qq == 0); 4732 } 4733 return x; 4734 } 4735 4736 /* c = a * a (mod b) */ 4737 static int 4738 square_modulo(mp_int *a, mp_int *b, mp_int *c) 4739 { 4740 int res; 4741 mp_int t; 4742 4743 if ((res = mp_init(&t)) != MP_OKAY) { 4744 return res; 4745 } 4746 4747 if ((res = square(a, &t)) != MP_OKAY) { 4748 mp_clear(&t); 4749 return res; 4750 } 4751 res = modulo(&t, b, c); 4752 mp_clear(&t); 4753 return res; 4754 } 4755 4756 static int 4757 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4758 { 4759 mp_int n1, y, r; 4760 int s, j, err; 4761 4762 /* default */ 4763 *result = MP_NO; 4764 4765 /* ensure b > 1 */ 4766 if (compare_digit(b, 1) != MP_GT) { 4767 return MP_VAL; 4768 } 4769 4770 /* get n1 = a - 1 */ 4771 if ((err = mp_init_copy(&n1, a)) != MP_OKAY) { 4772 return err; 4773 } 4774 if ((err = signed_subtract_word(&n1, 1, &n1)) != MP_OKAY) { 4775 goto LBL_N1; 4776 } 4777 4778 /* set 2**s * r = n1 */ 4779 if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) { 4780 goto LBL_N1; 4781 } 4782 4783 /* count the number of least significant bits 4784 * which are zero 4785 */ 4786 s = mp_cnt_lsb(&r); 4787 4788 /* now divide n - 1 by 2**s */ 4789 if ((err = rshift_bits(&r, s, &r, NULL)) != MP_OKAY) { 4790 goto LBL_R; 4791 } 4792 4793 /* compute y = b**r mod a */ 4794 if ((err = mp_init(&y)) != MP_OKAY) { 4795 goto LBL_R; 4796 } 4797 if ((err = exponent_modulo(b, &r, a, &y)) != MP_OKAY) { 4798 goto LBL_Y; 4799 } 4800 4801 /* if y != 1 and y != n1 do */ 4802 if (compare_digit(&y, 1) != MP_EQ && signed_compare(&y, &n1) != MP_EQ) { 4803 j = 1; 4804 /* while j <= s-1 and y != n1 */ 4805 while ((j <= (s - 1)) && signed_compare(&y, &n1) != MP_EQ) { 4806 if ((err = square_modulo(&y, a, &y)) != MP_OKAY) { 4807 goto LBL_Y; 4808 } 4809 4810 /* if y == 1 then composite */ 4811 if (compare_digit(&y, 1) == MP_EQ) { 4812 goto LBL_Y; 4813 } 4814 4815 ++j; 4816 } 4817 4818 /* if y != n1 then composite */ 4819 if (signed_compare(&y, &n1) != MP_EQ) { 4820 goto LBL_Y; 4821 } 4822 } 4823 4824 /* probably prime now */ 4825 *result = MP_YES; 4826 LBL_Y: 4827 mp_clear(&y); 4828 LBL_R: 4829 mp_clear(&r); 4830 LBL_N1: 4831 mp_clear(&n1); 4832 return err; 4833 } 4834 4835 /* performs a variable number of rounds of Miller-Rabin 4836 * 4837 * Probability of error after t rounds is no more than 4838 4839 * 4840 * Sets result to 1 if probably prime, 0 otherwise 4841 */ 4842 static int 4843 mp_prime_is_prime(mp_int *a, int t, int *result) 4844 { 4845 mp_int b; 4846 int ix, err, res; 4847 4848 /* default to no */ 4849 *result = MP_NO; 4850 4851 /* valid value of t? */ 4852 if (t <= 0 || t > (int)PRIME_SIZE) { 4853 return MP_VAL; 4854 } 4855 4856 /* is the input equal to one of the primes in the table? */ 4857 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4858 if (compare_digit(a, ltm_prime_tab[ix]) == MP_EQ) { 4859 *result = 1; 4860 return MP_OKAY; 4861 } 4862 } 4863 4864 /* first perform trial division */ 4865 if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) { 4866 return err; 4867 } 4868 4869 /* return if it was trivially divisible */ 4870 if (res == MP_YES) { 4871 return MP_OKAY; 4872 } 4873 4874 /* now perform the miller-rabin rounds */ 4875 if ((err = mp_init(&b)) != MP_OKAY) { 4876 return err; 4877 } 4878 4879 for (ix = 0; ix < t; ix++) { 4880 /* set the prime */ 4881 set_word(&b, ltm_prime_tab[ix]); 4882 4883 if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) { 4884 goto LBL_B; 4885 } 4886 4887 if (res == MP_NO) { 4888 goto LBL_B; 4889 } 4890 } 4891 4892 /* passed the test */ 4893 *result = MP_YES; 4894 LBL_B: 4895 mp_clear(&b); 4896 return err; 4897 } 4898 4899 /* returns size of ASCII reprensentation */ 4900 static int 4901 mp_radix_size(mp_int *a, int radix, int *size) 4902 { 4903 int res, digs; 4904 mp_int t; 4905 mp_digit d; 4906 4907 *size = 0; 4908 4909 /* special case for binary */ 4910 if (radix == 2) { 4911 *size = mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4912 return MP_OKAY; 4913 } 4914 4915 /* make sure the radix is in range */ 4916 if (radix < 2 || radix > 64) { 4917 return MP_VAL; 4918 } 4919 4920 if (MP_ISZERO(a) == MP_YES) { 4921 *size = 2; 4922 return MP_OKAY; 4923 } 4924 4925 /* digs is the digit count */ 4926 digs = 0; 4927 4928 /* if it's negative add one for the sign */ 4929 if (a->sign == MP_NEG) { 4930 ++digs; 4931 } 4932 4933 /* init a copy of the input */ 4934 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4935 return res; 4936 } 4937 4938 /* force temp to positive */ 4939 t.sign = MP_ZPOS; 4940 4941 /* fetch out all of the digits */ 4942 while (MP_ISZERO(&t) == MP_NO) { 4943 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 4944 mp_clear(&t); 4945 return res; 4946 } 4947 ++digs; 4948 } 4949 mp_clear(&t); 4950 4951 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 4952 *size = digs + 1; 4953 return MP_OKAY; 4954 } 4955 4956 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 4957 4958 /* stores a bignum as a ASCII string in a given radix (2..64) 4959 * 4960 * Stores upto maxlen-1 chars and always a NULL byte 4961 */ 4962 static int 4963 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 4964 { 4965 int res, digs; 4966 mp_int t; 4967 mp_digit d; 4968 char *_s = str; 4969 4970 /* check range of the maxlen, radix */ 4971 if (maxlen < 2 || radix < 2 || radix > 64) { 4972 return MP_VAL; 4973 } 4974 4975 /* quick out if its zero */ 4976 if (MP_ISZERO(a) == MP_YES) { 4977 *str++ = '0'; 4978 *str = '\0'; 4979 return MP_OKAY; 4980 } 4981 4982 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4983 return res; 4984 } 4985 4986 /* if it is negative output a - */ 4987 if (t.sign == MP_NEG) { 4988 /* we have to reverse our digits later... but not the - sign!! */ 4989 ++_s; 4990 4991 /* store the flag and mark the number as positive */ 4992 *str++ = '-'; 4993 t.sign = MP_ZPOS; 4994 4995 /* subtract a char */ 4996 --maxlen; 4997 } 4998 4999 digs = 0; 5000 while (MP_ISZERO(&t) == 0) { 5001 if (--maxlen < 1) { 5002 /* no more room */ 5003 break; 5004 } 5005 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5006 mp_clear(&t); 5007 return res; 5008 } 5009 /* LINTED -- radix' range is checked above, limits d's range */ 5010 *str++ = mp_s_rmap[d]; 5011 ++digs; 5012 } 5013 5014 /* reverse the digits of the string. In this case _s points 5015 * to the first digit [exluding the sign] of the number 5016 */ 5017 bn_reverse((unsigned char *)_s, digs); 5018 5019 /* append a NULL so the string is properly terminated */ 5020 *str = '\0'; 5021 5022 mp_clear(&t); 5023 return MP_OKAY; 5024 } 5025 5026 static char * 5027 formatbn(const PGPV_BIGNUM *a, const int radix) 5028 { 5029 char *s; 5030 int len; 5031 5032 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5033 return NULL; 5034 } 5035 if ((s = allocate(1, (size_t)len)) != NULL) { 5036 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5037 deallocate(s, (size_t)len); 5038 return NULL; 5039 } 5040 } 5041 return s; 5042 } 5043 5044 static int 5045 mp_getradix_num(mp_int *a, int radix, char *s) 5046 { 5047 int err, ch, neg, y; 5048 5049 /* clear a */ 5050 mp_zero(a); 5051 5052 /* if first digit is - then set negative */ 5053 if ((ch = *s++) == '-') { 5054 neg = MP_NEG; 5055 ch = *s++; 5056 } else { 5057 neg = MP_ZPOS; 5058 } 5059 5060 for (;;) { 5061 /* find y in the radix map */ 5062 for (y = 0; y < radix; y++) { 5063 if (mp_s_rmap[y] == ch) { 5064 break; 5065 } 5066 } 5067 if (y == radix) { 5068 break; 5069 } 5070 5071 /* shift up and add */ 5072 if ((err = multiply_digit(a, radix, a)) != MP_OKAY) { 5073 return err; 5074 } 5075 if ((err = add_single_digit(a, y, a)) != MP_OKAY) { 5076 return err; 5077 } 5078 5079 ch = *s++; 5080 } 5081 if (compare_digit(a, 0) != MP_EQ) { 5082 a->sign = neg; 5083 } 5084 5085 return MP_OKAY; 5086 } 5087 5088 static int 5089 getbn(PGPV_BIGNUM **a, const char *str, int radix) 5090 { 5091 int len; 5092 5093 if (a == NULL || str == NULL || (*a = PGPV_BN_new()) == NULL) { 5094 return 0; 5095 } 5096 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5097 return 0; 5098 } 5099 mp_radix_size(__UNCONST(*a), radix, &len); 5100 return len - 1; 5101 } 5102 5103 /* d = a - b (mod c) */ 5104 static int 5105 subtract_modulo(mp_int *a, mp_int *b, mp_int *c, mp_int *d) 5106 { 5107 int res; 5108 mp_int t; 5109 5110 5111 if ((res = mp_init(&t)) != MP_OKAY) { 5112 return res; 5113 } 5114 5115 if ((res = signed_subtract(a, b, &t)) != MP_OKAY) { 5116 mp_clear(&t); 5117 return res; 5118 } 5119 res = modulo(&t, c, d); 5120 mp_clear(&t); 5121 return res; 5122 } 5123 5124 /* bn_mp_gcd.c */ 5125 /* Greatest Common Divisor using the binary method */ 5126 static int 5127 mp_gcd(mp_int *a, mp_int *b, mp_int *c) 5128 { 5129 mp_int u, v; 5130 int k, u_lsb, v_lsb, res; 5131 5132 /* either zero than gcd is the largest */ 5133 if (PGPV_BN_is_zero(a) == MP_YES) { 5134 return absolute(b, c); 5135 } 5136 if (PGPV_BN_is_zero(b) == MP_YES) { 5137 return absolute(a, c); 5138 } 5139 5140 /* get copies of a and b we can modify */ 5141 if ((res = mp_init_copy(&u, a)) != MP_OKAY) { 5142 return res; 5143 } 5144 5145 if ((res = mp_init_copy(&v, b)) != MP_OKAY) { 5146 goto LBL_U; 5147 } 5148 5149 /* must be positive for the remainder of the algorithm */ 5150 u.sign = v.sign = MP_ZPOS; 5151 5152 /* B1. Find the common power of two for u and v */ 5153 u_lsb = mp_cnt_lsb(&u); 5154 v_lsb = mp_cnt_lsb(&v); 5155 k = MIN(u_lsb, v_lsb); 5156 5157 if (k > 0) { 5158 /* divide the power of two out */ 5159 if ((res = rshift_bits(&u, k, &u, NULL)) != MP_OKAY) { 5160 goto LBL_V; 5161 } 5162 5163 if ((res = rshift_bits(&v, k, &v, NULL)) != MP_OKAY) { 5164 goto LBL_V; 5165 } 5166 } 5167 5168 /* divide any remaining factors of two out */ 5169 if (u_lsb != k) { 5170 if ((res = rshift_bits(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { 5171 goto LBL_V; 5172 } 5173 } 5174 5175 if (v_lsb != k) { 5176 if ((res = rshift_bits(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { 5177 goto LBL_V; 5178 } 5179 } 5180 5181 while (PGPV_BN_is_zero(&v) == 0) { 5182 /* make sure v is the largest */ 5183 if (compare_magnitude(&u, &v) == MP_GT) { 5184 /* swap u and v to make sure v is >= u */ 5185 mp_exch(&u, &v); 5186 } 5187 5188 /* subtract smallest from largest */ 5189 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 5190 goto LBL_V; 5191 } 5192 5193 /* Divide out all factors of two */ 5194 if ((res = rshift_bits(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { 5195 goto LBL_V; 5196 } 5197 } 5198 5199 /* multiply by 2**k which we divided out at the beginning */ 5200 if ((res = lshift_bits(&u, k, c)) != MP_OKAY) { 5201 goto LBL_V; 5202 } 5203 c->sign = MP_ZPOS; 5204 res = MP_OKAY; 5205 LBL_V: 5206 mp_clear (&u); 5207 LBL_U: 5208 mp_clear (&v); 5209 return res; 5210 } 5211 5212 /**************************************************************************/ 5213 5214 /* PGPV_BIGNUM emulation layer */ 5215 5216 /* essentiually, these are just wrappers around the libtommath functions */ 5217 /* usually the order of args changes */ 5218 /* the PGPV_BIGNUM API tends to have more const poisoning */ 5219 /* these wrappers also check the arguments passed for sanity */ 5220 5221 PGPV_BIGNUM * 5222 PGPV_BN_bin2bn(const uint8_t *data, int len, PGPV_BIGNUM *ret) 5223 { 5224 if (data == NULL) { 5225 return PGPV_BN_new(); 5226 } 5227 if (ret == NULL) { 5228 ret = PGPV_BN_new(); 5229 } 5230 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5231 } 5232 5233 /* store in unsigned [big endian] format */ 5234 int 5235 PGPV_BN_bn2bin(const PGPV_BIGNUM *a, unsigned char *b) 5236 { 5237 PGPV_BIGNUM t; 5238 int x; 5239 5240 if (a == NULL || b == NULL) { 5241 return -1; 5242 } 5243 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5244 return -1; 5245 } 5246 for (x = 0; !PGPV_BN_is_zero(&t) ; ) { 5247 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5248 if (rshift_bits(&t, 8, &t, NULL) != MP_OKAY) { 5249 mp_clear(&t); 5250 return -1; 5251 } 5252 } 5253 bn_reverse(b, x); 5254 mp_clear(&t); 5255 return x; 5256 } 5257 5258 void 5259 PGPV_BN_init(PGPV_BIGNUM *a) 5260 { 5261 if (a != NULL) { 5262 mp_init(a); 5263 } 5264 } 5265 5266 PGPV_BIGNUM * 5267 PGPV_BN_new(void) 5268 { 5269 PGPV_BIGNUM *a; 5270 5271 if ((a = allocate(1, sizeof(*a))) != NULL) { 5272 mp_init(a); 5273 } 5274 return a; 5275 } 5276 5277 /* copy, b = a */ 5278 int 5279 PGPV_BN_copy(PGPV_BIGNUM *b, const PGPV_BIGNUM *a) 5280 { 5281 if (a == NULL || b == NULL) { 5282 return MP_VAL; 5283 } 5284 return mp_copy(__UNCONST(a), b); 5285 } 5286 5287 PGPV_BIGNUM * 5288 PGPV_BN_dup(const PGPV_BIGNUM *a) 5289 { 5290 PGPV_BIGNUM *ret; 5291 5292 if (a == NULL) { 5293 return NULL; 5294 } 5295 if ((ret = PGPV_BN_new()) != NULL) { 5296 PGPV_BN_copy(ret, a); 5297 } 5298 return ret; 5299 } 5300 5301 void 5302 PGPV_BN_swap(PGPV_BIGNUM *a, PGPV_BIGNUM *b) 5303 { 5304 if (a && b) { 5305 mp_exch(a, b); 5306 } 5307 } 5308 5309 int 5310 PGPV_BN_lshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n) 5311 { 5312 if (r == NULL || a == NULL || n < 0) { 5313 return 0; 5314 } 5315 PGPV_BN_copy(r, a); 5316 return lshift_digits(r, n) == MP_OKAY; 5317 } 5318 5319 int 5320 PGPV_BN_lshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a) 5321 { 5322 if (r == NULL || a == NULL) { 5323 return 0; 5324 } 5325 PGPV_BN_copy(r, a); 5326 return lshift_digits(r, 1) == MP_OKAY; 5327 } 5328 5329 int 5330 PGPV_BN_rshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n) 5331 { 5332 if (r == NULL || a == NULL || n < 0) { 5333 return MP_VAL; 5334 } 5335 PGPV_BN_copy(r, a); 5336 return rshift_digits(r, n) == MP_OKAY; 5337 } 5338 5339 int 5340 PGPV_BN_rshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a) 5341 { 5342 if (r == NULL || a == NULL) { 5343 return 0; 5344 } 5345 PGPV_BN_copy(r, a); 5346 return rshift_digits(r, 1) == MP_OKAY; 5347 } 5348 5349 int 5350 PGPV_BN_set_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w) 5351 { 5352 if (a == NULL) { 5353 return 0; 5354 } 5355 set_word(a, w); 5356 return 1; 5357 } 5358 5359 int 5360 PGPV_BN_add(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b) 5361 { 5362 if (a == NULL || b == NULL || r == NULL) { 5363 return 0; 5364 } 5365 return signed_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5366 } 5367 5368 int 5369 PGPV_BN_sub(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b) 5370 { 5371 if (a == NULL || b == NULL || r == NULL) { 5372 return 0; 5373 } 5374 return signed_subtract(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5375 } 5376 5377 int 5378 PGPV_BN_mul(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b, PGPV_BN_CTX *ctx) 5379 { 5380 if (a == NULL || b == NULL || r == NULL) { 5381 return 0; 5382 } 5383 USE_ARG(ctx); 5384 return signed_multiply(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5385 } 5386 5387 int 5388 PGPV_BN_div(PGPV_BIGNUM *dv, PGPV_BIGNUM *rem, const PGPV_BIGNUM *a, const PGPV_BIGNUM *d, PGPV_BN_CTX *ctx) 5389 { 5390 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5391 return 0; 5392 } 5393 USE_ARG(ctx); 5394 return signed_divide(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5395 } 5396 5397 /* perform a bit operation on the 2 bignums */ 5398 int 5399 PGPV_BN_bitop(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, char op, const PGPV_BIGNUM *b) 5400 { 5401 unsigned ndigits; 5402 mp_digit ad; 5403 mp_digit bd; 5404 int i; 5405 5406 if (a == NULL || b == NULL || r == NULL) { 5407 return 0; 5408 } 5409 if (PGPV_BN_cmp(__UNCONST(a), __UNCONST(b)) >= 0) { 5410 PGPV_BN_copy(r, a); 5411 ndigits = a->used; 5412 } else { 5413 PGPV_BN_copy(r, b); 5414 ndigits = b->used; 5415 } 5416 for (i = 0 ; i < (int)ndigits ; i++) { 5417 ad = (i > a->used) ? 0 : a->dp[i]; 5418 bd = (i > b->used) ? 0 : b->dp[i]; 5419 switch(op) { 5420 case '&': 5421 r->dp[i] = (ad & bd); 5422 break; 5423 case '|': 5424 r->dp[i] = (ad | bd); 5425 break; 5426 case '^': 5427 r->dp[i] = (ad ^ bd); 5428 break; 5429 default: 5430 break; 5431 } 5432 } 5433 return 1; 5434 } 5435 5436 void 5437 PGPV_BN_free(PGPV_BIGNUM *a) 5438 { 5439 if (a) { 5440 mp_clear(a); 5441 free(a); 5442 } 5443 } 5444 5445 void 5446 PGPV_BN_clear(PGPV_BIGNUM *a) 5447 { 5448 if (a) { 5449 mp_clear(a); 5450 } 5451 } 5452 5453 void 5454 PGPV_BN_clear_free(PGPV_BIGNUM *a) 5455 { 5456 PGPV_BN_clear(a); 5457 free(a); 5458 } 5459 5460 int 5461 PGPV_BN_num_bytes(const PGPV_BIGNUM *a) 5462 { 5463 if (a == NULL) { 5464 return MP_VAL; 5465 } 5466 return mp_unsigned_bin_size(__UNCONST(a)); 5467 } 5468 5469 int 5470 PGPV_BN_num_bits(const PGPV_BIGNUM *a) 5471 { 5472 if (a == NULL) { 5473 return 0; 5474 } 5475 return mp_count_bits(a); 5476 } 5477 5478 void 5479 PGPV_BN_set_negative(PGPV_BIGNUM *a, int n) 5480 { 5481 if (a) { 5482 a->sign = (n) ? MP_NEG : 0; 5483 } 5484 } 5485 5486 int 5487 PGPV_BN_cmp(PGPV_BIGNUM *a, PGPV_BIGNUM *b) 5488 { 5489 if (a == NULL || b == NULL) { 5490 return MP_VAL; 5491 } 5492 switch(signed_compare(a, b)) { 5493 case MP_LT: 5494 return -1; 5495 case MP_GT: 5496 return 1; 5497 case MP_EQ: 5498 default: 5499 return 0; 5500 } 5501 } 5502 5503 int 5504 PGPV_BN_mod_exp(PGPV_BIGNUM *Y, PGPV_BIGNUM *G, PGPV_BIGNUM *X, PGPV_BIGNUM *P, PGPV_BN_CTX *ctx) 5505 { 5506 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5507 return MP_VAL; 5508 } 5509 USE_ARG(ctx); 5510 return exponent_modulo(G, X, P, Y) == MP_OKAY; 5511 } 5512 5513 PGPV_BIGNUM * 5514 PGPV_BN_mod_inverse(PGPV_BIGNUM *r, PGPV_BIGNUM *a, const PGPV_BIGNUM *n, PGPV_BN_CTX *ctx) 5515 { 5516 USE_ARG(ctx); 5517 if (r == NULL || a == NULL || n == NULL) { 5518 return NULL; 5519 } 5520 return (modular_inverse(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5521 } 5522 5523 int 5524 PGPV_BN_mod_mul(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx) 5525 { 5526 USE_ARG(ctx); 5527 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5528 return 0; 5529 } 5530 return multiply_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY; 5531 } 5532 5533 PGPV_BN_CTX * 5534 PGPV_BN_CTX_new(void) 5535 { 5536 return allocate(1, sizeof(PGPV_BN_CTX)); 5537 } 5538 5539 void 5540 PGPV_BN_CTX_init(PGPV_BN_CTX *c) 5541 { 5542 if (c != NULL) { 5543 c->arraysize = 15; 5544 if ((c->v = allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5545 c->arraysize = 0; 5546 } 5547 } 5548 } 5549 5550 PGPV_BIGNUM * 5551 PGPV_BN_CTX_get(PGPV_BN_CTX *ctx) 5552 { 5553 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5554 return NULL; 5555 } 5556 return ctx->v[ctx->count++] = PGPV_BN_new(); 5557 } 5558 5559 void 5560 PGPV_BN_CTX_start(PGPV_BN_CTX *ctx) 5561 { 5562 PGPV_BN_CTX_init(ctx); 5563 } 5564 5565 void 5566 PGPV_BN_CTX_free(PGPV_BN_CTX *c) 5567 { 5568 unsigned i; 5569 5570 if (c != NULL && c->v != NULL) { 5571 for (i = 0 ; i < c->count ; i++) { 5572 PGPV_BN_clear_free(c->v[i]); 5573 } 5574 deallocate(c->v, sizeof(*c->v) * c->arraysize); 5575 } 5576 } 5577 5578 void 5579 PGPV_BN_CTX_end(PGPV_BN_CTX *ctx) 5580 { 5581 PGPV_BN_CTX_free(ctx); 5582 } 5583 5584 char * 5585 PGPV_BN_bn2hex(const PGPV_BIGNUM *a) 5586 { 5587 return (a == NULL) ? NULL : formatbn(a, 16); 5588 } 5589 5590 char * 5591 PGPV_BN_bn2dec(const PGPV_BIGNUM *a) 5592 { 5593 return (a == NULL) ? NULL : formatbn(a, 10); 5594 } 5595 5596 char * 5597 PGPV_BN_bn2radix(const PGPV_BIGNUM *a, unsigned radix) 5598 { 5599 return (a == NULL) ? NULL : formatbn(a, (int)radix); 5600 } 5601 5602 #ifndef _KERNEL 5603 int 5604 PGPV_BN_print_fp(FILE *fp, const PGPV_BIGNUM *a) 5605 { 5606 char *s; 5607 int ret; 5608 5609 if (fp == NULL || a == NULL) { 5610 return 0; 5611 } 5612 s = PGPV_BN_bn2hex(a); 5613 ret = fprintf(fp, "%s", s); 5614 deallocate(s, strlen(s) + 1); 5615 return ret; 5616 } 5617 #endif 5618 5619 #ifdef PGPV_BN_RAND_NEEDED 5620 int 5621 PGPV_BN_rand(PGPV_BIGNUM *rnd, int bits, int top, int bottom) 5622 { 5623 uint64_t r; 5624 int digits; 5625 int i; 5626 5627 if (rnd == NULL) { 5628 return 0; 5629 } 5630 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5631 for (i = 0 ; i < digits ; i++) { 5632 r = (uint64_t)arc4random(); 5633 r <<= 32; 5634 r |= arc4random(); 5635 rnd->dp[i] = (r & MP_MASK); 5636 rnd->used += 1; 5637 } 5638 if (top == 0) { 5639 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5640 } 5641 if (top == 1) { 5642 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5643 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5644 } 5645 if (bottom) { 5646 rnd->dp[0] |= 0x1; 5647 } 5648 return 1; 5649 } 5650 5651 int 5652 PGPV_BN_rand_range(PGPV_BIGNUM *rnd, PGPV_BIGNUM *range) 5653 { 5654 if (rnd == NULL || range == NULL || PGPV_BN_is_zero(range)) { 5655 return 0; 5656 } 5657 PGPV_BN_rand(rnd, PGPV_BN_num_bits(range), 1, 0); 5658 return modulo(rnd, range, rnd) == MP_OKAY; 5659 } 5660 #endif 5661 5662 int 5663 PGPV_BN_is_prime(const PGPV_BIGNUM *a, int checks, void (*callback)(int, int, void *), PGPV_BN_CTX *ctx, void *cb_arg) 5664 { 5665 int primality; 5666 5667 if (a == NULL) { 5668 return 0; 5669 } 5670 USE_ARG(ctx); 5671 USE_ARG(cb_arg); 5672 USE_ARG(callback); 5673 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5674 } 5675 5676 const PGPV_BIGNUM * 5677 PGPV_BN_value_one(void) 5678 { 5679 static mp_digit digit = 1UL; 5680 static const PGPV_BIGNUM one = { &digit, 1, 1, 0 }; 5681 5682 return &one; 5683 } 5684 5685 int 5686 PGPV_BN_hex2bn(PGPV_BIGNUM **a, const char *str) 5687 { 5688 return getbn(a, str, 16); 5689 } 5690 5691 int 5692 PGPV_BN_dec2bn(PGPV_BIGNUM **a, const char *str) 5693 { 5694 return getbn(a, str, 10); 5695 } 5696 5697 int 5698 PGPV_BN_radix2bn(PGPV_BIGNUM **a, const char *str, unsigned radix) 5699 { 5700 return getbn(a, str, (int)radix); 5701 } 5702 5703 int 5704 PGPV_BN_mod_sub(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx) 5705 { 5706 USE_ARG(ctx); 5707 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5708 return 0; 5709 } 5710 return subtract_modulo(a, b, __UNCONST(m), r) == MP_OKAY; 5711 } 5712 5713 int 5714 PGPV_BN_is_bit_set(const PGPV_BIGNUM *a, int n) 5715 { 5716 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5717 return 0; 5718 } 5719 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5720 } 5721 5722 /* raise 'a' to power of 'b' */ 5723 int 5724 PGPV_BN_raise(PGPV_BIGNUM *res, PGPV_BIGNUM *a, PGPV_BIGNUM *b) 5725 { 5726 uint64_t exponent; 5727 PGPV_BIGNUM *power; 5728 PGPV_BIGNUM *temp; 5729 char *t; 5730 5731 t = PGPV_BN_bn2dec(b); 5732 exponent = (uint64_t)strtoull(t, NULL, 10); 5733 free(t); 5734 if (exponent == 0) { 5735 PGPV_BN_copy(res, PGPV_BN_value_one()); 5736 } else { 5737 power = PGPV_BN_dup(a); 5738 for ( ; (exponent & 1) == 0 ; exponent >>= 1) { 5739 PGPV_BN_mul(power, power, power, NULL); 5740 } 5741 temp = PGPV_BN_dup(power); 5742 for (exponent >>= 1 ; exponent > 0 ; exponent >>= 1) { 5743 PGPV_BN_mul(power, power, power, NULL); 5744 if (exponent & 1) { 5745 PGPV_BN_mul(temp, power, temp, NULL); 5746 } 5747 } 5748 PGPV_BN_copy(res, temp); 5749 PGPV_BN_free(power); 5750 PGPV_BN_free(temp); 5751 } 5752 return 1; 5753 } 5754 5755 /* compute the factorial */ 5756 int 5757 PGPV_BN_factorial(PGPV_BIGNUM *res, PGPV_BIGNUM *f) 5758 { 5759 PGPV_BIGNUM *one; 5760 PGPV_BIGNUM *i; 5761 5762 i = PGPV_BN_dup(f); 5763 one = __UNCONST(PGPV_BN_value_one()); 5764 PGPV_BN_sub(i, i, one); 5765 PGPV_BN_copy(res, f); 5766 while (PGPV_BN_cmp(i, one) > 0) { 5767 PGPV_BN_mul(res, res, i, NULL); 5768 PGPV_BN_sub(i, i, one); 5769 } 5770 PGPV_BN_free(i); 5771 return 1; 5772 } 5773 5774 /* get greatest common divisor */ 5775 int 5776 PGPV_BN_gcd(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, PGPV_BN_CTX *ctx) 5777 { 5778 return mp_gcd(a, b, r); 5779 } 5780