1 /*-
2 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 /* LibTomMath, multiple-precision integer library -- Tom St Denis
26 *
27 * LibTomMath is a library that provides multiple-precision
28 * integer arithmetic as well as number theoretic functionality.
29 *
30 * The library was designed directly after the MPI library by
31 * Michael Fromberger but has been written from scratch with
32 * additional optimizations in place.
33 *
34 * The library is free for all purposes without any express
35 * guarantee it works.
36 *
37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
38 */
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/param.h>
42
43 #ifdef _KERNEL
44 # include <sys/kmem.h>
45 #else
46 # include <arpa/inet.h>
47 # include <ctype.h>
48 # include <inttypes.h>
49 # include <stdarg.h>
50 # include <stdio.h>
51 # include <stdlib.h>
52 # include <string.h>
53 # include <time.h>
54 # include <unistd.h>
55 #endif
56
57 #include "misc.h"
58 #include "bn.h"
59 #include "digest.h"
60
61 /**************************************************************************/
62
63 /* LibTomMath, multiple-precision integer library -- Tom St Denis
64 *
65 * LibTomMath is a library that provides multiple-precision
66 * integer arithmetic as well as number theoretic functionality.
67 *
68 * The library was designed directly after the MPI library by
69 * Michael Fromberger but has been written from scratch with
70 * additional optimizations in place.
71 *
72 * The library is free for all purposes without any express
73 * guarantee it works.
74 *
75 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
76 */
77
78 #define MP_PREC 32
79 #define DIGIT_BIT 28
80 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
81
82 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1)))
83
84 #define MP_NO 0
85 #define MP_YES 1
86
87 #ifndef USE_ARG
88 #define USE_ARG(x) /*LINTED*/(void)&(x)
89 #endif
90
91 #ifndef __arraycount
92 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0]))
93 #endif
94
95 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
96
97 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
98
99 typedef int mp_err;
100
101 static int mp_mul(mp_int * a, mp_int * b, mp_int * c);
102 static int mp_sqr(mp_int * a, mp_int * b);
103
104 static int mp_sub_d(mp_int *a, mp_digit b, mp_int *c);
105
106 /* set to zero */
107 static void
mp_zero(mp_int * a)108 mp_zero(mp_int *a)
109 {
110 int n;
111 mp_digit *tmp;
112
113 a->sign = MP_ZPOS;
114 a->used = 0;
115
116 tmp = a->dp;
117 /* XXX - memset */
118 for (n = 0; n < a->alloc; n++) {
119 *tmp++ = 0;
120 }
121 }
122
123 /* grow as required */
124 static int
mp_grow(mp_int * a,int size)125 mp_grow(mp_int *a, int size)
126 {
127 int i;
128 mp_digit *tmp;
129
130 /* if the alloc size is smaller alloc more ram */
131 if (a->alloc < size) {
132 /* ensure there are always at least MP_PREC digits extra on top */
133 size += (MP_PREC * 2) - (size % MP_PREC);
134
135 /* reallocate the array a->dp
136 *
137 * We store the return in a temporary variable
138 * in case the operation failed we don't want
139 * to overwrite the dp member of a.
140 */
141 tmp = realloc(a->dp, sizeof(*tmp) * size);
142 if (tmp == NULL) {
143 /* reallocation failed but "a" is still valid [can be freed] */
144 return MP_MEM;
145 }
146
147 /* reallocation succeeded so set a->dp */
148 a->dp = tmp;
149
150 /* zero excess digits */
151 i = a->alloc;
152 a->alloc = size;
153 for (; i < a->alloc; i++) {
154 a->dp[i] = 0;
155 }
156 }
157 return MP_OKAY;
158 }
159
160 /* shift left a certain amount of digits */
161 static int
mp_lshd(mp_int * a,int b)162 mp_lshd (mp_int * a, int b)
163 {
164 int x, res;
165
166 /* if its less than zero return */
167 if (b <= 0) {
168 return MP_OKAY;
169 }
170
171 /* grow to fit the new digits */
172 if (a->alloc < a->used + b) {
173 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
174 return res;
175 }
176 }
177
178 {
179 mp_digit *top, *bottom;
180
181 /* increment the used by the shift amount then copy upwards */
182 a->used += b;
183
184 /* top */
185 top = a->dp + a->used - 1;
186
187 /* base */
188 bottom = a->dp + a->used - 1 - b;
189
190 /* much like mp_rshd this is implemented using a sliding window
191 * except the window goes the otherway around. Copying from
192 * the bottom to the top. see bn_mp_rshd.c for more info.
193 */
194 for (x = a->used - 1; x >= b; x--) {
195 *top-- = *bottom--;
196 }
197
198 /* zero the lower digits */
199 top = a->dp;
200 for (x = 0; x < b; x++) {
201 *top++ = 0;
202 }
203 }
204 return MP_OKAY;
205 }
206
207 /* trim unused digits
208 *
209 * This is used to ensure that leading zero digits are
210 * trimed and the leading "used" digit will be non-zero
211 * Typically very fast. Also fixes the sign if there
212 * are no more leading digits
213 */
214 static void
mp_clamp(mp_int * a)215 mp_clamp (mp_int * a)
216 {
217 /* decrease used while the most significant digit is
218 * zero.
219 */
220 while (a->used > 0 && a->dp[a->used - 1] == 0) {
221 --(a->used);
222 }
223
224 /* reset the sign flag if used == 0 */
225 if (a->used == 0) {
226 a->sign = MP_ZPOS;
227 }
228 }
229
230 /* copy, b = a */
231 static int
mp_copy(BIGNUM * a,BIGNUM * b)232 mp_copy(BIGNUM *a, BIGNUM *b)
233 {
234 int res, n;
235
236 /* if dst == src do nothing */
237 if (a == b) {
238 return MP_OKAY;
239 }
240 if (a == NULL || b == NULL) {
241 return MP_VAL;
242 }
243
244 /* grow dest */
245 if (b->alloc < a->used) {
246 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
247 return res;
248 }
249 }
250
251 /* zero b and copy the parameters over */
252 {
253 mp_digit *tmpa, *tmpb;
254
255 /* pointer aliases */
256
257 /* source */
258 tmpa = a->dp;
259
260 /* destination */
261 tmpb = b->dp;
262
263 /* copy all the digits */
264 for (n = 0; n < a->used; n++) {
265 *tmpb++ = *tmpa++;
266 }
267
268 /* clear high digits */
269 for (; n < b->used; n++) {
270 *tmpb++ = 0;
271 }
272 }
273
274 /* copy used count and sign */
275 b->used = a->used;
276 b->sign = a->sign;
277 return MP_OKAY;
278 }
279
280 /* shift left by a certain bit count */
281 static int
mp_mul_2d(mp_int * a,int b,mp_int * c)282 mp_mul_2d(mp_int *a, int b, mp_int *c)
283 {
284 mp_digit d;
285 int res;
286
287 /* copy */
288 if (a != c) {
289 if ((res = mp_copy (a, c)) != MP_OKAY) {
290 return res;
291 }
292 }
293
294 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
295 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
296 return res;
297 }
298 }
299
300 /* shift by as many digits in the bit count */
301 if (b >= (int)DIGIT_BIT) {
302 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
303 return res;
304 }
305 }
306
307 /* shift any bit count < DIGIT_BIT */
308 d = (mp_digit) (b % DIGIT_BIT);
309 if (d != 0) {
310 mp_digit *tmpc, shift, mask, r, rr;
311 int x;
312
313 /* bitmask for carries */
314 mask = (((mp_digit)1) << d) - 1;
315
316 /* shift for msbs */
317 shift = DIGIT_BIT - d;
318
319 /* alias */
320 tmpc = c->dp;
321
322 /* carry */
323 r = 0;
324 for (x = 0; x < c->used; x++) {
325 /* get the higher bits of the current word */
326 rr = (*tmpc >> shift) & mask;
327
328 /* shift the current word and OR in the carry */
329 *tmpc = ((*tmpc << d) | r) & MP_MASK;
330 ++tmpc;
331
332 /* set the carry to the carry bits of the current word */
333 r = rr;
334 }
335
336 /* set final carry */
337 if (r != 0) {
338 c->dp[(c->used)++] = r;
339 }
340 }
341 mp_clamp (c);
342 return MP_OKAY;
343 }
344
345 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
346 static int
mp_read_unsigned_bin(mp_int * a,const uint8_t * b,int c)347 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c)
348 {
349 int res;
350
351 /* make sure there are at least two digits */
352 if (a->alloc < 2) {
353 if ((res = mp_grow(a, 2)) != MP_OKAY) {
354 return res;
355 }
356 }
357
358 /* zero the int */
359 mp_zero (a);
360
361 /* read the bytes in */
362 while (c-- > 0) {
363 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
364 return res;
365 }
366
367 a->dp[0] |= *b++;
368 a->used += 1;
369 }
370 mp_clamp (a);
371 return MP_OKAY;
372 }
373
374 /* returns the number of bits in an int */
375 static int
mp_count_bits(const mp_int * a)376 mp_count_bits(const mp_int *a)
377 {
378 int r;
379 mp_digit q;
380
381 /* shortcut */
382 if (a->used == 0) {
383 return 0;
384 }
385
386 /* get number of digits and add that */
387 r = (a->used - 1) * DIGIT_BIT;
388
389 /* take the last digit and count the bits in it */
390 q = a->dp[a->used - 1];
391 while (q > ((mp_digit) 0)) {
392 ++r;
393 q >>= ((mp_digit) 1);
394 }
395 return r;
396 }
397
398 /* compare maginitude of two ints (unsigned) */
399 static int
mp_cmp_mag(mp_int * a,mp_int * b)400 mp_cmp_mag (mp_int * a, mp_int * b)
401 {
402 int n;
403 mp_digit *tmpa, *tmpb;
404
405 /* compare based on # of non-zero digits */
406 if (a->used > b->used) {
407 return MP_GT;
408 }
409
410 if (a->used < b->used) {
411 return MP_LT;
412 }
413
414 /* alias for a */
415 tmpa = a->dp + (a->used - 1);
416
417 /* alias for b */
418 tmpb = b->dp + (a->used - 1);
419
420 /* compare based on digits */
421 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
422 if (*tmpa > *tmpb) {
423 return MP_GT;
424 }
425
426 if (*tmpa < *tmpb) {
427 return MP_LT;
428 }
429 }
430 return MP_EQ;
431 }
432
433 /* compare two ints (signed)*/
434 static int
mp_cmp(mp_int * a,mp_int * b)435 mp_cmp (mp_int * a, mp_int * b)
436 {
437 /* compare based on sign */
438 if (a->sign != b->sign) {
439 if (a->sign == MP_NEG) {
440 return MP_LT;
441 } else {
442 return MP_GT;
443 }
444 }
445
446 /* compare digits */
447 if (a->sign == MP_NEG) {
448 /* if negative compare opposite direction */
449 return mp_cmp_mag(b, a);
450 } else {
451 return mp_cmp_mag(a, b);
452 }
453 }
454
455 /* get the size for an unsigned equivalent */
456 static int
mp_unsigned_bin_size(mp_int * a)457 mp_unsigned_bin_size (mp_int * a)
458 {
459 int size = mp_count_bits (a);
460 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
461 }
462
463 /* init a new mp_int */
464 static int
mp_init(mp_int * a)465 mp_init (mp_int * a)
466 {
467 int i;
468
469 /* allocate memory required and clear it */
470 a->dp = netpgp_allocate(1, sizeof (*a->dp) * MP_PREC);
471 if (a->dp == NULL) {
472 return MP_MEM;
473 }
474
475 /* set the digits to zero */
476 for (i = 0; i < MP_PREC; i++) {
477 a->dp[i] = 0;
478 }
479
480 /* set the used to zero, allocated digits to the default precision
481 * and sign to positive */
482 a->used = 0;
483 a->alloc = MP_PREC;
484 a->sign = MP_ZPOS;
485
486 return MP_OKAY;
487 }
488
489 /* clear one (frees) */
490 static void
mp_clear(mp_int * a)491 mp_clear (mp_int * a)
492 {
493 int i;
494
495 /* only do anything if a hasn't been freed previously */
496 if (a->dp != NULL) {
497 /* first zero the digits */
498 for (i = 0; i < a->used; i++) {
499 a->dp[i] = 0;
500 }
501
502 /* free ram */
503 netpgp_deallocate(a->dp, (size_t)a->alloc);
504
505 /* reset members to make debugging easier */
506 a->dp = NULL;
507 a->alloc = a->used = 0;
508 a->sign = MP_ZPOS;
509 }
510 }
511
512 static int
mp_init_multi(mp_int * mp,...)513 mp_init_multi(mp_int *mp, ...)
514 {
515 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
516 int n = 0; /* Number of ok inits */
517 mp_int* cur_arg = mp;
518 va_list args;
519
520 va_start(args, mp); /* init args to next argument from caller */
521 while (cur_arg != NULL) {
522 if (mp_init(cur_arg) != MP_OKAY) {
523 /* Oops - error! Back-track and mp_clear what we already
524 succeeded in init-ing, then return error.
525 */
526 va_list clean_args;
527
528 /* end the current list */
529 va_end(args);
530
531 /* now start cleaning up */
532 cur_arg = mp;
533 va_start(clean_args, mp);
534 while (n--) {
535 mp_clear(cur_arg);
536 cur_arg = va_arg(clean_args, mp_int*);
537 }
538 va_end(clean_args);
539 res = MP_MEM;
540 break;
541 }
542 n++;
543 cur_arg = va_arg(args, mp_int*);
544 }
545 va_end(args);
546 return res; /* Assumed ok, if error flagged above. */
547 }
548
549 /* init an mp_init for a given size */
550 static int
mp_init_size(mp_int * a,int size)551 mp_init_size (mp_int * a, int size)
552 {
553 int x;
554
555 /* pad size so there are always extra digits */
556 size += (MP_PREC * 2) - (size % MP_PREC);
557
558 /* alloc mem */
559 a->dp = netpgp_allocate (1, sizeof (*a->dp) * size);
560 if (a->dp == NULL) {
561 return MP_MEM;
562 }
563
564 /* set the members */
565 a->used = 0;
566 a->alloc = size;
567 a->sign = MP_ZPOS;
568
569 /* zero the digits */
570 for (x = 0; x < size; x++) {
571 a->dp[x] = 0;
572 }
573
574 return MP_OKAY;
575 }
576
577 /* creates "a" then copies b into it */
mp_init_copy(mp_int * a,const mp_int * b)578 static int mp_init_copy (mp_int * a, const mp_int * b)
579 {
580 int res;
581
582 if ((res = mp_init (a)) != MP_OKAY) {
583 return res;
584 }
585 return mp_copy (b, a);
586 }
587
588 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
589 static int
s_mp_add(const mp_int * a,const mp_int * b,mp_int * c)590 s_mp_add (const mp_int * a, const mp_int * b, mp_int * c)
591 {
592 const mp_int *x;
593 int olduse, res, min, max;
594
595 /* find sizes, we let |a| <= |b| which means we have to sort
596 * them. "x" will point to the input with the most digits
597 */
598 if (a->used > b->used) {
599 min = b->used;
600 max = a->used;
601 x = a;
602 } else {
603 min = a->used;
604 max = b->used;
605 x = b;
606 }
607
608 /* init result */
609 if (c->alloc < max + 1) {
610 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
611 return res;
612 }
613 }
614
615 /* get old used digit count and set new one */
616 olduse = c->used;
617 c->used = max + 1;
618
619 {
620 const mp_digit *tmpa, *tmpb;
621 mp_digit u, *tmpc;
622 int i;
623
624 /* alias for digit pointers */
625
626 /* first input */
627 tmpa = a->dp;
628
629 /* second input */
630 tmpb = b->dp;
631
632 /* destination */
633 tmpc = c->dp;
634
635 /* zero the carry */
636 u = 0;
637 for (i = 0; i < min; i++) {
638 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
639 *tmpc = *tmpa++ + *tmpb++ + u;
640
641 /* U = carry bit of T[i] */
642 u = *tmpc >> ((mp_digit)DIGIT_BIT);
643
644 /* take away carry bit from T[i] */
645 *tmpc++ &= MP_MASK;
646 }
647
648 /* now copy higher words if any, that is in A+B
649 * if A or B has more digits add those in
650 */
651 if (min != max) {
652 for (; i < max; i++) {
653 /* T[i] = X[i] + U */
654 *tmpc = x->dp[i] + u;
655
656 /* U = carry bit of T[i] */
657 u = *tmpc >> ((mp_digit)DIGIT_BIT);
658
659 /* take away carry bit from T[i] */
660 *tmpc++ &= MP_MASK;
661 }
662 }
663
664 /* add carry */
665 *tmpc++ = u;
666
667 /* clear digits above oldused */
668 for (i = c->used; i < olduse; i++) {
669 *tmpc++ = 0;
670 }
671 }
672
673 mp_clamp (c);
674 return MP_OKAY;
675 }
676
677 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
678 static int
s_mp_sub(const mp_int * a,const mp_int * b,mp_int * c)679 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
680 {
681 int olduse, res, min, max;
682
683 /* find sizes */
684 min = b->used;
685 max = a->used;
686
687 /* init result */
688 if (c->alloc < max) {
689 if ((res = mp_grow (c, max)) != MP_OKAY) {
690 return res;
691 }
692 }
693 olduse = c->used;
694 c->used = max;
695
696 {
697 const mp_digit *tmpa, *tmpb;
698 mp_digit u, *tmpc;
699 int i;
700
701 /* alias for digit pointers */
702 tmpa = a->dp;
703 tmpb = b->dp;
704 tmpc = c->dp;
705
706 /* set carry to zero */
707 u = 0;
708 for (i = 0; i < min; i++) {
709 /* T[i] = A[i] - B[i] - U */
710 *tmpc = *tmpa++ - *tmpb++ - u;
711
712 /* U = carry bit of T[i]
713 * Note this saves performing an AND operation since
714 * if a carry does occur it will propagate all the way to the
715 * MSB. As a result a single shift is enough to get the carry
716 */
717 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
718
719 /* Clear carry from T[i] */
720 *tmpc++ &= MP_MASK;
721 }
722
723 /* now copy higher words if any, e.g. if A has more digits than B */
724 for (; i < max; i++) {
725 /* T[i] = A[i] - U */
726 *tmpc = *tmpa++ - u;
727
728 /* U = carry bit of T[i] */
729 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
730
731 /* Clear carry from T[i] */
732 *tmpc++ &= MP_MASK;
733 }
734
735 /* clear digits above used (since we may not have grown result above) */
736 for (i = c->used; i < olduse; i++) {
737 *tmpc++ = 0;
738 }
739 }
740
741 mp_clamp (c);
742 return MP_OKAY;
743 }
744
745 /* high level subtraction (handles signs) */
746 static int
mp_sub(const mp_int * a,const mp_int * b,mp_int * c)747 mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
748 {
749 int sa, sb, res;
750
751 sa = a->sign;
752 sb = b->sign;
753
754 if (sa != sb) {
755 /* subtract a negative from a positive, OR */
756 /* subtract a positive from a negative. */
757 /* In either case, ADD their magnitudes, */
758 /* and use the sign of the first number. */
759 c->sign = sa;
760 res = s_mp_add (a, b, c);
761 } else {
762 /* subtract a positive from a positive, OR */
763 /* subtract a negative from a negative. */
764 /* First, take the difference between their */
765 /* magnitudes, then... */
766 if (mp_cmp_mag (a, b) != MP_LT) {
767 /* Copy the sign from the first */
768 c->sign = sa;
769 /* The first has a larger or equal magnitude */
770 res = s_mp_sub (a, b, c);
771 } else {
772 /* The result has the *opposite* sign from */
773 /* the first number. */
774 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
775 /* The second has a larger magnitude */
776 res = s_mp_sub (b, a, c);
777 }
778 }
779 return res;
780 }
781
782 /* shift right a certain amount of digits */
mp_rshd(mp_int * a,int b)783 static int mp_rshd (mp_int * a, int b)
784 {
785 int x;
786
787 /* if b <= 0 then ignore it */
788 if (b <= 0) {
789 return 0;
790 }
791
792 /* if b > used then simply zero it and return */
793 if (a->used <= b) {
794 mp_zero (a);
795 return 0;
796 }
797
798 {
799 mp_digit *bottom, *top;
800
801 /* shift the digits down */
802
803 /* bottom */
804 bottom = a->dp;
805
806 /* top [offset into digits] */
807 top = a->dp + b;
808
809 /* this is implemented as a sliding window where
810 * the window is b-digits long and digits from
811 * the top of the window are copied to the bottom
812 *
813 * e.g.
814
815 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
816 /\ | ---->
817 \-------------------/ ---->
818 */
819 for (x = 0; x < (a->used - b); x++) {
820 *bottom++ = *top++;
821 }
822
823 /* zero the top digits */
824 for (; x < a->used; x++) {
825 *bottom++ = 0;
826 }
827 }
828
829 /* remove excess digits */
830 a->used -= b;
831 return 1;
832 }
833
834 /* multiply by a digit */
835 static int
mp_mul_d(const mp_int * a,mp_digit b,mp_int * c)836 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
837 {
838 const mp_digit *tmpa;
839 mp_digit u, *tmpc;
840 mp_word r;
841 int ix, res, olduse;
842
843 /* make sure c is big enough to hold a*b */
844 if (c->alloc < a->used + 1) {
845 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
846 return res;
847 }
848 }
849
850 /* get the original destinations used count */
851 olduse = c->used;
852
853 /* set the sign */
854 c->sign = a->sign;
855
856 /* alias for a->dp [source] */
857 tmpa = a->dp;
858
859 /* alias for c->dp [dest] */
860 tmpc = c->dp;
861
862 /* zero carry */
863 u = 0;
864
865 /* compute columns */
866 for (ix = 0; ix < a->used; ix++) {
867 /* compute product and carry sum for this term */
868 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
869
870 /* mask off higher bits to get a single digit */
871 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
872
873 /* send carry into next iteration */
874 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
875 }
876
877 /* store final carry [if any] and increment ix offset */
878 *tmpc++ = u;
879 ++ix;
880
881 /* now zero digits above the top */
882 while (ix++ < olduse) {
883 *tmpc++ = 0;
884 }
885
886 /* set used count */
887 c->used = a->used + 1;
888 mp_clamp(c);
889
890 return MP_OKAY;
891 }
892
893 /* high level addition (handles signs) */
mp_add(const mp_int * a,const mp_int * b,mp_int * c)894 static int mp_add (const mp_int * a, const mp_int * b, mp_int * c)
895 {
896 int sa, sb, res;
897
898 /* get sign of both inputs */
899 sa = a->sign;
900 sb = b->sign;
901
902 /* handle two cases, not four */
903 if (sa == sb) {
904 /* both positive or both negative */
905 /* add their magnitudes, copy the sign */
906 c->sign = sa;
907 res = s_mp_add (a, b, c);
908 } else {
909 /* one positive, the other negative */
910 /* subtract the one with the greater magnitude from */
911 /* the one of the lesser magnitude. The result gets */
912 /* the sign of the one with the greater magnitude. */
913 if (mp_cmp_mag (a, b) == MP_LT) {
914 c->sign = sb;
915 res = s_mp_sub (b, a, c);
916 } else {
917 c->sign = sa;
918 res = s_mp_sub (a, b, c);
919 }
920 }
921 return res;
922 }
923
924 /* swap the elements of two integers, for cases where you can't simply swap the
925 * mp_int pointers around
926 */
927 static void
mp_exch(mp_int * a,mp_int * b)928 mp_exch(mp_int *a, mp_int *b)
929 {
930 mp_int t;
931
932 t = *a;
933 *a = *b;
934 *b = t;
935 }
936
937 /* calc a value mod 2**b */
938 static int
mp_mod_2d(const mp_int * a,int b,mp_int * c)939 mp_mod_2d (const mp_int * a, int b, mp_int * c)
940 {
941 int x, res;
942
943 /* if b is <= 0 then zero the int */
944 if (b <= 0) {
945 mp_zero (c);
946 return MP_OKAY;
947 }
948
949 /* if the modulus is larger than the value than return */
950 if (b >= (int) (a->used * DIGIT_BIT)) {
951 res = mp_copy (a, c);
952 return res;
953 }
954
955 /* copy */
956 if ((res = mp_copy (a, c)) != MP_OKAY) {
957 return res;
958 }
959
960 /* zero digits above the last digit of the modulus */
961 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
962 c->dp[x] = 0;
963 }
964 /* clear the digit that is not completely outside/inside the modulus */
965 c->dp[b / DIGIT_BIT] &=
966 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
967 mp_clamp (c);
968 return MP_OKAY;
969 }
970
971 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
mp_div_2d(const mp_int * a,int b,mp_int * c,mp_int * d)972 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
973 {
974 mp_digit D, r, rr;
975 int x, res;
976 mp_int t;
977
978
979 /* if the shift count is <= 0 then we do no work */
980 if (b <= 0) {
981 res = mp_copy (a, c);
982 if (d != NULL) {
983 mp_zero (d);
984 }
985 return res;
986 }
987
988 if ((res = mp_init (&t)) != MP_OKAY) {
989 return res;
990 }
991
992 /* get the remainder */
993 if (d != NULL) {
994 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
995 mp_clear (&t);
996 return res;
997 }
998 }
999
1000 /* copy */
1001 if ((res = mp_copy (a, c)) != MP_OKAY) {
1002 mp_clear (&t);
1003 return res;
1004 }
1005
1006 /* shift by as many digits in the bit count */
1007 if (b >= (int)DIGIT_BIT) {
1008 mp_rshd (c, b / DIGIT_BIT);
1009 }
1010
1011 /* shift any bit count < DIGIT_BIT */
1012 D = (mp_digit) (b % DIGIT_BIT);
1013 if (D != 0) {
1014 mp_digit *tmpc, mask, shift;
1015
1016 /* mask */
1017 mask = (((mp_digit)1) << D) - 1;
1018
1019 /* shift for lsb */
1020 shift = DIGIT_BIT - D;
1021
1022 /* alias */
1023 tmpc = c->dp + (c->used - 1);
1024
1025 /* carry */
1026 r = 0;
1027 for (x = c->used - 1; x >= 0; x--) {
1028 /* get the lower bits of this word in a temp */
1029 rr = *tmpc & mask;
1030
1031 /* shift the current word and mix in the carry bits from the previous word */
1032 *tmpc = (*tmpc >> D) | (r << shift);
1033 --tmpc;
1034
1035 /* set the carry to the carry bits of the current word found above */
1036 r = rr;
1037 }
1038 }
1039 mp_clamp (c);
1040 if (d != NULL) {
1041 mp_exch (&t, d);
1042 }
1043 mp_clear (&t);
1044 return MP_OKAY;
1045 }
1046
1047 /* integer signed division.
1048 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1049 * HAC pp.598 Algorithm 14.20
1050 *
1051 * Note that the description in HAC is horribly
1052 * incomplete. For example, it doesn't consider
1053 * the case where digits are removed from 'x' in
1054 * the inner loop. It also doesn't consider the
1055 * case that y has fewer than three digits, etc..
1056 *
1057 * The overall algorithm is as described as
1058 * 14.20 from HAC but fixed to treat these cases.
1059 */
1060 static int
mp_div(mp_int * c,mp_int * d,const mp_int * a,const mp_int * b)1061 mp_div(mp_int *c, mp_int *d, const mp_int *a, const mp_int *b)
1062 {
1063 mp_int q, x, y, t1, t2;
1064 int res, n, t, i, norm, neg;
1065
1066 /* is divisor zero ? */
1067 if (BN_is_zero (b) == 1) {
1068 return MP_VAL;
1069 }
1070
1071 /* if a < b then q=0, r = a */
1072 if (mp_cmp_mag (a, b) == MP_LT) {
1073 if (d != NULL) {
1074 res = mp_copy (a, d);
1075 } else {
1076 res = MP_OKAY;
1077 }
1078 if (c != NULL) {
1079 mp_zero (c);
1080 }
1081 return res;
1082 }
1083
1084 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1085 return res;
1086 }
1087 q.used = a->used + 2;
1088
1089 if ((res = mp_init (&t1)) != MP_OKAY) {
1090 goto LBL_Q;
1091 }
1092
1093 if ((res = mp_init (&t2)) != MP_OKAY) {
1094 goto LBL_T1;
1095 }
1096
1097 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1098 goto LBL_T2;
1099 }
1100
1101 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1102 goto LBL_X;
1103 }
1104
1105 /* fix the sign */
1106 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1107 x.sign = y.sign = MP_ZPOS;
1108
1109 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1110 norm = mp_count_bits(&y) % DIGIT_BIT;
1111 if (norm < (int)(DIGIT_BIT-1)) {
1112 norm = (DIGIT_BIT-1) - norm;
1113 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1114 goto LBL_Y;
1115 }
1116 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1117 goto LBL_Y;
1118 }
1119 } else {
1120 norm = 0;
1121 }
1122
1123 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1124 n = x.used - 1;
1125 t = y.used - 1;
1126
1127 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1128 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1129 goto LBL_Y;
1130 }
1131
1132 while (mp_cmp (&x, &y) != MP_LT) {
1133 ++(q.dp[n - t]);
1134 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1135 goto LBL_Y;
1136 }
1137 }
1138
1139 /* reset y by shifting it back down */
1140 mp_rshd (&y, n - t);
1141
1142 /* step 3. for i from n down to (t + 1) */
1143 for (i = n; i >= (t + 1); i--) {
1144 if (i > x.used) {
1145 continue;
1146 }
1147
1148 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1149 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1150 if (x.dp[i] == y.dp[t]) {
1151 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1152 } else {
1153 mp_word tmp;
1154 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1155 tmp |= ((mp_word) x.dp[i - 1]);
1156 tmp /= ((mp_word) y.dp[t]);
1157 if (tmp > (mp_word) MP_MASK)
1158 tmp = MP_MASK;
1159 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1160 }
1161
1162 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1163 xi * b**2 + xi-1 * b + xi-2
1164
1165 do q{i-t-1} -= 1;
1166 */
1167 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1168 do {
1169 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1170
1171 /* find left hand */
1172 mp_zero (&t1);
1173 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1174 t1.dp[1] = y.dp[t];
1175 t1.used = 2;
1176 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1177 goto LBL_Y;
1178 }
1179
1180 /* find right hand */
1181 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1182 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1183 t2.dp[2] = x.dp[i];
1184 t2.used = 3;
1185 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1186
1187 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1188 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1189 goto LBL_Y;
1190 }
1191
1192 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1193 goto LBL_Y;
1194 }
1195
1196 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1197 goto LBL_Y;
1198 }
1199
1200 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1201 if (x.sign == MP_NEG) {
1202 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1203 goto LBL_Y;
1204 }
1205 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1206 goto LBL_Y;
1207 }
1208 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1209 goto LBL_Y;
1210 }
1211
1212 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1213 }
1214 }
1215
1216 /* now q is the quotient and x is the remainder
1217 * [which we have to normalize]
1218 */
1219
1220 /* get sign before writing to c */
1221 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1222
1223 if (c != NULL) {
1224 mp_clamp (&q);
1225 mp_exch (&q, c);
1226 c->sign = neg;
1227 }
1228
1229 if (d != NULL) {
1230 mp_div_2d (&x, norm, &x, NULL);
1231 mp_exch (&x, d);
1232 }
1233
1234 res = MP_OKAY;
1235
1236 LBL_Y:mp_clear (&y);
1237 LBL_X:mp_clear (&x);
1238 LBL_T2:mp_clear (&t2);
1239 LBL_T1:mp_clear (&t1);
1240 LBL_Q:mp_clear (&q);
1241 return res;
1242 }
1243
1244 /* c = a mod b, 0 <= c < b */
1245 static int
mp_mod(const mp_int * a,const mp_int * b,mp_int * c)1246 mp_mod (const mp_int * a, const mp_int * b, mp_int * c)
1247 {
1248 mp_int t;
1249 int res;
1250
1251 if ((res = mp_init (&t)) != MP_OKAY) {
1252 return res;
1253 }
1254
1255 if ((res = mp_div (NULL, &t, a, b)) != MP_OKAY) {
1256 mp_clear (&t);
1257 return res;
1258 }
1259
1260 if (t.sign != b->sign) {
1261 res = mp_add (b, &t, c);
1262 } else {
1263 res = MP_OKAY;
1264 mp_exch (&t, c);
1265 }
1266
1267 mp_clear (&t);
1268 return res;
1269 }
1270
1271 /* set to a digit */
mp_set(mp_int * a,mp_digit b)1272 static void mp_set (mp_int * a, mp_digit b)
1273 {
1274 mp_zero (a);
1275 a->dp[0] = b & MP_MASK;
1276 a->used = (a->dp[0] != 0) ? 1 : 0;
1277 }
1278
1279 /* b = a/2 */
mp_div_2(const mp_int * a,mp_int * b)1280 static int mp_div_2(const mp_int * a, mp_int * b)
1281 {
1282 int x, res, oldused;
1283
1284 /* copy */
1285 if (b->alloc < a->used) {
1286 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1287 return res;
1288 }
1289 }
1290
1291 oldused = b->used;
1292 b->used = a->used;
1293 {
1294 mp_digit r, rr, *tmpa, *tmpb;
1295
1296 /* source alias */
1297 tmpa = a->dp + b->used - 1;
1298
1299 /* dest alias */
1300 tmpb = b->dp + b->used - 1;
1301
1302 /* carry */
1303 r = 0;
1304 for (x = b->used - 1; x >= 0; x--) {
1305 /* get the carry for the next iteration */
1306 rr = *tmpa & 1;
1307
1308 /* shift the current digit, add in carry and store */
1309 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1310
1311 /* forward carry to next iteration */
1312 r = rr;
1313 }
1314
1315 /* zero excess digits */
1316 tmpb = b->dp + b->used;
1317 for (x = b->used; x < oldused; x++) {
1318 *tmpb++ = 0;
1319 }
1320 }
1321 b->sign = a->sign;
1322 mp_clamp (b);
1323 return MP_OKAY;
1324 }
1325
1326 /* compare a digit */
mp_cmp_d(const mp_int * a,mp_digit b)1327 static int mp_cmp_d(const mp_int * a, mp_digit b)
1328 {
1329 /* compare based on sign */
1330 if (a->sign == MP_NEG) {
1331 return MP_LT;
1332 }
1333
1334 /* compare based on magnitude */
1335 if (a->used > 1) {
1336 return MP_GT;
1337 }
1338
1339 /* compare the only digit of a to b */
1340 if (a->dp[0] > b) {
1341 return MP_GT;
1342 } else if (a->dp[0] < b) {
1343 return MP_LT;
1344 } else {
1345 return MP_EQ;
1346 }
1347 }
1348
mp_clear_multi(mp_int * mp,...)1349 static void mp_clear_multi(mp_int *mp, ...)
1350 {
1351 mp_int* next_mp = mp;
1352 va_list args;
1353 va_start(args, mp);
1354 while (next_mp != NULL) {
1355 mp_clear(next_mp);
1356 next_mp = va_arg(args, mp_int*);
1357 }
1358 va_end(args);
1359 }
1360
1361 /* computes the modular inverse via binary extended euclidean algorithm,
1362 * that is c = 1/a mod b
1363 *
1364 * Based on slow invmod except this is optimized for the case where b is
1365 * odd as per HAC Note 14.64 on pp. 610
1366 */
1367 static int
fast_mp_invmod(const mp_int * a,const mp_int * b,mp_int * c)1368 fast_mp_invmod (const mp_int * a, const mp_int * b, mp_int * c)
1369 {
1370 mp_int x, y, u, v, B, D;
1371 int res, neg;
1372
1373 /* 2. [modified] b must be odd */
1374 if (BN_is_even (b) == 1) {
1375 return MP_VAL;
1376 }
1377
1378 /* init all our temps */
1379 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
1380 return res;
1381 }
1382
1383 /* x == modulus, y == value to invert */
1384 if ((res = mp_copy (b, &x)) != MP_OKAY) {
1385 goto LBL_ERR;
1386 }
1387
1388 /* we need y = |a| */
1389 if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
1390 goto LBL_ERR;
1391 }
1392
1393 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1394 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1395 goto LBL_ERR;
1396 }
1397 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1398 goto LBL_ERR;
1399 }
1400 mp_set (&D, (unsigned long)1);
1401
1402 top:
1403 /* 4. while u is even do */
1404 while (BN_is_even (&u) == 1) {
1405 /* 4.1 u = u/2 */
1406 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1407 goto LBL_ERR;
1408 }
1409 /* 4.2 if B is odd then */
1410 if (BN_is_odd (&B) == 1) {
1411 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1412 goto LBL_ERR;
1413 }
1414 }
1415 /* B = B/2 */
1416 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1417 goto LBL_ERR;
1418 }
1419 }
1420
1421 /* 5. while v is even do */
1422 while (BN_is_even (&v) == 1) {
1423 /* 5.1 v = v/2 */
1424 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1425 goto LBL_ERR;
1426 }
1427 /* 5.2 if D is odd then */
1428 if (BN_is_odd (&D) == 1) {
1429 /* D = (D-x)/2 */
1430 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1431 goto LBL_ERR;
1432 }
1433 }
1434 /* D = D/2 */
1435 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1436 goto LBL_ERR;
1437 }
1438 }
1439
1440 /* 6. if u >= v then */
1441 if (mp_cmp (&u, &v) != MP_LT) {
1442 /* u = u - v, B = B - D */
1443 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1444 goto LBL_ERR;
1445 }
1446
1447 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1448 goto LBL_ERR;
1449 }
1450 } else {
1451 /* v - v - u, D = D - B */
1452 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1453 goto LBL_ERR;
1454 }
1455
1456 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1457 goto LBL_ERR;
1458 }
1459 }
1460
1461 /* if not zero goto step 4 */
1462 if (BN_is_zero (&u) == 0) {
1463 goto top;
1464 }
1465
1466 /* now a = C, b = D, gcd == g*v */
1467
1468 /* if v != 1 then there is no inverse */
1469 if (mp_cmp_d (&v, (unsigned long)1) != MP_EQ) {
1470 res = MP_VAL;
1471 goto LBL_ERR;
1472 }
1473
1474 /* b is now the inverse */
1475 neg = a->sign;
1476 while (D.sign == MP_NEG) {
1477 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1478 goto LBL_ERR;
1479 }
1480 }
1481 mp_exch (&D, c);
1482 c->sign = neg;
1483 res = MP_OKAY;
1484
1485 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
1486 return res;
1487 }
1488
1489 /* hac 14.61, pp608 */
1490 static int
mp_invmod_slow(const mp_int * a,const mp_int * b,mp_int * c)1491 mp_invmod_slow (const mp_int * a, const mp_int * b, mp_int * c)
1492 {
1493 mp_int x, y, u, v, A, B, C, D;
1494 int res;
1495
1496 /* b cannot be negative */
1497 if (b->sign == MP_NEG || BN_is_zero(b) == 1) {
1498 return MP_VAL;
1499 }
1500
1501 /* init temps */
1502 if ((res = mp_init_multi(&x, &y, &u, &v,
1503 &A, &B, &C, &D, NULL)) != MP_OKAY) {
1504 return res;
1505 }
1506
1507 /* x = a, y = b */
1508 if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1509 goto LBL_ERR;
1510 }
1511 if ((res = mp_copy (b, &y)) != MP_OKAY) {
1512 goto LBL_ERR;
1513 }
1514
1515 /* 2. [modified] if x,y are both even then return an error! */
1516 if (BN_is_even (&x) == 1 && BN_is_even (&y) == 1) {
1517 res = MP_VAL;
1518 goto LBL_ERR;
1519 }
1520
1521 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1522 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1523 goto LBL_ERR;
1524 }
1525 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1526 goto LBL_ERR;
1527 }
1528 mp_set (&A, (unsigned long)1);
1529 mp_set (&D, (unsigned long)1);
1530
1531 top:
1532 /* 4. while u is even do */
1533 while (BN_is_even (&u) == 1) {
1534 /* 4.1 u = u/2 */
1535 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1536 goto LBL_ERR;
1537 }
1538 /* 4.2 if A or B is odd then */
1539 if (BN_is_odd (&A) == 1 || BN_is_odd (&B) == 1) {
1540 /* A = (A+y)/2, B = (B-x)/2 */
1541 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1542 goto LBL_ERR;
1543 }
1544 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1545 goto LBL_ERR;
1546 }
1547 }
1548 /* A = A/2, B = B/2 */
1549 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1550 goto LBL_ERR;
1551 }
1552 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1553 goto LBL_ERR;
1554 }
1555 }
1556
1557 /* 5. while v is even do */
1558 while (BN_is_even (&v) == 1) {
1559 /* 5.1 v = v/2 */
1560 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1561 goto LBL_ERR;
1562 }
1563 /* 5.2 if C or D is odd then */
1564 if (BN_is_odd (&C) == 1 || BN_is_odd (&D) == 1) {
1565 /* C = (C+y)/2, D = (D-x)/2 */
1566 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1567 goto LBL_ERR;
1568 }
1569 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1570 goto LBL_ERR;
1571 }
1572 }
1573 /* C = C/2, D = D/2 */
1574 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1575 goto LBL_ERR;
1576 }
1577 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1578 goto LBL_ERR;
1579 }
1580 }
1581
1582 /* 6. if u >= v then */
1583 if (mp_cmp (&u, &v) != MP_LT) {
1584 /* u = u - v, A = A - C, B = B - D */
1585 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1586 goto LBL_ERR;
1587 }
1588
1589 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1590 goto LBL_ERR;
1591 }
1592
1593 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1594 goto LBL_ERR;
1595 }
1596 } else {
1597 /* v - v - u, C = C - A, D = D - B */
1598 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1599 goto LBL_ERR;
1600 }
1601
1602 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1603 goto LBL_ERR;
1604 }
1605
1606 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1607 goto LBL_ERR;
1608 }
1609 }
1610
1611 /* if not zero goto step 4 */
1612 if (BN_is_zero (&u) == 0)
1613 goto top;
1614
1615 /* now a = C, b = D, gcd == g*v */
1616
1617 /* if v != 1 then there is no inverse */
1618 if (mp_cmp_d (&v, 1) != MP_EQ) {
1619 res = MP_VAL;
1620 goto LBL_ERR;
1621 }
1622
1623 /* if its too low */
1624 while (mp_cmp_d(&C, 0) == MP_LT) {
1625 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1626 goto LBL_ERR;
1627 }
1628 }
1629
1630 /* too big */
1631 while (mp_cmp_mag(&C, b) != MP_LT) {
1632 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1633 goto LBL_ERR;
1634 }
1635 }
1636
1637 /* C is now the inverse */
1638 mp_exch (&C, c);
1639 res = MP_OKAY;
1640 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
1641 return res;
1642 }
1643
1644 static int
mp_invmod(mp_int * c,const mp_int * a,const mp_int * b)1645 mp_invmod(mp_int *c, const mp_int *a, const mp_int *b)
1646 {
1647 /* b cannot be negative */
1648 if (b->sign == MP_NEG || BN_is_zero(b) == 1) {
1649 return MP_VAL;
1650 }
1651
1652 /* if the modulus is odd we can use a faster routine instead */
1653 if (BN_is_odd (b) == 1) {
1654 return fast_mp_invmod(a, b, c);
1655 }
1656
1657 return mp_invmod_slow(a, b, c);
1658
1659 /*NOTREACHED*/
1660 return MP_VAL;
1661 }
1662
1663 /* b = |a|
1664 *
1665 * Simple function copies the input and fixes the sign to positive
1666 */
1667 static int
mp_abs(const mp_int * a,mp_int * b)1668 mp_abs (const mp_int * a, mp_int * b)
1669 {
1670 int res;
1671
1672 /* copy a to b */
1673 if (a != b) {
1674 if ((res = mp_copy (a, b)) != MP_OKAY) {
1675 return res;
1676 }
1677 }
1678
1679 /* force the sign of b to positive */
1680 b->sign = MP_ZPOS;
1681
1682 return MP_OKAY;
1683 }
1684
1685 /* determines if reduce_2k_l can be used */
mp_reduce_is_2k_l(const mp_int * a)1686 static int mp_reduce_is_2k_l(const mp_int *a)
1687 {
1688 int ix, iy;
1689
1690 if (a->used == 0) {
1691 return MP_NO;
1692 } else if (a->used == 1) {
1693 return MP_YES;
1694 } else if (a->used > 1) {
1695 /* if more than half of the digits are -1 we're sold */
1696 for (iy = ix = 0; ix < a->used; ix++) {
1697 if (a->dp[ix] == MP_MASK) {
1698 ++iy;
1699 }
1700 }
1701 return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1702
1703 }
1704 return MP_NO;
1705 }
1706
1707 /* computes a = 2**b
1708 *
1709 * Simple algorithm which zeroes the int, grows it then just sets one bit
1710 * as required.
1711 */
1712 static int
mp_2expt(mp_int * a,int b)1713 mp_2expt (mp_int * a, int b)
1714 {
1715 int res;
1716
1717 /* zero a as per default */
1718 mp_zero (a);
1719
1720 /* grow a to accomodate the single bit */
1721 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
1722 return res;
1723 }
1724
1725 /* set the used count of where the bit will go */
1726 a->used = b / DIGIT_BIT + 1;
1727
1728 /* put the single bit in its place */
1729 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
1730
1731 return MP_OKAY;
1732 }
1733
1734 /* pre-calculate the value required for Barrett reduction
1735 * For a given modulus "b" it calulates the value required in "a"
1736 */
mp_reduce_setup(mp_int * a,mp_int * b)1737 static int mp_reduce_setup (mp_int * a, mp_int * b)
1738 {
1739 int res;
1740
1741 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
1742 return res;
1743 }
1744 return mp_div (a, NULL, a, b);
1745 }
1746
1747 /* b = a*2 */
mp_mul_2(mp_int * a,mp_int * b)1748 static int mp_mul_2(mp_int * a, mp_int * b)
1749 {
1750 int x, res, oldused;
1751
1752 /* grow to accomodate result */
1753 if (b->alloc < a->used + 1) {
1754 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
1755 return res;
1756 }
1757 }
1758
1759 oldused = b->used;
1760 b->used = a->used;
1761
1762 {
1763 mp_digit r, rr, *tmpa, *tmpb;
1764
1765 /* alias for source */
1766 tmpa = a->dp;
1767
1768 /* alias for dest */
1769 tmpb = b->dp;
1770
1771 /* carry */
1772 r = 0;
1773 for (x = 0; x < a->used; x++) {
1774
1775 /* get what will be the *next* carry bit from the
1776 * MSB of the current digit
1777 */
1778 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
1779
1780 /* now shift up this digit, add in the carry [from the previous] */
1781 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
1782
1783 /* copy the carry that would be from the source
1784 * digit into the next iteration
1785 */
1786 r = rr;
1787 }
1788
1789 /* new leading digit? */
1790 if (r != 0) {
1791 /* add a MSB which is always 1 at this point */
1792 *tmpb = 1;
1793 ++(b->used);
1794 }
1795
1796 /* now zero any excess digits on the destination
1797 * that we didn't write to
1798 */
1799 tmpb = b->dp + b->used;
1800 for (x = b->used; x < oldused; x++) {
1801 *tmpb++ = 0;
1802 }
1803 }
1804 b->sign = a->sign;
1805 return MP_OKAY;
1806 }
1807
1808 /* divide by three (based on routine from MPI and the GMP manual) */
1809 static int
mp_div_3(mp_int * a,mp_int * c,mp_digit * d)1810 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
1811 {
1812 mp_int q;
1813 mp_word w, t;
1814 mp_digit b;
1815 int res, ix;
1816
1817 /* b = 2**DIGIT_BIT / 3 */
1818 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
1819
1820 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1821 return res;
1822 }
1823
1824 q.used = a->used;
1825 q.sign = a->sign;
1826 w = 0;
1827 for (ix = a->used - 1; ix >= 0; ix--) {
1828 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1829
1830 if (w >= 3) {
1831 /* multiply w by [1/3] */
1832 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
1833
1834 /* now subtract 3 * [w/3] from w, to get the remainder */
1835 w -= t+t+t;
1836
1837 /* fixup the remainder as required since
1838 * the optimization is not exact.
1839 */
1840 while (w >= 3) {
1841 t += 1;
1842 w -= 3;
1843 }
1844 } else {
1845 t = 0;
1846 }
1847 q.dp[ix] = (mp_digit)t;
1848 }
1849
1850 /* [optional] store the remainder */
1851 if (d != NULL) {
1852 *d = (mp_digit)w;
1853 }
1854
1855 /* [optional] store the quotient */
1856 if (c != NULL) {
1857 mp_clamp(&q);
1858 mp_exch(&q, c);
1859 }
1860 mp_clear(&q);
1861
1862 return res;
1863 }
1864
1865 /* multiplication using the Toom-Cook 3-way algorithm
1866 *
1867 * Much more complicated than Karatsuba but has a lower
1868 * asymptotic running time of O(N**1.464). This algorithm is
1869 * only particularly useful on VERY large inputs
1870 * (we're talking 1000s of digits here...).
1871 */
mp_toom_mul(mp_int * a,mp_int * b,mp_int * c)1872 static int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
1873 {
1874 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
1875 int res, B;
1876
1877 /* init temps */
1878 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
1879 &a0, &a1, &a2, &b0, &b1,
1880 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
1881 return res;
1882 }
1883
1884 /* B */
1885 B = MIN(a->used, b->used) / 3;
1886
1887 /* a = a2 * B**2 + a1 * B + a0 */
1888 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
1889 goto ERR;
1890 }
1891
1892 if ((res = mp_copy(a, &a1)) != MP_OKAY) {
1893 goto ERR;
1894 }
1895 mp_rshd(&a1, B);
1896 mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
1897
1898 if ((res = mp_copy(a, &a2)) != MP_OKAY) {
1899 goto ERR;
1900 }
1901 mp_rshd(&a2, B*2);
1902
1903 /* b = b2 * B**2 + b1 * B + b0 */
1904 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
1905 goto ERR;
1906 }
1907
1908 if ((res = mp_copy(b, &b1)) != MP_OKAY) {
1909 goto ERR;
1910 }
1911 mp_rshd(&b1, B);
1912 mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
1913
1914 if ((res = mp_copy(b, &b2)) != MP_OKAY) {
1915 goto ERR;
1916 }
1917 mp_rshd(&b2, B*2);
1918
1919 /* w0 = a0*b0 */
1920 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
1921 goto ERR;
1922 }
1923
1924 /* w4 = a2 * b2 */
1925 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
1926 goto ERR;
1927 }
1928
1929 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
1930 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
1931 goto ERR;
1932 }
1933 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1934 goto ERR;
1935 }
1936 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
1937 goto ERR;
1938 }
1939 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
1940 goto ERR;
1941 }
1942
1943 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
1944 goto ERR;
1945 }
1946 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1947 goto ERR;
1948 }
1949 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
1950 goto ERR;
1951 }
1952 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
1953 goto ERR;
1954 }
1955
1956 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
1957 goto ERR;
1958 }
1959
1960 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
1961 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
1962 goto ERR;
1963 }
1964 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1965 goto ERR;
1966 }
1967 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
1968 goto ERR;
1969 }
1970 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1971 goto ERR;
1972 }
1973
1974 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
1975 goto ERR;
1976 }
1977 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1978 goto ERR;
1979 }
1980 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
1981 goto ERR;
1982 }
1983 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
1984 goto ERR;
1985 }
1986
1987 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
1988 goto ERR;
1989 }
1990
1991
1992 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
1993 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
1994 goto ERR;
1995 }
1996 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1997 goto ERR;
1998 }
1999 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
2000 goto ERR;
2001 }
2002 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
2003 goto ERR;
2004 }
2005 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
2006 goto ERR;
2007 }
2008
2009 /* now solve the matrix
2010
2011 0 0 0 0 1
2012 1 2 4 8 16
2013 1 1 1 1 1
2014 16 8 4 2 1
2015 1 0 0 0 0
2016
2017 using 12 subtractions, 4 shifts,
2018 2 small divisions and 1 small multiplication
2019 */
2020
2021 /* r1 - r4 */
2022 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
2023 goto ERR;
2024 }
2025 /* r3 - r0 */
2026 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
2027 goto ERR;
2028 }
2029 /* r1/2 */
2030 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
2031 goto ERR;
2032 }
2033 /* r3/2 */
2034 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
2035 goto ERR;
2036 }
2037 /* r2 - r0 - r4 */
2038 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
2039 goto ERR;
2040 }
2041 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
2042 goto ERR;
2043 }
2044 /* r1 - r2 */
2045 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2046 goto ERR;
2047 }
2048 /* r3 - r2 */
2049 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2050 goto ERR;
2051 }
2052 /* r1 - 8r0 */
2053 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
2054 goto ERR;
2055 }
2056 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
2057 goto ERR;
2058 }
2059 /* r3 - 8r4 */
2060 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
2061 goto ERR;
2062 }
2063 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
2064 goto ERR;
2065 }
2066 /* 3r2 - r1 - r3 */
2067 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
2068 goto ERR;
2069 }
2070 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
2071 goto ERR;
2072 }
2073 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
2074 goto ERR;
2075 }
2076 /* r1 - r2 */
2077 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2078 goto ERR;
2079 }
2080 /* r3 - r2 */
2081 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2082 goto ERR;
2083 }
2084 /* r1/3 */
2085 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
2086 goto ERR;
2087 }
2088 /* r3/3 */
2089 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
2090 goto ERR;
2091 }
2092
2093 /* at this point shift W[n] by B*n */
2094 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
2095 goto ERR;
2096 }
2097 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
2098 goto ERR;
2099 }
2100 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
2101 goto ERR;
2102 }
2103 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
2104 goto ERR;
2105 }
2106
2107 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
2108 goto ERR;
2109 }
2110 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2111 goto ERR;
2112 }
2113 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2114 goto ERR;
2115 }
2116 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
2117 goto ERR;
2118 }
2119
2120 ERR:
2121 mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
2122 &a0, &a1, &a2, &b0, &b1,
2123 &b2, &tmp1, &tmp2, NULL);
2124 return res;
2125 }
2126
2127 #define TOOM_MUL_CUTOFF 350
2128 #define KARATSUBA_MUL_CUTOFF 80
2129
2130 /* c = |a| * |b| using Karatsuba Multiplication using
2131 * three half size multiplications
2132 *
2133 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2134 * let n represent half of the number of digits in
2135 * the min(a,b)
2136 *
2137 * a = a1 * B**n + a0
2138 * b = b1 * B**n + b0
2139 *
2140 * Then, a * b =>
2141 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
2142 *
2143 * Note that a1b1 and a0b0 are used twice and only need to be
2144 * computed once. So in total three half size (half # of
2145 * digit) multiplications are performed, a0b0, a1b1 and
2146 * (a1+b1)(a0+b0)
2147 *
2148 * Note that a multiplication of half the digits requires
2149 * 1/4th the number of single precision multiplications so in
2150 * total after one call 25% of the single precision multiplications
2151 * are saved. Note also that the call to mp_mul can end up back
2152 * in this function if the a0, a1, b0, or b1 are above the threshold.
2153 * This is known as divide-and-conquer and leads to the famous
2154 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2155 * the standard O(N**2) that the baseline/comba methods use.
2156 * Generally though the overhead of this method doesn't pay off
2157 * until a certain size (N ~ 80) is reached.
2158 */
mp_karatsuba_mul(mp_int * a,mp_int * b,mp_int * c)2159 static int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
2160 {
2161 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2162 int B;
2163 int err;
2164
2165 /* default the return code to an error */
2166 err = MP_MEM;
2167
2168 /* min # of digits */
2169 B = MIN (a->used, b->used);
2170
2171 /* now divide in two */
2172 B = (int)((unsigned)B >> 1);
2173
2174 /* init copy all the temps */
2175 if (mp_init_size (&x0, B) != MP_OKAY)
2176 goto ERR;
2177 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2178 goto X0;
2179 if (mp_init_size (&y0, B) != MP_OKAY)
2180 goto X1;
2181 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2182 goto Y0;
2183
2184 /* init temps */
2185 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2186 goto Y1;
2187 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2188 goto T1;
2189 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2190 goto X0Y0;
2191
2192 /* now shift the digits */
2193 x0.used = y0.used = B;
2194 x1.used = a->used - B;
2195 y1.used = b->used - B;
2196
2197 {
2198 int x;
2199 mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2200
2201 /* we copy the digits directly instead of using higher level functions
2202 * since we also need to shift the digits
2203 */
2204 tmpa = a->dp;
2205 tmpb = b->dp;
2206
2207 tmpx = x0.dp;
2208 tmpy = y0.dp;
2209 for (x = 0; x < B; x++) {
2210 *tmpx++ = *tmpa++;
2211 *tmpy++ = *tmpb++;
2212 }
2213
2214 tmpx = x1.dp;
2215 for (x = B; x < a->used; x++) {
2216 *tmpx++ = *tmpa++;
2217 }
2218
2219 tmpy = y1.dp;
2220 for (x = B; x < b->used; x++) {
2221 *tmpy++ = *tmpb++;
2222 }
2223 }
2224
2225 /* only need to clamp the lower words since by definition the
2226 * upper words x1/y1 must have a known number of digits
2227 */
2228 mp_clamp (&x0);
2229 mp_clamp (&y0);
2230
2231 /* now calc the products x0y0 and x1y1 */
2232 /* after this x0 is no longer required, free temp [x0==t2]! */
2233 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2234 goto X1Y1; /* x0y0 = x0*y0 */
2235 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2236 goto X1Y1; /* x1y1 = x1*y1 */
2237
2238 /* now calc x1+x0 and y1+y0 */
2239 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
2240 goto X1Y1; /* t1 = x1 - x0 */
2241 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
2242 goto X1Y1; /* t2 = y1 - y0 */
2243 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2244 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */
2245
2246 /* add x0y0 */
2247 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2248 goto X1Y1; /* t2 = x0y0 + x1y1 */
2249 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
2250 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
2251
2252 /* shift by B */
2253 if (mp_lshd (&t1, B) != MP_OKAY)
2254 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2255 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2256 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2257
2258 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2259 goto X1Y1; /* t1 = x0y0 + t1 */
2260 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2261 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2262
2263 /* Algorithm succeeded set the return code to MP_OKAY */
2264 err = MP_OKAY;
2265
2266 X1Y1:mp_clear (&x1y1);
2267 X0Y0:mp_clear (&x0y0);
2268 T1:mp_clear (&t1);
2269 Y1:mp_clear (&y1);
2270 Y0:mp_clear (&y0);
2271 X1:mp_clear (&x1);
2272 X0:mp_clear (&x0);
2273 ERR:
2274 return err;
2275 }
2276
2277 /* Fast (comba) multiplier
2278 *
2279 * This is the fast column-array [comba] multiplier. It is
2280 * designed to compute the columns of the product first
2281 * then handle the carries afterwards. This has the effect
2282 * of making the nested loops that compute the columns very
2283 * simple and schedulable on super-scalar processors.
2284 *
2285 * This has been modified to produce a variable number of
2286 * digits of output so if say only a half-product is required
2287 * you don't have to compute the upper half (a feature
2288 * required for fast Barrett reduction).
2289 *
2290 * Based on Algorithm 14.12 on pp.595 of HAC.
2291 *
2292 */
fast_s_mp_mul_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2293 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2294 {
2295 int olduse, res, pa, ix, iz;
2296 /*LINTED*/
2297 mp_digit W[MP_WARRAY];
2298 mp_word _W;
2299
2300 /* grow the destination as required */
2301 if (c->alloc < digs) {
2302 if ((res = mp_grow (c, digs)) != MP_OKAY) {
2303 return res;
2304 }
2305 }
2306
2307 /* number of output digits to produce */
2308 pa = MIN(digs, a->used + b->used);
2309
2310 /* clear the carry */
2311 _W = 0;
2312 for (ix = 0; ix < pa; ix++) {
2313 int tx, ty;
2314 int iy;
2315 mp_digit *tmpx, *tmpy;
2316
2317 /* get offsets into the two bignums */
2318 ty = MIN(b->used-1, ix);
2319 tx = ix - ty;
2320
2321 /* setup temp aliases */
2322 tmpx = a->dp + tx;
2323 tmpy = b->dp + ty;
2324
2325 /* this is the number of times the loop will iterrate, essentially
2326 while (tx++ < a->used && ty-- >= 0) { ... }
2327 */
2328 iy = MIN(a->used-tx, ty+1);
2329
2330 /* execute loop */
2331 for (iz = 0; iz < iy; ++iz) {
2332 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2333
2334 }
2335
2336 /* store term */
2337 W[ix] = ((mp_digit)_W) & MP_MASK;
2338
2339 /* make next carry */
2340 _W = _W >> ((mp_word)DIGIT_BIT);
2341 }
2342
2343 /* setup dest */
2344 olduse = c->used;
2345 c->used = pa;
2346
2347 {
2348 mp_digit *tmpc;
2349 tmpc = c->dp;
2350 for (ix = 0; ix < pa+1; ix++) {
2351 /* now extract the previous digit [below the carry] */
2352 *tmpc++ = W[ix];
2353 }
2354
2355 /* clear unused digits [that existed in the old copy of c] */
2356 for (; ix < olduse; ix++) {
2357 *tmpc++ = 0;
2358 }
2359 }
2360 mp_clamp (c);
2361 return MP_OKAY;
2362 }
2363
2364 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */
2365 /* Revision: 1.2 $ */
2366 /* Date: 2011/03/18 16:22:09 $ */
2367
2368
2369 /* multiplies |a| * |b| and only computes upto digs digits of result
2370 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
2371 * many digits of output are created.
2372 */
s_mp_mul_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2373 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2374 {
2375 mp_int t;
2376 int res, pa, pb, ix, iy;
2377 mp_digit u;
2378 mp_word r;
2379 mp_digit tmpx, *tmpt, *tmpy;
2380
2381 /* can we use the fast multiplier? */
2382 if (((unsigned)(digs) < MP_WARRAY) &&
2383 MIN (a->used, b->used) <
2384 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2385 return fast_s_mp_mul_digs (a, b, c, digs);
2386 }
2387
2388 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
2389 return res;
2390 }
2391 t.used = digs;
2392
2393 /* compute the digits of the product directly */
2394 pa = a->used;
2395 for (ix = 0; ix < pa; ix++) {
2396 /* set the carry to zero */
2397 u = 0;
2398
2399 /* limit ourselves to making digs digits of output */
2400 pb = MIN (b->used, digs - ix);
2401
2402 /* setup some aliases */
2403 /* copy of the digit from a used within the nested loop */
2404 tmpx = a->dp[ix];
2405
2406 /* an alias for the destination shifted ix places */
2407 tmpt = t.dp + ix;
2408
2409 /* an alias for the digits of b */
2410 tmpy = b->dp;
2411
2412 /* compute the columns of the output and propagate the carry */
2413 for (iy = 0; iy < pb; iy++) {
2414 /* compute the column as a mp_word */
2415 r = ((mp_word)*tmpt) +
2416 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2417 ((mp_word) u);
2418
2419 /* the new column is the lower part of the result */
2420 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2421
2422 /* get the carry word from the result */
2423 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2424 }
2425 /* set carry if it is placed below digs */
2426 if (ix + iy < digs) {
2427 *tmpt = u;
2428 }
2429 }
2430
2431 mp_clamp (&t);
2432 mp_exch (&t, c);
2433
2434 mp_clear (&t);
2435 return MP_OKAY;
2436 }
2437
2438 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */
2439 /* Revision: 1.3 $ */
2440 /* Date: 2011/03/18 16:43:04 $ */
2441
2442 /* high level multiplication (handles sign) */
2443 static int
mp_mul(mp_int * a,mp_int * b,mp_int * c)2444 mp_mul(mp_int * a, mp_int * b, mp_int * c)
2445 {
2446 int res, neg;
2447 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2448
2449 /* use Toom-Cook? */
2450 if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
2451 res = mp_toom_mul(a, b, c);
2452 } else
2453 /* use Karatsuba? */
2454 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2455 res = mp_karatsuba_mul (a, b, c);
2456 } else
2457 {
2458 /* can we use the fast multiplier?
2459 *
2460 * The fast multiplier can be used if the output will
2461 * have less than MP_WARRAY digits and the number of
2462 * digits won't affect carry propagation
2463 */
2464 int digs = a->used + b->used + 1;
2465
2466 if (((unsigned)digs < MP_WARRAY) &&
2467 MIN(a->used, b->used) <=
2468 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2469 res = fast_s_mp_mul_digs (a, b, c, digs);
2470 } else
2471 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2472
2473 }
2474 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2475 return res;
2476 }
2477
2478 /* this is a modified version of fast_s_mul_digs that only produces
2479 * output digits *above* digs. See the comments for fast_s_mul_digs
2480 * to see how it works.
2481 *
2482 * This is used in the Barrett reduction since for one of the multiplications
2483 * only the higher digits were needed. This essentially halves the work.
2484 *
2485 * Based on Algorithm 14.12 on pp.595 of HAC.
2486 */
2487 static int
fast_s_mp_mul_high_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2488 fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2489 {
2490 int olduse, res, pa, ix, iz;
2491 mp_digit W[MP_WARRAY];
2492 mp_word _W;
2493
2494 /* grow the destination as required */
2495 pa = a->used + b->used;
2496 if (c->alloc < pa) {
2497 if ((res = mp_grow (c, pa)) != MP_OKAY) {
2498 return res;
2499 }
2500 }
2501
2502 /* number of output digits to produce */
2503 pa = a->used + b->used;
2504 _W = 0;
2505 for (ix = digs; ix < pa; ix++) {
2506 int tx, ty, iy;
2507 mp_digit *tmpx, *tmpy;
2508
2509 /* get offsets into the two bignums */
2510 ty = MIN(b->used-1, ix);
2511 tx = ix - ty;
2512
2513 /* setup temp aliases */
2514 tmpx = a->dp + tx;
2515 tmpy = b->dp + ty;
2516
2517 /* this is the number of times the loop will iterrate, essentially its
2518 while (tx++ < a->used && ty-- >= 0) { ... }
2519 */
2520 iy = MIN(a->used-tx, ty+1);
2521
2522 /* execute loop */
2523 for (iz = 0; iz < iy; iz++) {
2524 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2525 }
2526
2527 /* store term */
2528 W[ix] = ((mp_digit)_W) & MP_MASK;
2529
2530 /* make next carry */
2531 _W = _W >> ((mp_word)DIGIT_BIT);
2532 }
2533
2534 /* setup dest */
2535 olduse = c->used;
2536 c->used = pa;
2537
2538 {
2539 mp_digit *tmpc;
2540
2541 tmpc = c->dp + digs;
2542 for (ix = digs; ix < pa; ix++) {
2543 /* now extract the previous digit [below the carry] */
2544 *tmpc++ = W[ix];
2545 }
2546
2547 /* clear unused digits [that existed in the old copy of c] */
2548 for (; ix < olduse; ix++) {
2549 *tmpc++ = 0;
2550 }
2551 }
2552 mp_clamp (c);
2553 return MP_OKAY;
2554 }
2555
2556 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */
2557 /* Revision: 1.1.1.1 $ */
2558 /* Date: 2011/03/12 22:58:18 $ */
2559
2560 /* multiplies |a| * |b| and does not compute the lower digs digits
2561 * [meant to get the higher part of the product]
2562 */
2563 static int
s_mp_mul_high_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2564 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2565 {
2566 mp_int t;
2567 int res, pa, pb, ix, iy;
2568 mp_digit u;
2569 mp_word r;
2570 mp_digit tmpx, *tmpt, *tmpy;
2571
2572 /* can we use the fast multiplier? */
2573 if (((unsigned)(a->used + b->used + 1) < MP_WARRAY)
2574 && MIN (a->used, b->used) < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2575 return fast_s_mp_mul_high_digs (a, b, c, digs);
2576 }
2577
2578 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
2579 return res;
2580 }
2581 t.used = a->used + b->used + 1;
2582
2583 pa = a->used;
2584 pb = b->used;
2585 for (ix = 0; ix < pa; ix++) {
2586 /* clear the carry */
2587 u = 0;
2588
2589 /* left hand side of A[ix] * B[iy] */
2590 tmpx = a->dp[ix];
2591
2592 /* alias to the address of where the digits will be stored */
2593 tmpt = &(t.dp[digs]);
2594
2595 /* alias for where to read the right hand side from */
2596 tmpy = b->dp + (digs - ix);
2597
2598 for (iy = digs - ix; iy < pb; iy++) {
2599 /* calculate the double precision result */
2600 r = ((mp_word)*tmpt) +
2601 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2602 ((mp_word) u);
2603
2604 /* get the lower part */
2605 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2606
2607 /* carry the carry */
2608 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2609 }
2610 *tmpt = u;
2611 }
2612 mp_clamp (&t);
2613 mp_exch (&t, c);
2614 mp_clear (&t);
2615 return MP_OKAY;
2616 }
2617
2618 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */
2619 /* Revision: 1.3 $ */
2620 /* Date: 2011/03/18 16:43:04 $ */
2621
2622 /* reduces x mod m, assumes 0 < x < m**2, mu is
2623 * precomputed via mp_reduce_setup.
2624 * From HAC pp.604 Algorithm 14.42
2625 */
2626 static int
mp_reduce(mp_int * x,mp_int * m,mp_int * mu)2627 mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
2628 {
2629 mp_int q;
2630 int res, um = m->used;
2631
2632 /* q = x */
2633 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
2634 return res;
2635 }
2636
2637 /* q1 = x / b**(k-1) */
2638 mp_rshd (&q, um - 1);
2639
2640 /* according to HAC this optimization is ok */
2641 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
2642 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
2643 goto CLEANUP;
2644 }
2645 } else {
2646 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
2647 goto CLEANUP;
2648 }
2649 }
2650
2651 /* q3 = q2 / b**(k+1) */
2652 mp_rshd (&q, um + 1);
2653
2654 /* x = x mod b**(k+1), quick (no division) */
2655 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
2656 goto CLEANUP;
2657 }
2658
2659 /* q = q * m mod b**(k+1), quick (no division) */
2660 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
2661 goto CLEANUP;
2662 }
2663
2664 /* x = x - q */
2665 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
2666 goto CLEANUP;
2667 }
2668
2669 /* If x < 0, add b**(k+1) to it */
2670 if (mp_cmp_d (x, 0) == MP_LT) {
2671 mp_set (&q, 1);
2672 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
2673 goto CLEANUP;
2674 if ((res = mp_add (x, &q, x)) != MP_OKAY)
2675 goto CLEANUP;
2676 }
2677
2678 /* Back off if it's too big */
2679 while (mp_cmp (x, m) != MP_LT) {
2680 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
2681 goto CLEANUP;
2682 }
2683 }
2684
2685 CLEANUP:
2686 mp_clear (&q);
2687
2688 return res;
2689 }
2690
2691 /* determines the setup value */
mp_reduce_2k_setup_l(mp_int * a,mp_int * d)2692 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
2693 {
2694 int res;
2695 mp_int tmp;
2696
2697 if ((res = mp_init(&tmp)) != MP_OKAY) {
2698 return res;
2699 }
2700
2701 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
2702 goto ERR;
2703 }
2704
2705 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
2706 goto ERR;
2707 }
2708
2709 ERR:
2710 mp_clear(&tmp);
2711 return res;
2712 }
2713
2714 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */
2715 /* Revision: 1.1.1.1 $ */
2716 /* Date: 2011/03/12 22:58:18 $ */
2717
2718 /* reduces a modulo n where n is of the form 2**p - d
2719 This differs from reduce_2k since "d" can be larger
2720 than a single digit.
2721 */
2722 static int
mp_reduce_2k_l(mp_int * a,mp_int * n,mp_int * d)2723 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
2724 {
2725 mp_int q;
2726 int p, res;
2727
2728 if ((res = mp_init(&q)) != MP_OKAY) {
2729 return res;
2730 }
2731
2732 p = mp_count_bits(n);
2733 top:
2734 /* q = a/2**p, a = a mod 2**p */
2735 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2736 goto ERR;
2737 }
2738
2739 /* q = q * d */
2740 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
2741 goto ERR;
2742 }
2743
2744 /* a = a + q */
2745 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2746 goto ERR;
2747 }
2748
2749 if (mp_cmp_mag(a, n) != MP_LT) {
2750 s_mp_sub(a, n, a);
2751 goto top;
2752 }
2753
2754 ERR:
2755 mp_clear(&q);
2756 return res;
2757 }
2758
2759 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */
2760 /* Revision: 1.1.1.1 $ */
2761 /* Date: 2011/03/12 22:58:18 $ */
2762
2763 /* squaring using Toom-Cook 3-way algorithm */
2764 static int
mp_toom_sqr(mp_int * a,mp_int * b)2765 mp_toom_sqr(mp_int *a, mp_int *b)
2766 {
2767 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
2768 int res, B;
2769
2770 /* init temps */
2771 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
2772 return res;
2773 }
2774
2775 /* B */
2776 B = a->used / 3;
2777
2778 /* a = a2 * B**2 + a1 * B + a0 */
2779 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
2780 goto ERR;
2781 }
2782
2783 if ((res = mp_copy(a, &a1)) != MP_OKAY) {
2784 goto ERR;
2785 }
2786 mp_rshd(&a1, B);
2787 mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
2788
2789 if ((res = mp_copy(a, &a2)) != MP_OKAY) {
2790 goto ERR;
2791 }
2792 mp_rshd(&a2, B*2);
2793
2794 /* w0 = a0*a0 */
2795 if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
2796 goto ERR;
2797 }
2798
2799 /* w4 = a2 * a2 */
2800 if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
2801 goto ERR;
2802 }
2803
2804 /* w1 = (a2 + 2(a1 + 2a0))**2 */
2805 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
2806 goto ERR;
2807 }
2808 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2809 goto ERR;
2810 }
2811 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
2812 goto ERR;
2813 }
2814 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
2815 goto ERR;
2816 }
2817
2818 if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
2819 goto ERR;
2820 }
2821
2822 /* w3 = (a0 + 2(a1 + 2a2))**2 */
2823 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
2824 goto ERR;
2825 }
2826 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2827 goto ERR;
2828 }
2829 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
2830 goto ERR;
2831 }
2832 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2833 goto ERR;
2834 }
2835
2836 if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
2837 goto ERR;
2838 }
2839
2840
2841 /* w2 = (a2 + a1 + a0)**2 */
2842 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
2843 goto ERR;
2844 }
2845 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2846 goto ERR;
2847 }
2848 if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
2849 goto ERR;
2850 }
2851
2852 /* now solve the matrix
2853
2854 0 0 0 0 1
2855 1 2 4 8 16
2856 1 1 1 1 1
2857 16 8 4 2 1
2858 1 0 0 0 0
2859
2860 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
2861 */
2862
2863 /* r1 - r4 */
2864 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
2865 goto ERR;
2866 }
2867 /* r3 - r0 */
2868 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
2869 goto ERR;
2870 }
2871 /* r1/2 */
2872 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
2873 goto ERR;
2874 }
2875 /* r3/2 */
2876 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
2877 goto ERR;
2878 }
2879 /* r2 - r0 - r4 */
2880 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
2881 goto ERR;
2882 }
2883 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
2884 goto ERR;
2885 }
2886 /* r1 - r2 */
2887 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2888 goto ERR;
2889 }
2890 /* r3 - r2 */
2891 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2892 goto ERR;
2893 }
2894 /* r1 - 8r0 */
2895 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
2896 goto ERR;
2897 }
2898 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
2899 goto ERR;
2900 }
2901 /* r3 - 8r4 */
2902 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
2903 goto ERR;
2904 }
2905 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
2906 goto ERR;
2907 }
2908 /* 3r2 - r1 - r3 */
2909 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
2910 goto ERR;
2911 }
2912 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
2913 goto ERR;
2914 }
2915 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
2916 goto ERR;
2917 }
2918 /* r1 - r2 */
2919 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2920 goto ERR;
2921 }
2922 /* r3 - r2 */
2923 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2924 goto ERR;
2925 }
2926 /* r1/3 */
2927 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
2928 goto ERR;
2929 }
2930 /* r3/3 */
2931 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
2932 goto ERR;
2933 }
2934
2935 /* at this point shift W[n] by B*n */
2936 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
2937 goto ERR;
2938 }
2939 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
2940 goto ERR;
2941 }
2942 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
2943 goto ERR;
2944 }
2945 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
2946 goto ERR;
2947 }
2948
2949 if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
2950 goto ERR;
2951 }
2952 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2953 goto ERR;
2954 }
2955 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2956 goto ERR;
2957 }
2958 if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
2959 goto ERR;
2960 }
2961
2962 ERR:
2963 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
2964 return res;
2965 }
2966
2967
2968 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */
2969 /* Revision: 1.1.1.1 $ */
2970 /* Date: 2011/03/12 22:58:18 $ */
2971
2972 /* Karatsuba squaring, computes b = a*a using three
2973 * half size squarings
2974 *
2975 * See comments of karatsuba_mul for details. It
2976 * is essentially the same algorithm but merely
2977 * tuned to perform recursive squarings.
2978 */
mp_karatsuba_sqr(mp_int * a,mp_int * b)2979 static int mp_karatsuba_sqr (mp_int * a, mp_int * b)
2980 {
2981 mp_int x0, x1, t1, t2, x0x0, x1x1;
2982 int B, err;
2983
2984 err = MP_MEM;
2985
2986 /* min # of digits */
2987 B = a->used;
2988
2989 /* now divide in two */
2990 B = (unsigned)B >> 1;
2991
2992 /* init copy all the temps */
2993 if (mp_init_size (&x0, B) != MP_OKAY)
2994 goto ERR;
2995 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2996 goto X0;
2997
2998 /* init temps */
2999 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
3000 goto X1;
3001 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
3002 goto T1;
3003 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
3004 goto T2;
3005 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
3006 goto X0X0;
3007
3008 {
3009 int x;
3010 mp_digit *dst, *src;
3011
3012 src = a->dp;
3013
3014 /* now shift the digits */
3015 dst = x0.dp;
3016 for (x = 0; x < B; x++) {
3017 *dst++ = *src++;
3018 }
3019
3020 dst = x1.dp;
3021 for (x = B; x < a->used; x++) {
3022 *dst++ = *src++;
3023 }
3024 }
3025
3026 x0.used = B;
3027 x1.used = a->used - B;
3028
3029 mp_clamp (&x0);
3030
3031 /* now calc the products x0*x0 and x1*x1 */
3032 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
3033 goto X1X1; /* x0x0 = x0*x0 */
3034 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
3035 goto X1X1; /* x1x1 = x1*x1 */
3036
3037 /* now calc (x1+x0)**2 */
3038 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
3039 goto X1X1; /* t1 = x1 - x0 */
3040 if (mp_sqr (&t1, &t1) != MP_OKAY)
3041 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
3042
3043 /* add x0y0 */
3044 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
3045 goto X1X1; /* t2 = x0x0 + x1x1 */
3046 if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY)
3047 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
3048
3049 /* shift by B */
3050 if (mp_lshd (&t1, B) != MP_OKAY)
3051 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
3052 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
3053 goto X1X1; /* x1x1 = x1x1 << 2*B */
3054
3055 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
3056 goto X1X1; /* t1 = x0x0 + t1 */
3057 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
3058 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
3059
3060 err = MP_OKAY;
3061
3062 X1X1:mp_clear (&x1x1);
3063 X0X0:mp_clear (&x0x0);
3064 T2:mp_clear (&t2);
3065 T1:mp_clear (&t1);
3066 X1:mp_clear (&x1);
3067 X0:mp_clear (&x0);
3068 ERR:
3069 return err;
3070 }
3071
3072 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */
3073 /* Revision: 1.2 $ */
3074 /* Date: 2011/03/12 23:43:54 $ */
3075
3076 /* the jist of squaring...
3077 * you do like mult except the offset of the tmpx [one that
3078 * starts closer to zero] can't equal the offset of tmpy.
3079 * So basically you set up iy like before then you min it with
3080 * (ty-tx) so that it never happens. You double all those
3081 * you add in the inner loop
3082
3083 After that loop you do the squares and add them in.
3084 */
3085
fast_s_mp_sqr(mp_int * a,mp_int * b)3086 static int fast_s_mp_sqr (mp_int * a, mp_int * b)
3087 {
3088 int olduse, res, pa, ix, iz;
3089 mp_digit W[MP_WARRAY], *tmpx;
3090 mp_word W1;
3091
3092 /* grow the destination as required */
3093 pa = a->used + a->used;
3094 if (b->alloc < pa) {
3095 if ((res = mp_grow (b, pa)) != MP_OKAY) {
3096 return res;
3097 }
3098 }
3099
3100 /* number of output digits to produce */
3101 W1 = 0;
3102 for (ix = 0; ix < pa; ix++) {
3103 int tx, ty, iy;
3104 mp_word _W;
3105 mp_digit *tmpy;
3106
3107 /* clear counter */
3108 _W = 0;
3109
3110 /* get offsets into the two bignums */
3111 ty = MIN(a->used-1, ix);
3112 tx = ix - ty;
3113
3114 /* setup temp aliases */
3115 tmpx = a->dp + tx;
3116 tmpy = a->dp + ty;
3117
3118 /* this is the number of times the loop will iterrate, essentially
3119 while (tx++ < a->used && ty-- >= 0) { ... }
3120 */
3121 iy = MIN(a->used-tx, ty+1);
3122
3123 /* now for squaring tx can never equal ty
3124 * we halve the distance since they approach at a rate of 2x
3125 * and we have to round because odd cases need to be executed
3126 */
3127 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1));
3128
3129 /* execute loop */
3130 for (iz = 0; iz < iy; iz++) {
3131 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3132 }
3133
3134 /* double the inner product and add carry */
3135 _W = _W + _W + W1;
3136
3137 /* even columns have the square term in them */
3138 if ((ix&1) == 0) {
3139 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]);
3140 }
3141
3142 /* store it */
3143 W[ix] = (mp_digit)(_W & MP_MASK);
3144
3145 /* make next carry */
3146 W1 = _W >> ((mp_word)DIGIT_BIT);
3147 }
3148
3149 /* setup dest */
3150 olduse = b->used;
3151 b->used = a->used+a->used;
3152
3153 {
3154 mp_digit *tmpb;
3155 tmpb = b->dp;
3156 for (ix = 0; ix < pa; ix++) {
3157 *tmpb++ = W[ix] & MP_MASK;
3158 }
3159
3160 /* clear unused digits [that existed in the old copy of c] */
3161 for (; ix < olduse; ix++) {
3162 *tmpb++ = 0;
3163 }
3164 }
3165 mp_clamp (b);
3166 return MP_OKAY;
3167 }
3168
3169 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */
3170 /* Revision: 1.3 $ */
3171 /* Date: 2011/03/18 16:43:04 $ */
3172
3173 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
3174 static int
s_mp_sqr(mp_int * a,mp_int * b)3175 s_mp_sqr (mp_int * a, mp_int * b)
3176 {
3177 mp_int t;
3178 int res, ix, iy, pa;
3179 mp_word r;
3180 mp_digit u, tmpx, *tmpt;
3181
3182 pa = a->used;
3183 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3184 return res;
3185 }
3186
3187 /* default used is maximum possible size */
3188 t.used = 2*pa + 1;
3189
3190 for (ix = 0; ix < pa; ix++) {
3191 /* first calculate the digit at 2*ix */
3192 /* calculate double precision result */
3193 r = ((mp_word) t.dp[2*ix]) +
3194 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3195
3196 /* store lower part in result */
3197 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3198
3199 /* get the carry */
3200 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3201
3202 /* left hand side of A[ix] * A[iy] */
3203 tmpx = a->dp[ix];
3204
3205 /* alias for where to store the results */
3206 tmpt = t.dp + (2*ix + 1);
3207
3208 for (iy = ix + 1; iy < pa; iy++) {
3209 /* first calculate the product */
3210 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3211
3212 /* now calculate the double precision result, note we use
3213 * addition instead of *2 since it's easier to optimize
3214 */
3215 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3216
3217 /* store lower part */
3218 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3219
3220 /* get carry */
3221 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3222 }
3223 /* propagate upwards */
3224 while (u != ((mp_digit) 0)) {
3225 r = ((mp_word) *tmpt) + ((mp_word) u);
3226 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3227 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3228 }
3229 }
3230
3231 mp_clamp (&t);
3232 mp_exch (&t, b);
3233 mp_clear (&t);
3234 return MP_OKAY;
3235 }
3236
3237 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */
3238 /* Revision: 1.1.1.1 $ */
3239 /* Date: 2011/03/12 22:58:18 $ */
3240
3241 #define TOOM_SQR_CUTOFF 400
3242 #define KARATSUBA_SQR_CUTOFF 120
3243
3244 /* computes b = a*a */
3245 static int
mp_sqr(mp_int * a,mp_int * b)3246 mp_sqr (mp_int * a, mp_int * b)
3247 {
3248 int res;
3249
3250 /* use Toom-Cook? */
3251 if (a->used >= TOOM_SQR_CUTOFF) {
3252 res = mp_toom_sqr(a, b);
3253 /* Karatsuba? */
3254 } else
3255 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3256 res = mp_karatsuba_sqr (a, b);
3257 } else
3258 {
3259 /* can we use the fast comba multiplier? */
3260 if (((unsigned)a->used * 2 + 1) < MP_WARRAY &&
3261 a->used <
3262 (1 << (unsigned)(sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3263 res = fast_s_mp_sqr (a, b);
3264 } else
3265 res = s_mp_sqr (a, b);
3266 }
3267 b->sign = MP_ZPOS;
3268 return res;
3269 }
3270
3271 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */
3272 /* Revision: 1.3 $ */
3273 /* Date: 2011/03/18 16:43:04 $ */
3274
3275 #define TAB_SIZE 256
3276
s_mp_exptmod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)3277 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3278 {
3279 mp_int M[TAB_SIZE], res, mu;
3280 mp_digit buf;
3281 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3282 int (*redux)(mp_int*,mp_int*,mp_int*);
3283
3284 /* find window size */
3285 x = mp_count_bits (X);
3286 if (x <= 7) {
3287 winsize = 2;
3288 } else if (x <= 36) {
3289 winsize = 3;
3290 } else if (x <= 140) {
3291 winsize = 4;
3292 } else if (x <= 450) {
3293 winsize = 5;
3294 } else if (x <= 1303) {
3295 winsize = 6;
3296 } else if (x <= 3529) {
3297 winsize = 7;
3298 } else {
3299 winsize = 8;
3300 }
3301
3302 /* init M array */
3303 /* init first cell */
3304 if ((err = mp_init(&M[1])) != MP_OKAY) {
3305 return err;
3306 }
3307
3308 /* now init the second half of the array */
3309 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3310 if ((err = mp_init(&M[x])) != MP_OKAY) {
3311 for (y = 1<<(winsize-1); y < x; y++) {
3312 mp_clear (&M[y]);
3313 }
3314 mp_clear(&M[1]);
3315 return err;
3316 }
3317 }
3318
3319 /* create mu, used for Barrett reduction */
3320 if ((err = mp_init (&mu)) != MP_OKAY) {
3321 goto LBL_M;
3322 }
3323
3324 if (redmode == 0) {
3325 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3326 goto LBL_MU;
3327 }
3328 redux = mp_reduce;
3329 } else {
3330 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3331 goto LBL_MU;
3332 }
3333 redux = mp_reduce_2k_l;
3334 }
3335
3336 /* create M table
3337 *
3338 * The M table contains powers of the base,
3339 * e.g. M[x] = G**x mod P
3340 *
3341 * The first half of the table is not
3342 * computed though accept for M[0] and M[1]
3343 */
3344 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3345 goto LBL_MU;
3346 }
3347
3348 /* compute the value at M[1<<(winsize-1)] by squaring
3349 * M[1] (winsize-1) times
3350 */
3351 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
3352 goto LBL_MU;
3353 }
3354
3355 for (x = 0; x < (winsize - 1); x++) {
3356 /* square it */
3357 if ((err = mp_sqr (&M[1 << (winsize - 1)],
3358 &M[1 << (winsize - 1)])) != MP_OKAY) {
3359 goto LBL_MU;
3360 }
3361
3362 /* reduce modulo P */
3363 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
3364 goto LBL_MU;
3365 }
3366 }
3367
3368 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3369 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3370 */
3371 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3372 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3373 goto LBL_MU;
3374 }
3375 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3376 goto LBL_MU;
3377 }
3378 }
3379
3380 /* setup result */
3381 if ((err = mp_init (&res)) != MP_OKAY) {
3382 goto LBL_MU;
3383 }
3384 mp_set (&res, 1);
3385
3386 /* set initial mode and bit cnt */
3387 mode = 0;
3388 bitcnt = 1;
3389 buf = 0;
3390 digidx = X->used - 1;
3391 bitcpy = 0;
3392 bitbuf = 0;
3393
3394 for (;;) {
3395 /* grab next digit as required */
3396 if (--bitcnt == 0) {
3397 /* if digidx == -1 we are out of digits */
3398 if (digidx == -1) {
3399 break;
3400 }
3401 /* read next digit and reset the bitcnt */
3402 buf = X->dp[digidx--];
3403 bitcnt = (int) DIGIT_BIT;
3404 }
3405
3406 /* grab the next msb from the exponent */
3407 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3408 buf <<= (mp_digit)1;
3409
3410 /* if the bit is zero and mode == 0 then we ignore it
3411 * These represent the leading zero bits before the first 1 bit
3412 * in the exponent. Technically this opt is not required but it
3413 * does lower the # of trivial squaring/reductions used
3414 */
3415 if (mode == 0 && y == 0) {
3416 continue;
3417 }
3418
3419 /* if the bit is zero and mode == 1 then we square */
3420 if (mode == 1 && y == 0) {
3421 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3422 goto LBL_RES;
3423 }
3424 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3425 goto LBL_RES;
3426 }
3427 continue;
3428 }
3429
3430 /* else we add it to the window */
3431 bitbuf |= (y << (winsize - ++bitcpy));
3432 mode = 2;
3433
3434 if (bitcpy == winsize) {
3435 /* ok window is filled so square as required and multiply */
3436 /* square first */
3437 for (x = 0; x < winsize; x++) {
3438 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3439 goto LBL_RES;
3440 }
3441 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3442 goto LBL_RES;
3443 }
3444 }
3445
3446 /* then multiply */
3447 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3448 goto LBL_RES;
3449 }
3450 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3451 goto LBL_RES;
3452 }
3453
3454 /* empty window and reset */
3455 bitcpy = 0;
3456 bitbuf = 0;
3457 mode = 1;
3458 }
3459 }
3460
3461 /* if bits remain then square/multiply */
3462 if (mode == 2 && bitcpy > 0) {
3463 /* square then multiply if the bit is set */
3464 for (x = 0; x < bitcpy; x++) {
3465 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3466 goto LBL_RES;
3467 }
3468 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3469 goto LBL_RES;
3470 }
3471
3472 bitbuf <<= 1;
3473 if ((bitbuf & (1 << winsize)) != 0) {
3474 /* then multiply */
3475 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3476 goto LBL_RES;
3477 }
3478 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3479 goto LBL_RES;
3480 }
3481 }
3482 }
3483 }
3484
3485 mp_exch (&res, Y);
3486 err = MP_OKAY;
3487 LBL_RES:mp_clear (&res);
3488 LBL_MU:mp_clear (&mu);
3489 LBL_M:
3490 mp_clear(&M[1]);
3491 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3492 mp_clear (&M[x]);
3493 }
3494 return err;
3495 }
3496
3497 /* determines if a number is a valid DR modulus */
3498 static int
mp_dr_is_modulus(mp_int * a)3499 mp_dr_is_modulus(mp_int *a)
3500 {
3501 int ix;
3502
3503 /* must be at least two digits */
3504 if (a->used < 2) {
3505 return 0;
3506 }
3507
3508 /* must be of the form b**k - a [a <= b] so all
3509 * but the first digit must be equal to -1 (mod b).
3510 */
3511 for (ix = 1; ix < a->used; ix++) {
3512 if (a->dp[ix] != MP_MASK) {
3513 return 0;
3514 }
3515 }
3516 return 1;
3517 }
3518
3519 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */
3520 /* Revision: 1.1.1.1 $ */
3521 /* Date: 2011/03/12 22:58:18 $ */
3522
3523 /* determines if mp_reduce_2k can be used */
mp_reduce_is_2k(mp_int * a)3524 static int mp_reduce_is_2k(mp_int *a)
3525 {
3526 int ix, iy, iw;
3527 mp_digit iz;
3528
3529 if (a->used == 0) {
3530 return MP_NO;
3531 } else if (a->used == 1) {
3532 return MP_YES;
3533 } else if (a->used > 1) {
3534 iy = mp_count_bits(a);
3535 iz = 1;
3536 iw = 1;
3537
3538 /* Test every bit from the second digit up, must be 1 */
3539 for (ix = DIGIT_BIT; ix < iy; ix++) {
3540 if ((a->dp[iw] & iz) == 0) {
3541 return MP_NO;
3542 }
3543 iz <<= 1;
3544 if (iz > (mp_digit)MP_MASK) {
3545 ++iw;
3546 iz = 1;
3547 }
3548 }
3549 }
3550 return MP_YES;
3551 }
3552
3553 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */
3554 /* Revision: 1.1.1.1 $ */
3555 /* Date: 2011/03/12 22:58:18 $ */
3556
3557
3558 /* d = a * b (mod c) */
3559 static int
mp_mulmod(mp_int * d,mp_int * a,mp_int * b,mp_int * c)3560 mp_mulmod (mp_int *d, mp_int * a, mp_int * b, mp_int * c)
3561 {
3562 int res;
3563 mp_int t;
3564
3565 if ((res = mp_init (&t)) != MP_OKAY) {
3566 return res;
3567 }
3568
3569 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3570 mp_clear (&t);
3571 return res;
3572 }
3573 res = mp_mod (&t, c, d);
3574 mp_clear (&t);
3575 return res;
3576 }
3577
3578 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */
3579 /* Revision: 1.1.1.1 $ */
3580 /* Date: 2011/03/12 22:58:18 $ */
3581
3582 /* setups the montgomery reduction stuff */
3583 static int
mp_montgomery_setup(mp_int * n,mp_digit * rho)3584 mp_montgomery_setup (mp_int * n, mp_digit * rho)
3585 {
3586 mp_digit x, b;
3587
3588 /* fast inversion mod 2**k
3589 *
3590 * Based on the fact that
3591 *
3592 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3593 * => 2*X*A - X*X*A*A = 1
3594 * => 2*(1) - (1) = 1
3595 */
3596 b = n->dp[0];
3597
3598 if ((b & 1) == 0) {
3599 return MP_VAL;
3600 }
3601
3602 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3603 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
3604 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
3605 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
3606 if (/*CONSTCOND*/sizeof(mp_digit) == 8) {
3607 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
3608 }
3609
3610 /* rho = -1/m mod b */
3611 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3612
3613 return MP_OKAY;
3614 }
3615
3616 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */
3617 /* Revision: 1.1.1.1 $ */
3618 /* Date: 2011/03/12 22:58:18 $ */
3619
3620 /* computes xR**-1 == x (mod N) via Montgomery Reduction
3621 *
3622 * This is an optimized implementation of montgomery_reduce
3623 * which uses the comba method to quickly calculate the columns of the
3624 * reduction.
3625 *
3626 * Based on Algorithm 14.32 on pp.601 of HAC.
3627 */
3628 static int
fast_mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3629 fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
3630 {
3631 int ix, res, olduse;
3632 /*LINTED*/
3633 mp_word W[MP_WARRAY];
3634
3635 /* get old used count */
3636 olduse = x->used;
3637
3638 /* grow a as required */
3639 if (x->alloc < n->used + 1) {
3640 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
3641 return res;
3642 }
3643 }
3644
3645 /* first we have to get the digits of the input into
3646 * an array of double precision words W[...]
3647 */
3648 {
3649 mp_word *_W;
3650 mp_digit *tmpx;
3651
3652 /* alias for the W[] array */
3653 _W = W;
3654
3655 /* alias for the digits of x*/
3656 tmpx = x->dp;
3657
3658 /* copy the digits of a into W[0..a->used-1] */
3659 for (ix = 0; ix < x->used; ix++) {
3660 *_W++ = *tmpx++;
3661 }
3662
3663 /* zero the high words of W[a->used..m->used*2] */
3664 for (; ix < n->used * 2 + 1; ix++) {
3665 *_W++ = 0;
3666 }
3667 }
3668
3669 /* now we proceed to zero successive digits
3670 * from the least significant upwards
3671 */
3672 for (ix = 0; ix < n->used; ix++) {
3673 /* mu = ai * m' mod b
3674 *
3675 * We avoid a double precision multiplication (which isn't required)
3676 * by casting the value down to a mp_digit. Note this requires
3677 * that W[ix-1] have the carry cleared (see after the inner loop)
3678 */
3679 mp_digit mu;
3680 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
3681
3682 /* a = a + mu * m * b**i
3683 *
3684 * This is computed in place and on the fly. The multiplication
3685 * by b**i is handled by offseting which columns the results
3686 * are added to.
3687 *
3688 * Note the comba method normally doesn't handle carries in the
3689 * inner loop In this case we fix the carry from the previous
3690 * column since the Montgomery reduction requires digits of the
3691 * result (so far) [see above] to work. This is
3692 * handled by fixing up one carry after the inner loop. The
3693 * carry fixups are done in order so after these loops the
3694 * first m->used words of W[] have the carries fixed
3695 */
3696 {
3697 int iy;
3698 mp_digit *tmpn;
3699 mp_word *_W;
3700
3701 /* alias for the digits of the modulus */
3702 tmpn = n->dp;
3703
3704 /* Alias for the columns set by an offset of ix */
3705 _W = W + ix;
3706
3707 /* inner loop */
3708 for (iy = 0; iy < n->used; iy++) {
3709 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
3710 }
3711 }
3712
3713 /* now fix carry for next digit, W[ix+1] */
3714 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
3715 }
3716
3717 /* now we have to propagate the carries and
3718 * shift the words downward [all those least
3719 * significant digits we zeroed].
3720 */
3721 {
3722 mp_digit *tmpx;
3723 mp_word *_W, *_W1;
3724
3725 /* nox fix rest of carries */
3726
3727 /* alias for current word */
3728 _W1 = W + ix;
3729
3730 /* alias for next word, where the carry goes */
3731 _W = W + ++ix;
3732
3733 for (; ix <= n->used * 2 + 1; ix++) {
3734 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
3735 }
3736
3737 /* copy out, A = A/b**n
3738 *
3739 * The result is A/b**n but instead of converting from an
3740 * array of mp_word to mp_digit than calling mp_rshd
3741 * we just copy them in the right order
3742 */
3743
3744 /* alias for destination word */
3745 tmpx = x->dp;
3746
3747 /* alias for shifted double precision result */
3748 _W = W + n->used;
3749
3750 for (ix = 0; ix < n->used + 1; ix++) {
3751 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
3752 }
3753
3754 /* zero oldused digits, if the input a was larger than
3755 * m->used+1 we'll have to clear the digits
3756 */
3757 for (; ix < olduse; ix++) {
3758 *tmpx++ = 0;
3759 }
3760 }
3761
3762 /* set the max used and clamp */
3763 x->used = n->used + 1;
3764 mp_clamp (x);
3765
3766 /* if A >= m then A = A - m */
3767 if (mp_cmp_mag (x, n) != MP_LT) {
3768 return s_mp_sub (x, n, x);
3769 }
3770 return MP_OKAY;
3771 }
3772
3773 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */
3774 /* Revision: 1.2 $ */
3775 /* Date: 2011/03/18 16:22:09 $ */
3776
3777 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
3778 static int
mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3779 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
3780 {
3781 int ix, res, digs;
3782 mp_digit mu;
3783
3784 /* can the fast reduction [comba] method be used?
3785 *
3786 * Note that unlike in mul you're safely allowed *less*
3787 * than the available columns [255 per default] since carries
3788 * are fixed up in the inner loop.
3789 */
3790 digs = n->used * 2 + 1;
3791 if (((unsigned)digs < MP_WARRAY) &&
3792 n->used <
3793 (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3794 return fast_mp_montgomery_reduce (x, n, rho);
3795 }
3796
3797 /* grow the input as required */
3798 if (x->alloc < digs) {
3799 if ((res = mp_grow (x, digs)) != MP_OKAY) {
3800 return res;
3801 }
3802 }
3803 x->used = digs;
3804
3805 for (ix = 0; ix < n->used; ix++) {
3806 /* mu = ai * rho mod b
3807 *
3808 * The value of rho must be precalculated via
3809 * montgomery_setup() such that
3810 * it equals -1/n0 mod b this allows the
3811 * following inner loop to reduce the
3812 * input one digit at a time
3813 */
3814 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
3815
3816 /* a = a + mu * m * b**i */
3817 {
3818 int iy;
3819 mp_digit *tmpn, *tmpx, u;
3820 mp_word r;
3821
3822 /* alias for digits of the modulus */
3823 tmpn = n->dp;
3824
3825 /* alias for the digits of x [the input] */
3826 tmpx = x->dp + ix;
3827
3828 /* set the carry to zero */
3829 u = 0;
3830
3831 /* Multiply and add in place */
3832 for (iy = 0; iy < n->used; iy++) {
3833 /* compute product and sum */
3834 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
3835 ((mp_word) u) + ((mp_word) * tmpx);
3836
3837 /* get carry */
3838 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3839
3840 /* fix digit */
3841 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
3842 }
3843 /* At this point the ix'th digit of x should be zero */
3844
3845
3846 /* propagate carries upwards as required*/
3847 while (u) {
3848 *tmpx += u;
3849 u = *tmpx >> DIGIT_BIT;
3850 *tmpx++ &= MP_MASK;
3851 }
3852 }
3853 }
3854
3855 /* at this point the n.used'th least
3856 * significant digits of x are all zero
3857 * which means we can shift x to the
3858 * right by n.used digits and the
3859 * residue is unchanged.
3860 */
3861
3862 /* x = x/b**n.used */
3863 mp_clamp(x);
3864 mp_rshd (x, n->used);
3865
3866 /* if x >= n then x = x - n */
3867 if (mp_cmp_mag (x, n) != MP_LT) {
3868 return s_mp_sub (x, n, x);
3869 }
3870
3871 return MP_OKAY;
3872 }
3873
3874 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */
3875 /* Revision: 1.3 $ */
3876 /* Date: 2011/03/18 16:43:04 $ */
3877
3878 /* determines the setup value */
3879 static void
mp_dr_setup(mp_int * a,mp_digit * d)3880 mp_dr_setup(mp_int *a, mp_digit *d)
3881 {
3882 /* the casts are required if DIGIT_BIT is one less than
3883 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
3884 */
3885 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
3886 ((mp_word)a->dp[0]));
3887 }
3888
3889 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */
3890 /* Revision: 1.1.1.1 $ */
3891 /* Date: 2011/03/12 22:58:18 $ */
3892
3893 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
3894 *
3895 * Based on algorithm from the paper
3896 *
3897 * "Generating Efficient Primes for Discrete Log Cryptosystems"
3898 * Chae Hoon Lim, Pil Joong Lee,
3899 * POSTECH Information Research Laboratories
3900 *
3901 * The modulus must be of a special format [see manual]
3902 *
3903 * Has been modified to use algorithm 7.10 from the LTM book instead
3904 *
3905 * Input x must be in the range 0 <= x <= (n-1)**2
3906 */
3907 static int
mp_dr_reduce(mp_int * x,mp_int * n,mp_digit k)3908 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
3909 {
3910 int err, i, m;
3911 mp_word r;
3912 mp_digit mu, *tmpx1, *tmpx2;
3913
3914 /* m = digits in modulus */
3915 m = n->used;
3916
3917 /* ensure that "x" has at least 2m digits */
3918 if (x->alloc < m + m) {
3919 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
3920 return err;
3921 }
3922 }
3923
3924 /* top of loop, this is where the code resumes if
3925 * another reduction pass is required.
3926 */
3927 top:
3928 /* aliases for digits */
3929 /* alias for lower half of x */
3930 tmpx1 = x->dp;
3931
3932 /* alias for upper half of x, or x/B**m */
3933 tmpx2 = x->dp + m;
3934
3935 /* set carry to zero */
3936 mu = 0;
3937
3938 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
3939 for (i = 0; i < m; i++) {
3940 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
3941 *tmpx1++ = (mp_digit)(r & MP_MASK);
3942 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
3943 }
3944
3945 /* set final carry */
3946 *tmpx1++ = mu;
3947
3948 /* zero words above m */
3949 for (i = m + 1; i < x->used; i++) {
3950 *tmpx1++ = 0;
3951 }
3952
3953 /* clamp, sub and return */
3954 mp_clamp (x);
3955
3956 /* if x >= n then subtract and reduce again
3957 * Each successive "recursion" makes the input smaller and smaller.
3958 */
3959 if (mp_cmp_mag (x, n) != MP_LT) {
3960 s_mp_sub(x, n, x);
3961 goto top;
3962 }
3963 return MP_OKAY;
3964 }
3965
3966 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */
3967 /* Revision: 1.1.1.1 $ */
3968 /* Date: 2011/03/12 22:58:18 $ */
3969
3970 /* determines the setup value */
3971 static int
mp_reduce_2k_setup(mp_int * a,mp_digit * d)3972 mp_reduce_2k_setup(mp_int *a, mp_digit *d)
3973 {
3974 int res, p;
3975 mp_int tmp;
3976
3977 if ((res = mp_init(&tmp)) != MP_OKAY) {
3978 return res;
3979 }
3980
3981 p = mp_count_bits(a);
3982 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3983 mp_clear(&tmp);
3984 return res;
3985 }
3986
3987 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3988 mp_clear(&tmp);
3989 return res;
3990 }
3991
3992 *d = tmp.dp[0];
3993 mp_clear(&tmp);
3994 return MP_OKAY;
3995 }
3996
3997 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */
3998 /* Revision: 1.1.1.1 $ */
3999 /* Date: 2011/03/12 22:58:18 $ */
4000
4001 /* reduces a modulo n where n is of the form 2**p - d */
4002 static int
mp_reduce_2k(mp_int * a,mp_int * n,mp_digit d)4003 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
4004 {
4005 mp_int q;
4006 int p, res;
4007
4008 if ((res = mp_init(&q)) != MP_OKAY) {
4009 return res;
4010 }
4011
4012 p = mp_count_bits(n);
4013 top:
4014 /* q = a/2**p, a = a mod 2**p */
4015 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
4016 goto ERR;
4017 }
4018
4019 if (d != 1) {
4020 /* q = q * d */
4021 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
4022 goto ERR;
4023 }
4024 }
4025
4026 /* a = a + q */
4027 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
4028 goto ERR;
4029 }
4030
4031 if (mp_cmp_mag(a, n) != MP_LT) {
4032 s_mp_sub(a, n, a);
4033 goto top;
4034 }
4035
4036 ERR:
4037 mp_clear(&q);
4038 return res;
4039 }
4040
4041 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */
4042 /* Revision: 1.1.1.1 $ */
4043 /* Date: 2011/03/12 22:58:18 $ */
4044
4045 /*
4046 * shifts with subtractions when the result is greater than b.
4047 *
4048 * The method is slightly modified to shift B unconditionally upto just under
4049 * the leading bit of b. This saves alot of multiple precision shifting.
4050 */
4051 static int
mp_montgomery_calc_normalization(mp_int * a,mp_int * b)4052 mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
4053 {
4054 int x, bits, res;
4055
4056 /* how many bits of last digit does b use */
4057 bits = mp_count_bits (b) % DIGIT_BIT;
4058
4059 if (b->used > 1) {
4060 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
4061 return res;
4062 }
4063 } else {
4064 mp_set(a, 1);
4065 bits = 1;
4066 }
4067
4068
4069 /* now compute C = A * B mod b */
4070 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
4071 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
4072 return res;
4073 }
4074 if (mp_cmp_mag (a, b) != MP_LT) {
4075 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
4076 return res;
4077 }
4078 }
4079 }
4080
4081 return MP_OKAY;
4082 }
4083
4084 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */
4085 /* Revision: 1.1.1.1 $ */
4086 /* Date: 2011/03/12 22:58:18 $ */
4087
4088 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
4089 *
4090 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
4091 * The value of k changes based on the size of the exponent.
4092 *
4093 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
4094 */
4095
4096 #define TAB_SIZE 256
4097
4098 static int
mp_exptmod_fast(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)4099 mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
4100 {
4101 mp_int M[TAB_SIZE], res;
4102 mp_digit buf, mp;
4103 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
4104
4105 /* use a pointer to the reduction algorithm. This allows us to use
4106 * one of many reduction algorithms without modding the guts of
4107 * the code with if statements everywhere.
4108 */
4109 int (*redux)(mp_int*,mp_int*,mp_digit);
4110
4111 /* find window size */
4112 x = mp_count_bits (X);
4113 if (x <= 7) {
4114 winsize = 2;
4115 } else if (x <= 36) {
4116 winsize = 3;
4117 } else if (x <= 140) {
4118 winsize = 4;
4119 } else if (x <= 450) {
4120 winsize = 5;
4121 } else if (x <= 1303) {
4122 winsize = 6;
4123 } else if (x <= 3529) {
4124 winsize = 7;
4125 } else {
4126 winsize = 8;
4127 }
4128
4129 /* init M array */
4130 /* init first cell */
4131 if ((err = mp_init(&M[1])) != MP_OKAY) {
4132 return err;
4133 }
4134
4135 /* now init the second half of the array */
4136 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4137 if ((err = mp_init(&M[x])) != MP_OKAY) {
4138 for (y = 1<<(winsize-1); y < x; y++) {
4139 mp_clear (&M[y]);
4140 }
4141 mp_clear(&M[1]);
4142 return err;
4143 }
4144 }
4145
4146 /* determine and setup reduction code */
4147 if (redmode == 0) {
4148 /* now setup montgomery */
4149 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
4150 goto LBL_M;
4151 }
4152
4153 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
4154 if (((unsigned)(P->used * 2 + 1) < MP_WARRAY) &&
4155 P->used < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4156 redux = fast_mp_montgomery_reduce;
4157 } else
4158 {
4159 /* use slower baseline Montgomery method */
4160 redux = mp_montgomery_reduce;
4161 }
4162 } else if (redmode == 1) {
4163 /* setup DR reduction for moduli of the form B**k - b */
4164 mp_dr_setup(P, &mp);
4165 redux = mp_dr_reduce;
4166 } else {
4167 /* setup DR reduction for moduli of the form 2**k - b */
4168 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
4169 goto LBL_M;
4170 }
4171 redux = mp_reduce_2k;
4172 }
4173
4174 /* setup result */
4175 if ((err = mp_init (&res)) != MP_OKAY) {
4176 goto LBL_M;
4177 }
4178
4179 /* create M table
4180 *
4181
4182 *
4183 * The first half of the table is not computed though accept for M[0] and M[1]
4184 */
4185
4186 if (redmode == 0) {
4187 /* now we need R mod m */
4188 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
4189 goto LBL_RES;
4190 }
4191
4192 /* now set M[1] to G * R mod m */
4193 if ((err = mp_mulmod (&M[1], G, &res, P)) != MP_OKAY) {
4194 goto LBL_RES;
4195 }
4196 } else {
4197 mp_set(&res, 1);
4198 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
4199 goto LBL_RES;
4200 }
4201 }
4202
4203 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
4204 if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4205 goto LBL_RES;
4206 }
4207
4208 for (x = 0; x < (winsize - 1); x++) {
4209 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
4210 goto LBL_RES;
4211 }
4212 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
4213 goto LBL_RES;
4214 }
4215 }
4216
4217 /* create upper table */
4218 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4219 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4220 goto LBL_RES;
4221 }
4222 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
4223 goto LBL_RES;
4224 }
4225 }
4226
4227 /* set initial mode and bit cnt */
4228 mode = 0;
4229 bitcnt = 1;
4230 buf = 0;
4231 digidx = X->used - 1;
4232 bitcpy = 0;
4233 bitbuf = 0;
4234
4235 for (;;) {
4236 /* grab next digit as required */
4237 if (--bitcnt == 0) {
4238 /* if digidx == -1 we are out of digits so break */
4239 if (digidx == -1) {
4240 break;
4241 }
4242 /* read next digit and reset bitcnt */
4243 buf = X->dp[digidx--];
4244 bitcnt = (int)DIGIT_BIT;
4245 }
4246
4247 /* grab the next msb from the exponent */
4248 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1;
4249 buf <<= (mp_digit)1;
4250
4251 /* if the bit is zero and mode == 0 then we ignore it
4252 * These represent the leading zero bits before the first 1 bit
4253 * in the exponent. Technically this opt is not required but it
4254 * does lower the # of trivial squaring/reductions used
4255 */
4256 if (mode == 0 && y == 0) {
4257 continue;
4258 }
4259
4260 /* if the bit is zero and mode == 1 then we square */
4261 if (mode == 1 && y == 0) {
4262 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4263 goto LBL_RES;
4264 }
4265 if ((err = redux (&res, P, mp)) != MP_OKAY) {
4266 goto LBL_RES;
4267 }
4268 continue;
4269 }
4270
4271 /* else we add it to the window */
4272 bitbuf |= (y << (winsize - ++bitcpy));
4273 mode = 2;
4274
4275 if (bitcpy == winsize) {
4276 /* ok window is filled so square as required and multiply */
4277 /* square first */
4278 for (x = 0; x < winsize; x++) {
4279 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4280 goto LBL_RES;
4281 }
4282 if ((err = redux (&res, P, mp)) != MP_OKAY) {
4283 goto LBL_RES;
4284 }
4285 }
4286
4287 /* then multiply */
4288 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4289 goto LBL_RES;
4290 }
4291 if ((err = redux (&res, P, mp)) != MP_OKAY) {
4292 goto LBL_RES;
4293 }
4294
4295 /* empty window and reset */
4296 bitcpy = 0;
4297 bitbuf = 0;
4298 mode = 1;
4299 }
4300 }
4301
4302 /* if bits remain then square/multiply */
4303 if (mode == 2 && bitcpy > 0) {
4304 /* square then multiply if the bit is set */
4305 for (x = 0; x < bitcpy; x++) {
4306 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4307 goto LBL_RES;
4308 }
4309 if ((err = redux (&res, P, mp)) != MP_OKAY) {
4310 goto LBL_RES;
4311 }
4312
4313 /* get next bit of the window */
4314 bitbuf <<= 1;
4315 if ((bitbuf & (1 << winsize)) != 0) {
4316 /* then multiply */
4317 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4318 goto LBL_RES;
4319 }
4320 if ((err = redux (&res, P, mp)) != MP_OKAY) {
4321 goto LBL_RES;
4322 }
4323 }
4324 }
4325 }
4326
4327 if (redmode == 0) {
4328 /* fixup result if Montgomery reduction is used
4329 * recall that any value in a Montgomery system is
4330 * actually multiplied by R mod n. So we have
4331 * to reduce one more time to cancel out the factor
4332 * of R.
4333 */
4334 if ((err = redux(&res, P, mp)) != MP_OKAY) {
4335 goto LBL_RES;
4336 }
4337 }
4338
4339 /* swap res with Y */
4340 mp_exch (&res, Y);
4341 err = MP_OKAY;
4342 LBL_RES:mp_clear (&res);
4343 LBL_M:
4344 mp_clear(&M[1]);
4345 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4346 mp_clear (&M[x]);
4347 }
4348 return err;
4349 }
4350
4351 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_exptmod_fast.c,v $ */
4352 /* Revision: 1.4 $ */
4353 /* Date: 2011/03/18 16:43:04 $ */
4354
4355 /* this is a shell function that calls either the normal or Montgomery
4356 * exptmod functions. Originally the call to the montgomery code was
4357 * embedded in the normal function but that wasted alot of stack space
4358 * for nothing (since 99% of the time the Montgomery code would be called)
4359 */
4360 static int
mp_exptmod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4361 mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int *Y)
4362 {
4363 int dr;
4364
4365 /* modulus P must be positive */
4366 if (P->sign == MP_NEG) {
4367 return MP_VAL;
4368 }
4369
4370 /* if exponent X is negative we have to recurse */
4371 if (X->sign == MP_NEG) {
4372 mp_int tmpG, tmpX;
4373 int err;
4374
4375 /* first compute 1/G mod P */
4376 if ((err = mp_init(&tmpG)) != MP_OKAY) {
4377 return err;
4378 }
4379 if ((err = mp_invmod(&tmpG, G, P)) != MP_OKAY) {
4380 mp_clear(&tmpG);
4381 return err;
4382 }
4383
4384 /* now get |X| */
4385 if ((err = mp_init(&tmpX)) != MP_OKAY) {
4386 mp_clear(&tmpG);
4387 return err;
4388 }
4389 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
4390 mp_clear_multi(&tmpG, &tmpX, NULL);
4391 return err;
4392 }
4393
4394 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
4395 err = mp_exptmod(&tmpG, &tmpX, P, Y);
4396 mp_clear_multi(&tmpG, &tmpX, NULL);
4397 return err;
4398 }
4399
4400 /* modified diminished radix reduction */
4401 if (mp_reduce_is_2k_l(P) == MP_YES) {
4402 return s_mp_exptmod(G, X, P, Y, 1);
4403 }
4404
4405 /* is it a DR modulus? */
4406 dr = mp_dr_is_modulus(P);
4407
4408 /* if not, is it a unrestricted DR modulus? */
4409 if (dr == 0) {
4410 dr = mp_reduce_is_2k(P) << 1;
4411 }
4412
4413 /* if the modulus is odd or dr != 0 use the montgomery method */
4414 if (BN_is_odd (P) == 1 || dr != 0) {
4415 return mp_exptmod_fast (G, X, P, Y, dr);
4416 } else {
4417 /* otherwise use the generic Barrett reduction technique */
4418 return s_mp_exptmod (G, X, P, Y, 0);
4419 }
4420 }
4421
4422 /* reverse an array, used for radix code */
4423 static void
bn_reverse(unsigned char * s,int len)4424 bn_reverse(unsigned char *s, int len)
4425 {
4426 int ix, iy;
4427 unsigned char t;
4428
4429 ix = 0;
4430 iy = len - 1;
4431 while (ix < iy) {
4432 t = s[ix];
4433 s[ix] = s[iy];
4434 s[iy] = t;
4435 ++ix;
4436 --iy;
4437 }
4438 }
4439
4440 static int
s_is_power_of_two(mp_digit b,int * p)4441 s_is_power_of_two(mp_digit b, int *p)
4442 {
4443 int x;
4444
4445 /* fast return if no power of two */
4446 if ((b==0) || (b & (b-1))) {
4447 return 0;
4448 }
4449
4450 for (x = 0; x < DIGIT_BIT; x++) {
4451 if (b == (((mp_digit)1)<<x)) {
4452 *p = x;
4453 return 1;
4454 }
4455 }
4456 return 0;
4457 }
4458
4459 /* single digit division (based on routine from MPI) */
4460 static int
mp_div_d(mp_int * a,mp_digit b,mp_int * c,mp_digit * d)4461 mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
4462 {
4463 mp_int q;
4464 mp_word w;
4465 mp_digit t;
4466 int res, ix;
4467
4468 /* cannot divide by zero */
4469 if (b == 0) {
4470 return MP_VAL;
4471 }
4472
4473 /* quick outs */
4474 if (b == 1 || mp_iszero(a) == 1) {
4475 if (d != NULL) {
4476 *d = 0;
4477 }
4478 if (c != NULL) {
4479 return mp_copy(a, c);
4480 }
4481 return MP_OKAY;
4482 }
4483
4484 /* power of two ? */
4485 if (s_is_power_of_two(b, &ix) == 1) {
4486 if (d != NULL) {
4487 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4488 }
4489 if (c != NULL) {
4490 return mp_div_2d(a, ix, c, NULL);
4491 }
4492 return MP_OKAY;
4493 }
4494
4495 #ifdef BN_MP_DIV_3_C
4496 /* three? */
4497 if (b == 3) {
4498 return mp_div_3(a, c, d);
4499 }
4500 #endif
4501
4502 /* no easy answer [c'est la vie]. Just division */
4503 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4504 return res;
4505 }
4506
4507 q.used = a->used;
4508 q.sign = a->sign;
4509 w = 0;
4510 for (ix = a->used - 1; ix >= 0; ix--) {
4511 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4512
4513 if (w >= b) {
4514 t = (mp_digit)(w / b);
4515 w -= ((mp_word)t) * ((mp_word)b);
4516 } else {
4517 t = 0;
4518 }
4519 q.dp[ix] = (mp_digit)t;
4520 }
4521
4522 if (d != NULL) {
4523 *d = (mp_digit)w;
4524 }
4525
4526 if (c != NULL) {
4527 mp_clamp(&q);
4528 mp_exch(&q, c);
4529 }
4530 mp_clear(&q);
4531
4532 return res;
4533 }
4534
4535 static int
mp_mod_d(mp_int * a,mp_digit b,mp_digit * c)4536 mp_mod_d(mp_int *a, mp_digit b, mp_digit *c)
4537 {
4538 return mp_div_d(a, b, NULL, c);
4539 }
4540
4541 static const mp_digit ltm_prime_tab[] = {
4542 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4543 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4544 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4545 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4546 #ifndef MP_8BIT
4547 0x0083,
4548 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4549 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4550 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4551 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4552
4553 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4554 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4555 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4556 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4557 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4558 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4559 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4560 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4561
4562 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4563 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4564 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4565 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4566 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4567 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4568 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4569 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4570
4571 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4572 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4573 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4574 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4575 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4576 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4577 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4578 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4579 #endif
4580 };
4581
4582 #define PRIME_SIZE __arraycount(ltm_prime_tab)
4583
4584 static int
mp_prime_is_divisible(mp_int * a,int * result)4585 mp_prime_is_divisible(mp_int *a, int *result)
4586 {
4587 int err, ix;
4588 mp_digit res;
4589
4590 /* default to not */
4591 *result = MP_NO;
4592
4593 for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4594 /* what is a mod LBL_prime_tab[ix] */
4595 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4596 return err;
4597 }
4598
4599 /* is the residue zero? */
4600 if (res == 0) {
4601 *result = MP_YES;
4602 return MP_OKAY;
4603 }
4604 }
4605
4606 return MP_OKAY;
4607 }
4608
4609 /* single digit addition */
4610 static int
mp_add_d(mp_int * a,mp_digit b,mp_int * c)4611 mp_add_d(mp_int *a, mp_digit b, mp_int *c)
4612 {
4613 int res, ix, oldused;
4614 mp_digit *tmpa, *tmpc, mu;
4615
4616 /* grow c as required */
4617 if (c->alloc < a->used + 1) {
4618 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4619 return res;
4620 }
4621 }
4622
4623 /* if a is negative and |a| >= b, call c = |a| - b */
4624 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
4625 /* temporarily fix sign of a */
4626 a->sign = MP_ZPOS;
4627
4628 /* c = |a| - b */
4629 res = mp_sub_d(a, b, c);
4630
4631 /* fix sign */
4632 a->sign = c->sign = MP_NEG;
4633
4634 /* clamp */
4635 mp_clamp(c);
4636
4637 return res;
4638 }
4639
4640 /* old number of used digits in c */
4641 oldused = c->used;
4642
4643 /* sign always positive */
4644 c->sign = MP_ZPOS;
4645
4646 /* source alias */
4647 tmpa = a->dp;
4648
4649 /* destination alias */
4650 tmpc = c->dp;
4651
4652 /* if a is positive */
4653 if (a->sign == MP_ZPOS) {
4654 /* add digit, after this we're propagating
4655 * the carry.
4656 */
4657 *tmpc = *tmpa++ + b;
4658 mu = *tmpc >> DIGIT_BIT;
4659 *tmpc++ &= MP_MASK;
4660
4661 /* now handle rest of the digits */
4662 for (ix = 1; ix < a->used; ix++) {
4663 *tmpc = *tmpa++ + mu;
4664 mu = *tmpc >> DIGIT_BIT;
4665 *tmpc++ &= MP_MASK;
4666 }
4667 /* set final carry */
4668 ix++;
4669 *tmpc++ = mu;
4670
4671 /* setup size */
4672 c->used = a->used + 1;
4673 } else {
4674 /* a was negative and |a| < b */
4675 c->used = 1;
4676
4677 /* the result is a single digit */
4678 if (a->used == 1) {
4679 *tmpc++ = b - a->dp[0];
4680 } else {
4681 *tmpc++ = b;
4682 }
4683
4684 /* setup count so the clearing of oldused
4685 * can fall through correctly
4686 */
4687 ix = 1;
4688 }
4689
4690 /* now zero to oldused */
4691 while (ix++ < oldused) {
4692 *tmpc++ = 0;
4693 }
4694 mp_clamp(c);
4695
4696 return MP_OKAY;
4697 }
4698
4699 /* single digit subtraction */
4700 static int
mp_sub_d(mp_int * a,mp_digit b,mp_int * c)4701 mp_sub_d(mp_int *a, mp_digit b, mp_int *c)
4702 {
4703 mp_digit *tmpa, *tmpc, mu;
4704 int res, ix, oldused;
4705
4706 /* grow c as required */
4707 if (c->alloc < a->used + 1) {
4708 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4709 return res;
4710 }
4711 }
4712
4713 /* if a is negative just do an unsigned
4714 * addition [with fudged signs]
4715 */
4716 if (a->sign == MP_NEG) {
4717 a->sign = MP_ZPOS;
4718 res = mp_add_d(a, b, c);
4719 a->sign = c->sign = MP_NEG;
4720
4721 /* clamp */
4722 mp_clamp(c);
4723
4724 return res;
4725 }
4726
4727 /* setup regs */
4728 oldused = c->used;
4729 tmpa = a->dp;
4730 tmpc = c->dp;
4731
4732 /* if a <= b simply fix the single digit */
4733 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
4734 if (a->used == 1) {
4735 *tmpc++ = b - *tmpa;
4736 } else {
4737 *tmpc++ = b;
4738 }
4739 ix = 1;
4740
4741 /* negative/1digit */
4742 c->sign = MP_NEG;
4743 c->used = 1;
4744 } else {
4745 /* positive/size */
4746 c->sign = MP_ZPOS;
4747 c->used = a->used;
4748
4749 /* subtract first digit */
4750 *tmpc = *tmpa++ - b;
4751 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4752 *tmpc++ &= MP_MASK;
4753
4754 /* handle rest of the digits */
4755 for (ix = 1; ix < a->used; ix++) {
4756 *tmpc = *tmpa++ - mu;
4757 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4758 *tmpc++ &= MP_MASK;
4759 }
4760 }
4761
4762 /* zero excess digits */
4763 while (ix++ < oldused) {
4764 *tmpc++ = 0;
4765 }
4766 mp_clamp(c);
4767 return MP_OKAY;
4768 }
4769
4770 static const int lnz[16] = {
4771 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4772 };
4773
4774 /* Counts the number of lsbs which are zero before the first zero bit */
4775 static int
mp_cnt_lsb(mp_int * a)4776 mp_cnt_lsb(mp_int *a)
4777 {
4778 int x;
4779 mp_digit q, qq;
4780
4781 /* easy out */
4782 if (mp_iszero(a) == 1) {
4783 return 0;
4784 }
4785
4786 /* scan lower digits until non-zero */
4787 for (x = 0; x < a->used && a->dp[x] == 0; x++);
4788 q = a->dp[x];
4789 x *= DIGIT_BIT;
4790
4791 /* now scan this digit until a 1 is found */
4792 if ((q & 1) == 0) {
4793 do {
4794 qq = q & 15;
4795 /* LINTED previous op ensures range of qq */
4796 x += lnz[qq];
4797 q >>= 4;
4798 } while (qq == 0);
4799 }
4800 return x;
4801 }
4802
4803 /* c = a * a (mod b) */
4804 static int
mp_sqrmod(mp_int * a,mp_int * b,mp_int * c)4805 mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
4806 {
4807 int res;
4808 mp_int t;
4809
4810 if ((res = mp_init (&t)) != MP_OKAY) {
4811 return res;
4812 }
4813
4814 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
4815 mp_clear (&t);
4816 return res;
4817 }
4818 res = mp_mod (&t, b, c);
4819 mp_clear (&t);
4820 return res;
4821 }
4822 static int
mp_prime_miller_rabin(mp_int * a,mp_int * b,int * result)4823 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result)
4824 {
4825 mp_int n1, y, r;
4826 int s, j, err;
4827
4828 /* default */
4829 *result = MP_NO;
4830
4831 /* ensure b > 1 */
4832 if (mp_cmp_d(b, 1) != MP_GT) {
4833 return MP_VAL;
4834 }
4835
4836 /* get n1 = a - 1 */
4837 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4838 return err;
4839 }
4840 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4841 goto LBL_N1;
4842 }
4843
4844 /* set 2**s * r = n1 */
4845 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4846 goto LBL_N1;
4847 }
4848
4849 /* count the number of least significant bits
4850 * which are zero
4851 */
4852 s = mp_cnt_lsb(&r);
4853
4854 /* now divide n - 1 by 2**s */
4855 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4856 goto LBL_R;
4857 }
4858
4859 /* compute y = b**r mod a */
4860 if ((err = mp_init (&y)) != MP_OKAY) {
4861 goto LBL_R;
4862 }
4863 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4864 goto LBL_Y;
4865 }
4866
4867 /* if y != 1 and y != n1 do */
4868 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4869 j = 1;
4870 /* while j <= s-1 and y != n1 */
4871 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4872 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4873 goto LBL_Y;
4874 }
4875
4876 /* if y == 1 then composite */
4877 if (mp_cmp_d (&y, 1) == MP_EQ) {
4878 goto LBL_Y;
4879 }
4880
4881 ++j;
4882 }
4883
4884 /* if y != n1 then composite */
4885 if (mp_cmp (&y, &n1) != MP_EQ) {
4886 goto LBL_Y;
4887 }
4888 }
4889
4890 /* probably prime now */
4891 *result = MP_YES;
4892 LBL_Y:mp_clear (&y);
4893 LBL_R:mp_clear (&r);
4894 LBL_N1:mp_clear (&n1);
4895 return err;
4896 }
4897
4898 /* performs a variable number of rounds of Miller-Rabin
4899 *
4900 * Probability of error after t rounds is no more than
4901
4902 *
4903 * Sets result to 1 if probably prime, 0 otherwise
4904 */
4905 static int
mp_prime_is_prime(mp_int * a,int t,int * result)4906 mp_prime_is_prime(mp_int *a, int t, int *result)
4907 {
4908 mp_int b;
4909 int ix, err, res;
4910
4911 /* default to no */
4912 *result = MP_NO;
4913
4914 /* valid value of t? */
4915 if (t <= 0 || t > (int)PRIME_SIZE) {
4916 return MP_VAL;
4917 }
4918
4919 /* is the input equal to one of the primes in the table? */
4920 for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4921 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4922 *result = 1;
4923 return MP_OKAY;
4924 }
4925 }
4926
4927 /* first perform trial division */
4928 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4929 return err;
4930 }
4931
4932 /* return if it was trivially divisible */
4933 if (res == MP_YES) {
4934 return MP_OKAY;
4935 }
4936
4937 /* now perform the miller-rabin rounds */
4938 if ((err = mp_init (&b)) != MP_OKAY) {
4939 return err;
4940 }
4941
4942 for (ix = 0; ix < t; ix++) {
4943 /* set the prime */
4944 mp_set (&b, ltm_prime_tab[ix]);
4945
4946 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4947 goto LBL_B;
4948 }
4949
4950 if (res == MP_NO) {
4951 goto LBL_B;
4952 }
4953 }
4954
4955 /* passed the test */
4956 *result = MP_YES;
4957 LBL_B:mp_clear (&b);
4958 return err;
4959 }
4960
4961 /* returns size of ASCII reprensentation */
4962 static int
mp_radix_size(mp_int * a,int radix,int * size)4963 mp_radix_size (mp_int *a, int radix, int *size)
4964 {
4965 int res, digs;
4966 mp_int t;
4967 mp_digit d;
4968
4969 *size = 0;
4970
4971 /* special case for binary */
4972 if (radix == 2) {
4973 *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4974 return MP_OKAY;
4975 }
4976
4977 /* make sure the radix is in range */
4978 if (radix < 2 || radix > 64) {
4979 return MP_VAL;
4980 }
4981
4982 if (mp_iszero(a) == MP_YES) {
4983 *size = 2;
4984 return MP_OKAY;
4985 }
4986
4987 /* digs is the digit count */
4988 digs = 0;
4989
4990 /* if it's negative add one for the sign */
4991 if (a->sign == MP_NEG) {
4992 ++digs;
4993 }
4994
4995 /* init a copy of the input */
4996 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4997 return res;
4998 }
4999
5000 /* force temp to positive */
5001 t.sign = MP_ZPOS;
5002
5003 /* fetch out all of the digits */
5004 while (mp_iszero (&t) == MP_NO) {
5005 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5006 mp_clear (&t);
5007 return res;
5008 }
5009 ++digs;
5010 }
5011 mp_clear (&t);
5012
5013 /* return digs + 1, the 1 is for the NULL byte that would be required. */
5014 *size = digs + 1;
5015 return MP_OKAY;
5016 }
5017
5018 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
5019
5020 /* stores a bignum as a ASCII string in a given radix (2..64)
5021 *
5022 * Stores upto maxlen-1 chars and always a NULL byte
5023 */
5024 static int
mp_toradix_n(mp_int * a,char * str,int radix,int maxlen)5025 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
5026 {
5027 int res, digs;
5028 mp_int t;
5029 mp_digit d;
5030 char *_s = str;
5031
5032 /* check range of the maxlen, radix */
5033 if (maxlen < 2 || radix < 2 || radix > 64) {
5034 return MP_VAL;
5035 }
5036
5037 /* quick out if its zero */
5038 if (mp_iszero(a) == MP_YES) {
5039 *str++ = '0';
5040 *str = '\0';
5041 return MP_OKAY;
5042 }
5043
5044 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5045 return res;
5046 }
5047
5048 /* if it is negative output a - */
5049 if (t.sign == MP_NEG) {
5050 /* we have to reverse our digits later... but not the - sign!! */
5051 ++_s;
5052
5053 /* store the flag and mark the number as positive */
5054 *str++ = '-';
5055 t.sign = MP_ZPOS;
5056
5057 /* subtract a char */
5058 --maxlen;
5059 }
5060
5061 digs = 0;
5062 while (mp_iszero (&t) == 0) {
5063 if (--maxlen < 1) {
5064 /* no more room */
5065 break;
5066 }
5067 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5068 mp_clear (&t);
5069 return res;
5070 }
5071 /* LINTED -- radix' range is checked above, limits d's range */
5072 *str++ = mp_s_rmap[d];
5073 ++digs;
5074 }
5075
5076 /* reverse the digits of the string. In this case _s points
5077 * to the first digit [exluding the sign] of the number
5078 */
5079 bn_reverse ((unsigned char *)_s, digs);
5080
5081 /* append a NULL so the string is properly terminated */
5082 *str = '\0';
5083
5084 mp_clear (&t);
5085 return MP_OKAY;
5086 }
5087
5088 static char *
formatbn(const BIGNUM * a,const int radix)5089 formatbn(const BIGNUM *a, const int radix)
5090 {
5091 char *s;
5092 int len;
5093
5094 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) {
5095 return NULL;
5096 }
5097 if ((s = netpgp_allocate(1, (size_t)len)) != NULL) {
5098 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) {
5099 netpgp_deallocate(s, (size_t)len);
5100 return NULL;
5101 }
5102 }
5103 return s;
5104 }
5105
5106 static int
mp_getradix_num(mp_int * a,int radix,char * s)5107 mp_getradix_num(mp_int *a, int radix, char *s)
5108 {
5109 int err, ch, neg, y;
5110
5111 /* clear a */
5112 mp_zero(a);
5113
5114 /* if first digit is - then set negative */
5115 if ((ch = *s++) == '-') {
5116 neg = MP_NEG;
5117 ch = *s++;
5118 } else {
5119 neg = MP_ZPOS;
5120 }
5121
5122 for (;;) {
5123 /* find y in the radix map */
5124 for (y = 0; y < radix; y++) {
5125 if (mp_s_rmap[y] == ch) {
5126 break;
5127 }
5128 }
5129 if (y == radix) {
5130 break;
5131 }
5132
5133 /* shift up and add */
5134 if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) {
5135 return err;
5136 }
5137 if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
5138 return err;
5139 }
5140
5141 ch = *s++;
5142 }
5143 if (mp_cmp_d(a, 0) != MP_EQ) {
5144 a->sign = neg;
5145 }
5146
5147 return MP_OKAY;
5148 }
5149
5150 static int
getbn(BIGNUM ** a,const char * str,int radix)5151 getbn(BIGNUM **a, const char *str, int radix)
5152 {
5153 int len;
5154
5155 if (a == NULL || str == NULL || (*a = BN_new()) == NULL) {
5156 return 0;
5157 }
5158 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) {
5159 return 0;
5160 }
5161 mp_radix_size(__UNCONST(a), radix, &len);
5162 return len - 1;
5163 }
5164
5165 /* d = a - b (mod c) */
5166 static int
mp_submod(const mp_int * a,const mp_int * b,mp_int * c,mp_int * d)5167 mp_submod(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d)
5168 {
5169 int res;
5170 mp_int t;
5171
5172
5173 if ((res = mp_init (&t)) != MP_OKAY) {
5174 return res;
5175 }
5176
5177 if ((res = mp_sub (a, b, &t)) != MP_OKAY) {
5178 mp_clear (&t);
5179 return res;
5180 }
5181 res = mp_mod (&t, c, d);
5182 mp_clear (&t);
5183 return res;
5184 }
5185
5186 /**************************************************************************/
5187
5188 /* BIGNUM emulation layer */
5189
5190 /* essentiually, these are just wrappers around the libtommath functions */
5191 /* usually the order of args changes */
5192 /* the BIGNUM API tends to have more const poisoning */
5193 /* these wrappers also check the arguments passed for sanity */
5194
5195 BIGNUM *
BN_bin2bn(const uint8_t * data,int len,BIGNUM * ret)5196 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret)
5197 {
5198 if (data == NULL) {
5199 return BN_new();
5200 }
5201 if (ret == NULL) {
5202 ret = BN_new();
5203 }
5204 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL;
5205 }
5206
5207 /* store in unsigned [big endian] format */
5208 int
BN_bn2bin(const BIGNUM * a,unsigned char * b)5209 BN_bn2bin(const BIGNUM *a, unsigned char *b)
5210 {
5211 BIGNUM t;
5212 int x;
5213
5214 if (a == NULL || b == NULL) {
5215 return -1;
5216 }
5217 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) {
5218 return -1;
5219 }
5220 for (x = 0; !BN_is_zero(&t) ; ) {
5221 b[x++] = (unsigned char) (t.dp[0] & 0xff);
5222 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY) {
5223 mp_clear(&t);
5224 return -1;
5225 }
5226 }
5227 bn_reverse(b, x);
5228 mp_clear(&t);
5229 return x;
5230 }
5231
5232 void
BN_init(BIGNUM * a)5233 BN_init(BIGNUM *a)
5234 {
5235 if (a != NULL) {
5236 mp_init(a);
5237 }
5238 }
5239
5240 BIGNUM *
BN_new(void)5241 BN_new(void)
5242 {
5243 BIGNUM *a;
5244
5245 if ((a = netpgp_allocate(1, sizeof(*a))) != NULL) {
5246 mp_init(a);
5247 }
5248 return a;
5249 }
5250
5251 /* copy, b = a */
5252 int
BN_copy(BIGNUM * b,const BIGNUM * a)5253 BN_copy(BIGNUM *b, const BIGNUM *a)
5254 {
5255 if (a == NULL || b == NULL) {
5256 return MP_VAL;
5257 }
5258 return mp_copy(__UNCONST(a), b);
5259 }
5260
5261 BIGNUM *
BN_dup(const BIGNUM * a)5262 BN_dup(const BIGNUM *a)
5263 {
5264 BIGNUM *ret;
5265
5266 if (a == NULL) {
5267 return NULL;
5268 }
5269 if ((ret = BN_new()) != NULL) {
5270 BN_copy(ret, a);
5271 }
5272 return ret;
5273 }
5274
5275 void
BN_swap(BIGNUM * a,BIGNUM * b)5276 BN_swap(BIGNUM *a, BIGNUM *b)
5277 {
5278 if (a && b) {
5279 mp_exch(a, b);
5280 }
5281 }
5282
5283 int
BN_lshift(BIGNUM * r,const BIGNUM * a,int n)5284 BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
5285 {
5286 if (r == NULL || a == NULL || n < 0) {
5287 return 0;
5288 }
5289 BN_copy(r, a);
5290 return mp_lshd(r, n) == MP_OKAY;
5291 }
5292
5293 int
BN_lshift1(BIGNUM * r,BIGNUM * a)5294 BN_lshift1(BIGNUM *r, BIGNUM *a)
5295 {
5296 if (r == NULL || a == NULL) {
5297 return 0;
5298 }
5299 BN_copy(r, a);
5300 return mp_lshd(r, 1) == MP_OKAY;
5301 }
5302
5303 int
BN_rshift(BIGNUM * r,const BIGNUM * a,int n)5304 BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
5305 {
5306 if (r == NULL || a == NULL || n < 0) {
5307 return MP_VAL;
5308 }
5309 BN_copy(r, a);
5310 return mp_rshd(r, n) == MP_OKAY;
5311 }
5312
5313 int
BN_rshift1(BIGNUM * r,BIGNUM * a)5314 BN_rshift1(BIGNUM *r, BIGNUM *a)
5315 {
5316 if (r == NULL || a == NULL) {
5317 return 0;
5318 }
5319 BN_copy(r, a);
5320 return mp_rshd(r, 1) == MP_OKAY;
5321 }
5322
5323 int
BN_set_word(BIGNUM * a,BN_ULONG w)5324 BN_set_word(BIGNUM *a, BN_ULONG w)
5325 {
5326 if (a == NULL) {
5327 return 0;
5328 }
5329 mp_set(a, w);
5330 return 1;
5331 }
5332
5333 int
BN_add(BIGNUM * r,const BIGNUM * a,const BIGNUM * b)5334 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
5335 {
5336 if (a == NULL || b == NULL || r == NULL) {
5337 return 0;
5338 }
5339 return mp_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5340 }
5341
5342 int
BN_sub(BIGNUM * r,const BIGNUM * a,const BIGNUM * b)5343 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
5344 {
5345 if (a == NULL || b == NULL || r == NULL) {
5346 return 0;
5347 }
5348 return mp_sub(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5349 }
5350
5351 int
BN_mul(BIGNUM * r,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)5352 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
5353 {
5354 if (a == NULL || b == NULL || r == NULL) {
5355 return 0;
5356 }
5357 USE_ARG(ctx);
5358 return mp_mul(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5359 }
5360
5361 int
BN_div(BIGNUM * dv,BIGNUM * rem,const BIGNUM * a,const BIGNUM * d,BN_CTX * ctx)5362 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx)
5363 {
5364 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) {
5365 return 0;
5366 }
5367 USE_ARG(ctx);
5368 return mp_div(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY;
5369 }
5370
5371 void
BN_free(BIGNUM * a)5372 BN_free(BIGNUM *a)
5373 {
5374 if (a) {
5375 mp_clear(a);
5376 }
5377 }
5378
5379 void
BN_clear(BIGNUM * a)5380 BN_clear(BIGNUM *a)
5381 {
5382 if (a) {
5383 mp_clear(a);
5384 }
5385 }
5386
5387 void
BN_clear_free(BIGNUM * a)5388 BN_clear_free(BIGNUM *a)
5389 {
5390 if (a) {
5391 mp_clear(a);
5392 }
5393 }
5394
5395 int
BN_num_bytes(const BIGNUM * a)5396 BN_num_bytes(const BIGNUM *a)
5397 {
5398 if (a == NULL) {
5399 return MP_VAL;
5400 }
5401 return mp_unsigned_bin_size(__UNCONST(a));
5402 }
5403
5404 int
BN_num_bits(const BIGNUM * a)5405 BN_num_bits(const BIGNUM *a)
5406 {
5407 if (a == NULL) {
5408 return 0;
5409 }
5410 return mp_count_bits(a);
5411 }
5412
5413 void
BN_set_negative(BIGNUM * a,int n)5414 BN_set_negative(BIGNUM *a, int n)
5415 {
5416 if (a) {
5417 a->sign = (n) ? MP_NEG : 0;
5418 }
5419 }
5420
5421 int
BN_cmp(BIGNUM * a,BIGNUM * b)5422 BN_cmp(BIGNUM *a, BIGNUM *b)
5423 {
5424 if (a == NULL || b == NULL) {
5425 return MP_VAL;
5426 }
5427 switch(mp_cmp(a, b)) {
5428 case MP_LT:
5429 return -1;
5430 case MP_GT:
5431 return 1;
5432 case MP_EQ:
5433 default:
5434 return 0;
5435 }
5436 }
5437
5438 int
BN_mod_exp(BIGNUM * Y,BIGNUM * G,BIGNUM * X,BIGNUM * P,BN_CTX * ctx)5439 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx)
5440 {
5441 if (Y == NULL || G == NULL || X == NULL || P == NULL) {
5442 return MP_VAL;
5443 }
5444 USE_ARG(ctx);
5445 return mp_exptmod(G, X, P, Y) == MP_OKAY;
5446 }
5447
5448 BIGNUM *
BN_mod_inverse(BIGNUM * r,BIGNUM * a,const BIGNUM * n,BN_CTX * ctx)5449 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
5450 {
5451 USE_ARG(ctx);
5452 if (r == NULL || a == NULL || n == NULL) {
5453 return NULL;
5454 }
5455 return (mp_invmod(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL;
5456 }
5457
5458 int
BN_mod_mul(BIGNUM * ret,BIGNUM * a,BIGNUM * b,const BIGNUM * m,BN_CTX * ctx)5459 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
5460 {
5461 USE_ARG(ctx);
5462 if (ret == NULL || a == NULL || b == NULL || m == NULL) {
5463 return 0;
5464 }
5465 return mp_mulmod(ret, a, b, __UNCONST(m)) == MP_OKAY;
5466 }
5467
5468 BN_CTX *
BN_CTX_new(void)5469 BN_CTX_new(void)
5470 {
5471 return netpgp_allocate(1, sizeof(BN_CTX));
5472 }
5473
5474 void
BN_CTX_init(BN_CTX * c)5475 BN_CTX_init(BN_CTX *c)
5476 {
5477 if (c != NULL) {
5478 c->arraysize = 15;
5479 if ((c->v = netpgp_allocate(sizeof(*c->v), c->arraysize)) == NULL) {
5480 c->arraysize = 0;
5481 }
5482 }
5483 }
5484
5485 BIGNUM *
BN_CTX_get(BN_CTX * ctx)5486 BN_CTX_get(BN_CTX *ctx)
5487 {
5488 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) {
5489 return NULL;
5490 }
5491 return ctx->v[ctx->count++] = BN_new();
5492 }
5493
5494 void
BN_CTX_start(BN_CTX * ctx)5495 BN_CTX_start(BN_CTX *ctx)
5496 {
5497 BN_CTX_init(ctx);
5498 }
5499
5500 void
BN_CTX_free(BN_CTX * c)5501 BN_CTX_free(BN_CTX *c)
5502 {
5503 unsigned i;
5504
5505 if (c != NULL && c->v != NULL) {
5506 for (i = 0 ; i < c->count ; i++) {
5507 BN_clear_free(c->v[i]);
5508 }
5509 netpgp_deallocate(c->v, sizeof(*c->v) * c->arraysize);
5510 }
5511 }
5512
5513 void
BN_CTX_end(BN_CTX * ctx)5514 BN_CTX_end(BN_CTX *ctx)
5515 {
5516 BN_CTX_free(ctx);
5517 }
5518
5519 char *
BN_bn2hex(const BIGNUM * a)5520 BN_bn2hex(const BIGNUM *a)
5521 {
5522 return (a == NULL) ? NULL : formatbn(a, 16);
5523 }
5524
5525 char *
BN_bn2dec(const BIGNUM * a)5526 BN_bn2dec(const BIGNUM *a)
5527 {
5528 return (a == NULL) ? NULL : formatbn(a, 10);
5529 }
5530
5531 #ifndef _KERNEL
5532 int
BN_print_fp(FILE * fp,const BIGNUM * a)5533 BN_print_fp(FILE *fp, const BIGNUM *a)
5534 {
5535 char *s;
5536 int ret;
5537
5538 if (fp == NULL || a == NULL) {
5539 return 0;
5540 }
5541 s = BN_bn2hex(a);
5542 ret = fprintf(fp, "%s", s);
5543 netpgp_deallocate(s, strlen(s) + 1);
5544 return ret;
5545 }
5546 #endif
5547
5548 int
BN_rand(BIGNUM * rnd,int bits,int top,int bottom)5549 BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
5550 {
5551 uint64_t r;
5552 int digits;
5553 int i;
5554
5555 if (rnd == NULL) {
5556 return 0;
5557 }
5558 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT));
5559 for (i = 0 ; i < digits ; i++) {
5560 r = (uint64_t)arc4random();
5561 r <<= 32;
5562 r |= arc4random();
5563 rnd->dp[i] = (r & MP_MASK);
5564 }
5565 if (top == 0) {
5566 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5567 }
5568 if (top == 1) {
5569 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5570 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1)));
5571 }
5572 if (bottom) {
5573 rnd->dp[0] |= 0x1;
5574 }
5575 return 1;
5576 }
5577
5578 int
BN_rand_range(BIGNUM * rnd,BIGNUM * range)5579 BN_rand_range(BIGNUM *rnd, BIGNUM *range)
5580 {
5581 if (rnd == NULL || range == NULL || BN_is_zero(range)) {
5582 return 0;
5583 }
5584 BN_rand(rnd, BN_num_bits(range), 1, 0);
5585 return mp_mod(rnd, range, rnd) == MP_OKAY;
5586 }
5587
5588 int
BN_is_prime(const BIGNUM * a,int checks,void (* callback)(int,int,void *),BN_CTX * ctx,void * cb_arg)5589 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg)
5590 {
5591 int primality;
5592
5593 if (a == NULL) {
5594 return 0;
5595 }
5596 USE_ARG(ctx);
5597 USE_ARG(cb_arg);
5598 USE_ARG(callback);
5599 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0;
5600 }
5601
5602 const BIGNUM *
BN_value_one(void)5603 BN_value_one(void)
5604 {
5605 static mp_digit digit = 1UL;
5606 static const BIGNUM one = { &digit, 1, 1, 0 };
5607
5608 return &one;
5609 }
5610
5611 int
BN_hex2bn(BIGNUM ** a,const char * str)5612 BN_hex2bn(BIGNUM **a, const char *str)
5613 {
5614 return getbn(a, str, 16);
5615 }
5616
5617 int
BN_dec2bn(BIGNUM ** a,const char * str)5618 BN_dec2bn(BIGNUM **a, const char *str)
5619 {
5620 return getbn(a, str, 10);
5621 }
5622
5623 int
BN_mod_sub(BIGNUM * r,BIGNUM * a,BIGNUM * b,const BIGNUM * m,BN_CTX * ctx)5624 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
5625 {
5626 USE_ARG(ctx);
5627 if (r == NULL || a == NULL || b == NULL || m == NULL) {
5628 return 0;
5629 }
5630 return mp_submod(a, b, __UNCONST(m), r) == MP_OKAY;
5631 }
5632
5633 int
BN_is_bit_set(const BIGNUM * a,int n)5634 BN_is_bit_set(const BIGNUM *a, int n)
5635 {
5636 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) {
5637 return 0;
5638 }
5639 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0;
5640 }
5641