xref: /netbsd-src/crypto/external/bsd/netpgp/dist/src/libbn/bignum.c (revision efb7d8e3ed80ee027d16001c46a8d9984e91b36a)
1 /*-
2  * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 /* LibTomMath, multiple-precision integer library -- Tom St Denis
26  *
27  * LibTomMath is a library that provides multiple-precision
28  * integer arithmetic as well as number theoretic functionality.
29  *
30  * The library was designed directly after the MPI library by
31  * Michael Fromberger but has been written from scratch with
32  * additional optimizations in place.
33  *
34  * The library is free for all purposes without any express
35  * guarantee it works.
36  *
37  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
38  */
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/param.h>
42 
43 #ifdef _KERNEL
44 # include <sys/kmem.h>
45 #else
46 # include <arpa/inet.h>
47 # include <ctype.h>
48 # include <inttypes.h>
49 # include <stdarg.h>
50 # include <stdio.h>
51 # include <stdlib.h>
52 # include <string.h>
53 # include <time.h>
54 # include <unistd.h>
55 #endif
56 
57 #include "misc.h"
58 #include "bn.h"
59 #include "digest.h"
60 
61 /**************************************************************************/
62 
63 /* LibTomMath, multiple-precision integer library -- Tom St Denis
64  *
65  * LibTomMath is a library that provides multiple-precision
66  * integer arithmetic as well as number theoretic functionality.
67  *
68  * The library was designed directly after the MPI library by
69  * Michael Fromberger but has been written from scratch with
70  * additional optimizations in place.
71  *
72  * The library is free for all purposes without any express
73  * guarantee it works.
74  *
75  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
76  */
77 
78 #define MP_PREC		32
79 #define DIGIT_BIT	28
80 #define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
81 
82 #define MP_WARRAY	/*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1)))
83 
84 #define MP_NO		0
85 #define MP_YES		1
86 
87 #ifndef USE_ARG
88 #define USE_ARG(x)	/*LINTED*/(void)&(x)
89 #endif
90 
91 #ifndef __arraycount
92 #define	__arraycount(__x)	(sizeof(__x) / sizeof(__x[0]))
93 #endif
94 
95 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
96 
97 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
98 
99 typedef int           mp_err;
100 
101 static int mp_mul(mp_int * a, mp_int * b, mp_int * c);
102 static int mp_sqr(mp_int * a, mp_int * b);
103 
104 static int mp_sub_d(mp_int *a, mp_digit b, mp_int *c);
105 
106 /* set to zero */
107 static void
mp_zero(mp_int * a)108 mp_zero(mp_int *a)
109 {
110   int       n;
111   mp_digit *tmp;
112 
113   a->sign = MP_ZPOS;
114   a->used = 0;
115 
116   tmp = a->dp;
117   /* XXX - memset */
118   for (n = 0; n < a->alloc; n++) {
119      *tmp++ = 0;
120   }
121 }
122 
123 /* grow as required */
124 static int
mp_grow(mp_int * a,int size)125 mp_grow(mp_int *a, int size)
126 {
127   int     i;
128   mp_digit *tmp;
129 
130   /* if the alloc size is smaller alloc more ram */
131   if (a->alloc < size) {
132     /* ensure there are always at least MP_PREC digits extra on top */
133     size += (MP_PREC * 2) - (size % MP_PREC);
134 
135     /* reallocate the array a->dp
136      *
137      * We store the return in a temporary variable
138      * in case the operation failed we don't want
139      * to overwrite the dp member of a.
140      */
141     tmp = realloc(a->dp, sizeof(*tmp) * size);
142     if (tmp == NULL) {
143       /* reallocation failed but "a" is still valid [can be freed] */
144       return MP_MEM;
145     }
146 
147     /* reallocation succeeded so set a->dp */
148     a->dp = tmp;
149 
150     /* zero excess digits */
151     i        = a->alloc;
152     a->alloc = size;
153     for (; i < a->alloc; i++) {
154       a->dp[i] = 0;
155     }
156   }
157   return MP_OKAY;
158 }
159 
160 /* shift left a certain amount of digits */
161 static int
mp_lshd(mp_int * a,int b)162 mp_lshd (mp_int * a, int b)
163 {
164   int     x, res;
165 
166   /* if its less than zero return */
167   if (b <= 0) {
168     return MP_OKAY;
169   }
170 
171   /* grow to fit the new digits */
172   if (a->alloc < a->used + b) {
173      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
174        return res;
175      }
176   }
177 
178   {
179     mp_digit *top, *bottom;
180 
181     /* increment the used by the shift amount then copy upwards */
182     a->used += b;
183 
184     /* top */
185     top = a->dp + a->used - 1;
186 
187     /* base */
188     bottom = a->dp + a->used - 1 - b;
189 
190     /* much like mp_rshd this is implemented using a sliding window
191      * except the window goes the otherway around.  Copying from
192      * the bottom to the top.  see bn_mp_rshd.c for more info.
193      */
194     for (x = a->used - 1; x >= b; x--) {
195       *top-- = *bottom--;
196     }
197 
198     /* zero the lower digits */
199     top = a->dp;
200     for (x = 0; x < b; x++) {
201       *top++ = 0;
202     }
203   }
204   return MP_OKAY;
205 }
206 
207 /* trim unused digits
208  *
209  * This is used to ensure that leading zero digits are
210  * trimed and the leading "used" digit will be non-zero
211  * Typically very fast.  Also fixes the sign if there
212  * are no more leading digits
213  */
214 static void
mp_clamp(mp_int * a)215 mp_clamp (mp_int * a)
216 {
217   /* decrease used while the most significant digit is
218    * zero.
219    */
220   while (a->used > 0 && a->dp[a->used - 1] == 0) {
221     --(a->used);
222   }
223 
224   /* reset the sign flag if used == 0 */
225   if (a->used == 0) {
226     a->sign = MP_ZPOS;
227   }
228 }
229 
230 /* copy, b = a */
231 static int
mp_copy(BIGNUM * a,BIGNUM * b)232 mp_copy(BIGNUM *a, BIGNUM *b)
233 {
234   int     res, n;
235 
236   /* if dst == src do nothing */
237   if (a == b) {
238     return MP_OKAY;
239   }
240   if (a == NULL || b == NULL) {
241 	return MP_VAL;
242   }
243 
244   /* grow dest */
245   if (b->alloc < a->used) {
246      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
247         return res;
248      }
249   }
250 
251   /* zero b and copy the parameters over */
252   {
253     mp_digit *tmpa, *tmpb;
254 
255     /* pointer aliases */
256 
257     /* source */
258     tmpa = a->dp;
259 
260     /* destination */
261     tmpb = b->dp;
262 
263     /* copy all the digits */
264     for (n = 0; n < a->used; n++) {
265       *tmpb++ = *tmpa++;
266     }
267 
268     /* clear high digits */
269     for (; n < b->used; n++) {
270       *tmpb++ = 0;
271     }
272   }
273 
274   /* copy used count and sign */
275   b->used = a->used;
276   b->sign = a->sign;
277   return MP_OKAY;
278 }
279 
280 /* shift left by a certain bit count */
281 static int
mp_mul_2d(mp_int * a,int b,mp_int * c)282 mp_mul_2d(mp_int *a, int b, mp_int *c)
283 {
284   mp_digit d;
285   int      res;
286 
287   /* copy */
288   if (a != c) {
289      if ((res = mp_copy (a, c)) != MP_OKAY) {
290        return res;
291      }
292   }
293 
294   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
295      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
296        return res;
297      }
298   }
299 
300   /* shift by as many digits in the bit count */
301   if (b >= (int)DIGIT_BIT) {
302     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
303       return res;
304     }
305   }
306 
307   /* shift any bit count < DIGIT_BIT */
308   d = (mp_digit) (b % DIGIT_BIT);
309   if (d != 0) {
310     mp_digit *tmpc, shift, mask, r, rr;
311     int x;
312 
313     /* bitmask for carries */
314     mask = (((mp_digit)1) << d) - 1;
315 
316     /* shift for msbs */
317     shift = DIGIT_BIT - d;
318 
319     /* alias */
320     tmpc = c->dp;
321 
322     /* carry */
323     r    = 0;
324     for (x = 0; x < c->used; x++) {
325       /* get the higher bits of the current word */
326       rr = (*tmpc >> shift) & mask;
327 
328       /* shift the current word and OR in the carry */
329       *tmpc = ((*tmpc << d) | r) & MP_MASK;
330       ++tmpc;
331 
332       /* set the carry to the carry bits of the current word */
333       r = rr;
334     }
335 
336     /* set final carry */
337     if (r != 0) {
338        c->dp[(c->used)++] = r;
339     }
340   }
341   mp_clamp (c);
342   return MP_OKAY;
343 }
344 
345 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
346 static int
mp_read_unsigned_bin(mp_int * a,const uint8_t * b,int c)347 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c)
348 {
349   int     res;
350 
351   /* make sure there are at least two digits */
352   if (a->alloc < 2) {
353      if ((res = mp_grow(a, 2)) != MP_OKAY) {
354         return res;
355      }
356   }
357 
358   /* zero the int */
359   mp_zero (a);
360 
361   /* read the bytes in */
362   while (c-- > 0) {
363     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
364       return res;
365     }
366 
367       a->dp[0] |= *b++;
368       a->used += 1;
369   }
370   mp_clamp (a);
371   return MP_OKAY;
372 }
373 
374 /* returns the number of bits in an int */
375 static int
mp_count_bits(const mp_int * a)376 mp_count_bits(const mp_int *a)
377 {
378   int     r;
379   mp_digit q;
380 
381   /* shortcut */
382   if (a->used == 0) {
383     return 0;
384   }
385 
386   /* get number of digits and add that */
387   r = (a->used - 1) * DIGIT_BIT;
388 
389   /* take the last digit and count the bits in it */
390   q = a->dp[a->used - 1];
391   while (q > ((mp_digit) 0)) {
392     ++r;
393     q >>= ((mp_digit) 1);
394   }
395   return r;
396 }
397 
398 /* compare maginitude of two ints (unsigned) */
399 static int
mp_cmp_mag(mp_int * a,mp_int * b)400 mp_cmp_mag (mp_int * a, mp_int * b)
401 {
402   int     n;
403   mp_digit *tmpa, *tmpb;
404 
405   /* compare based on # of non-zero digits */
406   if (a->used > b->used) {
407     return MP_GT;
408   }
409 
410   if (a->used < b->used) {
411     return MP_LT;
412   }
413 
414   /* alias for a */
415   tmpa = a->dp + (a->used - 1);
416 
417   /* alias for b */
418   tmpb = b->dp + (a->used - 1);
419 
420   /* compare based on digits  */
421   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
422     if (*tmpa > *tmpb) {
423       return MP_GT;
424     }
425 
426     if (*tmpa < *tmpb) {
427       return MP_LT;
428     }
429   }
430   return MP_EQ;
431 }
432 
433 /* compare two ints (signed)*/
434 static int
mp_cmp(mp_int * a,mp_int * b)435 mp_cmp (mp_int * a, mp_int * b)
436 {
437   /* compare based on sign */
438   if (a->sign != b->sign) {
439      if (a->sign == MP_NEG) {
440         return MP_LT;
441      } else {
442         return MP_GT;
443      }
444   }
445 
446   /* compare digits */
447   if (a->sign == MP_NEG) {
448      /* if negative compare opposite direction */
449      return mp_cmp_mag(b, a);
450   } else {
451      return mp_cmp_mag(a, b);
452   }
453 }
454 
455 /* get the size for an unsigned equivalent */
456 static int
mp_unsigned_bin_size(mp_int * a)457 mp_unsigned_bin_size (mp_int * a)
458 {
459   int     size = mp_count_bits (a);
460   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
461 }
462 
463 /* init a new mp_int */
464 static int
mp_init(mp_int * a)465 mp_init (mp_int * a)
466 {
467   int i;
468 
469   /* allocate memory required and clear it */
470   a->dp = netpgp_allocate(1, sizeof (*a->dp) * MP_PREC);
471   if (a->dp == NULL) {
472     return MP_MEM;
473   }
474 
475   /* set the digits to zero */
476   for (i = 0; i < MP_PREC; i++) {
477       a->dp[i] = 0;
478   }
479 
480   /* set the used to zero, allocated digits to the default precision
481    * and sign to positive */
482   a->used  = 0;
483   a->alloc = MP_PREC;
484   a->sign  = MP_ZPOS;
485 
486   return MP_OKAY;
487 }
488 
489 /* clear one (frees)  */
490 static void
mp_clear(mp_int * a)491 mp_clear (mp_int * a)
492 {
493   int i;
494 
495   /* only do anything if a hasn't been freed previously */
496   if (a->dp != NULL) {
497     /* first zero the digits */
498     for (i = 0; i < a->used; i++) {
499         a->dp[i] = 0;
500     }
501 
502     /* free ram */
503     netpgp_deallocate(a->dp, (size_t)a->alloc);
504 
505     /* reset members to make debugging easier */
506     a->dp    = NULL;
507     a->alloc = a->used = 0;
508     a->sign  = MP_ZPOS;
509   }
510 }
511 
512 static int
mp_init_multi(mp_int * mp,...)513 mp_init_multi(mp_int *mp, ...)
514 {
515     mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
516     int n = 0;                 /* Number of ok inits */
517     mp_int* cur_arg = mp;
518     va_list args;
519 
520     va_start(args, mp);        /* init args to next argument from caller */
521     while (cur_arg != NULL) {
522         if (mp_init(cur_arg) != MP_OKAY) {
523             /* Oops - error! Back-track and mp_clear what we already
524                succeeded in init-ing, then return error.
525             */
526             va_list clean_args;
527 
528             /* end the current list */
529             va_end(args);
530 
531             /* now start cleaning up */
532             cur_arg = mp;
533             va_start(clean_args, mp);
534             while (n--) {
535                 mp_clear(cur_arg);
536                 cur_arg = va_arg(clean_args, mp_int*);
537             }
538             va_end(clean_args);
539             res = MP_MEM;
540             break;
541         }
542         n++;
543         cur_arg = va_arg(args, mp_int*);
544     }
545     va_end(args);
546     return res;                /* Assumed ok, if error flagged above. */
547 }
548 
549 /* init an mp_init for a given size */
550 static int
mp_init_size(mp_int * a,int size)551 mp_init_size (mp_int * a, int size)
552 {
553   int x;
554 
555   /* pad size so there are always extra digits */
556   size += (MP_PREC * 2) - (size % MP_PREC);
557 
558   /* alloc mem */
559   a->dp = netpgp_allocate (1, sizeof (*a->dp) * size);
560   if (a->dp == NULL) {
561     return MP_MEM;
562   }
563 
564   /* set the members */
565   a->used  = 0;
566   a->alloc = size;
567   a->sign  = MP_ZPOS;
568 
569   /* zero the digits */
570   for (x = 0; x < size; x++) {
571       a->dp[x] = 0;
572   }
573 
574   return MP_OKAY;
575 }
576 
577 /* creates "a" then copies b into it */
mp_init_copy(mp_int * a,const mp_int * b)578 static int mp_init_copy (mp_int * a, const mp_int * b)
579 {
580   int     res;
581 
582   if ((res = mp_init (a)) != MP_OKAY) {
583     return res;
584   }
585   return mp_copy (b, a);
586 }
587 
588 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
589 static int
s_mp_add(const mp_int * a,const mp_int * b,mp_int * c)590 s_mp_add (const mp_int * a, const mp_int * b, mp_int * c)
591 {
592   const mp_int *x;
593   int     olduse, res, min, max;
594 
595   /* find sizes, we let |a| <= |b| which means we have to sort
596    * them.  "x" will point to the input with the most digits
597    */
598   if (a->used > b->used) {
599     min = b->used;
600     max = a->used;
601     x = a;
602   } else {
603     min = a->used;
604     max = b->used;
605     x = b;
606   }
607 
608   /* init result */
609   if (c->alloc < max + 1) {
610     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
611       return res;
612     }
613   }
614 
615   /* get old used digit count and set new one */
616   olduse = c->used;
617   c->used = max + 1;
618 
619   {
620     const mp_digit *tmpa, *tmpb;
621     mp_digit u, *tmpc;
622     int i;
623 
624     /* alias for digit pointers */
625 
626     /* first input */
627     tmpa = a->dp;
628 
629     /* second input */
630     tmpb = b->dp;
631 
632     /* destination */
633     tmpc = c->dp;
634 
635     /* zero the carry */
636     u = 0;
637     for (i = 0; i < min; i++) {
638       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
639       *tmpc = *tmpa++ + *tmpb++ + u;
640 
641       /* U = carry bit of T[i] */
642       u = *tmpc >> ((mp_digit)DIGIT_BIT);
643 
644       /* take away carry bit from T[i] */
645       *tmpc++ &= MP_MASK;
646     }
647 
648     /* now copy higher words if any, that is in A+B
649      * if A or B has more digits add those in
650      */
651     if (min != max) {
652       for (; i < max; i++) {
653         /* T[i] = X[i] + U */
654         *tmpc = x->dp[i] + u;
655 
656         /* U = carry bit of T[i] */
657         u = *tmpc >> ((mp_digit)DIGIT_BIT);
658 
659         /* take away carry bit from T[i] */
660         *tmpc++ &= MP_MASK;
661       }
662     }
663 
664     /* add carry */
665     *tmpc++ = u;
666 
667     /* clear digits above oldused */
668     for (i = c->used; i < olduse; i++) {
669       *tmpc++ = 0;
670     }
671   }
672 
673   mp_clamp (c);
674   return MP_OKAY;
675 }
676 
677 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
678 static int
s_mp_sub(const mp_int * a,const mp_int * b,mp_int * c)679 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
680 {
681   int     olduse, res, min, max;
682 
683   /* find sizes */
684   min = b->used;
685   max = a->used;
686 
687   /* init result */
688   if (c->alloc < max) {
689     if ((res = mp_grow (c, max)) != MP_OKAY) {
690       return res;
691     }
692   }
693   olduse = c->used;
694   c->used = max;
695 
696   {
697     const mp_digit *tmpa, *tmpb;
698     mp_digit u, *tmpc;
699     int i;
700 
701     /* alias for digit pointers */
702     tmpa = a->dp;
703     tmpb = b->dp;
704     tmpc = c->dp;
705 
706     /* set carry to zero */
707     u = 0;
708     for (i = 0; i < min; i++) {
709       /* T[i] = A[i] - B[i] - U */
710       *tmpc = *tmpa++ - *tmpb++ - u;
711 
712       /* U = carry bit of T[i]
713        * Note this saves performing an AND operation since
714        * if a carry does occur it will propagate all the way to the
715        * MSB.  As a result a single shift is enough to get the carry
716        */
717       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
718 
719       /* Clear carry from T[i] */
720       *tmpc++ &= MP_MASK;
721     }
722 
723     /* now copy higher words if any, e.g. if A has more digits than B  */
724     for (; i < max; i++) {
725       /* T[i] = A[i] - U */
726       *tmpc = *tmpa++ - u;
727 
728       /* U = carry bit of T[i] */
729       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
730 
731       /* Clear carry from T[i] */
732       *tmpc++ &= MP_MASK;
733     }
734 
735     /* clear digits above used (since we may not have grown result above) */
736     for (i = c->used; i < olduse; i++) {
737       *tmpc++ = 0;
738     }
739   }
740 
741   mp_clamp (c);
742   return MP_OKAY;
743 }
744 
745 /* high level subtraction (handles signs) */
746 static int
mp_sub(const mp_int * a,const mp_int * b,mp_int * c)747 mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
748 {
749   int     sa, sb, res;
750 
751   sa = a->sign;
752   sb = b->sign;
753 
754   if (sa != sb) {
755     /* subtract a negative from a positive, OR */
756     /* subtract a positive from a negative. */
757     /* In either case, ADD their magnitudes, */
758     /* and use the sign of the first number. */
759     c->sign = sa;
760     res = s_mp_add (a, b, c);
761   } else {
762     /* subtract a positive from a positive, OR */
763     /* subtract a negative from a negative. */
764     /* First, take the difference between their */
765     /* magnitudes, then... */
766     if (mp_cmp_mag (a, b) != MP_LT) {
767       /* Copy the sign from the first */
768       c->sign = sa;
769       /* The first has a larger or equal magnitude */
770       res = s_mp_sub (a, b, c);
771     } else {
772       /* The result has the *opposite* sign from */
773       /* the first number. */
774       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
775       /* The second has a larger magnitude */
776       res = s_mp_sub (b, a, c);
777     }
778   }
779   return res;
780 }
781 
782 /* shift right a certain amount of digits */
mp_rshd(mp_int * a,int b)783 static int mp_rshd (mp_int * a, int b)
784 {
785   int     x;
786 
787   /* if b <= 0 then ignore it */
788   if (b <= 0) {
789     return 0;
790   }
791 
792   /* if b > used then simply zero it and return */
793   if (a->used <= b) {
794     mp_zero (a);
795     return 0;
796   }
797 
798   {
799     mp_digit *bottom, *top;
800 
801     /* shift the digits down */
802 
803     /* bottom */
804     bottom = a->dp;
805 
806     /* top [offset into digits] */
807     top = a->dp + b;
808 
809     /* this is implemented as a sliding window where
810      * the window is b-digits long and digits from
811      * the top of the window are copied to the bottom
812      *
813      * e.g.
814 
815      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
816                  /\                   |      ---->
817                   \-------------------/      ---->
818      */
819     for (x = 0; x < (a->used - b); x++) {
820       *bottom++ = *top++;
821     }
822 
823     /* zero the top digits */
824     for (; x < a->used; x++) {
825       *bottom++ = 0;
826     }
827   }
828 
829   /* remove excess digits */
830   a->used -= b;
831   return 1;
832 }
833 
834 /* multiply by a digit */
835 static int
mp_mul_d(const mp_int * a,mp_digit b,mp_int * c)836 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
837 {
838   const mp_digit *tmpa;
839   mp_digit u, *tmpc;
840   mp_word  r;
841   int      ix, res, olduse;
842 
843   /* make sure c is big enough to hold a*b */
844   if (c->alloc < a->used + 1) {
845     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
846       return res;
847     }
848   }
849 
850   /* get the original destinations used count */
851   olduse = c->used;
852 
853   /* set the sign */
854   c->sign = a->sign;
855 
856   /* alias for a->dp [source] */
857   tmpa = a->dp;
858 
859   /* alias for c->dp [dest] */
860   tmpc = c->dp;
861 
862   /* zero carry */
863   u = 0;
864 
865   /* compute columns */
866   for (ix = 0; ix < a->used; ix++) {
867     /* compute product and carry sum for this term */
868     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
869 
870     /* mask off higher bits to get a single digit */
871     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
872 
873     /* send carry into next iteration */
874     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
875   }
876 
877   /* store final carry [if any] and increment ix offset  */
878   *tmpc++ = u;
879   ++ix;
880 
881   /* now zero digits above the top */
882   while (ix++ < olduse) {
883      *tmpc++ = 0;
884   }
885 
886   /* set used count */
887   c->used = a->used + 1;
888   mp_clamp(c);
889 
890   return MP_OKAY;
891 }
892 
893 /* high level addition (handles signs) */
mp_add(const mp_int * a,const mp_int * b,mp_int * c)894 static int mp_add (const mp_int * a, const mp_int * b, mp_int * c)
895 {
896   int     sa, sb, res;
897 
898   /* get sign of both inputs */
899   sa = a->sign;
900   sb = b->sign;
901 
902   /* handle two cases, not four */
903   if (sa == sb) {
904     /* both positive or both negative */
905     /* add their magnitudes, copy the sign */
906     c->sign = sa;
907     res = s_mp_add (a, b, c);
908   } else {
909     /* one positive, the other negative */
910     /* subtract the one with the greater magnitude from */
911     /* the one of the lesser magnitude.  The result gets */
912     /* the sign of the one with the greater magnitude. */
913     if (mp_cmp_mag (a, b) == MP_LT) {
914       c->sign = sb;
915       res = s_mp_sub (b, a, c);
916     } else {
917       c->sign = sa;
918       res = s_mp_sub (a, b, c);
919     }
920   }
921   return res;
922 }
923 
924 /* swap the elements of two integers, for cases where you can't simply swap the
925  * mp_int pointers around
926  */
927 static void
mp_exch(mp_int * a,mp_int * b)928 mp_exch(mp_int *a, mp_int *b)
929 {
930   mp_int  t;
931 
932   t  = *a;
933   *a = *b;
934   *b = t;
935 }
936 
937 /* calc a value mod 2**b */
938 static int
mp_mod_2d(const mp_int * a,int b,mp_int * c)939 mp_mod_2d (const mp_int * a, int b, mp_int * c)
940 {
941   int     x, res;
942 
943   /* if b is <= 0 then zero the int */
944   if (b <= 0) {
945     mp_zero (c);
946     return MP_OKAY;
947   }
948 
949   /* if the modulus is larger than the value than return */
950   if (b >= (int) (a->used * DIGIT_BIT)) {
951     res = mp_copy (a, c);
952     return res;
953   }
954 
955   /* copy */
956   if ((res = mp_copy (a, c)) != MP_OKAY) {
957     return res;
958   }
959 
960   /* zero digits above the last digit of the modulus */
961   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
962     c->dp[x] = 0;
963   }
964   /* clear the digit that is not completely outside/inside the modulus */
965   c->dp[b / DIGIT_BIT] &=
966     (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
967   mp_clamp (c);
968   return MP_OKAY;
969 }
970 
971 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
mp_div_2d(const mp_int * a,int b,mp_int * c,mp_int * d)972 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
973 {
974   mp_digit D, r, rr;
975   int     x, res;
976   mp_int  t;
977 
978 
979   /* if the shift count is <= 0 then we do no work */
980   if (b <= 0) {
981     res = mp_copy (a, c);
982     if (d != NULL) {
983       mp_zero (d);
984     }
985     return res;
986   }
987 
988   if ((res = mp_init (&t)) != MP_OKAY) {
989     return res;
990   }
991 
992   /* get the remainder */
993   if (d != NULL) {
994     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
995       mp_clear (&t);
996       return res;
997     }
998   }
999 
1000   /* copy */
1001   if ((res = mp_copy (a, c)) != MP_OKAY) {
1002     mp_clear (&t);
1003     return res;
1004   }
1005 
1006   /* shift by as many digits in the bit count */
1007   if (b >= (int)DIGIT_BIT) {
1008     mp_rshd (c, b / DIGIT_BIT);
1009   }
1010 
1011   /* shift any bit count < DIGIT_BIT */
1012   D = (mp_digit) (b % DIGIT_BIT);
1013   if (D != 0) {
1014     mp_digit *tmpc, mask, shift;
1015 
1016     /* mask */
1017     mask = (((mp_digit)1) << D) - 1;
1018 
1019     /* shift for lsb */
1020     shift = DIGIT_BIT - D;
1021 
1022     /* alias */
1023     tmpc = c->dp + (c->used - 1);
1024 
1025     /* carry */
1026     r = 0;
1027     for (x = c->used - 1; x >= 0; x--) {
1028       /* get the lower  bits of this word in a temp */
1029       rr = *tmpc & mask;
1030 
1031       /* shift the current word and mix in the carry bits from the previous word */
1032       *tmpc = (*tmpc >> D) | (r << shift);
1033       --tmpc;
1034 
1035       /* set the carry to the carry bits of the current word found above */
1036       r = rr;
1037     }
1038   }
1039   mp_clamp (c);
1040   if (d != NULL) {
1041     mp_exch (&t, d);
1042   }
1043   mp_clear (&t);
1044   return MP_OKAY;
1045 }
1046 
1047 /* integer signed division.
1048  * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1049  * HAC pp.598 Algorithm 14.20
1050  *
1051  * Note that the description in HAC is horribly
1052  * incomplete.  For example, it doesn't consider
1053  * the case where digits are removed from 'x' in
1054  * the inner loop.  It also doesn't consider the
1055  * case that y has fewer than three digits, etc..
1056  *
1057  * The overall algorithm is as described as
1058  * 14.20 from HAC but fixed to treat these cases.
1059 */
1060 static int
mp_div(mp_int * c,mp_int * d,const mp_int * a,const mp_int * b)1061 mp_div(mp_int *c, mp_int *d, const mp_int *a, const mp_int *b)
1062 {
1063   mp_int  q, x, y, t1, t2;
1064   int     res, n, t, i, norm, neg;
1065 
1066   /* is divisor zero ? */
1067   if (BN_is_zero (b) == 1) {
1068     return MP_VAL;
1069   }
1070 
1071   /* if a < b then q=0, r = a */
1072   if (mp_cmp_mag (a, b) == MP_LT) {
1073     if (d != NULL) {
1074       res = mp_copy (a, d);
1075     } else {
1076       res = MP_OKAY;
1077     }
1078     if (c != NULL) {
1079       mp_zero (c);
1080     }
1081     return res;
1082   }
1083 
1084   if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1085     return res;
1086   }
1087   q.used = a->used + 2;
1088 
1089   if ((res = mp_init (&t1)) != MP_OKAY) {
1090     goto LBL_Q;
1091   }
1092 
1093   if ((res = mp_init (&t2)) != MP_OKAY) {
1094     goto LBL_T1;
1095   }
1096 
1097   if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1098     goto LBL_T2;
1099   }
1100 
1101   if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1102     goto LBL_X;
1103   }
1104 
1105   /* fix the sign */
1106   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1107   x.sign = y.sign = MP_ZPOS;
1108 
1109   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1110   norm = mp_count_bits(&y) % DIGIT_BIT;
1111   if (norm < (int)(DIGIT_BIT-1)) {
1112      norm = (DIGIT_BIT-1) - norm;
1113      if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1114        goto LBL_Y;
1115      }
1116      if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1117        goto LBL_Y;
1118      }
1119   } else {
1120      norm = 0;
1121   }
1122 
1123   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1124   n = x.used - 1;
1125   t = y.used - 1;
1126 
1127   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1128   if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1129     goto LBL_Y;
1130   }
1131 
1132   while (mp_cmp (&x, &y) != MP_LT) {
1133     ++(q.dp[n - t]);
1134     if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1135       goto LBL_Y;
1136     }
1137   }
1138 
1139   /* reset y by shifting it back down */
1140   mp_rshd (&y, n - t);
1141 
1142   /* step 3. for i from n down to (t + 1) */
1143   for (i = n; i >= (t + 1); i--) {
1144     if (i > x.used) {
1145       continue;
1146     }
1147 
1148     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1149      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1150     if (x.dp[i] == y.dp[t]) {
1151       q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1152     } else {
1153       mp_word tmp;
1154       tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1155       tmp |= ((mp_word) x.dp[i - 1]);
1156       tmp /= ((mp_word) y.dp[t]);
1157       if (tmp > (mp_word) MP_MASK)
1158         tmp = MP_MASK;
1159       q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1160     }
1161 
1162     /* while (q{i-t-1} * (yt * b + y{t-1})) >
1163              xi * b**2 + xi-1 * b + xi-2
1164 
1165        do q{i-t-1} -= 1;
1166     */
1167     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1168     do {
1169       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1170 
1171       /* find left hand */
1172       mp_zero (&t1);
1173       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1174       t1.dp[1] = y.dp[t];
1175       t1.used = 2;
1176       if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1177         goto LBL_Y;
1178       }
1179 
1180       /* find right hand */
1181       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1182       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1183       t2.dp[2] = x.dp[i];
1184       t2.used = 3;
1185     } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1186 
1187     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1188     if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1189       goto LBL_Y;
1190     }
1191 
1192     if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1193       goto LBL_Y;
1194     }
1195 
1196     if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1197       goto LBL_Y;
1198     }
1199 
1200     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1201     if (x.sign == MP_NEG) {
1202       if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1203         goto LBL_Y;
1204       }
1205       if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1206         goto LBL_Y;
1207       }
1208       if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1209         goto LBL_Y;
1210       }
1211 
1212       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1213     }
1214   }
1215 
1216   /* now q is the quotient and x is the remainder
1217    * [which we have to normalize]
1218    */
1219 
1220   /* get sign before writing to c */
1221   x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1222 
1223   if (c != NULL) {
1224     mp_clamp (&q);
1225     mp_exch (&q, c);
1226     c->sign = neg;
1227   }
1228 
1229   if (d != NULL) {
1230     mp_div_2d (&x, norm, &x, NULL);
1231     mp_exch (&x, d);
1232   }
1233 
1234   res = MP_OKAY;
1235 
1236 LBL_Y:mp_clear (&y);
1237 LBL_X:mp_clear (&x);
1238 LBL_T2:mp_clear (&t2);
1239 LBL_T1:mp_clear (&t1);
1240 LBL_Q:mp_clear (&q);
1241   return res;
1242 }
1243 
1244 /* c = a mod b, 0 <= c < b */
1245 static int
mp_mod(const mp_int * a,const mp_int * b,mp_int * c)1246 mp_mod (const mp_int * a, const mp_int * b, mp_int * c)
1247 {
1248   mp_int  t;
1249   int     res;
1250 
1251   if ((res = mp_init (&t)) != MP_OKAY) {
1252     return res;
1253   }
1254 
1255   if ((res = mp_div (NULL, &t, a, b)) != MP_OKAY) {
1256     mp_clear (&t);
1257     return res;
1258   }
1259 
1260   if (t.sign != b->sign) {
1261     res = mp_add (b, &t, c);
1262   } else {
1263     res = MP_OKAY;
1264     mp_exch (&t, c);
1265   }
1266 
1267   mp_clear (&t);
1268   return res;
1269 }
1270 
1271 /* set to a digit */
mp_set(mp_int * a,mp_digit b)1272 static void mp_set (mp_int * a, mp_digit b)
1273 {
1274   mp_zero (a);
1275   a->dp[0] = b & MP_MASK;
1276   a->used  = (a->dp[0] != 0) ? 1 : 0;
1277 }
1278 
1279 /* b = a/2 */
mp_div_2(const mp_int * a,mp_int * b)1280 static int mp_div_2(const mp_int * a, mp_int * b)
1281 {
1282   int     x, res, oldused;
1283 
1284   /* copy */
1285   if (b->alloc < a->used) {
1286     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1287       return res;
1288     }
1289   }
1290 
1291   oldused = b->used;
1292   b->used = a->used;
1293   {
1294     mp_digit r, rr, *tmpa, *tmpb;
1295 
1296     /* source alias */
1297     tmpa = a->dp + b->used - 1;
1298 
1299     /* dest alias */
1300     tmpb = b->dp + b->used - 1;
1301 
1302     /* carry */
1303     r = 0;
1304     for (x = b->used - 1; x >= 0; x--) {
1305       /* get the carry for the next iteration */
1306       rr = *tmpa & 1;
1307 
1308       /* shift the current digit, add in carry and store */
1309       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1310 
1311       /* forward carry to next iteration */
1312       r = rr;
1313     }
1314 
1315     /* zero excess digits */
1316     tmpb = b->dp + b->used;
1317     for (x = b->used; x < oldused; x++) {
1318       *tmpb++ = 0;
1319     }
1320   }
1321   b->sign = a->sign;
1322   mp_clamp (b);
1323   return MP_OKAY;
1324 }
1325 
1326 /* compare a digit */
mp_cmp_d(const mp_int * a,mp_digit b)1327 static int mp_cmp_d(const mp_int * a, mp_digit b)
1328 {
1329   /* compare based on sign */
1330   if (a->sign == MP_NEG) {
1331     return MP_LT;
1332   }
1333 
1334   /* compare based on magnitude */
1335   if (a->used > 1) {
1336     return MP_GT;
1337   }
1338 
1339   /* compare the only digit of a to b */
1340   if (a->dp[0] > b) {
1341     return MP_GT;
1342   } else if (a->dp[0] < b) {
1343     return MP_LT;
1344   } else {
1345     return MP_EQ;
1346   }
1347 }
1348 
mp_clear_multi(mp_int * mp,...)1349 static void mp_clear_multi(mp_int *mp, ...)
1350 {
1351     mp_int* next_mp = mp;
1352     va_list args;
1353     va_start(args, mp);
1354     while (next_mp != NULL) {
1355         mp_clear(next_mp);
1356         next_mp = va_arg(args, mp_int*);
1357     }
1358     va_end(args);
1359 }
1360 
1361 /* computes the modular inverse via binary extended euclidean algorithm,
1362  * that is c = 1/a mod b
1363  *
1364  * Based on slow invmod except this is optimized for the case where b is
1365  * odd as per HAC Note 14.64 on pp. 610
1366  */
1367 static int
fast_mp_invmod(const mp_int * a,const mp_int * b,mp_int * c)1368 fast_mp_invmod (const mp_int * a, const mp_int * b, mp_int * c)
1369 {
1370   mp_int  x, y, u, v, B, D;
1371   int     res, neg;
1372 
1373   /* 2. [modified] b must be odd   */
1374   if (BN_is_even (b) == 1) {
1375     return MP_VAL;
1376   }
1377 
1378   /* init all our temps */
1379   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
1380      return res;
1381   }
1382 
1383   /* x == modulus, y == value to invert */
1384   if ((res = mp_copy (b, &x)) != MP_OKAY) {
1385     goto LBL_ERR;
1386   }
1387 
1388   /* we need y = |a| */
1389   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
1390     goto LBL_ERR;
1391   }
1392 
1393   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1394   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1395     goto LBL_ERR;
1396   }
1397   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1398     goto LBL_ERR;
1399   }
1400   mp_set (&D, (unsigned long)1);
1401 
1402 top:
1403   /* 4.  while u is even do */
1404   while (BN_is_even (&u) == 1) {
1405     /* 4.1 u = u/2 */
1406     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1407       goto LBL_ERR;
1408     }
1409     /* 4.2 if B is odd then */
1410     if (BN_is_odd (&B) == 1) {
1411       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1412         goto LBL_ERR;
1413       }
1414     }
1415     /* B = B/2 */
1416     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1417       goto LBL_ERR;
1418     }
1419   }
1420 
1421   /* 5.  while v is even do */
1422   while (BN_is_even (&v) == 1) {
1423     /* 5.1 v = v/2 */
1424     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1425       goto LBL_ERR;
1426     }
1427     /* 5.2 if D is odd then */
1428     if (BN_is_odd (&D) == 1) {
1429       /* D = (D-x)/2 */
1430       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1431         goto LBL_ERR;
1432       }
1433     }
1434     /* D = D/2 */
1435     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1436       goto LBL_ERR;
1437     }
1438   }
1439 
1440   /* 6.  if u >= v then */
1441   if (mp_cmp (&u, &v) != MP_LT) {
1442     /* u = u - v, B = B - D */
1443     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1444       goto LBL_ERR;
1445     }
1446 
1447     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1448       goto LBL_ERR;
1449     }
1450   } else {
1451     /* v - v - u, D = D - B */
1452     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1453       goto LBL_ERR;
1454     }
1455 
1456     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1457       goto LBL_ERR;
1458     }
1459   }
1460 
1461   /* if not zero goto step 4 */
1462   if (BN_is_zero (&u) == 0) {
1463     goto top;
1464   }
1465 
1466   /* now a = C, b = D, gcd == g*v */
1467 
1468   /* if v != 1 then there is no inverse */
1469   if (mp_cmp_d (&v, (unsigned long)1) != MP_EQ) {
1470     res = MP_VAL;
1471     goto LBL_ERR;
1472   }
1473 
1474   /* b is now the inverse */
1475   neg = a->sign;
1476   while (D.sign == MP_NEG) {
1477     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1478       goto LBL_ERR;
1479     }
1480   }
1481   mp_exch (&D, c);
1482   c->sign = neg;
1483   res = MP_OKAY;
1484 
1485 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
1486   return res;
1487 }
1488 
1489 /* hac 14.61, pp608 */
1490 static int
mp_invmod_slow(const mp_int * a,const mp_int * b,mp_int * c)1491 mp_invmod_slow (const mp_int * a, const mp_int * b, mp_int * c)
1492 {
1493   mp_int  x, y, u, v, A, B, C, D;
1494   int     res;
1495 
1496   /* b cannot be negative */
1497   if (b->sign == MP_NEG || BN_is_zero(b) == 1) {
1498     return MP_VAL;
1499   }
1500 
1501   /* init temps */
1502   if ((res = mp_init_multi(&x, &y, &u, &v,
1503                            &A, &B, &C, &D, NULL)) != MP_OKAY) {
1504      return res;
1505   }
1506 
1507   /* x = a, y = b */
1508   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1509       goto LBL_ERR;
1510   }
1511   if ((res = mp_copy (b, &y)) != MP_OKAY) {
1512     goto LBL_ERR;
1513   }
1514 
1515   /* 2. [modified] if x,y are both even then return an error! */
1516   if (BN_is_even (&x) == 1 && BN_is_even (&y) == 1) {
1517     res = MP_VAL;
1518     goto LBL_ERR;
1519   }
1520 
1521   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1522   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1523     goto LBL_ERR;
1524   }
1525   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1526     goto LBL_ERR;
1527   }
1528   mp_set (&A, (unsigned long)1);
1529   mp_set (&D, (unsigned long)1);
1530 
1531 top:
1532   /* 4.  while u is even do */
1533   while (BN_is_even (&u) == 1) {
1534     /* 4.1 u = u/2 */
1535     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1536       goto LBL_ERR;
1537     }
1538     /* 4.2 if A or B is odd then */
1539     if (BN_is_odd (&A) == 1 || BN_is_odd (&B) == 1) {
1540       /* A = (A+y)/2, B = (B-x)/2 */
1541       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1542          goto LBL_ERR;
1543       }
1544       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1545          goto LBL_ERR;
1546       }
1547     }
1548     /* A = A/2, B = B/2 */
1549     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1550       goto LBL_ERR;
1551     }
1552     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1553       goto LBL_ERR;
1554     }
1555   }
1556 
1557   /* 5.  while v is even do */
1558   while (BN_is_even (&v) == 1) {
1559     /* 5.1 v = v/2 */
1560     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1561       goto LBL_ERR;
1562     }
1563     /* 5.2 if C or D is odd then */
1564     if (BN_is_odd (&C) == 1 || BN_is_odd (&D) == 1) {
1565       /* C = (C+y)/2, D = (D-x)/2 */
1566       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1567          goto LBL_ERR;
1568       }
1569       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1570          goto LBL_ERR;
1571       }
1572     }
1573     /* C = C/2, D = D/2 */
1574     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1575       goto LBL_ERR;
1576     }
1577     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1578       goto LBL_ERR;
1579     }
1580   }
1581 
1582   /* 6.  if u >= v then */
1583   if (mp_cmp (&u, &v) != MP_LT) {
1584     /* u = u - v, A = A - C, B = B - D */
1585     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1586       goto LBL_ERR;
1587     }
1588 
1589     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1590       goto LBL_ERR;
1591     }
1592 
1593     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1594       goto LBL_ERR;
1595     }
1596   } else {
1597     /* v - v - u, C = C - A, D = D - B */
1598     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1599       goto LBL_ERR;
1600     }
1601 
1602     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1603       goto LBL_ERR;
1604     }
1605 
1606     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1607       goto LBL_ERR;
1608     }
1609   }
1610 
1611   /* if not zero goto step 4 */
1612   if (BN_is_zero (&u) == 0)
1613     goto top;
1614 
1615   /* now a = C, b = D, gcd == g*v */
1616 
1617   /* if v != 1 then there is no inverse */
1618   if (mp_cmp_d (&v, 1) != MP_EQ) {
1619     res = MP_VAL;
1620     goto LBL_ERR;
1621   }
1622 
1623   /* if its too low */
1624   while (mp_cmp_d(&C, 0) == MP_LT) {
1625       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1626          goto LBL_ERR;
1627       }
1628   }
1629 
1630   /* too big */
1631   while (mp_cmp_mag(&C, b) != MP_LT) {
1632       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1633          goto LBL_ERR;
1634       }
1635   }
1636 
1637   /* C is now the inverse */
1638   mp_exch (&C, c);
1639   res = MP_OKAY;
1640 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
1641   return res;
1642 }
1643 
1644 static int
mp_invmod(mp_int * c,const mp_int * a,const mp_int * b)1645 mp_invmod(mp_int *c, const mp_int *a, const mp_int *b)
1646 {
1647   /* b cannot be negative */
1648   if (b->sign == MP_NEG || BN_is_zero(b) == 1) {
1649     return MP_VAL;
1650   }
1651 
1652   /* if the modulus is odd we can use a faster routine instead */
1653   if (BN_is_odd (b) == 1) {
1654     return fast_mp_invmod(a, b, c);
1655   }
1656 
1657   return mp_invmod_slow(a, b, c);
1658 
1659   /*NOTREACHED*/
1660   return MP_VAL;
1661 }
1662 
1663 /* b = |a|
1664  *
1665  * Simple function copies the input and fixes the sign to positive
1666  */
1667 static int
mp_abs(const mp_int * a,mp_int * b)1668 mp_abs (const mp_int * a, mp_int * b)
1669 {
1670   int     res;
1671 
1672   /* copy a to b */
1673   if (a != b) {
1674      if ((res = mp_copy (a, b)) != MP_OKAY) {
1675        return res;
1676      }
1677   }
1678 
1679   /* force the sign of b to positive */
1680   b->sign = MP_ZPOS;
1681 
1682   return MP_OKAY;
1683 }
1684 
1685 /* determines if reduce_2k_l can be used */
mp_reduce_is_2k_l(const mp_int * a)1686 static int mp_reduce_is_2k_l(const mp_int *a)
1687 {
1688    int ix, iy;
1689 
1690    if (a->used == 0) {
1691       return MP_NO;
1692    } else if (a->used == 1) {
1693       return MP_YES;
1694    } else if (a->used > 1) {
1695       /* if more than half of the digits are -1 we're sold */
1696       for (iy = ix = 0; ix < a->used; ix++) {
1697           if (a->dp[ix] == MP_MASK) {
1698               ++iy;
1699           }
1700       }
1701       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1702 
1703    }
1704    return MP_NO;
1705 }
1706 
1707 /* computes a = 2**b
1708  *
1709  * Simple algorithm which zeroes the int, grows it then just sets one bit
1710  * as required.
1711  */
1712 static int
mp_2expt(mp_int * a,int b)1713 mp_2expt (mp_int * a, int b)
1714 {
1715   int     res;
1716 
1717   /* zero a as per default */
1718   mp_zero (a);
1719 
1720   /* grow a to accomodate the single bit */
1721   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
1722     return res;
1723   }
1724 
1725   /* set the used count of where the bit will go */
1726   a->used = b / DIGIT_BIT + 1;
1727 
1728   /* put the single bit in its place */
1729   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
1730 
1731   return MP_OKAY;
1732 }
1733 
1734 /* pre-calculate the value required for Barrett reduction
1735  * For a given modulus "b" it calulates the value required in "a"
1736  */
mp_reduce_setup(mp_int * a,mp_int * b)1737 static int mp_reduce_setup (mp_int * a, mp_int * b)
1738 {
1739   int     res;
1740 
1741   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
1742     return res;
1743   }
1744   return mp_div (a, NULL, a, b);
1745 }
1746 
1747 /* b = a*2 */
mp_mul_2(mp_int * a,mp_int * b)1748 static int mp_mul_2(mp_int * a, mp_int * b)
1749 {
1750   int     x, res, oldused;
1751 
1752   /* grow to accomodate result */
1753   if (b->alloc < a->used + 1) {
1754     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
1755       return res;
1756     }
1757   }
1758 
1759   oldused = b->used;
1760   b->used = a->used;
1761 
1762   {
1763     mp_digit r, rr, *tmpa, *tmpb;
1764 
1765     /* alias for source */
1766     tmpa = a->dp;
1767 
1768     /* alias for dest */
1769     tmpb = b->dp;
1770 
1771     /* carry */
1772     r = 0;
1773     for (x = 0; x < a->used; x++) {
1774 
1775       /* get what will be the *next* carry bit from the
1776        * MSB of the current digit
1777        */
1778       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
1779 
1780       /* now shift up this digit, add in the carry [from the previous] */
1781       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
1782 
1783       /* copy the carry that would be from the source
1784        * digit into the next iteration
1785        */
1786       r = rr;
1787     }
1788 
1789     /* new leading digit? */
1790     if (r != 0) {
1791       /* add a MSB which is always 1 at this point */
1792       *tmpb = 1;
1793       ++(b->used);
1794     }
1795 
1796     /* now zero any excess digits on the destination
1797      * that we didn't write to
1798      */
1799     tmpb = b->dp + b->used;
1800     for (x = b->used; x < oldused; x++) {
1801       *tmpb++ = 0;
1802     }
1803   }
1804   b->sign = a->sign;
1805   return MP_OKAY;
1806 }
1807 
1808 /* divide by three (based on routine from MPI and the GMP manual) */
1809 static int
mp_div_3(mp_int * a,mp_int * c,mp_digit * d)1810 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
1811 {
1812   mp_int   q;
1813   mp_word  w, t;
1814   mp_digit b;
1815   int      res, ix;
1816 
1817   /* b = 2**DIGIT_BIT / 3 */
1818   b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
1819 
1820   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1821      return res;
1822   }
1823 
1824   q.used = a->used;
1825   q.sign = a->sign;
1826   w = 0;
1827   for (ix = a->used - 1; ix >= 0; ix--) {
1828      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1829 
1830      if (w >= 3) {
1831         /* multiply w by [1/3] */
1832         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
1833 
1834         /* now subtract 3 * [w/3] from w, to get the remainder */
1835         w -= t+t+t;
1836 
1837         /* fixup the remainder as required since
1838          * the optimization is not exact.
1839          */
1840         while (w >= 3) {
1841            t += 1;
1842            w -= 3;
1843         }
1844       } else {
1845         t = 0;
1846       }
1847       q.dp[ix] = (mp_digit)t;
1848   }
1849 
1850   /* [optional] store the remainder */
1851   if (d != NULL) {
1852      *d = (mp_digit)w;
1853   }
1854 
1855   /* [optional] store the quotient */
1856   if (c != NULL) {
1857      mp_clamp(&q);
1858      mp_exch(&q, c);
1859   }
1860   mp_clear(&q);
1861 
1862   return res;
1863 }
1864 
1865 /* multiplication using the Toom-Cook 3-way algorithm
1866  *
1867  * Much more complicated than Karatsuba but has a lower
1868  * asymptotic running time of O(N**1.464).  This algorithm is
1869  * only particularly useful on VERY large inputs
1870  * (we're talking 1000s of digits here...).
1871 */
mp_toom_mul(mp_int * a,mp_int * b,mp_int * c)1872 static int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
1873 {
1874     mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
1875     int res, B;
1876 
1877     /* init temps */
1878     if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
1879                              &a0, &a1, &a2, &b0, &b1,
1880                              &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
1881        return res;
1882     }
1883 
1884     /* B */
1885     B = MIN(a->used, b->used) / 3;
1886 
1887     /* a = a2 * B**2 + a1 * B + a0 */
1888     if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
1889        goto ERR;
1890     }
1891 
1892     if ((res = mp_copy(a, &a1)) != MP_OKAY) {
1893        goto ERR;
1894     }
1895     mp_rshd(&a1, B);
1896     mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
1897 
1898     if ((res = mp_copy(a, &a2)) != MP_OKAY) {
1899        goto ERR;
1900     }
1901     mp_rshd(&a2, B*2);
1902 
1903     /* b = b2 * B**2 + b1 * B + b0 */
1904     if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
1905        goto ERR;
1906     }
1907 
1908     if ((res = mp_copy(b, &b1)) != MP_OKAY) {
1909        goto ERR;
1910     }
1911     mp_rshd(&b1, B);
1912     mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
1913 
1914     if ((res = mp_copy(b, &b2)) != MP_OKAY) {
1915        goto ERR;
1916     }
1917     mp_rshd(&b2, B*2);
1918 
1919     /* w0 = a0*b0 */
1920     if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
1921        goto ERR;
1922     }
1923 
1924     /* w4 = a2 * b2 */
1925     if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
1926        goto ERR;
1927     }
1928 
1929     /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
1930     if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
1931        goto ERR;
1932     }
1933     if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1934        goto ERR;
1935     }
1936     if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
1937        goto ERR;
1938     }
1939     if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
1940        goto ERR;
1941     }
1942 
1943     if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
1944        goto ERR;
1945     }
1946     if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1947        goto ERR;
1948     }
1949     if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
1950        goto ERR;
1951     }
1952     if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
1953        goto ERR;
1954     }
1955 
1956     if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
1957        goto ERR;
1958     }
1959 
1960     /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
1961     if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
1962        goto ERR;
1963     }
1964     if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1965        goto ERR;
1966     }
1967     if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
1968        goto ERR;
1969     }
1970     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1971        goto ERR;
1972     }
1973 
1974     if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
1975        goto ERR;
1976     }
1977     if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1978        goto ERR;
1979     }
1980     if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
1981        goto ERR;
1982     }
1983     if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
1984        goto ERR;
1985     }
1986 
1987     if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
1988        goto ERR;
1989     }
1990 
1991 
1992     /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
1993     if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
1994        goto ERR;
1995     }
1996     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1997        goto ERR;
1998     }
1999     if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
2000        goto ERR;
2001     }
2002     if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
2003        goto ERR;
2004     }
2005     if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
2006        goto ERR;
2007     }
2008 
2009     /* now solve the matrix
2010 
2011        0  0  0  0  1
2012        1  2  4  8  16
2013        1  1  1  1  1
2014        16 8  4  2  1
2015        1  0  0  0  0
2016 
2017        using 12 subtractions, 4 shifts,
2018               2 small divisions and 1 small multiplication
2019      */
2020 
2021      /* r1 - r4 */
2022      if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
2023         goto ERR;
2024      }
2025      /* r3 - r0 */
2026      if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
2027         goto ERR;
2028      }
2029      /* r1/2 */
2030      if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
2031         goto ERR;
2032      }
2033      /* r3/2 */
2034      if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
2035         goto ERR;
2036      }
2037      /* r2 - r0 - r4 */
2038      if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
2039         goto ERR;
2040      }
2041      if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
2042         goto ERR;
2043      }
2044      /* r1 - r2 */
2045      if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2046         goto ERR;
2047      }
2048      /* r3 - r2 */
2049      if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2050         goto ERR;
2051      }
2052      /* r1 - 8r0 */
2053      if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
2054         goto ERR;
2055      }
2056      if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
2057         goto ERR;
2058      }
2059      /* r3 - 8r4 */
2060      if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
2061         goto ERR;
2062      }
2063      if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
2064         goto ERR;
2065      }
2066      /* 3r2 - r1 - r3 */
2067      if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
2068         goto ERR;
2069      }
2070      if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
2071         goto ERR;
2072      }
2073      if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
2074         goto ERR;
2075      }
2076      /* r1 - r2 */
2077      if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2078         goto ERR;
2079      }
2080      /* r3 - r2 */
2081      if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2082         goto ERR;
2083      }
2084      /* r1/3 */
2085      if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
2086         goto ERR;
2087      }
2088      /* r3/3 */
2089      if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
2090         goto ERR;
2091      }
2092 
2093      /* at this point shift W[n] by B*n */
2094      if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
2095         goto ERR;
2096      }
2097      if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
2098         goto ERR;
2099      }
2100      if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
2101         goto ERR;
2102      }
2103      if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
2104         goto ERR;
2105      }
2106 
2107      if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
2108         goto ERR;
2109      }
2110      if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2111         goto ERR;
2112      }
2113      if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2114         goto ERR;
2115      }
2116      if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
2117         goto ERR;
2118      }
2119 
2120 ERR:
2121      mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
2122                     &a0, &a1, &a2, &b0, &b1,
2123                     &b2, &tmp1, &tmp2, NULL);
2124      return res;
2125 }
2126 
2127 #define TOOM_MUL_CUTOFF	350
2128 #define KARATSUBA_MUL_CUTOFF 80
2129 
2130 /* c = |a| * |b| using Karatsuba Multiplication using
2131  * three half size multiplications
2132  *
2133  * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2134  * let n represent half of the number of digits in
2135  * the min(a,b)
2136  *
2137  * a = a1 * B**n + a0
2138  * b = b1 * B**n + b0
2139  *
2140  * Then, a * b =>
2141    a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
2142  *
2143  * Note that a1b1 and a0b0 are used twice and only need to be
2144  * computed once.  So in total three half size (half # of
2145  * digit) multiplications are performed, a0b0, a1b1 and
2146  * (a1+b1)(a0+b0)
2147  *
2148  * Note that a multiplication of half the digits requires
2149  * 1/4th the number of single precision multiplications so in
2150  * total after one call 25% of the single precision multiplications
2151  * are saved.  Note also that the call to mp_mul can end up back
2152  * in this function if the a0, a1, b0, or b1 are above the threshold.
2153  * This is known as divide-and-conquer and leads to the famous
2154  * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2155  * the standard O(N**2) that the baseline/comba methods use.
2156  * Generally though the overhead of this method doesn't pay off
2157  * until a certain size (N ~ 80) is reached.
2158  */
mp_karatsuba_mul(mp_int * a,mp_int * b,mp_int * c)2159 static int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
2160 {
2161   mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
2162   int     B;
2163   int     err;
2164 
2165   /* default the return code to an error */
2166   err = MP_MEM;
2167 
2168   /* min # of digits */
2169   B = MIN (a->used, b->used);
2170 
2171   /* now divide in two */
2172   B = (int)((unsigned)B >> 1);
2173 
2174   /* init copy all the temps */
2175   if (mp_init_size (&x0, B) != MP_OKAY)
2176     goto ERR;
2177   if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2178     goto X0;
2179   if (mp_init_size (&y0, B) != MP_OKAY)
2180     goto X1;
2181   if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2182     goto Y0;
2183 
2184   /* init temps */
2185   if (mp_init_size (&t1, B * 2) != MP_OKAY)
2186     goto Y1;
2187   if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2188     goto T1;
2189   if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2190     goto X0Y0;
2191 
2192   /* now shift the digits */
2193   x0.used = y0.used = B;
2194   x1.used = a->used - B;
2195   y1.used = b->used - B;
2196 
2197   {
2198     int x;
2199     mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2200 
2201     /* we copy the digits directly instead of using higher level functions
2202      * since we also need to shift the digits
2203      */
2204     tmpa = a->dp;
2205     tmpb = b->dp;
2206 
2207     tmpx = x0.dp;
2208     tmpy = y0.dp;
2209     for (x = 0; x < B; x++) {
2210       *tmpx++ = *tmpa++;
2211       *tmpy++ = *tmpb++;
2212     }
2213 
2214     tmpx = x1.dp;
2215     for (x = B; x < a->used; x++) {
2216       *tmpx++ = *tmpa++;
2217     }
2218 
2219     tmpy = y1.dp;
2220     for (x = B; x < b->used; x++) {
2221       *tmpy++ = *tmpb++;
2222     }
2223   }
2224 
2225   /* only need to clamp the lower words since by definition the
2226    * upper words x1/y1 must have a known number of digits
2227    */
2228   mp_clamp (&x0);
2229   mp_clamp (&y0);
2230 
2231   /* now calc the products x0y0 and x1y1 */
2232   /* after this x0 is no longer required, free temp [x0==t2]! */
2233   if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2234     goto X1Y1;          /* x0y0 = x0*y0 */
2235   if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2236     goto X1Y1;          /* x1y1 = x1*y1 */
2237 
2238   /* now calc x1+x0 and y1+y0 */
2239   if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
2240     goto X1Y1;          /* t1 = x1 - x0 */
2241   if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
2242     goto X1Y1;          /* t2 = y1 - y0 */
2243   if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2244     goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */
2245 
2246   /* add x0y0 */
2247   if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2248     goto X1Y1;          /* t2 = x0y0 + x1y1 */
2249   if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
2250     goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
2251 
2252   /* shift by B */
2253   if (mp_lshd (&t1, B) != MP_OKAY)
2254     goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2255   if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2256     goto X1Y1;          /* x1y1 = x1y1 << 2*B */
2257 
2258   if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2259     goto X1Y1;          /* t1 = x0y0 + t1 */
2260   if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2261     goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */
2262 
2263   /* Algorithm succeeded set the return code to MP_OKAY */
2264   err = MP_OKAY;
2265 
2266 X1Y1:mp_clear (&x1y1);
2267 X0Y0:mp_clear (&x0y0);
2268 T1:mp_clear (&t1);
2269 Y1:mp_clear (&y1);
2270 Y0:mp_clear (&y0);
2271 X1:mp_clear (&x1);
2272 X0:mp_clear (&x0);
2273 ERR:
2274   return err;
2275 }
2276 
2277 /* Fast (comba) multiplier
2278  *
2279  * This is the fast column-array [comba] multiplier.  It is
2280  * designed to compute the columns of the product first
2281  * then handle the carries afterwards.  This has the effect
2282  * of making the nested loops that compute the columns very
2283  * simple and schedulable on super-scalar processors.
2284  *
2285  * This has been modified to produce a variable number of
2286  * digits of output so if say only a half-product is required
2287  * you don't have to compute the upper half (a feature
2288  * required for fast Barrett reduction).
2289  *
2290  * Based on Algorithm 14.12 on pp.595 of HAC.
2291  *
2292  */
fast_s_mp_mul_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2293 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2294 {
2295   int     olduse, res, pa, ix, iz;
2296   /*LINTED*/
2297   mp_digit W[MP_WARRAY];
2298   mp_word  _W;
2299 
2300   /* grow the destination as required */
2301   if (c->alloc < digs) {
2302     if ((res = mp_grow (c, digs)) != MP_OKAY) {
2303       return res;
2304     }
2305   }
2306 
2307   /* number of output digits to produce */
2308   pa = MIN(digs, a->used + b->used);
2309 
2310   /* clear the carry */
2311   _W = 0;
2312   for (ix = 0; ix < pa; ix++) {
2313       int      tx, ty;
2314       int      iy;
2315       mp_digit *tmpx, *tmpy;
2316 
2317       /* get offsets into the two bignums */
2318       ty = MIN(b->used-1, ix);
2319       tx = ix - ty;
2320 
2321       /* setup temp aliases */
2322       tmpx = a->dp + tx;
2323       tmpy = b->dp + ty;
2324 
2325       /* this is the number of times the loop will iterrate, essentially
2326          while (tx++ < a->used && ty-- >= 0) { ... }
2327        */
2328       iy = MIN(a->used-tx, ty+1);
2329 
2330       /* execute loop */
2331       for (iz = 0; iz < iy; ++iz) {
2332          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2333 
2334       }
2335 
2336       /* store term */
2337       W[ix] = ((mp_digit)_W) & MP_MASK;
2338 
2339       /* make next carry */
2340       _W = _W >> ((mp_word)DIGIT_BIT);
2341  }
2342 
2343   /* setup dest */
2344   olduse  = c->used;
2345   c->used = pa;
2346 
2347   {
2348     mp_digit *tmpc;
2349     tmpc = c->dp;
2350     for (ix = 0; ix < pa+1; ix++) {
2351       /* now extract the previous digit [below the carry] */
2352       *tmpc++ = W[ix];
2353     }
2354 
2355     /* clear unused digits [that existed in the old copy of c] */
2356     for (; ix < olduse; ix++) {
2357       *tmpc++ = 0;
2358     }
2359   }
2360   mp_clamp (c);
2361   return MP_OKAY;
2362 }
2363 
2364 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */
2365 /* Revision: 1.2 $ */
2366 /* Date: 2011/03/18 16:22:09 $ */
2367 
2368 
2369 /* multiplies |a| * |b| and only computes upto digs digits of result
2370  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
2371  * many digits of output are created.
2372  */
s_mp_mul_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2373 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2374 {
2375   mp_int  t;
2376   int     res, pa, pb, ix, iy;
2377   mp_digit u;
2378   mp_word r;
2379   mp_digit tmpx, *tmpt, *tmpy;
2380 
2381   /* can we use the fast multiplier? */
2382   if (((unsigned)(digs) < MP_WARRAY) &&
2383       MIN (a->used, b->used) <
2384           (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2385     return fast_s_mp_mul_digs (a, b, c, digs);
2386   }
2387 
2388   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
2389     return res;
2390   }
2391   t.used = digs;
2392 
2393   /* compute the digits of the product directly */
2394   pa = a->used;
2395   for (ix = 0; ix < pa; ix++) {
2396     /* set the carry to zero */
2397     u = 0;
2398 
2399     /* limit ourselves to making digs digits of output */
2400     pb = MIN (b->used, digs - ix);
2401 
2402     /* setup some aliases */
2403     /* copy of the digit from a used within the nested loop */
2404     tmpx = a->dp[ix];
2405 
2406     /* an alias for the destination shifted ix places */
2407     tmpt = t.dp + ix;
2408 
2409     /* an alias for the digits of b */
2410     tmpy = b->dp;
2411 
2412     /* compute the columns of the output and propagate the carry */
2413     for (iy = 0; iy < pb; iy++) {
2414       /* compute the column as a mp_word */
2415       r       = ((mp_word)*tmpt) +
2416                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2417                 ((mp_word) u);
2418 
2419       /* the new column is the lower part of the result */
2420       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2421 
2422       /* get the carry word from the result */
2423       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2424     }
2425     /* set carry if it is placed below digs */
2426     if (ix + iy < digs) {
2427       *tmpt = u;
2428     }
2429   }
2430 
2431   mp_clamp (&t);
2432   mp_exch (&t, c);
2433 
2434   mp_clear (&t);
2435   return MP_OKAY;
2436 }
2437 
2438 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */
2439 /* Revision: 1.3 $ */
2440 /* Date: 2011/03/18 16:43:04 $ */
2441 
2442 /* high level multiplication (handles sign) */
2443 static int
mp_mul(mp_int * a,mp_int * b,mp_int * c)2444 mp_mul(mp_int * a, mp_int * b, mp_int * c)
2445 {
2446   int     res, neg;
2447   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2448 
2449   /* use Toom-Cook? */
2450   if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
2451     res = mp_toom_mul(a, b, c);
2452   } else
2453   /* use Karatsuba? */
2454   if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2455     res = mp_karatsuba_mul (a, b, c);
2456   } else
2457   {
2458     /* can we use the fast multiplier?
2459      *
2460      * The fast multiplier can be used if the output will
2461      * have less than MP_WARRAY digits and the number of
2462      * digits won't affect carry propagation
2463      */
2464     int     digs = a->used + b->used + 1;
2465 
2466     if (((unsigned)digs < MP_WARRAY) &&
2467         MIN(a->used, b->used) <=
2468         (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2469       res = fast_s_mp_mul_digs (a, b, c, digs);
2470     } else
2471       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2472 
2473   }
2474   c->sign = (c->used > 0) ? neg : MP_ZPOS;
2475   return res;
2476 }
2477 
2478 /* this is a modified version of fast_s_mul_digs that only produces
2479  * output digits *above* digs.  See the comments for fast_s_mul_digs
2480  * to see how it works.
2481  *
2482  * This is used in the Barrett reduction since for one of the multiplications
2483  * only the higher digits were needed.  This essentially halves the work.
2484  *
2485  * Based on Algorithm 14.12 on pp.595 of HAC.
2486  */
2487 static int
fast_s_mp_mul_high_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2488 fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2489 {
2490   int     olduse, res, pa, ix, iz;
2491   mp_digit W[MP_WARRAY];
2492   mp_word  _W;
2493 
2494   /* grow the destination as required */
2495   pa = a->used + b->used;
2496   if (c->alloc < pa) {
2497     if ((res = mp_grow (c, pa)) != MP_OKAY) {
2498       return res;
2499     }
2500   }
2501 
2502   /* number of output digits to produce */
2503   pa = a->used + b->used;
2504   _W = 0;
2505   for (ix = digs; ix < pa; ix++) {
2506       int      tx, ty, iy;
2507       mp_digit *tmpx, *tmpy;
2508 
2509       /* get offsets into the two bignums */
2510       ty = MIN(b->used-1, ix);
2511       tx = ix - ty;
2512 
2513       /* setup temp aliases */
2514       tmpx = a->dp + tx;
2515       tmpy = b->dp + ty;
2516 
2517       /* this is the number of times the loop will iterrate, essentially its
2518          while (tx++ < a->used && ty-- >= 0) { ... }
2519        */
2520       iy = MIN(a->used-tx, ty+1);
2521 
2522       /* execute loop */
2523       for (iz = 0; iz < iy; iz++) {
2524          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2525       }
2526 
2527       /* store term */
2528       W[ix] = ((mp_digit)_W) & MP_MASK;
2529 
2530       /* make next carry */
2531       _W = _W >> ((mp_word)DIGIT_BIT);
2532   }
2533 
2534   /* setup dest */
2535   olduse  = c->used;
2536   c->used = pa;
2537 
2538   {
2539     mp_digit *tmpc;
2540 
2541     tmpc = c->dp + digs;
2542     for (ix = digs; ix < pa; ix++) {
2543       /* now extract the previous digit [below the carry] */
2544       *tmpc++ = W[ix];
2545     }
2546 
2547     /* clear unused digits [that existed in the old copy of c] */
2548     for (; ix < olduse; ix++) {
2549       *tmpc++ = 0;
2550     }
2551   }
2552   mp_clamp (c);
2553   return MP_OKAY;
2554 }
2555 
2556 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */
2557 /* Revision: 1.1.1.1 $ */
2558 /* Date: 2011/03/12 22:58:18 $ */
2559 
2560 /* multiplies |a| * |b| and does not compute the lower digs digits
2561  * [meant to get the higher part of the product]
2562  */
2563 static int
s_mp_mul_high_digs(mp_int * a,mp_int * b,mp_int * c,int digs)2564 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2565 {
2566   mp_int  t;
2567   int     res, pa, pb, ix, iy;
2568   mp_digit u;
2569   mp_word r;
2570   mp_digit tmpx, *tmpt, *tmpy;
2571 
2572   /* can we use the fast multiplier? */
2573   if (((unsigned)(a->used + b->used + 1) < MP_WARRAY)
2574       && MIN (a->used, b->used) < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2575     return fast_s_mp_mul_high_digs (a, b, c, digs);
2576   }
2577 
2578   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
2579     return res;
2580   }
2581   t.used = a->used + b->used + 1;
2582 
2583   pa = a->used;
2584   pb = b->used;
2585   for (ix = 0; ix < pa; ix++) {
2586     /* clear the carry */
2587     u = 0;
2588 
2589     /* left hand side of A[ix] * B[iy] */
2590     tmpx = a->dp[ix];
2591 
2592     /* alias to the address of where the digits will be stored */
2593     tmpt = &(t.dp[digs]);
2594 
2595     /* alias for where to read the right hand side from */
2596     tmpy = b->dp + (digs - ix);
2597 
2598     for (iy = digs - ix; iy < pb; iy++) {
2599       /* calculate the double precision result */
2600       r       = ((mp_word)*tmpt) +
2601                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2602                 ((mp_word) u);
2603 
2604       /* get the lower part */
2605       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2606 
2607       /* carry the carry */
2608       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2609     }
2610     *tmpt = u;
2611   }
2612   mp_clamp (&t);
2613   mp_exch (&t, c);
2614   mp_clear (&t);
2615   return MP_OKAY;
2616 }
2617 
2618 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */
2619 /* Revision: 1.3 $ */
2620 /* Date: 2011/03/18 16:43:04 $ */
2621 
2622 /* reduces x mod m, assumes 0 < x < m**2, mu is
2623  * precomputed via mp_reduce_setup.
2624  * From HAC pp.604 Algorithm 14.42
2625  */
2626 static int
mp_reduce(mp_int * x,mp_int * m,mp_int * mu)2627 mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
2628 {
2629   mp_int  q;
2630   int     res, um = m->used;
2631 
2632   /* q = x */
2633   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
2634     return res;
2635   }
2636 
2637   /* q1 = x / b**(k-1)  */
2638   mp_rshd (&q, um - 1);
2639 
2640   /* according to HAC this optimization is ok */
2641   if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
2642     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
2643       goto CLEANUP;
2644     }
2645   } else {
2646     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
2647       goto CLEANUP;
2648     }
2649   }
2650 
2651   /* q3 = q2 / b**(k+1) */
2652   mp_rshd (&q, um + 1);
2653 
2654   /* x = x mod b**(k+1), quick (no division) */
2655   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
2656     goto CLEANUP;
2657   }
2658 
2659   /* q = q * m mod b**(k+1), quick (no division) */
2660   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
2661     goto CLEANUP;
2662   }
2663 
2664   /* x = x - q */
2665   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
2666     goto CLEANUP;
2667   }
2668 
2669   /* If x < 0, add b**(k+1) to it */
2670   if (mp_cmp_d (x, 0) == MP_LT) {
2671     mp_set (&q, 1);
2672     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
2673       goto CLEANUP;
2674     if ((res = mp_add (x, &q, x)) != MP_OKAY)
2675       goto CLEANUP;
2676   }
2677 
2678   /* Back off if it's too big */
2679   while (mp_cmp (x, m) != MP_LT) {
2680     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
2681       goto CLEANUP;
2682     }
2683   }
2684 
2685 CLEANUP:
2686   mp_clear (&q);
2687 
2688   return res;
2689 }
2690 
2691 /* determines the setup value */
mp_reduce_2k_setup_l(mp_int * a,mp_int * d)2692 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
2693 {
2694    int    res;
2695    mp_int tmp;
2696 
2697    if ((res = mp_init(&tmp)) != MP_OKAY) {
2698       return res;
2699    }
2700 
2701    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
2702       goto ERR;
2703    }
2704 
2705    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
2706       goto ERR;
2707    }
2708 
2709 ERR:
2710    mp_clear(&tmp);
2711    return res;
2712 }
2713 
2714 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */
2715 /* Revision: 1.1.1.1 $ */
2716 /* Date: 2011/03/12 22:58:18 $ */
2717 
2718 /* reduces a modulo n where n is of the form 2**p - d
2719    This differs from reduce_2k since "d" can be larger
2720    than a single digit.
2721 */
2722 static int
mp_reduce_2k_l(mp_int * a,mp_int * n,mp_int * d)2723 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
2724 {
2725    mp_int q;
2726    int    p, res;
2727 
2728    if ((res = mp_init(&q)) != MP_OKAY) {
2729       return res;
2730    }
2731 
2732    p = mp_count_bits(n);
2733 top:
2734    /* q = a/2**p, a = a mod 2**p */
2735    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2736       goto ERR;
2737    }
2738 
2739    /* q = q * d */
2740    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
2741       goto ERR;
2742    }
2743 
2744    /* a = a + q */
2745    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2746       goto ERR;
2747    }
2748 
2749    if (mp_cmp_mag(a, n) != MP_LT) {
2750       s_mp_sub(a, n, a);
2751       goto top;
2752    }
2753 
2754 ERR:
2755    mp_clear(&q);
2756    return res;
2757 }
2758 
2759 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */
2760 /* Revision: 1.1.1.1 $ */
2761 /* Date: 2011/03/12 22:58:18 $ */
2762 
2763 /* squaring using Toom-Cook 3-way algorithm */
2764 static int
mp_toom_sqr(mp_int * a,mp_int * b)2765 mp_toom_sqr(mp_int *a, mp_int *b)
2766 {
2767     mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
2768     int res, B;
2769 
2770     /* init temps */
2771     if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
2772        return res;
2773     }
2774 
2775     /* B */
2776     B = a->used / 3;
2777 
2778     /* a = a2 * B**2 + a1 * B + a0 */
2779     if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
2780        goto ERR;
2781     }
2782 
2783     if ((res = mp_copy(a, &a1)) != MP_OKAY) {
2784        goto ERR;
2785     }
2786     mp_rshd(&a1, B);
2787     mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
2788 
2789     if ((res = mp_copy(a, &a2)) != MP_OKAY) {
2790        goto ERR;
2791     }
2792     mp_rshd(&a2, B*2);
2793 
2794     /* w0 = a0*a0 */
2795     if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
2796        goto ERR;
2797     }
2798 
2799     /* w4 = a2 * a2 */
2800     if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
2801        goto ERR;
2802     }
2803 
2804     /* w1 = (a2 + 2(a1 + 2a0))**2 */
2805     if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
2806        goto ERR;
2807     }
2808     if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2809        goto ERR;
2810     }
2811     if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
2812        goto ERR;
2813     }
2814     if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
2815        goto ERR;
2816     }
2817 
2818     if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
2819        goto ERR;
2820     }
2821 
2822     /* w3 = (a0 + 2(a1 + 2a2))**2 */
2823     if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
2824        goto ERR;
2825     }
2826     if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2827        goto ERR;
2828     }
2829     if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
2830        goto ERR;
2831     }
2832     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2833        goto ERR;
2834     }
2835 
2836     if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
2837        goto ERR;
2838     }
2839 
2840 
2841     /* w2 = (a2 + a1 + a0)**2 */
2842     if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
2843        goto ERR;
2844     }
2845     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2846        goto ERR;
2847     }
2848     if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
2849        goto ERR;
2850     }
2851 
2852     /* now solve the matrix
2853 
2854        0  0  0  0  1
2855        1  2  4  8  16
2856        1  1  1  1  1
2857        16 8  4  2  1
2858        1  0  0  0  0
2859 
2860        using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
2861      */
2862 
2863      /* r1 - r4 */
2864      if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
2865         goto ERR;
2866      }
2867      /* r3 - r0 */
2868      if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
2869         goto ERR;
2870      }
2871      /* r1/2 */
2872      if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
2873         goto ERR;
2874      }
2875      /* r3/2 */
2876      if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
2877         goto ERR;
2878      }
2879      /* r2 - r0 - r4 */
2880      if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
2881         goto ERR;
2882      }
2883      if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
2884         goto ERR;
2885      }
2886      /* r1 - r2 */
2887      if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2888         goto ERR;
2889      }
2890      /* r3 - r2 */
2891      if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2892         goto ERR;
2893      }
2894      /* r1 - 8r0 */
2895      if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
2896         goto ERR;
2897      }
2898      if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
2899         goto ERR;
2900      }
2901      /* r3 - 8r4 */
2902      if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
2903         goto ERR;
2904      }
2905      if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
2906         goto ERR;
2907      }
2908      /* 3r2 - r1 - r3 */
2909      if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
2910         goto ERR;
2911      }
2912      if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
2913         goto ERR;
2914      }
2915      if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
2916         goto ERR;
2917      }
2918      /* r1 - r2 */
2919      if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
2920         goto ERR;
2921      }
2922      /* r3 - r2 */
2923      if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
2924         goto ERR;
2925      }
2926      /* r1/3 */
2927      if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
2928         goto ERR;
2929      }
2930      /* r3/3 */
2931      if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
2932         goto ERR;
2933      }
2934 
2935      /* at this point shift W[n] by B*n */
2936      if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
2937         goto ERR;
2938      }
2939      if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
2940         goto ERR;
2941      }
2942      if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
2943         goto ERR;
2944      }
2945      if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
2946         goto ERR;
2947      }
2948 
2949      if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
2950         goto ERR;
2951      }
2952      if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2953         goto ERR;
2954      }
2955      if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2956         goto ERR;
2957      }
2958      if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
2959         goto ERR;
2960      }
2961 
2962 ERR:
2963      mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
2964      return res;
2965 }
2966 
2967 
2968 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */
2969 /* Revision: 1.1.1.1 $ */
2970 /* Date: 2011/03/12 22:58:18 $ */
2971 
2972 /* Karatsuba squaring, computes b = a*a using three
2973  * half size squarings
2974  *
2975  * See comments of karatsuba_mul for details.  It
2976  * is essentially the same algorithm but merely
2977  * tuned to perform recursive squarings.
2978  */
mp_karatsuba_sqr(mp_int * a,mp_int * b)2979 static int mp_karatsuba_sqr (mp_int * a, mp_int * b)
2980 {
2981   mp_int  x0, x1, t1, t2, x0x0, x1x1;
2982   int     B, err;
2983 
2984   err = MP_MEM;
2985 
2986   /* min # of digits */
2987   B = a->used;
2988 
2989   /* now divide in two */
2990   B = (unsigned)B >> 1;
2991 
2992   /* init copy all the temps */
2993   if (mp_init_size (&x0, B) != MP_OKAY)
2994     goto ERR;
2995   if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2996     goto X0;
2997 
2998   /* init temps */
2999   if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
3000     goto X1;
3001   if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
3002     goto T1;
3003   if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
3004     goto T2;
3005   if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
3006     goto X0X0;
3007 
3008   {
3009     int x;
3010     mp_digit *dst, *src;
3011 
3012     src = a->dp;
3013 
3014     /* now shift the digits */
3015     dst = x0.dp;
3016     for (x = 0; x < B; x++) {
3017       *dst++ = *src++;
3018     }
3019 
3020     dst = x1.dp;
3021     for (x = B; x < a->used; x++) {
3022       *dst++ = *src++;
3023     }
3024   }
3025 
3026   x0.used = B;
3027   x1.used = a->used - B;
3028 
3029   mp_clamp (&x0);
3030 
3031   /* now calc the products x0*x0 and x1*x1 */
3032   if (mp_sqr (&x0, &x0x0) != MP_OKAY)
3033     goto X1X1;           /* x0x0 = x0*x0 */
3034   if (mp_sqr (&x1, &x1x1) != MP_OKAY)
3035     goto X1X1;           /* x1x1 = x1*x1 */
3036 
3037   /* now calc (x1+x0)**2 */
3038   if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
3039     goto X1X1;           /* t1 = x1 - x0 */
3040   if (mp_sqr (&t1, &t1) != MP_OKAY)
3041     goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */
3042 
3043   /* add x0y0 */
3044   if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
3045     goto X1X1;           /* t2 = x0x0 + x1x1 */
3046   if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY)
3047     goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
3048 
3049   /* shift by B */
3050   if (mp_lshd (&t1, B) != MP_OKAY)
3051     goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
3052   if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
3053     goto X1X1;           /* x1x1 = x1x1 << 2*B */
3054 
3055   if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
3056     goto X1X1;           /* t1 = x0x0 + t1 */
3057   if (mp_add (&t1, &x1x1, b) != MP_OKAY)
3058     goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */
3059 
3060   err = MP_OKAY;
3061 
3062 X1X1:mp_clear (&x1x1);
3063 X0X0:mp_clear (&x0x0);
3064 T2:mp_clear (&t2);
3065 T1:mp_clear (&t1);
3066 X1:mp_clear (&x1);
3067 X0:mp_clear (&x0);
3068 ERR:
3069   return err;
3070 }
3071 
3072 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */
3073 /* Revision: 1.2 $ */
3074 /* Date: 2011/03/12 23:43:54 $ */
3075 
3076 /* the jist of squaring...
3077  * you do like mult except the offset of the tmpx [one that
3078  * starts closer to zero] can't equal the offset of tmpy.
3079  * So basically you set up iy like before then you min it with
3080  * (ty-tx) so that it never happens.  You double all those
3081  * you add in the inner loop
3082 
3083 After that loop you do the squares and add them in.
3084 */
3085 
fast_s_mp_sqr(mp_int * a,mp_int * b)3086 static int fast_s_mp_sqr (mp_int * a, mp_int * b)
3087 {
3088   int       olduse, res, pa, ix, iz;
3089   mp_digit   W[MP_WARRAY], *tmpx;
3090   mp_word   W1;
3091 
3092   /* grow the destination as required */
3093   pa = a->used + a->used;
3094   if (b->alloc < pa) {
3095     if ((res = mp_grow (b, pa)) != MP_OKAY) {
3096       return res;
3097     }
3098   }
3099 
3100   /* number of output digits to produce */
3101   W1 = 0;
3102   for (ix = 0; ix < pa; ix++) {
3103       int      tx, ty, iy;
3104       mp_word  _W;
3105       mp_digit *tmpy;
3106 
3107       /* clear counter */
3108       _W = 0;
3109 
3110       /* get offsets into the two bignums */
3111       ty = MIN(a->used-1, ix);
3112       tx = ix - ty;
3113 
3114       /* setup temp aliases */
3115       tmpx = a->dp + tx;
3116       tmpy = a->dp + ty;
3117 
3118       /* this is the number of times the loop will iterrate, essentially
3119          while (tx++ < a->used && ty-- >= 0) { ... }
3120        */
3121       iy = MIN(a->used-tx, ty+1);
3122 
3123       /* now for squaring tx can never equal ty
3124        * we halve the distance since they approach at a rate of 2x
3125        * and we have to round because odd cases need to be executed
3126        */
3127       iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1));
3128 
3129       /* execute loop */
3130       for (iz = 0; iz < iy; iz++) {
3131          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3132       }
3133 
3134       /* double the inner product and add carry */
3135       _W = _W + _W + W1;
3136 
3137       /* even columns have the square term in them */
3138       if ((ix&1) == 0) {
3139          _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]);
3140       }
3141 
3142       /* store it */
3143       W[ix] = (mp_digit)(_W & MP_MASK);
3144 
3145       /* make next carry */
3146       W1 = _W >> ((mp_word)DIGIT_BIT);
3147   }
3148 
3149   /* setup dest */
3150   olduse  = b->used;
3151   b->used = a->used+a->used;
3152 
3153   {
3154     mp_digit *tmpb;
3155     tmpb = b->dp;
3156     for (ix = 0; ix < pa; ix++) {
3157       *tmpb++ = W[ix] & MP_MASK;
3158     }
3159 
3160     /* clear unused digits [that existed in the old copy of c] */
3161     for (; ix < olduse; ix++) {
3162       *tmpb++ = 0;
3163     }
3164   }
3165   mp_clamp (b);
3166   return MP_OKAY;
3167 }
3168 
3169 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */
3170 /* Revision: 1.3 $ */
3171 /* Date: 2011/03/18 16:43:04 $ */
3172 
3173 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
3174 static int
s_mp_sqr(mp_int * a,mp_int * b)3175 s_mp_sqr (mp_int * a, mp_int * b)
3176 {
3177   mp_int  t;
3178   int     res, ix, iy, pa;
3179   mp_word r;
3180   mp_digit u, tmpx, *tmpt;
3181 
3182   pa = a->used;
3183   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3184     return res;
3185   }
3186 
3187   /* default used is maximum possible size */
3188   t.used = 2*pa + 1;
3189 
3190   for (ix = 0; ix < pa; ix++) {
3191     /* first calculate the digit at 2*ix */
3192     /* calculate double precision result */
3193     r = ((mp_word) t.dp[2*ix]) +
3194         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3195 
3196     /* store lower part in result */
3197     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3198 
3199     /* get the carry */
3200     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3201 
3202     /* left hand side of A[ix] * A[iy] */
3203     tmpx        = a->dp[ix];
3204 
3205     /* alias for where to store the results */
3206     tmpt        = t.dp + (2*ix + 1);
3207 
3208     for (iy = ix + 1; iy < pa; iy++) {
3209       /* first calculate the product */
3210       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3211 
3212       /* now calculate the double precision result, note we use
3213        * addition instead of *2 since it's easier to optimize
3214        */
3215       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3216 
3217       /* store lower part */
3218       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3219 
3220       /* get carry */
3221       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3222     }
3223     /* propagate upwards */
3224     while (u != ((mp_digit) 0)) {
3225       r       = ((mp_word) *tmpt) + ((mp_word) u);
3226       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3227       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3228     }
3229   }
3230 
3231   mp_clamp (&t);
3232   mp_exch (&t, b);
3233   mp_clear (&t);
3234   return MP_OKAY;
3235 }
3236 
3237 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */
3238 /* Revision: 1.1.1.1 $ */
3239 /* Date: 2011/03/12 22:58:18 $ */
3240 
3241 #define TOOM_SQR_CUTOFF      400
3242 #define KARATSUBA_SQR_CUTOFF 120
3243 
3244 /* computes b = a*a */
3245 static int
mp_sqr(mp_int * a,mp_int * b)3246 mp_sqr (mp_int * a, mp_int * b)
3247 {
3248   int     res;
3249 
3250   /* use Toom-Cook? */
3251   if (a->used >= TOOM_SQR_CUTOFF) {
3252     res = mp_toom_sqr(a, b);
3253   /* Karatsuba? */
3254   } else
3255 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3256     res = mp_karatsuba_sqr (a, b);
3257   } else
3258   {
3259     /* can we use the fast comba multiplier? */
3260     if (((unsigned)a->used * 2 + 1) < MP_WARRAY &&
3261          a->used <
3262          (1 << (unsigned)(sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3263       res = fast_s_mp_sqr (a, b);
3264     } else
3265       res = s_mp_sqr (a, b);
3266   }
3267   b->sign = MP_ZPOS;
3268   return res;
3269 }
3270 
3271 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */
3272 /* Revision: 1.3 $ */
3273 /* Date: 2011/03/18 16:43:04 $ */
3274 
3275 #define TAB_SIZE 256
3276 
s_mp_exptmod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)3277 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3278 {
3279   mp_int  M[TAB_SIZE], res, mu;
3280   mp_digit buf;
3281   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3282   int (*redux)(mp_int*,mp_int*,mp_int*);
3283 
3284   /* find window size */
3285   x = mp_count_bits (X);
3286   if (x <= 7) {
3287     winsize = 2;
3288   } else if (x <= 36) {
3289     winsize = 3;
3290   } else if (x <= 140) {
3291     winsize = 4;
3292   } else if (x <= 450) {
3293     winsize = 5;
3294   } else if (x <= 1303) {
3295     winsize = 6;
3296   } else if (x <= 3529) {
3297     winsize = 7;
3298   } else {
3299     winsize = 8;
3300   }
3301 
3302   /* init M array */
3303   /* init first cell */
3304   if ((err = mp_init(&M[1])) != MP_OKAY) {
3305      return err;
3306   }
3307 
3308   /* now init the second half of the array */
3309   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3310     if ((err = mp_init(&M[x])) != MP_OKAY) {
3311       for (y = 1<<(winsize-1); y < x; y++) {
3312         mp_clear (&M[y]);
3313       }
3314       mp_clear(&M[1]);
3315       return err;
3316     }
3317   }
3318 
3319   /* create mu, used for Barrett reduction */
3320   if ((err = mp_init (&mu)) != MP_OKAY) {
3321     goto LBL_M;
3322   }
3323 
3324   if (redmode == 0) {
3325      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3326         goto LBL_MU;
3327      }
3328      redux = mp_reduce;
3329   } else {
3330      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3331         goto LBL_MU;
3332      }
3333      redux = mp_reduce_2k_l;
3334   }
3335 
3336   /* create M table
3337    *
3338    * The M table contains powers of the base,
3339    * e.g. M[x] = G**x mod P
3340    *
3341    * The first half of the table is not
3342    * computed though accept for M[0] and M[1]
3343    */
3344   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3345     goto LBL_MU;
3346   }
3347 
3348   /* compute the value at M[1<<(winsize-1)] by squaring
3349    * M[1] (winsize-1) times
3350    */
3351   if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
3352     goto LBL_MU;
3353   }
3354 
3355   for (x = 0; x < (winsize - 1); x++) {
3356     /* square it */
3357     if ((err = mp_sqr (&M[1 << (winsize - 1)],
3358                        &M[1 << (winsize - 1)])) != MP_OKAY) {
3359       goto LBL_MU;
3360     }
3361 
3362     /* reduce modulo P */
3363     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
3364       goto LBL_MU;
3365     }
3366   }
3367 
3368   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3369    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3370    */
3371   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3372     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3373       goto LBL_MU;
3374     }
3375     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3376       goto LBL_MU;
3377     }
3378   }
3379 
3380   /* setup result */
3381   if ((err = mp_init (&res)) != MP_OKAY) {
3382     goto LBL_MU;
3383   }
3384   mp_set (&res, 1);
3385 
3386   /* set initial mode and bit cnt */
3387   mode   = 0;
3388   bitcnt = 1;
3389   buf    = 0;
3390   digidx = X->used - 1;
3391   bitcpy = 0;
3392   bitbuf = 0;
3393 
3394   for (;;) {
3395     /* grab next digit as required */
3396     if (--bitcnt == 0) {
3397       /* if digidx == -1 we are out of digits */
3398       if (digidx == -1) {
3399         break;
3400       }
3401       /* read next digit and reset the bitcnt */
3402       buf    = X->dp[digidx--];
3403       bitcnt = (int) DIGIT_BIT;
3404     }
3405 
3406     /* grab the next msb from the exponent */
3407     y     = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3408     buf <<= (mp_digit)1;
3409 
3410     /* if the bit is zero and mode == 0 then we ignore it
3411      * These represent the leading zero bits before the first 1 bit
3412      * in the exponent.  Technically this opt is not required but it
3413      * does lower the # of trivial squaring/reductions used
3414      */
3415     if (mode == 0 && y == 0) {
3416       continue;
3417     }
3418 
3419     /* if the bit is zero and mode == 1 then we square */
3420     if (mode == 1 && y == 0) {
3421       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3422         goto LBL_RES;
3423       }
3424       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3425         goto LBL_RES;
3426       }
3427       continue;
3428     }
3429 
3430     /* else we add it to the window */
3431     bitbuf |= (y << (winsize - ++bitcpy));
3432     mode    = 2;
3433 
3434     if (bitcpy == winsize) {
3435       /* ok window is filled so square as required and multiply  */
3436       /* square first */
3437       for (x = 0; x < winsize; x++) {
3438         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3439           goto LBL_RES;
3440         }
3441         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3442           goto LBL_RES;
3443         }
3444       }
3445 
3446       /* then multiply */
3447       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3448         goto LBL_RES;
3449       }
3450       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3451         goto LBL_RES;
3452       }
3453 
3454       /* empty window and reset */
3455       bitcpy = 0;
3456       bitbuf = 0;
3457       mode   = 1;
3458     }
3459   }
3460 
3461   /* if bits remain then square/multiply */
3462   if (mode == 2 && bitcpy > 0) {
3463     /* square then multiply if the bit is set */
3464     for (x = 0; x < bitcpy; x++) {
3465       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3466         goto LBL_RES;
3467       }
3468       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3469         goto LBL_RES;
3470       }
3471 
3472       bitbuf <<= 1;
3473       if ((bitbuf & (1 << winsize)) != 0) {
3474         /* then multiply */
3475         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3476           goto LBL_RES;
3477         }
3478         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3479           goto LBL_RES;
3480         }
3481       }
3482     }
3483   }
3484 
3485   mp_exch (&res, Y);
3486   err = MP_OKAY;
3487 LBL_RES:mp_clear (&res);
3488 LBL_MU:mp_clear (&mu);
3489 LBL_M:
3490   mp_clear(&M[1]);
3491   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3492     mp_clear (&M[x]);
3493   }
3494   return err;
3495 }
3496 
3497 /* determines if a number is a valid DR modulus */
3498 static int
mp_dr_is_modulus(mp_int * a)3499 mp_dr_is_modulus(mp_int *a)
3500 {
3501    int ix;
3502 
3503    /* must be at least two digits */
3504    if (a->used < 2) {
3505       return 0;
3506    }
3507 
3508    /* must be of the form b**k - a [a <= b] so all
3509     * but the first digit must be equal to -1 (mod b).
3510     */
3511    for (ix = 1; ix < a->used; ix++) {
3512        if (a->dp[ix] != MP_MASK) {
3513           return 0;
3514        }
3515    }
3516    return 1;
3517 }
3518 
3519 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */
3520 /* Revision: 1.1.1.1 $ */
3521 /* Date: 2011/03/12 22:58:18 $ */
3522 
3523 /* determines if mp_reduce_2k can be used */
mp_reduce_is_2k(mp_int * a)3524 static int mp_reduce_is_2k(mp_int *a)
3525 {
3526    int ix, iy, iw;
3527    mp_digit iz;
3528 
3529    if (a->used == 0) {
3530       return MP_NO;
3531    } else if (a->used == 1) {
3532       return MP_YES;
3533    } else if (a->used > 1) {
3534       iy = mp_count_bits(a);
3535       iz = 1;
3536       iw = 1;
3537 
3538       /* Test every bit from the second digit up, must be 1 */
3539       for (ix = DIGIT_BIT; ix < iy; ix++) {
3540           if ((a->dp[iw] & iz) == 0) {
3541              return MP_NO;
3542           }
3543           iz <<= 1;
3544           if (iz > (mp_digit)MP_MASK) {
3545              ++iw;
3546              iz = 1;
3547           }
3548       }
3549    }
3550    return MP_YES;
3551 }
3552 
3553 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */
3554 /* Revision: 1.1.1.1 $ */
3555 /* Date: 2011/03/12 22:58:18 $ */
3556 
3557 
3558 /* d = a * b (mod c) */
3559 static int
mp_mulmod(mp_int * d,mp_int * a,mp_int * b,mp_int * c)3560 mp_mulmod (mp_int *d, mp_int * a, mp_int * b, mp_int * c)
3561 {
3562   int     res;
3563   mp_int  t;
3564 
3565   if ((res = mp_init (&t)) != MP_OKAY) {
3566     return res;
3567   }
3568 
3569   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3570     mp_clear (&t);
3571     return res;
3572   }
3573   res = mp_mod (&t, c, d);
3574   mp_clear (&t);
3575   return res;
3576 }
3577 
3578 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */
3579 /* Revision: 1.1.1.1 $ */
3580 /* Date: 2011/03/12 22:58:18 $ */
3581 
3582 /* setups the montgomery reduction stuff */
3583 static int
mp_montgomery_setup(mp_int * n,mp_digit * rho)3584 mp_montgomery_setup (mp_int * n, mp_digit * rho)
3585 {
3586   mp_digit x, b;
3587 
3588 /* fast inversion mod 2**k
3589  *
3590  * Based on the fact that
3591  *
3592  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
3593  *                    =>  2*X*A - X*X*A*A = 1
3594  *                    =>  2*(1) - (1)     = 1
3595  */
3596   b = n->dp[0];
3597 
3598   if ((b & 1) == 0) {
3599     return MP_VAL;
3600   }
3601 
3602   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3603   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
3604   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
3605   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
3606   if (/*CONSTCOND*/sizeof(mp_digit) == 8) {
3607 	  x *= 2 - b * x;	/* here x*a==1 mod 2**64 */
3608   }
3609 
3610   /* rho = -1/m mod b */
3611   *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3612 
3613   return MP_OKAY;
3614 }
3615 
3616 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */
3617 /* Revision: 1.1.1.1 $ */
3618 /* Date: 2011/03/12 22:58:18 $ */
3619 
3620 /* computes xR**-1 == x (mod N) via Montgomery Reduction
3621  *
3622  * This is an optimized implementation of montgomery_reduce
3623  * which uses the comba method to quickly calculate the columns of the
3624  * reduction.
3625  *
3626  * Based on Algorithm 14.32 on pp.601 of HAC.
3627 */
3628 static int
fast_mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3629 fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
3630 {
3631   int     ix, res, olduse;
3632   /*LINTED*/
3633   mp_word W[MP_WARRAY];
3634 
3635   /* get old used count */
3636   olduse = x->used;
3637 
3638   /* grow a as required */
3639   if (x->alloc < n->used + 1) {
3640     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
3641       return res;
3642     }
3643   }
3644 
3645   /* first we have to get the digits of the input into
3646    * an array of double precision words W[...]
3647    */
3648   {
3649     mp_word *_W;
3650     mp_digit *tmpx;
3651 
3652     /* alias for the W[] array */
3653     _W   = W;
3654 
3655     /* alias for the digits of  x*/
3656     tmpx = x->dp;
3657 
3658     /* copy the digits of a into W[0..a->used-1] */
3659     for (ix = 0; ix < x->used; ix++) {
3660       *_W++ = *tmpx++;
3661     }
3662 
3663     /* zero the high words of W[a->used..m->used*2] */
3664     for (; ix < n->used * 2 + 1; ix++) {
3665       *_W++ = 0;
3666     }
3667   }
3668 
3669   /* now we proceed to zero successive digits
3670    * from the least significant upwards
3671    */
3672   for (ix = 0; ix < n->used; ix++) {
3673     /* mu = ai * m' mod b
3674      *
3675      * We avoid a double precision multiplication (which isn't required)
3676      * by casting the value down to a mp_digit.  Note this requires
3677      * that W[ix-1] have  the carry cleared (see after the inner loop)
3678      */
3679     mp_digit mu;
3680     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
3681 
3682     /* a = a + mu * m * b**i
3683      *
3684      * This is computed in place and on the fly.  The multiplication
3685      * by b**i is handled by offseting which columns the results
3686      * are added to.
3687      *
3688      * Note the comba method normally doesn't handle carries in the
3689      * inner loop In this case we fix the carry from the previous
3690      * column since the Montgomery reduction requires digits of the
3691      * result (so far) [see above] to work.  This is
3692      * handled by fixing up one carry after the inner loop.  The
3693      * carry fixups are done in order so after these loops the
3694      * first m->used words of W[] have the carries fixed
3695      */
3696     {
3697       int iy;
3698       mp_digit *tmpn;
3699       mp_word *_W;
3700 
3701       /* alias for the digits of the modulus */
3702       tmpn = n->dp;
3703 
3704       /* Alias for the columns set by an offset of ix */
3705       _W = W + ix;
3706 
3707       /* inner loop */
3708       for (iy = 0; iy < n->used; iy++) {
3709           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
3710       }
3711     }
3712 
3713     /* now fix carry for next digit, W[ix+1] */
3714     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
3715   }
3716 
3717   /* now we have to propagate the carries and
3718    * shift the words downward [all those least
3719    * significant digits we zeroed].
3720    */
3721   {
3722     mp_digit *tmpx;
3723     mp_word *_W, *_W1;
3724 
3725     /* nox fix rest of carries */
3726 
3727     /* alias for current word */
3728     _W1 = W + ix;
3729 
3730     /* alias for next word, where the carry goes */
3731     _W = W + ++ix;
3732 
3733     for (; ix <= n->used * 2 + 1; ix++) {
3734       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
3735     }
3736 
3737     /* copy out, A = A/b**n
3738      *
3739      * The result is A/b**n but instead of converting from an
3740      * array of mp_word to mp_digit than calling mp_rshd
3741      * we just copy them in the right order
3742      */
3743 
3744     /* alias for destination word */
3745     tmpx = x->dp;
3746 
3747     /* alias for shifted double precision result */
3748     _W = W + n->used;
3749 
3750     for (ix = 0; ix < n->used + 1; ix++) {
3751       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
3752     }
3753 
3754     /* zero oldused digits, if the input a was larger than
3755      * m->used+1 we'll have to clear the digits
3756      */
3757     for (; ix < olduse; ix++) {
3758       *tmpx++ = 0;
3759     }
3760   }
3761 
3762   /* set the max used and clamp */
3763   x->used = n->used + 1;
3764   mp_clamp (x);
3765 
3766   /* if A >= m then A = A - m */
3767   if (mp_cmp_mag (x, n) != MP_LT) {
3768     return s_mp_sub (x, n, x);
3769   }
3770   return MP_OKAY;
3771 }
3772 
3773 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */
3774 /* Revision: 1.2 $ */
3775 /* Date: 2011/03/18 16:22:09 $ */
3776 
3777 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
3778 static int
mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3779 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
3780 {
3781   int     ix, res, digs;
3782   mp_digit mu;
3783 
3784   /* can the fast reduction [comba] method be used?
3785    *
3786    * Note that unlike in mul you're safely allowed *less*
3787    * than the available columns [255 per default] since carries
3788    * are fixed up in the inner loop.
3789    */
3790   digs = n->used * 2 + 1;
3791   if (((unsigned)digs < MP_WARRAY) &&
3792       n->used <
3793       (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3794     return fast_mp_montgomery_reduce (x, n, rho);
3795   }
3796 
3797   /* grow the input as required */
3798   if (x->alloc < digs) {
3799     if ((res = mp_grow (x, digs)) != MP_OKAY) {
3800       return res;
3801     }
3802   }
3803   x->used = digs;
3804 
3805   for (ix = 0; ix < n->used; ix++) {
3806     /* mu = ai * rho mod b
3807      *
3808      * The value of rho must be precalculated via
3809      * montgomery_setup() such that
3810      * it equals -1/n0 mod b this allows the
3811      * following inner loop to reduce the
3812      * input one digit at a time
3813      */
3814     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
3815 
3816     /* a = a + mu * m * b**i */
3817     {
3818       int iy;
3819       mp_digit *tmpn, *tmpx, u;
3820       mp_word r;
3821 
3822       /* alias for digits of the modulus */
3823       tmpn = n->dp;
3824 
3825       /* alias for the digits of x [the input] */
3826       tmpx = x->dp + ix;
3827 
3828       /* set the carry to zero */
3829       u = 0;
3830 
3831       /* Multiply and add in place */
3832       for (iy = 0; iy < n->used; iy++) {
3833         /* compute product and sum */
3834         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
3835                   ((mp_word) u) + ((mp_word) * tmpx);
3836 
3837         /* get carry */
3838         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3839 
3840         /* fix digit */
3841         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
3842       }
3843       /* At this point the ix'th digit of x should be zero */
3844 
3845 
3846       /* propagate carries upwards as required*/
3847       while (u) {
3848         *tmpx   += u;
3849         u        = *tmpx >> DIGIT_BIT;
3850         *tmpx++ &= MP_MASK;
3851       }
3852     }
3853   }
3854 
3855   /* at this point the n.used'th least
3856    * significant digits of x are all zero
3857    * which means we can shift x to the
3858    * right by n.used digits and the
3859    * residue is unchanged.
3860    */
3861 
3862   /* x = x/b**n.used */
3863   mp_clamp(x);
3864   mp_rshd (x, n->used);
3865 
3866   /* if x >= n then x = x - n */
3867   if (mp_cmp_mag (x, n) != MP_LT) {
3868     return s_mp_sub (x, n, x);
3869   }
3870 
3871   return MP_OKAY;
3872 }
3873 
3874 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */
3875 /* Revision: 1.3 $ */
3876 /* Date: 2011/03/18 16:43:04 $ */
3877 
3878 /* determines the setup value */
3879 static void
mp_dr_setup(mp_int * a,mp_digit * d)3880 mp_dr_setup(mp_int *a, mp_digit *d)
3881 {
3882    /* the casts are required if DIGIT_BIT is one less than
3883     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
3884     */
3885    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
3886         ((mp_word)a->dp[0]));
3887 }
3888 
3889 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */
3890 /* Revision: 1.1.1.1 $ */
3891 /* Date: 2011/03/12 22:58:18 $ */
3892 
3893 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
3894  *
3895  * Based on algorithm from the paper
3896  *
3897  * "Generating Efficient Primes for Discrete Log Cryptosystems"
3898  *                 Chae Hoon Lim, Pil Joong Lee,
3899  *          POSTECH Information Research Laboratories
3900  *
3901  * The modulus must be of a special format [see manual]
3902  *
3903  * Has been modified to use algorithm 7.10 from the LTM book instead
3904  *
3905  * Input x must be in the range 0 <= x <= (n-1)**2
3906  */
3907 static int
mp_dr_reduce(mp_int * x,mp_int * n,mp_digit k)3908 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
3909 {
3910   int      err, i, m;
3911   mp_word  r;
3912   mp_digit mu, *tmpx1, *tmpx2;
3913 
3914   /* m = digits in modulus */
3915   m = n->used;
3916 
3917   /* ensure that "x" has at least 2m digits */
3918   if (x->alloc < m + m) {
3919     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
3920       return err;
3921     }
3922   }
3923 
3924 /* top of loop, this is where the code resumes if
3925  * another reduction pass is required.
3926  */
3927 top:
3928   /* aliases for digits */
3929   /* alias for lower half of x */
3930   tmpx1 = x->dp;
3931 
3932   /* alias for upper half of x, or x/B**m */
3933   tmpx2 = x->dp + m;
3934 
3935   /* set carry to zero */
3936   mu = 0;
3937 
3938   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
3939   for (i = 0; i < m; i++) {
3940       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
3941       *tmpx1++  = (mp_digit)(r & MP_MASK);
3942       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
3943   }
3944 
3945   /* set final carry */
3946   *tmpx1++ = mu;
3947 
3948   /* zero words above m */
3949   for (i = m + 1; i < x->used; i++) {
3950       *tmpx1++ = 0;
3951   }
3952 
3953   /* clamp, sub and return */
3954   mp_clamp (x);
3955 
3956   /* if x >= n then subtract and reduce again
3957    * Each successive "recursion" makes the input smaller and smaller.
3958    */
3959   if (mp_cmp_mag (x, n) != MP_LT) {
3960     s_mp_sub(x, n, x);
3961     goto top;
3962   }
3963   return MP_OKAY;
3964 }
3965 
3966 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */
3967 /* Revision: 1.1.1.1 $ */
3968 /* Date: 2011/03/12 22:58:18 $ */
3969 
3970 /* determines the setup value */
3971 static int
mp_reduce_2k_setup(mp_int * a,mp_digit * d)3972 mp_reduce_2k_setup(mp_int *a, mp_digit *d)
3973 {
3974    int res, p;
3975    mp_int tmp;
3976 
3977    if ((res = mp_init(&tmp)) != MP_OKAY) {
3978       return res;
3979    }
3980 
3981    p = mp_count_bits(a);
3982    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3983       mp_clear(&tmp);
3984       return res;
3985    }
3986 
3987    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3988       mp_clear(&tmp);
3989       return res;
3990    }
3991 
3992    *d = tmp.dp[0];
3993    mp_clear(&tmp);
3994    return MP_OKAY;
3995 }
3996 
3997 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */
3998 /* Revision: 1.1.1.1 $ */
3999 /* Date: 2011/03/12 22:58:18 $ */
4000 
4001 /* reduces a modulo n where n is of the form 2**p - d */
4002 static int
mp_reduce_2k(mp_int * a,mp_int * n,mp_digit d)4003 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
4004 {
4005    mp_int q;
4006    int    p, res;
4007 
4008    if ((res = mp_init(&q)) != MP_OKAY) {
4009       return res;
4010    }
4011 
4012    p = mp_count_bits(n);
4013 top:
4014    /* q = a/2**p, a = a mod 2**p */
4015    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
4016       goto ERR;
4017    }
4018 
4019    if (d != 1) {
4020       /* q = q * d */
4021       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
4022          goto ERR;
4023       }
4024    }
4025 
4026    /* a = a + q */
4027    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
4028       goto ERR;
4029    }
4030 
4031    if (mp_cmp_mag(a, n) != MP_LT) {
4032       s_mp_sub(a, n, a);
4033       goto top;
4034    }
4035 
4036 ERR:
4037    mp_clear(&q);
4038    return res;
4039 }
4040 
4041 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */
4042 /* Revision: 1.1.1.1 $ */
4043 /* Date: 2011/03/12 22:58:18 $ */
4044 
4045 /*
4046  * shifts with subtractions when the result is greater than b.
4047  *
4048  * The method is slightly modified to shift B unconditionally upto just under
4049  * the leading bit of b.  This saves alot of multiple precision shifting.
4050  */
4051 static int
mp_montgomery_calc_normalization(mp_int * a,mp_int * b)4052 mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
4053 {
4054   int     x, bits, res;
4055 
4056   /* how many bits of last digit does b use */
4057   bits = mp_count_bits (b) % DIGIT_BIT;
4058 
4059   if (b->used > 1) {
4060      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
4061         return res;
4062      }
4063   } else {
4064      mp_set(a, 1);
4065      bits = 1;
4066   }
4067 
4068 
4069   /* now compute C = A * B mod b */
4070   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
4071     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
4072       return res;
4073     }
4074     if (mp_cmp_mag (a, b) != MP_LT) {
4075       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
4076         return res;
4077       }
4078     }
4079   }
4080 
4081   return MP_OKAY;
4082 }
4083 
4084 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */
4085 /* Revision: 1.1.1.1 $ */
4086 /* Date: 2011/03/12 22:58:18 $ */
4087 
4088 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
4089  *
4090  * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
4091  * The value of k changes based on the size of the exponent.
4092  *
4093  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
4094  */
4095 
4096 #define TAB_SIZE 256
4097 
4098 static int
mp_exptmod_fast(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)4099 mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
4100 {
4101   mp_int  M[TAB_SIZE], res;
4102   mp_digit buf, mp;
4103   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
4104 
4105   /* use a pointer to the reduction algorithm.  This allows us to use
4106    * one of many reduction algorithms without modding the guts of
4107    * the code with if statements everywhere.
4108    */
4109   int     (*redux)(mp_int*,mp_int*,mp_digit);
4110 
4111   /* find window size */
4112   x = mp_count_bits (X);
4113   if (x <= 7) {
4114     winsize = 2;
4115   } else if (x <= 36) {
4116     winsize = 3;
4117   } else if (x <= 140) {
4118     winsize = 4;
4119   } else if (x <= 450) {
4120     winsize = 5;
4121   } else if (x <= 1303) {
4122     winsize = 6;
4123   } else if (x <= 3529) {
4124     winsize = 7;
4125   } else {
4126     winsize = 8;
4127   }
4128 
4129   /* init M array */
4130   /* init first cell */
4131   if ((err = mp_init(&M[1])) != MP_OKAY) {
4132      return err;
4133   }
4134 
4135   /* now init the second half of the array */
4136   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4137     if ((err = mp_init(&M[x])) != MP_OKAY) {
4138       for (y = 1<<(winsize-1); y < x; y++) {
4139         mp_clear (&M[y]);
4140       }
4141       mp_clear(&M[1]);
4142       return err;
4143     }
4144   }
4145 
4146   /* determine and setup reduction code */
4147   if (redmode == 0) {
4148      /* now setup montgomery  */
4149      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
4150         goto LBL_M;
4151      }
4152 
4153      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
4154      if (((unsigned)(P->used * 2 + 1) < MP_WARRAY) &&
4155           P->used < (1 << (unsigned)((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4156         redux = fast_mp_montgomery_reduce;
4157      } else
4158      {
4159         /* use slower baseline Montgomery method */
4160         redux = mp_montgomery_reduce;
4161      }
4162   } else if (redmode == 1) {
4163      /* setup DR reduction for moduli of the form B**k - b */
4164      mp_dr_setup(P, &mp);
4165      redux = mp_dr_reduce;
4166   } else {
4167      /* setup DR reduction for moduli of the form 2**k - b */
4168      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
4169         goto LBL_M;
4170      }
4171      redux = mp_reduce_2k;
4172   }
4173 
4174   /* setup result */
4175   if ((err = mp_init (&res)) != MP_OKAY) {
4176     goto LBL_M;
4177   }
4178 
4179   /* create M table
4180    *
4181 
4182    *
4183    * The first half of the table is not computed though accept for M[0] and M[1]
4184    */
4185 
4186   if (redmode == 0) {
4187      /* now we need R mod m */
4188      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
4189        goto LBL_RES;
4190      }
4191 
4192      /* now set M[1] to G * R mod m */
4193      if ((err = mp_mulmod (&M[1], G, &res, P)) != MP_OKAY) {
4194        goto LBL_RES;
4195      }
4196   } else {
4197      mp_set(&res, 1);
4198      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
4199         goto LBL_RES;
4200      }
4201   }
4202 
4203   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
4204   if ((err = mp_copy ( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4205     goto LBL_RES;
4206   }
4207 
4208   for (x = 0; x < (winsize - 1); x++) {
4209     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
4210       goto LBL_RES;
4211     }
4212     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
4213       goto LBL_RES;
4214     }
4215   }
4216 
4217   /* create upper table */
4218   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4219     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4220       goto LBL_RES;
4221     }
4222     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
4223       goto LBL_RES;
4224     }
4225   }
4226 
4227   /* set initial mode and bit cnt */
4228   mode   = 0;
4229   bitcnt = 1;
4230   buf    = 0;
4231   digidx = X->used - 1;
4232   bitcpy = 0;
4233   bitbuf = 0;
4234 
4235   for (;;) {
4236     /* grab next digit as required */
4237     if (--bitcnt == 0) {
4238       /* if digidx == -1 we are out of digits so break */
4239       if (digidx == -1) {
4240         break;
4241       }
4242       /* read next digit and reset bitcnt */
4243       buf    = X->dp[digidx--];
4244       bitcnt = (int)DIGIT_BIT;
4245     }
4246 
4247     /* grab the next msb from the exponent */
4248     y     = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1;
4249     buf <<= (mp_digit)1;
4250 
4251     /* if the bit is zero and mode == 0 then we ignore it
4252      * These represent the leading zero bits before the first 1 bit
4253      * in the exponent.  Technically this opt is not required but it
4254      * does lower the # of trivial squaring/reductions used
4255      */
4256     if (mode == 0 && y == 0) {
4257       continue;
4258     }
4259 
4260     /* if the bit is zero and mode == 1 then we square */
4261     if (mode == 1 && y == 0) {
4262       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4263         goto LBL_RES;
4264       }
4265       if ((err = redux (&res, P, mp)) != MP_OKAY) {
4266         goto LBL_RES;
4267       }
4268       continue;
4269     }
4270 
4271     /* else we add it to the window */
4272     bitbuf |= (y << (winsize - ++bitcpy));
4273     mode    = 2;
4274 
4275     if (bitcpy == winsize) {
4276       /* ok window is filled so square as required and multiply  */
4277       /* square first */
4278       for (x = 0; x < winsize; x++) {
4279         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4280           goto LBL_RES;
4281         }
4282         if ((err = redux (&res, P, mp)) != MP_OKAY) {
4283           goto LBL_RES;
4284         }
4285       }
4286 
4287       /* then multiply */
4288       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4289         goto LBL_RES;
4290       }
4291       if ((err = redux (&res, P, mp)) != MP_OKAY) {
4292         goto LBL_RES;
4293       }
4294 
4295       /* empty window and reset */
4296       bitcpy = 0;
4297       bitbuf = 0;
4298       mode   = 1;
4299     }
4300   }
4301 
4302   /* if bits remain then square/multiply */
4303   if (mode == 2 && bitcpy > 0) {
4304     /* square then multiply if the bit is set */
4305     for (x = 0; x < bitcpy; x++) {
4306       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4307         goto LBL_RES;
4308       }
4309       if ((err = redux (&res, P, mp)) != MP_OKAY) {
4310         goto LBL_RES;
4311       }
4312 
4313       /* get next bit of the window */
4314       bitbuf <<= 1;
4315       if ((bitbuf & (1 << winsize)) != 0) {
4316         /* then multiply */
4317         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4318           goto LBL_RES;
4319         }
4320         if ((err = redux (&res, P, mp)) != MP_OKAY) {
4321           goto LBL_RES;
4322         }
4323       }
4324     }
4325   }
4326 
4327   if (redmode == 0) {
4328      /* fixup result if Montgomery reduction is used
4329       * recall that any value in a Montgomery system is
4330       * actually multiplied by R mod n.  So we have
4331       * to reduce one more time to cancel out the factor
4332       * of R.
4333       */
4334      if ((err = redux(&res, P, mp)) != MP_OKAY) {
4335        goto LBL_RES;
4336      }
4337   }
4338 
4339   /* swap res with Y */
4340   mp_exch (&res, Y);
4341   err = MP_OKAY;
4342 LBL_RES:mp_clear (&res);
4343 LBL_M:
4344   mp_clear(&M[1]);
4345   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4346     mp_clear (&M[x]);
4347   }
4348   return err;
4349 }
4350 
4351 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_exptmod_fast.c,v $ */
4352 /* Revision: 1.4 $ */
4353 /* Date: 2011/03/18 16:43:04 $ */
4354 
4355 /* this is a shell function that calls either the normal or Montgomery
4356  * exptmod functions.  Originally the call to the montgomery code was
4357  * embedded in the normal function but that wasted alot of stack space
4358  * for nothing (since 99% of the time the Montgomery code would be called)
4359  */
4360 static int
mp_exptmod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4361 mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int *Y)
4362 {
4363   int dr;
4364 
4365   /* modulus P must be positive */
4366   if (P->sign == MP_NEG) {
4367      return MP_VAL;
4368   }
4369 
4370   /* if exponent X is negative we have to recurse */
4371   if (X->sign == MP_NEG) {
4372      mp_int tmpG, tmpX;
4373      int err;
4374 
4375      /* first compute 1/G mod P */
4376      if ((err = mp_init(&tmpG)) != MP_OKAY) {
4377         return err;
4378      }
4379      if ((err = mp_invmod(&tmpG, G, P)) != MP_OKAY) {
4380         mp_clear(&tmpG);
4381         return err;
4382      }
4383 
4384      /* now get |X| */
4385      if ((err = mp_init(&tmpX)) != MP_OKAY) {
4386         mp_clear(&tmpG);
4387         return err;
4388      }
4389      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
4390         mp_clear_multi(&tmpG, &tmpX, NULL);
4391         return err;
4392      }
4393 
4394      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
4395      err = mp_exptmod(&tmpG, &tmpX, P, Y);
4396      mp_clear_multi(&tmpG, &tmpX, NULL);
4397      return err;
4398   }
4399 
4400 /* modified diminished radix reduction */
4401   if (mp_reduce_is_2k_l(P) == MP_YES) {
4402      return s_mp_exptmod(G, X, P, Y, 1);
4403   }
4404 
4405   /* is it a DR modulus? */
4406   dr = mp_dr_is_modulus(P);
4407 
4408   /* if not, is it a unrestricted DR modulus? */
4409   if (dr == 0) {
4410      dr = mp_reduce_is_2k(P) << 1;
4411   }
4412 
4413   /* if the modulus is odd or dr != 0 use the montgomery method */
4414   if (BN_is_odd (P) == 1 || dr !=  0) {
4415     return mp_exptmod_fast (G, X, P, Y, dr);
4416   } else {
4417     /* otherwise use the generic Barrett reduction technique */
4418     return s_mp_exptmod (G, X, P, Y, 0);
4419   }
4420 }
4421 
4422 /* reverse an array, used for radix code */
4423 static void
bn_reverse(unsigned char * s,int len)4424 bn_reverse(unsigned char *s, int len)
4425 {
4426   int     ix, iy;
4427   unsigned char t;
4428 
4429   ix = 0;
4430   iy = len - 1;
4431   while (ix < iy) {
4432     t     = s[ix];
4433     s[ix] = s[iy];
4434     s[iy] = t;
4435     ++ix;
4436     --iy;
4437   }
4438 }
4439 
4440 static int
s_is_power_of_two(mp_digit b,int * p)4441 s_is_power_of_two(mp_digit b, int *p)
4442 {
4443    int x;
4444 
4445    /* fast return if no power of two */
4446    if ((b==0) || (b & (b-1))) {
4447       return 0;
4448    }
4449 
4450    for (x = 0; x < DIGIT_BIT; x++) {
4451       if (b == (((mp_digit)1)<<x)) {
4452          *p = x;
4453          return 1;
4454       }
4455    }
4456    return 0;
4457 }
4458 
4459 /* single digit division (based on routine from MPI) */
4460 static int
mp_div_d(mp_int * a,mp_digit b,mp_int * c,mp_digit * d)4461 mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
4462 {
4463   mp_int  q;
4464   mp_word w;
4465   mp_digit t;
4466   int     res, ix;
4467 
4468   /* cannot divide by zero */
4469   if (b == 0) {
4470      return MP_VAL;
4471   }
4472 
4473   /* quick outs */
4474   if (b == 1 || mp_iszero(a) == 1) {
4475      if (d != NULL) {
4476         *d = 0;
4477      }
4478      if (c != NULL) {
4479         return mp_copy(a, c);
4480      }
4481      return MP_OKAY;
4482   }
4483 
4484   /* power of two ? */
4485   if (s_is_power_of_two(b, &ix) == 1) {
4486      if (d != NULL) {
4487         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4488      }
4489      if (c != NULL) {
4490         return mp_div_2d(a, ix, c, NULL);
4491      }
4492      return MP_OKAY;
4493   }
4494 
4495 #ifdef BN_MP_DIV_3_C
4496   /* three? */
4497   if (b == 3) {
4498      return mp_div_3(a, c, d);
4499   }
4500 #endif
4501 
4502   /* no easy answer [c'est la vie].  Just division */
4503   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4504      return res;
4505   }
4506 
4507   q.used = a->used;
4508   q.sign = a->sign;
4509   w = 0;
4510   for (ix = a->used - 1; ix >= 0; ix--) {
4511      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4512 
4513      if (w >= b) {
4514         t = (mp_digit)(w / b);
4515         w -= ((mp_word)t) * ((mp_word)b);
4516       } else {
4517         t = 0;
4518       }
4519       q.dp[ix] = (mp_digit)t;
4520   }
4521 
4522   if (d != NULL) {
4523      *d = (mp_digit)w;
4524   }
4525 
4526   if (c != NULL) {
4527      mp_clamp(&q);
4528      mp_exch(&q, c);
4529   }
4530   mp_clear(&q);
4531 
4532   return res;
4533 }
4534 
4535 static int
mp_mod_d(mp_int * a,mp_digit b,mp_digit * c)4536 mp_mod_d(mp_int *a, mp_digit b, mp_digit *c)
4537 {
4538   return mp_div_d(a, b, NULL, c);
4539 }
4540 
4541 static const mp_digit ltm_prime_tab[] = {
4542   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4543   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4544   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4545   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4546 #ifndef MP_8BIT
4547   0x0083,
4548   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4549   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4550   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4551   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4552 
4553   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4554   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4555   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4556   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4557   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4558   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4559   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4560   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4561 
4562   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4563   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4564   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4565   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4566   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4567   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4568   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4569   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4570 
4571   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4572   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4573   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4574   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4575   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4576   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4577   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4578   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4579 #endif
4580 };
4581 
4582 #define PRIME_SIZE	__arraycount(ltm_prime_tab)
4583 
4584 static int
mp_prime_is_divisible(mp_int * a,int * result)4585 mp_prime_is_divisible(mp_int *a, int *result)
4586 {
4587   int     err, ix;
4588   mp_digit res;
4589 
4590   /* default to not */
4591   *result = MP_NO;
4592 
4593   for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4594     /* what is a mod LBL_prime_tab[ix] */
4595     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4596       return err;
4597     }
4598 
4599     /* is the residue zero? */
4600     if (res == 0) {
4601       *result = MP_YES;
4602       return MP_OKAY;
4603     }
4604   }
4605 
4606   return MP_OKAY;
4607 }
4608 
4609 /* single digit addition */
4610 static int
mp_add_d(mp_int * a,mp_digit b,mp_int * c)4611 mp_add_d(mp_int *a, mp_digit b, mp_int *c)
4612 {
4613   int     res, ix, oldused;
4614   mp_digit *tmpa, *tmpc, mu;
4615 
4616   /* grow c as required */
4617   if (c->alloc < a->used + 1) {
4618      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4619         return res;
4620      }
4621   }
4622 
4623   /* if a is negative and |a| >= b, call c = |a| - b */
4624   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
4625      /* temporarily fix sign of a */
4626      a->sign = MP_ZPOS;
4627 
4628      /* c = |a| - b */
4629      res = mp_sub_d(a, b, c);
4630 
4631      /* fix sign  */
4632      a->sign = c->sign = MP_NEG;
4633 
4634      /* clamp */
4635      mp_clamp(c);
4636 
4637      return res;
4638   }
4639 
4640   /* old number of used digits in c */
4641   oldused = c->used;
4642 
4643   /* sign always positive */
4644   c->sign = MP_ZPOS;
4645 
4646   /* source alias */
4647   tmpa    = a->dp;
4648 
4649   /* destination alias */
4650   tmpc    = c->dp;
4651 
4652   /* if a is positive */
4653   if (a->sign == MP_ZPOS) {
4654      /* add digit, after this we're propagating
4655       * the carry.
4656       */
4657      *tmpc   = *tmpa++ + b;
4658      mu      = *tmpc >> DIGIT_BIT;
4659      *tmpc++ &= MP_MASK;
4660 
4661      /* now handle rest of the digits */
4662      for (ix = 1; ix < a->used; ix++) {
4663         *tmpc   = *tmpa++ + mu;
4664         mu      = *tmpc >> DIGIT_BIT;
4665         *tmpc++ &= MP_MASK;
4666      }
4667      /* set final carry */
4668      ix++;
4669      *tmpc++  = mu;
4670 
4671      /* setup size */
4672      c->used = a->used + 1;
4673   } else {
4674      /* a was negative and |a| < b */
4675      c->used  = 1;
4676 
4677      /* the result is a single digit */
4678      if (a->used == 1) {
4679         *tmpc++  =  b - a->dp[0];
4680      } else {
4681         *tmpc++  =  b;
4682      }
4683 
4684      /* setup count so the clearing of oldused
4685       * can fall through correctly
4686       */
4687      ix       = 1;
4688   }
4689 
4690   /* now zero to oldused */
4691   while (ix++ < oldused) {
4692      *tmpc++ = 0;
4693   }
4694   mp_clamp(c);
4695 
4696   return MP_OKAY;
4697 }
4698 
4699 /* single digit subtraction */
4700 static int
mp_sub_d(mp_int * a,mp_digit b,mp_int * c)4701 mp_sub_d(mp_int *a, mp_digit b, mp_int *c)
4702 {
4703   mp_digit *tmpa, *tmpc, mu;
4704   int       res, ix, oldused;
4705 
4706   /* grow c as required */
4707   if (c->alloc < a->used + 1) {
4708      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4709         return res;
4710      }
4711   }
4712 
4713   /* if a is negative just do an unsigned
4714    * addition [with fudged signs]
4715    */
4716   if (a->sign == MP_NEG) {
4717      a->sign = MP_ZPOS;
4718      res     = mp_add_d(a, b, c);
4719      a->sign = c->sign = MP_NEG;
4720 
4721      /* clamp */
4722      mp_clamp(c);
4723 
4724      return res;
4725   }
4726 
4727   /* setup regs */
4728   oldused = c->used;
4729   tmpa    = a->dp;
4730   tmpc    = c->dp;
4731 
4732   /* if a <= b simply fix the single digit */
4733   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
4734      if (a->used == 1) {
4735         *tmpc++ = b - *tmpa;
4736      } else {
4737         *tmpc++ = b;
4738      }
4739      ix      = 1;
4740 
4741      /* negative/1digit */
4742      c->sign = MP_NEG;
4743      c->used = 1;
4744   } else {
4745      /* positive/size */
4746      c->sign = MP_ZPOS;
4747      c->used = a->used;
4748 
4749      /* subtract first digit */
4750      *tmpc    = *tmpa++ - b;
4751      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4752      *tmpc++ &= MP_MASK;
4753 
4754      /* handle rest of the digits */
4755      for (ix = 1; ix < a->used; ix++) {
4756         *tmpc    = *tmpa++ - mu;
4757         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4758         *tmpc++ &= MP_MASK;
4759      }
4760   }
4761 
4762   /* zero excess digits */
4763   while (ix++ < oldused) {
4764      *tmpc++ = 0;
4765   }
4766   mp_clamp(c);
4767   return MP_OKAY;
4768 }
4769 
4770 static const int lnz[16] = {
4771    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4772 };
4773 
4774 /* Counts the number of lsbs which are zero before the first zero bit */
4775 static int
mp_cnt_lsb(mp_int * a)4776 mp_cnt_lsb(mp_int *a)
4777 {
4778    int x;
4779    mp_digit q, qq;
4780 
4781    /* easy out */
4782    if (mp_iszero(a) == 1) {
4783       return 0;
4784    }
4785 
4786    /* scan lower digits until non-zero */
4787    for (x = 0; x < a->used && a->dp[x] == 0; x++);
4788    q = a->dp[x];
4789    x *= DIGIT_BIT;
4790 
4791    /* now scan this digit until a 1 is found */
4792    if ((q & 1) == 0) {
4793       do {
4794          qq  = q & 15;
4795 	 /* LINTED previous op ensures range of qq */
4796          x  += lnz[qq];
4797          q >>= 4;
4798       } while (qq == 0);
4799    }
4800    return x;
4801 }
4802 
4803 /* c = a * a (mod b) */
4804 static int
mp_sqrmod(mp_int * a,mp_int * b,mp_int * c)4805 mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
4806 {
4807   int     res;
4808   mp_int  t;
4809 
4810   if ((res = mp_init (&t)) != MP_OKAY) {
4811     return res;
4812   }
4813 
4814   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
4815     mp_clear (&t);
4816     return res;
4817   }
4818   res = mp_mod (&t, b, c);
4819   mp_clear (&t);
4820   return res;
4821 }
4822 static int
mp_prime_miller_rabin(mp_int * a,mp_int * b,int * result)4823 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result)
4824 {
4825   mp_int  n1, y, r;
4826   int     s, j, err;
4827 
4828   /* default */
4829   *result = MP_NO;
4830 
4831   /* ensure b > 1 */
4832   if (mp_cmp_d(b, 1) != MP_GT) {
4833      return MP_VAL;
4834   }
4835 
4836   /* get n1 = a - 1 */
4837   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4838     return err;
4839   }
4840   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4841     goto LBL_N1;
4842   }
4843 
4844   /* set 2**s * r = n1 */
4845   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4846     goto LBL_N1;
4847   }
4848 
4849   /* count the number of least significant bits
4850    * which are zero
4851    */
4852   s = mp_cnt_lsb(&r);
4853 
4854   /* now divide n - 1 by 2**s */
4855   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4856     goto LBL_R;
4857   }
4858 
4859   /* compute y = b**r mod a */
4860   if ((err = mp_init (&y)) != MP_OKAY) {
4861     goto LBL_R;
4862   }
4863   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4864     goto LBL_Y;
4865   }
4866 
4867   /* if y != 1 and y != n1 do */
4868   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4869     j = 1;
4870     /* while j <= s-1 and y != n1 */
4871     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4872       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4873          goto LBL_Y;
4874       }
4875 
4876       /* if y == 1 then composite */
4877       if (mp_cmp_d (&y, 1) == MP_EQ) {
4878          goto LBL_Y;
4879       }
4880 
4881       ++j;
4882     }
4883 
4884     /* if y != n1 then composite */
4885     if (mp_cmp (&y, &n1) != MP_EQ) {
4886       goto LBL_Y;
4887     }
4888   }
4889 
4890   /* probably prime now */
4891   *result = MP_YES;
4892 LBL_Y:mp_clear (&y);
4893 LBL_R:mp_clear (&r);
4894 LBL_N1:mp_clear (&n1);
4895   return err;
4896 }
4897 
4898 /* performs a variable number of rounds of Miller-Rabin
4899  *
4900  * Probability of error after t rounds is no more than
4901 
4902  *
4903  * Sets result to 1 if probably prime, 0 otherwise
4904  */
4905 static int
mp_prime_is_prime(mp_int * a,int t,int * result)4906 mp_prime_is_prime(mp_int *a, int t, int *result)
4907 {
4908   mp_int  b;
4909   int     ix, err, res;
4910 
4911   /* default to no */
4912   *result = MP_NO;
4913 
4914   /* valid value of t? */
4915   if (t <= 0 || t > (int)PRIME_SIZE) {
4916     return MP_VAL;
4917   }
4918 
4919   /* is the input equal to one of the primes in the table? */
4920   for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4921       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4922          *result = 1;
4923          return MP_OKAY;
4924       }
4925   }
4926 
4927   /* first perform trial division */
4928   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4929     return err;
4930   }
4931 
4932   /* return if it was trivially divisible */
4933   if (res == MP_YES) {
4934     return MP_OKAY;
4935   }
4936 
4937   /* now perform the miller-rabin rounds */
4938   if ((err = mp_init (&b)) != MP_OKAY) {
4939     return err;
4940   }
4941 
4942   for (ix = 0; ix < t; ix++) {
4943     /* set the prime */
4944     mp_set (&b, ltm_prime_tab[ix]);
4945 
4946     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4947       goto LBL_B;
4948     }
4949 
4950     if (res == MP_NO) {
4951       goto LBL_B;
4952     }
4953   }
4954 
4955   /* passed the test */
4956   *result = MP_YES;
4957 LBL_B:mp_clear (&b);
4958   return err;
4959 }
4960 
4961 /* returns size of ASCII reprensentation */
4962 static int
mp_radix_size(mp_int * a,int radix,int * size)4963 mp_radix_size (mp_int *a, int radix, int *size)
4964 {
4965   int     res, digs;
4966   mp_int  t;
4967   mp_digit d;
4968 
4969   *size = 0;
4970 
4971   /* special case for binary */
4972   if (radix == 2) {
4973     *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4974     return MP_OKAY;
4975   }
4976 
4977   /* make sure the radix is in range */
4978   if (radix < 2 || radix > 64) {
4979     return MP_VAL;
4980   }
4981 
4982   if (mp_iszero(a) == MP_YES) {
4983     *size = 2;
4984     return MP_OKAY;
4985   }
4986 
4987   /* digs is the digit count */
4988   digs = 0;
4989 
4990   /* if it's negative add one for the sign */
4991   if (a->sign == MP_NEG) {
4992     ++digs;
4993   }
4994 
4995   /* init a copy of the input */
4996   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4997     return res;
4998   }
4999 
5000   /* force temp to positive */
5001   t.sign = MP_ZPOS;
5002 
5003   /* fetch out all of the digits */
5004   while (mp_iszero (&t) == MP_NO) {
5005     if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5006       mp_clear (&t);
5007       return res;
5008     }
5009     ++digs;
5010   }
5011   mp_clear (&t);
5012 
5013   /* return digs + 1, the 1 is for the NULL byte that would be required. */
5014   *size = digs + 1;
5015   return MP_OKAY;
5016 }
5017 
5018 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
5019 
5020 /* stores a bignum as a ASCII string in a given radix (2..64)
5021  *
5022  * Stores upto maxlen-1 chars and always a NULL byte
5023  */
5024 static int
mp_toradix_n(mp_int * a,char * str,int radix,int maxlen)5025 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
5026 {
5027   int     res, digs;
5028   mp_int  t;
5029   mp_digit d;
5030   char   *_s = str;
5031 
5032   /* check range of the maxlen, radix */
5033   if (maxlen < 2 || radix < 2 || radix > 64) {
5034     return MP_VAL;
5035   }
5036 
5037   /* quick out if its zero */
5038   if (mp_iszero(a) == MP_YES) {
5039      *str++ = '0';
5040      *str = '\0';
5041      return MP_OKAY;
5042   }
5043 
5044   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5045     return res;
5046   }
5047 
5048   /* if it is negative output a - */
5049   if (t.sign == MP_NEG) {
5050     /* we have to reverse our digits later... but not the - sign!! */
5051     ++_s;
5052 
5053     /* store the flag and mark the number as positive */
5054     *str++ = '-';
5055     t.sign = MP_ZPOS;
5056 
5057     /* subtract a char */
5058     --maxlen;
5059   }
5060 
5061   digs = 0;
5062   while (mp_iszero (&t) == 0) {
5063     if (--maxlen < 1) {
5064        /* no more room */
5065        break;
5066     }
5067     if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5068       mp_clear (&t);
5069       return res;
5070     }
5071     /* LINTED -- radix' range is checked above, limits d's range */
5072     *str++ = mp_s_rmap[d];
5073     ++digs;
5074   }
5075 
5076   /* reverse the digits of the string.  In this case _s points
5077    * to the first digit [exluding the sign] of the number
5078    */
5079   bn_reverse ((unsigned char *)_s, digs);
5080 
5081   /* append a NULL so the string is properly terminated */
5082   *str = '\0';
5083 
5084   mp_clear (&t);
5085   return MP_OKAY;
5086 }
5087 
5088 static char *
formatbn(const BIGNUM * a,const int radix)5089 formatbn(const BIGNUM *a, const int radix)
5090 {
5091 	char	*s;
5092 	int	 len;
5093 
5094 	if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) {
5095 		return NULL;
5096 	}
5097 	if ((s = netpgp_allocate(1, (size_t)len)) != NULL) {
5098 		if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) {
5099 			netpgp_deallocate(s, (size_t)len);
5100 			return NULL;
5101 		}
5102 	}
5103 	return s;
5104 }
5105 
5106 static int
mp_getradix_num(mp_int * a,int radix,char * s)5107 mp_getradix_num(mp_int *a, int radix, char *s)
5108 {
5109    int err, ch, neg, y;
5110 
5111    /* clear a */
5112    mp_zero(a);
5113 
5114    /* if first digit is - then set negative */
5115    if ((ch = *s++) == '-') {
5116       neg = MP_NEG;
5117       ch = *s++;
5118    } else {
5119       neg = MP_ZPOS;
5120    }
5121 
5122    for (;;) {
5123       /* find y in the radix map */
5124       for (y = 0; y < radix; y++) {
5125           if (mp_s_rmap[y] == ch) {
5126              break;
5127           }
5128       }
5129       if (y == radix) {
5130          break;
5131       }
5132 
5133       /* shift up and add */
5134       if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) {
5135          return err;
5136       }
5137       if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
5138          return err;
5139       }
5140 
5141       ch = *s++;
5142    }
5143    if (mp_cmp_d(a, 0) != MP_EQ) {
5144       a->sign = neg;
5145    }
5146 
5147    return MP_OKAY;
5148 }
5149 
5150 static int
getbn(BIGNUM ** a,const char * str,int radix)5151 getbn(BIGNUM **a, const char *str, int radix)
5152 {
5153 	int	len;
5154 
5155 	if (a == NULL || str == NULL || (*a = BN_new()) == NULL) {
5156 		return 0;
5157 	}
5158 	if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) {
5159 		return 0;
5160 	}
5161 	mp_radix_size(__UNCONST(a), radix, &len);
5162 	return len - 1;
5163 }
5164 
5165 /* d = a - b (mod c) */
5166 static int
mp_submod(const mp_int * a,const mp_int * b,mp_int * c,mp_int * d)5167 mp_submod(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d)
5168 {
5169   int     res;
5170   mp_int  t;
5171 
5172 
5173   if ((res = mp_init (&t)) != MP_OKAY) {
5174     return res;
5175   }
5176 
5177   if ((res = mp_sub (a, b, &t)) != MP_OKAY) {
5178     mp_clear (&t);
5179     return res;
5180   }
5181   res = mp_mod (&t, c, d);
5182   mp_clear (&t);
5183   return res;
5184 }
5185 
5186 /**************************************************************************/
5187 
5188 /* BIGNUM emulation layer */
5189 
5190 /* essentiually, these are just wrappers around the libtommath functions */
5191 /* usually the order of args changes */
5192 /* the BIGNUM API tends to have more const poisoning */
5193 /* these wrappers also check the arguments passed for sanity */
5194 
5195 BIGNUM *
BN_bin2bn(const uint8_t * data,int len,BIGNUM * ret)5196 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret)
5197 {
5198 	if (data == NULL) {
5199 		return BN_new();
5200 	}
5201 	if (ret == NULL) {
5202 		ret = BN_new();
5203 	}
5204 	return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL;
5205 }
5206 
5207 /* store in unsigned [big endian] format */
5208 int
BN_bn2bin(const BIGNUM * a,unsigned char * b)5209 BN_bn2bin(const BIGNUM *a, unsigned char *b)
5210 {
5211 	BIGNUM	t;
5212 	int    	x;
5213 
5214 	if (a == NULL || b == NULL) {
5215 		return -1;
5216 	}
5217 	if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) {
5218 		return -1;
5219 	}
5220 	for (x = 0; !BN_is_zero(&t) ; ) {
5221 		b[x++] = (unsigned char) (t.dp[0] & 0xff);
5222 		if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY) {
5223 			mp_clear(&t);
5224 			return -1;
5225 		}
5226 	}
5227 	bn_reverse(b, x);
5228 	mp_clear(&t);
5229 	return x;
5230 }
5231 
5232 void
BN_init(BIGNUM * a)5233 BN_init(BIGNUM *a)
5234 {
5235 	if (a != NULL) {
5236 		mp_init(a);
5237 	}
5238 }
5239 
5240 BIGNUM *
BN_new(void)5241 BN_new(void)
5242 {
5243 	BIGNUM	*a;
5244 
5245 	if ((a = netpgp_allocate(1, sizeof(*a))) != NULL) {
5246 		mp_init(a);
5247 	}
5248 	return a;
5249 }
5250 
5251 /* copy, b = a */
5252 int
BN_copy(BIGNUM * b,const BIGNUM * a)5253 BN_copy(BIGNUM *b, const BIGNUM *a)
5254 {
5255 	if (a == NULL || b == NULL) {
5256 		return MP_VAL;
5257 	}
5258 	return mp_copy(__UNCONST(a), b);
5259 }
5260 
5261 BIGNUM *
BN_dup(const BIGNUM * a)5262 BN_dup(const BIGNUM *a)
5263 {
5264 	BIGNUM	*ret;
5265 
5266 	if (a == NULL) {
5267 		return NULL;
5268 	}
5269 	if ((ret = BN_new()) != NULL) {
5270 		BN_copy(ret, a);
5271 	}
5272 	return ret;
5273 }
5274 
5275 void
BN_swap(BIGNUM * a,BIGNUM * b)5276 BN_swap(BIGNUM *a, BIGNUM *b)
5277 {
5278 	if (a && b) {
5279 		mp_exch(a, b);
5280 	}
5281 }
5282 
5283 int
BN_lshift(BIGNUM * r,const BIGNUM * a,int n)5284 BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
5285 {
5286 	if (r == NULL || a == NULL || n < 0) {
5287 		return 0;
5288 	}
5289 	BN_copy(r, a);
5290 	return mp_lshd(r, n) == MP_OKAY;
5291 }
5292 
5293 int
BN_lshift1(BIGNUM * r,BIGNUM * a)5294 BN_lshift1(BIGNUM *r, BIGNUM *a)
5295 {
5296 	if (r == NULL || a == NULL) {
5297 		return 0;
5298 	}
5299 	BN_copy(r, a);
5300 	return mp_lshd(r, 1) == MP_OKAY;
5301 }
5302 
5303 int
BN_rshift(BIGNUM * r,const BIGNUM * a,int n)5304 BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
5305 {
5306 	if (r == NULL || a == NULL || n < 0) {
5307 		return MP_VAL;
5308 	}
5309 	BN_copy(r, a);
5310 	return mp_rshd(r, n) == MP_OKAY;
5311 }
5312 
5313 int
BN_rshift1(BIGNUM * r,BIGNUM * a)5314 BN_rshift1(BIGNUM *r, BIGNUM *a)
5315 {
5316 	if (r == NULL || a == NULL) {
5317 		return 0;
5318 	}
5319 	BN_copy(r, a);
5320 	return mp_rshd(r, 1) == MP_OKAY;
5321 }
5322 
5323 int
BN_set_word(BIGNUM * a,BN_ULONG w)5324 BN_set_word(BIGNUM *a, BN_ULONG w)
5325 {
5326 	if (a == NULL) {
5327 		return 0;
5328 	}
5329 	mp_set(a, w);
5330 	return 1;
5331 }
5332 
5333 int
BN_add(BIGNUM * r,const BIGNUM * a,const BIGNUM * b)5334 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
5335 {
5336 	if (a == NULL || b == NULL || r == NULL) {
5337 		return 0;
5338 	}
5339 	return mp_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5340 }
5341 
5342 int
BN_sub(BIGNUM * r,const BIGNUM * a,const BIGNUM * b)5343 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
5344 {
5345 	if (a == NULL || b == NULL || r == NULL) {
5346 		return 0;
5347 	}
5348 	return mp_sub(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5349 }
5350 
5351 int
BN_mul(BIGNUM * r,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)5352 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
5353 {
5354 	if (a == NULL || b == NULL || r == NULL) {
5355 		return 0;
5356 	}
5357 	USE_ARG(ctx);
5358 	return mp_mul(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5359 }
5360 
5361 int
BN_div(BIGNUM * dv,BIGNUM * rem,const BIGNUM * a,const BIGNUM * d,BN_CTX * ctx)5362 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx)
5363 {
5364 	if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) {
5365 		return 0;
5366 	}
5367 	USE_ARG(ctx);
5368 	return mp_div(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY;
5369 }
5370 
5371 void
BN_free(BIGNUM * a)5372 BN_free(BIGNUM *a)
5373 {
5374 	if (a) {
5375 		mp_clear(a);
5376 	}
5377 }
5378 
5379 void
BN_clear(BIGNUM * a)5380 BN_clear(BIGNUM *a)
5381 {
5382 	if (a) {
5383 		mp_clear(a);
5384 	}
5385 }
5386 
5387 void
BN_clear_free(BIGNUM * a)5388 BN_clear_free(BIGNUM *a)
5389 {
5390 	if (a) {
5391 		mp_clear(a);
5392 	}
5393 }
5394 
5395 int
BN_num_bytes(const BIGNUM * a)5396 BN_num_bytes(const BIGNUM *a)
5397 {
5398 	if (a == NULL) {
5399 		return MP_VAL;
5400 	}
5401 	return mp_unsigned_bin_size(__UNCONST(a));
5402 }
5403 
5404 int
BN_num_bits(const BIGNUM * a)5405 BN_num_bits(const BIGNUM *a)
5406 {
5407 	if (a == NULL) {
5408 		return 0;
5409 	}
5410 	return mp_count_bits(a);
5411 }
5412 
5413 void
BN_set_negative(BIGNUM * a,int n)5414 BN_set_negative(BIGNUM *a, int n)
5415 {
5416 	if (a) {
5417 		a->sign = (n) ? MP_NEG : 0;
5418 	}
5419 }
5420 
5421 int
BN_cmp(BIGNUM * a,BIGNUM * b)5422 BN_cmp(BIGNUM *a, BIGNUM *b)
5423 {
5424 	if (a == NULL || b == NULL) {
5425 		return MP_VAL;
5426 	}
5427 	switch(mp_cmp(a, b)) {
5428 	case MP_LT:
5429 		return -1;
5430 	case MP_GT:
5431 		return 1;
5432 	case MP_EQ:
5433 	default:
5434 		return 0;
5435 	}
5436 }
5437 
5438 int
BN_mod_exp(BIGNUM * Y,BIGNUM * G,BIGNUM * X,BIGNUM * P,BN_CTX * ctx)5439 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx)
5440 {
5441 	if (Y == NULL || G == NULL || X == NULL || P == NULL) {
5442 		return MP_VAL;
5443 	}
5444 	USE_ARG(ctx);
5445 	return mp_exptmod(G, X, P, Y) == MP_OKAY;
5446 }
5447 
5448 BIGNUM *
BN_mod_inverse(BIGNUM * r,BIGNUM * a,const BIGNUM * n,BN_CTX * ctx)5449 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
5450 {
5451 	USE_ARG(ctx);
5452 	if (r == NULL || a == NULL || n == NULL) {
5453 		return NULL;
5454 	}
5455 	return (mp_invmod(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL;
5456 }
5457 
5458 int
BN_mod_mul(BIGNUM * ret,BIGNUM * a,BIGNUM * b,const BIGNUM * m,BN_CTX * ctx)5459 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
5460 {
5461 	USE_ARG(ctx);
5462 	if (ret == NULL || a == NULL || b == NULL || m == NULL) {
5463 		return 0;
5464 	}
5465 	return mp_mulmod(ret, a, b, __UNCONST(m)) == MP_OKAY;
5466 }
5467 
5468 BN_CTX *
BN_CTX_new(void)5469 BN_CTX_new(void)
5470 {
5471 	return netpgp_allocate(1, sizeof(BN_CTX));
5472 }
5473 
5474 void
BN_CTX_init(BN_CTX * c)5475 BN_CTX_init(BN_CTX *c)
5476 {
5477 	if (c != NULL) {
5478 		c->arraysize = 15;
5479 		if ((c->v = netpgp_allocate(sizeof(*c->v), c->arraysize)) == NULL) {
5480 			c->arraysize = 0;
5481 		}
5482 	}
5483 }
5484 
5485 BIGNUM *
BN_CTX_get(BN_CTX * ctx)5486 BN_CTX_get(BN_CTX *ctx)
5487 {
5488 	if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) {
5489 		return NULL;
5490 	}
5491 	return ctx->v[ctx->count++] = BN_new();
5492 }
5493 
5494 void
BN_CTX_start(BN_CTX * ctx)5495 BN_CTX_start(BN_CTX *ctx)
5496 {
5497 	BN_CTX_init(ctx);
5498 }
5499 
5500 void
BN_CTX_free(BN_CTX * c)5501 BN_CTX_free(BN_CTX *c)
5502 {
5503 	unsigned	i;
5504 
5505 	if (c != NULL && c->v != NULL) {
5506 		for (i = 0 ; i < c->count ; i++) {
5507 			BN_clear_free(c->v[i]);
5508 		}
5509 		netpgp_deallocate(c->v, sizeof(*c->v) * c->arraysize);
5510 	}
5511 }
5512 
5513 void
BN_CTX_end(BN_CTX * ctx)5514 BN_CTX_end(BN_CTX *ctx)
5515 {
5516 	BN_CTX_free(ctx);
5517 }
5518 
5519 char *
BN_bn2hex(const BIGNUM * a)5520 BN_bn2hex(const BIGNUM *a)
5521 {
5522 	return (a == NULL) ? NULL : formatbn(a, 16);
5523 }
5524 
5525 char *
BN_bn2dec(const BIGNUM * a)5526 BN_bn2dec(const BIGNUM *a)
5527 {
5528 	return (a == NULL) ? NULL : formatbn(a, 10);
5529 }
5530 
5531 #ifndef _KERNEL
5532 int
BN_print_fp(FILE * fp,const BIGNUM * a)5533 BN_print_fp(FILE *fp, const BIGNUM *a)
5534 {
5535 	char	*s;
5536 	int	 ret;
5537 
5538 	if (fp == NULL || a == NULL) {
5539 		return 0;
5540 	}
5541 	s = BN_bn2hex(a);
5542 	ret = fprintf(fp, "%s", s);
5543 	netpgp_deallocate(s, strlen(s) + 1);
5544 	return ret;
5545 }
5546 #endif
5547 
5548 int
BN_rand(BIGNUM * rnd,int bits,int top,int bottom)5549 BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
5550 {
5551 	uint64_t	r;
5552 	int		digits;
5553 	int		i;
5554 
5555 	if (rnd == NULL) {
5556 		return 0;
5557 	}
5558 	mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT));
5559 	for (i = 0 ; i < digits ; i++) {
5560 		r = (uint64_t)arc4random();
5561 		r <<= 32;
5562 		r |= arc4random();
5563 		rnd->dp[i] = (r & MP_MASK);
5564 	}
5565 	if (top == 0) {
5566 		rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5567 	}
5568 	if (top == 1) {
5569 		rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5570 		rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1)));
5571 	}
5572 	if (bottom) {
5573 		rnd->dp[0] |= 0x1;
5574 	}
5575 	return 1;
5576 }
5577 
5578 int
BN_rand_range(BIGNUM * rnd,BIGNUM * range)5579 BN_rand_range(BIGNUM *rnd, BIGNUM *range)
5580 {
5581 	if (rnd == NULL || range == NULL || BN_is_zero(range)) {
5582 		return 0;
5583 	}
5584 	BN_rand(rnd, BN_num_bits(range), 1, 0);
5585 	return mp_mod(rnd, range, rnd) == MP_OKAY;
5586 }
5587 
5588 int
BN_is_prime(const BIGNUM * a,int checks,void (* callback)(int,int,void *),BN_CTX * ctx,void * cb_arg)5589 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg)
5590 {
5591 	int	primality;
5592 
5593 	if (a == NULL) {
5594 		return 0;
5595 	}
5596 	USE_ARG(ctx);
5597 	USE_ARG(cb_arg);
5598 	USE_ARG(callback);
5599 	return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0;
5600 }
5601 
5602 const BIGNUM *
BN_value_one(void)5603 BN_value_one(void)
5604 {
5605 	static mp_digit		digit = 1UL;
5606 	static const BIGNUM	one = { &digit, 1, 1, 0 };
5607 
5608 	return &one;
5609 }
5610 
5611 int
BN_hex2bn(BIGNUM ** a,const char * str)5612 BN_hex2bn(BIGNUM **a, const char *str)
5613 {
5614 	return getbn(a, str, 16);
5615 }
5616 
5617 int
BN_dec2bn(BIGNUM ** a,const char * str)5618 BN_dec2bn(BIGNUM **a, const char *str)
5619 {
5620 	return getbn(a, str, 10);
5621 }
5622 
5623 int
BN_mod_sub(BIGNUM * r,BIGNUM * a,BIGNUM * b,const BIGNUM * m,BN_CTX * ctx)5624 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
5625 {
5626 	USE_ARG(ctx);
5627 	if (r == NULL || a == NULL || b == NULL || m == NULL) {
5628 		return 0;
5629 	}
5630 	return mp_submod(a, b, __UNCONST(m), r) == MP_OKAY;
5631 }
5632 
5633 int
BN_is_bit_set(const BIGNUM * a,int n)5634 BN_is_bit_set(const BIGNUM *a, int n)
5635 {
5636 	if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) {
5637 		return 0;
5638 	}
5639 	return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0;
5640 }
5641