xref: /openbsd-src/lib/libcrypto/bn/bn_exp.c (revision ff0e7be1ebbcc809ea8ad2b6dafe215824da9e46)
1 /* $OpenBSD: bn_exp.c,v 1.46 2023/05/09 05:38:11 tb Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 #include <stdlib.h>
113 #include <string.h>
114 
115 #include <openssl/err.h>
116 
117 #include "bn_local.h"
118 #include "constant_time.h"
119 
120 /* maximum precomputation table size for *variable* sliding windows */
121 #define TABLE_SIZE	32
122 
123 /* Calculates r = a^p by successive squaring of a. Not constant time. */
124 int
125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
126 {
127 	BIGNUM *rr, *v;
128 	int i;
129 	int ret = 0;
130 
131 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
132 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133 		return -1;
134 	}
135 
136 	BN_CTX_start(ctx);
137 
138 	if ((v = BN_CTX_get(ctx)) == NULL)
139 		goto err;
140 
141 	rr = r;
142 	if (r == a || r == p)
143 		rr = BN_CTX_get(ctx);
144 	if (rr == NULL)
145 		goto err;
146 
147 	if (!BN_one(rr))
148 		goto err;
149 	if (BN_is_odd(p)) {
150 		if (!bn_copy(rr, a))
151 			goto err;
152 	}
153 
154 	if (!bn_copy(v, a))
155 		goto err;
156 
157 	for (i = 1; i < BN_num_bits(p); i++) {
158 		if (!BN_sqr(v, v, ctx))
159 			goto err;
160 		if (!BN_is_bit_set(p, i))
161 			continue;
162 		if (!BN_mul(rr, rr, v, ctx))
163 			goto err;
164 	}
165 
166 	if (!bn_copy(r, rr))
167 		goto err;
168 
169 	ret = 1;
170 
171  err:
172 	BN_CTX_end(ctx);
173 
174 	return ret;
175 }
176 
177 /* The old fallback, simple version :-) */
178 int
179 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
180     BN_CTX *ctx)
181 {
182 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
183 	int start = 1;
184 	BIGNUM *d;
185 	/* Table of variables obtained from 'ctx' */
186 	BIGNUM *val[TABLE_SIZE];
187 
188 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
189 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
190 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
191 		return -1;
192 	}
193 
194 	bits = BN_num_bits(p);
195 	if (bits == 0) {
196 		/* x**0 mod 1 is still zero. */
197 		if (BN_abs_is_word(m, 1)) {
198 			ret = 1;
199 			BN_zero(r);
200 		} else
201 			ret = BN_one(r);
202 		return ret;
203 	}
204 
205 	BN_CTX_start(ctx);
206 	if ((d = BN_CTX_get(ctx)) == NULL)
207 		goto err;
208 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
209 		goto err;
210 
211 	if (!BN_nnmod(val[0],a,m,ctx))
212 		goto err;		/* 1 */
213 	if (BN_is_zero(val[0])) {
214 		BN_zero(r);
215 		ret = 1;
216 		goto err;
217 	}
218 
219 	window = BN_window_bits_for_exponent_size(bits);
220 	if (window > 1) {
221 		if (!BN_mod_mul(d, val[0], val[0], m, ctx))
222 			goto err;				/* 2 */
223 		j = 1 << (window - 1);
224 		for (i = 1; i < j; i++) {
225 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
226 			    !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
227 				goto err;
228 		}
229 	}
230 
231 	start = 1;		/* This is used to avoid multiplication etc
232 				 * when there is only the value '1' in the
233 				 * buffer. */
234 	wvalue = 0;		/* The 'value' of the window */
235 	wstart = bits - 1;	/* The top bit of the window */
236 	wend = 0;		/* The bottom bit of the window */
237 
238 	if (!BN_one(r))
239 		goto err;
240 
241 	for (;;) {
242 		if (BN_is_bit_set(p, wstart) == 0) {
243 			if (!start)
244 				if (!BN_mod_mul(r, r, r, m, ctx))
245 					goto err;
246 			if (wstart == 0)
247 				break;
248 			wstart--;
249 			continue;
250 		}
251 		/* We now have wstart on a 'set' bit, we now need to work out
252 		 * how bit a window to do.  To do this we need to scan
253 		 * forward until the last set bit before the end of the
254 		 * window */
255 		j = wstart;
256 		wvalue = 1;
257 		wend = 0;
258 		for (i = 1; i < window; i++) {
259 			if (wstart - i < 0)
260 				break;
261 			if (BN_is_bit_set(p, wstart - i)) {
262 				wvalue <<= (i - wend);
263 				wvalue |= 1;
264 				wend = i;
265 			}
266 		}
267 
268 		/* wend is the size of the current window */
269 		j = wend + 1;
270 		/* add the 'bytes above' */
271 		if (!start)
272 			for (i = 0; i < j; i++) {
273 				if (!BN_mod_mul(r, r, r, m, ctx))
274 					goto err;
275 			}
276 
277 		/* wvalue will be an odd number < 2^window */
278 		if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
279 			goto err;
280 
281 		/* move the 'window' down further */
282 		wstart -= wend + 1;
283 		wvalue = 0;
284 		start = 0;
285 		if (wstart < 0)
286 			break;
287 	}
288 	ret = 1;
289 
290 err:
291 	BN_CTX_end(ctx);
292 	return (ret);
293 }
294 
295 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
296  * so that accessing any of these table values shows the same access pattern as far
297  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
298  * from/to that table. */
299 
300 static int
301 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
302     int idx, int window)
303 {
304 	int i, j;
305 	int width = 1 << window;
306 	BN_ULONG *table = (BN_ULONG *)buf;
307 
308 	if (top > b->top)
309 		top = b->top; /* this works because 'buf' is explicitly zeroed */
310 
311 	for (i = 0, j = idx; i < top; i++, j += width) {
312 		table[j] = b->d[i];
313 	}
314 
315 	return 1;
316 }
317 
318 static int
319 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
320     int window)
321 {
322 	int i, j;
323 	int width = 1 << window;
324 	volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
325 
326 	if (!bn_wexpand(b, top))
327 		return 0;
328 
329 	if (window <= 3) {
330 		for (i = 0; i < top; i++, table += width) {
331 		    BN_ULONG acc = 0;
332 
333 		    for (j = 0; j < width; j++) {
334 			acc |= table[j] &
335 			       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
336 		    }
337 
338 		    b->d[i] = acc;
339 		}
340 	} else {
341 		int xstride = 1 << (window - 2);
342 		BN_ULONG y0, y1, y2, y3;
343 
344 		i = idx >> (window - 2);        /* equivalent of idx / xstride */
345 		idx &= xstride - 1;             /* equivalent of idx % xstride */
346 
347 		y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
348 		y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
349 		y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
350 		y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
351 
352 		for (i = 0; i < top; i++, table += width) {
353 		    BN_ULONG acc = 0;
354 
355 		    for (j = 0; j < xstride; j++) {
356 			acc |= ( (table[j + 0 * xstride] & y0) |
357 				 (table[j + 1 * xstride] & y1) |
358 				 (table[j + 2 * xstride] & y2) |
359 				 (table[j + 3 * xstride] & y3) )
360 			       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
361 		    }
362 
363 		    b->d[i] = acc;
364 		}
365 	}
366 	b->top = top;
367 	bn_correct_top(b);
368 	return 1;
369 }
370 
371 /* Given a pointer value, compute the next address that is a cache line multiple. */
372 #define MOD_EXP_CTIME_ALIGN(x_) \
373 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
374 
375 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
376  * precomputation memory layout to limit data-dependency to a minimum
377  * to protect secret exponents (cf. the hyper-threading timing attacks
378  * pointed out by Colin Percival,
379  * http://www.daemonology.net/hyperthreading-considered-harmful/)
380  */
381 int
382 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
383     const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
384 {
385 	int i, bits, ret = 0, window, wvalue;
386 	int top;
387 	BN_MONT_CTX *mont = NULL;
388 	int numPowers;
389 	unsigned char *powerbufFree = NULL;
390 	int powerbufLen = 0;
391 	unsigned char *powerbuf = NULL;
392 	BIGNUM tmp, am;
393 
394 
395 	if (!BN_is_odd(m)) {
396 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
397 		return (0);
398 	}
399 
400 	top = m->top;
401 
402 	bits = BN_num_bits(p);
403 	if (bits == 0) {
404 		/* x**0 mod 1 is still zero. */
405 		if (BN_abs_is_word(m, 1)) {
406 			ret = 1;
407 			BN_zero(rr);
408 		} else
409 			ret = BN_one(rr);
410 		return ret;
411 	}
412 
413 	BN_CTX_start(ctx);
414 
415 	/*
416 	 * Allocate a Montgomery context if it was not supplied by the caller.
417 	 * If this is not done, things will break in the montgomery part.
418 	 */
419 	if (in_mont != NULL)
420 		mont = in_mont;
421 	else {
422 		if ((mont = BN_MONT_CTX_new()) == NULL)
423 			goto err;
424 		if (!BN_MONT_CTX_set(mont, m, ctx))
425 			goto err;
426 	}
427 
428 	/* Get the window size to use with size of p. */
429 	window = BN_window_bits_for_ctime_exponent_size(bits);
430 #if defined(OPENSSL_BN_ASM_MONT5)
431 	if (window == 6 && bits <= 1024)
432 		window = 5;	/* ~5% improvement of 2048-bit RSA sign */
433 #endif
434 
435 	/* Allocate a buffer large enough to hold all of the pre-computed
436 	 * powers of am, am itself and tmp.
437 	 */
438 	numPowers = 1 << window;
439 	powerbufLen = sizeof(m->d[0]) * (top * numPowers +
440 	    ((2*top) > numPowers ? (2*top) : numPowers));
441 	if ((powerbufFree = calloc(powerbufLen +
442 	    MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL)
443 		goto err;
444 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
445 
446 	/* lay down tmp and am right after powers table */
447 	tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
448 	am.d = tmp.d + top;
449 	tmp.top = am.top = 0;
450 	tmp.dmax = am.dmax = top;
451 	tmp.neg = am.neg = 0;
452 	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
453 
454 	/* prepare a^0 in Montgomery domain */
455 #if 1
456 	if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
457 		goto err;
458 #else
459 	tmp.d[0] = (0 - m - >d[0]) & BN_MASK2;	/* 2^(top*BN_BITS2) - m */
460 	for (i = 1; i < top; i++)
461 		tmp.d[i] = (~m->d[i]) & BN_MASK2;
462 	tmp.top = top;
463 #endif
464 
465 	/* prepare a^1 in Montgomery domain */
466 	if (!BN_nnmod(&am, a, m, ctx))
467 		goto err;
468 	if (!BN_to_montgomery(&am, &am, mont, ctx))
469 		goto err;
470 
471 #if defined(OPENSSL_BN_ASM_MONT5)
472 	/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
473 	 * specifically optimization of cache-timing attack countermeasures
474 	 * and pre-computation optimization. */
475 
476 	/* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
477 	 * 512-bit RSA is hardly relevant, we omit it to spare size... */
478 	if (window == 5 && top > 1) {
479 		void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
480 		    const void *table, const BN_ULONG *np,
481 		    const BN_ULONG *n0, int num, int power);
482 		void bn_scatter5(const BN_ULONG *inp, size_t num,
483 		    void *table, size_t power);
484 		void bn_gather5(BN_ULONG *out, size_t num,
485 		    void *table, size_t power);
486 
487 		BN_ULONG *np = mont->N.d, *n0 = mont->n0;
488 
489 		/* BN_to_montgomery can contaminate words above .top
490 		 * [in BN_DEBUG[_DEBUG] build]... */
491 		for (i = am.top; i < top; i++)
492 			am.d[i] = 0;
493 		for (i = tmp.top; i < top; i++)
494 			tmp.d[i] = 0;
495 
496 		bn_scatter5(tmp.d, top, powerbuf, 0);
497 		bn_scatter5(am.d, am.top, powerbuf, 1);
498 		bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
499 		bn_scatter5(tmp.d, top, powerbuf, 2);
500 
501 #if 0
502 		for (i = 3; i < 32; i++) {
503 			/* Calculate a^i = a^(i-1) * a */
504 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
505 			    n0, top, i - 1);
506 			bn_scatter5(tmp.d, top, powerbuf, i);
507 		}
508 #else
509 		/* same as above, but uses squaring for 1/2 of operations */
510 		for (i = 4; i < 32; i*=2) {
511 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
512 			bn_scatter5(tmp.d, top, powerbuf, i);
513 		}
514 		for (i = 3; i < 8; i += 2) {
515 			int j;
516 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
517 			    n0, top, i - 1);
518 			bn_scatter5(tmp.d, top, powerbuf, i);
519 			for (j = 2 * i; j < 32; j *= 2) {
520 				bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
521 				bn_scatter5(tmp.d, top, powerbuf, j);
522 			}
523 		}
524 		for (; i < 16; i += 2) {
525 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
526 			    n0, top, i - 1);
527 			bn_scatter5(tmp.d, top, powerbuf, i);
528 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
529 			bn_scatter5(tmp.d, top, powerbuf, 2*i);
530 		}
531 		for (; i < 32; i += 2) {
532 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
533 			    n0, top, i - 1);
534 			bn_scatter5(tmp.d, top, powerbuf, i);
535 		}
536 #endif
537 		bits--;
538 		for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
539 			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
540 		bn_gather5(tmp.d, top, powerbuf, wvalue);
541 
542 		/* Scan the exponent one window at a time starting from the most
543 		 * significant bits.
544 		 */
545 		while (bits >= 0) {
546 			for (wvalue = 0, i = 0; i < 5; i++, bits--)
547 				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
548 
549 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
550 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
551 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
552 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
553 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
554 			bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
555 		}
556 
557 		tmp.top = top;
558 		bn_correct_top(&tmp);
559 	} else
560 #endif
561 	{
562 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
563 		    window))
564 			goto err;
565 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1,
566 		    window))
567 			goto err;
568 
569 		/* If the window size is greater than 1, then calculate
570 		 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
571 		 * (even powers could instead be computed as (a^(i/2))^2
572 		 * to use the slight performance advantage of sqr over mul).
573 		 */
574 		if (window > 1) {
575 			if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
576 				goto err;
577 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
578 			    2, window))
579 				goto err;
580 			for (i = 3; i < numPowers; i++) {
581 				/* Calculate a^i = a^(i-1) * a */
582 				if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
583 				    mont, ctx))
584 					goto err;
585 				if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
586 				    powerbuf, i, window))
587 					goto err;
588 			}
589 		}
590 
591 		bits--;
592 		for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
593 			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
594 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
595 		    wvalue, window))
596 			goto err;
597 
598 		/* Scan the exponent one window at a time starting from the most
599 		 * significant bits.
600 		 */
601 		while (bits >= 0) {
602 			wvalue = 0; /* The 'value' of the window */
603 
604 			/* Scan the window, squaring the result as we go */
605 			for (i = 0; i < window; i++, bits--) {
606 				if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
607 				    mont, ctx))
608 					goto err;
609 				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
610 			}
611 
612 			/* Fetch the appropriate pre-computed value from the pre-buf */
613 			if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
614 			    wvalue, window))
615 				goto err;
616 
617 			/* Multiply the result into the intermediate result */
618 			if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
619 				goto err;
620 		}
621 	}
622 
623 	/* Convert the final result from montgomery to standard format */
624 	if (!BN_from_montgomery(rr, &tmp, mont, ctx))
625 		goto err;
626 	ret = 1;
627 
628 err:
629 	if ((in_mont == NULL) && (mont != NULL))
630 		BN_MONT_CTX_free(mont);
631 	freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
632 	BN_CTX_end(ctx);
633 	return (ret);
634 }
635 
636 static int
637 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
638     BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
639 {
640 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
641 	int start = 1;
642 	BIGNUM *d, *r;
643 	const BIGNUM *aa;
644 	/* Table of variables obtained from 'ctx' */
645 	BIGNUM *val[TABLE_SIZE];
646 	BN_MONT_CTX *mont = NULL;
647 
648 	if (ct) {
649 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
650 	}
651 
652 
653 	if (!BN_is_odd(m)) {
654 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
655 		return (0);
656 	}
657 
658 	bits = BN_num_bits(p);
659 	if (bits == 0) {
660 		/* x**0 mod 1 is still zero. */
661 		if (BN_abs_is_word(m, 1)) {
662 			ret = 1;
663 			BN_zero(rr);
664 		} else
665 			ret = BN_one(rr);
666 		return ret;
667 	}
668 
669 	BN_CTX_start(ctx);
670 	if ((d = BN_CTX_get(ctx)) == NULL)
671 		goto err;
672 	if ((r = BN_CTX_get(ctx)) == NULL)
673 		goto err;
674 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
675 		goto err;
676 
677 	/* If this is not done, things will break in the montgomery
678 	 * part */
679 
680 	if (in_mont != NULL)
681 		mont = in_mont;
682 	else {
683 		if ((mont = BN_MONT_CTX_new()) == NULL)
684 			goto err;
685 		if (!BN_MONT_CTX_set(mont, m, ctx))
686 			goto err;
687 	}
688 
689 	if (!BN_nnmod(val[0], a,m, ctx))
690 		goto err;
691 	aa = val[0];
692 	if (BN_is_zero(aa)) {
693 		BN_zero(rr);
694 		ret = 1;
695 		goto err;
696 	}
697 	if (!BN_to_montgomery(val[0], aa, mont, ctx))
698 		goto err; /* 1 */
699 
700 	window = BN_window_bits_for_exponent_size(bits);
701 	if (window > 1) {
702 		if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
703 			goto err; /* 2 */
704 		j = 1 << (window - 1);
705 		for (i = 1; i < j; i++) {
706 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
707 			    !BN_mod_mul_montgomery(val[i], val[i - 1],
708 			    d, mont, ctx))
709 				goto err;
710 		}
711 	}
712 
713 	start = 1;		/* This is used to avoid multiplication etc
714 				 * when there is only the value '1' in the
715 				 * buffer. */
716 	wvalue = 0;		/* The 'value' of the window */
717 	wstart = bits - 1;	/* The top bit of the window */
718 	wend = 0;		/* The bottom bit of the window */
719 
720 	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
721 		goto err;
722 	for (;;) {
723 		if (BN_is_bit_set(p, wstart) == 0) {
724 			if (!start) {
725 				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
726 					goto err;
727 			}
728 			if (wstart == 0)
729 				break;
730 			wstart--;
731 			continue;
732 		}
733 		/* We now have wstart on a 'set' bit, we now need to work out
734 		 * how bit a window to do.  To do this we need to scan
735 		 * forward until the last set bit before the end of the
736 		 * window */
737 		j = wstart;
738 		wvalue = 1;
739 		wend = 0;
740 		for (i = 1; i < window; i++) {
741 			if (wstart - i < 0)
742 				break;
743 			if (BN_is_bit_set(p, wstart - i)) {
744 				wvalue <<= (i - wend);
745 				wvalue |= 1;
746 				wend = i;
747 			}
748 		}
749 
750 		/* wend is the size of the current window */
751 		j = wend + 1;
752 		/* add the 'bytes above' */
753 		if (!start)
754 			for (i = 0; i < j; i++) {
755 				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
756 					goto err;
757 			}
758 
759 		/* wvalue will be an odd number < 2^window */
760 		if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
761 			goto err;
762 
763 		/* move the 'window' down further */
764 		wstart -= wend + 1;
765 		wvalue = 0;
766 		start = 0;
767 		if (wstart < 0)
768 			break;
769 	}
770 	if (!BN_from_montgomery(rr, r,mont, ctx))
771 		goto err;
772 	ret = 1;
773 
774 err:
775 	if ((in_mont == NULL) && (mont != NULL))
776 		BN_MONT_CTX_free(mont);
777 	BN_CTX_end(ctx);
778 	return (ret);
779 }
780 
781 int
782 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
783     BN_CTX *ctx, BN_MONT_CTX *in_mont)
784 {
785 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
786 	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
787 }
788 
789 int
790 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
791     BN_CTX *ctx, BN_MONT_CTX *in_mont)
792 {
793 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
794 }
795 
796 int
797 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
798     BN_CTX *ctx, BN_MONT_CTX *in_mont)
799 {
800 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
801 }
802 
803 int
804 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
805     BN_CTX *ctx, BN_MONT_CTX *in_mont)
806 {
807 	BN_MONT_CTX *mont = NULL;
808 	int b, bits, ret = 0;
809 	int r_is_one;
810 	BN_ULONG w, next_w;
811 	BIGNUM *d, *r, *t;
812 	BIGNUM *swap_tmp;
813 
814 #define BN_MOD_MUL_WORD(r, w, m) \
815 		(BN_mul_word(r, (w)) && \
816 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
817 			(BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
818 		/* BN_MOD_MUL_WORD is only used with 'w' large,
819 		 * so the BN_ucmp test is probably more overhead
820 		 * than always using BN_mod (which uses bn_copy if
821 		 * a similar test returns true). */
822 		/* We can use BN_mod and do not need BN_nnmod because our
823 		 * accumulator is never negative (the result of BN_mod does
824 		 * not depend on the sign of the modulus).
825 		 */
826 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
827 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
828 
829 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
830 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
831 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
832 		return -1;
833 	}
834 
835 
836 	if (!BN_is_odd(m)) {
837 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
838 		return (0);
839 	}
840 	if (m->top == 1)
841 		a %= m->d[0]; /* make sure that 'a' is reduced */
842 
843 	bits = BN_num_bits(p);
844 	if (bits == 0) {
845 		/* x**0 mod 1 is still zero. */
846 		if (BN_abs_is_word(m, 1)) {
847 			ret = 1;
848 			BN_zero(rr);
849 		} else
850 			ret = BN_one(rr);
851 		return ret;
852 	}
853 	if (a == 0) {
854 		BN_zero(rr);
855 		ret = 1;
856 		return ret;
857 	}
858 
859 	BN_CTX_start(ctx);
860 	if ((d = BN_CTX_get(ctx)) == NULL)
861 		goto err;
862 	if ((r = BN_CTX_get(ctx)) == NULL)
863 		goto err;
864 	if ((t = BN_CTX_get(ctx)) == NULL)
865 		goto err;
866 
867 	if (in_mont != NULL)
868 		mont = in_mont;
869 	else {
870 		if ((mont = BN_MONT_CTX_new()) == NULL)
871 			goto err;
872 		if (!BN_MONT_CTX_set(mont, m, ctx))
873 			goto err;
874 	}
875 
876 	r_is_one = 1; /* except for Montgomery factor */
877 
878 	/* bits-1 >= 0 */
879 
880 	/* The result is accumulated in the product r*w. */
881 	w = a; /* bit 'bits-1' of 'p' is always set */
882 	for (b = bits - 2; b >= 0; b--) {
883 		/* First, square r*w. */
884 		next_w = w * w;
885 		if ((next_w / w) != w) /* overflow */
886 		{
887 			if (r_is_one) {
888 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
889 					goto err;
890 				r_is_one = 0;
891 			} else {
892 				if (!BN_MOD_MUL_WORD(r, w, m))
893 					goto err;
894 			}
895 			next_w = 1;
896 		}
897 		w = next_w;
898 		if (!r_is_one) {
899 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
900 				goto err;
901 		}
902 
903 		/* Second, multiply r*w by 'a' if exponent bit is set. */
904 		if (BN_is_bit_set(p, b)) {
905 			next_w = w * a;
906 			if ((next_w / a) != w) /* overflow */
907 			{
908 				if (r_is_one) {
909 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
910 						goto err;
911 					r_is_one = 0;
912 				} else {
913 					if (!BN_MOD_MUL_WORD(r, w, m))
914 						goto err;
915 				}
916 				next_w = a;
917 			}
918 			w = next_w;
919 		}
920 	}
921 
922 	/* Finally, set r:=r*w. */
923 	if (w != 1) {
924 		if (r_is_one) {
925 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
926 				goto err;
927 			r_is_one = 0;
928 		} else {
929 			if (!BN_MOD_MUL_WORD(r, w, m))
930 				goto err;
931 		}
932 	}
933 
934 	if (r_is_one) /* can happen only if a == 1*/
935 	{
936 		if (!BN_one(rr))
937 			goto err;
938 	} else {
939 		if (!BN_from_montgomery(rr, r, mont, ctx))
940 			goto err;
941 	}
942 	ret = 1;
943 
944 err:
945 	if ((in_mont == NULL) && (mont != NULL))
946 		BN_MONT_CTX_free(mont);
947 	BN_CTX_end(ctx);
948 	return (ret);
949 }
950 
951 int
952 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
953     BN_CTX *ctx)
954 {
955 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
956 	int start = 1;
957 	BIGNUM *aa;
958 	/* Table of variables obtained from 'ctx' */
959 	BIGNUM *val[TABLE_SIZE];
960 	BN_RECP_CTX recp;
961 
962 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
963 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
964 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
965 		return -1;
966 	}
967 
968 	bits = BN_num_bits(p);
969 	if (bits == 0) {
970 		/* x**0 mod 1 is still zero. */
971 		if (BN_abs_is_word(m, 1)) {
972 			ret = 1;
973 			BN_zero(r);
974 		} else
975 			ret = BN_one(r);
976 		return ret;
977 	}
978 
979 	BN_RECP_CTX_init(&recp);
980 
981 	BN_CTX_start(ctx);
982 	if ((aa = BN_CTX_get(ctx)) == NULL)
983 		goto err;
984 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
985 		goto err;
986 
987 	if (m->neg) {
988 		/* ignore sign of 'm' */
989 		if (!bn_copy(aa, m))
990 			goto err;
991 		aa->neg = 0;
992 		if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
993 			goto err;
994 	} else {
995 		if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
996 			goto err;
997 	}
998 
999 	if (!BN_nnmod(val[0], a, m, ctx))
1000 		goto err;		/* 1 */
1001 	if (BN_is_zero(val[0])) {
1002 		BN_zero(r);
1003 		ret = 1;
1004 		goto err;
1005 	}
1006 
1007 	window = BN_window_bits_for_exponent_size(bits);
1008 	if (window > 1) {
1009 		if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
1010 			goto err;				/* 2 */
1011 		j = 1 << (window - 1);
1012 		for (i = 1; i < j; i++) {
1013 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1014 			    !BN_mod_mul_reciprocal(val[i], val[i - 1],
1015 			    aa, &recp, ctx))
1016 				goto err;
1017 		}
1018 	}
1019 
1020 	start = 1;		/* This is used to avoid multiplication etc
1021 				 * when there is only the value '1' in the
1022 				 * buffer. */
1023 	wvalue = 0;		/* The 'value' of the window */
1024 	wstart = bits - 1;	/* The top bit of the window */
1025 	wend = 0;		/* The bottom bit of the window */
1026 
1027 	if (!BN_one(r))
1028 		goto err;
1029 
1030 	for (;;) {
1031 		if (BN_is_bit_set(p, wstart) == 0) {
1032 			if (!start)
1033 				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
1034 					goto err;
1035 			if (wstart == 0)
1036 				break;
1037 			wstart--;
1038 			continue;
1039 		}
1040 		/* We now have wstart on a 'set' bit, we now need to work out
1041 		 * how bit a window to do.  To do this we need to scan
1042 		 * forward until the last set bit before the end of the
1043 		 * window */
1044 		j = wstart;
1045 		wvalue = 1;
1046 		wend = 0;
1047 		for (i = 1; i < window; i++) {
1048 			if (wstart - i < 0)
1049 				break;
1050 			if (BN_is_bit_set(p, wstart - i)) {
1051 				wvalue <<= (i - wend);
1052 				wvalue |= 1;
1053 				wend = i;
1054 			}
1055 		}
1056 
1057 		/* wend is the size of the current window */
1058 		j = wend + 1;
1059 		/* add the 'bytes above' */
1060 		if (!start)
1061 			for (i = 0; i < j; i++) {
1062 				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
1063 					goto err;
1064 			}
1065 
1066 		/* wvalue will be an odd number < 2^window */
1067 		if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx))
1068 			goto err;
1069 
1070 		/* move the 'window' down further */
1071 		wstart -= wend + 1;
1072 		wvalue = 0;
1073 		start = 0;
1074 		if (wstart < 0)
1075 			break;
1076 	}
1077 	ret = 1;
1078 
1079 err:
1080 	BN_CTX_end(ctx);
1081 	BN_RECP_CTX_free(&recp);
1082 	return (ret);
1083 }
1084 
1085 static int
1086 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1087     BN_CTX *ctx, int ct)
1088 {
1089 	int ret;
1090 
1091 
1092 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
1093 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
1094 	 * exponentiation for the odd part), using appropriate exponent
1095 	 * reductions, and combine the results using the CRT.
1096 	 *
1097 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
1098 	 * exponentiation using the reciprocal-based quick remaindering
1099 	 * algorithm is used.
1100 	 *
1101 	 * (Timing obtained with expspeed.c [computations  a^p mod m
1102 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
1103 	 * 4096, 8192 bits], compared to the running time of the
1104 	 * standard algorithm:
1105 	 *
1106 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
1107 	 *                     55 .. 77 %  [UltraSparc processor, but
1108 	 *                                  debug-solaris-sparcv8-gcc conf.]
1109 	 *
1110 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
1111 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
1112 	 *
1113 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
1114 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
1115 	 * slower even than the standard algorithm!
1116 	 *
1117 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
1118 	 * should be obtained when the new Montgomery reduction code
1119 	 * has been integrated into OpenSSL.)
1120 	 */
1121 
1122 	if (BN_is_odd(m)) {
1123 		if (a->top == 1 && !a->neg && !ct) {
1124 			BN_ULONG A = a->d[0];
1125 			ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
1126 		} else
1127 			ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
1128 	} else	{
1129 		ret = BN_mod_exp_recp(r, a,p, m, ctx);
1130 	}
1131 
1132 	return (ret);
1133 }
1134 
1135 int
1136 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1137     BN_CTX *ctx)
1138 {
1139 	return BN_mod_exp_internal(r, a, p, m, ctx,
1140 	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
1141 }
1142 
1143 int
1144 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1145     BN_CTX *ctx)
1146 {
1147 	return BN_mod_exp_internal(r, a, p, m, ctx, 1);
1148 }
1149 
1150 int
1151 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1152     BN_CTX *ctx)
1153 {
1154 	return BN_mod_exp_internal(r, a, p, m, ctx, 0);
1155 }
1156 
1157 int
1158 BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
1159     const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx,
1160     BN_MONT_CTX *in_mont)
1161 {
1162 	int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
1163 	int r_is_one = 1;
1164 	BIGNUM *d, *r;
1165 	const BIGNUM *a_mod_m;
1166 	/* Tables of variables obtained from 'ctx' */
1167 	BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
1168 	BN_MONT_CTX *mont = NULL;
1169 
1170 
1171 	if (!BN_is_odd(m)) {
1172 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
1173 		return (0);
1174 	}
1175 	bits1 = BN_num_bits(p1);
1176 	bits2 = BN_num_bits(p2);
1177 	if ((bits1 == 0) && (bits2 == 0)) {
1178 		ret = BN_one(rr);
1179 		return ret;
1180 	}
1181 
1182 	bits = (bits1 > bits2) ? bits1 : bits2;
1183 
1184 	BN_CTX_start(ctx);
1185 	if ((d = BN_CTX_get(ctx)) == NULL)
1186 		goto err;
1187 	if ((r = BN_CTX_get(ctx)) == NULL)
1188 		goto err;
1189 	if ((val1[0] = BN_CTX_get(ctx)) == NULL)
1190 		goto err;
1191 	if ((val2[0] = BN_CTX_get(ctx)) == NULL)
1192 		goto err;
1193 
1194 	if (in_mont != NULL)
1195 		mont = in_mont;
1196 	else {
1197 		if ((mont = BN_MONT_CTX_new()) == NULL)
1198 			goto err;
1199 		if (!BN_MONT_CTX_set(mont, m, ctx))
1200 			goto err;
1201 	}
1202 
1203 	window1 = BN_window_bits_for_exponent_size(bits1);
1204 	window2 = BN_window_bits_for_exponent_size(bits2);
1205 
1206 	/*
1207 	 * Build table for a1:   val1[i] := a1^(2*i + 1) mod m  for i = 0 .. 2^(window1-1)
1208 	 */
1209 	if (!BN_nnmod(val1[0], a1, m, ctx))
1210 		goto err;
1211 	a_mod_m = val1[0];
1212 	if (BN_is_zero(a_mod_m)) {
1213 		BN_zero(rr);
1214 		ret = 1;
1215 		goto err;
1216 	}
1217 
1218 	if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
1219 		goto err;
1220 	if (window1 > 1) {
1221 		if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
1222 			goto err;
1223 
1224 		j = 1 << (window1 - 1);
1225 		for (i = 1; i < j; i++) {
1226 			if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
1227 			    !BN_mod_mul_montgomery(val1[i], val1[i - 1],
1228 			    d, mont, ctx))
1229 				goto err;
1230 		}
1231 	}
1232 
1233 
1234 	/*
1235 	 * Build table for a2:   val2[i] := a2^(2*i + 1) mod m  for i = 0 .. 2^(window2-1)
1236 	 */
1237 	if (!BN_nnmod(val2[0], a2, m, ctx))
1238 		goto err;
1239 	a_mod_m = val2[0];
1240 	if (BN_is_zero(a_mod_m)) {
1241 		BN_zero(rr);
1242 		ret = 1;
1243 		goto err;
1244 	}
1245 	if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
1246 		goto err;
1247 	if (window2 > 1) {
1248 		if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
1249 			goto err;
1250 
1251 		j = 1 << (window2 - 1);
1252 		for (i = 1; i < j; i++) {
1253 			if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
1254 			    !BN_mod_mul_montgomery(val2[i], val2[i - 1],
1255 			    d, mont, ctx))
1256 				goto err;
1257 		}
1258 	}
1259 
1260 
1261 	/* Now compute the power product, using independent windows. */
1262 	r_is_one = 1;
1263 	wvalue1 = 0;  /* The 'value' of the first window */
1264 	wvalue2 = 0;  /* The 'value' of the second window */
1265 	wpos1 = 0;    /* If wvalue1 > 0, the bottom bit of the first window */
1266 	wpos2 = 0;    /* If wvalue2 > 0, the bottom bit of the second window */
1267 
1268 	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
1269 		goto err;
1270 	for (b = bits - 1; b >= 0; b--) {
1271 		if (!r_is_one) {
1272 			if (!BN_mod_mul_montgomery(r, r,r, mont, ctx))
1273 				goto err;
1274 		}
1275 
1276 		if (!wvalue1)
1277 			if (BN_is_bit_set(p1, b)) {
1278 			/* consider bits b-window1+1 .. b for this window */
1279 			i = b - window1 + 1;
1280 			while (!BN_is_bit_set(p1, i)) /* works for i<0 */
1281 				i++;
1282 			wpos1 = i;
1283 			wvalue1 = 1;
1284 			for (i = b - 1; i >= wpos1; i--) {
1285 				wvalue1 <<= 1;
1286 				if (BN_is_bit_set(p1, i))
1287 					wvalue1++;
1288 			}
1289 		}
1290 
1291 		if (!wvalue2)
1292 			if (BN_is_bit_set(p2, b)) {
1293 			/* consider bits b-window2+1 .. b for this window */
1294 			i = b - window2 + 1;
1295 			while (!BN_is_bit_set(p2, i))
1296 				i++;
1297 			wpos2 = i;
1298 			wvalue2 = 1;
1299 			for (i = b - 1; i >= wpos2; i--) {
1300 				wvalue2 <<= 1;
1301 				if (BN_is_bit_set(p2, i))
1302 					wvalue2++;
1303 			}
1304 		}
1305 
1306 		if (wvalue1 && b == wpos1) {
1307 			/* wvalue1 is odd and < 2^window1 */
1308 			if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1],
1309 			    mont, ctx))
1310 				goto err;
1311 			wvalue1 = 0;
1312 			r_is_one = 0;
1313 		}
1314 
1315 		if (wvalue2 && b == wpos2) {
1316 			/* wvalue2 is odd and < 2^window2 */
1317 			if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1],
1318 			    mont, ctx))
1319 				goto err;
1320 			wvalue2 = 0;
1321 			r_is_one = 0;
1322 		}
1323 	}
1324 	if (!BN_from_montgomery(rr, r,mont, ctx))
1325 		goto err;
1326 	ret = 1;
1327 
1328 err:
1329 	if ((in_mont == NULL) && (mont != NULL))
1330 		BN_MONT_CTX_free(mont);
1331 	BN_CTX_end(ctx);
1332 	return (ret);
1333 }
1334