1 /* $OpenBSD: bn_exp.c,v 1.56 2025/01/22 12:53:16 tb Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 #include <stdlib.h> 113 #include <string.h> 114 115 #include <openssl/err.h> 116 117 #include "bn_local.h" 118 #include "constant_time.h" 119 120 /* maximum precomputation table size for *variable* sliding windows */ 121 #define TABLE_SIZE 32 122 123 /* Calculates r = a^p by successive squaring of a. Not constant time. */ 124 int 125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 126 { 127 BIGNUM *rr, *v; 128 int i; 129 int ret = 0; 130 131 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 132 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 133 return -1; 134 } 135 136 BN_CTX_start(ctx); 137 138 if ((v = BN_CTX_get(ctx)) == NULL) 139 goto err; 140 141 rr = r; 142 if (r == a || r == p) 143 rr = BN_CTX_get(ctx); 144 if (rr == NULL) 145 goto err; 146 147 if (!BN_one(rr)) 148 goto err; 149 if (BN_is_odd(p)) { 150 if (!bn_copy(rr, a)) 151 goto err; 152 } 153 154 if (!bn_copy(v, a)) 155 goto err; 156 157 for (i = 1; i < BN_num_bits(p); i++) { 158 if (!BN_sqr(v, v, ctx)) 159 goto err; 160 if (!BN_is_bit_set(p, i)) 161 continue; 162 if (!BN_mul(rr, rr, v, ctx)) 163 goto err; 164 } 165 166 if (!bn_copy(r, rr)) 167 goto err; 168 169 ret = 1; 170 171 err: 172 BN_CTX_end(ctx); 173 174 return ret; 175 } 176 LCRYPTO_ALIAS(BN_exp); 177 178 /* The old fallback, simple version :-) */ 179 int 180 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 181 BN_CTX *ctx) 182 { 183 int i, j, bits, wstart, wend, window, wvalue; 184 int start = 1; 185 BIGNUM *d, *q; 186 /* Table of variables obtained from 'ctx' */ 187 BIGNUM *val[TABLE_SIZE]; 188 int ret = 0; 189 190 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 191 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 192 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 193 return -1; 194 } 195 196 if (r == m) { 197 BNerror(BN_R_INVALID_ARGUMENT); 198 return 0; 199 } 200 201 bits = BN_num_bits(p); 202 if (bits == 0) { 203 /* x**0 mod 1 is still zero. */ 204 if (BN_abs_is_word(m, 1)) { 205 ret = 1; 206 BN_zero(r); 207 } else 208 ret = BN_one(r); 209 return ret; 210 } 211 212 BN_CTX_start(ctx); 213 if ((d = BN_CTX_get(ctx)) == NULL) 214 goto err; 215 if ((q = BN_CTX_get(ctx)) == NULL) 216 goto err; 217 if ((val[0] = BN_CTX_get(ctx)) == NULL) 218 goto err; 219 220 if (!BN_nnmod(val[0], a, m, ctx)) 221 goto err; 222 if (BN_is_zero(val[0])) { 223 BN_zero(r); 224 goto done; 225 } 226 if (!bn_copy(q, p)) 227 goto err; 228 229 window = BN_window_bits_for_exponent_size(bits); 230 if (window > 1) { 231 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 232 goto err; 233 j = 1 << (window - 1); 234 for (i = 1; i < j; i++) { 235 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 236 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 237 goto err; 238 } 239 } 240 241 start = 1; /* This is used to avoid multiplication etc 242 * when there is only the value '1' in the 243 * buffer. */ 244 wvalue = 0; /* The 'value' of the window */ 245 wstart = bits - 1; /* The top bit of the window */ 246 wend = 0; /* The bottom bit of the window */ 247 248 if (!BN_one(r)) 249 goto err; 250 251 for (;;) { 252 if (BN_is_bit_set(q, wstart) == 0) { 253 if (!start) 254 if (!BN_mod_mul(r, r, r, m, ctx)) 255 goto err; 256 if (wstart == 0) 257 break; 258 wstart--; 259 continue; 260 } 261 /* We now have wstart on a 'set' bit, we now need to work out 262 * how bit a window to do. To do this we need to scan 263 * forward until the last set bit before the end of the 264 * window */ 265 j = wstart; 266 wvalue = 1; 267 wend = 0; 268 for (i = 1; i < window; i++) { 269 if (wstart - i < 0) 270 break; 271 if (BN_is_bit_set(q, wstart - i)) { 272 wvalue <<= (i - wend); 273 wvalue |= 1; 274 wend = i; 275 } 276 } 277 278 /* wend is the size of the current window */ 279 j = wend + 1; 280 /* add the 'bytes above' */ 281 if (!start) 282 for (i = 0; i < j; i++) { 283 if (!BN_mod_mul(r, r, r, m, ctx)) 284 goto err; 285 } 286 287 /* wvalue will be an odd number < 2^window */ 288 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 289 goto err; 290 291 /* move the 'window' down further */ 292 wstart -= wend + 1; 293 wvalue = 0; 294 start = 0; 295 if (wstart < 0) 296 break; 297 } 298 299 done: 300 ret = 1; 301 302 err: 303 BN_CTX_end(ctx); 304 305 return ret; 306 } 307 308 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 309 * so that accessing any of these table values shows the same access pattern as far 310 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 311 * from/to that table. */ 312 313 static int 314 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, 315 int idx, int window) 316 { 317 int i, j; 318 int width = 1 << window; 319 BN_ULONG *table = (BN_ULONG *)buf; 320 321 if (top > b->top) 322 top = b->top; /* this works because 'buf' is explicitly zeroed */ 323 324 for (i = 0, j = idx; i < top; i++, j += width) { 325 table[j] = b->d[i]; 326 } 327 328 return 1; 329 } 330 331 static int 332 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, 333 int window) 334 { 335 int i, j; 336 int width = 1 << window; 337 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 338 339 if (!bn_wexpand(b, top)) 340 return 0; 341 342 if (window <= 3) { 343 for (i = 0; i < top; i++, table += width) { 344 BN_ULONG acc = 0; 345 346 for (j = 0; j < width; j++) { 347 acc |= table[j] & 348 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 349 } 350 351 b->d[i] = acc; 352 } 353 } else { 354 int xstride = 1 << (window - 2); 355 BN_ULONG y0, y1, y2, y3; 356 357 i = idx >> (window - 2); /* equivalent of idx / xstride */ 358 idx &= xstride - 1; /* equivalent of idx % xstride */ 359 360 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 361 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 362 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 363 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 364 365 for (i = 0; i < top; i++, table += width) { 366 BN_ULONG acc = 0; 367 368 for (j = 0; j < xstride; j++) { 369 acc |= ( (table[j + 0 * xstride] & y0) | 370 (table[j + 1 * xstride] & y1) | 371 (table[j + 2 * xstride] & y2) | 372 (table[j + 3 * xstride] & y3) ) 373 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 374 } 375 376 b->d[i] = acc; 377 } 378 } 379 b->top = top; 380 bn_correct_top(b); 381 return 1; 382 } 383 384 /* Given a pointer value, compute the next address that is a cache line multiple. */ 385 #define MOD_EXP_CTIME_ALIGN(x_) \ 386 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 387 388 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 389 * precomputation memory layout to limit data-dependency to a minimum 390 * to protect secret exponents (cf. the hyper-threading timing attacks 391 * pointed out by Colin Percival, 392 * http://www.daemonology.net/hyperthreading-considered-harmful/) 393 */ 394 int 395 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 396 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 397 { 398 int i, bits, ret = 0, window, wvalue; 399 int top; 400 BN_MONT_CTX *mont = NULL; 401 int numPowers; 402 unsigned char *powerbufFree = NULL; 403 int powerbufLen = 0; 404 unsigned char *powerbuf = NULL; 405 BIGNUM tmp, am; 406 407 408 if (!BN_is_odd(m)) { 409 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 410 return (0); 411 } 412 413 top = m->top; 414 415 bits = BN_num_bits(p); 416 if (bits == 0) { 417 /* x**0 mod 1 is still zero. */ 418 if (BN_abs_is_word(m, 1)) { 419 ret = 1; 420 BN_zero(rr); 421 } else 422 ret = BN_one(rr); 423 return ret; 424 } 425 426 BN_CTX_start(ctx); 427 428 /* 429 * Allocate a Montgomery context if it was not supplied by the caller. 430 * If this is not done, things will break in the montgomery part. 431 */ 432 if (in_mont != NULL) 433 mont = in_mont; 434 else { 435 if ((mont = BN_MONT_CTX_new()) == NULL) 436 goto err; 437 if (!BN_MONT_CTX_set(mont, m, ctx)) 438 goto err; 439 } 440 441 /* Get the window size to use with size of p. */ 442 window = BN_window_bits_for_ctime_exponent_size(bits); 443 #if defined(OPENSSL_BN_ASM_MONT5) 444 if (window == 6 && bits <= 1024) 445 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 446 #endif 447 448 /* Allocate a buffer large enough to hold all of the pre-computed 449 * powers of am, am itself and tmp. 450 */ 451 numPowers = 1 << window; 452 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 453 ((2*top) > numPowers ? (2*top) : numPowers)); 454 if ((powerbufFree = calloc(powerbufLen + 455 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL) 456 goto err; 457 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 458 459 /* lay down tmp and am right after powers table */ 460 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 461 am.d = tmp.d + top; 462 tmp.top = am.top = 0; 463 tmp.dmax = am.dmax = top; 464 tmp.neg = am.neg = 0; 465 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 466 467 /* prepare a^0 in Montgomery domain */ 468 #if 1 469 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 470 goto err; 471 #else 472 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 473 for (i = 1; i < top; i++) 474 tmp.d[i] = (~m->d[i]) & BN_MASK2; 475 tmp.top = top; 476 #endif 477 478 /* prepare a^1 in Montgomery domain */ 479 if (!BN_nnmod(&am, a, m, ctx)) 480 goto err; 481 if (!BN_to_montgomery(&am, &am, mont, ctx)) 482 goto err; 483 484 #if defined(OPENSSL_BN_ASM_MONT5) 485 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 486 * specifically optimization of cache-timing attack countermeasures 487 * and pre-computation optimization. */ 488 489 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 490 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 491 if (window == 5 && top > 1) { 492 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 493 const void *table, const BN_ULONG *np, 494 const BN_ULONG *n0, int num, int power); 495 void bn_scatter5(const BN_ULONG *inp, size_t num, 496 void *table, size_t power); 497 void bn_gather5(BN_ULONG *out, size_t num, 498 void *table, size_t power); 499 500 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 501 502 /* BN_to_montgomery can contaminate words above .top 503 * [in BN_DEBUG[_DEBUG] build]... */ 504 for (i = am.top; i < top; i++) 505 am.d[i] = 0; 506 for (i = tmp.top; i < top; i++) 507 tmp.d[i] = 0; 508 509 bn_scatter5(tmp.d, top, powerbuf, 0); 510 bn_scatter5(am.d, am.top, powerbuf, 1); 511 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 512 bn_scatter5(tmp.d, top, powerbuf, 2); 513 514 #if 0 515 for (i = 3; i < 32; i++) { 516 /* Calculate a^i = a^(i-1) * a */ 517 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 518 n0, top, i - 1); 519 bn_scatter5(tmp.d, top, powerbuf, i); 520 } 521 #else 522 /* same as above, but uses squaring for 1/2 of operations */ 523 for (i = 4; i < 32; i*=2) { 524 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 525 bn_scatter5(tmp.d, top, powerbuf, i); 526 } 527 for (i = 3; i < 8; i += 2) { 528 int j; 529 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 530 n0, top, i - 1); 531 bn_scatter5(tmp.d, top, powerbuf, i); 532 for (j = 2 * i; j < 32; j *= 2) { 533 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 534 bn_scatter5(tmp.d, top, powerbuf, j); 535 } 536 } 537 for (; i < 16; i += 2) { 538 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 539 n0, top, i - 1); 540 bn_scatter5(tmp.d, top, powerbuf, i); 541 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 542 bn_scatter5(tmp.d, top, powerbuf, 2*i); 543 } 544 for (; i < 32; i += 2) { 545 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 546 n0, top, i - 1); 547 bn_scatter5(tmp.d, top, powerbuf, i); 548 } 549 #endif 550 bits--; 551 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 552 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 553 bn_gather5(tmp.d, top, powerbuf, wvalue); 554 555 /* Scan the exponent one window at a time starting from the most 556 * significant bits. 557 */ 558 while (bits >= 0) { 559 for (wvalue = 0, i = 0; i < 5; i++, bits--) 560 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 561 562 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 563 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 564 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 565 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 566 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 567 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 568 } 569 570 tmp.top = top; 571 bn_correct_top(&tmp); 572 } else 573 #endif 574 { 575 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, 576 window)) 577 goto err; 578 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, 579 window)) 580 goto err; 581 582 /* If the window size is greater than 1, then calculate 583 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 584 * (even powers could instead be computed as (a^(i/2))^2 585 * to use the slight performance advantage of sqr over mul). 586 */ 587 if (window > 1) { 588 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 589 goto err; 590 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 591 2, window)) 592 goto err; 593 for (i = 3; i < numPowers; i++) { 594 /* Calculate a^i = a^(i-1) * a */ 595 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, 596 mont, ctx)) 597 goto err; 598 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, 599 powerbuf, i, window)) 600 goto err; 601 } 602 } 603 604 bits--; 605 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 606 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 607 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, 608 wvalue, window)) 609 goto err; 610 611 /* Scan the exponent one window at a time starting from the most 612 * significant bits. 613 */ 614 while (bits >= 0) { 615 wvalue = 0; /* The 'value' of the window */ 616 617 /* Scan the window, squaring the result as we go */ 618 for (i = 0; i < window; i++, bits--) { 619 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, 620 mont, ctx)) 621 goto err; 622 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 623 } 624 625 /* Fetch the appropriate pre-computed value from the pre-buf */ 626 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, 627 wvalue, window)) 628 goto err; 629 630 /* Multiply the result into the intermediate result */ 631 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 632 goto err; 633 } 634 } 635 636 /* Convert the final result from montgomery to standard format */ 637 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 638 goto err; 639 ret = 1; 640 641 err: 642 if ((in_mont == NULL) && (mont != NULL)) 643 BN_MONT_CTX_free(mont); 644 freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 645 BN_CTX_end(ctx); 646 return (ret); 647 } 648 LCRYPTO_ALIAS(BN_mod_exp_mont_consttime); 649 650 static int 651 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 652 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) 653 { 654 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 655 int start = 1; 656 BIGNUM *d, *r; 657 const BIGNUM *aa; 658 /* Table of variables obtained from 'ctx' */ 659 BIGNUM *val[TABLE_SIZE]; 660 BN_MONT_CTX *mont = NULL; 661 662 if (ct) { 663 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 664 } 665 666 667 if (!BN_is_odd(m)) { 668 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 669 return (0); 670 } 671 672 bits = BN_num_bits(p); 673 if (bits == 0) { 674 /* x**0 mod 1 is still zero. */ 675 if (BN_abs_is_word(m, 1)) { 676 ret = 1; 677 BN_zero(rr); 678 } else 679 ret = BN_one(rr); 680 return ret; 681 } 682 683 BN_CTX_start(ctx); 684 if ((d = BN_CTX_get(ctx)) == NULL) 685 goto err; 686 if ((r = BN_CTX_get(ctx)) == NULL) 687 goto err; 688 if ((val[0] = BN_CTX_get(ctx)) == NULL) 689 goto err; 690 691 /* If this is not done, things will break in the montgomery 692 * part */ 693 694 if (in_mont != NULL) 695 mont = in_mont; 696 else { 697 if ((mont = BN_MONT_CTX_new()) == NULL) 698 goto err; 699 if (!BN_MONT_CTX_set(mont, m, ctx)) 700 goto err; 701 } 702 703 if (!BN_nnmod(val[0], a,m, ctx)) 704 goto err; 705 aa = val[0]; 706 if (BN_is_zero(aa)) { 707 BN_zero(rr); 708 ret = 1; 709 goto err; 710 } 711 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 712 goto err; 713 714 window = BN_window_bits_for_exponent_size(bits); 715 if (window > 1) { 716 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 717 goto err; 718 j = 1 << (window - 1); 719 for (i = 1; i < j; i++) { 720 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 721 !BN_mod_mul_montgomery(val[i], val[i - 1], 722 d, mont, ctx)) 723 goto err; 724 } 725 } 726 727 start = 1; /* This is used to avoid multiplication etc 728 * when there is only the value '1' in the 729 * buffer. */ 730 wvalue = 0; /* The 'value' of the window */ 731 wstart = bits - 1; /* The top bit of the window */ 732 wend = 0; /* The bottom bit of the window */ 733 734 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 735 goto err; 736 for (;;) { 737 if (BN_is_bit_set(p, wstart) == 0) { 738 if (!start) { 739 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 740 goto err; 741 } 742 if (wstart == 0) 743 break; 744 wstart--; 745 continue; 746 } 747 /* We now have wstart on a 'set' bit, we now need to work out 748 * how bit a window to do. To do this we need to scan 749 * forward until the last set bit before the end of the 750 * window */ 751 j = wstart; 752 wvalue = 1; 753 wend = 0; 754 for (i = 1; i < window; i++) { 755 if (wstart - i < 0) 756 break; 757 if (BN_is_bit_set(p, wstart - i)) { 758 wvalue <<= (i - wend); 759 wvalue |= 1; 760 wend = i; 761 } 762 } 763 764 /* wend is the size of the current window */ 765 j = wend + 1; 766 /* add the 'bytes above' */ 767 if (!start) 768 for (i = 0; i < j; i++) { 769 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 770 goto err; 771 } 772 773 /* wvalue will be an odd number < 2^window */ 774 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 775 goto err; 776 777 /* move the 'window' down further */ 778 wstart -= wend + 1; 779 wvalue = 0; 780 start = 0; 781 if (wstart < 0) 782 break; 783 } 784 if (!BN_from_montgomery(rr, r,mont, ctx)) 785 goto err; 786 ret = 1; 787 788 err: 789 if ((in_mont == NULL) && (mont != NULL)) 790 BN_MONT_CTX_free(mont); 791 BN_CTX_end(ctx); 792 return (ret); 793 } 794 795 int 796 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 797 BN_CTX *ctx, BN_MONT_CTX *in_mont) 798 { 799 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 800 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 801 } 802 LCRYPTO_ALIAS(BN_mod_exp_mont); 803 804 int 805 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 806 BN_CTX *ctx, BN_MONT_CTX *in_mont) 807 { 808 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); 809 } 810 811 int 812 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 813 BN_CTX *ctx, BN_MONT_CTX *in_mont) 814 { 815 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); 816 } 817 818 int 819 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 820 BN_CTX *ctx, BN_MONT_CTX *in_mont) 821 { 822 BN_MONT_CTX *mont = NULL; 823 int b, bits, ret = 0; 824 int r_is_one; 825 BN_ULONG w, next_w; 826 BIGNUM *d, *r, *t; 827 BIGNUM *swap_tmp; 828 829 #define BN_MOD_MUL_WORD(r, w, m) \ 830 (BN_mul_word(r, (w)) && \ 831 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 832 (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 833 /* BN_MOD_MUL_WORD is only used with 'w' large, 834 * so the BN_ucmp test is probably more overhead 835 * than always using BN_mod (which uses bn_copy if 836 * a similar test returns true). */ 837 /* We can use BN_mod and do not need BN_nnmod because our 838 * accumulator is never negative (the result of BN_mod does 839 * not depend on the sign of the modulus). 840 */ 841 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 842 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 843 844 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 845 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 846 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 847 return -1; 848 } 849 850 851 if (!BN_is_odd(m)) { 852 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 853 return (0); 854 } 855 if (m->top == 1) 856 a %= m->d[0]; /* make sure that 'a' is reduced */ 857 858 bits = BN_num_bits(p); 859 if (bits == 0) { 860 /* x**0 mod 1 is still zero. */ 861 if (BN_abs_is_word(m, 1)) { 862 ret = 1; 863 BN_zero(rr); 864 } else 865 ret = BN_one(rr); 866 return ret; 867 } 868 if (a == 0) { 869 BN_zero(rr); 870 ret = 1; 871 return ret; 872 } 873 874 BN_CTX_start(ctx); 875 if ((d = BN_CTX_get(ctx)) == NULL) 876 goto err; 877 if ((r = BN_CTX_get(ctx)) == NULL) 878 goto err; 879 if ((t = BN_CTX_get(ctx)) == NULL) 880 goto err; 881 882 if (in_mont != NULL) 883 mont = in_mont; 884 else { 885 if ((mont = BN_MONT_CTX_new()) == NULL) 886 goto err; 887 if (!BN_MONT_CTX_set(mont, m, ctx)) 888 goto err; 889 } 890 891 r_is_one = 1; /* except for Montgomery factor */ 892 893 /* bits-1 >= 0 */ 894 895 /* The result is accumulated in the product r*w. */ 896 w = a; /* bit 'bits-1' of 'p' is always set */ 897 for (b = bits - 2; b >= 0; b--) { 898 /* First, square r*w. */ 899 next_w = w * w; 900 if ((next_w / w) != w) /* overflow */ 901 { 902 if (r_is_one) { 903 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 904 goto err; 905 r_is_one = 0; 906 } else { 907 if (!BN_MOD_MUL_WORD(r, w, m)) 908 goto err; 909 } 910 next_w = 1; 911 } 912 w = next_w; 913 if (!r_is_one) { 914 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 915 goto err; 916 } 917 918 /* Second, multiply r*w by 'a' if exponent bit is set. */ 919 if (BN_is_bit_set(p, b)) { 920 next_w = w * a; 921 if ((next_w / a) != w) /* overflow */ 922 { 923 if (r_is_one) { 924 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 925 goto err; 926 r_is_one = 0; 927 } else { 928 if (!BN_MOD_MUL_WORD(r, w, m)) 929 goto err; 930 } 931 next_w = a; 932 } 933 w = next_w; 934 } 935 } 936 937 /* Finally, set r:=r*w. */ 938 if (w != 1) { 939 if (r_is_one) { 940 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 941 goto err; 942 r_is_one = 0; 943 } else { 944 if (!BN_MOD_MUL_WORD(r, w, m)) 945 goto err; 946 } 947 } 948 949 if (r_is_one) /* can happen only if a == 1*/ 950 { 951 if (!BN_one(rr)) 952 goto err; 953 } else { 954 if (!BN_from_montgomery(rr, r, mont, ctx)) 955 goto err; 956 } 957 ret = 1; 958 959 err: 960 if ((in_mont == NULL) && (mont != NULL)) 961 BN_MONT_CTX_free(mont); 962 BN_CTX_end(ctx); 963 return (ret); 964 } 965 966 int 967 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 968 BN_CTX *ctx) 969 { 970 int i, j, bits, wstart, wend, window, wvalue; 971 int start = 1; 972 BIGNUM *aa, *q; 973 /* Table of variables obtained from 'ctx' */ 974 BIGNUM *val[TABLE_SIZE]; 975 BN_RECP_CTX *recp = NULL; 976 int ret = 0; 977 978 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 979 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 980 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 981 return -1; 982 } 983 984 bits = BN_num_bits(p); 985 if (bits == 0) { 986 /* x**0 mod 1 is still zero. */ 987 if (BN_abs_is_word(m, 1)) { 988 ret = 1; 989 BN_zero(r); 990 } else 991 ret = BN_one(r); 992 return ret; 993 } 994 995 BN_CTX_start(ctx); 996 if ((aa = BN_CTX_get(ctx)) == NULL) 997 goto err; 998 if ((q = BN_CTX_get(ctx)) == NULL) 999 goto err; 1000 if ((val[0] = BN_CTX_get(ctx)) == NULL) 1001 goto err; 1002 1003 if ((recp = BN_RECP_CTX_create(m)) == NULL) 1004 goto err; 1005 1006 if (!BN_nnmod(val[0], a, m, ctx)) 1007 goto err; 1008 if (BN_is_zero(val[0])) { 1009 BN_zero(r); 1010 goto done; 1011 } 1012 if (!bn_copy(q, p)) 1013 goto err; 1014 1015 window = BN_window_bits_for_exponent_size(bits); 1016 if (window > 1) { 1017 if (!BN_mod_sqr_reciprocal(aa, val[0], recp, ctx)) 1018 goto err; 1019 j = 1 << (window - 1); 1020 for (i = 1; i < j; i++) { 1021 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1022 !BN_mod_mul_reciprocal(val[i], val[i - 1], 1023 aa, recp, ctx)) 1024 goto err; 1025 } 1026 } 1027 1028 start = 1; /* This is used to avoid multiplication etc 1029 * when there is only the value '1' in the 1030 * buffer. */ 1031 wvalue = 0; /* The 'value' of the window */ 1032 wstart = bits - 1; /* The top bit of the window */ 1033 wend = 0; /* The bottom bit of the window */ 1034 1035 if (!BN_one(r)) 1036 goto err; 1037 1038 for (;;) { 1039 if (BN_is_bit_set(q, wstart) == 0) { 1040 if (!start) 1041 if (!BN_mod_sqr_reciprocal(r, r, recp, ctx)) 1042 goto err; 1043 if (wstart == 0) 1044 break; 1045 wstart--; 1046 continue; 1047 } 1048 /* We now have wstart on a 'set' bit, we now need to work out 1049 * how bit a window to do. To do this we need to scan 1050 * forward until the last set bit before the end of the 1051 * window */ 1052 j = wstart; 1053 wvalue = 1; 1054 wend = 0; 1055 for (i = 1; i < window; i++) { 1056 if (wstart - i < 0) 1057 break; 1058 if (BN_is_bit_set(q, wstart - i)) { 1059 wvalue <<= (i - wend); 1060 wvalue |= 1; 1061 wend = i; 1062 } 1063 } 1064 1065 /* wend is the size of the current window */ 1066 j = wend + 1; 1067 /* add the 'bytes above' */ 1068 if (!start) 1069 for (i = 0; i < j; i++) { 1070 if (!BN_mod_sqr_reciprocal(r, r, recp, ctx)) 1071 goto err; 1072 } 1073 1074 /* wvalue will be an odd number < 2^window */ 1075 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], recp, ctx)) 1076 goto err; 1077 1078 /* move the 'window' down further */ 1079 wstart -= wend + 1; 1080 wvalue = 0; 1081 start = 0; 1082 if (wstart < 0) 1083 break; 1084 } 1085 1086 done: 1087 ret = 1; 1088 1089 err: 1090 BN_CTX_end(ctx); 1091 BN_RECP_CTX_free(recp); 1092 1093 return ret; 1094 } 1095 1096 static int 1097 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1098 BN_CTX *ctx, int ct) 1099 { 1100 int ret; 1101 1102 1103 /* For even modulus m = 2^k*m_odd, it might make sense to compute 1104 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 1105 * exponentiation for the odd part), using appropriate exponent 1106 * reductions, and combine the results using the CRT. 1107 * 1108 * For now, we use Montgomery only if the modulus is odd; otherwise, 1109 * exponentiation using the reciprocal-based quick remaindering 1110 * algorithm is used. 1111 * 1112 * (Timing obtained with expspeed.c [computations a^p mod m 1113 * where a, p, m are of the same length: 256, 512, 1024, 2048, 1114 * 4096, 8192 bits], compared to the running time of the 1115 * standard algorithm: 1116 * 1117 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 1118 * 55 .. 77 % [UltraSparc processor, but 1119 * debug-solaris-sparcv8-gcc conf.] 1120 * 1121 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 1122 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 1123 * 1124 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 1125 * at 2048 and more bits, but at 512 and 1024 bits, it was 1126 * slower even than the standard algorithm! 1127 * 1128 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 1129 * should be obtained when the new Montgomery reduction code 1130 * has been integrated into OpenSSL.) 1131 */ 1132 1133 if (BN_is_odd(m)) { 1134 if (a->top == 1 && !a->neg && !ct) { 1135 BN_ULONG A = a->d[0]; 1136 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); 1137 } else 1138 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); 1139 } else { 1140 ret = BN_mod_exp_recp(r, a,p, m, ctx); 1141 } 1142 1143 return (ret); 1144 } 1145 1146 int 1147 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1148 BN_CTX *ctx) 1149 { 1150 return BN_mod_exp_internal(r, a, p, m, ctx, 1151 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 1152 } 1153 LCRYPTO_ALIAS(BN_mod_exp); 1154 1155 int 1156 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1157 BN_CTX *ctx) 1158 { 1159 return BN_mod_exp_internal(r, a, p, m, ctx, 1); 1160 } 1161 1162 int 1163 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1164 BN_CTX *ctx) 1165 { 1166 return BN_mod_exp_internal(r, a, p, m, ctx, 0); 1167 } 1168 1169 int 1170 BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, 1171 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx, 1172 BN_MONT_CTX *in_mont) 1173 { 1174 int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2; 1175 int r_is_one = 1; 1176 BIGNUM *d, *r; 1177 const BIGNUM *a_mod_m; 1178 /* Tables of variables obtained from 'ctx' */ 1179 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE]; 1180 BN_MONT_CTX *mont = NULL; 1181 1182 1183 if (!BN_is_odd(m)) { 1184 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 1185 return (0); 1186 } 1187 bits1 = BN_num_bits(p1); 1188 bits2 = BN_num_bits(p2); 1189 if ((bits1 == 0) && (bits2 == 0)) { 1190 ret = BN_one(rr); 1191 return ret; 1192 } 1193 1194 bits = (bits1 > bits2) ? bits1 : bits2; 1195 1196 BN_CTX_start(ctx); 1197 if ((d = BN_CTX_get(ctx)) == NULL) 1198 goto err; 1199 if ((r = BN_CTX_get(ctx)) == NULL) 1200 goto err; 1201 if ((val1[0] = BN_CTX_get(ctx)) == NULL) 1202 goto err; 1203 if ((val2[0] = BN_CTX_get(ctx)) == NULL) 1204 goto err; 1205 1206 if (in_mont != NULL) 1207 mont = in_mont; 1208 else { 1209 if ((mont = BN_MONT_CTX_new()) == NULL) 1210 goto err; 1211 if (!BN_MONT_CTX_set(mont, m, ctx)) 1212 goto err; 1213 } 1214 1215 window1 = BN_window_bits_for_exponent_size(bits1); 1216 window2 = BN_window_bits_for_exponent_size(bits2); 1217 1218 /* 1219 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1) 1220 */ 1221 if (!BN_nnmod(val1[0], a1, m, ctx)) 1222 goto err; 1223 a_mod_m = val1[0]; 1224 if (BN_is_zero(a_mod_m)) { 1225 BN_zero(rr); 1226 ret = 1; 1227 goto err; 1228 } 1229 1230 if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) 1231 goto err; 1232 if (window1 > 1) { 1233 if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) 1234 goto err; 1235 1236 j = 1 << (window1 - 1); 1237 for (i = 1; i < j; i++) { 1238 if (((val1[i] = BN_CTX_get(ctx)) == NULL) || 1239 !BN_mod_mul_montgomery(val1[i], val1[i - 1], 1240 d, mont, ctx)) 1241 goto err; 1242 } 1243 } 1244 1245 1246 /* 1247 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1) 1248 */ 1249 if (!BN_nnmod(val2[0], a2, m, ctx)) 1250 goto err; 1251 a_mod_m = val2[0]; 1252 if (BN_is_zero(a_mod_m)) { 1253 BN_zero(rr); 1254 ret = 1; 1255 goto err; 1256 } 1257 if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) 1258 goto err; 1259 if (window2 > 1) { 1260 if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) 1261 goto err; 1262 1263 j = 1 << (window2 - 1); 1264 for (i = 1; i < j; i++) { 1265 if (((val2[i] = BN_CTX_get(ctx)) == NULL) || 1266 !BN_mod_mul_montgomery(val2[i], val2[i - 1], 1267 d, mont, ctx)) 1268 goto err; 1269 } 1270 } 1271 1272 1273 /* Now compute the power product, using independent windows. */ 1274 r_is_one = 1; 1275 wvalue1 = 0; /* The 'value' of the first window */ 1276 wvalue2 = 0; /* The 'value' of the second window */ 1277 wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */ 1278 wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */ 1279 1280 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 1281 goto err; 1282 for (b = bits - 1; b >= 0; b--) { 1283 if (!r_is_one) { 1284 if (!BN_mod_mul_montgomery(r, r,r, mont, ctx)) 1285 goto err; 1286 } 1287 1288 if (!wvalue1) 1289 if (BN_is_bit_set(p1, b)) { 1290 /* consider bits b-window1+1 .. b for this window */ 1291 i = b - window1 + 1; 1292 while (!BN_is_bit_set(p1, i)) /* works for i<0 */ 1293 i++; 1294 wpos1 = i; 1295 wvalue1 = 1; 1296 for (i = b - 1; i >= wpos1; i--) { 1297 wvalue1 <<= 1; 1298 if (BN_is_bit_set(p1, i)) 1299 wvalue1++; 1300 } 1301 } 1302 1303 if (!wvalue2) 1304 if (BN_is_bit_set(p2, b)) { 1305 /* consider bits b-window2+1 .. b for this window */ 1306 i = b - window2 + 1; 1307 while (!BN_is_bit_set(p2, i)) 1308 i++; 1309 wpos2 = i; 1310 wvalue2 = 1; 1311 for (i = b - 1; i >= wpos2; i--) { 1312 wvalue2 <<= 1; 1313 if (BN_is_bit_set(p2, i)) 1314 wvalue2++; 1315 } 1316 } 1317 1318 if (wvalue1 && b == wpos1) { 1319 /* wvalue1 is odd and < 2^window1 */ 1320 if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], 1321 mont, ctx)) 1322 goto err; 1323 wvalue1 = 0; 1324 r_is_one = 0; 1325 } 1326 1327 if (wvalue2 && b == wpos2) { 1328 /* wvalue2 is odd and < 2^window2 */ 1329 if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], 1330 mont, ctx)) 1331 goto err; 1332 wvalue2 = 0; 1333 r_is_one = 0; 1334 } 1335 } 1336 if (!BN_from_montgomery(rr, r,mont, ctx)) 1337 goto err; 1338 ret = 1; 1339 1340 err: 1341 if ((in_mont == NULL) && (mont != NULL)) 1342 BN_MONT_CTX_free(mont); 1343 BN_CTX_end(ctx); 1344 return (ret); 1345 } 1346