1 /* $OpenBSD: bn_exp.c,v 1.46 2023/05/09 05:38:11 tb Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 #include <stdlib.h> 113 #include <string.h> 114 115 #include <openssl/err.h> 116 117 #include "bn_local.h" 118 #include "constant_time.h" 119 120 /* maximum precomputation table size for *variable* sliding windows */ 121 #define TABLE_SIZE 32 122 123 /* Calculates r = a^p by successive squaring of a. Not constant time. */ 124 int 125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 126 { 127 BIGNUM *rr, *v; 128 int i; 129 int ret = 0; 130 131 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 132 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 133 return -1; 134 } 135 136 BN_CTX_start(ctx); 137 138 if ((v = BN_CTX_get(ctx)) == NULL) 139 goto err; 140 141 rr = r; 142 if (r == a || r == p) 143 rr = BN_CTX_get(ctx); 144 if (rr == NULL) 145 goto err; 146 147 if (!BN_one(rr)) 148 goto err; 149 if (BN_is_odd(p)) { 150 if (!bn_copy(rr, a)) 151 goto err; 152 } 153 154 if (!bn_copy(v, a)) 155 goto err; 156 157 for (i = 1; i < BN_num_bits(p); i++) { 158 if (!BN_sqr(v, v, ctx)) 159 goto err; 160 if (!BN_is_bit_set(p, i)) 161 continue; 162 if (!BN_mul(rr, rr, v, ctx)) 163 goto err; 164 } 165 166 if (!bn_copy(r, rr)) 167 goto err; 168 169 ret = 1; 170 171 err: 172 BN_CTX_end(ctx); 173 174 return ret; 175 } 176 177 /* The old fallback, simple version :-) */ 178 int 179 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 180 BN_CTX *ctx) 181 { 182 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 183 int start = 1; 184 BIGNUM *d; 185 /* Table of variables obtained from 'ctx' */ 186 BIGNUM *val[TABLE_SIZE]; 187 188 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 189 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 190 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 191 return -1; 192 } 193 194 bits = BN_num_bits(p); 195 if (bits == 0) { 196 /* x**0 mod 1 is still zero. */ 197 if (BN_abs_is_word(m, 1)) { 198 ret = 1; 199 BN_zero(r); 200 } else 201 ret = BN_one(r); 202 return ret; 203 } 204 205 BN_CTX_start(ctx); 206 if ((d = BN_CTX_get(ctx)) == NULL) 207 goto err; 208 if ((val[0] = BN_CTX_get(ctx)) == NULL) 209 goto err; 210 211 if (!BN_nnmod(val[0],a,m,ctx)) 212 goto err; /* 1 */ 213 if (BN_is_zero(val[0])) { 214 BN_zero(r); 215 ret = 1; 216 goto err; 217 } 218 219 window = BN_window_bits_for_exponent_size(bits); 220 if (window > 1) { 221 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 222 goto err; /* 2 */ 223 j = 1 << (window - 1); 224 for (i = 1; i < j; i++) { 225 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 226 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 227 goto err; 228 } 229 } 230 231 start = 1; /* This is used to avoid multiplication etc 232 * when there is only the value '1' in the 233 * buffer. */ 234 wvalue = 0; /* The 'value' of the window */ 235 wstart = bits - 1; /* The top bit of the window */ 236 wend = 0; /* The bottom bit of the window */ 237 238 if (!BN_one(r)) 239 goto err; 240 241 for (;;) { 242 if (BN_is_bit_set(p, wstart) == 0) { 243 if (!start) 244 if (!BN_mod_mul(r, r, r, m, ctx)) 245 goto err; 246 if (wstart == 0) 247 break; 248 wstart--; 249 continue; 250 } 251 /* We now have wstart on a 'set' bit, we now need to work out 252 * how bit a window to do. To do this we need to scan 253 * forward until the last set bit before the end of the 254 * window */ 255 j = wstart; 256 wvalue = 1; 257 wend = 0; 258 for (i = 1; i < window; i++) { 259 if (wstart - i < 0) 260 break; 261 if (BN_is_bit_set(p, wstart - i)) { 262 wvalue <<= (i - wend); 263 wvalue |= 1; 264 wend = i; 265 } 266 } 267 268 /* wend is the size of the current window */ 269 j = wend + 1; 270 /* add the 'bytes above' */ 271 if (!start) 272 for (i = 0; i < j; i++) { 273 if (!BN_mod_mul(r, r, r, m, ctx)) 274 goto err; 275 } 276 277 /* wvalue will be an odd number < 2^window */ 278 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 279 goto err; 280 281 /* move the 'window' down further */ 282 wstart -= wend + 1; 283 wvalue = 0; 284 start = 0; 285 if (wstart < 0) 286 break; 287 } 288 ret = 1; 289 290 err: 291 BN_CTX_end(ctx); 292 return (ret); 293 } 294 295 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 296 * so that accessing any of these table values shows the same access pattern as far 297 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 298 * from/to that table. */ 299 300 static int 301 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, 302 int idx, int window) 303 { 304 int i, j; 305 int width = 1 << window; 306 BN_ULONG *table = (BN_ULONG *)buf; 307 308 if (top > b->top) 309 top = b->top; /* this works because 'buf' is explicitly zeroed */ 310 311 for (i = 0, j = idx; i < top; i++, j += width) { 312 table[j] = b->d[i]; 313 } 314 315 return 1; 316 } 317 318 static int 319 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, 320 int window) 321 { 322 int i, j; 323 int width = 1 << window; 324 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 325 326 if (!bn_wexpand(b, top)) 327 return 0; 328 329 if (window <= 3) { 330 for (i = 0; i < top; i++, table += width) { 331 BN_ULONG acc = 0; 332 333 for (j = 0; j < width; j++) { 334 acc |= table[j] & 335 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 336 } 337 338 b->d[i] = acc; 339 } 340 } else { 341 int xstride = 1 << (window - 2); 342 BN_ULONG y0, y1, y2, y3; 343 344 i = idx >> (window - 2); /* equivalent of idx / xstride */ 345 idx &= xstride - 1; /* equivalent of idx % xstride */ 346 347 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 348 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 349 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 350 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 351 352 for (i = 0; i < top; i++, table += width) { 353 BN_ULONG acc = 0; 354 355 for (j = 0; j < xstride; j++) { 356 acc |= ( (table[j + 0 * xstride] & y0) | 357 (table[j + 1 * xstride] & y1) | 358 (table[j + 2 * xstride] & y2) | 359 (table[j + 3 * xstride] & y3) ) 360 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 361 } 362 363 b->d[i] = acc; 364 } 365 } 366 b->top = top; 367 bn_correct_top(b); 368 return 1; 369 } 370 371 /* Given a pointer value, compute the next address that is a cache line multiple. */ 372 #define MOD_EXP_CTIME_ALIGN(x_) \ 373 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 374 375 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 376 * precomputation memory layout to limit data-dependency to a minimum 377 * to protect secret exponents (cf. the hyper-threading timing attacks 378 * pointed out by Colin Percival, 379 * http://www.daemonology.net/hyperthreading-considered-harmful/) 380 */ 381 int 382 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 383 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 384 { 385 int i, bits, ret = 0, window, wvalue; 386 int top; 387 BN_MONT_CTX *mont = NULL; 388 int numPowers; 389 unsigned char *powerbufFree = NULL; 390 int powerbufLen = 0; 391 unsigned char *powerbuf = NULL; 392 BIGNUM tmp, am; 393 394 395 if (!BN_is_odd(m)) { 396 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 397 return (0); 398 } 399 400 top = m->top; 401 402 bits = BN_num_bits(p); 403 if (bits == 0) { 404 /* x**0 mod 1 is still zero. */ 405 if (BN_abs_is_word(m, 1)) { 406 ret = 1; 407 BN_zero(rr); 408 } else 409 ret = BN_one(rr); 410 return ret; 411 } 412 413 BN_CTX_start(ctx); 414 415 /* 416 * Allocate a Montgomery context if it was not supplied by the caller. 417 * If this is not done, things will break in the montgomery part. 418 */ 419 if (in_mont != NULL) 420 mont = in_mont; 421 else { 422 if ((mont = BN_MONT_CTX_new()) == NULL) 423 goto err; 424 if (!BN_MONT_CTX_set(mont, m, ctx)) 425 goto err; 426 } 427 428 /* Get the window size to use with size of p. */ 429 window = BN_window_bits_for_ctime_exponent_size(bits); 430 #if defined(OPENSSL_BN_ASM_MONT5) 431 if (window == 6 && bits <= 1024) 432 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 433 #endif 434 435 /* Allocate a buffer large enough to hold all of the pre-computed 436 * powers of am, am itself and tmp. 437 */ 438 numPowers = 1 << window; 439 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 440 ((2*top) > numPowers ? (2*top) : numPowers)); 441 if ((powerbufFree = calloc(powerbufLen + 442 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL) 443 goto err; 444 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 445 446 /* lay down tmp and am right after powers table */ 447 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 448 am.d = tmp.d + top; 449 tmp.top = am.top = 0; 450 tmp.dmax = am.dmax = top; 451 tmp.neg = am.neg = 0; 452 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 453 454 /* prepare a^0 in Montgomery domain */ 455 #if 1 456 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 457 goto err; 458 #else 459 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 460 for (i = 1; i < top; i++) 461 tmp.d[i] = (~m->d[i]) & BN_MASK2; 462 tmp.top = top; 463 #endif 464 465 /* prepare a^1 in Montgomery domain */ 466 if (!BN_nnmod(&am, a, m, ctx)) 467 goto err; 468 if (!BN_to_montgomery(&am, &am, mont, ctx)) 469 goto err; 470 471 #if defined(OPENSSL_BN_ASM_MONT5) 472 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 473 * specifically optimization of cache-timing attack countermeasures 474 * and pre-computation optimization. */ 475 476 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 477 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 478 if (window == 5 && top > 1) { 479 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 480 const void *table, const BN_ULONG *np, 481 const BN_ULONG *n0, int num, int power); 482 void bn_scatter5(const BN_ULONG *inp, size_t num, 483 void *table, size_t power); 484 void bn_gather5(BN_ULONG *out, size_t num, 485 void *table, size_t power); 486 487 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 488 489 /* BN_to_montgomery can contaminate words above .top 490 * [in BN_DEBUG[_DEBUG] build]... */ 491 for (i = am.top; i < top; i++) 492 am.d[i] = 0; 493 for (i = tmp.top; i < top; i++) 494 tmp.d[i] = 0; 495 496 bn_scatter5(tmp.d, top, powerbuf, 0); 497 bn_scatter5(am.d, am.top, powerbuf, 1); 498 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 499 bn_scatter5(tmp.d, top, powerbuf, 2); 500 501 #if 0 502 for (i = 3; i < 32; i++) { 503 /* Calculate a^i = a^(i-1) * a */ 504 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 505 n0, top, i - 1); 506 bn_scatter5(tmp.d, top, powerbuf, i); 507 } 508 #else 509 /* same as above, but uses squaring for 1/2 of operations */ 510 for (i = 4; i < 32; i*=2) { 511 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 512 bn_scatter5(tmp.d, top, powerbuf, i); 513 } 514 for (i = 3; i < 8; i += 2) { 515 int j; 516 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 517 n0, top, i - 1); 518 bn_scatter5(tmp.d, top, powerbuf, i); 519 for (j = 2 * i; j < 32; j *= 2) { 520 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 521 bn_scatter5(tmp.d, top, powerbuf, j); 522 } 523 } 524 for (; i < 16; i += 2) { 525 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 526 n0, top, i - 1); 527 bn_scatter5(tmp.d, top, powerbuf, i); 528 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 529 bn_scatter5(tmp.d, top, powerbuf, 2*i); 530 } 531 for (; i < 32; i += 2) { 532 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 533 n0, top, i - 1); 534 bn_scatter5(tmp.d, top, powerbuf, i); 535 } 536 #endif 537 bits--; 538 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 539 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 540 bn_gather5(tmp.d, top, powerbuf, wvalue); 541 542 /* Scan the exponent one window at a time starting from the most 543 * significant bits. 544 */ 545 while (bits >= 0) { 546 for (wvalue = 0, i = 0; i < 5; i++, bits--) 547 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 548 549 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 550 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 551 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 552 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 553 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 554 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 555 } 556 557 tmp.top = top; 558 bn_correct_top(&tmp); 559 } else 560 #endif 561 { 562 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, 563 window)) 564 goto err; 565 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, 566 window)) 567 goto err; 568 569 /* If the window size is greater than 1, then calculate 570 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 571 * (even powers could instead be computed as (a^(i/2))^2 572 * to use the slight performance advantage of sqr over mul). 573 */ 574 if (window > 1) { 575 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 576 goto err; 577 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 578 2, window)) 579 goto err; 580 for (i = 3; i < numPowers; i++) { 581 /* Calculate a^i = a^(i-1) * a */ 582 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, 583 mont, ctx)) 584 goto err; 585 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, 586 powerbuf, i, window)) 587 goto err; 588 } 589 } 590 591 bits--; 592 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 593 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 594 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, 595 wvalue, window)) 596 goto err; 597 598 /* Scan the exponent one window at a time starting from the most 599 * significant bits. 600 */ 601 while (bits >= 0) { 602 wvalue = 0; /* The 'value' of the window */ 603 604 /* Scan the window, squaring the result as we go */ 605 for (i = 0; i < window; i++, bits--) { 606 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, 607 mont, ctx)) 608 goto err; 609 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 610 } 611 612 /* Fetch the appropriate pre-computed value from the pre-buf */ 613 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, 614 wvalue, window)) 615 goto err; 616 617 /* Multiply the result into the intermediate result */ 618 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 619 goto err; 620 } 621 } 622 623 /* Convert the final result from montgomery to standard format */ 624 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 625 goto err; 626 ret = 1; 627 628 err: 629 if ((in_mont == NULL) && (mont != NULL)) 630 BN_MONT_CTX_free(mont); 631 freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 632 BN_CTX_end(ctx); 633 return (ret); 634 } 635 636 static int 637 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 638 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) 639 { 640 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 641 int start = 1; 642 BIGNUM *d, *r; 643 const BIGNUM *aa; 644 /* Table of variables obtained from 'ctx' */ 645 BIGNUM *val[TABLE_SIZE]; 646 BN_MONT_CTX *mont = NULL; 647 648 if (ct) { 649 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 650 } 651 652 653 if (!BN_is_odd(m)) { 654 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 655 return (0); 656 } 657 658 bits = BN_num_bits(p); 659 if (bits == 0) { 660 /* x**0 mod 1 is still zero. */ 661 if (BN_abs_is_word(m, 1)) { 662 ret = 1; 663 BN_zero(rr); 664 } else 665 ret = BN_one(rr); 666 return ret; 667 } 668 669 BN_CTX_start(ctx); 670 if ((d = BN_CTX_get(ctx)) == NULL) 671 goto err; 672 if ((r = BN_CTX_get(ctx)) == NULL) 673 goto err; 674 if ((val[0] = BN_CTX_get(ctx)) == NULL) 675 goto err; 676 677 /* If this is not done, things will break in the montgomery 678 * part */ 679 680 if (in_mont != NULL) 681 mont = in_mont; 682 else { 683 if ((mont = BN_MONT_CTX_new()) == NULL) 684 goto err; 685 if (!BN_MONT_CTX_set(mont, m, ctx)) 686 goto err; 687 } 688 689 if (!BN_nnmod(val[0], a,m, ctx)) 690 goto err; 691 aa = val[0]; 692 if (BN_is_zero(aa)) { 693 BN_zero(rr); 694 ret = 1; 695 goto err; 696 } 697 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 698 goto err; /* 1 */ 699 700 window = BN_window_bits_for_exponent_size(bits); 701 if (window > 1) { 702 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 703 goto err; /* 2 */ 704 j = 1 << (window - 1); 705 for (i = 1; i < j; i++) { 706 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 707 !BN_mod_mul_montgomery(val[i], val[i - 1], 708 d, mont, ctx)) 709 goto err; 710 } 711 } 712 713 start = 1; /* This is used to avoid multiplication etc 714 * when there is only the value '1' in the 715 * buffer. */ 716 wvalue = 0; /* The 'value' of the window */ 717 wstart = bits - 1; /* The top bit of the window */ 718 wend = 0; /* The bottom bit of the window */ 719 720 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 721 goto err; 722 for (;;) { 723 if (BN_is_bit_set(p, wstart) == 0) { 724 if (!start) { 725 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 726 goto err; 727 } 728 if (wstart == 0) 729 break; 730 wstart--; 731 continue; 732 } 733 /* We now have wstart on a 'set' bit, we now need to work out 734 * how bit a window to do. To do this we need to scan 735 * forward until the last set bit before the end of the 736 * window */ 737 j = wstart; 738 wvalue = 1; 739 wend = 0; 740 for (i = 1; i < window; i++) { 741 if (wstart - i < 0) 742 break; 743 if (BN_is_bit_set(p, wstart - i)) { 744 wvalue <<= (i - wend); 745 wvalue |= 1; 746 wend = i; 747 } 748 } 749 750 /* wend is the size of the current window */ 751 j = wend + 1; 752 /* add the 'bytes above' */ 753 if (!start) 754 for (i = 0; i < j; i++) { 755 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 756 goto err; 757 } 758 759 /* wvalue will be an odd number < 2^window */ 760 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 761 goto err; 762 763 /* move the 'window' down further */ 764 wstart -= wend + 1; 765 wvalue = 0; 766 start = 0; 767 if (wstart < 0) 768 break; 769 } 770 if (!BN_from_montgomery(rr, r,mont, ctx)) 771 goto err; 772 ret = 1; 773 774 err: 775 if ((in_mont == NULL) && (mont != NULL)) 776 BN_MONT_CTX_free(mont); 777 BN_CTX_end(ctx); 778 return (ret); 779 } 780 781 int 782 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 783 BN_CTX *ctx, BN_MONT_CTX *in_mont) 784 { 785 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 786 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 787 } 788 789 int 790 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 791 BN_CTX *ctx, BN_MONT_CTX *in_mont) 792 { 793 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); 794 } 795 796 int 797 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 798 BN_CTX *ctx, BN_MONT_CTX *in_mont) 799 { 800 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); 801 } 802 803 int 804 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 805 BN_CTX *ctx, BN_MONT_CTX *in_mont) 806 { 807 BN_MONT_CTX *mont = NULL; 808 int b, bits, ret = 0; 809 int r_is_one; 810 BN_ULONG w, next_w; 811 BIGNUM *d, *r, *t; 812 BIGNUM *swap_tmp; 813 814 #define BN_MOD_MUL_WORD(r, w, m) \ 815 (BN_mul_word(r, (w)) && \ 816 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 817 (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 818 /* BN_MOD_MUL_WORD is only used with 'w' large, 819 * so the BN_ucmp test is probably more overhead 820 * than always using BN_mod (which uses bn_copy if 821 * a similar test returns true). */ 822 /* We can use BN_mod and do not need BN_nnmod because our 823 * accumulator is never negative (the result of BN_mod does 824 * not depend on the sign of the modulus). 825 */ 826 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 827 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 828 829 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 830 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 831 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 832 return -1; 833 } 834 835 836 if (!BN_is_odd(m)) { 837 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 838 return (0); 839 } 840 if (m->top == 1) 841 a %= m->d[0]; /* make sure that 'a' is reduced */ 842 843 bits = BN_num_bits(p); 844 if (bits == 0) { 845 /* x**0 mod 1 is still zero. */ 846 if (BN_abs_is_word(m, 1)) { 847 ret = 1; 848 BN_zero(rr); 849 } else 850 ret = BN_one(rr); 851 return ret; 852 } 853 if (a == 0) { 854 BN_zero(rr); 855 ret = 1; 856 return ret; 857 } 858 859 BN_CTX_start(ctx); 860 if ((d = BN_CTX_get(ctx)) == NULL) 861 goto err; 862 if ((r = BN_CTX_get(ctx)) == NULL) 863 goto err; 864 if ((t = BN_CTX_get(ctx)) == NULL) 865 goto err; 866 867 if (in_mont != NULL) 868 mont = in_mont; 869 else { 870 if ((mont = BN_MONT_CTX_new()) == NULL) 871 goto err; 872 if (!BN_MONT_CTX_set(mont, m, ctx)) 873 goto err; 874 } 875 876 r_is_one = 1; /* except for Montgomery factor */ 877 878 /* bits-1 >= 0 */ 879 880 /* The result is accumulated in the product r*w. */ 881 w = a; /* bit 'bits-1' of 'p' is always set */ 882 for (b = bits - 2; b >= 0; b--) { 883 /* First, square r*w. */ 884 next_w = w * w; 885 if ((next_w / w) != w) /* overflow */ 886 { 887 if (r_is_one) { 888 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 889 goto err; 890 r_is_one = 0; 891 } else { 892 if (!BN_MOD_MUL_WORD(r, w, m)) 893 goto err; 894 } 895 next_w = 1; 896 } 897 w = next_w; 898 if (!r_is_one) { 899 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 900 goto err; 901 } 902 903 /* Second, multiply r*w by 'a' if exponent bit is set. */ 904 if (BN_is_bit_set(p, b)) { 905 next_w = w * a; 906 if ((next_w / a) != w) /* overflow */ 907 { 908 if (r_is_one) { 909 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 910 goto err; 911 r_is_one = 0; 912 } else { 913 if (!BN_MOD_MUL_WORD(r, w, m)) 914 goto err; 915 } 916 next_w = a; 917 } 918 w = next_w; 919 } 920 } 921 922 /* Finally, set r:=r*w. */ 923 if (w != 1) { 924 if (r_is_one) { 925 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 926 goto err; 927 r_is_one = 0; 928 } else { 929 if (!BN_MOD_MUL_WORD(r, w, m)) 930 goto err; 931 } 932 } 933 934 if (r_is_one) /* can happen only if a == 1*/ 935 { 936 if (!BN_one(rr)) 937 goto err; 938 } else { 939 if (!BN_from_montgomery(rr, r, mont, ctx)) 940 goto err; 941 } 942 ret = 1; 943 944 err: 945 if ((in_mont == NULL) && (mont != NULL)) 946 BN_MONT_CTX_free(mont); 947 BN_CTX_end(ctx); 948 return (ret); 949 } 950 951 int 952 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 953 BN_CTX *ctx) 954 { 955 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 956 int start = 1; 957 BIGNUM *aa; 958 /* Table of variables obtained from 'ctx' */ 959 BIGNUM *val[TABLE_SIZE]; 960 BN_RECP_CTX recp; 961 962 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 963 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 964 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 965 return -1; 966 } 967 968 bits = BN_num_bits(p); 969 if (bits == 0) { 970 /* x**0 mod 1 is still zero. */ 971 if (BN_abs_is_word(m, 1)) { 972 ret = 1; 973 BN_zero(r); 974 } else 975 ret = BN_one(r); 976 return ret; 977 } 978 979 BN_RECP_CTX_init(&recp); 980 981 BN_CTX_start(ctx); 982 if ((aa = BN_CTX_get(ctx)) == NULL) 983 goto err; 984 if ((val[0] = BN_CTX_get(ctx)) == NULL) 985 goto err; 986 987 if (m->neg) { 988 /* ignore sign of 'm' */ 989 if (!bn_copy(aa, m)) 990 goto err; 991 aa->neg = 0; 992 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 993 goto err; 994 } else { 995 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 996 goto err; 997 } 998 999 if (!BN_nnmod(val[0], a, m, ctx)) 1000 goto err; /* 1 */ 1001 if (BN_is_zero(val[0])) { 1002 BN_zero(r); 1003 ret = 1; 1004 goto err; 1005 } 1006 1007 window = BN_window_bits_for_exponent_size(bits); 1008 if (window > 1) { 1009 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 1010 goto err; /* 2 */ 1011 j = 1 << (window - 1); 1012 for (i = 1; i < j; i++) { 1013 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1014 !BN_mod_mul_reciprocal(val[i], val[i - 1], 1015 aa, &recp, ctx)) 1016 goto err; 1017 } 1018 } 1019 1020 start = 1; /* This is used to avoid multiplication etc 1021 * when there is only the value '1' in the 1022 * buffer. */ 1023 wvalue = 0; /* The 'value' of the window */ 1024 wstart = bits - 1; /* The top bit of the window */ 1025 wend = 0; /* The bottom bit of the window */ 1026 1027 if (!BN_one(r)) 1028 goto err; 1029 1030 for (;;) { 1031 if (BN_is_bit_set(p, wstart) == 0) { 1032 if (!start) 1033 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 1034 goto err; 1035 if (wstart == 0) 1036 break; 1037 wstart--; 1038 continue; 1039 } 1040 /* We now have wstart on a 'set' bit, we now need to work out 1041 * how bit a window to do. To do this we need to scan 1042 * forward until the last set bit before the end of the 1043 * window */ 1044 j = wstart; 1045 wvalue = 1; 1046 wend = 0; 1047 for (i = 1; i < window; i++) { 1048 if (wstart - i < 0) 1049 break; 1050 if (BN_is_bit_set(p, wstart - i)) { 1051 wvalue <<= (i - wend); 1052 wvalue |= 1; 1053 wend = i; 1054 } 1055 } 1056 1057 /* wend is the size of the current window */ 1058 j = wend + 1; 1059 /* add the 'bytes above' */ 1060 if (!start) 1061 for (i = 0; i < j; i++) { 1062 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 1063 goto err; 1064 } 1065 1066 /* wvalue will be an odd number < 2^window */ 1067 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) 1068 goto err; 1069 1070 /* move the 'window' down further */ 1071 wstart -= wend + 1; 1072 wvalue = 0; 1073 start = 0; 1074 if (wstart < 0) 1075 break; 1076 } 1077 ret = 1; 1078 1079 err: 1080 BN_CTX_end(ctx); 1081 BN_RECP_CTX_free(&recp); 1082 return (ret); 1083 } 1084 1085 static int 1086 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1087 BN_CTX *ctx, int ct) 1088 { 1089 int ret; 1090 1091 1092 /* For even modulus m = 2^k*m_odd, it might make sense to compute 1093 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 1094 * exponentiation for the odd part), using appropriate exponent 1095 * reductions, and combine the results using the CRT. 1096 * 1097 * For now, we use Montgomery only if the modulus is odd; otherwise, 1098 * exponentiation using the reciprocal-based quick remaindering 1099 * algorithm is used. 1100 * 1101 * (Timing obtained with expspeed.c [computations a^p mod m 1102 * where a, p, m are of the same length: 256, 512, 1024, 2048, 1103 * 4096, 8192 bits], compared to the running time of the 1104 * standard algorithm: 1105 * 1106 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 1107 * 55 .. 77 % [UltraSparc processor, but 1108 * debug-solaris-sparcv8-gcc conf.] 1109 * 1110 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 1111 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 1112 * 1113 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 1114 * at 2048 and more bits, but at 512 and 1024 bits, it was 1115 * slower even than the standard algorithm! 1116 * 1117 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 1118 * should be obtained when the new Montgomery reduction code 1119 * has been integrated into OpenSSL.) 1120 */ 1121 1122 if (BN_is_odd(m)) { 1123 if (a->top == 1 && !a->neg && !ct) { 1124 BN_ULONG A = a->d[0]; 1125 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); 1126 } else 1127 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); 1128 } else { 1129 ret = BN_mod_exp_recp(r, a,p, m, ctx); 1130 } 1131 1132 return (ret); 1133 } 1134 1135 int 1136 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1137 BN_CTX *ctx) 1138 { 1139 return BN_mod_exp_internal(r, a, p, m, ctx, 1140 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 1141 } 1142 1143 int 1144 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1145 BN_CTX *ctx) 1146 { 1147 return BN_mod_exp_internal(r, a, p, m, ctx, 1); 1148 } 1149 1150 int 1151 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1152 BN_CTX *ctx) 1153 { 1154 return BN_mod_exp_internal(r, a, p, m, ctx, 0); 1155 } 1156 1157 int 1158 BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, 1159 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx, 1160 BN_MONT_CTX *in_mont) 1161 { 1162 int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2; 1163 int r_is_one = 1; 1164 BIGNUM *d, *r; 1165 const BIGNUM *a_mod_m; 1166 /* Tables of variables obtained from 'ctx' */ 1167 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE]; 1168 BN_MONT_CTX *mont = NULL; 1169 1170 1171 if (!BN_is_odd(m)) { 1172 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 1173 return (0); 1174 } 1175 bits1 = BN_num_bits(p1); 1176 bits2 = BN_num_bits(p2); 1177 if ((bits1 == 0) && (bits2 == 0)) { 1178 ret = BN_one(rr); 1179 return ret; 1180 } 1181 1182 bits = (bits1 > bits2) ? bits1 : bits2; 1183 1184 BN_CTX_start(ctx); 1185 if ((d = BN_CTX_get(ctx)) == NULL) 1186 goto err; 1187 if ((r = BN_CTX_get(ctx)) == NULL) 1188 goto err; 1189 if ((val1[0] = BN_CTX_get(ctx)) == NULL) 1190 goto err; 1191 if ((val2[0] = BN_CTX_get(ctx)) == NULL) 1192 goto err; 1193 1194 if (in_mont != NULL) 1195 mont = in_mont; 1196 else { 1197 if ((mont = BN_MONT_CTX_new()) == NULL) 1198 goto err; 1199 if (!BN_MONT_CTX_set(mont, m, ctx)) 1200 goto err; 1201 } 1202 1203 window1 = BN_window_bits_for_exponent_size(bits1); 1204 window2 = BN_window_bits_for_exponent_size(bits2); 1205 1206 /* 1207 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1) 1208 */ 1209 if (!BN_nnmod(val1[0], a1, m, ctx)) 1210 goto err; 1211 a_mod_m = val1[0]; 1212 if (BN_is_zero(a_mod_m)) { 1213 BN_zero(rr); 1214 ret = 1; 1215 goto err; 1216 } 1217 1218 if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) 1219 goto err; 1220 if (window1 > 1) { 1221 if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) 1222 goto err; 1223 1224 j = 1 << (window1 - 1); 1225 for (i = 1; i < j; i++) { 1226 if (((val1[i] = BN_CTX_get(ctx)) == NULL) || 1227 !BN_mod_mul_montgomery(val1[i], val1[i - 1], 1228 d, mont, ctx)) 1229 goto err; 1230 } 1231 } 1232 1233 1234 /* 1235 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1) 1236 */ 1237 if (!BN_nnmod(val2[0], a2, m, ctx)) 1238 goto err; 1239 a_mod_m = val2[0]; 1240 if (BN_is_zero(a_mod_m)) { 1241 BN_zero(rr); 1242 ret = 1; 1243 goto err; 1244 } 1245 if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) 1246 goto err; 1247 if (window2 > 1) { 1248 if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) 1249 goto err; 1250 1251 j = 1 << (window2 - 1); 1252 for (i = 1; i < j; i++) { 1253 if (((val2[i] = BN_CTX_get(ctx)) == NULL) || 1254 !BN_mod_mul_montgomery(val2[i], val2[i - 1], 1255 d, mont, ctx)) 1256 goto err; 1257 } 1258 } 1259 1260 1261 /* Now compute the power product, using independent windows. */ 1262 r_is_one = 1; 1263 wvalue1 = 0; /* The 'value' of the first window */ 1264 wvalue2 = 0; /* The 'value' of the second window */ 1265 wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */ 1266 wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */ 1267 1268 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 1269 goto err; 1270 for (b = bits - 1; b >= 0; b--) { 1271 if (!r_is_one) { 1272 if (!BN_mod_mul_montgomery(r, r,r, mont, ctx)) 1273 goto err; 1274 } 1275 1276 if (!wvalue1) 1277 if (BN_is_bit_set(p1, b)) { 1278 /* consider bits b-window1+1 .. b for this window */ 1279 i = b - window1 + 1; 1280 while (!BN_is_bit_set(p1, i)) /* works for i<0 */ 1281 i++; 1282 wpos1 = i; 1283 wvalue1 = 1; 1284 for (i = b - 1; i >= wpos1; i--) { 1285 wvalue1 <<= 1; 1286 if (BN_is_bit_set(p1, i)) 1287 wvalue1++; 1288 } 1289 } 1290 1291 if (!wvalue2) 1292 if (BN_is_bit_set(p2, b)) { 1293 /* consider bits b-window2+1 .. b for this window */ 1294 i = b - window2 + 1; 1295 while (!BN_is_bit_set(p2, i)) 1296 i++; 1297 wpos2 = i; 1298 wvalue2 = 1; 1299 for (i = b - 1; i >= wpos2; i--) { 1300 wvalue2 <<= 1; 1301 if (BN_is_bit_set(p2, i)) 1302 wvalue2++; 1303 } 1304 } 1305 1306 if (wvalue1 && b == wpos1) { 1307 /* wvalue1 is odd and < 2^window1 */ 1308 if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], 1309 mont, ctx)) 1310 goto err; 1311 wvalue1 = 0; 1312 r_is_one = 0; 1313 } 1314 1315 if (wvalue2 && b == wpos2) { 1316 /* wvalue2 is odd and < 2^window2 */ 1317 if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], 1318 mont, ctx)) 1319 goto err; 1320 wvalue2 = 0; 1321 r_is_one = 0; 1322 } 1323 } 1324 if (!BN_from_montgomery(rr, r,mont, ctx)) 1325 goto err; 1326 ret = 1; 1327 1328 err: 1329 if ((in_mont == NULL) && (mont != NULL)) 1330 BN_MONT_CTX_free(mont); 1331 BN_CTX_end(ctx); 1332 return (ret); 1333 } 1334