1 /* $OpenBSD: bn_exp.c,v 1.26 2016/09/03 17:26:29 bcook Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 #include <stdlib.h> 113 #include <string.h> 114 115 #include <openssl/err.h> 116 117 #include "bn_lcl.h" 118 #include "constant_time_locl.h" 119 120 /* maximum precomputation table size for *variable* sliding windows */ 121 #define TABLE_SIZE 32 122 123 /* this one works - simple but works */ 124 int 125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 126 { 127 int i, bits, ret = 0; 128 BIGNUM *v, *rr; 129 130 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 131 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 132 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 133 return -1; 134 } 135 136 BN_CTX_start(ctx); 137 if ((r == a) || (r == p)) 138 rr = BN_CTX_get(ctx); 139 else 140 rr = r; 141 v = BN_CTX_get(ctx); 142 if (rr == NULL || v == NULL) 143 goto err; 144 145 if (BN_copy(v, a) == NULL) 146 goto err; 147 bits = BN_num_bits(p); 148 149 if (BN_is_odd(p)) { 150 if (BN_copy(rr, a) == NULL) 151 goto err; 152 } else { 153 if (!BN_one(rr)) 154 goto err; 155 } 156 157 for (i = 1; i < bits; i++) { 158 if (!BN_sqr(v, v, ctx)) 159 goto err; 160 if (BN_is_bit_set(p, i)) { 161 if (!BN_mul(rr, rr, v, ctx)) 162 goto err; 163 } 164 } 165 ret = 1; 166 167 err: 168 if (r != rr && rr != NULL) 169 BN_copy(r, rr); 170 BN_CTX_end(ctx); 171 bn_check_top(r); 172 return (ret); 173 } 174 175 int 176 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 177 BN_CTX *ctx) 178 { 179 int ret; 180 181 bn_check_top(a); 182 bn_check_top(p); 183 bn_check_top(m); 184 185 /* For even modulus m = 2^k*m_odd, it might make sense to compute 186 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 187 * exponentiation for the odd part), using appropriate exponent 188 * reductions, and combine the results using the CRT. 189 * 190 * For now, we use Montgomery only if the modulus is odd; otherwise, 191 * exponentiation using the reciprocal-based quick remaindering 192 * algorithm is used. 193 * 194 * (Timing obtained with expspeed.c [computations a^p mod m 195 * where a, p, m are of the same length: 256, 512, 1024, 2048, 196 * 4096, 8192 bits], compared to the running time of the 197 * standard algorithm: 198 * 199 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 200 * 55 .. 77 % [UltraSparc processor, but 201 * debug-solaris-sparcv8-gcc conf.] 202 * 203 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 204 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 205 * 206 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 207 * at 2048 and more bits, but at 512 and 1024 bits, it was 208 * slower even than the standard algorithm! 209 * 210 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 211 * should be obtained when the new Montgomery reduction code 212 * has been integrated into OpenSSL.) 213 */ 214 215 #define MONT_MUL_MOD 216 #define MONT_EXP_WORD 217 #define RECP_MUL_MOD 218 219 #ifdef MONT_MUL_MOD 220 /* I have finally been able to take out this pre-condition of 221 * the top bit being set. It was caused by an error in BN_div 222 * with negatives. There was also another problem when for a^b%m 223 * a >= m. eay 07-May-97 */ 224 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 225 226 if (BN_is_odd(m)) { 227 # ifdef MONT_EXP_WORD 228 if (a->top == 1 && !a->neg && 229 (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 230 BN_ULONG A = a->d[0]; 231 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); 232 } else 233 # endif 234 ret = BN_mod_exp_mont(r, a,p, m,ctx, NULL); 235 } else 236 #endif 237 #ifdef RECP_MUL_MOD 238 { 239 ret = BN_mod_exp_recp(r, a,p, m, ctx); 240 } 241 #else 242 { 243 ret = BN_mod_exp_simple(r, a,p, m, ctx); 244 } 245 #endif 246 247 bn_check_top(r); 248 return (ret); 249 } 250 251 int 252 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 253 BN_CTX *ctx) 254 { 255 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 256 int start = 1; 257 BIGNUM *aa; 258 /* Table of variables obtained from 'ctx' */ 259 BIGNUM *val[TABLE_SIZE]; 260 BN_RECP_CTX recp; 261 262 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 263 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 264 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 265 return -1; 266 } 267 268 bits = BN_num_bits(p); 269 if (bits == 0) { 270 /* x**0 mod 1 is still zero. */ 271 if (BN_is_one(m)) { 272 ret = 1; 273 BN_zero(r); 274 } else 275 ret = BN_one(r); 276 return ret; 277 } 278 279 BN_CTX_start(ctx); 280 if ((aa = BN_CTX_get(ctx)) == NULL) 281 goto err; 282 if ((val[0] = BN_CTX_get(ctx)) == NULL) 283 goto err; 284 285 BN_RECP_CTX_init(&recp); 286 if (m->neg) { 287 /* ignore sign of 'm' */ 288 if (!BN_copy(aa, m)) 289 goto err; 290 aa->neg = 0; 291 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 292 goto err; 293 } else { 294 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 295 goto err; 296 } 297 298 if (!BN_nnmod(val[0], a, m, ctx)) 299 goto err; /* 1 */ 300 if (BN_is_zero(val[0])) { 301 BN_zero(r); 302 ret = 1; 303 goto err; 304 } 305 306 window = BN_window_bits_for_exponent_size(bits); 307 if (window > 1) { 308 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 309 goto err; /* 2 */ 310 j = 1 << (window - 1); 311 for (i = 1; i < j; i++) { 312 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 313 !BN_mod_mul_reciprocal(val[i], val[i - 1], 314 aa, &recp, ctx)) 315 goto err; 316 } 317 } 318 319 start = 1; /* This is used to avoid multiplication etc 320 * when there is only the value '1' in the 321 * buffer. */ 322 wvalue = 0; /* The 'value' of the window */ 323 wstart = bits - 1; /* The top bit of the window */ 324 wend = 0; /* The bottom bit of the window */ 325 326 if (!BN_one(r)) 327 goto err; 328 329 for (;;) { 330 if (BN_is_bit_set(p, wstart) == 0) { 331 if (!start) 332 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 333 goto err; 334 if (wstart == 0) 335 break; 336 wstart--; 337 continue; 338 } 339 /* We now have wstart on a 'set' bit, we now need to work out 340 * how bit a window to do. To do this we need to scan 341 * forward until the last set bit before the end of the 342 * window */ 343 j = wstart; 344 wvalue = 1; 345 wend = 0; 346 for (i = 1; i < window; i++) { 347 if (wstart - i < 0) 348 break; 349 if (BN_is_bit_set(p, wstart - i)) { 350 wvalue <<= (i - wend); 351 wvalue |= 1; 352 wend = i; 353 } 354 } 355 356 /* wend is the size of the current window */ 357 j = wend + 1; 358 /* add the 'bytes above' */ 359 if (!start) 360 for (i = 0; i < j; i++) { 361 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 362 goto err; 363 } 364 365 /* wvalue will be an odd number < 2^window */ 366 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) 367 goto err; 368 369 /* move the 'window' down further */ 370 wstart -= wend + 1; 371 wvalue = 0; 372 start = 0; 373 if (wstart < 0) 374 break; 375 } 376 ret = 1; 377 378 err: 379 BN_CTX_end(ctx); 380 BN_RECP_CTX_free(&recp); 381 bn_check_top(r); 382 return (ret); 383 } 384 385 int 386 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 387 BN_CTX *ctx, BN_MONT_CTX *in_mont) 388 { 389 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 390 int start = 1; 391 BIGNUM *d, *r; 392 const BIGNUM *aa; 393 /* Table of variables obtained from 'ctx' */ 394 BIGNUM *val[TABLE_SIZE]; 395 BN_MONT_CTX *mont = NULL; 396 397 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 398 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 399 } 400 401 bn_check_top(a); 402 bn_check_top(p); 403 bn_check_top(m); 404 405 if (!BN_is_odd(m)) { 406 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 407 return (0); 408 } 409 410 bits = BN_num_bits(p); 411 if (bits == 0) { 412 /* x**0 mod 1 is still zero. */ 413 if (BN_is_one(m)) { 414 ret = 1; 415 BN_zero(rr); 416 } else 417 ret = BN_one(rr); 418 return ret; 419 } 420 421 BN_CTX_start(ctx); 422 if ((d = BN_CTX_get(ctx)) == NULL) 423 goto err; 424 if ((r = BN_CTX_get(ctx)) == NULL) 425 goto err; 426 if ((val[0] = BN_CTX_get(ctx)) == NULL) 427 goto err; 428 429 /* If this is not done, things will break in the montgomery 430 * part */ 431 432 if (in_mont != NULL) 433 mont = in_mont; 434 else { 435 if ((mont = BN_MONT_CTX_new()) == NULL) 436 goto err; 437 if (!BN_MONT_CTX_set(mont, m, ctx)) 438 goto err; 439 } 440 441 if (a->neg || BN_ucmp(a, m) >= 0) { 442 if (!BN_nnmod(val[0], a,m, ctx)) 443 goto err; 444 aa = val[0]; 445 } else 446 aa = a; 447 if (BN_is_zero(aa)) { 448 BN_zero(rr); 449 ret = 1; 450 goto err; 451 } 452 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 453 goto err; /* 1 */ 454 455 window = BN_window_bits_for_exponent_size(bits); 456 if (window > 1) { 457 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 458 goto err; /* 2 */ 459 j = 1 << (window - 1); 460 for (i = 1; i < j; i++) { 461 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 462 !BN_mod_mul_montgomery(val[i], val[i - 1], 463 d, mont, ctx)) 464 goto err; 465 } 466 } 467 468 start = 1; /* This is used to avoid multiplication etc 469 * when there is only the value '1' in the 470 * buffer. */ 471 wvalue = 0; /* The 'value' of the window */ 472 wstart = bits - 1; /* The top bit of the window */ 473 wend = 0; /* The bottom bit of the window */ 474 475 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 476 goto err; 477 for (;;) { 478 if (BN_is_bit_set(p, wstart) == 0) { 479 if (!start) { 480 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 481 goto err; 482 } 483 if (wstart == 0) 484 break; 485 wstart--; 486 continue; 487 } 488 /* We now have wstart on a 'set' bit, we now need to work out 489 * how bit a window to do. To do this we need to scan 490 * forward until the last set bit before the end of the 491 * window */ 492 j = wstart; 493 wvalue = 1; 494 wend = 0; 495 for (i = 1; i < window; i++) { 496 if (wstart - i < 0) 497 break; 498 if (BN_is_bit_set(p, wstart - i)) { 499 wvalue <<= (i - wend); 500 wvalue |= 1; 501 wend = i; 502 } 503 } 504 505 /* wend is the size of the current window */ 506 j = wend + 1; 507 /* add the 'bytes above' */ 508 if (!start) 509 for (i = 0; i < j; i++) { 510 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 511 goto err; 512 } 513 514 /* wvalue will be an odd number < 2^window */ 515 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 516 goto err; 517 518 /* move the 'window' down further */ 519 wstart -= wend + 1; 520 wvalue = 0; 521 start = 0; 522 if (wstart < 0) 523 break; 524 } 525 if (!BN_from_montgomery(rr, r,mont, ctx)) 526 goto err; 527 ret = 1; 528 529 err: 530 if ((in_mont == NULL) && (mont != NULL)) 531 BN_MONT_CTX_free(mont); 532 BN_CTX_end(ctx); 533 bn_check_top(rr); 534 return (ret); 535 } 536 537 538 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 539 * so that accessing any of these table values shows the same access pattern as far 540 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 541 * from/to that table. */ 542 543 static int 544 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, 545 int idx, int window) 546 { 547 int i, j; 548 int width = 1 << window; 549 BN_ULONG *table = (BN_ULONG *)buf; 550 551 if (top > b->top) 552 top = b->top; /* this works because 'buf' is explicitly zeroed */ 553 554 for (i = 0, j = idx; i < top; i++, j += width) { 555 table[j] = b->d[i]; 556 } 557 558 return 1; 559 } 560 561 static int 562 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, 563 int window) 564 { 565 int i, j; 566 int width = 1 << window; 567 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 568 569 if (bn_wexpand(b, top) == NULL) 570 return 0; 571 572 if (window <= 3) { 573 for (i = 0; i < top; i++, table += width) { 574 BN_ULONG acc = 0; 575 576 for (j = 0; j < width; j++) { 577 acc |= table[j] & 578 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 579 } 580 581 b->d[i] = acc; 582 } 583 } else { 584 int xstride = 1 << (window - 2); 585 BN_ULONG y0, y1, y2, y3; 586 587 i = idx >> (window - 2); /* equivalent of idx / xstride */ 588 idx &= xstride - 1; /* equivalent of idx % xstride */ 589 590 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 591 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 592 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 593 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 594 595 for (i = 0; i < top; i++, table += width) { 596 BN_ULONG acc = 0; 597 598 for (j = 0; j < xstride; j++) { 599 acc |= ( (table[j + 0 * xstride] & y0) | 600 (table[j + 1 * xstride] & y1) | 601 (table[j + 2 * xstride] & y2) | 602 (table[j + 3 * xstride] & y3) ) 603 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 604 } 605 606 b->d[i] = acc; 607 } 608 } 609 b->top = top; 610 bn_correct_top(b); 611 return 1; 612 } 613 614 /* Given a pointer value, compute the next address that is a cache line multiple. */ 615 #define MOD_EXP_CTIME_ALIGN(x_) \ 616 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 617 618 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 619 * precomputation memory layout to limit data-dependency to a minimum 620 * to protect secret exponents (cf. the hyper-threading timing attacks 621 * pointed out by Colin Percival, 622 * http://www.daemonology.net/hyperthreading-considered-harmful/) 623 */ 624 int 625 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 626 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 627 { 628 int i, bits, ret = 0, window, wvalue; 629 int top; 630 BN_MONT_CTX *mont = NULL; 631 int numPowers; 632 unsigned char *powerbufFree = NULL; 633 int powerbufLen = 0; 634 unsigned char *powerbuf = NULL; 635 BIGNUM tmp, am; 636 637 bn_check_top(a); 638 bn_check_top(p); 639 bn_check_top(m); 640 641 if (!BN_is_odd(m)) { 642 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, 643 BN_R_CALLED_WITH_EVEN_MODULUS); 644 return (0); 645 } 646 647 top = m->top; 648 649 bits = BN_num_bits(p); 650 if (bits == 0) { 651 /* x**0 mod 1 is still zero. */ 652 if (BN_is_one(m)) { 653 ret = 1; 654 BN_zero(rr); 655 } else 656 ret = BN_one(rr); 657 return ret; 658 } 659 660 BN_CTX_start(ctx); 661 662 /* Allocate a montgomery context if it was not supplied by the caller. 663 * If this is not done, things will break in the montgomery part. 664 */ 665 if (in_mont != NULL) 666 mont = in_mont; 667 else { 668 if ((mont = BN_MONT_CTX_new()) == NULL) 669 goto err; 670 if (!BN_MONT_CTX_set(mont, m, ctx)) 671 goto err; 672 } 673 674 /* Get the window size to use with size of p. */ 675 window = BN_window_bits_for_ctime_exponent_size(bits); 676 #if defined(OPENSSL_BN_ASM_MONT5) 677 if (window == 6 && bits <= 1024) 678 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 679 #endif 680 681 /* Allocate a buffer large enough to hold all of the pre-computed 682 * powers of am, am itself and tmp. 683 */ 684 numPowers = 1 << window; 685 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 686 ((2*top) > numPowers ? (2*top) : numPowers)); 687 if ((powerbufFree = malloc(powerbufLen + 688 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) 689 goto err; 690 691 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 692 memset(powerbuf, 0, powerbufLen); 693 694 /* lay down tmp and am right after powers table */ 695 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 696 am.d = tmp.d + top; 697 tmp.top = am.top = 0; 698 tmp.dmax = am.dmax = top; 699 tmp.neg = am.neg = 0; 700 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 701 702 /* prepare a^0 in Montgomery domain */ 703 #if 1 704 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 705 goto err; 706 #else 707 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 708 for (i = 1; i < top; i++) 709 tmp.d[i] = (~m->d[i]) & BN_MASK2; 710 tmp.top = top; 711 #endif 712 713 /* prepare a^1 in Montgomery domain */ 714 if (a->neg || BN_ucmp(a, m) >= 0) { 715 if (!BN_mod(&am, a,m, ctx)) 716 goto err; 717 if (!BN_to_montgomery(&am, &am, mont, ctx)) 718 goto err; 719 } else if (!BN_to_montgomery(&am, a,mont, ctx)) 720 goto err; 721 722 #if defined(OPENSSL_BN_ASM_MONT5) 723 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 724 * specifically optimization of cache-timing attack countermeasures 725 * and pre-computation optimization. */ 726 727 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 728 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 729 if (window == 5 && top > 1) { 730 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 731 const void *table, const BN_ULONG *np, 732 const BN_ULONG *n0, int num, int power); 733 void bn_scatter5(const BN_ULONG *inp, size_t num, 734 void *table, size_t power); 735 void bn_gather5(BN_ULONG *out, size_t num, 736 void *table, size_t power); 737 738 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 739 740 /* BN_to_montgomery can contaminate words above .top 741 * [in BN_DEBUG[_DEBUG] build]... */ 742 for (i = am.top; i < top; i++) 743 am.d[i] = 0; 744 for (i = tmp.top; i < top; i++) 745 tmp.d[i] = 0; 746 747 bn_scatter5(tmp.d, top, powerbuf, 0); 748 bn_scatter5(am.d, am.top, powerbuf, 1); 749 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 750 bn_scatter5(tmp.d, top, powerbuf, 2); 751 752 #if 0 753 for (i = 3; i < 32; i++) { 754 /* Calculate a^i = a^(i-1) * a */ 755 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 756 n0, top, i - 1); 757 bn_scatter5(tmp.d, top, powerbuf, i); 758 } 759 #else 760 /* same as above, but uses squaring for 1/2 of operations */ 761 for (i = 4; i < 32; i*=2) { 762 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 763 bn_scatter5(tmp.d, top, powerbuf, i); 764 } 765 for (i = 3; i < 8; i += 2) { 766 int j; 767 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 768 n0, top, i - 1); 769 bn_scatter5(tmp.d, top, powerbuf, i); 770 for (j = 2 * i; j < 32; j *= 2) { 771 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 772 bn_scatter5(tmp.d, top, powerbuf, j); 773 } 774 } 775 for (; i < 16; i += 2) { 776 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 777 n0, top, i - 1); 778 bn_scatter5(tmp.d, top, powerbuf, i); 779 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 780 bn_scatter5(tmp.d, top, powerbuf, 2*i); 781 } 782 for (; i < 32; i += 2) { 783 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 784 n0, top, i - 1); 785 bn_scatter5(tmp.d, top, powerbuf, i); 786 } 787 #endif 788 bits--; 789 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 790 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 791 bn_gather5(tmp.d, top, powerbuf, wvalue); 792 793 /* Scan the exponent one window at a time starting from the most 794 * significant bits. 795 */ 796 while (bits >= 0) { 797 for (wvalue = 0, i = 0; i < 5; i++, bits--) 798 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 799 800 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 801 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 802 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 803 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 804 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 805 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 806 } 807 808 tmp.top = top; 809 bn_correct_top(&tmp); 810 } else 811 #endif 812 { 813 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, 814 window)) 815 goto err; 816 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, 817 window)) 818 goto err; 819 820 /* If the window size is greater than 1, then calculate 821 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 822 * (even powers could instead be computed as (a^(i/2))^2 823 * to use the slight performance advantage of sqr over mul). 824 */ 825 if (window > 1) { 826 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 827 goto err; 828 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 829 2, window)) 830 goto err; 831 for (i = 3; i < numPowers; i++) { 832 /* Calculate a^i = a^(i-1) * a */ 833 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, 834 mont, ctx)) 835 goto err; 836 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, 837 powerbuf, i, window)) 838 goto err; 839 } 840 } 841 842 bits--; 843 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 844 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 845 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, 846 wvalue, window)) 847 goto err; 848 849 /* Scan the exponent one window at a time starting from the most 850 * significant bits. 851 */ 852 while (bits >= 0) { 853 wvalue = 0; /* The 'value' of the window */ 854 855 /* Scan the window, squaring the result as we go */ 856 for (i = 0; i < window; i++, bits--) { 857 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, 858 mont, ctx)) 859 goto err; 860 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 861 } 862 863 /* Fetch the appropriate pre-computed value from the pre-buf */ 864 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, 865 wvalue, window)) 866 goto err; 867 868 /* Multiply the result into the intermediate result */ 869 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 870 goto err; 871 } 872 } 873 874 /* Convert the final result from montgomery to standard format */ 875 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 876 goto err; 877 ret = 1; 878 879 err: 880 if ((in_mont == NULL) && (mont != NULL)) 881 BN_MONT_CTX_free(mont); 882 if (powerbuf != NULL) { 883 explicit_bzero(powerbuf, powerbufLen); 884 free(powerbufFree); 885 } 886 BN_CTX_end(ctx); 887 return (ret); 888 } 889 890 int 891 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 892 BN_CTX *ctx, BN_MONT_CTX *in_mont) 893 { 894 BN_MONT_CTX *mont = NULL; 895 int b, bits, ret = 0; 896 int r_is_one; 897 BN_ULONG w, next_w; 898 BIGNUM *d, *r, *t; 899 BIGNUM *swap_tmp; 900 901 #define BN_MOD_MUL_WORD(r, w, m) \ 902 (BN_mul_word(r, (w)) && \ 903 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 904 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 905 /* BN_MOD_MUL_WORD is only used with 'w' large, 906 * so the BN_ucmp test is probably more overhead 907 * than always using BN_mod (which uses BN_copy if 908 * a similar test returns true). */ 909 /* We can use BN_mod and do not need BN_nnmod because our 910 * accumulator is never negative (the result of BN_mod does 911 * not depend on the sign of the modulus). 912 */ 913 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 914 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 915 916 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 917 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 918 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, 919 ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 920 return -1; 921 } 922 923 bn_check_top(p); 924 bn_check_top(m); 925 926 if (!BN_is_odd(m)) { 927 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 928 return (0); 929 } 930 if (m->top == 1) 931 a %= m->d[0]; /* make sure that 'a' is reduced */ 932 933 bits = BN_num_bits(p); 934 if (bits == 0) { 935 /* x**0 mod 1 is still zero. */ 936 if (BN_is_one(m)) { 937 ret = 1; 938 BN_zero(rr); 939 } else 940 ret = BN_one(rr); 941 return ret; 942 } 943 if (a == 0) { 944 BN_zero(rr); 945 ret = 1; 946 return ret; 947 } 948 949 BN_CTX_start(ctx); 950 if ((d = BN_CTX_get(ctx)) == NULL) 951 goto err; 952 if ((r = BN_CTX_get(ctx)) == NULL) 953 goto err; 954 if ((t = BN_CTX_get(ctx)) == NULL) 955 goto err; 956 957 if (in_mont != NULL) 958 mont = in_mont; 959 else { 960 if ((mont = BN_MONT_CTX_new()) == NULL) 961 goto err; 962 if (!BN_MONT_CTX_set(mont, m, ctx)) 963 goto err; 964 } 965 966 r_is_one = 1; /* except for Montgomery factor */ 967 968 /* bits-1 >= 0 */ 969 970 /* The result is accumulated in the product r*w. */ 971 w = a; /* bit 'bits-1' of 'p' is always set */ 972 for (b = bits - 2; b >= 0; b--) { 973 /* First, square r*w. */ 974 next_w = w * w; 975 if ((next_w / w) != w) /* overflow */ 976 { 977 if (r_is_one) { 978 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 979 goto err; 980 r_is_one = 0; 981 } else { 982 if (!BN_MOD_MUL_WORD(r, w, m)) 983 goto err; 984 } 985 next_w = 1; 986 } 987 w = next_w; 988 if (!r_is_one) { 989 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 990 goto err; 991 } 992 993 /* Second, multiply r*w by 'a' if exponent bit is set. */ 994 if (BN_is_bit_set(p, b)) { 995 next_w = w * a; 996 if ((next_w / a) != w) /* overflow */ 997 { 998 if (r_is_one) { 999 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1000 goto err; 1001 r_is_one = 0; 1002 } else { 1003 if (!BN_MOD_MUL_WORD(r, w, m)) 1004 goto err; 1005 } 1006 next_w = a; 1007 } 1008 w = next_w; 1009 } 1010 } 1011 1012 /* Finally, set r:=r*w. */ 1013 if (w != 1) { 1014 if (r_is_one) { 1015 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1016 goto err; 1017 r_is_one = 0; 1018 } else { 1019 if (!BN_MOD_MUL_WORD(r, w, m)) 1020 goto err; 1021 } 1022 } 1023 1024 if (r_is_one) /* can happen only if a == 1*/ 1025 { 1026 if (!BN_one(rr)) 1027 goto err; 1028 } else { 1029 if (!BN_from_montgomery(rr, r, mont, ctx)) 1030 goto err; 1031 } 1032 ret = 1; 1033 1034 err: 1035 if ((in_mont == NULL) && (mont != NULL)) 1036 BN_MONT_CTX_free(mont); 1037 BN_CTX_end(ctx); 1038 bn_check_top(rr); 1039 return (ret); 1040 } 1041 1042 1043 /* The old fallback, simple version :-) */ 1044 int 1045 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1046 BN_CTX *ctx) 1047 { 1048 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1049 int start = 1; 1050 BIGNUM *d; 1051 /* Table of variables obtained from 'ctx' */ 1052 BIGNUM *val[TABLE_SIZE]; 1053 1054 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1055 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1056 BNerr(BN_F_BN_MOD_EXP_SIMPLE, 1057 ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1058 return -1; 1059 } 1060 1061 bits = BN_num_bits(p); 1062 if (bits == 0) { 1063 /* x**0 mod 1 is still zero. */ 1064 if (BN_is_one(m)) { 1065 ret = 1; 1066 BN_zero(r); 1067 } else 1068 ret = BN_one(r); 1069 return ret; 1070 } 1071 1072 BN_CTX_start(ctx); 1073 if ((d = BN_CTX_get(ctx)) == NULL) 1074 goto err; 1075 if ((val[0] = BN_CTX_get(ctx)) == NULL) 1076 goto err; 1077 1078 if (!BN_nnmod(val[0],a,m,ctx)) 1079 goto err; /* 1 */ 1080 if (BN_is_zero(val[0])) { 1081 BN_zero(r); 1082 ret = 1; 1083 goto err; 1084 } 1085 1086 window = BN_window_bits_for_exponent_size(bits); 1087 if (window > 1) { 1088 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1089 goto err; /* 2 */ 1090 j = 1 << (window - 1); 1091 for (i = 1; i < j; i++) { 1092 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1093 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 1094 goto err; 1095 } 1096 } 1097 1098 start = 1; /* This is used to avoid multiplication etc 1099 * when there is only the value '1' in the 1100 * buffer. */ 1101 wvalue = 0; /* The 'value' of the window */ 1102 wstart = bits - 1; /* The top bit of the window */ 1103 wend = 0; /* The bottom bit of the window */ 1104 1105 if (!BN_one(r)) 1106 goto err; 1107 1108 for (;;) { 1109 if (BN_is_bit_set(p, wstart) == 0) { 1110 if (!start) 1111 if (!BN_mod_mul(r, r, r, m, ctx)) 1112 goto err; 1113 if (wstart == 0) 1114 break; 1115 wstart--; 1116 continue; 1117 } 1118 /* We now have wstart on a 'set' bit, we now need to work out 1119 * how bit a window to do. To do this we need to scan 1120 * forward until the last set bit before the end of the 1121 * window */ 1122 j = wstart; 1123 wvalue = 1; 1124 wend = 0; 1125 for (i = 1; i < window; i++) { 1126 if (wstart - i < 0) 1127 break; 1128 if (BN_is_bit_set(p, wstart - i)) { 1129 wvalue <<= (i - wend); 1130 wvalue |= 1; 1131 wend = i; 1132 } 1133 } 1134 1135 /* wend is the size of the current window */ 1136 j = wend + 1; 1137 /* add the 'bytes above' */ 1138 if (!start) 1139 for (i = 0; i < j; i++) { 1140 if (!BN_mod_mul(r, r, r, m, ctx)) 1141 goto err; 1142 } 1143 1144 /* wvalue will be an odd number < 2^window */ 1145 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1146 goto err; 1147 1148 /* move the 'window' down further */ 1149 wstart -= wend + 1; 1150 wvalue = 0; 1151 start = 0; 1152 if (wstart < 0) 1153 break; 1154 } 1155 ret = 1; 1156 1157 err: 1158 BN_CTX_end(ctx); 1159 bn_check_top(r); 1160 return (ret); 1161 } 1162