1 /* crypto/bn/bn_exp.c */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 113 #include "cryptlib.h" 114 #include "bn_lcl.h" 115 116 #include <stdlib.h> 117 #ifdef _WIN32 118 # include <malloc.h> 119 # ifndef alloca 120 # define alloca _alloca 121 # endif 122 #elif defined(__GNUC__) 123 # ifndef __SSP__ 124 # ifndef alloca 125 # define alloca(s) __builtin_alloca((s)) 126 # endif 127 # else 128 # undef alloca 129 # endif 130 #endif 131 132 /* maximum precomputation table size for *variable* sliding windows */ 133 #define TABLE_SIZE 32 134 135 /* this one works - simple but works */ 136 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 137 { 138 int i,bits,ret=0; 139 BIGNUM *v,*rr; 140 141 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 142 { 143 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 144 BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 145 return -1; 146 } 147 148 BN_CTX_start(ctx); 149 if ((r == a) || (r == p)) 150 rr = BN_CTX_get(ctx); 151 else 152 rr = r; 153 v = BN_CTX_get(ctx); 154 if (rr == NULL || v == NULL) goto err; 155 156 if (BN_copy(v,a) == NULL) goto err; 157 bits=BN_num_bits(p); 158 159 if (BN_is_odd(p)) 160 { if (BN_copy(rr,a) == NULL) goto err; } 161 else { if (!BN_one(rr)) goto err; } 162 163 for (i=1; i<bits; i++) 164 { 165 if (!BN_sqr(v,v,ctx)) goto err; 166 if (BN_is_bit_set(p,i)) 167 { 168 if (!BN_mul(rr,rr,v,ctx)) goto err; 169 } 170 } 171 ret=1; 172 err: 173 if (r != rr) BN_copy(r,rr); 174 BN_CTX_end(ctx); 175 bn_check_top(r); 176 return(ret); 177 } 178 179 180 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 181 BN_CTX *ctx) 182 { 183 int ret; 184 185 bn_check_top(a); 186 bn_check_top(p); 187 bn_check_top(m); 188 189 /* For even modulus m = 2^k*m_odd, it might make sense to compute 190 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 191 * exponentiation for the odd part), using appropriate exponent 192 * reductions, and combine the results using the CRT. 193 * 194 * For now, we use Montgomery only if the modulus is odd; otherwise, 195 * exponentiation using the reciprocal-based quick remaindering 196 * algorithm is used. 197 * 198 * (Timing obtained with expspeed.c [computations a^p mod m 199 * where a, p, m are of the same length: 256, 512, 1024, 2048, 200 * 4096, 8192 bits], compared to the running time of the 201 * standard algorithm: 202 * 203 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 204 * 55 .. 77 % [UltraSparc processor, but 205 * debug-solaris-sparcv8-gcc conf.] 206 * 207 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 208 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 209 * 210 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 211 * at 2048 and more bits, but at 512 and 1024 bits, it was 212 * slower even than the standard algorithm! 213 * 214 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 215 * should be obtained when the new Montgomery reduction code 216 * has been integrated into OpenSSL.) 217 */ 218 219 #define MONT_MUL_MOD 220 #define MONT_EXP_WORD 221 #define RECP_MUL_MOD 222 223 #ifdef MONT_MUL_MOD 224 /* I have finally been able to take out this pre-condition of 225 * the top bit being set. It was caused by an error in BN_div 226 * with negatives. There was also another problem when for a^b%m 227 * a >= m. eay 07-May-97 */ 228 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 229 230 if (BN_is_odd(m)) 231 { 232 # ifdef MONT_EXP_WORD 233 if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) 234 { 235 BN_ULONG A = a->d[0]; 236 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); 237 } 238 else 239 # endif 240 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); 241 } 242 else 243 #endif 244 #ifdef RECP_MUL_MOD 245 { ret=BN_mod_exp_recp(r,a,p,m,ctx); } 246 #else 247 { ret=BN_mod_exp_simple(r,a,p,m,ctx); } 248 #endif 249 250 bn_check_top(r); 251 return(ret); 252 } 253 254 255 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 256 const BIGNUM *m, BN_CTX *ctx) 257 { 258 int i,j,bits,ret=0,wstart,wend,window,wvalue; 259 int start=1; 260 BIGNUM *aa; 261 /* Table of variables obtained from 'ctx' */ 262 BIGNUM *val[TABLE_SIZE]; 263 BN_RECP_CTX recp; 264 265 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 266 { 267 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 268 BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 269 return -1; 270 } 271 272 bits=BN_num_bits(p); 273 274 if (bits == 0) 275 { 276 ret = BN_one(r); 277 return ret; 278 } 279 280 BN_CTX_start(ctx); 281 aa = BN_CTX_get(ctx); 282 val[0] = BN_CTX_get(ctx); 283 if(!aa || !val[0]) goto err; 284 285 BN_RECP_CTX_init(&recp); 286 if (m->neg) 287 { 288 /* ignore sign of 'm' */ 289 if (!BN_copy(aa, m)) goto err; 290 aa->neg = 0; 291 if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err; 292 } 293 else 294 { 295 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err; 296 } 297 298 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ 299 if (BN_is_zero(val[0])) 300 { 301 BN_zero(r); 302 ret = 1; 303 goto err; 304 } 305 306 window = BN_window_bits_for_exponent_size(bits); 307 if (window > 1) 308 { 309 if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx)) 310 goto err; /* 2 */ 311 j=1<<(window-1); 312 for (i=1; i<j; i++) 313 { 314 if(((val[i] = BN_CTX_get(ctx)) == NULL) || 315 !BN_mod_mul_reciprocal(val[i],val[i-1], 316 aa,&recp,ctx)) 317 goto err; 318 } 319 } 320 321 start=1; /* This is used to avoid multiplication etc 322 * when there is only the value '1' in the 323 * buffer. */ 324 wvalue=0; /* The 'value' of the window */ 325 wstart=bits-1; /* The top bit of the window */ 326 wend=0; /* The bottom bit of the window */ 327 328 if (!BN_one(r)) goto err; 329 330 for (;;) 331 { 332 if (BN_is_bit_set(p,wstart) == 0) 333 { 334 if (!start) 335 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx)) 336 goto err; 337 if (wstart == 0) break; 338 wstart--; 339 continue; 340 } 341 /* We now have wstart on a 'set' bit, we now need to work out 342 * how bit a window to do. To do this we need to scan 343 * forward until the last set bit before the end of the 344 * window */ 345 j=wstart; 346 wvalue=1; 347 wend=0; 348 for (i=1; i<window; i++) 349 { 350 if (wstart-i < 0) break; 351 if (BN_is_bit_set(p,wstart-i)) 352 { 353 wvalue<<=(i-wend); 354 wvalue|=1; 355 wend=i; 356 } 357 } 358 359 /* wend is the size of the current window */ 360 j=wend+1; 361 /* add the 'bytes above' */ 362 if (!start) 363 for (i=0; i<j; i++) 364 { 365 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx)) 366 goto err; 367 } 368 369 /* wvalue will be an odd number < 2^window */ 370 if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx)) 371 goto err; 372 373 /* move the 'window' down further */ 374 wstart-=wend+1; 375 wvalue=0; 376 start=0; 377 if (wstart < 0) break; 378 } 379 ret=1; 380 err: 381 BN_CTX_end(ctx); 382 BN_RECP_CTX_free(&recp); 383 bn_check_top(r); 384 return(ret); 385 } 386 387 388 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 389 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 390 { 391 int i,j,bits,ret=0,wstart,wend,window,wvalue; 392 int start=1; 393 BIGNUM *d,*r; 394 const BIGNUM *aa; 395 /* Table of variables obtained from 'ctx' */ 396 BIGNUM *val[TABLE_SIZE]; 397 BN_MONT_CTX *mont=NULL; 398 399 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 400 { 401 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 402 } 403 404 bn_check_top(a); 405 bn_check_top(p); 406 bn_check_top(m); 407 408 if (!BN_is_odd(m)) 409 { 410 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); 411 return(0); 412 } 413 bits=BN_num_bits(p); 414 if (bits == 0) 415 { 416 ret = BN_one(rr); 417 return ret; 418 } 419 420 BN_CTX_start(ctx); 421 d = BN_CTX_get(ctx); 422 r = BN_CTX_get(ctx); 423 val[0] = BN_CTX_get(ctx); 424 if (!d || !r || !val[0]) goto err; 425 426 /* If this is not done, things will break in the montgomery 427 * part */ 428 429 if (in_mont != NULL) 430 mont=in_mont; 431 else 432 { 433 if ((mont=BN_MONT_CTX_new()) == NULL) goto err; 434 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 435 } 436 437 if (a->neg || BN_ucmp(a,m) >= 0) 438 { 439 if (!BN_nnmod(val[0],a,m,ctx)) 440 goto err; 441 aa= val[0]; 442 } 443 else 444 aa=a; 445 if (BN_is_zero(aa)) 446 { 447 BN_zero(rr); 448 ret = 1; 449 goto err; 450 } 451 if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */ 452 453 window = BN_window_bits_for_exponent_size(bits); 454 if (window > 1) 455 { 456 if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ 457 j=1<<(window-1); 458 for (i=1; i<j; i++) 459 { 460 if(((val[i] = BN_CTX_get(ctx)) == NULL) || 461 !BN_mod_mul_montgomery(val[i],val[i-1], 462 d,mont,ctx)) 463 goto err; 464 } 465 } 466 467 start=1; /* This is used to avoid multiplication etc 468 * when there is only the value '1' in the 469 * buffer. */ 470 wvalue=0; /* The 'value' of the window */ 471 wstart=bits-1; /* The top bit of the window */ 472 wend=0; /* The bottom bit of the window */ 473 474 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; 475 for (;;) 476 { 477 if (BN_is_bit_set(p,wstart) == 0) 478 { 479 if (!start) 480 { 481 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) 482 goto err; 483 } 484 if (wstart == 0) break; 485 wstart--; 486 continue; 487 } 488 /* We now have wstart on a 'set' bit, we now need to work out 489 * how bit a window to do. To do this we need to scan 490 * forward until the last set bit before the end of the 491 * window */ 492 j=wstart; 493 wvalue=1; 494 wend=0; 495 for (i=1; i<window; i++) 496 { 497 if (wstart-i < 0) break; 498 if (BN_is_bit_set(p,wstart-i)) 499 { 500 wvalue<<=(i-wend); 501 wvalue|=1; 502 wend=i; 503 } 504 } 505 506 /* wend is the size of the current window */ 507 j=wend+1; 508 /* add the 'bytes above' */ 509 if (!start) 510 for (i=0; i<j; i++) 511 { 512 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) 513 goto err; 514 } 515 516 /* wvalue will be an odd number < 2^window */ 517 if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx)) 518 goto err; 519 520 /* move the 'window' down further */ 521 wstart-=wend+1; 522 wvalue=0; 523 start=0; 524 if (wstart < 0) break; 525 } 526 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; 527 ret=1; 528 err: 529 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 530 BN_CTX_end(ctx); 531 bn_check_top(rr); 532 return(ret); 533 } 534 535 536 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 537 * so that accessing any of these table values shows the same access pattern as far 538 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 539 * from/to that table. */ 540 541 static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width) 542 { 543 size_t i, j; 544 545 if (top > b->top) 546 top = b->top; /* this works because 'buf' is explicitly zeroed */ 547 for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) 548 { 549 buf[j] = ((unsigned char*)b->d)[i]; 550 } 551 552 return 1; 553 } 554 555 static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) 556 { 557 size_t i, j; 558 559 if (bn_wexpand(b, top) == NULL) 560 return 0; 561 562 for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) 563 { 564 ((unsigned char*)b->d)[i] = buf[j]; 565 } 566 567 b->top = top; 568 bn_correct_top(b); 569 return 1; 570 } 571 572 /* Given a pointer value, compute the next address that is a cache line multiple. */ 573 #define MOD_EXP_CTIME_ALIGN(x_) \ 574 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 575 576 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 577 * precomputation memory layout to limit data-dependency to a minimum 578 * to protect secret exponents (cf. the hyper-threading timing attacks 579 * pointed out by Colin Percival, 580 * http://www.daemonology.net/hyperthreading-considered-harmful/) 581 */ 582 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 583 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 584 { 585 int i,bits,ret=0,window,wvalue; 586 int top; 587 BN_MONT_CTX *mont=NULL; 588 589 int numPowers; 590 unsigned char *powerbufFree=NULL; 591 int powerbufLen = 0; 592 unsigned char *powerbuf=NULL; 593 BIGNUM tmp, am; 594 595 bn_check_top(a); 596 bn_check_top(p); 597 bn_check_top(m); 598 599 top = m->top; 600 601 if (!(m->d[0] & 1)) 602 { 603 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS); 604 return(0); 605 } 606 bits=BN_num_bits(p); 607 if (bits == 0) 608 { 609 ret = BN_one(rr); 610 return ret; 611 } 612 613 BN_CTX_start(ctx); 614 615 /* Allocate a montgomery context if it was not supplied by the caller. 616 * If this is not done, things will break in the montgomery part. 617 */ 618 if (in_mont != NULL) 619 mont=in_mont; 620 else 621 { 622 if ((mont=BN_MONT_CTX_new()) == NULL) goto err; 623 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 624 } 625 626 /* Get the window size to use with size of p. */ 627 window = BN_window_bits_for_ctime_exponent_size(bits); 628 #if defined(OPENSSL_BN_ASM_MONT5) 629 if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ 630 #endif 631 632 /* Allocate a buffer large enough to hold all of the pre-computed 633 * powers of am, am itself and tmp. 634 */ 635 numPowers = 1 << window; 636 powerbufLen = sizeof(m->d[0])*(top*numPowers + 637 ((2*top)>numPowers?(2*top):numPowers)); 638 #ifdef alloca 639 if (powerbufLen < 3072) 640 powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 641 else 642 #endif 643 if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) 644 goto err; 645 646 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 647 memset(powerbuf, 0, powerbufLen); 648 649 #ifdef alloca 650 if (powerbufLen < 3072) 651 powerbufFree = NULL; 652 #endif 653 654 /* lay down tmp and am right after powers table */ 655 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers); 656 am.d = tmp.d + top; 657 tmp.top = am.top = 0; 658 tmp.dmax = am.dmax = top; 659 tmp.neg = am.neg = 0; 660 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 661 662 /* prepare a^0 in Montgomery domain */ 663 #if 1 664 if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; 665 #else 666 tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ 667 for (i=1;i<top;i++) 668 tmp.d[i] = (~m->d[i])&BN_MASK2; 669 tmp.top = top; 670 #endif 671 672 /* prepare a^1 in Montgomery domain */ 673 if (a->neg || BN_ucmp(a,m) >= 0) 674 { 675 if (!BN_mod(&am,a,m,ctx)) goto err; 676 if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err; 677 } 678 else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; 679 680 #if defined(OPENSSL_BN_ASM_MONT5) 681 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 682 * specifically optimization of cache-timing attack countermeasures 683 * and pre-computation optimization. */ 684 685 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 686 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 687 if (window==5) 688 { 689 void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap, 690 const void *table,const BN_ULONG *np, 691 const BN_ULONG *n0,int num,int power); 692 void bn_scatter5(const BN_ULONG *inp,size_t num, 693 void *table,size_t power); 694 void bn_gather5(BN_ULONG *out,size_t num, 695 void *table,size_t power); 696 697 BN_ULONG *np=mont->N.d, *n0=mont->n0; 698 699 /* BN_to_montgomery can contaminate words above .top 700 * [in BN_DEBUG[_DEBUG] build]... */ 701 for (i=am.top; i<top; i++) am.d[i]=0; 702 for (i=tmp.top; i<top; i++) tmp.d[i]=0; 703 704 bn_scatter5(tmp.d,top,powerbuf,0); 705 bn_scatter5(am.d,am.top,powerbuf,1); 706 bn_mul_mont(tmp.d,am.d,am.d,np,n0,top); 707 bn_scatter5(tmp.d,top,powerbuf,2); 708 709 #if 0 710 for (i=3; i<32; i++) 711 { 712 /* Calculate a^i = a^(i-1) * a */ 713 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 714 bn_scatter5(tmp.d,top,powerbuf,i); 715 } 716 #else 717 /* same as above, but uses squaring for 1/2 of operations */ 718 for (i=4; i<32; i*=2) 719 { 720 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 721 bn_scatter5(tmp.d,top,powerbuf,i); 722 } 723 for (i=3; i<8; i+=2) 724 { 725 int j; 726 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 727 bn_scatter5(tmp.d,top,powerbuf,i); 728 for (j=2*i; j<32; j*=2) 729 { 730 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 731 bn_scatter5(tmp.d,top,powerbuf,j); 732 } 733 } 734 for (; i<16; i+=2) 735 { 736 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 737 bn_scatter5(tmp.d,top,powerbuf,i); 738 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 739 bn_scatter5(tmp.d,top,powerbuf,2*i); 740 } 741 for (; i<32; i+=2) 742 { 743 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 744 bn_scatter5(tmp.d,top,powerbuf,i); 745 } 746 #endif 747 bits--; 748 for (wvalue=0, i=bits%5; i>=0; i--,bits--) 749 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 750 bn_gather5(tmp.d,top,powerbuf,wvalue); 751 752 /* Scan the exponent one window at a time starting from the most 753 * significant bits. 754 */ 755 while (bits >= 0) 756 { 757 for (wvalue=0, i=0; i<5; i++,bits--) 758 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 759 760 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 761 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 762 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 763 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 764 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 765 bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); 766 } 767 768 tmp.top=top; 769 bn_correct_top(&tmp); 770 } 771 else 772 #endif 773 { 774 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err; 775 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err; 776 777 /* If the window size is greater than 1, then calculate 778 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 779 * (even powers could instead be computed as (a^(i/2))^2 780 * to use the slight performance advantage of sqr over mul). 781 */ 782 if (window > 1) 783 { 784 if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err; 785 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err; 786 for (i=3; i<numPowers; i++) 787 { 788 /* Calculate a^i = a^(i-1) * a */ 789 if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx)) 790 goto err; 791 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err; 792 } 793 } 794 795 bits--; 796 for (wvalue=0, i=bits%window; i>=0; i--,bits--) 797 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 798 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err; 799 800 /* Scan the exponent one window at a time starting from the most 801 * significant bits. 802 */ 803 while (bits >= 0) 804 { 805 wvalue=0; /* The 'value' of the window */ 806 807 /* Scan the window, squaring the result as we go */ 808 for (i=0; i<window; i++,bits--) 809 { 810 if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx)) goto err; 811 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 812 } 813 814 /* Fetch the appropriate pre-computed value from the pre-buf */ 815 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err; 816 817 /* Multiply the result into the intermediate result */ 818 if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err; 819 } 820 } 821 822 /* Convert the final result from montgomery to standard format */ 823 if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err; 824 ret=1; 825 err: 826 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 827 if (powerbuf!=NULL) 828 { 829 OPENSSL_cleanse(powerbuf,powerbufLen); 830 if (powerbufFree) OPENSSL_free(powerbufFree); 831 } 832 BN_CTX_end(ctx); 833 return(ret); 834 } 835 836 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 837 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 838 { 839 BN_MONT_CTX *mont = NULL; 840 int b, bits, ret=0; 841 int r_is_one; 842 BN_ULONG w, next_w; 843 BIGNUM *d, *r, *t; 844 BIGNUM *swap_tmp; 845 #define BN_MOD_MUL_WORD(r, w, m) \ 846 (BN_mul_word(r, (w)) && \ 847 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 848 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 849 /* BN_MOD_MUL_WORD is only used with 'w' large, 850 * so the BN_ucmp test is probably more overhead 851 * than always using BN_mod (which uses BN_copy if 852 * a similar test returns true). */ 853 /* We can use BN_mod and do not need BN_nnmod because our 854 * accumulator is never negative (the result of BN_mod does 855 * not depend on the sign of the modulus). 856 */ 857 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 858 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 859 860 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 861 { 862 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 863 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 864 return -1; 865 } 866 867 bn_check_top(p); 868 bn_check_top(m); 869 870 if (!BN_is_odd(m)) 871 { 872 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); 873 return(0); 874 } 875 if (m->top == 1) 876 a %= m->d[0]; /* make sure that 'a' is reduced */ 877 878 bits = BN_num_bits(p); 879 if (bits == 0) 880 { 881 ret = BN_one(rr); 882 return ret; 883 } 884 if (a == 0) 885 { 886 BN_zero(rr); 887 ret = 1; 888 return ret; 889 } 890 891 BN_CTX_start(ctx); 892 d = BN_CTX_get(ctx); 893 r = BN_CTX_get(ctx); 894 t = BN_CTX_get(ctx); 895 if (d == NULL || r == NULL || t == NULL) goto err; 896 897 if (in_mont != NULL) 898 mont=in_mont; 899 else 900 { 901 if ((mont = BN_MONT_CTX_new()) == NULL) goto err; 902 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err; 903 } 904 905 r_is_one = 1; /* except for Montgomery factor */ 906 907 /* bits-1 >= 0 */ 908 909 /* The result is accumulated in the product r*w. */ 910 w = a; /* bit 'bits-1' of 'p' is always set */ 911 for (b = bits-2; b >= 0; b--) 912 { 913 /* First, square r*w. */ 914 next_w = w*w; 915 if ((next_w/w) != w) /* overflow */ 916 { 917 if (r_is_one) 918 { 919 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; 920 r_is_one = 0; 921 } 922 else 923 { 924 if (!BN_MOD_MUL_WORD(r, w, m)) goto err; 925 } 926 next_w = 1; 927 } 928 w = next_w; 929 if (!r_is_one) 930 { 931 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err; 932 } 933 934 /* Second, multiply r*w by 'a' if exponent bit is set. */ 935 if (BN_is_bit_set(p, b)) 936 { 937 next_w = w*a; 938 if ((next_w/a) != w) /* overflow */ 939 { 940 if (r_is_one) 941 { 942 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; 943 r_is_one = 0; 944 } 945 else 946 { 947 if (!BN_MOD_MUL_WORD(r, w, m)) goto err; 948 } 949 next_w = a; 950 } 951 w = next_w; 952 } 953 } 954 955 /* Finally, set r:=r*w. */ 956 if (w != 1) 957 { 958 if (r_is_one) 959 { 960 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; 961 r_is_one = 0; 962 } 963 else 964 { 965 if (!BN_MOD_MUL_WORD(r, w, m)) goto err; 966 } 967 } 968 969 if (r_is_one) /* can happen only if a == 1*/ 970 { 971 if (!BN_one(rr)) goto err; 972 } 973 else 974 { 975 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; 976 } 977 ret = 1; 978 err: 979 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 980 BN_CTX_end(ctx); 981 bn_check_top(rr); 982 return(ret); 983 } 984 985 986 /* The old fallback, simple version :-) */ 987 int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 988 const BIGNUM *m, BN_CTX *ctx) 989 { 990 int i,j,bits,ret=0,wstart,wend,window,wvalue; 991 int start=1; 992 BIGNUM *d; 993 /* Table of variables obtained from 'ctx' */ 994 BIGNUM *val[TABLE_SIZE]; 995 996 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 997 { 998 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 999 BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1000 return -1; 1001 } 1002 1003 bits=BN_num_bits(p); 1004 1005 if (bits == 0) 1006 { 1007 ret = BN_one(r); 1008 return ret; 1009 } 1010 1011 BN_CTX_start(ctx); 1012 d = BN_CTX_get(ctx); 1013 val[0] = BN_CTX_get(ctx); 1014 if(!d || !val[0]) goto err; 1015 1016 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ 1017 if (BN_is_zero(val[0])) 1018 { 1019 BN_zero(r); 1020 ret = 1; 1021 goto err; 1022 } 1023 1024 window = BN_window_bits_for_exponent_size(bits); 1025 if (window > 1) 1026 { 1027 if (!BN_mod_mul(d,val[0],val[0],m,ctx)) 1028 goto err; /* 2 */ 1029 j=1<<(window-1); 1030 for (i=1; i<j; i++) 1031 { 1032 if(((val[i] = BN_CTX_get(ctx)) == NULL) || 1033 !BN_mod_mul(val[i],val[i-1],d,m,ctx)) 1034 goto err; 1035 } 1036 } 1037 1038 start=1; /* This is used to avoid multiplication etc 1039 * when there is only the value '1' in the 1040 * buffer. */ 1041 wvalue=0; /* The 'value' of the window */ 1042 wstart=bits-1; /* The top bit of the window */ 1043 wend=0; /* The bottom bit of the window */ 1044 1045 if (!BN_one(r)) goto err; 1046 1047 for (;;) 1048 { 1049 if (BN_is_bit_set(p,wstart) == 0) 1050 { 1051 if (!start) 1052 if (!BN_mod_mul(r,r,r,m,ctx)) 1053 goto err; 1054 if (wstart == 0) break; 1055 wstart--; 1056 continue; 1057 } 1058 /* We now have wstart on a 'set' bit, we now need to work out 1059 * how bit a window to do. To do this we need to scan 1060 * forward until the last set bit before the end of the 1061 * window */ 1062 j=wstart; 1063 wvalue=1; 1064 wend=0; 1065 for (i=1; i<window; i++) 1066 { 1067 if (wstart-i < 0) break; 1068 if (BN_is_bit_set(p,wstart-i)) 1069 { 1070 wvalue<<=(i-wend); 1071 wvalue|=1; 1072 wend=i; 1073 } 1074 } 1075 1076 /* wend is the size of the current window */ 1077 j=wend+1; 1078 /* add the 'bytes above' */ 1079 if (!start) 1080 for (i=0; i<j; i++) 1081 { 1082 if (!BN_mod_mul(r,r,r,m,ctx)) 1083 goto err; 1084 } 1085 1086 /* wvalue will be an odd number < 2^window */ 1087 if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx)) 1088 goto err; 1089 1090 /* move the 'window' down further */ 1091 wstart-=wend+1; 1092 wvalue=0; 1093 start=0; 1094 if (wstart < 0) break; 1095 } 1096 ret=1; 1097 err: 1098 BN_CTX_end(ctx); 1099 bn_check_top(r); 1100 return(ret); 1101 } 1102