1*f0865ec9SKyle Evans /* 2*f0865ec9SKyle Evans * Copyright (C) 2017 - This file is part of libecc project 3*f0865ec9SKyle Evans * 4*f0865ec9SKyle Evans * Authors: 5*f0865ec9SKyle Evans * Ryad BENADJILA <ryadbenadjila@gmail.com> 6*f0865ec9SKyle Evans * Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr> 7*f0865ec9SKyle Evans * Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr> 8*f0865ec9SKyle Evans * 9*f0865ec9SKyle Evans * Contributors: 10*f0865ec9SKyle Evans * Nicolas VIVET <nicolas.vivet@ssi.gouv.fr> 11*f0865ec9SKyle Evans * Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr> 12*f0865ec9SKyle Evans * 13*f0865ec9SKyle Evans * This software is licensed under a dual BSD and GPL v2 license. 14*f0865ec9SKyle Evans * See LICENSE file at the root folder of the project. 15*f0865ec9SKyle Evans */ 16*f0865ec9SKyle Evans #include <libecc/nn/nn_mul_public.h> 17*f0865ec9SKyle Evans #include <libecc/nn/nn_logical.h> 18*f0865ec9SKyle Evans #include <libecc/nn/nn_add.h> 19*f0865ec9SKyle Evans #include <libecc/nn/nn.h> 20*f0865ec9SKyle Evans /* Use internal API header */ 21*f0865ec9SKyle Evans #include "nn_div.h" 22*f0865ec9SKyle Evans 23*f0865ec9SKyle Evans /* 24*f0865ec9SKyle Evans * Some helper functions to perform operations on an arbitrary part 25*f0865ec9SKyle Evans * of a multiprecision number. 26*f0865ec9SKyle Evans * This is exactly the same code as for operations on the least significant 27*f0865ec9SKyle Evans * part of a multiprecision number except for the starting point in the 28*f0865ec9SKyle Evans * array representing it. 29*f0865ec9SKyle Evans * Done in *constant time*. 30*f0865ec9SKyle Evans * 31*f0865ec9SKyle Evans * Operations producing an output are in place. 32*f0865ec9SKyle Evans */ 33*f0865ec9SKyle Evans 34*f0865ec9SKyle Evans /* 35*f0865ec9SKyle Evans * Compare all the bits of in2 with the same number of bits in in1 starting at 36*f0865ec9SKyle Evans * 'shift' position in in1. in1 must be long enough for that purpose, i.e. 37*f0865ec9SKyle Evans * in1->wlen >= (in2->wlen + shift). The comparison value is provided in 38*f0865ec9SKyle Evans * 'cmp' parameter. The function returns 0 on success, -1 on error. 39*f0865ec9SKyle Evans * 40*f0865ec9SKyle Evans * The function is an internal helper; it expects initialized nn in1 and 41*f0865ec9SKyle Evans * in2: it does not verify that. 42*f0865ec9SKyle Evans */ 43*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_cmp_shift(nn_src_t in1, nn_src_t in2, u8 shift, int *cmp) 44*f0865ec9SKyle Evans { 45*f0865ec9SKyle Evans int ret, mask, tmp; 46*f0865ec9SKyle Evans u8 i; 47*f0865ec9SKyle Evans 48*f0865ec9SKyle Evans MUST_HAVE((in1->wlen >= (in2->wlen + shift)), ret, err); 49*f0865ec9SKyle Evans MUST_HAVE((cmp != NULL), ret, err); 50*f0865ec9SKyle Evans 51*f0865ec9SKyle Evans tmp = 0; 52*f0865ec9SKyle Evans for (i = in2->wlen; i > 0; i--) { 53*f0865ec9SKyle Evans mask = (!(tmp & 0x1)); 54*f0865ec9SKyle Evans tmp += ((in1->val[shift + i - 1] > in2->val[i - 1]) & mask); 55*f0865ec9SKyle Evans tmp -= ((in1->val[shift + i - 1] < in2->val[i - 1]) & mask); 56*f0865ec9SKyle Evans } 57*f0865ec9SKyle Evans (*cmp) = tmp; 58*f0865ec9SKyle Evans ret = 0; 59*f0865ec9SKyle Evans 60*f0865ec9SKyle Evans err: 61*f0865ec9SKyle Evans return ret; 62*f0865ec9SKyle Evans } 63*f0865ec9SKyle Evans 64*f0865ec9SKyle Evans /* 65*f0865ec9SKyle Evans * Conditionally subtract a shifted version of in from out, i.e.: 66*f0865ec9SKyle Evans * - if cnd == 1, out <- out - (in << shift) 67*f0865ec9SKyle Evans * - if cnd == 0, out <- out 68*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. On success, 'borrow' 69*f0865ec9SKyle Evans * provides the possible borrow resulting from the subtraction. 'borrow' 70*f0865ec9SKyle Evans * is not meaningful on error. 71*f0865ec9SKyle Evans * 72*f0865ec9SKyle Evans * The function is an internal helper; it expects initialized nn out and 73*f0865ec9SKyle Evans * in: it does not verify that. 74*f0865ec9SKyle Evans */ 75*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_cnd_sub_shift(int cnd, nn_t out, nn_src_t in, 76*f0865ec9SKyle Evans u8 shift, word_t *borrow) 77*f0865ec9SKyle Evans { 78*f0865ec9SKyle Evans word_t tmp, borrow1, borrow2, _borrow = WORD(0); 79*f0865ec9SKyle Evans word_t mask = WORD_MASK_IFNOTZERO(cnd); 80*f0865ec9SKyle Evans int ret; 81*f0865ec9SKyle Evans u8 i; 82*f0865ec9SKyle Evans 83*f0865ec9SKyle Evans MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err); 84*f0865ec9SKyle Evans MUST_HAVE((borrow != NULL), ret, err); 85*f0865ec9SKyle Evans 86*f0865ec9SKyle Evans /* 87*f0865ec9SKyle Evans * Perform subtraction one word at a time, 88*f0865ec9SKyle Evans * propagating the borrow. 89*f0865ec9SKyle Evans */ 90*f0865ec9SKyle Evans for (i = 0; i < in->wlen; i++) { 91*f0865ec9SKyle Evans tmp = (word_t)(out->val[shift + i] - (in->val[i] & mask)); 92*f0865ec9SKyle Evans borrow1 = (word_t)(tmp > out->val[shift + i]); 93*f0865ec9SKyle Evans out->val[shift + i] = (word_t)(tmp - _borrow); 94*f0865ec9SKyle Evans borrow2 = (word_t)(out->val[shift + i] > tmp); 95*f0865ec9SKyle Evans /* There is at most one borrow going out. */ 96*f0865ec9SKyle Evans _borrow = (word_t)(borrow1 | borrow2); 97*f0865ec9SKyle Evans } 98*f0865ec9SKyle Evans 99*f0865ec9SKyle Evans (*borrow) = _borrow; 100*f0865ec9SKyle Evans ret = 0; 101*f0865ec9SKyle Evans 102*f0865ec9SKyle Evans err: 103*f0865ec9SKyle Evans return ret; 104*f0865ec9SKyle Evans } 105*f0865ec9SKyle Evans 106*f0865ec9SKyle Evans /* 107*f0865ec9SKyle Evans * Subtract a shifted version of 'in' multiplied by 'w' from 'out' and return 108*f0865ec9SKyle Evans * borrow. The function returns 0 on success, -1 on error. 'borrow' is 109*f0865ec9SKyle Evans * meaningful only on success. 110*f0865ec9SKyle Evans * 111*f0865ec9SKyle Evans * The function is an internal helper; it expects initialized nn out and 112*f0865ec9SKyle Evans * in: it does not verify that. 113*f0865ec9SKyle Evans */ 114*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_submul_word_shift(nn_t out, nn_src_t in, word_t w, u8 shift, 115*f0865ec9SKyle Evans word_t *borrow) 116*f0865ec9SKyle Evans { 117*f0865ec9SKyle Evans word_t _borrow = WORD(0), prod_high, prod_low, tmp; 118*f0865ec9SKyle Evans int ret; 119*f0865ec9SKyle Evans u8 i; 120*f0865ec9SKyle Evans 121*f0865ec9SKyle Evans MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err); 122*f0865ec9SKyle Evans MUST_HAVE((borrow != NULL), ret, err); 123*f0865ec9SKyle Evans 124*f0865ec9SKyle Evans for (i = 0; i < in->wlen; i++) { 125*f0865ec9SKyle Evans /* 126*f0865ec9SKyle Evans * Compute the result of the multiplication of 127*f0865ec9SKyle Evans * two words. 128*f0865ec9SKyle Evans */ 129*f0865ec9SKyle Evans WORD_MUL(prod_high, prod_low, in->val[i], w); 130*f0865ec9SKyle Evans 131*f0865ec9SKyle Evans /* 132*f0865ec9SKyle Evans * And add previous borrow. 133*f0865ec9SKyle Evans */ 134*f0865ec9SKyle Evans prod_low = (word_t)(prod_low + _borrow); 135*f0865ec9SKyle Evans prod_high = (word_t)(prod_high + (prod_low < _borrow)); 136*f0865ec9SKyle Evans 137*f0865ec9SKyle Evans /* 138*f0865ec9SKyle Evans * Subtract computed word at current position in result. 139*f0865ec9SKyle Evans */ 140*f0865ec9SKyle Evans tmp = (word_t)(out->val[shift + i] - prod_low); 141*f0865ec9SKyle Evans _borrow = (word_t)(prod_high + (tmp > out->val[shift + i])); 142*f0865ec9SKyle Evans out->val[shift + i] = tmp; 143*f0865ec9SKyle Evans } 144*f0865ec9SKyle Evans 145*f0865ec9SKyle Evans (*borrow) = _borrow; 146*f0865ec9SKyle Evans ret = 0; 147*f0865ec9SKyle Evans 148*f0865ec9SKyle Evans err: 149*f0865ec9SKyle Evans return ret; 150*f0865ec9SKyle Evans } 151*f0865ec9SKyle Evans 152*f0865ec9SKyle Evans /* 153*f0865ec9SKyle Evans * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b' 154*f0865ec9SKyle Evans * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized on 155*f0865ec9SKyle Evans * return. * Computation are performed in *constant time*, only depending on 156*f0865ec9SKyle Evans * the lengths of 'a' and 'b', but not on the values of 'a' and 'b'. 157*f0865ec9SKyle Evans * 158*f0865ec9SKyle Evans * This uses the above function to perform arithmetic on arbitrary parts 159*f0865ec9SKyle Evans * of multiprecision numbers. 160*f0865ec9SKyle Evans * 161*f0865ec9SKyle Evans * The algorithm used is schoolbook division: 162*f0865ec9SKyle Evans * + the quotient is computed word by word, 163*f0865ec9SKyle Evans * + a small division of the MSW is performed to obtain an 164*f0865ec9SKyle Evans * approximation of the MSW of the quotient, 165*f0865ec9SKyle Evans * + the approximation is corrected to obtain the correct 166*f0865ec9SKyle Evans * multiprecision MSW of the quotient, 167*f0865ec9SKyle Evans * + the corresponding product is subtracted from the dividend, 168*f0865ec9SKyle Evans * + the same procedure is used for the following word of the quotient. 169*f0865ec9SKyle Evans * 170*f0865ec9SKyle Evans * It is assumed that: 171*f0865ec9SKyle Evans * + b is normalized: the MSB of its MSW is 1, 172*f0865ec9SKyle Evans * + the most significant part of a is smaller than b, 173*f0865ec9SKyle Evans * + a precomputed reciprocal 174*f0865ec9SKyle Evans * v = floor(B^3/(d+1)) - B 175*f0865ec9SKyle Evans * where d is the MSW of the (normalized) divisor 176*f0865ec9SKyle Evans * is given to perform the small 3-by-2 division. 177*f0865ec9SKyle Evans * + using this reciprocal, the approximated quotient is always 178*f0865ec9SKyle Evans * too small and at most one multiprecision correction is needed. 179*f0865ec9SKyle Evans * 180*f0865ec9SKyle Evans * It returns 0 on sucess, -1 on error. 181*f0865ec9SKyle Evans * 182*f0865ec9SKyle Evans * CAUTION: 183*f0865ec9SKyle Evans * 184*f0865ec9SKyle Evans * - The function is expected to be used ONLY by the generic version 185*f0865ec9SKyle Evans * nn_divrem_normalized() defined later in the file. 186*f0865ec9SKyle Evans * - All parameters must have been initialized. Unlike exported/public 187*f0865ec9SKyle Evans * functions, this internal helper does not verify that nn parameters 188*f0865ec9SKyle Evans * have been initialized. Again, this is expected from the caller 189*f0865ec9SKyle Evans * (nn_divrem_normalized()). 190*f0865ec9SKyle Evans * - The function does not support aliasing of 'b' or 'q'. See 191*f0865ec9SKyle Evans * _nn_divrem_normalized_aliased() for such a wrapper. 192*f0865ec9SKyle Evans * 193*f0865ec9SKyle Evans */ 194*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized(nn_t q, nn_t r, 195*f0865ec9SKyle Evans nn_src_t a, nn_src_t b, word_t v) 196*f0865ec9SKyle Evans { 197*f0865ec9SKyle Evans word_t borrow, qstar, qh, ql, rh, rl; /* for 3-by-2 div. */ 198*f0865ec9SKyle Evans int _small, cmp, ret; 199*f0865ec9SKyle Evans u8 i; 200*f0865ec9SKyle Evans 201*f0865ec9SKyle Evans MUST_HAVE(!(b->wlen <= 0), ret, err); 202*f0865ec9SKyle Evans MUST_HAVE(!(a->wlen <= b->wlen), ret, err); 203*f0865ec9SKyle Evans MUST_HAVE(!(!((b->val[b->wlen - 1] >> (WORD_BITS - 1)) == WORD(1))), ret, err); 204*f0865ec9SKyle Evans MUST_HAVE(!_nn_cmp_shift(a, b, (u8)(a->wlen - b->wlen), &cmp) && (cmp < 0), ret, err); 205*f0865ec9SKyle Evans 206*f0865ec9SKyle Evans /* Handle trivial aliasing for a and r */ 207*f0865ec9SKyle Evans if (r != a) { 208*f0865ec9SKyle Evans ret = nn_set_wlen(r, a->wlen); EG(ret, err); 209*f0865ec9SKyle Evans ret = nn_copy(r, a); EG(ret, err); 210*f0865ec9SKyle Evans } 211*f0865ec9SKyle Evans 212*f0865ec9SKyle Evans ret = nn_set_wlen(q, (u8)(r->wlen - b->wlen)); EG(ret, err); 213*f0865ec9SKyle Evans 214*f0865ec9SKyle Evans /* 215*f0865ec9SKyle Evans * Compute subsequent words of the quotient one by one. 216*f0865ec9SKyle Evans * Perform approximate 3-by-2 division using the precomputed 217*f0865ec9SKyle Evans * reciprocal and correct afterward. 218*f0865ec9SKyle Evans */ 219*f0865ec9SKyle Evans for (i = r->wlen; i > b->wlen; i--) { 220*f0865ec9SKyle Evans u8 shift = (u8)(i - b->wlen - 1); 221*f0865ec9SKyle Evans 222*f0865ec9SKyle Evans /* 223*f0865ec9SKyle Evans * Perform 3-by-2 approximate division: 224*f0865ec9SKyle Evans * <qstar, qh, ql> = <rh, rl> * (v + B) 225*f0865ec9SKyle Evans * We are only interested in qstar. 226*f0865ec9SKyle Evans */ 227*f0865ec9SKyle Evans rh = r->val[i - 1]; 228*f0865ec9SKyle Evans rl = r->val[i - 2]; 229*f0865ec9SKyle Evans /* Perform 2-by-1 multiplication. */ 230*f0865ec9SKyle Evans WORD_MUL(qh, ql, rl, v); 231*f0865ec9SKyle Evans WORD_MUL(qstar, ql, rh, v); 232*f0865ec9SKyle Evans /* And propagate carries. */ 233*f0865ec9SKyle Evans qh = (word_t)(qh + ql); 234*f0865ec9SKyle Evans qstar = (word_t)(qstar + (qh < ql)); 235*f0865ec9SKyle Evans qh = (word_t)(qh + rl); 236*f0865ec9SKyle Evans rh = (word_t)(rh + (qh < rl)); 237*f0865ec9SKyle Evans qstar = (word_t)(qstar + rh); 238*f0865ec9SKyle Evans 239*f0865ec9SKyle Evans /* 240*f0865ec9SKyle Evans * Compute approximate quotient times divisor 241*f0865ec9SKyle Evans * and subtract it from remainder: 242*f0865ec9SKyle Evans * r = r - (b*qstar << B^shift) 243*f0865ec9SKyle Evans */ 244*f0865ec9SKyle Evans ret = _nn_submul_word_shift(r, b, qstar, shift, &borrow); EG(ret, err); 245*f0865ec9SKyle Evans 246*f0865ec9SKyle Evans /* Check the approximate quotient was indeed not too large. */ 247*f0865ec9SKyle Evans MUST_HAVE(!(r->val[i - 1] < borrow), ret, err); 248*f0865ec9SKyle Evans r->val[i - 1] = (word_t)(r->val[i - 1] - borrow); 249*f0865ec9SKyle Evans 250*f0865ec9SKyle Evans /* 251*f0865ec9SKyle Evans * Check whether the approximate quotient was too small or not. 252*f0865ec9SKyle Evans * At most one multiprecision correction is needed. 253*f0865ec9SKyle Evans */ 254*f0865ec9SKyle Evans ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err); 255*f0865ec9SKyle Evans _small = ((!!(r->val[i - 1])) | (cmp >= 0)); 256*f0865ec9SKyle Evans /* Perform conditional multiprecision correction. */ 257*f0865ec9SKyle Evans ret = _nn_cnd_sub_shift(_small, r, b, shift, &borrow); EG(ret, err); 258*f0865ec9SKyle Evans MUST_HAVE(!(r->val[i - 1] != borrow), ret, err); 259*f0865ec9SKyle Evans r->val[i - 1] = (word_t)(r->val[i - 1] - borrow); 260*f0865ec9SKyle Evans /* 261*f0865ec9SKyle Evans * Adjust the quotient if it was too small and set it in the 262*f0865ec9SKyle Evans * multiprecision array. 263*f0865ec9SKyle Evans */ 264*f0865ec9SKyle Evans qstar = (word_t)(qstar + (word_t)_small); 265*f0865ec9SKyle Evans q->val[shift] = qstar; 266*f0865ec9SKyle Evans /* 267*f0865ec9SKyle Evans * Check that the MSW of remainder was cancelled out and that 268*f0865ec9SKyle Evans * we could not increase the quotient anymore. 269*f0865ec9SKyle Evans */ 270*f0865ec9SKyle Evans MUST_HAVE(!(r->val[r->wlen - 1] != WORD(0)), ret, err); 271*f0865ec9SKyle Evans 272*f0865ec9SKyle Evans ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err); 273*f0865ec9SKyle Evans MUST_HAVE(!(cmp >= 0), ret, err); 274*f0865ec9SKyle Evans 275*f0865ec9SKyle Evans ret = nn_set_wlen(r, (u8)(r->wlen - 1)); EG(ret, err); 276*f0865ec9SKyle Evans } 277*f0865ec9SKyle Evans 278*f0865ec9SKyle Evans err: 279*f0865ec9SKyle Evans return ret; 280*f0865ec9SKyle Evans } 281*f0865ec9SKyle Evans 282*f0865ec9SKyle Evans /* 283*f0865ec9SKyle Evans * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b' 284*f0865ec9SKyle Evans * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized. 285*f0865ec9SKyle Evans * Compared to _nn_divrem_normalized(), this internal version 286*f0865ec9SKyle Evans * explicitly handle the case where 'b' and 'r' point to the same nn (i.e. 'r' 287*f0865ec9SKyle Evans * result is stored in 'b' on success), hence the removal of 'r' parameter from 288*f0865ec9SKyle Evans * function prototype compared to _nn_divrem_normalized(). 289*f0865ec9SKyle Evans * 290*f0865ec9SKyle Evans * The computation is performed in *constant time*, see documentation of 291*f0865ec9SKyle Evans * _nn_divrem_normalized(). 292*f0865ec9SKyle Evans * 293*f0865ec9SKyle Evans * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the 294*f0865ec9SKyle Evans * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than 295*f0865ec9SKyle Evans * 'b'. 296*f0865ec9SKyle Evans * 297*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. 298*f0865ec9SKyle Evans * 299*f0865ec9SKyle Evans * CAUTION: 300*f0865ec9SKyle Evans * 301*f0865ec9SKyle Evans * - The function is expected to be used ONLY by the generic version 302*f0865ec9SKyle Evans * nn_divrem_normalized() defined later in the file. 303*f0865ec9SKyle Evans * - All parameters must have been initialized. Unlike exported/public 304*f0865ec9SKyle Evans * functions, this internal helper does not verify that nn parameters 305*f0865ec9SKyle Evans * have been initialized. Again, this is expected from the caller 306*f0865ec9SKyle Evans * (nn_divrem_normalized()). 307*f0865ec9SKyle Evans * - The function does not support aliasing of 'a' or 'q'. 308*f0865ec9SKyle Evans * 309*f0865ec9SKyle Evans */ 310*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized_aliased(nn_t q, nn_src_t a, nn_t b, word_t v) 311*f0865ec9SKyle Evans { 312*f0865ec9SKyle Evans int ret; 313*f0865ec9SKyle Evans nn r; 314*f0865ec9SKyle Evans r.magic = WORD(0); 315*f0865ec9SKyle Evans 316*f0865ec9SKyle Evans ret = nn_init(&r, 0); EG(ret, err); 317*f0865ec9SKyle Evans ret = _nn_divrem_normalized(q, &r, a, b, v); EG(ret, err); 318*f0865ec9SKyle Evans ret = nn_copy(b, &r); EG(ret, err); 319*f0865ec9SKyle Evans 320*f0865ec9SKyle Evans err: 321*f0865ec9SKyle Evans nn_uninit(&r); 322*f0865ec9SKyle Evans return ret; 323*f0865ec9SKyle Evans } 324*f0865ec9SKyle Evans 325*f0865ec9SKyle Evans /* 326*f0865ec9SKyle Evans * Compute quotient and remainder of Euclidean division, and do not normalize 327*f0865ec9SKyle Evans * them. Done in *constant time*, see documentation of _nn_divrem_normalized(). 328*f0865ec9SKyle Evans * 329*f0865ec9SKyle Evans * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the 330*f0865ec9SKyle Evans * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than 331*f0865ec9SKyle Evans * 'b'. 332*f0865ec9SKyle Evans * 333*f0865ec9SKyle Evans * Aliasing is supported for 'r' only (with 'b'), i.e. 'r' and 'b' can point 334*f0865ec9SKyle Evans * to the same nn. 335*f0865ec9SKyle Evans * 336*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. 337*f0865ec9SKyle Evans */ 338*f0865ec9SKyle Evans int nn_divrem_normalized(nn_t q, nn_t r, nn_src_t a, nn_src_t b, word_t v) 339*f0865ec9SKyle Evans { 340*f0865ec9SKyle Evans int ret; 341*f0865ec9SKyle Evans 342*f0865ec9SKyle Evans ret = nn_check_initialized(a); EG(ret, err); 343*f0865ec9SKyle Evans ret = nn_check_initialized(q); EG(ret, err); 344*f0865ec9SKyle Evans ret = nn_check_initialized(r); EG(ret, err); 345*f0865ec9SKyle Evans 346*f0865ec9SKyle Evans /* Unsupported aliasings */ 347*f0865ec9SKyle Evans MUST_HAVE((q != r) && (q != a) && (q != b), ret, err); 348*f0865ec9SKyle Evans 349*f0865ec9SKyle Evans if (r == b) { 350*f0865ec9SKyle Evans ret = _nn_divrem_normalized_aliased(q, a, r, v); 351*f0865ec9SKyle Evans } else { 352*f0865ec9SKyle Evans ret = nn_check_initialized(b); EG(ret, err); 353*f0865ec9SKyle Evans ret = _nn_divrem_normalized(q, r, a, b, v); 354*f0865ec9SKyle Evans } 355*f0865ec9SKyle Evans 356*f0865ec9SKyle Evans err: 357*f0865ec9SKyle Evans return ret; 358*f0865ec9SKyle Evans } 359*f0865ec9SKyle Evans 360*f0865ec9SKyle Evans /* 361*f0865ec9SKyle Evans * Compute remainder only and do not normalize it. 362*f0865ec9SKyle Evans * Constant time, see documentation of _nn_divrem_normalized. 363*f0865ec9SKyle Evans * 364*f0865ec9SKyle Evans * Support aliasing of inputs and outputs. 365*f0865ec9SKyle Evans * 366*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. 367*f0865ec9SKyle Evans */ 368*f0865ec9SKyle Evans int nn_mod_normalized(nn_t r, nn_src_t a, nn_src_t b, word_t v) 369*f0865ec9SKyle Evans { 370*f0865ec9SKyle Evans int ret; 371*f0865ec9SKyle Evans nn q; 372*f0865ec9SKyle Evans q.magic = WORD(0); 373*f0865ec9SKyle Evans 374*f0865ec9SKyle Evans ret = nn_init(&q, 0); EG(ret, err); 375*f0865ec9SKyle Evans ret = nn_divrem_normalized(&q, r, a, b, v); 376*f0865ec9SKyle Evans 377*f0865ec9SKyle Evans err: 378*f0865ec9SKyle Evans nn_uninit(&q); 379*f0865ec9SKyle Evans return ret; 380*f0865ec9SKyle Evans } 381*f0865ec9SKyle Evans 382*f0865ec9SKyle Evans /* 383*f0865ec9SKyle Evans * Compute quotient and remainder of Euclidean division, 384*f0865ec9SKyle Evans * and do not normalize them. 385*f0865ec9SKyle Evans * Done in *constant time*, 386*f0865ec9SKyle Evans * only depending on the lengths of 'a' and 'b' and the value of 'cnt', 387*f0865ec9SKyle Evans * but not on the values of 'a' and 'b'. 388*f0865ec9SKyle Evans * 389*f0865ec9SKyle Evans * Assume that b has been normalized by a 'cnt' bit shift, 390*f0865ec9SKyle Evans * that v is the reciprocal of the MSW of 'b', 391*f0865ec9SKyle Evans * but a is not shifted yet. 392*f0865ec9SKyle Evans * Useful when multiple multiplication by the same b are performed, 393*f0865ec9SKyle Evans * e.g. at the fp level. 394*f0865ec9SKyle Evans * 395*f0865ec9SKyle Evans * All outputs MUST have been initialized. The function does not support 396*f0865ec9SKyle Evans * aliasing of 'b' or 'q'. It returns 0 on success, -1 on error. 397*f0865ec9SKyle Evans * 398*f0865ec9SKyle Evans * CAUTION: 399*f0865ec9SKyle Evans * 400*f0865ec9SKyle Evans * - The function is expected to be used ONLY by the generic version 401*f0865ec9SKyle Evans * nn_divrem_normalized() defined later in the file. 402*f0865ec9SKyle Evans * - All parameters must have been initialized. Unlike exported/public 403*f0865ec9SKyle Evans * functions, this internal helper does not verify that 404*f0865ec9SKyle Evans * have been initialized. Again, this is expected from the caller 405*f0865ec9SKyle Evans * (nn_divrem_unshifted()). 406*f0865ec9SKyle Evans * - The function does not support aliasing. See 407*f0865ec9SKyle Evans * _nn_divrem_unshifted_aliased() for such a wrapper. 408*f0865ec9SKyle Evans */ 409*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b_norm, 410*f0865ec9SKyle Evans word_t v, bitcnt_t cnt) 411*f0865ec9SKyle Evans { 412*f0865ec9SKyle Evans nn a_shift; 413*f0865ec9SKyle Evans u8 new_wlen, b_wlen; 414*f0865ec9SKyle Evans int larger, ret, cmp; 415*f0865ec9SKyle Evans word_t borrow; 416*f0865ec9SKyle Evans a_shift.magic = WORD(0); 417*f0865ec9SKyle Evans 418*f0865ec9SKyle Evans /* Avoid overflow */ 419*f0865ec9SKyle Evans MUST_HAVE(((a->wlen + BIT_LEN_WORDS(cnt)) < NN_MAX_WORD_LEN), ret, err); 420*f0865ec9SKyle Evans 421*f0865ec9SKyle Evans /* We now know that new_wlen will fit in an u8 */ 422*f0865ec9SKyle Evans new_wlen = (u8)(a->wlen + (u8)BIT_LEN_WORDS(cnt)); 423*f0865ec9SKyle Evans 424*f0865ec9SKyle Evans b_wlen = b_norm->wlen; 425*f0865ec9SKyle Evans if (new_wlen < b_wlen) { /* trivial case */ 426*f0865ec9SKyle Evans ret = nn_copy(r, a); EG(ret, err); 427*f0865ec9SKyle Evans ret = nn_zero(q); 428*f0865ec9SKyle Evans goto err; 429*f0865ec9SKyle Evans } 430*f0865ec9SKyle Evans 431*f0865ec9SKyle Evans /* Shift a. */ 432*f0865ec9SKyle Evans ret = nn_init(&a_shift, (u16)(new_wlen * WORD_BYTES)); EG(ret, err); 433*f0865ec9SKyle Evans ret = nn_set_wlen(&a_shift, new_wlen); EG(ret, err); 434*f0865ec9SKyle Evans ret = nn_lshift_fixedlen(&a_shift, a, cnt); EG(ret, err); 435*f0865ec9SKyle Evans ret = nn_set_wlen(r, new_wlen); EG(ret, err); 436*f0865ec9SKyle Evans 437*f0865ec9SKyle Evans if (new_wlen == b_wlen) { 438*f0865ec9SKyle Evans /* Ensure that a is smaller than b. */ 439*f0865ec9SKyle Evans ret = nn_cmp(&a_shift, b_norm, &cmp); EG(ret, err); 440*f0865ec9SKyle Evans larger = (cmp >= 0); 441*f0865ec9SKyle Evans ret = nn_cnd_sub(larger, r, &a_shift, b_norm); EG(ret, err); 442*f0865ec9SKyle Evans MUST_HAVE(((!nn_cmp(r, b_norm, &cmp)) && (cmp < 0)), ret, err); 443*f0865ec9SKyle Evans 444*f0865ec9SKyle Evans /* Set MSW of quotient. */ 445*f0865ec9SKyle Evans ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err); 446*f0865ec9SKyle Evans q->val[new_wlen - b_wlen] = (word_t) larger; 447*f0865ec9SKyle Evans /* And we are done as the quotient is 0 or 1. */ 448*f0865ec9SKyle Evans } else if (new_wlen > b_wlen) { 449*f0865ec9SKyle Evans /* Ensure that most significant part of a is smaller than b. */ 450*f0865ec9SKyle Evans ret = _nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp); EG(ret, err); 451*f0865ec9SKyle Evans larger = (cmp >= 0); 452*f0865ec9SKyle Evans ret = _nn_cnd_sub_shift(larger, &a_shift, b_norm, (u8)(new_wlen - b_wlen), &borrow); EG(ret, err); 453*f0865ec9SKyle Evans MUST_HAVE(((!_nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp)) && (cmp < 0)), ret, err); 454*f0865ec9SKyle Evans 455*f0865ec9SKyle Evans /* 456*f0865ec9SKyle Evans * Perform division with MSP of a smaller than b. This ensures 457*f0865ec9SKyle Evans * that the quotient is of length a_len - b_len. 458*f0865ec9SKyle Evans */ 459*f0865ec9SKyle Evans ret = nn_divrem_normalized(q, r, &a_shift, b_norm, v); EG(ret, err); 460*f0865ec9SKyle Evans 461*f0865ec9SKyle Evans /* Set MSW of quotient. */ 462*f0865ec9SKyle Evans ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err); 463*f0865ec9SKyle Evans q->val[new_wlen - b_wlen] = (word_t) larger; 464*f0865ec9SKyle Evans } /* else a is smaller than b... treated above. */ 465*f0865ec9SKyle Evans 466*f0865ec9SKyle Evans ret = nn_rshift_fixedlen(r, r, cnt); EG(ret, err); 467*f0865ec9SKyle Evans ret = nn_set_wlen(r, b_wlen); 468*f0865ec9SKyle Evans 469*f0865ec9SKyle Evans err: 470*f0865ec9SKyle Evans nn_uninit(&a_shift); 471*f0865ec9SKyle Evans 472*f0865ec9SKyle Evans return ret; 473*f0865ec9SKyle Evans } 474*f0865ec9SKyle Evans 475*f0865ec9SKyle Evans /* 476*f0865ec9SKyle Evans * Same as previous but handling aliasing of 'r' with 'b_norm', i.e. on success, 477*f0865ec9SKyle Evans * result 'r' is passed through 'b_norm'. 478*f0865ec9SKyle Evans * 479*f0865ec9SKyle Evans * CAUTION: 480*f0865ec9SKyle Evans * 481*f0865ec9SKyle Evans * - The function is expected to be used ONLY by the generic version 482*f0865ec9SKyle Evans * nn_divrem_normalized() defined later in the file. 483*f0865ec9SKyle Evans * - All parameter must have been initialized. Unlike exported/public 484*f0865ec9SKyle Evans * functions, this internal helper does not verify that nn parameters 485*f0865ec9SKyle Evans * have been initialized. Again, this is expected from the caller 486*f0865ec9SKyle Evans * (nn_divrem_unshifted()). 487*f0865ec9SKyle Evans */ 488*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted_aliased(nn_t q, nn_src_t a, nn_t b_norm, 489*f0865ec9SKyle Evans word_t v, bitcnt_t cnt) 490*f0865ec9SKyle Evans { 491*f0865ec9SKyle Evans int ret; 492*f0865ec9SKyle Evans nn r; 493*f0865ec9SKyle Evans r.magic = WORD(0); 494*f0865ec9SKyle Evans 495*f0865ec9SKyle Evans ret = nn_init(&r, 0); EG(ret, err); 496*f0865ec9SKyle Evans ret = _nn_divrem_unshifted(q, &r, a, b_norm, v, cnt); EG(ret, err); 497*f0865ec9SKyle Evans ret = nn_copy(b_norm, &r); EG(ret, err); 498*f0865ec9SKyle Evans 499*f0865ec9SKyle Evans err: 500*f0865ec9SKyle Evans nn_uninit(&r); 501*f0865ec9SKyle Evans return ret; 502*f0865ec9SKyle Evans } 503*f0865ec9SKyle Evans 504*f0865ec9SKyle Evans /* 505*f0865ec9SKyle Evans * Compute quotient and remainder and do not normalize them. 506*f0865ec9SKyle Evans * Constant time, see documentation of _nn_divrem_unshifted(). 507*f0865ec9SKyle Evans * 508*f0865ec9SKyle Evans * Alias-supporting version of _nn_divrem_unshifted for 'r' only. 509*f0865ec9SKyle Evans * 510*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. 511*f0865ec9SKyle Evans */ 512*f0865ec9SKyle Evans int nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b, 513*f0865ec9SKyle Evans word_t v, bitcnt_t cnt) 514*f0865ec9SKyle Evans { 515*f0865ec9SKyle Evans int ret; 516*f0865ec9SKyle Evans 517*f0865ec9SKyle Evans ret = nn_check_initialized(a); EG(ret, err); 518*f0865ec9SKyle Evans ret = nn_check_initialized(q); EG(ret, err); 519*f0865ec9SKyle Evans ret = nn_check_initialized(r); EG(ret, err); 520*f0865ec9SKyle Evans 521*f0865ec9SKyle Evans /* Unsupported aliasings */ 522*f0865ec9SKyle Evans MUST_HAVE((q != r) && (q != a) && (q != b), ret, err); 523*f0865ec9SKyle Evans 524*f0865ec9SKyle Evans if (r == b) { 525*f0865ec9SKyle Evans ret = _nn_divrem_unshifted_aliased(q, a, r, v, cnt); 526*f0865ec9SKyle Evans } else { 527*f0865ec9SKyle Evans ret = nn_check_initialized(b); EG(ret, err); 528*f0865ec9SKyle Evans ret = _nn_divrem_unshifted(q, r, a, b, v, cnt); 529*f0865ec9SKyle Evans } 530*f0865ec9SKyle Evans 531*f0865ec9SKyle Evans err: 532*f0865ec9SKyle Evans return ret; 533*f0865ec9SKyle Evans } 534*f0865ec9SKyle Evans 535*f0865ec9SKyle Evans /* 536*f0865ec9SKyle Evans * Compute remainder only and do not normalize it. 537*f0865ec9SKyle Evans * Constant time, see documentation of _nn_divrem_unshifted. 538*f0865ec9SKyle Evans * 539*f0865ec9SKyle Evans * Aliasing of inputs and outputs is possible. 540*f0865ec9SKyle Evans * 541*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. 542*f0865ec9SKyle Evans */ 543*f0865ec9SKyle Evans int nn_mod_unshifted(nn_t r, nn_src_t a, nn_src_t b, word_t v, bitcnt_t cnt) 544*f0865ec9SKyle Evans { 545*f0865ec9SKyle Evans nn q; 546*f0865ec9SKyle Evans int ret; 547*f0865ec9SKyle Evans q.magic = WORD(0); 548*f0865ec9SKyle Evans 549*f0865ec9SKyle Evans ret = nn_init(&q, 0); EG(ret, err); 550*f0865ec9SKyle Evans ret = nn_divrem_unshifted(&q, r, a, b, v, cnt); 551*f0865ec9SKyle Evans 552*f0865ec9SKyle Evans err: 553*f0865ec9SKyle Evans nn_uninit(&q); 554*f0865ec9SKyle Evans 555*f0865ec9SKyle Evans return ret; 556*f0865ec9SKyle Evans } 557*f0865ec9SKyle Evans 558*f0865ec9SKyle Evans /* 559*f0865ec9SKyle Evans * Helper functions for arithmetic in 2-by-1 division 560*f0865ec9SKyle Evans * used in the reciprocal computation. 561*f0865ec9SKyle Evans * 562*f0865ec9SKyle Evans * These are variations of the nn multiprecision functions 563*f0865ec9SKyle Evans * acting on arrays of fixed length, in place, 564*f0865ec9SKyle Evans * and returning carry/borrow. 565*f0865ec9SKyle Evans * 566*f0865ec9SKyle Evans * Done in constant time. 567*f0865ec9SKyle Evans */ 568*f0865ec9SKyle Evans 569*f0865ec9SKyle Evans /* 570*f0865ec9SKyle Evans * Comparison of two limbs numbers. Internal helper. 571*f0865ec9SKyle Evans * Checks left to the caller 572*f0865ec9SKyle Evans */ 573*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _wcmp_22(word_t a[2], word_t b[2]) 574*f0865ec9SKyle Evans { 575*f0865ec9SKyle Evans int mask, ret = 0; 576*f0865ec9SKyle Evans ret += (a[1] > b[1]); 577*f0865ec9SKyle Evans ret -= (a[1] < b[1]); 578*f0865ec9SKyle Evans mask = !(ret & 0x1); 579*f0865ec9SKyle Evans ret += ((a[0] > b[0]) & mask); 580*f0865ec9SKyle Evans ret -= ((a[0] < b[0]) & mask); 581*f0865ec9SKyle Evans return ret; 582*f0865ec9SKyle Evans } 583*f0865ec9SKyle Evans 584*f0865ec9SKyle Evans /* 585*f0865ec9SKyle Evans * Addition of two limbs numbers with carry returned. Internal helper. 586*f0865ec9SKyle Evans * Checks left to the caller. 587*f0865ec9SKyle Evans */ 588*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static word_t _wadd_22(word_t a[2], word_t b[2]) 589*f0865ec9SKyle Evans { 590*f0865ec9SKyle Evans word_t carry; 591*f0865ec9SKyle Evans a[0] = (word_t)(a[0] + b[0]); 592*f0865ec9SKyle Evans carry = (word_t)(a[0] < b[0]); 593*f0865ec9SKyle Evans a[1] = (word_t)(a[1] + carry); 594*f0865ec9SKyle Evans carry = (word_t)(a[1] < carry); 595*f0865ec9SKyle Evans a[1] = (word_t)(a[1] + b[1]); 596*f0865ec9SKyle Evans carry = (word_t)(carry | (a[1] < b[1])); 597*f0865ec9SKyle Evans return carry; 598*f0865ec9SKyle Evans } 599*f0865ec9SKyle Evans 600*f0865ec9SKyle Evans /* 601*f0865ec9SKyle Evans * Subtraction of two limbs numbers with borrow returned. Internal helper. 602*f0865ec9SKyle Evans * Checks left to the caller. 603*f0865ec9SKyle Evans */ 604*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static word_t _wsub_22(word_t a[2], word_t b[2]) 605*f0865ec9SKyle Evans { 606*f0865ec9SKyle Evans word_t borrow, tmp; 607*f0865ec9SKyle Evans tmp = (word_t)(a[0] - b[0]); 608*f0865ec9SKyle Evans borrow = (word_t)(tmp > a[0]); 609*f0865ec9SKyle Evans a[0] = tmp; 610*f0865ec9SKyle Evans tmp = (word_t)(a[1] - borrow); 611*f0865ec9SKyle Evans borrow = (word_t)(tmp > a[1]); 612*f0865ec9SKyle Evans a[1] = (word_t)(tmp - b[1]); 613*f0865ec9SKyle Evans borrow = (word_t)(borrow | (a[1] > tmp)); 614*f0865ec9SKyle Evans return borrow; 615*f0865ec9SKyle Evans } 616*f0865ec9SKyle Evans 617*f0865ec9SKyle Evans /* 618*f0865ec9SKyle Evans * Helper macros for conditional subtraction in 2-by-1 division 619*f0865ec9SKyle Evans * used in the reciprocal computation. 620*f0865ec9SKyle Evans * 621*f0865ec9SKyle Evans * Done in constant time. 622*f0865ec9SKyle Evans */ 623*f0865ec9SKyle Evans 624*f0865ec9SKyle Evans /* Conditional subtraction of a one limb number from a two limbs number. */ 625*f0865ec9SKyle Evans #define WORD_CND_SUB_21(cnd, ah, al, b) do { \ 626*f0865ec9SKyle Evans word_t tmp, mask; \ 627*f0865ec9SKyle Evans mask = WORD_MASK_IFNOTZERO((cnd)); \ 628*f0865ec9SKyle Evans tmp = (word_t)((al) - ((b) & mask)); \ 629*f0865ec9SKyle Evans (ah) = (word_t)((ah) - (tmp > (al))); \ 630*f0865ec9SKyle Evans (al) = tmp; \ 631*f0865ec9SKyle Evans } while (0) 632*f0865ec9SKyle Evans /* Conditional subtraction of a two limbs number from a two limbs number. */ 633*f0865ec9SKyle Evans #define WORD_CND_SUB_22(cnd, ah, al, bh, bl) do { \ 634*f0865ec9SKyle Evans word_t tmp, mask; \ 635*f0865ec9SKyle Evans mask = WORD_MASK_IFNOTZERO((cnd)); \ 636*f0865ec9SKyle Evans tmp = (word_t)((al) - ((bl) & mask)); \ 637*f0865ec9SKyle Evans (ah) = (word_t)((ah) - (tmp > (al))); \ 638*f0865ec9SKyle Evans (al) = tmp; \ 639*f0865ec9SKyle Evans (ah) = (word_t)((ah) - ((bh) & mask)); \ 640*f0865ec9SKyle Evans } while (0) 641*f0865ec9SKyle Evans 642*f0865ec9SKyle Evans /* 643*f0865ec9SKyle Evans * divide two words by a normalized word using schoolbook division on half 644*f0865ec9SKyle Evans * words. This is only used below in the reciprocal computation. No checks 645*f0865ec9SKyle Evans * are performed on inputs. This is expected to be done by the caller. 646*f0865ec9SKyle Evans * 647*f0865ec9SKyle Evans * Returns 0 on success, -1 on error. 648*f0865ec9SKyle Evans */ 649*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _word_divrem(word_t *q, word_t *r, word_t ah, word_t al, word_t b) 650*f0865ec9SKyle Evans { 651*f0865ec9SKyle Evans word_t bh, bl, qh, ql, rm, rhl[2], phl[2]; 652*f0865ec9SKyle Evans int larger, ret; 653*f0865ec9SKyle Evans u8 j; 654*f0865ec9SKyle Evans 655*f0865ec9SKyle Evans MUST_HAVE((WRSHIFT((b), (WORD_BITS - 1)) == WORD(1)), ret, err); 656*f0865ec9SKyle Evans bh = WRSHIFT((b), HWORD_BITS); 657*f0865ec9SKyle Evans bl = WLSHIFT((b), HWORD_BITS); 658*f0865ec9SKyle Evans rhl[1] = ah; 659*f0865ec9SKyle Evans rhl[0] = al; 660*f0865ec9SKyle Evans 661*f0865ec9SKyle Evans /* 662*f0865ec9SKyle Evans * Compute high part of the quotient. We know from 663*f0865ec9SKyle Evans * MUST_HAVE() check above that bh (a word_t) is not 0 664*f0865ec9SKyle Evans */ 665*f0865ec9SKyle Evans 666*f0865ec9SKyle Evans KNOWN_FACT(bh != 0, ret, err); 667*f0865ec9SKyle Evans qh = (rhl[1] / bh); 668*f0865ec9SKyle Evans qh = WORD_MIN(qh, HWORD_MASK); 669*f0865ec9SKyle Evans WORD_MUL(phl[1], phl[0], qh, (b)); 670*f0865ec9SKyle Evans phl[1] = (WLSHIFT(phl[1], HWORD_BITS) | 671*f0865ec9SKyle Evans WRSHIFT(phl[0], HWORD_BITS)); 672*f0865ec9SKyle Evans phl[0] = WLSHIFT(phl[0], HWORD_BITS); 673*f0865ec9SKyle Evans 674*f0865ec9SKyle Evans for (j = 0; j < 2; j++) { 675*f0865ec9SKyle Evans larger = (_wcmp_22(phl, rhl) > 0); 676*f0865ec9SKyle Evans qh = (word_t)(qh - (word_t)larger); 677*f0865ec9SKyle Evans WORD_CND_SUB_22(larger, phl[1], phl[0], bh, bl); 678*f0865ec9SKyle Evans } 679*f0865ec9SKyle Evans 680*f0865ec9SKyle Evans ret = (_wcmp_22(phl, rhl) > 0); 681*f0865ec9SKyle Evans MUST_HAVE(!(ret), ret, err); 682*f0865ec9SKyle Evans IGNORE_RET_VAL(_wsub_22(rhl, phl)); 683*f0865ec9SKyle Evans MUST_HAVE((WRSHIFT(rhl[1], HWORD_BITS) == 0), ret, err); 684*f0865ec9SKyle Evans 685*f0865ec9SKyle Evans /* Compute low part of the quotient. */ 686*f0865ec9SKyle Evans rm = (WLSHIFT(rhl[1], HWORD_BITS) | 687*f0865ec9SKyle Evans WRSHIFT(rhl[0], HWORD_BITS)); 688*f0865ec9SKyle Evans ql = (rm / bh); 689*f0865ec9SKyle Evans ql = WORD_MIN(ql, HWORD_MASK); 690*f0865ec9SKyle Evans WORD_MUL(phl[1], phl[0], ql, (b)); 691*f0865ec9SKyle Evans 692*f0865ec9SKyle Evans for (j = 0; j < 2; j++) { 693*f0865ec9SKyle Evans larger = (_wcmp_22(phl, rhl) > 0); 694*f0865ec9SKyle Evans ql = (word_t) (ql - (word_t)larger); 695*f0865ec9SKyle Evans WORD_CND_SUB_21(larger, phl[1], phl[0], (b)); 696*f0865ec9SKyle Evans } 697*f0865ec9SKyle Evans 698*f0865ec9SKyle Evans ret = _wcmp_22(phl, rhl) > 0; 699*f0865ec9SKyle Evans MUST_HAVE(!(ret), ret, err); 700*f0865ec9SKyle Evans IGNORE_RET_VAL(_wsub_22(rhl, phl)); 701*f0865ec9SKyle Evans /* Set outputs. */ 702*f0865ec9SKyle Evans MUST_HAVE((rhl[1] == WORD(0)), ret, err); 703*f0865ec9SKyle Evans MUST_HAVE(!(rhl[0] >= (b)), ret, err); 704*f0865ec9SKyle Evans (*q) = (WLSHIFT(qh, HWORD_BITS) | ql); 705*f0865ec9SKyle Evans (*r) = rhl[0]; 706*f0865ec9SKyle Evans MUST_HAVE(!((word_t) ((*q)*(b) + (*r)) != (al)), ret, err); 707*f0865ec9SKyle Evans ret = 0; 708*f0865ec9SKyle Evans 709*f0865ec9SKyle Evans err: 710*f0865ec9SKyle Evans return ret; 711*f0865ec9SKyle Evans } 712*f0865ec9SKyle Evans 713*f0865ec9SKyle Evans /* 714*f0865ec9SKyle Evans * Compute the reciprocal of d as 715*f0865ec9SKyle Evans * floor(B^3/(d+1)) - B 716*f0865ec9SKyle Evans * which is used to perform approximate small division using a multiplication. 717*f0865ec9SKyle Evans * 718*f0865ec9SKyle Evans * No attempt was made to make it constant time. Indeed, such values are usually 719*f0865ec9SKyle Evans * precomputed in contexts where constant time is wanted, e.g. in the fp layer. 720*f0865ec9SKyle Evans * 721*f0865ec9SKyle Evans * Returns 0 on success, -1 on error. 722*f0865ec9SKyle Evans */ 723*f0865ec9SKyle Evans int wreciprocal(word_t dh, word_t dl, word_t *reciprocal) 724*f0865ec9SKyle Evans { 725*f0865ec9SKyle Evans word_t q, carry, r[2], t[2]; 726*f0865ec9SKyle Evans int ret; 727*f0865ec9SKyle Evans 728*f0865ec9SKyle Evans MUST_HAVE((reciprocal != NULL), ret, err); 729*f0865ec9SKyle Evans 730*f0865ec9SKyle Evans if (((word_t)(dh + WORD(1)) == WORD(0)) && 731*f0865ec9SKyle Evans ((word_t)(dl + WORD(1)) == WORD(0))) { 732*f0865ec9SKyle Evans (*reciprocal) = WORD(0); 733*f0865ec9SKyle Evans ret = 0; 734*f0865ec9SKyle Evans goto err; 735*f0865ec9SKyle Evans } 736*f0865ec9SKyle Evans 737*f0865ec9SKyle Evans if ((word_t)(dh + WORD(1)) == WORD(0)) { 738*f0865ec9SKyle Evans q = (word_t)(~dh); 739*f0865ec9SKyle Evans r[1] = (word_t)(~dl); 740*f0865ec9SKyle Evans } else { 741*f0865ec9SKyle Evans t[1] = (word_t)(~dh); 742*f0865ec9SKyle Evans t[0] = (word_t)(~dl); 743*f0865ec9SKyle Evans ret = _word_divrem(&q, r+1, t[1], t[0], 744*f0865ec9SKyle Evans (word_t)(dh + WORD(1))); EG(ret, err); 745*f0865ec9SKyle Evans } 746*f0865ec9SKyle Evans 747*f0865ec9SKyle Evans if ((word_t)(dl + WORD(1)) == WORD(0)) { 748*f0865ec9SKyle Evans (*reciprocal) = q; 749*f0865ec9SKyle Evans ret = 0; 750*f0865ec9SKyle Evans goto err; 751*f0865ec9SKyle Evans } 752*f0865ec9SKyle Evans 753*f0865ec9SKyle Evans r[0] = WORD(0); 754*f0865ec9SKyle Evans 755*f0865ec9SKyle Evans WORD_MUL(t[1], t[0], q, (word_t)~dl); 756*f0865ec9SKyle Evans carry = _wadd_22(r, t); 757*f0865ec9SKyle Evans 758*f0865ec9SKyle Evans t[0] = (word_t)(dl + WORD(1)); 759*f0865ec9SKyle Evans t[1] = dh; 760*f0865ec9SKyle Evans while (carry || (_wcmp_22(r, t) >= 0)) { 761*f0865ec9SKyle Evans q++; 762*f0865ec9SKyle Evans carry = (word_t)(carry - _wsub_22(r, t)); 763*f0865ec9SKyle Evans } 764*f0865ec9SKyle Evans 765*f0865ec9SKyle Evans (*reciprocal) = q; 766*f0865ec9SKyle Evans ret = 0; 767*f0865ec9SKyle Evans 768*f0865ec9SKyle Evans err: 769*f0865ec9SKyle Evans return ret; 770*f0865ec9SKyle Evans } 771*f0865ec9SKyle Evans 772*f0865ec9SKyle Evans /* 773*f0865ec9SKyle Evans * Given an odd number p, compute division coefficients p_normalized, 774*f0865ec9SKyle Evans * p_shift and p_reciprocal so that: 775*f0865ec9SKyle Evans * - p_shift = p_rounded_bitlen - bitsizeof(p), where 776*f0865ec9SKyle Evans * o p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of 777*f0865ec9SKyle Evans * minimum number of words required to store p) and 778*f0865ec9SKyle Evans * o p_bitlen is the real bit size of p 779*f0865ec9SKyle Evans * - p_normalized = p << p_shift 780*f0865ec9SKyle Evans * - p_reciprocal = B^3 / ((p_normalized >> (pbitlen - 2*WORDSIZE)) + 1) - B 781*f0865ec9SKyle Evans * with B = 2^WORDSIZE 782*f0865ec9SKyle Evans * 783*f0865ec9SKyle Evans * These coefficients are useful for the optimized shifted variants of NN 784*f0865ec9SKyle Evans * division and modular functions. Because we have two word_t outputs 785*f0865ec9SKyle Evans * (p_shift and p_reciprocal), these are passed through word_t pointers. 786*f0865ec9SKyle Evans * Aliasing of outputs with the input is possible since p_in is copied in 787*f0865ec9SKyle Evans * local p at the beginning of the function. 788*f0865ec9SKyle Evans * 789*f0865ec9SKyle Evans * The function does not support aliasing. 790*f0865ec9SKyle Evans * 791*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. 792*f0865ec9SKyle Evans */ 793*f0865ec9SKyle Evans int nn_compute_div_coefs(nn_t p_normalized, word_t *p_shift, 794*f0865ec9SKyle Evans word_t *p_reciprocal, nn_src_t p_in) 795*f0865ec9SKyle Evans { 796*f0865ec9SKyle Evans bitcnt_t p_rounded_bitlen, p_bitlen; 797*f0865ec9SKyle Evans nn p, tmp_nn; 798*f0865ec9SKyle Evans int ret; 799*f0865ec9SKyle Evans p.magic = tmp_nn.magic = WORD(0); 800*f0865ec9SKyle Evans 801*f0865ec9SKyle Evans ret = nn_check_initialized(p_in); EG(ret, err); 802*f0865ec9SKyle Evans 803*f0865ec9SKyle Evans MUST_HAVE((p_shift != NULL), ret, err); 804*f0865ec9SKyle Evans MUST_HAVE((p_reciprocal != NULL), ret, err); 805*f0865ec9SKyle Evans 806*f0865ec9SKyle Evans /* Unsupported aliasing */ 807*f0865ec9SKyle Evans MUST_HAVE((p_normalized != p_in), ret, err); 808*f0865ec9SKyle Evans 809*f0865ec9SKyle Evans ret = nn_init(&p, 0); EG(ret, err); 810*f0865ec9SKyle Evans ret = nn_copy(&p, p_in); EG(ret, err); 811*f0865ec9SKyle Evans 812*f0865ec9SKyle Evans /* 813*f0865ec9SKyle Evans * In order for our reciprocal division routines to work, it is expected 814*f0865ec9SKyle Evans * that the bit length (including leading zeroes) of input prime 815*f0865ec9SKyle Evans * p is >= 2 * wlen where wlen is the number of bits of a word size. 816*f0865ec9SKyle Evans */ 817*f0865ec9SKyle Evans if (p.wlen < 2) { 818*f0865ec9SKyle Evans ret = nn_set_wlen(&p, 2); EG(ret, err); 819*f0865ec9SKyle Evans } 820*f0865ec9SKyle Evans 821*f0865ec9SKyle Evans ret = nn_init(p_normalized, 0); EG(ret, err); 822*f0865ec9SKyle Evans ret = nn_init(&tmp_nn, 0); EG(ret, err); 823*f0865ec9SKyle Evans 824*f0865ec9SKyle Evans /* p_rounded_bitlen = bitlen of p rounded to word size */ 825*f0865ec9SKyle Evans p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen); 826*f0865ec9SKyle Evans 827*f0865ec9SKyle Evans /* p_shift */ 828*f0865ec9SKyle Evans ret = nn_bitlen(&p, &p_bitlen); EG(ret, err); 829*f0865ec9SKyle Evans (*p_shift) = (word_t)(p_rounded_bitlen - p_bitlen); 830*f0865ec9SKyle Evans 831*f0865ec9SKyle Evans /* p_normalized = p << pshift */ 832*f0865ec9SKyle Evans ret = nn_lshift(p_normalized, &p, (bitcnt_t)(*p_shift)); EG(ret, err); 833*f0865ec9SKyle Evans 834*f0865ec9SKyle Evans /* Sanity check to protect the p_reciprocal computation */ 835*f0865ec9SKyle Evans MUST_HAVE((p_rounded_bitlen >= (2 * WORDSIZE)), ret, err); 836*f0865ec9SKyle Evans 837*f0865ec9SKyle Evans /* 838*f0865ec9SKyle Evans * p_reciprocal = B^3 / ((p_normalized >> (p_rounded_bitlen - 2 * wlen)) + 1) - B 839*f0865ec9SKyle Evans * where B = 2^wlen where wlen = word size in bits. We use our NN 840*f0865ec9SKyle Evans * helper to compute it. 841*f0865ec9SKyle Evans */ 842*f0865ec9SKyle Evans ret = nn_rshift(&tmp_nn, p_normalized, (bitcnt_t)(p_rounded_bitlen - (2 * WORDSIZE))); EG(ret, err); 843*f0865ec9SKyle Evans ret = wreciprocal(tmp_nn.val[1], tmp_nn.val[0], p_reciprocal); 844*f0865ec9SKyle Evans 845*f0865ec9SKyle Evans err: 846*f0865ec9SKyle Evans nn_uninit(&p); 847*f0865ec9SKyle Evans nn_uninit(&tmp_nn); 848*f0865ec9SKyle Evans 849*f0865ec9SKyle Evans return ret; 850*f0865ec9SKyle Evans } 851*f0865ec9SKyle Evans 852*f0865ec9SKyle Evans /* 853*f0865ec9SKyle Evans * Compute quotient remainder of Euclidean division. 854*f0865ec9SKyle Evans * 855*f0865ec9SKyle Evans * This function is a wrapper to normalize the divisor, i.e. shift it so that 856*f0865ec9SKyle Evans * the MSB of its MSW is set, and precompute the reciprocal of this MSW to be 857*f0865ec9SKyle Evans * used to perform small divisions using multiplications during the long 858*f0865ec9SKyle Evans * schoolbook division. It uses the helper functions/macros above. 859*f0865ec9SKyle Evans * 860*f0865ec9SKyle Evans * This is NOT constant time with regards to the word length of a and b, 861*f0865ec9SKyle Evans * but also the actual bitlength of b as we need to normalize b at the 862*f0865ec9SKyle Evans * bit level. 863*f0865ec9SKyle Evans * Moreover the precomputation of the reciprocal is not constant time at all. 864*f0865ec9SKyle Evans * 865*f0865ec9SKyle Evans * r need not be initialized, the function does it for the the caller. 866*f0865ec9SKyle Evans * 867*f0865ec9SKyle Evans * This function does not support aliasing. This is an internal helper, which 868*f0865ec9SKyle Evans * expects caller to check parameters. 869*f0865ec9SKyle Evans * 870*f0865ec9SKyle Evans * It returns 0 on sucess, -1 on error. 871*f0865ec9SKyle Evans */ 872*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b) 873*f0865ec9SKyle Evans { 874*f0865ec9SKyle Evans nn b_large, b_normalized; 875*f0865ec9SKyle Evans bitcnt_t cnt; 876*f0865ec9SKyle Evans word_t v; 877*f0865ec9SKyle Evans nn_src_t ptr = b; 878*f0865ec9SKyle Evans int ret, iszero; 879*f0865ec9SKyle Evans b_large.magic = b_normalized.magic = WORD(0); 880*f0865ec9SKyle Evans 881*f0865ec9SKyle Evans ret = nn_init(r, 0); EG(ret, err); 882*f0865ec9SKyle Evans ret = nn_init(q, 0); EG(ret, err); 883*f0865ec9SKyle Evans ret = nn_init(&b_large, 0); EG(ret, err); 884*f0865ec9SKyle Evans 885*f0865ec9SKyle Evans MUST_HAVE(!nn_iszero(b, &iszero) && !iszero, ret, err); 886*f0865ec9SKyle Evans 887*f0865ec9SKyle Evans if (b->wlen == 1) { 888*f0865ec9SKyle Evans ret = nn_copy(&b_large, b); EG(ret, err); 889*f0865ec9SKyle Evans 890*f0865ec9SKyle Evans /* Expand our big number with zeroes */ 891*f0865ec9SKyle Evans ret = nn_set_wlen(&b_large, 2); EG(ret, err); 892*f0865ec9SKyle Evans 893*f0865ec9SKyle Evans /* 894*f0865ec9SKyle Evans * This cast could seem inappropriate, but we are 895*f0865ec9SKyle Evans * sure here that we won't touch ptr since it is only 896*f0865ec9SKyle Evans * given as a const parameter to sub functions. 897*f0865ec9SKyle Evans */ 898*f0865ec9SKyle Evans ptr = (nn_src_t) &b_large; 899*f0865ec9SKyle Evans } 900*f0865ec9SKyle Evans 901*f0865ec9SKyle Evans /* After this, we only handle >= 2 words big numbers */ 902*f0865ec9SKyle Evans MUST_HAVE(!(ptr->wlen < 2), ret, err); 903*f0865ec9SKyle Evans 904*f0865ec9SKyle Evans ret = nn_init(&b_normalized, (u16)((ptr->wlen) * WORD_BYTES)); EG(ret, err); 905*f0865ec9SKyle Evans ret = nn_clz(ptr, &cnt); EG(ret, err); 906*f0865ec9SKyle Evans ret = nn_lshift_fixedlen(&b_normalized, ptr, cnt); EG(ret, err); 907*f0865ec9SKyle Evans ret = wreciprocal(b_normalized.val[ptr->wlen - 1], 908*f0865ec9SKyle Evans b_normalized.val[ptr->wlen - 2], 909*f0865ec9SKyle Evans &v); /* Not constant time. */ EG(ret, err); 910*f0865ec9SKyle Evans 911*f0865ec9SKyle Evans ret = _nn_divrem_unshifted(q, r, a, &b_normalized, v, cnt); 912*f0865ec9SKyle Evans 913*f0865ec9SKyle Evans err: 914*f0865ec9SKyle Evans nn_uninit(&b_normalized); 915*f0865ec9SKyle Evans nn_uninit(&b_large); 916*f0865ec9SKyle Evans 917*f0865ec9SKyle Evans return ret; 918*f0865ec9SKyle Evans } 919*f0865ec9SKyle Evans 920*f0865ec9SKyle Evans /* 921*f0865ec9SKyle Evans * Returns 0 on succes, -1 on error. Internal helper. Checks on params 922*f0865ec9SKyle Evans * expected from the caller. 923*f0865ec9SKyle Evans */ 924*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int __nn_divrem_notrim_alias(nn_t q, nn_t r, nn_src_t a, nn_src_t b) 925*f0865ec9SKyle Evans { 926*f0865ec9SKyle Evans nn a_cpy, b_cpy; 927*f0865ec9SKyle Evans int ret; 928*f0865ec9SKyle Evans a_cpy.magic = b_cpy.magic = WORD(0); 929*f0865ec9SKyle Evans 930*f0865ec9SKyle Evans ret = nn_init(&a_cpy, 0); EG(ret, err); 931*f0865ec9SKyle Evans ret = nn_init(&b_cpy, 0); EG(ret, err); 932*f0865ec9SKyle Evans ret = nn_copy(&a_cpy, a); EG(ret, err); 933*f0865ec9SKyle Evans ret = nn_copy(&b_cpy, b); EG(ret, err); 934*f0865ec9SKyle Evans ret = _nn_divrem(q, r, &a_cpy, &b_cpy); 935*f0865ec9SKyle Evans 936*f0865ec9SKyle Evans err: 937*f0865ec9SKyle Evans nn_uninit(&b_cpy); 938*f0865ec9SKyle Evans nn_uninit(&a_cpy); 939*f0865ec9SKyle Evans 940*f0865ec9SKyle Evans return ret; 941*f0865ec9SKyle Evans } 942*f0865ec9SKyle Evans 943*f0865ec9SKyle Evans 944*f0865ec9SKyle Evans 945*f0865ec9SKyle Evans /* 946*f0865ec9SKyle Evans * Compute quotient and remainder and normalize them. 947*f0865ec9SKyle Evans * Not constant time, see documentation of _nn_divrem. 948*f0865ec9SKyle Evans * 949*f0865ec9SKyle Evans * Aliased version of _nn_divrem. Returns 0 on success, 950*f0865ec9SKyle Evans * -1 on error. 951*f0865ec9SKyle Evans */ 952*f0865ec9SKyle Evans int nn_divrem_notrim(nn_t q, nn_t r, nn_src_t a, nn_src_t b) 953*f0865ec9SKyle Evans { 954*f0865ec9SKyle Evans int ret; 955*f0865ec9SKyle Evans 956*f0865ec9SKyle Evans /* _nn_divrem initializes q and r */ 957*f0865ec9SKyle Evans ret = nn_check_initialized(a); EG(ret, err); 958*f0865ec9SKyle Evans ret = nn_check_initialized(b); EG(ret, err); 959*f0865ec9SKyle Evans MUST_HAVE(((q != NULL) && (r != NULL)), ret, err); 960*f0865ec9SKyle Evans 961*f0865ec9SKyle Evans /* 962*f0865ec9SKyle Evans * Handle aliasing whenever any of the inputs is 963*f0865ec9SKyle Evans * used as an output. 964*f0865ec9SKyle Evans */ 965*f0865ec9SKyle Evans if ((a == q) || (a == r) || (b == q) || (b == r)) { 966*f0865ec9SKyle Evans ret = __nn_divrem_notrim_alias(q, r, a, b); 967*f0865ec9SKyle Evans } else { 968*f0865ec9SKyle Evans ret = _nn_divrem(q, r, a, b); 969*f0865ec9SKyle Evans } 970*f0865ec9SKyle Evans 971*f0865ec9SKyle Evans err: 972*f0865ec9SKyle Evans return ret; 973*f0865ec9SKyle Evans } 974*f0865ec9SKyle Evans 975*f0865ec9SKyle Evans /* 976*f0865ec9SKyle Evans * Compute quotient and remainder and normalize them. 977*f0865ec9SKyle Evans * Not constant time, see documentation of _nn_divrem(). 978*f0865ec9SKyle Evans * Returns 0 on success, -1 on error. 979*f0865ec9SKyle Evans * 980*f0865ec9SKyle Evans * Aliasing is supported. 981*f0865ec9SKyle Evans */ 982*f0865ec9SKyle Evans int nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b) 983*f0865ec9SKyle Evans { 984*f0865ec9SKyle Evans int ret; 985*f0865ec9SKyle Evans 986*f0865ec9SKyle Evans ret = nn_divrem_notrim(q, r, a, b); EG(ret, err); 987*f0865ec9SKyle Evans 988*f0865ec9SKyle Evans /* Normalize (trim) the quotient and rest to avoid size overflow */ 989*f0865ec9SKyle Evans ret = nn_normalize(q); EG(ret, err); 990*f0865ec9SKyle Evans ret = nn_normalize(r); 991*f0865ec9SKyle Evans 992*f0865ec9SKyle Evans err: 993*f0865ec9SKyle Evans return ret; 994*f0865ec9SKyle Evans } 995*f0865ec9SKyle Evans 996*f0865ec9SKyle Evans /* 997*f0865ec9SKyle Evans * Compute remainder only and do not normalize it. Not constant time, see 998*f0865ec9SKyle Evans * documentation of _nn_divrem(). Returns 0 on success, -1 on error. 999*f0865ec9SKyle Evans * 1000*f0865ec9SKyle Evans * Aliasing is supported. 1001*f0865ec9SKyle Evans */ 1002*f0865ec9SKyle Evans int nn_mod_notrim(nn_t r, nn_src_t a, nn_src_t b) 1003*f0865ec9SKyle Evans { 1004*f0865ec9SKyle Evans int ret; 1005*f0865ec9SKyle Evans nn q; 1006*f0865ec9SKyle Evans q.magic = WORD(0); 1007*f0865ec9SKyle Evans 1008*f0865ec9SKyle Evans /* nn_divrem() will init q. */ 1009*f0865ec9SKyle Evans ret = nn_divrem_notrim(&q, r, a, b); 1010*f0865ec9SKyle Evans 1011*f0865ec9SKyle Evans nn_uninit(&q); 1012*f0865ec9SKyle Evans 1013*f0865ec9SKyle Evans return ret; 1014*f0865ec9SKyle Evans } 1015*f0865ec9SKyle Evans 1016*f0865ec9SKyle Evans /* 1017*f0865ec9SKyle Evans * Compute remainder only and normalize it. Not constant time, see 1018*f0865ec9SKyle Evans * documentation of _nn_divrem(). r is initialized by the function. 1019*f0865ec9SKyle Evans * Returns 0 on success, -1 on error. 1020*f0865ec9SKyle Evans * 1021*f0865ec9SKyle Evans * Aliasing is supported. 1022*f0865ec9SKyle Evans */ 1023*f0865ec9SKyle Evans int nn_mod(nn_t r, nn_src_t a, nn_src_t b) 1024*f0865ec9SKyle Evans { 1025*f0865ec9SKyle Evans int ret; 1026*f0865ec9SKyle Evans nn q; 1027*f0865ec9SKyle Evans q.magic = WORD(0); 1028*f0865ec9SKyle Evans 1029*f0865ec9SKyle Evans /* nn_divrem will init q. */ 1030*f0865ec9SKyle Evans ret = nn_divrem(&q, r, a, b); 1031*f0865ec9SKyle Evans 1032*f0865ec9SKyle Evans nn_uninit(&q); 1033*f0865ec9SKyle Evans 1034*f0865ec9SKyle Evans return ret; 1035*f0865ec9SKyle Evans } 1036*f0865ec9SKyle Evans 1037*f0865ec9SKyle Evans /* 1038*f0865ec9SKyle Evans * Below follow gcd and xgcd non constant time functions for the user ease. 1039*f0865ec9SKyle Evans */ 1040*f0865ec9SKyle Evans 1041*f0865ec9SKyle Evans /* 1042*f0865ec9SKyle Evans * Unaliased version of xgcd, and we suppose that a >= b. Badly non-constant 1043*f0865ec9SKyle Evans * time per the algorithm used. The function returns 0 on success, -1 on 1044*f0865ec9SKyle Evans * error. internal helper: expect caller to check parameters. 1045*f0865ec9SKyle Evans */ 1046*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, 1047*f0865ec9SKyle Evans int *sign) 1048*f0865ec9SKyle Evans { 1049*f0865ec9SKyle Evans nn_t c, d, q, r, u1, v1, u2, v2; 1050*f0865ec9SKyle Evans nn scratch[8]; 1051*f0865ec9SKyle Evans int ret, swap, iszero; 1052*f0865ec9SKyle Evans u8 i; 1053*f0865ec9SKyle Evans 1054*f0865ec9SKyle Evans for (i = 0; i < 8; i++){ 1055*f0865ec9SKyle Evans scratch[i].magic = WORD(0); 1056*f0865ec9SKyle Evans } 1057*f0865ec9SKyle Evans 1058*f0865ec9SKyle Evans /* 1059*f0865ec9SKyle Evans * Maintain: 1060*f0865ec9SKyle Evans * |u1 v1| |c| = |a| 1061*f0865ec9SKyle Evans * |u2 v2| |d| |b| 1062*f0865ec9SKyle Evans * u1, v1, u2, v2 >= 0 1063*f0865ec9SKyle Evans * c >= d 1064*f0865ec9SKyle Evans * 1065*f0865ec9SKyle Evans * Initially: 1066*f0865ec9SKyle Evans * |1 0 | |a| = |a| 1067*f0865ec9SKyle Evans * |0 1 | |b| |b| 1068*f0865ec9SKyle Evans * 1069*f0865ec9SKyle Evans * At each iteration: 1070*f0865ec9SKyle Evans * c >= d 1071*f0865ec9SKyle Evans * c = q*d + r 1072*f0865ec9SKyle Evans * |u1 v1| = |q*u1+v1 u1| 1073*f0865ec9SKyle Evans * |u2 v2| |q*u2+v2 u2| 1074*f0865ec9SKyle Evans * 1075*f0865ec9SKyle Evans * Finally, after i steps: 1076*f0865ec9SKyle Evans * |u1 v1| |g| = |a| 1077*f0865ec9SKyle Evans * |u2 v2| |0| = |b| 1078*f0865ec9SKyle Evans * 1079*f0865ec9SKyle Evans * Inverting the matrix: 1080*f0865ec9SKyle Evans * |g| = (-1)^i | v2 -v1| |a| 1081*f0865ec9SKyle Evans * |0| |-u2 u1| |b| 1082*f0865ec9SKyle Evans */ 1083*f0865ec9SKyle Evans 1084*f0865ec9SKyle Evans /* 1085*f0865ec9SKyle Evans * Initialization. 1086*f0865ec9SKyle Evans */ 1087*f0865ec9SKyle Evans ret = nn_init(g, 0); EG(ret, err); 1088*f0865ec9SKyle Evans ret = nn_init(u, 0); EG(ret, err); 1089*f0865ec9SKyle Evans ret = nn_init(v, 0); EG(ret, err); 1090*f0865ec9SKyle Evans ret = nn_iszero(b, &iszero); EG(ret, err); 1091*f0865ec9SKyle Evans if (iszero) { 1092*f0865ec9SKyle Evans /* gcd(0, a) = a, and 1*a + 0*b = a */ 1093*f0865ec9SKyle Evans ret = nn_copy(g, a); EG(ret, err); 1094*f0865ec9SKyle Evans ret = nn_one(u); EG(ret, err); 1095*f0865ec9SKyle Evans ret = nn_zero(v); EG(ret, err); 1096*f0865ec9SKyle Evans (*sign) = 1; 1097*f0865ec9SKyle Evans goto err; 1098*f0865ec9SKyle Evans } 1099*f0865ec9SKyle Evans 1100*f0865ec9SKyle Evans for (i = 0; i < 8; i++){ 1101*f0865ec9SKyle Evans ret = nn_init(&scratch[i], 0); EG(ret, err); 1102*f0865ec9SKyle Evans } 1103*f0865ec9SKyle Evans 1104*f0865ec9SKyle Evans u1 = &(scratch[0]); 1105*f0865ec9SKyle Evans v1 = &(scratch[1]); 1106*f0865ec9SKyle Evans u2 = &(scratch[2]); 1107*f0865ec9SKyle Evans v2 = &(scratch[3]); 1108*f0865ec9SKyle Evans ret = nn_one(u1); EG(ret, err); 1109*f0865ec9SKyle Evans ret = nn_zero(v1); EG(ret, err); 1110*f0865ec9SKyle Evans ret = nn_zero(u2); EG(ret, err); 1111*f0865ec9SKyle Evans ret = nn_one(v2); EG(ret, err); 1112*f0865ec9SKyle Evans c = &(scratch[4]); 1113*f0865ec9SKyle Evans d = &(scratch[5]); 1114*f0865ec9SKyle Evans ret = nn_copy(c, a); EG(ret, err); /* Copy could be skipped. */ 1115*f0865ec9SKyle Evans ret = nn_copy(d, b); EG(ret, err); /* Copy could be skipped. */ 1116*f0865ec9SKyle Evans q = &(scratch[6]); 1117*f0865ec9SKyle Evans r = &(scratch[7]); 1118*f0865ec9SKyle Evans swap = 0; 1119*f0865ec9SKyle Evans 1120*f0865ec9SKyle Evans /* 1121*f0865ec9SKyle Evans * Loop. 1122*f0865ec9SKyle Evans */ 1123*f0865ec9SKyle Evans ret = nn_iszero(d, &iszero); EG(ret, err); 1124*f0865ec9SKyle Evans while (!iszero) { 1125*f0865ec9SKyle Evans ret = nn_divrem(q, r, c, d); EG(ret, err); 1126*f0865ec9SKyle Evans ret = nn_normalize(q); EG(ret, err); 1127*f0865ec9SKyle Evans ret = nn_normalize(r); EG(ret, err); 1128*f0865ec9SKyle Evans ret = nn_copy(c, r); EG(ret, err); 1129*f0865ec9SKyle Evans ret = nn_mul(r, q, u1); EG(ret, err); 1130*f0865ec9SKyle Evans ret = nn_normalize(r); EG(ret, err); 1131*f0865ec9SKyle Evans ret = nn_add(v1, v1, r); EG(ret, err); 1132*f0865ec9SKyle Evans ret = nn_mul(r, q, u2); EG(ret, err); 1133*f0865ec9SKyle Evans ret = nn_normalize(r); EG(ret, err); 1134*f0865ec9SKyle Evans ret = nn_add(v2, v2, r); EG(ret, err); 1135*f0865ec9SKyle Evans ret = nn_normalize(v1); EG(ret, err); 1136*f0865ec9SKyle Evans ret = nn_normalize(v2); EG(ret, err); 1137*f0865ec9SKyle Evans swap = 1; 1138*f0865ec9SKyle Evans ret = nn_iszero(c, &iszero); EG(ret, err); 1139*f0865ec9SKyle Evans if (iszero) { 1140*f0865ec9SKyle Evans break; 1141*f0865ec9SKyle Evans } 1142*f0865ec9SKyle Evans ret = nn_divrem(q, r, d, c); EG(ret, err); 1143*f0865ec9SKyle Evans ret = nn_normalize(q); EG(ret, err); 1144*f0865ec9SKyle Evans ret = nn_normalize(r); EG(ret, err); 1145*f0865ec9SKyle Evans ret = nn_copy(d, r); EG(ret, err); 1146*f0865ec9SKyle Evans ret = nn_mul(r, q, v1); EG(ret, err); 1147*f0865ec9SKyle Evans ret = nn_normalize(r); EG(ret, err); 1148*f0865ec9SKyle Evans ret = nn_add(u1, u1, r); EG(ret, err); 1149*f0865ec9SKyle Evans ret = nn_mul(r, q, v2); EG(ret, err); 1150*f0865ec9SKyle Evans ret = nn_normalize(r); EG(ret, err); 1151*f0865ec9SKyle Evans ret = nn_add(u2, u2, r); EG(ret, err); 1152*f0865ec9SKyle Evans ret = nn_normalize(u1); EG(ret, err); 1153*f0865ec9SKyle Evans ret = nn_normalize(u2); EG(ret, err); 1154*f0865ec9SKyle Evans swap = 0; 1155*f0865ec9SKyle Evans 1156*f0865ec9SKyle Evans /* refresh loop condition */ 1157*f0865ec9SKyle Evans ret = nn_iszero(d, &iszero); EG(ret, err); 1158*f0865ec9SKyle Evans } 1159*f0865ec9SKyle Evans 1160*f0865ec9SKyle Evans /* Copies could be skipped. */ 1161*f0865ec9SKyle Evans if (swap) { 1162*f0865ec9SKyle Evans ret = nn_copy(g, d); EG(ret, err); 1163*f0865ec9SKyle Evans ret = nn_copy(u, u2); EG(ret, err); 1164*f0865ec9SKyle Evans ret = nn_copy(v, u1); EG(ret, err); 1165*f0865ec9SKyle Evans } else { 1166*f0865ec9SKyle Evans ret = nn_copy(g, c); EG(ret, err); 1167*f0865ec9SKyle Evans ret = nn_copy(u, v2); EG(ret, err); 1168*f0865ec9SKyle Evans ret = nn_copy(v, v1); EG(ret, err); 1169*f0865ec9SKyle Evans } 1170*f0865ec9SKyle Evans 1171*f0865ec9SKyle Evans /* swap = -1 means u <= 0; = 1 means v <= 0 */ 1172*f0865ec9SKyle Evans (*sign) = swap ? -1 : 1; 1173*f0865ec9SKyle Evans ret = 0; 1174*f0865ec9SKyle Evans 1175*f0865ec9SKyle Evans err: 1176*f0865ec9SKyle Evans /* 1177*f0865ec9SKyle Evans * We uninit scratch elements in all cases, i.e. whether or not 1178*f0865ec9SKyle Evans * we return an error or not. 1179*f0865ec9SKyle Evans */ 1180*f0865ec9SKyle Evans for (i = 0; i < 8; i++){ 1181*f0865ec9SKyle Evans nn_uninit(&scratch[i]); 1182*f0865ec9SKyle Evans } 1183*f0865ec9SKyle Evans /* Unitialize output in case of error */ 1184*f0865ec9SKyle Evans if (ret){ 1185*f0865ec9SKyle Evans nn_uninit(v); 1186*f0865ec9SKyle Evans nn_uninit(u); 1187*f0865ec9SKyle Evans nn_uninit(g); 1188*f0865ec9SKyle Evans } 1189*f0865ec9SKyle Evans 1190*f0865ec9SKyle Evans return ret; 1191*f0865ec9SKyle Evans } 1192*f0865ec9SKyle Evans 1193*f0865ec9SKyle Evans /* 1194*f0865ec9SKyle Evans * Aliased version of xgcd, and no assumption on a and b. Not constant time at 1195*f0865ec9SKyle Evans * all. returns 0 on success, -1 on error. XXX document 'sign' 1196*f0865ec9SKyle Evans * 1197*f0865ec9SKyle Evans * Aliasing is supported. 1198*f0865ec9SKyle Evans */ 1199*f0865ec9SKyle Evans int nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, int *sign) 1200*f0865ec9SKyle Evans { 1201*f0865ec9SKyle Evans /* Handle aliasing 1202*f0865ec9SKyle Evans * Note: in order to properly handle aliasing, we accept to lose 1203*f0865ec9SKyle Evans * some "space" on the stack with copies. 1204*f0865ec9SKyle Evans */ 1205*f0865ec9SKyle Evans nn a_cpy, b_cpy; 1206*f0865ec9SKyle Evans nn_src_t a_, b_; 1207*f0865ec9SKyle Evans int ret, cmp, _sign; 1208*f0865ec9SKyle Evans a_cpy.magic = b_cpy.magic = WORD(0); 1209*f0865ec9SKyle Evans 1210*f0865ec9SKyle Evans /* The internal _nn_xgcd function initializes g, u and v */ 1211*f0865ec9SKyle Evans ret = nn_check_initialized(a); EG(ret, err); 1212*f0865ec9SKyle Evans ret = nn_check_initialized(b); EG(ret, err); 1213*f0865ec9SKyle Evans MUST_HAVE((sign != NULL), ret, err); 1214*f0865ec9SKyle Evans 1215*f0865ec9SKyle Evans ret = nn_init(&b_cpy, 0); EG(ret, err); 1216*f0865ec9SKyle Evans 1217*f0865ec9SKyle Evans /* Aliasing of a */ 1218*f0865ec9SKyle Evans if ((g == a) || (u == a) || (v == a)){ 1219*f0865ec9SKyle Evans ret = nn_copy(&a_cpy, a); EG(ret, err); 1220*f0865ec9SKyle Evans a_ = &a_cpy; 1221*f0865ec9SKyle Evans } else { 1222*f0865ec9SKyle Evans a_ = a; 1223*f0865ec9SKyle Evans } 1224*f0865ec9SKyle Evans /* Aliasing of b */ 1225*f0865ec9SKyle Evans if ((g == b) || (u == b) || (v == b)) { 1226*f0865ec9SKyle Evans ret = nn_copy(&b_cpy, b); EG(ret, err); 1227*f0865ec9SKyle Evans b_ = &b_cpy; 1228*f0865ec9SKyle Evans } else { 1229*f0865ec9SKyle Evans b_ = b; 1230*f0865ec9SKyle Evans } 1231*f0865ec9SKyle Evans 1232*f0865ec9SKyle Evans ret = nn_cmp(a_, b_, &cmp); EG(ret, err); 1233*f0865ec9SKyle Evans if (cmp < 0) { 1234*f0865ec9SKyle Evans /* If a < b, swap the inputs */ 1235*f0865ec9SKyle Evans ret = _nn_xgcd(g, v, u, b_, a_, &_sign); EG(ret, err); 1236*f0865ec9SKyle Evans (*sign) = -(_sign); 1237*f0865ec9SKyle Evans } else { 1238*f0865ec9SKyle Evans ret = _nn_xgcd(g, u, v, a_, b_, &_sign); EG(ret, err); 1239*f0865ec9SKyle Evans (*sign) = _sign; 1240*f0865ec9SKyle Evans } 1241*f0865ec9SKyle Evans 1242*f0865ec9SKyle Evans err: 1243*f0865ec9SKyle Evans nn_uninit(&b_cpy); 1244*f0865ec9SKyle Evans nn_uninit(&a_cpy); 1245*f0865ec9SKyle Evans 1246*f0865ec9SKyle Evans return ret; 1247*f0865ec9SKyle Evans } 1248*f0865ec9SKyle Evans 1249*f0865ec9SKyle Evans /* 1250*f0865ec9SKyle Evans * Compute g = gcd(a, b). Internally use the xgcd and drop u and v. 1251*f0865ec9SKyle Evans * Not constant time at all. Returns 0 on success, -1 on error. 1252*f0865ec9SKyle Evans * XXX document 'sign'. 1253*f0865ec9SKyle Evans * 1254*f0865ec9SKyle Evans * Aliasing is supported. 1255*f0865ec9SKyle Evans */ 1256*f0865ec9SKyle Evans int nn_gcd(nn_t g, nn_src_t a, nn_src_t b, int *sign) 1257*f0865ec9SKyle Evans { 1258*f0865ec9SKyle Evans nn u, v; 1259*f0865ec9SKyle Evans int ret; 1260*f0865ec9SKyle Evans u.magic = v.magic = WORD(0); 1261*f0865ec9SKyle Evans 1262*f0865ec9SKyle Evans /* nn_xgcd will initialize g, u and v and 1263*f0865ec9SKyle Evans * check if a and b are indeed initialized. 1264*f0865ec9SKyle Evans */ 1265*f0865ec9SKyle Evans ret = nn_xgcd(g, &u, &v, a, b, sign); 1266*f0865ec9SKyle Evans 1267*f0865ec9SKyle Evans nn_uninit(&u); 1268*f0865ec9SKyle Evans nn_uninit(&v); 1269*f0865ec9SKyle Evans 1270*f0865ec9SKyle Evans return ret; 1271*f0865ec9SKyle Evans } 1272