/* mpfr_div -- divide two floating-point numbers

Copyright 1999, 2001-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

/* References:
   [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
       Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
       July 25-27, 2011, pages 7-14.
   [2] Improved Division by Invariant Integers, Niels Möller and
       Torbjörn Granlund, IEEE Transactions on Computers, volume 60,
       number 2, pages 165-175, 2011.
*/

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

#if !defined(MPFR_GENERIC_ABI)

#if GMP_NUMB_BITS == 64

#include "invert_limb.h"

/* Given u = u1*B+u0 < v = v1*B+v0 with v normalized (high bit of v1 set),
   put in q = Q1*B+Q0 an approximation of floor(u*B^2/v), with:
   B = 2^GMP_NUMB_BITS and q <= floor(u*B^2/v) <= q + 21.
   Note: this function requires __gmpfr_invert_limb_approx (from
   invert_limb.h), which is so far only provided for a 64-bit limb.
   Note: __gmpfr_invert_limb_approx can be replaced by __gmpfr_invert_limb,
   in which case the bound 21 reduces to 16. */
static void
mpfr_div2_approx (mpfr_limb_ptr Q1, mpfr_limb_ptr Q0,
                  mp_limb_t u1, mp_limb_t u0,
                  mp_limb_t v1, mp_limb_t v0)
{
  mp_limb_t inv, q1, q0, r1, r0, cy, xx, yy;

  /* first compute an approximation of q1, using a lower approximation of
     B^2/(v1+1) - B */
  if (MPFR_UNLIKELY(v1 == MPFR_LIMB_MAX))
    inv = MPFR_LIMB_ZERO;
  else
    __gmpfr_invert_limb_approx (inv, v1 + 1);
  /* now inv <= B^2/(v1+1) - B */
  umul_ppmm (q1, q0, u1, inv);
  q1 += u1;
  /* now q1 <= u1*B/(v1+1) < (u1*B+u0)*B/(v1*B+v0) */

  /* compute q1*(v1*B+v0) into r1:r0:yy and subtract from u1:u0:0 */
  umul_ppmm (r1, r0, q1, v1);
  umul_ppmm (xx, yy, q1, v0);

  ADD_LIMB (r0, xx, cy);
  r1 += cy;

  /* we ignore yy below, but first increment r0, to ensure we get a lower
     approximation of the remainder */
  r0 += yy != 0;
  r1 += r0 == 0 && yy != 0;
  r0 = u0 - r0;
  r1 = u1 - r1 - (r0 > u0);

  /* r1:r0 should be non-negative */
  MPFR_ASSERTD((r1 & MPFR_LIMB_HIGHBIT) == 0);

  /* the second quotient limb is approximated by (r1*B^2+r0*B) / v1,
     and since (B+inv)/B approximates B/v1, this is in turn approximated
     by (r1*B+r0)*(B+inv)/B = r1*B + r1*inv + r0 + r0*inv/B */

  q0 = r0;
  q1 += r1;
  /* add floor(r0*inv/B) to q0 */
  umul_ppmm (xx, yy, r0, inv);
  ADD_LIMB (q0, xx, cy);
  q1 += cy;
  MPFR_ASSERTD (r1 <= 4);
  /* TODO: use value coverage on r1 to check that the 5 cases are tested. */
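  /* Editorial note (added): each remaining unit in r1 stands for one
     multiple of B in the remainder r1*B + r0.  Its contribution to the
     quotient is r1*(B + inv): the r1*B part was already accounted for by
     "q1 += r1" above, so each iteration of the loop below only needs to
     add the fractional part inv to q0, propagating the carry into q1. */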
  while (r1) /* the number of loops is at most 4 */
    {
      /* add inv to q0 */
      ADD_LIMB (q0, inv, cy);
      q1 += cy;
      r1 --;
    }

  *Q1 = q1;
  *Q0 = q0;
}

#endif /* GMP_NUMB_BITS == 64 */

/* Special code for PREC(q) = PREC(u) = PREC(v) = p < GMP_NUMB_BITS */
static int
mpfr_div_1 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(q);
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mpfr_prec_t sh = GMP_NUMB_BITS - p;
  mp_limb_t u0 = MPFR_MANT(u)[0];
  mp_limb_t v0 = MPFR_MANT(v)[0];
  mp_limb_t q0, rb, sb, mask = MPFR_LIMB_MASK(sh);
  int extra;

  if ((extra = (u0 >= v0)))
    u0 -= v0;

#if GMP_NUMB_BITS == 64 /* __gmpfr_invert_limb_approx only exists for 64-bit */
  /* First try with an approximate quotient.
     FIXME: for p >= 62 we have sh-1 < 2 and will never be able to round
     correctly.  Even for p = 61 we have sh-1 = 2 and we can round correctly
     only when the two last bits of q0 are 01, which happens with
     probability 25% only. */
  {
    mp_limb_t inv;
    __gmpfr_invert_limb_approx (inv, v0);
    umul_ppmm (rb, sb, u0, inv);
  }
  rb += u0;
  q0 = rb >> extra;
  /* rb does not exceed the true quotient floor(u0*2^GMP_NUMB_BITS/v0),
     with error at most 2, which means the rational quotient q satisfies
     rb <= q < rb + 3. We can round correctly except when the last sh-1 bits
     of q0 are 000...000 or 111...111 or 111...110. */
  if (MPFR_LIKELY(((q0 + 2) & (mask >> 1)) > 2))
    {
      rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
      sb = 1; /* result cannot be exact in this case */
    }
  else /* the true quotient is rb, rb+1 or rb+2 */
    {
      mp_limb_t h, l;
      q0 = rb;
      umul_ppmm (h, l, q0, v0);
      MPFR_ASSERTD(h < u0 || (h == u0 && l == MPFR_LIMB_ZERO));
      /* subtract {h,l} from {u0,0} */
      sub_ddmmss (h, l, u0, 0, h, l);
      /* the remainder {h, l} should be < v0 */
      /* This while loop is executed at most two times, but does not seem
         slower than two consecutive identical if-statements. */
      while (h || l >= v0)
        {
          q0 ++;
          h -= (l < v0);
          l -= v0;
        }
      MPFR_ASSERTD(h == 0 && l < v0);
      sb = l | (q0 & extra);
      q0 >>= extra;
      rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
      sb |= q0 & (mask >> 1);
    }
#else
  udiv_qrnnd (q0, sb, u0, 0, v0);
  sb |= q0 & extra;
  q0 >>= extra;
  rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= q0 & (mask >> 1);
#endif

  qp[0] = (MPFR_LIMB_HIGHBIT | q0) & ~mask;
  qx += extra;
  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));
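  /* Worked illustration (editorial, with made-up numbers): for p = 4 and
     GMP_NUMB_BITS = 64 we have sh = 60, so the quotient keeps its top 4
     bits; rb is bit sh-1 = 59 (the first discarded bit) and sb collects
     the bits below it.  E.g. u = 0.1001 and v = 0.1100 in binary give
     u/v = 0.1100 exactly, so rb = sb = 0 and the division is exact. */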
  /* rounding */
  if (MPFR_UNLIKELY(qx > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(qx < __gmpfr_emin))
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[0] = 1000....000 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 || (qp[0] == MPFR_LIMB_HIGHBIT && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* It is not possible to have rb <> 0 and sb = 0 here, since it would
         mean an n-bit by n-bit division gives an exact (n+1)-bit number.
         And since the case rb = sb = 0 was already dealt with, we cannot
         have sb = 0. Thus we cannot be in the middle of two numbers. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE << sh;
      MPFR_ASSERTD(qp[0] != 0);
      /* It is not possible to have an overflow in the addition above.
         Proof: if p is the precision of the inputs, it would mean we have two
         integers n and d with 2^(p-1) <= n, d < 2^p, such that the binary
         expansion of n/d starts with p '1', and has at least one '1' later.
         We distinguish two cases:
         (1) if n/d < 1, it would mean 1-2^(-p) < n/d < 1
         (2) if n/d >= 1, it would mean 2-2^(1-p) < n/d < 2
         In case (1), multiplying by d we get d-d/2^p < n < d,
         which has no integer solution since d/2^p < 1.
         In case (2), multiplying by d we get 2d-2d/2^p < n < 2d:
         (2a) if d=2^(p-1), we get 2^p-1 < n < 2^p, which has no solution;
         (2b) if d>=2^(p-1)+1, then 2d-2d/2^p >= 2^p+2-2 = 2^p, thus there is
              no solution n < 2^p either. */
      MPFR_RET(MPFR_SIGN(q));
    }
}
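/* Editorial numeric check of the overflow-proof above, for p = 4:
   in case (2a), d = 2^3 = 8 gives 2d - 2d/2^p = 16 - 1 = 15, and
   15 < n < 16 indeed has no integer solution; in case (2b), d = 9 gives
   2d - 2d/2^p = 18 - 18/16 = 16.875 > 2^p, so no n < 2^p = 16 works. */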
/* Special code for PREC(q) = GMP_NUMB_BITS,
   with PREC(u), PREC(v) <= GMP_NUMB_BITS. */
static int
mpfr_div_1n (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mp_limb_t u0 = MPFR_MANT(u)[0];
  mp_limb_t v0 = MPFR_MANT(v)[0];
  mp_limb_t q0, rb, sb, l;
  int extra;

  MPFR_ASSERTD(MPFR_PREC(q) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(v) <= GMP_NUMB_BITS);

  if ((extra = (u0 >= v0)))
    u0 -= v0;

#if GMP_NUMB_BITS == 64 /* __gmpfr_invert_limb_approx only exists for 64-bit */
  {
    mp_limb_t inv, h;

    /* First compute an approximate quotient. */
    __gmpfr_invert_limb_approx (inv, v0);
    umul_ppmm (rb, sb, u0, inv);
    q0 = u0 + rb;
    /* q0 does not exceed the true quotient floor(u0*2^GMP_NUMB_BITS/v0),
       with error at most 2, which means the rational quotient q satisfies
       q0 <= q < q0 + 3, thus the true quotient is q0, q0+1 or q0+2 */
    umul_ppmm (h, l, q0, v0);
    MPFR_ASSERTD(h < u0 || (h == u0 && l == MPFR_LIMB_ZERO));
    /* subtract {h,l} from {u0,0} */
    sub_ddmmss (h, l, u0, 0, h, l);
    /* the remainder {h, l} should be < v0 */
    /* This while loop is executed at most two times, but does not seem
       slower than two consecutive identical if-statements. */
    while (h || l >= v0)
      {
        q0 ++;
        h -= (l < v0);
        l -= v0;
      }
    MPFR_ASSERTD(h == 0 && l < v0);
  }
#else
  udiv_qrnnd (q0, l, u0, 0, v0);
#endif

  /* now (u0 - extra*v0) * 2^GMP_NUMB_BITS = q0*v0 + l with 0 <= l < v0 */

  /* If extra=0, the quotient is q0, the round bit is 1 if l >= v0/2,
     and sb are the remaining bits from l.
     If extra=1, the quotient is MPFR_LIMB_HIGHBIT + (q0 >> 1), the round bit
     is the least significant bit of q0, and sb is l. */

  if (extra == 0)
    {
      qp[0] = q0;
      /* If "l + l < l", then there is a carry in l + l, thus 2*l > v0.
         Otherwise if there is no carry, we check whether 2*l >= v0. */
      rb = (l + l < l) || (l + l >= v0);
      sb = (rb) ? l + l - v0 : l;
    }
  else
    {
      qp[0] = MPFR_LIMB_HIGHBIT | (q0 >> 1);
      rb = q0 & MPFR_LIMB_ONE;
      sb = l;
      qx ++;
    }
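  /* Editorial illustration: with u = 0.11 and v = 0.10 in binary we have
     u0 >= v0, so extra = 1 above; u/v = 1.1 lies in [1, 2), and the code
     above shifts the quotient significand right by one bit and increments
     the exponent qx accordingly. */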
  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (MPFR_UNLIKELY(qx > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(qx < __gmpfr_emin))
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[0] = 1000....000 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 || (qp[0] == MPFR_LIMB_HIGHBIT && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* It is not possible to have rb <> 0 and sb = 0 here, since it would
         mean an n-bit by n-bit division gives an exact (n+1)-bit number.
         And since the case rb = sb = 0 was already dealt with, we cannot
         have sb = 0. Thus we cannot be in the middle of two numbers. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE;
      /* there can be no overflow in the addition above,
         see the analysis of mpfr_div_1 */
      MPFR_ASSERTD(qp[0] != 0);
      MPFR_RET(MPFR_SIGN(q));
    }
}

/* Special code for GMP_NUMB_BITS < PREC(q) < 2*GMP_NUMB_BITS and
   PREC(u) = PREC(v) = PREC(q) */
static int
mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(q);
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mpfr_prec_t sh = 2*GMP_NUMB_BITS - p;
  mp_limb_t h, rb, sb, mask = MPFR_LIMB_MASK(sh);
  mp_limb_t v1 = MPFR_MANT(v)[1], v0 = MPFR_MANT(v)[0];
  mp_limb_t q1, q0, r3, r2, r1, r0, l, t;
  int extra;

  r3 = MPFR_MANT(u)[1];
  r2 = MPFR_MANT(u)[0];
  extra = r3 > v1 || (r3 == v1 && r2 >= v0);
  if (extra)
    sub_ddmmss (r3, r2, r3, r2, v1, v0);

  MPFR_ASSERTD(r3 < v1 || (r3 == v1 && r2 < v0));

#if GMP_NUMB_BITS == 64
  mpfr_div2_approx (&q1, &q0, r3, r2, v1, v0);
  /* we know q1*B+q0 is less than or equal to the exact quotient, with
     difference at most 21 */
  if (MPFR_LIKELY(((q0 + 21) & (mask >> 1)) > 21))
    sb = 1; /* result is not exact when we can round with an approximation */
  else
    {
      /* we know q1:q0 is a good-enough approximation, use it! */
      mp_limb_t s0, s1, s2, h, l;

      /* Since we know the difference should be at most 21*(v1:v0) after the
         subtraction below, thus at most 21*2^128, it suffices to compute the
         lower 3 limbs of (q1:q0) * (v1:v0). */
      umul_ppmm (s1, s0, q0, v0);
      umul_ppmm (s2, l, q0, v1);
      s1 += l;
      s2 += (s1 < l);
      umul_ppmm (h, l, q1, v0);
      s1 += l;
      s2 += h + (s1 < l);
      s2 += q1 * v1;
      /* Subtract s2:s1:s0 from r2:0:0, with result in s2:s1:s0. */
      s2 = r2 - s2;
      /* now negate s1:s0 */
      s0 = -s0;
      s1 = -s1 - (s0 != 0);
      /* there is a borrow in s2 when s0 and s1 are not both zero */
      s2 -= (s1 != 0 || s0 != 0);
      while (s2 > 0 || (s1 > v1) || (s1 == v1 && s0 >= v0))
        {
          /* add 1 to q1:q0 */
          q0 ++;
          q1 += (q0 == 0);
          /* subtract v1:v0 from s2:s1:s0 */
          s2 -= (s1 < v1) || (s1 == v1 && s0 < v0);
          sub_ddmmss (s1, s0, s1, s0, v1, v0);
        }
      sb = s1 | s0;
    }
  goto round_div2;
#endif
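  /* Generic path (editorial summary): the two quotient limbs q1 and q0 are
     obtained by two schoolbook steps (apart from the rare special cases
     r3 = v1 and r2 = v1 handled below); each step divides the two high
     limbs of the current remainder by v1 with udiv_qrnnd, then corrects
     for the neglected low limb v0, using at most two adjustment
     iterations. */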
  /* now r3:r2 < v1:v0 */
  if (MPFR_UNLIKELY(r3 == v1)) /* can occur in some rare cases */
    {
      /* This can only occur in case extra=0, since otherwise we would have
         u_old >= u_new + v >= B^2/2 + B^2/2 = B^2. In this case we have
         r3 = u1 and r2 = u0, thus the remainder u*B-q1*v is
         v1*B^2+u0*B-(B-1)*(v1*B+v0) = (u0-v0+v1)*B+v0.
         Warning: in this case q1 = B-1 can be too large, for example with
         u = B^2/2 and v = B^2/2 + B - 1, then u*B-(B-1)*v = -1/2*B^2+2*B-1. */
      MPFR_ASSERTD(extra == 0);
      q1 = MPFR_LIMB_MAX;
      r1 = v0;
      t = v0 - r2; /* t > 0 since r3:r2 < v1:v0 */
      r2 = v1 - t;
      if (t > v1) /* q1 = B-1 is too large, we need q1 = B-2, which is ok
                     since u*B - q1*v >= v1*B^2-(B-2)*(v1*B+B-1) =
                     -B^2 + 2*B*v1 + 3*B - 2 >= 0 since v1>=B/2 and B>=2 */
        {
          q1 --;
          /* add v to r2:r1 */
          r1 += v0;
          r2 += v1 + (r1 < v0);
        }
    }
  else
    {
      /* divide r3:r2 by v1: requires r3 < v1 */
      udiv_qrnnd (q1, r2, r3, r2, v1);
      /* u-extra*v = q1 * v1 + r2 */

      /* now subtract q1*v0 from r2:0 */
      umul_ppmm (h, l, q1, v0);
      t = r2; /* save old value of r2 */
      r1 = -l;
      r2 -= h + (l != 0);
      /* Note: h + (l != 0) < 2^GMP_NUMB_BITS. */

      /* we have r2:r1 = oldr2:0 - q1*v0 mod 2^(2*GMP_NUMB_BITS)
         thus (u-extra*v)*B = q1 * v + r2:r1 mod 2^(2*GMP_NUMB_BITS) */

      /* this while loop should be run at most twice */
      while (r2 > t) /* borrow when subtracting h + (l != 0), q1 too large */
        {
          q1 --;
          /* add v1:v0 to r2:r1 */
          t = r2;
          r1 += v0;
          r2 += v1 + (r1 < v0);
          /* note: since 2^(GMP_NUMB_BITS-1) <= v1 + (r1 < v0)
             <= 2^GMP_NUMB_BITS, it suffices to check if r2 <= t to see
             if there was a carry or not. */
        }
    }

  /* now (u-extra*v)*B = q1 * v + r2:r1 with 0 <= r2:r1 < v */

  MPFR_ASSERTD(r2 < v1 || (r2 == v1 && r1 < v0));

  if (MPFR_UNLIKELY(r2 == v1))
    {
      q0 = MPFR_LIMB_MAX;
      /* r2:r1:0 - q0*(v1:v0) = v1:r1:0 - (B-1)*(v1:v0)
                              = r1:0 - v0:0 + v1:v0 */
      r0 = v0;
      t = v0 - r1; /* t > 0 since r2:r1 < v1:v0 */
      r1 = v1 - t;
      if (t > v1)
        {
          q0 --;
          /* add v to r1:r0 */
          r0 += v0;
          r1 += v1 + (r0 < v0);
        }
    }
  else
    {
      /* divide r2:r1 by v1: requires r2 < v1 */
      udiv_qrnnd (q0, r1, r2, r1, v1);

      /* r2:r1 = q0*v1 + r1 */

      /* subtract q0*v0 from r1:0 */
      umul_ppmm (h, l, q0, v0);
      t = r1;
      r0 = -l;
      r1 -= h + (l != 0);

      /* this while loop should be run at most twice */
      while (r1 > t) /* borrow when subtracting h + (l != 0),
                        q0 was too large */
        {
          q0 --;
          /* add v1:v0 to r1:r0 */
          t = r1;
          r0 += v0;
          r1 += v1 + (r0 < v0);
          /* note: since 2^(GMP_NUMB_BITS-1) <= v1 + (r0 < v0)
             <= 2^GMP_NUMB_BITS, it suffices to check if r1 <= t to see
             if there was a carry or not. */
        }
    }

  MPFR_ASSERTD(r1 < v1 || (r1 == v1 && r0 < v0));

  /* now (u-extra*v)*B^2 = (q1:q0) * v + r1:r0 */

  sb = r1 | r0;

  /* here, q1:q0 should be an approximation of the quotient (or the exact
     quotient), and sb the sticky bit */

#if GMP_NUMB_BITS == 64
 round_div2:
#endif
  if (extra)
    {
      qx ++;
      sb |= q0 & 1;
      q0 = (q1 << (GMP_NUMB_BITS - 1)) | (q0 >> 1);
      q1 = MPFR_LIMB_HIGHBIT | (q1 >> 1);
    }
  rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (q0 & mask) ^ rb;
  qp[1] = q1;
  qp[0] = q0 & ~mask;

  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (qx > __gmpfr_emax)
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (qx < __gmpfr_emin)
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[1] = 100....000, qp[0] = 0 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 ||
           (qp[1] == MPFR_LIMB_HIGHBIT && qp[0] == MPFR_LIMB_ZERO && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* See the comment in mpfr_div_1. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE << sh;
      qp[1] += qp[0] == 0;
      /* there can be no overflow in the addition above,
         see the analysis of mpfr_div_1 */
      MPFR_ASSERTD(qp[1] != 0);
      MPFR_RET(MPFR_SIGN(q));
    }
}

#endif /* !defined(MPFR_GENERIC_ABI) */

/* check if {ap, an} is zero */
static int
mpfr_mpn_cmpzero (mpfr_limb_ptr ap, mp_size_t an)
{
  MPFR_ASSERTD (an >= 0);
  while (an > 0)
    if (MPFR_LIKELY(ap[--an] != MPFR_LIMB_ZERO))
      return 1;
  return 0;
}
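/* Editorial example of the ">> extra" convention used by the two helpers
   below, with toy 4-bit limbs: for bp = {0b0011, 0b0101} and extra = 1,
   the value compared or subtracted is {bp, 2} >> 1, i.e. {0b1001, 0b0010},
   and the shifted-out bit bp[0] & 1 still counts as a nonzero low part
   making b slightly larger. */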
/* compare {ap, an} and {bp, bn} >> extra,
   aligned by the more significant limbs.
   Takes into account bp[0] for extra=1.
*/
static int
mpfr_mpn_cmp_aux (mpfr_limb_ptr ap, mp_size_t an,
                  mpfr_limb_ptr bp, mp_size_t bn, int extra)
{
  int cmp = 0;
  mp_size_t k;
  mp_limb_t bb;

  MPFR_ASSERTD (an >= 0);
  MPFR_ASSERTD (bn >= 0);
  MPFR_ASSERTD (extra == 0 || extra == 1);

  if (an >= bn)
    {
      k = an - bn;
      while (cmp == 0 && bn > 0)
        {
          bn --;
          bb = (extra) ? ((bp[bn+1] << (GMP_NUMB_BITS - 1)) | (bp[bn] >> 1))
                       : bp[bn];
          cmp = (ap[k + bn] > bb) ? 1 : ((ap[k + bn] < bb) ? -1 : 0);
        }
      bb = (extra) ? bp[0] << (GMP_NUMB_BITS - 1) : MPFR_LIMB_ZERO;
      while (cmp == 0 && k > 0)
        {
          k--;
          cmp = (ap[k] > bb) ? 1 : ((ap[k] < bb) ? -1 : 0);
          bb = MPFR_LIMB_ZERO; /* ensure we consider only once bp[0] & 1 */
        }
      if (cmp == 0 && bb != MPFR_LIMB_ZERO)
        cmp = -1;
    }
  else /* an < bn */
    {
      k = bn - an;
      while (cmp == 0 && an > 0)
        {
          an --;
          bb = (extra) ? ((bp[k+an+1] << (GMP_NUMB_BITS - 1)) | (bp[k+an] >> 1))
                       : bp[k+an];
          if (ap[an] > bb)
            cmp = 1;
          else if (ap[an] < bb)
            cmp = -1;
        }
      while (cmp == 0 && k > 0)
        {
          k--;
          bb = (extra) ? ((bp[k+1] << (GMP_NUMB_BITS - 1)) | (bp[k] >> 1))
                       : bp[k];
          cmp = (bb != MPFR_LIMB_ZERO) ? -1 : 0;
        }
      if (cmp == 0 && extra && (bp[0] & MPFR_LIMB_ONE))
        cmp = -1;
    }
  return cmp;
}

/* {ap, n} <- {ap, n} - {bp, n} >> extra - cy, with cy = 0 or 1.
   Return borrow out.
*/
static mp_limb_t
mpfr_mpn_sub_aux (mpfr_limb_ptr ap, mpfr_limb_ptr bp, mp_size_t n,
                  mp_limb_t cy, int extra)
{
  mp_limb_t bb, rp;

  MPFR_ASSERTD (cy <= 1);
  MPFR_ASSERTD (n >= 0);

  while (n--)
    {
      bb = (extra) ? (MPFR_LIMB_LSHIFT(bp[1], GMP_NUMB_BITS - 1) | (bp[0] >> 1))
                   : bp[0];
      rp = ap[0] - bb - cy;
      cy = (ap[0] < bb) || (cy && rp == MPFR_LIMB_MAX) ?
        MPFR_LIMB_ONE : MPFR_LIMB_ZERO;
      ap[0] = rp;
      ap ++;
      bp ++;
    }
  MPFR_ASSERTD (cy <= 1);
  return cy;
}

MPFR_HOT_FUNCTION_ATTR int
mpfr_div (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mp_size_t q0size, usize, vsize;
  mp_size_t qsize; /* number of limbs wanted for the computed quotient */
  mp_size_t qqsize;
  mp_size_t k;
  mpfr_limb_ptr q0p, qp;
  mpfr_limb_ptr up, vp;
  mpfr_limb_ptr ap;
  mpfr_limb_ptr bp;
  mp_limb_t qh;
  mp_limb_t sticky_u, sticky_v;
  mp_limb_t low_u;
  mp_limb_t sticky;
  mp_limb_t sticky3;
  mp_limb_t round_bit;
  mpfr_exp_t qexp;
  int sign_quotient;
  int extra_bit;
  int sh, sh2;
  int inex;
  int like_rndz;
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC (
    ("u[%Pd]=%.*Rg v[%Pd]=%.*Rg rnd=%d",
     mpfr_get_prec (u), mpfr_log_prec, u,
     mpfr_get_prec (v), mpfr_log_prec, v, rnd_mode),
    ("q[%Pd]=%.*Rg inexact=%d", mpfr_get_prec (q), mpfr_log_prec, q, inex));

  /**************************************************************************
   *                                                                        *
   *              This part of the code deals with special cases            *
   *                                                                        *
   **************************************************************************/

  if (MPFR_UNLIKELY(MPFR_ARE_SINGULAR(u,v)))
    {
      if (MPFR_IS_NAN(u) || MPFR_IS_NAN(v))
        {
          MPFR_SET_NAN(q);
          MPFR_RET_NAN;
        }
      sign_quotient = MPFR_MULT_SIGN(MPFR_SIGN(u), MPFR_SIGN(v));
      MPFR_SET_SIGN(q, sign_quotient);
      if (MPFR_IS_INF(u))
        {
          if (MPFR_IS_INF(v))
            {
              MPFR_SET_NAN(q);
              MPFR_RET_NAN;
            }
          else
            {
              MPFR_SET_INF(q);
              MPFR_RET(0);
            }
        }
      else if (MPFR_IS_INF(v))
        {
          MPFR_SET_ZERO (q);
          MPFR_RET (0);
        }
      else if (MPFR_IS_ZERO (v))
        {
          if (MPFR_IS_ZERO (u))
            {
              MPFR_SET_NAN(q);
              MPFR_RET_NAN;
            }
          else
            {
              MPFR_ASSERTD (! MPFR_IS_INF (u));
              MPFR_SET_INF(q);
              MPFR_SET_DIVBY0 ();
              MPFR_RET(0);
            }
        }
      else
        {
          MPFR_ASSERTD (MPFR_IS_ZERO (u));
          MPFR_SET_ZERO (q);
          MPFR_RET (0);
        }
    }
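  /* Editorial illustration of the dispatch below (hypothetical user code,
     not part of MPFR): with 64-bit limbs and all three precisions equal to
     53, the division is handled by mpfr_div_1; with precision 64 it would
     go to mpfr_div_1n, and with any precision from 65 to 127 to mpfr_div_2.

       mpfr_t q, u, v;
       mpfr_inits2 (53, q, u, v, (mpfr_ptr) 0);
       mpfr_set_ui (u, 1, MPFR_RNDN);
       mpfr_set_ui (v, 3, MPFR_RNDN);
       mpfr_div (q, u, v, MPFR_RNDN);   -> takes the mpfr_div_1 fast path
       mpfr_clears (q, u, v, (mpfr_ptr) 0);
  */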
  /* When MPFR_GENERIC_ABI is defined, we don't use special code. */
#if !defined(MPFR_GENERIC_ABI)
  if (MPFR_GET_PREC(u) == MPFR_GET_PREC(q) &&
      MPFR_GET_PREC(v) == MPFR_GET_PREC(q))
    {
      if (MPFR_GET_PREC(q) < GMP_NUMB_BITS)
        return mpfr_div_1 (q, u, v, rnd_mode);

      if (GMP_NUMB_BITS < MPFR_GET_PREC(q) &&
          MPFR_GET_PREC(q) < 2 * GMP_NUMB_BITS)
        return mpfr_div_2 (q, u, v, rnd_mode);

      if (MPFR_GET_PREC(q) == GMP_NUMB_BITS)
        return mpfr_div_1n (q, u, v, rnd_mode);
    }
#endif /* !defined(MPFR_GENERIC_ABI) */

  usize = MPFR_LIMB_SIZE(u);
  vsize = MPFR_LIMB_SIZE(v);
  q0size = MPFR_LIMB_SIZE(q); /* number of limbs of destination */
  q0p = MPFR_MANT(q);
  up = MPFR_MANT(u);
  vp = MPFR_MANT(v);
  sticky_u = MPFR_LIMB_ZERO;
  sticky_v = MPFR_LIMB_ZERO;
  round_bit = MPFR_LIMB_ZERO;

  /**************************************************************************
   *                                                                        *
   *              End of the part concerning special values.                *
   *                                                                        *
   **************************************************************************/

  /* When the divisor has one limb and MPFR_LONG_WITHIN_LIMB is defined,
     we can use mpfr_div_ui, which should be faster, assuming there is no
     intermediate overflow or underflow.
     The divisor interpreted as an integer satisfies
     2^(GMP_NUMB_BITS-1) <= vm < 2^GMP_NUMB_BITS, thus the quotient
     satisfies 2^(EXP(u)-1-GMP_NUMB_BITS) < u/vm < 2^(EXP(u)-GMP_NUMB_BITS+1)
     and its exponent is either EXP(u)-GMP_NUMB_BITS or one more. */
#ifdef MPFR_LONG_WITHIN_LIMB
  if (vsize <= 1 && __gmpfr_emin <= MPFR_EXP(u) - GMP_NUMB_BITS
      && MPFR_EXP(u) - GMP_NUMB_BITS + 1 <= __gmpfr_emax
      && vp[0] <= ULONG_MAX)
    {
      mpfr_exp_t exp_v = MPFR_EXP(v); /* save it in case q=v */
      if (MPFR_IS_POS (v))
        inex = mpfr_div_ui (q, u, vp[0], rnd_mode);
      else
        {
          inex = -mpfr_div_ui (q, u, vp[0], MPFR_INVERT_RND(rnd_mode));
          MPFR_CHANGE_SIGN(q);
        }
      /* q did not under/overflow */
      MPFR_EXP(q) -= exp_v;
      /* The following test is needed, otherwise the next addition
         on the exponent may overflow, e.g. when dividing the
         largest finite MPFR number by the smallest positive one. */
      if (MPFR_UNLIKELY (MPFR_EXP(q) > __gmpfr_emax - GMP_NUMB_BITS))
        return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));
      MPFR_EXP(q) += GMP_NUMB_BITS;
      return mpfr_check_range (q, inex, rnd_mode);
    }
#endif

  MPFR_TMP_MARK(marker);

  /* set sign */
  sign_quotient = MPFR_MULT_SIGN(MPFR_SIGN(u), MPFR_SIGN(v));
  MPFR_SET_SIGN(q, sign_quotient);
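  /* Editorial illustration: extra_bit compares the significands of u and v
     as fractions in [1/2, 1).  E.g. u = 6 = 0.110 * 2^3 and v = 5 =
     0.101 * 2^3 give extra_bit = 1, and 6/5 = 1.2 = 0.6 * 2^1 indeed has
     exponent EXP(u) - EXP(v) + 1 = 1; with the operands swapped,
     extra_bit = 0 and 5/6 keeps exponent 0. */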
  /* determine if an extra bit comes from the division, i.e. if the
     significand of u (as a fraction in [1/2, 1[) is larger than that
     of v */
  if (MPFR_LIKELY(up[usize - 1] != vp[vsize - 1]))
    extra_bit = (up[usize - 1] > vp[vsize - 1]) ? 1 : 0;
  else /* most significant limbs are equal, must look at further limbs */
    {
      mp_size_t l;

      k = usize - 1;
      l = vsize - 1;
      while (k != 0 && l != 0 && up[--k] == vp[--l]);
      /* now k=0 or l=0 or up[k] != vp[l] */
      if (up[k] != vp[l])
        extra_bit = (up[k] > vp[l]);
      /* now up[k] = vp[l], thus either k=0 or l=0 */
      else if (l == 0) /* no more divisor limb */
        extra_bit = 1;
      else /* k=0: no more dividend limb */
        extra_bit = mpfr_mpn_cmpzero (vp, l) == 0;
    }

  /* set exponent */
  qexp = MPFR_GET_EXP (u) - MPFR_GET_EXP (v) + extra_bit;

  /* sh is the number of zero bits in the low limb of the quotient */
  MPFR_UNSIGNED_MINUS_MODULO(sh, MPFR_PREC(q));

  like_rndz = rnd_mode == MPFR_RNDZ ||
    rnd_mode == (sign_quotient < 0 ? MPFR_RNDU : MPFR_RNDD);

  /**************************************************************************
   *                                                                        *
   *       We first try Mulders' short division (for large operands)        *
   *                                                                        *
   **************************************************************************/

  if (MPFR_UNLIKELY(q0size >= MPFR_DIV_THRESHOLD &&
                    vsize >= MPFR_DIV_THRESHOLD))
    {
      mp_size_t n = q0size + 1; /* we will perform a short (2n)/n division */
      mpfr_limb_ptr ap, bp, qp;
      mpfr_prec_t p;

      /* since Mulders' short division clobbers the dividend, we have to
         copy it */
      ap = MPFR_TMP_LIMBS_ALLOC (n + n);
      if (usize >= n + n) /* truncate the dividend */
        MPN_COPY(ap, up + usize - (n + n), n + n);
      else /* zero-pad the dividend */
        {
          MPN_COPY(ap + (n + n) - usize, up, usize);
          MPN_ZERO(ap, (n + n) - usize);
        }

      if (vsize >= n) /* truncate the divisor */
        bp = vp + vsize - n;
      else /* zero-pad the divisor */
        {
          bp = MPFR_TMP_LIMBS_ALLOC (n);
          MPN_COPY(bp + n - vsize, vp, vsize);
          MPN_ZERO(bp, n - vsize);
        }

      qp = MPFR_TMP_LIMBS_ALLOC (n);
      /* since n = q0size + 1, we have n >= 2 here */
      qh = mpfr_divhigh_n (qp, ap, bp, n);
      MPFR_ASSERTD (qh == 0 || qh == 1);
      /* in all cases, the error is at most (2n+2) ulps on qh*B^n+{qp,n},
         cf algorithms.tex */

      p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (2 * n + 2);
      /* If rnd=RNDN, we need to be able to round with a directed rounding
         and one more bit. */
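      /* Editorial numeric check: for n = 16 limbs of 64 bits, the error
         bound of 2n+2 = 34 ulps costs MPFR_INT_CEIL_LOG2(34) = 6 bits,
         so p = 1024 - 6 = 1018 bits of qh*B^n+{qp,n} can be trusted when
         deciding below whether correct rounding is possible. */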
      if (qh == 1)
        {
          mpn_rshift (qp, qp, n, 1);
          qp[n - 1] |= MPFR_LIMB_HIGHBIT;
        }
      if (MPFR_LIKELY (mpfr_round_p (qp, n, p,
                                     MPFR_PREC(q) + (rnd_mode == MPFR_RNDN))))
        {
          /* we can round correctly whatever the rounding mode */
          MPN_COPY (q0p, qp + 1, q0size);
          q0p[0] &= ~MPFR_LIMB_MASK(sh); /* put to zero low sh bits */

          if (rnd_mode == MPFR_RNDN) /* round to nearest */
            {
              /* we know we can round, thus we are never in the even rule case:
                 if the round bit is 0, we truncate
                 if the round bit is 1, we add 1 */
              if (sh > 0)
                round_bit = (qp[1] >> (sh - 1)) & 1;
              else
                round_bit = qp[0] >> (GMP_NUMB_BITS - 1);
              /* TODO: add value coverage tests in tdiv to check that
                 we reach this part with different values of qh and
                 round_bit (4 cases). */
              if (round_bit == 0)
                {
                  inex = -1;
                  goto truncate;
                }
              else /* round_bit = 1 */
                goto add_one_ulp;
            }
          else if (! like_rndz) /* round away */
            goto add_one_ulp;
          else /* round to zero: nothing to do */
            {
              inex = -1;
              goto truncate;
            }
        }
    }

  /**************************************************************************
   *                                                                        *
   *     Mulders' short division failed: we revert to integer division      *
   *                                                                        *
   **************************************************************************/

  if (MPFR_UNLIKELY(rnd_mode == MPFR_RNDN && sh == 0))
    { /* we compute the quotient with one more limb, in order to get
         the round bit in the quotient, and the remainder only contains
         sticky bits */
      qsize = q0size + 1;
      /* need to allocate memory for the quotient */
      qp = MPFR_TMP_LIMBS_ALLOC (qsize);
    }
  else
    {
      qsize = q0size;
      qp = q0p; /* directly put the quotient in the destination */
    }
  qqsize = qsize + qsize;

  /* prepare the dividend */
  ap = MPFR_TMP_LIMBS_ALLOC (qqsize);
  if (MPFR_LIKELY(qqsize > usize)) /* use the full dividend */
    {
      k = qqsize - usize; /* k > 0 */
      MPN_ZERO(ap, k);
      if (extra_bit)
        ap[k - 1] = mpn_rshift (ap + k, up, usize, 1);
      else
        MPN_COPY(ap + k, up, usize);
    }
  else /* truncate the dividend */
    {
      k = usize - qqsize;
      if (extra_bit)
        sticky_u = mpn_rshift (ap, up + k, qqsize, 1);
      else
        MPN_COPY(ap, up + k, qqsize);
      sticky_u = sticky_u || mpfr_mpn_cmpzero (up, k);
    }
  low_u = sticky_u;

  /* now sticky_u is non-zero iff the truncated part of u is non-zero */

  /* prepare the divisor */
  if (MPFR_LIKELY(vsize >= qsize))
    {
      k = vsize - qsize;
      if (qp != vp)
        bp = vp + k; /* avoid copying the divisor */
      else /* need to copy, since mpn_divrem doesn't allow overlap
              between quotient and divisor; necessarily k = 0,
              since quotient and divisor are the same mpfr variable */
        {
          bp = MPFR_TMP_LIMBS_ALLOC (qsize);
          MPN_COPY(bp, vp, vsize);
        }
      sticky_v = sticky_v || mpfr_mpn_cmpzero (vp, k);
      k = 0;
    }
  else /* vsize < qsize: small divisor case */
    {
      bp = vp;
      k = qsize - vsize;
    }

  /**************************************************************************
   *                                                                        *
   *  Here we perform the real division of {ap+k,qqsize-k} by {bp,qsize-k}  *
   *                                                                        *
   **************************************************************************/

  /* In the general case (usize > 2*qsize and vsize > qsize), we have:
       ______________________________________
      |                          |           |   u1 has 2*qsize limbs
      |            u1            |     u0    |   u0 has usize-2*qsize limbs
      |__________________________|___________|

       ____________________
      |           |        |                     v1 has qsize limbs
      |    v1     |   v0   |                     v0 has vsize-qsize limbs
      |___________|________|

     We divide u1 by v1, with quotient in qh + {qp, qsize} and
     remainder (denoted r below) stored in place of the low qsize limbs
     of u1. */
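  /* Editorial size example for the figure above: with usize = 70,
     vsize = 25 and qsize = 20, u1 is the top 2*qsize = 40 limbs of u and
     u0 the remaining 30, while v1 is the top 20 limbs of v and v0 the
     bottom 5. */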
  /* if Mulders' short division failed, we revert to division with
     remainder */
  qh = mpn_divrem (qp, 0, ap + k, qqsize - k, bp, qsize - k);
  /* let u1 be the upper part of u, and v1 the upper part of v (with sticky_u
     and sticky_v representing the lower parts), then the quotient of u1 by v1
     is now in {qp, qsize}, with possible carry in qh, and the remainder in
     {ap + k, qsize - k} */
  /* warning: qh may be 1 if u1 == v1, but u < v */

  k = qsize;
  sticky_u = sticky_u || mpfr_mpn_cmpzero (ap, k);

  sticky = sticky_u | sticky_v;

  /* now sticky is non-zero iff one of the following holds:
     (a) the truncated part of u is non-zero
     (b) the truncated part of v is non-zero
     (c) the remainder from division is non-zero */

  if (MPFR_LIKELY(qsize == q0size))
    {
      sticky3 = qp[0] & MPFR_LIMB_MASK(sh); /* does nothing when sh=0 */
      sh2 = sh;
    }
  else /* qsize = q0size + 1: only happens when rnd_mode=MPFR_RNDN and sh=0 */
    {
      MPN_COPY (q0p, qp + 1, q0size);
      sticky3 = qp[0];
      sh2 = GMP_NUMB_BITS;
    }
  qp[0] ^= sticky3;
  /* sticky3 contains the truncated bits from the quotient,
     including the round bit, and 1 <= sh2 <= GMP_NUMB_BITS
     is the number of bits in sticky3 */
  inex = (sticky != MPFR_LIMB_ZERO) || (sticky3 != MPFR_LIMB_ZERO);

  /* to round, we distinguish two cases:
     (a) vsize <= qsize: we used the full divisor
     (b) vsize > qsize: the divisor was truncated
  */

  if (MPFR_LIKELY(vsize <= qsize)) /* use the full divisor */
    {
      if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
        {
          round_bit = sticky3 & (MPFR_LIMB_ONE << (sh2 - 1));
          sticky = (sticky3 ^ round_bit) | sticky_u;
        }
      else if (like_rndz || inex == 0)
        sticky = (inex == 0) ? MPFR_LIMB_ZERO : MPFR_LIMB_ONE;
      else /* round away from zero */
        sticky = MPFR_LIMB_ONE;
      goto case_1;
    }
  else /* vsize > qsize: need to truncate the divisor */
    {
      if (inex == 0)
        goto truncate;
      else
        {
          /* We know the estimated quotient is an upper bound of the exact
             quotient (with rounding toward zero), with a difference of at
             most 2 in qp[0].
             Thus we can round except when sticky3 is 000...000 or 000...001
             for directed rounding, and 100...000 or 100...001 for rounding
             to nearest. (For rounding to nearest, we cannot determine the
             inexact flag for 000...000 or 000...001.) */
          mp_limb_t sticky3orig = sticky3;
          if (rnd_mode == MPFR_RNDN)
            {
              round_bit = sticky3 & (MPFR_LIMB_ONE << (sh2 - 1));
              sticky3 = sticky3 ^ round_bit;
            }
          if (sticky3 != MPFR_LIMB_ZERO && sticky3 != MPFR_LIMB_ONE)
            {
              sticky = sticky3;
              goto case_1;
            }
          else /* hard case: we have to compare q1 * v0 and r + u0,
                  where q1 * v0 has qsize + (vsize-qsize) = vsize limbs, and
                  r + u0 has qsize + (usize-2*qsize) = usize-qsize limbs */
            {
              mp_size_t l;
              mpfr_limb_ptr sp;
              int cmp_s_r;
              mp_limb_t qh2;

              sp = MPFR_TMP_LIMBS_ALLOC (vsize);
              k = vsize - qsize;
              /* sp <- {qp, qsize} * {vp, vsize-qsize} */
              qp[0] ^= sticky3orig; /* restore original quotient */
              if (qsize >= k)
                mpn_mul (sp, qp, qsize, vp, k);
              else
                mpn_mul (sp, vp, k, qp, qsize);
              if (qh)
                qh2 = mpn_add_n (sp + qsize, sp + qsize, vp, k);
              else
                qh2 = MPFR_LIMB_ZERO;
              qp[0] ^= sticky3orig; /* restore truncated quotient */

              /* compare qh2 + {sp, k + qsize} to {ap, qsize} + u0 */
              cmp_s_r = (qh2 != 0) ? 1 : mpn_cmp (sp + k, ap, qsize);
              if (cmp_s_r == 0) /* compare {sp, k} and u0 */
                {
                  cmp_s_r = (usize >= qqsize) ?
                    mpfr_mpn_cmp_aux (sp, k, up, usize - qqsize, extra_bit) :
                    mpfr_mpn_cmpzero (sp, k);
                }
              /* now cmp_s_r > 0 if {sp, vsize} > {ap, qsize} + u0
                     cmp_s_r = 0 if {sp, vsize} = {ap, qsize} + u0
                     cmp_s_r < 0 if {sp, vsize} < {ap, qsize} + u0 */
              if (cmp_s_r <= 0) /* quotient is in [q1, q1+1) */
                {
                  sticky = (cmp_s_r == 0) ? sticky3 : MPFR_LIMB_ONE;
                  goto case_1;
                }
              else /* cmp_s_r > 0, quotient is < q1: to determine if it is
                      in [q1-2,q1-1] or in [q1-1,q1], we need to subtract
                      the low part u0 of the dividend from q*v0 */
                {
                  mp_limb_t cy = MPFR_LIMB_ZERO;

                  /* subtract u0 >> extra_bit if non-zero */
                  if (qh2 != 0) /* whatever the value of {up, m + k}, it
                                   will be smaller than qh2 + {sp, k} */
                    cmp_s_r = 1;
                  else
                    {
                      if (low_u != MPFR_LIMB_ZERO)
                        {
                          mp_size_t m;
                          l = usize - qqsize; /* number of limbs in u0 */
                          m = (l > k) ? l - k : 0;
                          cy = (extra_bit) ?
                            (up[m] & MPFR_LIMB_ONE) : MPFR_LIMB_ZERO;
                          if (l >= k) /* u0 has at least as many limbs as s:
                                         first check whether {up, m} is
                                         nonzero, and compare {sp, k} and
                                         {up + m, k} */
                            {
                              cy = cy || mpfr_mpn_cmpzero (up, m);
                              low_u = cy;
                              cy = mpfr_mpn_sub_aux (sp, up + m, k,
                                                     cy, extra_bit);
                            }
                          else /* l < k: s has more limbs than u0 */
                            {
                              low_u = MPFR_LIMB_ZERO;
                              if (cy != MPFR_LIMB_ZERO)
                                cy = mpn_sub_1 (sp + k - l - 1, sp + k - l - 1,
                                                1, MPFR_LIMB_HIGHBIT);
                              cy = mpfr_mpn_sub_aux (sp + k - l, up, l,
                                                     cy, extra_bit);
                            }
                        }
                      MPFR_ASSERTD (cy <= 1);
                      cy = mpn_sub_1 (sp + k, sp + k, qsize, cy);
                      /* subtract r */
                      cy += mpn_sub_n (sp + k, sp + k, ap, qsize);
                      MPFR_ASSERTD (cy <= 1);
                      /* now compare {sp, ssize} to v */
                      cmp_s_r = mpn_cmp (sp, vp, vsize);
                      if (cmp_s_r == 0 && low_u != MPFR_LIMB_ZERO)
                        cmp_s_r = 1; /* since in fact we subtracted
                                        less than 1 */
                    }
                  if (cmp_s_r <= 0) /* q1-1 <= u/v < q1 */
                    {
                      if (sticky3 == MPFR_LIMB_ONE)
                        { /* q1-1 is either representable (directed rounding),
                             or the middle of two numbers (nearest) */
                          sticky = (cmp_s_r) ? MPFR_LIMB_ONE : MPFR_LIMB_ZERO;
                          goto case_1;
                        }
                      /* now necessarily sticky3=0 */
                      else if (round_bit == MPFR_LIMB_ZERO)
                        { /* round_bit=0, sticky3=0: q1-1 is exact only
                             when sh=0 */
                          inex = (cmp_s_r || sh) ? -1 : 0;
                          if (rnd_mode == MPFR_RNDN ||
                              (! like_rndz && inex != 0))
                            {
                              inex = 1;
                              goto truncate_check_qh;
                            }
                          else /* round down */
                            goto sub_one_ulp;
                        }
                      else /* sticky3=0, round_bit=1 ==> rounding to nearest */
                        {
                          inex = cmp_s_r;
                          goto truncate;
                        }
                    }
                  else /* q1-2 < u/v < q1-1 */
                    {
                      /* if rnd=MPFR_RNDN, the result is q1 when
                         q1-2 >= q1-2^(sh-1), i.e. sh >= 2,
                         otherwise (sh=1) it is q1-2 */
                      if (rnd_mode == MPFR_RNDN) /* sh > 0 */
                        {
                          /* Case sh=1: sb=0 always, and q1-rb is exactly
                             representable, like q1-rb-2.
                             rb action
                             0  subtract two ulps, inex=-1
                             1  truncate, inex=1

                             Case sh>1: one ulp is 2^(sh-1) >= 2
                             rb sb action
                             0  0  truncate, inex=1
                             0  1  truncate, inex=1
                             1  x  truncate, inex=-1
                          */
                          if (sh == 1)
                            {
                              if (round_bit == MPFR_LIMB_ZERO)
                                {
                                  inex = -1;
                                  sh = 0;
                                  goto sub_two_ulp;
                                }
                              else
                                {
                                  inex = 1;
                                  goto truncate_check_qh;
                                }
                            }
                          else /* sh > 1 */
                            {
                              inex = (round_bit == MPFR_LIMB_ZERO) ? 1 : -1;
                              goto truncate_check_qh;
                            }
                        }
                      else if (like_rndz)
                        {
                          /* the result is down(q1-2), i.e. subtract one
                             ulp if sh > 0, and two ulps if sh=0 */
                          inex = -1;
                          if (sh > 0)
                            goto sub_one_ulp;
                          else
                            goto sub_two_ulp;
                        }
                      /* if round away from zero, the result is up(q1-1),
                         which is q1 unless sh = 0, where it is q1-1 */
                      else
                        {
                          inex = 1;
                          if (sh > 0)
                            goto truncate_check_qh;
                          else /* sh = 0 */
                            goto sub_one_ulp;
                        }
                    }
                }
            }
        }
    }

 case_1: /* quotient is in [q1, q1+1),
            round_bit is the round_bit (0 for directed rounding),
            sticky the sticky bit */
  if (like_rndz || (round_bit == MPFR_LIMB_ZERO && sticky == MPFR_LIMB_ZERO))
    {
      inex = round_bit == MPFR_LIMB_ZERO && sticky == MPFR_LIMB_ZERO ? 0 : -1;
      goto truncate;
    }
  else if (rnd_mode == MPFR_RNDN) /* sticky <> 0 or round <> 0 */
    {
      if (round_bit == MPFR_LIMB_ZERO) /* necessarily sticky <> 0 */
        {
          inex = -1;
          goto truncate;
        }
      /* round_bit = 1 */
      else if (sticky != MPFR_LIMB_ZERO)
        goto add_one_ulp; /* inex=1 */
      else /* round_bit=1, sticky=0 */
        goto even_rule;
    }
  else /* round away from zero, sticky <> 0 */
    goto add_one_ulp; /* with inex=1 */

 sub_two_ulp:
  /* we cannot subtract MPFR_LIMB_ONE << (sh+1) since this is
     undefined for sh = GMP_NUMB_BITS */
  qh -= mpn_sub_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh);
  /* fall through */

 sub_one_ulp:
  qh -= mpn_sub_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh);
  /* fall through to truncate_check_qh */

 truncate_check_qh:
  if (qh)
    {
      if (MPFR_LIKELY (qexp < MPFR_EXP_MAX))
        qexp ++;
      /* else qexp is now incorrect, but one will still get an overflow */
      q0p[q0size - 1] = MPFR_LIMB_HIGHBIT;
    }
  goto truncate;
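  /* Editorial note on the tie case below: round bit 1 and sticky 0 mean the
     exact quotient is exactly halfway between two representable numbers, so
     we round to even: truncate when the last kept bit (bit sh of q0p[0]) is
     0, otherwise add one ulp. */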
 even_rule: /* has to set inex */
  inex = (q0p[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
  if (inex < 0)
    goto truncate;
  /* else fall through to add_one_ulp */

 add_one_ulp:
  inex = 1; /* always here */
  if (mpn_add_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh))
    {
      if (MPFR_LIKELY (qexp < MPFR_EXP_MAX))
        qexp ++;
      /* else qexp is now incorrect, but one will still get an overflow */
      q0p[q0size - 1] = MPFR_LIMB_HIGHBIT;
    }

 truncate: /* inex already set */

  MPFR_TMP_FREE(marker);

  /* check for underflow/overflow */
  if (MPFR_UNLIKELY(qexp > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, sign_quotient);
  else if (MPFR_UNLIKELY(qexp < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN && ((qexp < __gmpfr_emin - 1) ||
                                    (inex >= 0 && mpfr_powerof2_raw (q))))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, sign_quotient);
    }
  MPFR_SET_EXP(q, qexp);

  inex *= sign_quotient;
  MPFR_RET (inex);
}