/* mpfr_div -- divide two floating-point numbers

Copyright 1999, 2001-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

/* References:
   [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
       Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
       July 25-27, 2011, pages 7-14.
   [2] Improved Division by Invariant Integers, Niels Möller and
       Torbjörn Granlund, IEEE Transactions on Computers, volume 60,
       number 2, pages 165-175, 2011.
*/

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

#if !defined(MPFR_GENERIC_ABI)

#if GMP_NUMB_BITS == 64

#include "invert_limb.h"

/* Given u = u1*B+u0 < v = v1*B+v0 with v normalized (high bit of v1 set),
   put in q = Q1*B+Q0 an approximation of floor(u*B^2/v), with:
   B = 2^GMP_NUMB_BITS and q <= floor(u*B^2/v) <= q + 21.
   Note: this function requires __gmpfr_invert_limb_approx (from invert_limb.h)
   which is only provided so far for 64-bit limb.
   Note: __gmpfr_invert_limb_approx can be replaced by __gmpfr_invert_limb,
   in that case the bound 21 reduces to 16. */
static void
mpfr_div2_approx (mpfr_limb_ptr Q1, mpfr_limb_ptr Q0,
                  mp_limb_t u1, mp_limb_t u0,
                  mp_limb_t v1, mp_limb_t v0)
{
  mp_limb_t inv, q1, q0, r1, r0, cy, xx, yy;

  /* first compute an approximation of q1, using a lower approximation of
     B^2/(v1+1) - B */
  if (MPFR_UNLIKELY(v1 == MPFR_LIMB_MAX))
    inv = MPFR_LIMB_ZERO;
  else
    __gmpfr_invert_limb_approx (inv, v1 + 1);
  /* now inv <= B^2/(v1+1) - B */
  umul_ppmm (q1, q0, u1, inv);
  q1 += u1;
  /* now q1 <= u1*B/(v1+1) < (u1*B+u0)*B/(v1*B+v0) */
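  /* Toy illustration of these two bounds, with base B = 2^8 = 256 instead
     of 2^64 (the code itself requires 64-bit limbs): for v1 = 159, inv must
     satisfy inv <= B^2/(v1+1) - B = 65536/160 - 256 = 153.6, e.g. inv = 153;
     then for u1 = 150, q1 = u1 + floor(u1*inv/B) = 150 + floor(22950/256)
     = 150 + 89 = 239, which is indeed <= u1*B/(v1+1) = 150*256/160 = 240. */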

  /* compute q1*(v1*B+v0) into r1:r0:yy and subtract from u1:u0:0 */
  umul_ppmm (r1, r0, q1, v1);
  umul_ppmm (xx, yy, q1, v0);

  ADD_LIMB (r0, xx, cy);
  r1 += cy;

  /* we ignore yy below, but first increment r0, to ensure we get a lower
     approximation of the remainder */
  r0 += yy != 0;
  r1 += r0 == 0 && yy != 0;
  r0 = u0 - r0;
  r1 = u1 - r1 - (r0 > u0);

  /* r1:r0 should be non-negative */
  MPFR_ASSERTD((r1 & MPFR_LIMB_HIGHBIT) == 0);

  /* the second quotient limb is approximated by (r1*B^2+r0*B) / v1,
     and since (B+inv)/B approximates B/v1, this is in turn approximated
     by (r1*B+r0)*(B+inv)/B = r1*B + r1*inv + r0 + (r0*inv/B) */
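  /* How each term of this expansion is accumulated by the code below:
     r1*B goes into the high limb (q1 += r1), r0 and floor(r0*inv/B) go
     into the low limb q0 (with carry propagation into q1), and the r1*inv
     term is added by the final loop, which adds inv to q0 exactly r1 times
     (r1 <= 4, as asserted below). */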

  q0 = r0;
  q1 += r1;
  /* add floor(r0*inv/B) to q0 */
  umul_ppmm (xx, yy, r0, inv);
  ADD_LIMB (q0, xx, cy);
  q1 += cy;
  MPFR_ASSERTD (r1 <= 4);
  /* TODO: use value coverage on r1 to check that the 5 cases are tested. */
  while (r1) /* the number of loops is at most 4 */
    {
      /* add inv to q0 */
      ADD_LIMB (q0, inv, cy);
      q1 += cy;
      r1 --;
    }

  *Q1 = q1;
  *Q0 = q0;
}

#endif /* GMP_NUMB_BITS == 64 */

/* Special code for PREC(q) = PREC(u) = PREC(v) = p < GMP_NUMB_BITS */
static int
mpfr_div_1 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(q);
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mpfr_prec_t sh = GMP_NUMB_BITS - p;
  mp_limb_t u0 = MPFR_MANT(u)[0];
  mp_limb_t v0 = MPFR_MANT(v)[0];
  mp_limb_t q0, rb, sb, mask = MPFR_LIMB_MASK(sh);
  int extra;

  if ((extra = (u0 >= v0)))
    u0 -= v0;

#if GMP_NUMB_BITS == 64 /* __gmpfr_invert_limb_approx only exists for 64-bit */
  /* First try with an approximate quotient.
     FIXME: for p>=62 we have sh-1<2 and will never be able to round
     correctly. Even for p=61 we have sh-1=2 and we can round correctly only
     when the two last bits of q0 are 01, which happens with probability 25%
     only. */
  {
    mp_limb_t inv;
    __gmpfr_invert_limb_approx (inv, v0);
    umul_ppmm (rb, sb, u0, inv);
  }
  rb += u0;
  q0 = rb >> extra;
  /* rb does not exceed the true quotient floor(u0*2^GMP_NUMB_BITS/v0),
     with error at most 2, which means the rational quotient q satisfies
     rb <= q < rb + 3. We can round correctly except when the last sh-1 bits
     of q0 are 000..000 or 111..111 or 111..110. */
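  /* For instance, with p = 53 on a 64-bit limb we have sh = 11, so the test
     below looks at the low sh-1 = 10 bits of q0: only the three residues
     0, 2^10-1 and 2^10-2 (out of 1024) fall through to the exact path, so
     the approximate quotient suffices about 99.7% of the time. */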
  if (MPFR_LIKELY(((q0 + 2) & (mask >> 1)) > 2))
    {
      rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
      sb = 1; /* result cannot be exact in this case */
    }
  else /* the true quotient is rb, rb+1 or rb+2 */
    {
      mp_limb_t h, l;
      q0 = rb;
      umul_ppmm (h, l, q0, v0);
      MPFR_ASSERTD(h < u0 || (h == u0 && l == MPFR_LIMB_ZERO));
      /* subtract {h,l} from {u0,0} */
      sub_ddmmss (h, l, u0, 0, h, l);
      /* the remainder {h, l} should be < v0 */
      /* This while loop is executed at most two times, but does not seem
         slower than two consecutive identical if-statements. */
      while (h || l >= v0)
        {
          q0 ++;
          h -= (l < v0);
          l -= v0;
        }
      MPFR_ASSERTD(h == 0 && l < v0);
      sb = l | (q0 & extra);
      q0 >>= extra;
      rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
      sb |= q0 & (mask >> 1);
    }
#else
  udiv_qrnnd (q0, sb, u0, 0, v0);
  sb |= q0 & extra;
  q0 >>= extra;
  rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= q0 & (mask >> 1);
#endif

  qp[0] = (MPFR_LIMB_HIGHBIT | q0) & ~mask;
  qx += extra;
  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (MPFR_UNLIKELY(qx > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(qx < __gmpfr_emin))
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[0] = 1000....000 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 || (qp[0] == MPFR_LIMB_HIGHBIT && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* It is not possible to have rb <> 0 and sb = 0 here, since it would
         mean an n-bit by n-bit division gives an exact (n+1)-bit number.
         And since the case rb = sb = 0 was already dealt with, we cannot
         have sb = 0. Thus we cannot be in the middle of two numbers. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE << sh;
      MPFR_ASSERTD(qp[0] != 0);
      /* It is not possible to have an overflow in the addition above.
         Proof: if p is the precision of the inputs, it would mean we have two
         integers n and d with 2^(p-1) <= n, d < 2^p, such that the binary
         expansion of n/d starts with p '1', and has at least one '1' later.
         We distinguish two cases:
         (1) if n/d < 1, it would mean 1-2^(-p) < n/d < 1
         (2) if n/d >= 1, it would mean 2-2^(1-p) < n/d < 2
         In case (1), multiplying by d we get 1-d/2^p < n < d,
         which has no integer solution since d/2^p < 1.
         In case (2), multiplying by d we get 2d-2d/2^p < n < 2d:
         (2a) if d=2^(p-1), we get 2^p-1 < n < 2^p which has no solution;
         (2b) if d>=2^(p-1)+1, then 2d-2d/2^p >= 2^p+2-2 = 2^p, thus there
         is no solution n < 2^p either. */
      MPFR_RET(MPFR_SIGN(q));
    }
}
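
/* Usage sketch (illustrative, not part of the library): mpfr_div dispatches
   to mpfr_div_1 above whenever q, u and v share one precision
   p < GMP_NUMB_BITS, e.g. p = 53 with 64-bit limbs. A minimal caller (in a
   standalone program, include mpfr.h): */
#if 0
static void
example_div_1 (void) /* hypothetical demo function */
{
  mpfr_t u, v, q;
  mpfr_inits2 (53, u, v, q, (mpfr_ptr) 0); /* p = 53 < GMP_NUMB_BITS */
  mpfr_set_ui (u, 1, MPFR_RNDN);
  mpfr_set_ui (v, 3, MPFR_RNDN);
  mpfr_div (q, u, v, MPFR_RNDN);           /* takes the mpfr_div_1 path */
  mpfr_clears (u, v, q, (mpfr_ptr) 0);
}
#endif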

/* Special code for PREC(q) = GMP_NUMB_BITS,
   with PREC(u), PREC(v) <= GMP_NUMB_BITS. */
static int
mpfr_div_1n (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mp_limb_t u0 = MPFR_MANT(u)[0];
  mp_limb_t v0 = MPFR_MANT(v)[0];
  mp_limb_t q0, rb, sb, l;
  int extra;

  MPFR_ASSERTD(MPFR_PREC(q) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(v) <= GMP_NUMB_BITS);

  if ((extra = (u0 >= v0)))
    u0 -= v0;

#if GMP_NUMB_BITS == 64 /* __gmpfr_invert_limb_approx only exists for 64-bit */
  {
    mp_limb_t inv, h;

    /* First compute an approximate quotient. */
    __gmpfr_invert_limb_approx (inv, v0);
    umul_ppmm (rb, sb, u0, inv);
    q0 = u0 + rb;
    /* q0 does not exceed the true quotient floor(u0*2^GMP_NUMB_BITS/v0),
       with error at most 2, which means the rational quotient q satisfies
       q0 <= q < q0 + 3, thus the true quotient is q0, q0+1 or q0+2 */
    umul_ppmm (h, l, q0, v0);
    MPFR_ASSERTD(h < u0 || (h == u0 && l == MPFR_LIMB_ZERO));
    /* subtract {h,l} from {u0,0} */
    sub_ddmmss (h, l, u0, 0, h, l);
    /* the remainder {h, l} should be < v0 */
    /* This while loop is executed at most two times, but does not seem
       slower than two consecutive identical if-statements. */
    while (h || l >= v0)
      {
        q0 ++;
        h -= (l < v0);
        l -= v0;
      }
    MPFR_ASSERTD(h == 0 && l < v0);
  }
#else
  udiv_qrnnd (q0, l, u0, 0, v0);
#endif

  /* now (u0 - extra*v0) * 2^GMP_NUMB_BITS = q0*v0 + l with 0 <= l < v0 */

  /* If extra=0, the quotient is q0, the round bit is 1 if l >= v0/2,
     and sb collects the remaining bits of l.
     If extra=1, the quotient is MPFR_LIMB_HIGHBIT + (q0 >> 1), the round bit
     is the least significant bit of q0, and sb is l. */
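  /* Toy illustration of the extra=1 case with an 8-bit limb (B = 256):
     for u0 = 224 and v0 = 192 we get extra = 1, u0 - v0 = 32, and the
     division yields q0 = floor(32*256/192) = 42 with remainder l = 128.
     The full quotient floor(224*256/192) = 298 = 0b100101010 needs 9 bits,
     so the stored significand is MPFR_LIMB_HIGHBIT | (42 >> 1) = 0b10010101,
     the round bit is the shifted-out bit 42 & 1 = 0, and sb = l = 128. */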

  if (extra == 0)
    {
      qp[0] = q0;
      /* If "l + l < l", then there is a carry in l + l, thus 2*l > v0.
         Otherwise if there is no carry, we check whether 2*l >= v0. */
      rb = (l + l < l) || (l + l >= v0);
      sb = (rb) ? l + l - v0 : l;
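      /* Example of the carry trick in rb on a toy 4-bit limb: l = 0b1010
         gives l + l = 0b0100 after wrap-around, which is < l, revealing the
         lost carry; a carry means 2*l >= B > v0, so the round bit is 1. */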
    }
  else
    {
      qp[0] = MPFR_LIMB_HIGHBIT | (q0 >> 1);
      rb = q0 & MPFR_LIMB_ONE;
      sb = l;
      qx ++;
    }

  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (MPFR_UNLIKELY(qx > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(qx < __gmpfr_emin))
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[0] = 1000....000 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 || (qp[0] == MPFR_LIMB_HIGHBIT && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* It is not possible to have rb <> 0 and sb = 0 here, since it would
         mean an n-bit by n-bit division gives an exact (n+1)-bit number.
         And since the case rb = sb = 0 was already dealt with, we cannot
         have sb = 0. Thus we cannot be in the middle of two numbers. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE;
      /* there can be no overflow in the addition above,
         see the analysis of mpfr_div_1 */
      MPFR_ASSERTD(qp[0] != 0);
      MPFR_RET(MPFR_SIGN(q));
    }
}

/* Special code for GMP_NUMB_BITS < PREC(q) < 2*GMP_NUMB_BITS and
   PREC(u) = PREC(v) = PREC(q) */
static int
mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(q);
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mpfr_prec_t sh = 2*GMP_NUMB_BITS - p;
  mp_limb_t h, rb, sb, mask = MPFR_LIMB_MASK(sh);
  mp_limb_t v1 = MPFR_MANT(v)[1], v0 = MPFR_MANT(v)[0];
  mp_limb_t q1, q0, r3, r2, r1, r0, l, t;
  int extra;

  r3 = MPFR_MANT(u)[1];
  r2 = MPFR_MANT(u)[0];
  extra = r3 > v1 || (r3 == v1 && r2 >= v0);
  if (extra)
    sub_ddmmss (r3, r2, r3, r2, v1, v0);

  MPFR_ASSERTD(r3 < v1 || (r3 == v1 && r2 < v0));

#if GMP_NUMB_BITS == 64
  mpfr_div2_approx (&q1, &q0, r3, r2, v1, v0);
  /* we know q1*B+q0 is smaller or equal to the exact quotient, with
     difference at most 21 */
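  /* With GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS we have sh = 128 - p on 64-bit
     limbs, and the test below keeps the low sh-1 bits of q0 away from a
     rounding boundary: e.g. for p = 113, sh-1 = 14, and only the 22 residues
     q0 mod 2^14 in {2^14-21, ..., 2^14-1, 0} force the exact-quotient
     fallback, i.e. about 0.13% of uniformly distributed quotients. */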
  if (MPFR_LIKELY(((q0 + 21) & (mask >> 1)) > 21))
    sb = 1; /* result is not exact when we can round with an approximation */
  else
    {
      /* we know q1:q0 is a good-enough approximation, use it! */
      mp_limb_t s0, s1, s2, h, l;

      /* Since we know the difference should be at most 21*(v1:v0) after the
         subtraction below, thus at most 21*2^128, it suffices to compute the
         lower 3 limbs of (q1:q0) * (v1:v0). */
      umul_ppmm (s1, s0, q0, v0);
      umul_ppmm (s2, l, q0, v1);
      s1 += l;
      s2 += (s1 < l);
      umul_ppmm (h, l, q1, v0);
      s1 += l;
      s2 += h + (s1 < l);
      s2 += q1 * v1;
      /* Subtract s2:s1:s0 from r2:0:0, with result in s2:s1:s0. */
      s2 = r2 - s2;
      /* now negate s1:s0 */
      s0 = -s0;
      s1 = -s1 - (s0 != 0);
      /* there is a borrow in s2 when s0 and s1 are not both zero */
      s2 -= (s1 != 0 || s0 != 0);
      while (s2 > 0 || (s1 > v1) || (s1 == v1 && s0 >= v0))
        {
          /* add 1 to q1:q0 */
          q0 ++;
          q1 += (q0 == 0);
          /* subtract v1:v0 from s2:s1:s0 */
          s2 -= (s1 < v1) || (s1 == v1 && s0 < v0);
          sub_ddmmss (s1, s0, s1, s0, v1, v0);
        }
      sb = s1 | s0;
    }
  goto round_div2;
#endif

  /* now r3:r2 < v1:v0 */
  if (MPFR_UNLIKELY(r3 == v1)) /* can occur in some rare cases */
    {
      /* This can only occur in case extra=0, since otherwise we would have
         u_old >= u_new + v >= B^2/2 + B^2/2 = B^2. In this case we have
         r3 = u1 and r2 = u0, thus the remainder u*B-q1*v is
         v1*B^2+u0*B-(B-1)*(v1*B+v0) = (u0-v0+v1)*B+v0.
         Warning: in this case q1 = B-1 can be too large, for example with
         u = B^2/2 and v = B^2/2 + B - 1, then u*B-(B-1)*v = -1/2*B^2+2*B-1. */
      MPFR_ASSERTD(extra == 0);
      q1 = MPFR_LIMB_MAX;
      r1 = v0;
      t = v0 - r2; /* t > 0 since r3:r2 < v1:v0 */
      r2 = v1 - t;
      if (t > v1) /* q1 = B-1 is too large, we need q1 = B-2, which is ok
                     since u*B - q1*v >= v1*B^2-(B-2)*(v1*B+B-1) =
                     -B^2 + 2*B*v1 + 3*B - 2 >= 0 since v1>=B/2 and B>=2 */
        {
          q1 --;
          /* add v to r2:r1 */
          r1 += v0;
          r2 += v1 + (r1 < v0);
        }
    }
  else
    {
      /* divide r3:r2 by v1: requires r3 < v1 */
      udiv_qrnnd (q1, r2, r3, r2, v1);
      /* u-extra*v = q1 * v1 + r2 */

      /* now subtract q1*v0 from r2:0 */
      umul_ppmm (h, l, q1, v0);
      t = r2; /* save old value of r2 */
      r1 = -l;
      r2 -= h + (l != 0);
      /* Note: h + (l != 0) < 2^GMP_NUMB_BITS. */

      /* we have r2:r1 = oldr2:0 - q1*v0 mod 2^(2*GMP_NUMB_BITS)
         thus (u-extra*v)*B = q1 * v + r2:r1 mod 2^(2*GMP_NUMB_BITS) */

      /* this while loop should be run at most twice */
      while (r2 > t) /* borrow when subtracting h + (l != 0), q1 too large */
        {
          q1 --;
          /* add v1:v0 to r2:r1 */
          t = r2;
          r1 += v0;
          r2 += v1 + (r1 < v0);
          /* note: since 2^(GMP_NUMB_BITS-1) <= v1 + (r1 < v0)
             <= 2^GMP_NUMB_BITS, it suffices to check if r2 <= t to see
             if there was a carry or not. */
        }
    }

  /* now (u-extra*v)*B = q1 * v + r2:r1 with 0 <= r2:r1 < v */

  MPFR_ASSERTD(r2 < v1 || (r2 == v1 && r1 < v0));

  if (MPFR_UNLIKELY(r2 == v1))
    {
      q0 = MPFR_LIMB_MAX;
      /* r2:r1:0 - q0*(v1:v0) = v1:r1:0 - (B-1)*(v1:v0)
                              = r1:0 - v0:0 + v1:v0 */
      r0 = v0;
      t = v0 - r1; /* t > 0 since r2:r1 < v1:v0 */
      r1 = v1 - t;
      if (t > v1)
        {
          q0 --;
          /* add v to r1:r0 */
          r0 += v0;
          r1 += v1 + (r0 < v0);
        }
    }
  else
    {
      /* divide r2:r1 by v1: requires r2 < v1 */
      udiv_qrnnd (q0, r1, r2, r1, v1);

      /* r2:r1 = q0*v1 + r1 */

      /* subtract q0*v0 from r1:0 */
      umul_ppmm (h, l, q0, v0);
      t = r1;
      r0 = -l;
      r1 -= h + (l != 0);

      /* this while loop should be run at most twice */
      while (r1 > t) /* borrow when subtracting h + (l != 0),
                        q0 was too large */
        {
          q0 --;
          /* add v1:v0 to r1:r0 */
          t = r1;
          r0 += v0;
          r1 += v1 + (r0 < v0);
          /* note: since 2^(GMP_NUMB_BITS-1) <= v1 + (r0 < v0)
             <= 2^GMP_NUMB_BITS, it suffices to check if r1 <= t to see
             if there was a carry or not. */
        }
    }

  MPFR_ASSERTD(r1 < v1 || (r1 == v1 && r0 < v0));

  /* now (u-extra*v)*B^2 = (q1:q0) * v + r1:r0 */

  sb = r1 | r0;

  /* here, q1:q0 should be an approximation of the quotient (or the exact
     quotient), and sb the sticky bit */

#if GMP_NUMB_BITS == 64
 round_div2:
#endif
  if (extra)
    {
      qx ++;
      sb |= q0 & 1;
      q0 = (q1 << (GMP_NUMB_BITS - 1)) | (q0 >> 1);
      q1 = MPFR_LIMB_HIGHBIT | (q1 >> 1);
    }
  rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (q0 & mask) ^ rb;
  qp[1] = q1;
  qp[0] = q0 & ~mask;

  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (qx > __gmpfr_emax)
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (qx < __gmpfr_emin)
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[1] = 100....000, qp[0] = 0 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 ||
           (qp[1] == MPFR_LIMB_HIGHBIT && qp[0] == MPFR_LIMB_ZERO && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* See the comment in mpfr_div_1. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE << sh;
      qp[1] += qp[0] == 0;
      /* there can be no overflow in the addition above,
         see the analysis of mpfr_div_1 */
      MPFR_ASSERTD(qp[1] != 0);
      MPFR_RET(MPFR_SIGN(q));
    }
}

#endif /* !defined(MPFR_GENERIC_ABI) */

/* check if {ap, an} is zero */
static int
mpfr_mpn_cmpzero (mpfr_limb_ptr ap, mp_size_t an)
{
  MPFR_ASSERTD (an >= 0);
  while (an > 0)
    if (MPFR_LIKELY(ap[--an] != MPFR_LIMB_ZERO))
      return 1;
  return 0;
}

/* compare {ap, an} and {bp, bn} >> extra,
   aligned by the more significant limbs.
   Takes into account bp[0] for extra=1.
*/
static int
mpfr_mpn_cmp_aux (mpfr_limb_ptr ap, mp_size_t an,
                  mpfr_limb_ptr bp, mp_size_t bn, int extra)
{
  int cmp = 0;
  mp_size_t k;
  mp_limb_t bb;

  MPFR_ASSERTD (an >= 0);
  MPFR_ASSERTD (bn >= 0);
  MPFR_ASSERTD (extra == 0 || extra == 1);

  if (an >= bn)
    {
      k = an - bn;
      while (cmp == 0 && bn > 0)
        {
          bn --;
          bb = (extra) ? ((bp[bn+1] << (GMP_NUMB_BITS - 1)) | (bp[bn] >> 1))
                       : bp[bn];
          cmp = (ap[k + bn] > bb) ? 1 : ((ap[k + bn] < bb) ? -1 : 0);
        }
      bb = (extra) ? bp[0] << (GMP_NUMB_BITS - 1) : MPFR_LIMB_ZERO;
      while (cmp == 0 && k > 0)
        {
          k--;
          cmp = (ap[k] > bb) ? 1 : ((ap[k] < bb) ? -1 : 0);
          bb = MPFR_LIMB_ZERO; /* ensure we consider bp[0] & 1 only once */
        }
      if (cmp == 0 && bb != MPFR_LIMB_ZERO)
        cmp = -1;
    }
  else /* an < bn */
    {
      k = bn - an;
      while (cmp == 0 && an > 0)
        {
          an --;
          bb = (extra) ? ((bp[k+an+1] << (GMP_NUMB_BITS - 1)) | (bp[k+an] >> 1))
                       : bp[k+an];
          if (ap[an] > bb)
            cmp = 1;
          else if (ap[an] < bb)
            cmp = -1;
        }
      while (cmp == 0 && k > 0)
        {
          k--;
          bb = (extra) ? ((bp[k+1] << (GMP_NUMB_BITS - 1)) | (bp[k] >> 1))
                       : bp[k];
          cmp = (bb != MPFR_LIMB_ZERO) ? -1 : 0;
        }
      if (cmp == 0 && extra && (bp[0] & MPFR_LIMB_ONE))
        cmp = -1;
    }
  return cmp;
}

/* {ap, n} <- {ap, n} - {bp, n} >> extra - cy, with cy = 0 or 1.
   Return borrow out.
*/
static mp_limb_t
mpfr_mpn_sub_aux (mpfr_limb_ptr ap, mpfr_limb_ptr bp, mp_size_t n,
                  mp_limb_t cy, int extra)
{
  mp_limb_t bb, rp;

  MPFR_ASSERTD (cy <= 1);
  MPFR_ASSERTD (n >= 0);

  while (n--)
    {
      bb = (extra) ? (MPFR_LIMB_LSHIFT(bp[1], GMP_NUMB_BITS - 1) | (bp[0] >> 1))
                   : bp[0];
      rp = ap[0] - bb - cy;
      cy = (ap[0] < bb) || (cy && rp == MPFR_LIMB_MAX) ?
        MPFR_LIMB_ONE : MPFR_LIMB_ZERO;
      ap[0] = rp;
      ap ++;
      bp ++;
    }
  MPFR_ASSERTD (cy <= 1);
  return cy;
}
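
/* Illustration for mpfr_mpn_sub_aux above (not compiled): with n = 1,
   extra = 1 and cy = 0, the subtrahend is {bp, n} shifted right by one bit,
   so bp[1] is also read; for ap[0] = 10, bp[0] = 6, bp[1] = 0 the call
   computes ap[0] - (bp[0] >> 1) = 10 - 3 = 7 and returns borrow 0. */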

MPFR_HOT_FUNCTION_ATTR int
mpfr_div (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mp_size_t q0size, usize, vsize;
  mp_size_t qsize; /* number of limbs wanted for the computed quotient */
  mp_size_t qqsize;
  mp_size_t k;
  mpfr_limb_ptr q0p, qp;
  mpfr_limb_ptr up, vp;
  mpfr_limb_ptr ap;
  mpfr_limb_ptr bp;
  mp_limb_t qh;
  mp_limb_t sticky_u, sticky_v;
  mp_limb_t low_u;
  mp_limb_t sticky;
  mp_limb_t sticky3;
  mp_limb_t round_bit;
  mpfr_exp_t qexp;
  int sign_quotient;
  int extra_bit;
  int sh, sh2;
  int inex;
  int like_rndz;
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC (
    ("u[%Pd]=%.*Rg v[%Pd]=%.*Rg rnd=%d",
     mpfr_get_prec (u), mpfr_log_prec, u,
     mpfr_get_prec (v), mpfr_log_prec, v, rnd_mode),
    ("q[%Pd]=%.*Rg inexact=%d", mpfr_get_prec (q), mpfr_log_prec, q, inex));

  /**************************************************************************
   *                                                                        *
   *             This part of the code deals with special cases             *
   *                                                                        *
   **************************************************************************/

  if (MPFR_UNLIKELY(MPFR_ARE_SINGULAR(u,v)))
    {
      if (MPFR_IS_NAN(u) || MPFR_IS_NAN(v))
        {
          MPFR_SET_NAN(q);
          MPFR_RET_NAN;
        }
      sign_quotient = MPFR_MULT_SIGN(MPFR_SIGN(u), MPFR_SIGN(v));
      MPFR_SET_SIGN(q, sign_quotient);
      if (MPFR_IS_INF(u))
        {
          if (MPFR_IS_INF(v))
            {
              MPFR_SET_NAN(q);
              MPFR_RET_NAN;
            }
          else
            {
              MPFR_SET_INF(q);
              MPFR_RET(0);
            }
        }
      else if (MPFR_IS_INF(v))
        {
          MPFR_SET_ZERO (q);
          MPFR_RET (0);
        }
      else if (MPFR_IS_ZERO (v))
        {
          if (MPFR_IS_ZERO (u))
            {
              MPFR_SET_NAN(q);
              MPFR_RET_NAN;
            }
          else
            {
              MPFR_ASSERTD (! MPFR_IS_INF (u));
              MPFR_SET_INF(q);
              MPFR_SET_DIVBY0 ();
              MPFR_RET(0);
            }
        }
      else
        {
          MPFR_ASSERTD (MPFR_IS_ZERO (u));
          MPFR_SET_ZERO (q);
          MPFR_RET (0);
        }
    }

  /* When MPFR_GENERIC_ABI is defined, we don't use special code. */
#if !defined(MPFR_GENERIC_ABI)
  if (MPFR_GET_PREC(u) == MPFR_GET_PREC(q) &&
      MPFR_GET_PREC(v) == MPFR_GET_PREC(q))
    {
      if (MPFR_GET_PREC(q) < GMP_NUMB_BITS)
        return mpfr_div_1 (q, u, v, rnd_mode);

      if (GMP_NUMB_BITS < MPFR_GET_PREC(q) &&
          MPFR_GET_PREC(q) < 2 * GMP_NUMB_BITS)
        return mpfr_div_2 (q, u, v, rnd_mode);

      if (MPFR_GET_PREC(q) == GMP_NUMB_BITS)
        return mpfr_div_1n (q, u, v, rnd_mode);
    }
#endif /* !defined(MPFR_GENERIC_ABI) */

  usize = MPFR_LIMB_SIZE(u);
  vsize = MPFR_LIMB_SIZE(v);
  q0size = MPFR_LIMB_SIZE(q); /* number of limbs of destination */
  q0p = MPFR_MANT(q);
  up = MPFR_MANT(u);
  vp = MPFR_MANT(v);
  sticky_u = MPFR_LIMB_ZERO;
  sticky_v = MPFR_LIMB_ZERO;
  round_bit = MPFR_LIMB_ZERO;

  /**************************************************************************
   *                                                                        *
   *               End of the part concerning special values.               *
   *                                                                        *
   **************************************************************************/

  /* When the divisor has one limb and MPFR_LONG_WITHIN_LIMB is defined,
     we can use mpfr_div_ui, which should be faster, assuming there is no
     intermediate overflow or underflow.
     The divisor interpreted as an integer satisfies
     2^(GMP_NUMB_BITS-1) <= vm < 2^GMP_NUMB_BITS, thus the quotient
     satisfies 2^(EXP(u)-1-GMP_NUMB_BITS) < u/vm < 2^(EXP(u)-GMP_NUMB_BITS+1)
     and its exponent is either EXP(u)-GMP_NUMB_BITS or one more. */
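  /* Worked example of the exponent adjustment below (64-bit limbs):
     dividing u = 1 (EXP(u) = 1) by v = 1/2 (vp[0] = 2^63, EXP(v) = 0),
     mpfr_div_ui computes 1 / 2^63 = 2^-63, whose exponent is 1 - 63 = -62;
     subtracting exp_v = 0 and adding GMP_NUMB_BITS = 64 gives the exponent 2
     of the true quotient 2 = 0.5 * 2^2. */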
#ifdef MPFR_LONG_WITHIN_LIMB
  if (vsize <= 1 && __gmpfr_emin <= MPFR_EXP(u) - GMP_NUMB_BITS
      && MPFR_EXP(u) - GMP_NUMB_BITS + 1 <= __gmpfr_emax
      && vp[0] <= ULONG_MAX)
    {
      mpfr_exp_t exp_v = MPFR_EXP(v); /* save it in case q=v */
      if (MPFR_IS_POS (v))
        inex = mpfr_div_ui (q, u, vp[0], rnd_mode);
      else
        {
          inex = -mpfr_div_ui (q, u, vp[0], MPFR_INVERT_RND(rnd_mode));
          MPFR_CHANGE_SIGN(q);
        }
      /* q did not under/overflow */
      MPFR_EXP(q) -= exp_v;
      /* The following test is needed, otherwise the next addition
         on the exponent may overflow, e.g. when dividing the
         largest finite MPFR number by the smallest positive one. */
      if (MPFR_UNLIKELY (MPFR_EXP(q) > __gmpfr_emax - GMP_NUMB_BITS))
        return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));
      MPFR_EXP(q) += GMP_NUMB_BITS;
      return mpfr_check_range (q, inex, rnd_mode);
    }
#endif

  MPFR_TMP_MARK(marker);

  /* set sign */
  sign_quotient = MPFR_MULT_SIGN(MPFR_SIGN(u), MPFR_SIGN(v));
  MPFR_SET_SIGN(q, sign_quotient);

  /* determine if an extra bit comes from the division, i.e. if the
     significand of u (as a fraction in [1/2, 1[) is larger than that
     of v */
  if (MPFR_LIKELY(up[usize - 1] != vp[vsize - 1]))
    extra_bit = (up[usize - 1] > vp[vsize - 1]) ? 1 : 0;
  else /* most significant limbs are equal, must look at further limbs */
    {
      mp_size_t l;

      k = usize - 1;
      l = vsize - 1;
      while (k != 0 && l != 0 && up[--k] == vp[--l]);
      /* now k=0 or l=0 or up[k] != vp[l] */
      if (up[k] != vp[l])
        extra_bit = (up[k] > vp[l]);
      /* now up[k] = vp[l], thus either k=0 or l=0 */
      else if (l == 0) /* no more divisor limb */
        extra_bit = 1;
      else /* k=0: no more dividend limb */
        extra_bit = mpfr_mpn_cmpzero (vp, l) == 0;
    }
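
  /* Example: with significands u = 0.1100...*2^eu and v = 0.1010...*2^ev
     (binary), the significand of u is larger, so extra_bit = 1: the ratio
     of significands lies in [1, 2) and must be shifted right by one bit to
     land back in [1/2, 1), which the extra_bit added to the exponent below
     accounts for. */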

  /* set exponent */
  qexp = MPFR_GET_EXP (u) - MPFR_GET_EXP (v) + extra_bit;

  /* sh is the number of zero bits in the low limb of the quotient */
  MPFR_UNSIGNED_MINUS_MODULO(sh, MPFR_PREC(q));

  like_rndz = rnd_mode == MPFR_RNDZ ||
    rnd_mode == (sign_quotient < 0 ? MPFR_RNDU : MPFR_RNDD);

  /**************************************************************************
   *                                                                        *
   *        We first try Mulders' short division (for large operands)       *
   *                                                                        *
   **************************************************************************/

  if (MPFR_UNLIKELY(q0size >= MPFR_DIV_THRESHOLD &&
                    vsize >= MPFR_DIV_THRESHOLD))
    {
      mp_size_t n = q0size + 1; /* we will perform a short (2n)/n division */
      mpfr_limb_ptr ap, bp, qp;
      mpfr_prec_t p;

      /* since Mulders' short division clobbers the dividend, we have to
         copy it */
      ap = MPFR_TMP_LIMBS_ALLOC (n + n);
      if (usize >= n + n) /* truncate the dividend */
        MPN_COPY(ap, up + usize - (n + n), n + n);
      else /* zero-pad the dividend */
        {
          MPN_COPY(ap + (n + n) - usize, up, usize);
          MPN_ZERO(ap, (n + n) - usize);
        }

      if (vsize >= n) /* truncate the divisor */
        bp = vp + vsize - n;
      else /* zero-pad the divisor */
        {
          bp = MPFR_TMP_LIMBS_ALLOC (n);
          MPN_COPY(bp + n - vsize, vp, vsize);
          MPN_ZERO(bp, n - vsize);
        }

      qp = MPFR_TMP_LIMBS_ALLOC (n);
      /* since n = q0size + 1, we have n >= 2 here */
      qh = mpfr_divhigh_n (qp, ap, bp, n);
      MPFR_ASSERTD (qh == 0 || qh == 1);
      /* in all cases, the error is at most (2n+2) ulps on qh*B^n+{qp,n},
         cf algorithms.tex */

      p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (2 * n + 2);
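      /* For instance, with 64-bit limbs and n = 25 (q0size = 24), the short
         quotient has n*64 = 1600 bits and is accurate to within 2n+2 = 52
         ulps, so p = 1600 - ceil(log2(52)) = 1600 - 6 = 1594 bits can be
         trusted when deciding whether rounding is possible. */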
      /* If rnd=RNDN, we need to be able to round with a directed rounding
         and one more bit. */
      if (qh == 1)
        {
          mpn_rshift (qp, qp, n, 1);
          qp[n - 1] |= MPFR_LIMB_HIGHBIT;
        }
      if (MPFR_LIKELY (mpfr_round_p (qp, n, p,
                                     MPFR_PREC(q) + (rnd_mode == MPFR_RNDN))))
        {
          /* we can round correctly whatever the rounding mode */
          MPN_COPY (q0p, qp + 1, q0size);
          q0p[0] &= ~MPFR_LIMB_MASK(sh); /* put to zero low sh bits */

          if (rnd_mode == MPFR_RNDN) /* round to nearest */
            {
              /* we know we can round, thus we are never in the even rule case:
                 if the round bit is 0, we truncate
                 if the round bit is 1, we add 1 */
              if (sh > 0)
                round_bit = (qp[1] >> (sh - 1)) & 1;
              else
                round_bit = qp[0] >> (GMP_NUMB_BITS - 1);
              /* TODO: add value coverage tests in tdiv to check that
                 we reach this part with different values of qh and
                 round_bit (4 cases). */
              if (round_bit == 0)
                {
                  inex = -1;
                  goto truncate;
                }
              else /* round_bit = 1 */
                goto add_one_ulp;
            }
          else if (! like_rndz) /* round away */
            goto add_one_ulp;
          else /* round to zero: nothing to do */
            {
              inex = -1;
              goto truncate;
            }
        }
    }

  /**************************************************************************
   *                                                                        *
   *      Mulders' short division failed: we revert to integer division     *
   *                                                                        *
   **************************************************************************/

  if (MPFR_UNLIKELY(rnd_mode == MPFR_RNDN && sh == 0))
    { /* we compute the quotient with one more limb, in order to get
         the round bit in the quotient, and the remainder only contains
         sticky bits */
      qsize = q0size + 1;
      /* need to allocate memory for the quotient */
      qp = MPFR_TMP_LIMBS_ALLOC (qsize);
    }
  else
    {
      qsize = q0size;
      qp = q0p; /* directly put the quotient in the destination */
    }
  qqsize = qsize + qsize;

  /* prepare the dividend */
  ap = MPFR_TMP_LIMBS_ALLOC (qqsize);
  if (MPFR_LIKELY(qqsize > usize)) /* use the full dividend */
    {
      k = qqsize - usize; /* k > 0 */
      MPN_ZERO(ap, k);
      if (extra_bit)
        ap[k - 1] = mpn_rshift (ap + k, up, usize, 1);
      else
        MPN_COPY(ap + k, up, usize);
    }
  else /* truncate the dividend */
    {
      k = usize - qqsize;
      if (extra_bit)
        sticky_u = mpn_rshift (ap, up + k, qqsize, 1);
      else
        MPN_COPY(ap, up + k, qqsize);
      sticky_u = sticky_u || mpfr_mpn_cmpzero (up, k);
    }
  low_u = sticky_u;

  /* now sticky_u is non-zero iff the truncated part of u is non-zero */

  /* prepare the divisor */
  if (MPFR_LIKELY(vsize >= qsize))
    {
      k = vsize - qsize;
      if (qp != vp)
        bp = vp + k; /* avoid copying the divisor */
      else /* need to copy, since mpn_divrem doesn't allow overlap
              between quotient and divisor, necessarily k = 0
              since quotient and divisor are the same mpfr variable */
        {
          bp = MPFR_TMP_LIMBS_ALLOC (qsize);
          MPN_COPY(bp, vp, vsize);
        }
      sticky_v = sticky_v || mpfr_mpn_cmpzero (vp, k);
      k = 0;
    }
  else /* vsize < qsize: small divisor case */
    {
      bp = vp;
      k = qsize - vsize;
    }

  /**************************************************************************
   *                                                                        *
   * Here we perform the real division of {ap+k,qqsize-k} by {bp,qsize-k}   *
   *                                                                        *
   **************************************************************************/

  /* In the general case (usize > 2*qsize and vsize > qsize), we have:
        ______________________________________
       |                          |           |   u1 has 2*qsize limbs
       |            u1            |    u0     |   u0 has usize-2*qsize limbs
       |__________________________|___________|

        ____________________
       |           |        |   v1 has qsize limbs
       |    v1     |   v0   |   v0 has vsize-qsize limbs
       |___________|________|

     We divide u1 by v1, with quotient in qh + {qp, qsize} and
     remainder (denoted r below) stored in place of the low qsize limbs of u1.
  */
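
  /* Concrete limb counts (illustrative): for usize = 10, vsize = 5 and
     qsize = 3, we have qqsize = 6, so u1 gets the upper 6 limbs of u and
     u0 the remaining 4, while v1 is the upper 3 limbs of v and v0 the
     remaining 2; mpn_divrem below then divides the 6-limb u1 by the
     3-limb v1. */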

  /* if Mulders' short division failed, we revert to division with remainder */
  qh = mpn_divrem (qp, 0, ap + k, qqsize - k, bp, qsize - k);
  /* let u1 be the upper part of u, and v1 the upper part of v (with sticky_u
     and sticky_v representing the lower parts), then the quotient of u1 by v1
     is now in {qp, qsize}, with possible carry in qh, and the remainder in
     {ap + k, qsize - k} */
  /* warning: qh may be 1 if u1 == v1, but u < v */

  k = qsize;
  sticky_u = sticky_u || mpfr_mpn_cmpzero (ap, k);

  sticky = sticky_u | sticky_v;

  /* now sticky is non-zero iff one of the following holds:
     (a) the truncated part of u is non-zero
     (b) the truncated part of v is non-zero
     (c) the remainder from division is non-zero */

  if (MPFR_LIKELY(qsize == q0size))
    {
      sticky3 = qp[0] & MPFR_LIMB_MASK(sh); /* does nothing when sh=0 */
      sh2 = sh;
    }
  else /* qsize = q0size + 1: only happens when rnd_mode=MPFR_RNDN and sh=0 */
    {
      MPN_COPY (q0p, qp + 1, q0size);
      sticky3 = qp[0];
      sh2 = GMP_NUMB_BITS;
    }
  qp[0] ^= sticky3;
  /* sticky3 contains the truncated bits from the quotient,
     including the round bit, and 1 <= sh2 <= GMP_NUMB_BITS
     is the number of bits in sticky3 */
  inex = (sticky != MPFR_LIMB_ZERO) || (sticky3 != MPFR_LIMB_ZERO);

  /* to round, we distinguish two cases:
     (a) vsize <= qsize: we used the full divisor
     (b) vsize > qsize: the divisor was truncated
  */

  if (MPFR_LIKELY(vsize <= qsize)) /* use the full divisor */
    {
      if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
        {
          round_bit = sticky3 & (MPFR_LIMB_ONE << (sh2 - 1));
          sticky = (sticky3 ^ round_bit) | sticky_u;
        }
      else if (like_rndz || inex == 0)
        sticky = (inex == 0) ? MPFR_LIMB_ZERO : MPFR_LIMB_ONE;
      else /* round away from zero */
        sticky = MPFR_LIMB_ONE;
      goto case_1;
    }
  else /* vsize > qsize: need to truncate the divisor */
    {
      if (inex == 0)
        goto truncate;
      else
        {
          /* We know the estimated quotient is an upper bound of the exact
             quotient (with rounding toward zero), with a difference of at
             most 2 in qp[0].
             Thus we can round except when sticky3 is 000...000 or 000...001
             for directed rounding, and 100...000 or 100...001 for rounding
             to nearest. (For rounding to nearest, we cannot determine the
             inexact flag for 000...000 or 000...001.)
          */
          mp_limb_t sticky3orig = sticky3;
          if (rnd_mode == MPFR_RNDN)
            {
              round_bit = sticky3 & (MPFR_LIMB_ONE << (sh2 - 1));
              sticky3 = sticky3 ^ round_bit;
            }
          if (sticky3 != MPFR_LIMB_ZERO && sticky3 != MPFR_LIMB_ONE)
            {
              sticky = sticky3;
              goto case_1;
            }
          else /* hard case: we have to compare q1 * v0 and r + u0,
                  where q1 * v0 has qsize + (vsize-qsize) = vsize limbs, and
                  r + u0 has qsize + (usize-2*qsize) = usize-qsize limbs */
            {
              mp_size_t l;
              mpfr_limb_ptr sp;
              int cmp_s_r;
              mp_limb_t qh2;

              sp = MPFR_TMP_LIMBS_ALLOC (vsize);
              k = vsize - qsize;
              /* sp <- {qp, qsize} * {vp, vsize-qsize} */
              qp[0] ^= sticky3orig; /* restore original quotient */
              if (qsize >= k)
                mpn_mul (sp, qp, qsize, vp, k);
              else
                mpn_mul (sp, vp, k, qp, qsize);
              if (qh)
                qh2 = mpn_add_n (sp + qsize, sp + qsize, vp, k);
              else
                qh2 = MPFR_LIMB_ZERO;
              qp[0] ^= sticky3orig; /* restore truncated quotient */

              /* compare qh2 + {sp, k + qsize} to {ap, qsize} + u0 */
              cmp_s_r = (qh2 != 0) ? 1 : mpn_cmp (sp + k, ap, qsize);
              if (cmp_s_r == 0) /* compare {sp, k} and u0 */
                {
                  cmp_s_r = (usize >= qqsize) ?
                    mpfr_mpn_cmp_aux (sp, k, up, usize - qqsize, extra_bit) :
                    mpfr_mpn_cmpzero (sp, k);
                }
              /* now cmp_s_r > 0 if {sp, vsize} > {ap, qsize} + u0
                     cmp_s_r = 0 if {sp, vsize} = {ap, qsize} + u0
                     cmp_s_r < 0 if {sp, vsize} < {ap, qsize} + u0 */
              if (cmp_s_r <= 0) /* quotient is in [q1, q1+1) */
                {
                  sticky = (cmp_s_r == 0) ? sticky3 : MPFR_LIMB_ONE;
                  goto case_1;
                }
              else /* cmp_s_r > 0, quotient is < q1: to determine if it is
                      in [q1-2,q1-1] or in [q1-1,q1], we need to subtract
                      the low part u0 of the dividend from q*v0 */
                {
                  mp_limb_t cy = MPFR_LIMB_ZERO;

                  /* subtract u0 >> extra_bit if non-zero */
                  if (qh2 != 0) /* whatever the value of {up, m + k}, it
                                   will be smaller than qh2 + {sp, k} */
                    cmp_s_r = 1;
                  else
                    {
                      if (low_u != MPFR_LIMB_ZERO)
                        {
                          mp_size_t m;
                          l = usize - qqsize; /* number of limbs in u0 */
                          m = (l > k) ? l - k : 0;
                          cy = (extra_bit) ?
                            (up[m] & MPFR_LIMB_ONE) : MPFR_LIMB_ZERO;
                          if (l >= k) /* u0 has at least as many limbs as s:
                                         first look if {up, m} is not zero,
                                         and compare {sp, k} and {up + m, k} */
                            {
                              cy = cy || mpfr_mpn_cmpzero (up, m);
                              low_u = cy;
                              cy = mpfr_mpn_sub_aux (sp, up + m, k,
                                                     cy, extra_bit);
                            }
                          else /* l < k: s has more limbs than u0 */
                            {
                              low_u = MPFR_LIMB_ZERO;
                              if (cy != MPFR_LIMB_ZERO)
                                cy = mpn_sub_1 (sp + k - l - 1, sp + k - l - 1,
                                                1, MPFR_LIMB_HIGHBIT);
                              cy = mpfr_mpn_sub_aux (sp + k - l, up, l,
                                                     cy, extra_bit);
                            }
                        }
                      MPFR_ASSERTD (cy <= 1);
                      cy = mpn_sub_1 (sp + k, sp + k, qsize, cy);
                      /* subtract r */
                      cy += mpn_sub_n (sp + k, sp + k, ap, qsize);
                      MPFR_ASSERTD (cy <= 1);
                      /* now compare {sp, vsize} to v */
                      cmp_s_r = mpn_cmp (sp, vp, vsize);
                      if (cmp_s_r == 0 && low_u != MPFR_LIMB_ZERO)
                        cmp_s_r = 1; /* since in fact we subtracted
                                        less than 1 */
                    }
                  if (cmp_s_r <= 0) /* q1-1 <= u/v < q1 */
                    {
                      if (sticky3 == MPFR_LIMB_ONE)
                        { /* q1-1 is either representable (directed rounding),
                             or the middle of two numbers (nearest) */
                          sticky = (cmp_s_r) ? MPFR_LIMB_ONE : MPFR_LIMB_ZERO;
                          goto case_1;
                        }
                      /* now necessarily sticky3=0 */
                      else if (round_bit == MPFR_LIMB_ZERO)
                        { /* round_bit=0, sticky3=0: q1-1 is exact only
                             when sh=0 */
                          inex = (cmp_s_r || sh) ? -1 : 0;
                          if (rnd_mode == MPFR_RNDN ||
                              (! like_rndz && inex != 0))
                            {
                              inex = 1;
                              goto truncate_check_qh;
                            }
                          else /* round down */
                            goto sub_one_ulp;
                        }
                      else /* sticky3=0, round_bit=1 ==> rounding to nearest */
                        {
                          inex = cmp_s_r;
                          goto truncate;
                        }
                    }
                  else /* q1-2 < u/v < q1-1 */
                    {
                      /* if rnd=MPFR_RNDN, the result is q1 when
                         q1-2 >= q1-2^(sh-1), i.e. sh >= 2,
                         otherwise (sh=1) it is q1-2 */
                      if (rnd_mode == MPFR_RNDN) /* sh > 0 */
                        {
                          /* Case sh=1: sb=0 always, and q1-rb is exactly
                             representable, like q1-rb-2.
                             rb action
                             0  subtract two ulps, inex=-1
                             1  truncate, inex=1

                             Case sh>1: one ulp is 2^(sh-1) >= 2
                             rb sb action
                             0  0  truncate, inex=1
                             0  1  truncate, inex=1
                             1  x  truncate, inex=-1
                          */
                          if (sh == 1)
                            {
                              if (round_bit == MPFR_LIMB_ZERO)
                                {
                                  inex = -1;
                                  sh = 0;
                                  goto sub_two_ulp;
                                }
                              else
                                {
                                  inex = 1;
                                  goto truncate_check_qh;
                                }
                            }
                          else /* sh > 1 */
                            {
                              inex = (round_bit == MPFR_LIMB_ZERO) ? 1 : -1;
                              goto truncate_check_qh;
                            }
                        }
                      else if (like_rndz)
                        {
                          /* the result is down(q1-2), i.e. subtract one
                             ulp if sh > 0, and two ulps if sh=0 */
                          inex = -1;
                          if (sh > 0)
                            goto sub_one_ulp;
                          else
                            goto sub_two_ulp;
                        }
                      /* if round away from zero, the result is up(q1-1),
                         which is q1 unless sh = 0, where it is q1-1 */
                      else
                        {
                          inex = 1;
                          if (sh > 0)
                            goto truncate_check_qh;
                          else /* sh = 0 */
                            goto sub_one_ulp;
                        }
                    }
                }
            }
        }
    }

 case_1: /* quotient is in [q1, q1+1),
            round_bit is the round bit (0 for directed rounding),
            sticky the sticky bit */
  if (like_rndz || (round_bit == MPFR_LIMB_ZERO && sticky == MPFR_LIMB_ZERO))
    {
      inex = round_bit == MPFR_LIMB_ZERO && sticky == MPFR_LIMB_ZERO ? 0 : -1;
      goto truncate;
    }
  else if (rnd_mode == MPFR_RNDN) /* sticky <> 0 or round <> 0 */
    {
      if (round_bit == MPFR_LIMB_ZERO) /* necessarily sticky <> 0 */
        {
          inex = -1;
          goto truncate;
        }
      /* round_bit = 1 */
      else if (sticky != MPFR_LIMB_ZERO)
        goto add_one_ulp; /* inex=1 */
      else /* round_bit=1, sticky=0 */
        goto even_rule;
    }
  else /* round away from zero, sticky <> 0 */
    goto add_one_ulp; /* with inex=1 */

 sub_two_ulp:
  /* we cannot subtract MPFR_LIMB_ONE << (sh+1) since this is
     undefined for sh = GMP_NUMB_BITS */
  qh -= mpn_sub_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh);
  /* go through */

 sub_one_ulp:
  qh -= mpn_sub_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh);
  /* go through truncate_check_qh */

 truncate_check_qh:
  if (qh)
    {
      if (MPFR_LIKELY (qexp < MPFR_EXP_MAX))
        qexp ++;
      /* else qexp is now incorrect, but one will still get an overflow */
      q0p[q0size - 1] = MPFR_LIMB_HIGHBIT;
    }
  goto truncate;

 even_rule: /* has to set inex */
  inex = (q0p[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
  if (inex < 0)
    goto truncate;
  /* else go through add_one_ulp */

 add_one_ulp:
  inex = 1; /* always here */
  if (mpn_add_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh))
    {
      if (MPFR_LIKELY (qexp < MPFR_EXP_MAX))
        qexp ++;
      /* else qexp is now incorrect, but one will still get an overflow */
      q0p[q0size - 1] = MPFR_LIMB_HIGHBIT;
    }

 truncate: /* inex already set */

  MPFR_TMP_FREE(marker);

  /* check for underflow/overflow */
  if (MPFR_UNLIKELY(qexp > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, sign_quotient);
  else if (MPFR_UNLIKELY(qexp < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN && ((qexp < __gmpfr_emin - 1) ||
                                    (inex >= 0 && mpfr_powerof2_raw (q))))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, sign_quotient);
    }
  MPFR_SET_EXP(q, qexp);

  inex *= sign_quotient;
  MPFR_RET (inex);
}