xref: /netbsd-src/external/lgpl3/mpfr/dist/src/div.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_div -- divide two floating-point numbers
2 
3 Copyright 1999, 2001-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 /* References:
24    [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
25        Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
26        July 25-27, 2011, pages 7-14.
27    [2] Improved Division by Invariant Integers, Niels Möller and Torbjörn Granlund,
28        IEEE Transactions on Computers, volume 60, number 2, pages 165-175, 2011.
29 */
30 
31 #define MPFR_NEED_LONGLONG_H
32 #include "mpfr-impl.h"
33 
34 #if !defined(MPFR_GENERIC_ABI)
35 
36 #if GMP_NUMB_BITS == 64
37 
38 #include "invert_limb.h"
39 
/* Given u = u1*B+u0 < v = v1*B+v0 with v normalized (high bit of v1 set),
   put in q = Q1*B+Q0 an approximation of floor(u*B^2/v), with:
   B = 2^GMP_NUMB_BITS and q <= floor(u*B^2/v) <= q + 21.
   Note: this function requires __gmpfr_invert_limb_approx (from invert_limb.h)
   which is only provided so far for 64-bit limb.
   Note: __gmpfr_invert_limb_approx can be replaced by __gmpfr_invert_limb,
   in that case the bound 21 reduces to 16. */
static void
mpfr_div2_approx (mpfr_limb_ptr Q1, mpfr_limb_ptr Q0,
                  mp_limb_t u1, mp_limb_t u0,
                  mp_limb_t v1, mp_limb_t v0)
{
  mp_limb_t inv, q1, q0, r1, r0, cy, xx, yy;

  /* first compute an approximation of q1, using a lower approximation of
     B^2/(v1+1) - B */
  if (MPFR_UNLIKELY(v1 == MPFR_LIMB_MAX))
    /* v1+1 would wrap to 0: B^2/(v1+1) - B is exactly 0 in this case */
    inv = MPFR_LIMB_ZERO;
  else
    __gmpfr_invert_limb_approx (inv, v1 + 1);
  /* now inv <= B^2/(v1+1) - B */
  umul_ppmm (q1, q0, u1, inv);
  q1 += u1; /* q1 approximates u1*(B+inv)/B ~ u1*B/(v1+1) */
  /* now q1 <= u1*B/(v1+1) < (u1*B+u0)*B/(v1*B+v0) */

  /* compute q1*(v1*B+v0) into r1:r0:yy and subtract from u1:u0:0 */
  umul_ppmm (r1, r0, q1, v1);
  umul_ppmm (xx, yy, q1, v0);

  ADD_LIMB (r0, xx, cy);
  r1 += cy;

  /* we ignore yy below, but first increment r0, to ensure we get a lower
     approximation of the remainder */
  r0 += yy != 0;
  r1 += r0 == 0 && yy != 0; /* carry from the conditional increment of r0 */
  r0 = u0 - r0;
  r1 = u1 - r1 - (r0 > u0); /* (r0 > u0) is the borrow out of the low limb */

  /* r1:r0 should be non-negative */
  MPFR_ASSERTD((r1 & MPFR_LIMB_HIGHBIT) == 0);

  /* the second quotient limb is approximated by (r1*B^2+r0*B) / v1,
     and since (B+inv)/B approximates B/v1, this is in turn approximated
     by (r1*B+r0)*(B+inv)/B = r1*B+r1*inv+r0+(r0*inv/B) */

  q0 = r0;                        /* the r0 term */
  q1 += r1;                       /* the r1*B term */
  /* add floor(r0*inv/B) to q0 */
  umul_ppmm (xx, yy, r0, inv);
  ADD_LIMB (q0, xx, cy);
  q1 += cy;
  MPFR_ASSERTD (r1 <= 4);
  /* TODO: use value coverage on r1 to check that the 5 cases are tested. */
  while (r1) /* accumulate the r1*inv term; at most 4 iterations */
    {
      /* add inv to q0 */
      ADD_LIMB (q0, inv, cy);
      q1 += cy;
      r1 --;
    }

  *Q1 = q1;
  *Q0 = q0;
}
105 
106 #endif /* GMP_NUMB_BITS == 64 */
107 
/* Special code for PREC(q) = PREC(u) = PREC(v) = p < GMP_NUMB_BITS */
static int
mpfr_div_1 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(q);
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mpfr_prec_t sh = GMP_NUMB_BITS - p; /* number of unused low bits in a limb */
  mp_limb_t u0 = MPFR_MANT(u)[0];
  mp_limb_t v0 = MPFR_MANT(v)[0];
  mp_limb_t q0, rb, sb, mask = MPFR_LIMB_MASK(sh);
  int extra;

  /* u0 and v0 are normalized significands (high bit set), thus u0/v0 lies
     in (1/2, 2).  When u0 >= v0 the ratio is in [1, 2): divide u0 - v0
     instead and remember the exponent shift in 'extra'. */
  if ((extra = (u0 >= v0)))
    u0 -= v0;

#if GMP_NUMB_BITS == 64 /* __gmpfr_invert_limb_approx only exists for 64-bit */
  /* First try with an approximate quotient.
     FIXME: for p >= 62 we have sh-1 < 2 and will never be able to round
     correctly.  Even for p = 61 we have sh-1 = 2 and we can round correctly
     only when the two last bits of q0 are 01, which happens with probability
     25% only. */
  {
    mp_limb_t inv;
    __gmpfr_invert_limb_approx (inv, v0);
    umul_ppmm (rb, sb, u0, inv);
  }
  rb += u0;
  q0 = rb >> extra;
  /* rb does not exceed the true quotient floor(u0*2^GMP_NUMB_BITS/v0),
     with error at most 2, which means the rational quotient q satisfies
     rb <= q < rb + 3. We can round correctly except when the last sh-1 bits
     of q0 are 000..000 or 111..111 or 111..110. */
  if (MPFR_LIKELY(((q0 + 2) & (mask >> 1)) > 2))
    {
      rb = q0 & (MPFR_LIMB_ONE << (sh - 1));
      sb = 1; /* result cannot be exact in this case */
    }
  else /* the true quotient is rb, rb+1 or rb+2 */
    {
      mp_limb_t h, l;
      q0 = rb;
      umul_ppmm (h, l, q0, v0);
      MPFR_ASSERTD(h < u0 || (h == u0 && l == MPFR_LIMB_ZERO));
      /* subtract {h,l} from {u0,0} */
      sub_ddmmss (h, l, u0, 0, h, l);
      /* the remainder {h, l} should be < v0 */
      /* This while loop is executed at most two times, but does not seem
         slower than two consecutive identical if-statements. */
      while (h || l >= v0)
        {
          q0 ++;
          h -= (l < v0);
          l -= v0;
        }
      MPFR_ASSERTD(h == 0 && l < v0);
      /* q0 & extra: the bit shifted out below goes into the sticky bit */
      sb = l | (q0 & extra);
      q0 >>= extra;
      rb = q0 & (MPFR_LIMB_ONE << (sh - 1)); /* round bit */
      sb |= q0 & (mask >> 1);                /* remaining low bits */
    }
#else
  udiv_qrnnd (q0, sb, u0, 0, v0);
  sb |= q0 & extra; /* bit shifted out below goes into the sticky bit */
  q0 >>= extra;
  rb = q0 & (MPFR_LIMB_ONE << (sh - 1)); /* round bit */
  sb |= q0 & (mask >> 1);                /* remaining low bits */
#endif

  qp[0] = (MPFR_LIMB_HIGHBIT | q0) & ~mask;
  qx += extra;
  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (MPFR_UNLIKELY(qx > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(qx < __gmpfr_emin))
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[0] = 1000....000 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 || (qp[0] == MPFR_LIMB_HIGHBIT && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* It is not possible to have rb <> 0 and sb = 0 here, since it would
         mean a n-bit by n-bit division gives an exact (n+1)-bit number.
         And since the case rb = sb = 0 was already dealt with, we cannot
         have sb = 0. Thus we cannot be in the middle of two numbers. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE << sh;
      MPFR_ASSERTD(qp[0] != 0);
      /* It is not possible to have an overflow in the addition above.
         Proof: if p is the precision of the inputs, it would mean we have two
         integers n and d with 2^(p-1) <= n, d < 2^p, such that the binary
         expansion of n/d starts with p '1', and has at least one '1' later.
         We distinguish two cases:
         (1) if n/d < 1, it would mean 1-2^(-p) < n/d < 1
         (2) if n/d >= 1, it would mean 2-2^(1-p) < n/d < 2
         In case (1), multiplying by d we get 1-d/2^p < n < d,
         which has no integer solution since d/2^p < 1.
         In case (2), multiplying by d we get 2d-2d/2^p < n < 2d:
         (2a) if d=2^(p-1), we get 2^p-1 < n < 2^p which has no solution;
              if d>=2^(p-1)+1, then 2d-2d/2^p >= 2^p+2-2 = 2^p, thus there is
              no solution n < 2^p either. */
      MPFR_RET(MPFR_SIGN(q));
    }
}
253 
/* Special code for PREC(q) = GMP_NUMB_BITS,
   with PREC(u), PREC(v) <= GMP_NUMB_BITS. */
static int
mpfr_div_1n (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mp_limb_t u0 = MPFR_MANT(u)[0];
  mp_limb_t v0 = MPFR_MANT(v)[0];
  mp_limb_t q0, rb, sb, l;
  int extra;

  MPFR_ASSERTD(MPFR_PREC(q) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(v) <= GMP_NUMB_BITS);

  /* u0 and v0 are normalized significands (high bit set), thus u0/v0 lies
     in (1/2, 2).  When u0 >= v0 the ratio is in [1, 2): divide u0 - v0
     instead and remember the exponent shift in 'extra'. */
  if ((extra = (u0 >= v0)))
    u0 -= v0;

#if GMP_NUMB_BITS == 64 /* __gmpfr_invert_limb_approx only exists for 64-bit */
  {
    mp_limb_t inv, h;

    /* First compute an approximate quotient. */
    __gmpfr_invert_limb_approx (inv, v0);
    umul_ppmm (rb, sb, u0, inv);
    q0 = u0 + rb; /* q0 = u0*(B+inv)/B, a lower approximation of the quotient */
    /* rb does not exceed the true quotient floor(u0*2^GMP_NUMB_BITS/v0),
       with error at most 2, which means the rational quotient q satisfies
       rb <= q < rb + 3, thus the true quotient is rb, rb+1 or rb+2 */
    umul_ppmm (h, l, q0, v0);
    MPFR_ASSERTD(h < u0 || (h == u0 && l == MPFR_LIMB_ZERO));
    /* subtract {h,l} from {u0,0} */
    sub_ddmmss (h, l, u0, 0, h, l);
    /* the remainder {h, l} should be < v0 */
    /* This while loop is executed at most two times, but does not seem
       slower than two consecutive identical if-statements. */
    while (h || l >= v0)
      {
        q0 ++;
        h -= (l < v0);
        l -= v0;
      }
    MPFR_ASSERTD(h == 0 && l < v0);
  }
#else
  udiv_qrnnd (q0, l, u0, 0, v0);
#endif

  /* now (u0 - extra*v0) * 2^GMP_NUMB_BITS = q0*v0 + l with 0 <= l < v0 */

  /* If extra=0, the quotient is q0, the round bit is 1 if l >= v0/2,
     and sb are the remaining bits from l.
     If extra=1, the quotient is MPFR_LIMB_HIGHBIT + (q0 >> 1), the round bit
     is the least significant bit of q0, and sb is l. */

  if (extra == 0)
    {
      qp[0] = q0;
      /* If "l + l < l", then there is a carry in l + l, thus 2*l > v0.
         Otherwise if there is no carry, we check whether 2*l >= v0. */
      rb = (l + l < l) || (l + l >= v0);
      /* sticky bit: the remainder of 2*l by v0 when rb is set, l otherwise */
      sb = (rb) ? l + l - v0 : l;
    }
  else
    {
      qp[0] = MPFR_LIMB_HIGHBIT | (q0 >> 1);
      rb = q0 & MPFR_LIMB_ONE;
      sb = l;
      qx ++;
    }

  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (MPFR_UNLIKELY(qx > __gmpfr_emax))
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(qx < __gmpfr_emin))
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[0] = 1000....000 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits,
         thus u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 || (qp[0] == MPFR_LIMB_HIGHBIT && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* It is not possible to have rb <> 0 and sb = 0 here, since it would
         mean a n-bit by n-bit division gives an exact (n+1)-bit number.
         And since the case rb = sb = 0 was already dealt with, we cannot
         have sb = 0. Thus we cannot be in the middle of two numbers. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE;
      /* there can be no overflow in the addition above,
         see the analysis of mpfr_div_1 */
      MPFR_ASSERTD(qp[0] != 0);
      MPFR_RET(MPFR_SIGN(q));
    }
}
390 
/* Special code for GMP_NUMB_BITS < PREC(q) < 2*GMP_NUMB_BITS and
   PREC(u) = PREC(v) = PREC(q) */
static int
mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(q);
  mpfr_limb_ptr qp = MPFR_MANT(q);
  mpfr_exp_t qx = MPFR_GET_EXP(u) - MPFR_GET_EXP(v);
  mpfr_prec_t sh = 2*GMP_NUMB_BITS - p; /* number of unused low bits */
  mp_limb_t h, rb, sb, mask = MPFR_LIMB_MASK(sh);
  mp_limb_t v1 = MPFR_MANT(v)[1], v0 = MPFR_MANT(v)[0];
  mp_limb_t q1, q0, r3, r2, r1, r0, l, t;
  int extra;

  /* r3:r2 holds the significand of u; if u >= v (as significands), divide
     u - v instead and remember the exponent shift in 'extra'. */
  r3 = MPFR_MANT(u)[1];
  r2 = MPFR_MANT(u)[0];
  extra = r3 > v1 || (r3 == v1 && r2 >= v0);
  if (extra)
    sub_ddmmss (r3, r2, r3, r2, v1, v0);

  MPFR_ASSERTD(r3 < v1 || (r3 == v1 && r2 < v0));

#if GMP_NUMB_BITS == 64
  mpfr_div2_approx (&q1, &q0, r3, r2, v1, v0);
  /* we know q1*B+q0 is smaller or equal to the exact quotient, with
     difference at most 21 */
  if (MPFR_LIKELY(((q0 + 21) & (mask >> 1)) > 21))
    sb = 1; /* result is not exact when we can round with an approximation */
  else
    {
      /* we know q1:q0 is a good-enough approximation, use it!
         (note: the inner h and l intentionally shadow the outer ones) */
      mp_limb_t s0, s1, s2, h, l;

      /* Since we know the difference should be at most 21*(v1:v0) after the
         subtraction below, thus at most 21*2^128, it suffices to compute the
         lower 3 limbs of (q1:q0) * (v1:v0). */
      umul_ppmm (s1, s0, q0, v0);
      umul_ppmm (s2, l, q0, v1);
      s1 += l;
      s2 += (s1 < l); /* carry from s1 += l */
      umul_ppmm (h, l, q1, v0);
      s1 += l;
      s2 += h + (s1 < l);
      s2 += q1 * v1; /* only the low limb of q1*v1 matters for s2 */
      /* Subtract s2:s1:s0 from r2:0:0, with result in s2:s1:s0. */
      s2 = r2 - s2;
      /* now negate s1:s0 */
      s0 = -s0;
      s1 = -s1 - (s0 != 0);
      /* there is a borrow in s2 when s0 and s1 are not both zero */
      s2 -= (s1 != 0 || s0 != 0);
      /* correct the quotient while the remainder is >= v (at most a few
         iterations, since the approximation error is bounded) */
      while (s2 > 0 || (s1 > v1) || (s1 == v1 && s0 >= v0))
        {
          /* add 1 to q1:q0 */
          q0 ++;
          q1 += (q0 == 0);
          /* subtract v1:v0 from s2:s1:s0 */
          s2 -= (s1 < v1) || (s1 == v1 && s0 < v0);
          sub_ddmmss (s1, s0, s1, s0, v1, v0);
        }
      sb = s1 | s0; /* sticky bit: non-zero iff the remainder is non-zero */
    }
  goto round_div2;
#endif

  /* now r3:r2 < v1:v0 */
  if (MPFR_UNLIKELY(r3 == v1)) /* can occur in some rare cases */
    {
      /* This can only occur in case extra=0, since otherwise we would have
         u_old >= u_new + v >= B^2/2 + B^2/2 = B^2. In this case we have
         r3 = u1 and r2 = u0, thus the remainder u*B-q1*v is
         v1*B^2+u0*B-(B-1)*(v1*B+v0) = (u0-v0+v1)*B+v0.
         Warning: in this case q1 = B-1 can be too large, for example with
         u = B^2/2 and v = B^2/2 + B - 1, then u*B-(B-1)*u = -1/2*B^2+2*B-1. */
      MPFR_ASSERTD(extra == 0);
      q1 = MPFR_LIMB_MAX; /* q1 = B-1 */
      r1 = v0;
      t = v0 - r2; /* t > 0 since r3:r2 < v1:v0 */
      r2 = v1 - t;
      if (t > v1) /* q1 = B-1 is too large, we need q1 = B-2, which is ok
                        since u*B - q1*v >= v1*B^2-(B-2)*(v1*B+B-1) =
                        -B^2 + 2*B*v1 + 3*B - 2 >= 0 since v1>=B/2 and B>=2 */
        {
          q1 --;
          /* add v to r2:r1 */
          r1 += v0;
          r2 += v1 + (r1 < v0);
        }
    }
  else
    {
      /* divide r3:r2 by v1: requires r3 < v1 */
      udiv_qrnnd (q1, r2, r3, r2, v1);
      /* u-extra*v = q1 * v1 + r2 */

      /* now subtract q1*v0 from r2:0 */
      umul_ppmm (h, l, q1, v0);
      t = r2; /* save old value of r2 */
      r1 = -l;
      r2 -= h + (l != 0);
      /* Note: h + (l != 0) < 2^GMP_NUMB_BITS. */

      /* we have r2:r1 = oldr2:0 - q1*v0 mod 2^(2*GMP_NUMB_BITS)
         thus (u-extra*v)*B = q1 * v + r2:r1 mod 2^(2*GMP_NUMB_BITS) */

      /* this while loop should be run at most twice */
      while (r2 > t) /* borrow when subtracting h + (l != 0), q1 too large */
        {
          q1 --;
          /* add v1:v0 to r2:r1 */
          t = r2;
          r1 += v0;
          r2 += v1 + (r1 < v0);
          /* note: since 2^(GMP_NUMB_BITS-1) <= v1 + (r1 < v0)
             <= 2^GMP_NUMB_BITS, it suffices to check if r2 <= t to see
             if there was a carry or not. */
        }
    }

  /* now (u-extra*v)*B = q1 * v + r2:r1 with 0 <= r2:r1 < v */

  MPFR_ASSERTD(r2 < v1 || (r2 == v1 && r1 < v0));

  if (MPFR_UNLIKELY(r2 == v1))
    {
      /* rare case, analogous to the r3 == v1 case above */
      q0 = MPFR_LIMB_MAX; /* q0 = B-1 */
      /* r2:r1:0 - q0*(v1:v0) = v1:r1:0 - (B-1)*(v1:v0)
         = r1:0 - v0:0 + v1:v0 */
      r0 = v0;
      t = v0 - r1; /* t > 0 since r2:r1 < v1:v0 */
      r1 = v1 - t;
      if (t > v1) /* q0 = B-1 is too large, take q0 = B-2 */
        {
          q0 --;
          /* add v to r1:r0 */
          r0 += v0;
          r1 += v1 + (r0 < v0);
        }
    }
  else
    {
      /* divide r2:r1 by v1: requires r2 < v1 */
      udiv_qrnnd (q0, r1, r2, r1, v1);

      /* r2:r1 = q0*v1 + r1 */

      /* subtract q0*v0 from r1:0 */
      umul_ppmm (h, l, q0, v0);
      t = r1; /* save old value of r1 */
      r0 = -l;
      r1 -= h + (l != 0);

      /* this while loop should be run at most twice */
      while (r1 > t) /* borrow when subtracting h + (l != 0),
                        q0 was too large */
        {
          q0 --;
          /* add v1:v0 to r1:r0 */
          t = r1;
          r0 += v0;
          r1 += v1 + (r0 < v0);
          /* note: since 2^(GMP_NUMB_BITS-1) <= v1 + (r0 < v0)
             <= 2^GMP_NUMB_BITS, it suffices to check if r1 <= t to see
             if there was a carry or not. */
        }
    }

  MPFR_ASSERTD(r1 < v1 || (r1 == v1 && r0 < v0));

  /* now (u-extra*v)*B^2 = (q1:q0) * v + r1:r0 */

  sb = r1 | r0; /* sticky bit: non-zero iff the remainder is non-zero */

  /* here, q1:q0 should be an approximation of the quotient (or the exact
     quotient), and sb the sticky bit */

#if GMP_NUMB_BITS == 64
 round_div2:
#endif
  if (extra)
    {
      /* the quotient is in [1, 2): shift it right by one bit and bump
         the exponent; the bit shifted out goes into the sticky bit */
      qx ++;
      sb |= q0 & 1;
      q0 = (q1 << (GMP_NUMB_BITS - 1)) | (q0 >> 1);
      q1 = MPFR_LIMB_HIGHBIT | (q1 >> 1);
    }
  rb = q0 & (MPFR_LIMB_ONE << (sh - 1)); /* round bit */
  sb |= (q0 & mask) ^ rb;                /* low bits below the round bit */
  qp[1] = q1;
  qp[0] = q0 & ~mask;

  MPFR_SIGN(q) = MPFR_MULT_SIGN (MPFR_SIGN (u), MPFR_SIGN (v));

  /* rounding */
  if (qx > __gmpfr_emax)
    return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when q > 0.111...111*2^(emin-1), or when rounding to nearest and
     q >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (qx < __gmpfr_emin)
    {
      /* Note: the case 0.111...111*2^(emin-1) < q < 2^(emin-1) is not possible
         here since (up to exponent) this would imply 1 - 2^(-p) < u/v < 1,
         thus v - 2^(-p)*v < u < v, and since we can assume 1/2 <= v < 1, it
         would imply v - 2^(-p) = v - ulp(v) < u < v, which has no solution. */

      /* For RNDN, mpfr_underflow always rounds away, thus for |q|<=2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either qx < emin - 1
         (b) or qx = emin - 1 and qp[1] = 100....000, qp[0] = 0 and rb = sb = 0.
         Note: in case (b), it suffices to check whether sb = 0, since rb = 1
         and sb = 0 is not possible (the exact quotient would have p+1 bits, thus
         u would need at least p+1 bits). */
      if (rnd_mode == MPFR_RNDN &&
          (qx < __gmpfr_emin - 1 ||
           (qp[1] == MPFR_LIMB_HIGHBIT && qp[0] == MPFR_LIMB_ZERO && sb == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (q, rnd_mode, MPFR_SIGN(q));
    }

  MPFR_EXP (q) = qx; /* Don't use MPFR_SET_EXP since qx might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* See the comment in mpfr_div_1. */
      MPFR_ASSERTD(sb != 0);
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(q)))
    {
    truncate:
      MPFR_ASSERTD(qx >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN(q));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      qp[0] += MPFR_LIMB_ONE << sh;
      qp[1] += qp[0] == 0; /* propagate the carry into the high limb */
      /* there can be no overflow in the addition above,
         see the analysis of mpfr_div_1 */
      MPFR_ASSERTD(qp[1] != 0);
      MPFR_RET(MPFR_SIGN(q));
    }
}
645 
646 #endif /* !defined(MPFR_GENERIC_ABI) */
647 
648 /* check if {ap, an} is zero */
649 static int
mpfr_mpn_cmpzero(mpfr_limb_ptr ap,mp_size_t an)650 mpfr_mpn_cmpzero (mpfr_limb_ptr ap, mp_size_t an)
651 {
652   MPFR_ASSERTD (an >= 0);
653   while (an > 0)
654     if (MPFR_LIKELY(ap[--an] != MPFR_LIMB_ZERO))
655       return 1;
656   return 0;
657 }
658 
/* Compare {ap, an} and {bp, bn} >> extra,
   aligned by the more significant limbs.
   Takes into account bp[0] for extra=1.
   Return a positive value if a > b >> extra, 0 if equal, negative if smaller.
   extra must be 0 or 1; for extra=1, b is read shifted right by one bit,
   so bp[i+1] supplies the high bits of limb i and the lowest bit bp[0] & 1
   only influences the result when everything above compares equal. */
static int
mpfr_mpn_cmp_aux (mpfr_limb_ptr ap, mp_size_t an,
                  mpfr_limb_ptr bp, mp_size_t bn, int extra)
{
  int cmp = 0;
  mp_size_t k;
  mp_limb_t bb;

  MPFR_ASSERTD (an >= 0);
  MPFR_ASSERTD (bn >= 0);
  MPFR_ASSERTD (extra == 0 || extra == 1);

  if (an >= bn)
    {
      k = an - bn; /* a has k extra low limbs */
      /* compare the common (high) part, most significant limbs first */
      while (cmp == 0 && bn > 0)
        {
          bn --;
          bb = (extra) ? ((bp[bn+1] << (GMP_NUMB_BITS - 1)) | (bp[bn] >> 1))
            : bp[bn];
          cmp = (ap[k + bn] > bb) ? 1 : ((ap[k + bn] < bb) ? -1 : 0);
        }
      /* for extra=1, the last bit of b (bp[0] & 1) aligns with ap[k-1] */
      bb = (extra) ? bp[0] << (GMP_NUMB_BITS - 1) : MPFR_LIMB_ZERO;
      while (cmp == 0 && k > 0)
        {
          k--;
          cmp = (ap[k] > bb) ? 1 : ((ap[k] < bb) ? -1 : 0);
          bb = MPFR_LIMB_ZERO; /* ensure we consider only once bp[0] & 1 */
        }
      /* bb still non-zero here means k was 0: b has a remaining low bit */
      if (cmp == 0 && bb != MPFR_LIMB_ZERO)
        cmp = -1;
    }
  else /* an < bn */
    {
      k = bn - an; /* b has k extra low limbs */
      /* compare the common (high) part, most significant limbs first */
      while (cmp == 0 && an > 0)
        {
          an --;
          bb = (extra) ? ((bp[k+an+1] << (GMP_NUMB_BITS - 1)) | (bp[k+an] >> 1))
            : bp[k+an];
          if (ap[an] > bb)
            cmp = 1;
          else if (ap[an] < bb)
            cmp = -1;
        }
      /* a is exhausted: any non-zero remaining limb of b makes b larger */
      while (cmp == 0 && k > 0)
        {
          k--;
          bb = (extra) ? ((bp[k+1] << (GMP_NUMB_BITS - 1)) | (bp[k] >> 1))
            : bp[k];
          cmp = (bb != MPFR_LIMB_ZERO) ? -1 : 0;
        }
      /* for extra=1, the bit shifted out of bp[0] is still part of b */
      if (cmp == 0 && extra && (bp[0] & MPFR_LIMB_ONE))
        cmp = -1;
    }
  return cmp;
}
720 
/* {ap, n} <- {ap, n} - {bp, n} >> extra - cy, with cy = 0 or 1.
   Return borrow out.
   Note: when extra is non-zero, bp[n] is also read (the limb above the
   n low limbs supplies the bits shifted in from the left). */
static mp_limb_t
mpfr_mpn_sub_aux (mpfr_limb_ptr ap, mpfr_limb_ptr bp, mp_size_t n,
                  mp_limb_t cy, int extra)
{
  mp_limb_t bb, rp;

  MPFR_ASSERTD (cy <= 1);
  MPFR_ASSERTD (n >= 0);

  while (n--)
    {
      /* bb is the current limb of b, shifted right by one bit if extra */
      bb = (extra) ? (MPFR_LIMB_LSHIFT(bp[1],GMP_NUMB_BITS-1) | (bp[0] >> 1)) : bp[0];
      rp = ap[0] - bb - cy;
      /* borrow out: either the subtraction of bb wrapped, or cy = 1 and
         ap[0] - bb was 0 (making rp wrap to MPFR_LIMB_MAX) */
      cy = (ap[0] < bb) || (cy && rp == MPFR_LIMB_MAX) ?
        MPFR_LIMB_ONE : MPFR_LIMB_ZERO;
      ap[0] = rp;
      ap ++;
      bp ++;
    }
  MPFR_ASSERTD (cy <= 1);
  return cy;
}
746 
747 MPFR_HOT_FUNCTION_ATTR int
mpfr_div(mpfr_ptr q,mpfr_srcptr u,mpfr_srcptr v,mpfr_rnd_t rnd_mode)748 mpfr_div (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
749 {
750   mp_size_t q0size, usize, vsize;
751   mp_size_t qsize; /* number of limbs wanted for the computed quotient */
752   mp_size_t qqsize;
753   mp_size_t k;
754   mpfr_limb_ptr q0p, qp;
755   mpfr_limb_ptr up, vp;
756   mpfr_limb_ptr ap;
757   mpfr_limb_ptr bp;
758   mp_limb_t qh;
759   mp_limb_t sticky_u, sticky_v;
760   mp_limb_t low_u;
761   mp_limb_t sticky;
762   mp_limb_t sticky3;
763   mp_limb_t round_bit;
764   mpfr_exp_t qexp;
765   int sign_quotient;
766   int extra_bit;
767   int sh, sh2;
768   int inex;
769   int like_rndz;
770   MPFR_TMP_DECL(marker);
771 
772   MPFR_LOG_FUNC (
773     ("u[%Pd]=%.*Rg v[%Pd]=%.*Rg rnd=%d",
774      mpfr_get_prec(u), mpfr_log_prec, u,
775      mpfr_get_prec (v),mpfr_log_prec, v, rnd_mode),
776     ("q[%Pd]=%.*Rg inexact=%d", mpfr_get_prec(q), mpfr_log_prec, q, inex));
777 
778   /**************************************************************************
779    *                                                                        *
780    *              This part of the code deals with special cases            *
781    *                                                                        *
782    **************************************************************************/
783 
784   if (MPFR_UNLIKELY(MPFR_ARE_SINGULAR(u,v)))
785     {
786       if (MPFR_IS_NAN(u) || MPFR_IS_NAN(v))
787         {
788           MPFR_SET_NAN(q);
789           MPFR_RET_NAN;
790         }
791       sign_quotient = MPFR_MULT_SIGN(MPFR_SIGN(u), MPFR_SIGN(v));
792       MPFR_SET_SIGN(q, sign_quotient);
793       if (MPFR_IS_INF(u))
794         {
795           if (MPFR_IS_INF(v))
796             {
797               MPFR_SET_NAN(q);
798               MPFR_RET_NAN;
799             }
800           else
801             {
802               MPFR_SET_INF(q);
803               MPFR_RET(0);
804             }
805         }
806       else if (MPFR_IS_INF(v))
807         {
808           MPFR_SET_ZERO (q);
809           MPFR_RET (0);
810         }
811       else if (MPFR_IS_ZERO (v))
812         {
813           if (MPFR_IS_ZERO (u))
814             {
815               MPFR_SET_NAN(q);
816               MPFR_RET_NAN;
817             }
818           else
819             {
820               MPFR_ASSERTD (! MPFR_IS_INF (u));
821               MPFR_SET_INF(q);
822               MPFR_SET_DIVBY0 ();
823               MPFR_RET(0);
824             }
825         }
826       else
827         {
828           MPFR_ASSERTD (MPFR_IS_ZERO (u));
829           MPFR_SET_ZERO (q);
830           MPFR_RET (0);
831         }
832     }
833 
834   /* When MPFR_GENERIC_ABI is defined, we don't use special code. */
835 #if !defined(MPFR_GENERIC_ABI)
836   if (MPFR_GET_PREC(u) == MPFR_GET_PREC(q) &&
837       MPFR_GET_PREC(v) == MPFR_GET_PREC(q))
838     {
839       if (MPFR_GET_PREC(q) < GMP_NUMB_BITS)
840         return mpfr_div_1 (q, u, v, rnd_mode);
841 
842       if (GMP_NUMB_BITS < MPFR_GET_PREC(q) &&
843           MPFR_GET_PREC(q) < 2 * GMP_NUMB_BITS)
844         return mpfr_div_2 (q, u, v, rnd_mode);
845 
846       if (MPFR_GET_PREC(q) == GMP_NUMB_BITS)
847         return mpfr_div_1n (q, u, v, rnd_mode);
848     }
849 #endif /* !defined(MPFR_GENERIC_ABI) */
850 
851   usize = MPFR_LIMB_SIZE(u);
852   vsize = MPFR_LIMB_SIZE(v);
853   q0size = MPFR_LIMB_SIZE(q); /* number of limbs of destination */
854   q0p = MPFR_MANT(q);
855   up = MPFR_MANT(u);
856   vp = MPFR_MANT(v);
857   sticky_u = MPFR_LIMB_ZERO;
858   sticky_v = MPFR_LIMB_ZERO;
859   round_bit = MPFR_LIMB_ZERO;
860 
861   /**************************************************************************
862    *                                                                        *
863    *              End of the part concerning special values.                *
864    *                                                                        *
865    **************************************************************************/
866 
867   /* When the divisor has one limb and MPFR_LONG_WITHIN_LIMB is defined,
868      we can use mpfr_div_ui, which should be faster, assuming there is no
869      intermediate overflow or underflow.
870      The divisor interpreted as an integer satisfies
871      2^(GMP_NUMB_BITS-1) <= vm < 2^GMP_NUMB_BITS, thus the quotient
872      satisfies 2^(EXP(u)-1-GMP_NUMB_BITS) < u/vm < 2^(EXP(u)-GMP_NUMB_BITS+1)
873      and its exponent is either EXP(u)-GMP_NUMB_BITS or one more. */
874 #ifdef MPFR_LONG_WITHIN_LIMB
875   if (vsize <= 1 && __gmpfr_emin <= MPFR_EXP(u) - GMP_NUMB_BITS
876       && MPFR_EXP(u) - GMP_NUMB_BITS + 1 <= __gmpfr_emax
877       && vp[0] <= ULONG_MAX)
878     {
879       mpfr_exp_t exp_v = MPFR_EXP(v); /* save it in case q=v */
880       if (MPFR_IS_POS (v))
881         inex = mpfr_div_ui (q, u, vp[0], rnd_mode);
882       else
883         {
884           inex = -mpfr_div_ui (q, u, vp[0], MPFR_INVERT_RND(rnd_mode));
885           MPFR_CHANGE_SIGN(q);
886         }
887       /* q did not under/overflow */
888       MPFR_EXP(q) -= exp_v;
889       /* The following test is needed, otherwise the next addition
890          on the exponent may overflow, e.g. when dividing the
891          largest finite MPFR number by the smallest positive one. */
892       if (MPFR_UNLIKELY (MPFR_EXP(q) > __gmpfr_emax - GMP_NUMB_BITS))
893         return mpfr_overflow (q, rnd_mode, MPFR_SIGN(q));
894       MPFR_EXP(q) += GMP_NUMB_BITS;
895       return mpfr_check_range (q, inex, rnd_mode);
896     }
897 #endif
898 
899   MPFR_TMP_MARK(marker);
900 
901   /* set sign */
902   sign_quotient = MPFR_MULT_SIGN(MPFR_SIGN(u), MPFR_SIGN(v));
903   MPFR_SET_SIGN(q, sign_quotient);
904 
905   /* determine if an extra bit comes from the division, i.e. if the
906      significand of u (as a fraction in [1/2, 1[) is larger than that
907      of v */
908   if (MPFR_LIKELY(up[usize - 1] != vp[vsize - 1]))
909     extra_bit = (up[usize - 1] > vp[vsize - 1]) ? 1 : 0;
910   else /* most significant limbs are equal, must look at further limbs */
911     {
912       mp_size_t l;
913 
914       k = usize - 1;
915       l = vsize - 1;
916       while (k != 0 && l != 0 && up[--k] == vp[--l]);
917       /* now k=0 or l=0 or up[k] != vp[l] */
918       if (up[k] != vp[l])
919         extra_bit = (up[k] > vp[l]);
920       /* now up[k] = vp[l], thus either k=0 or l=0 */
921       else if (l == 0) /* no more divisor limb */
922         extra_bit = 1;
923       else /* k=0: no more dividend limb */
924         extra_bit = mpfr_mpn_cmpzero (vp, l) == 0;
925     }
926 
927   /* set exponent */
928   qexp = MPFR_GET_EXP (u) - MPFR_GET_EXP (v) + extra_bit;
929 
930   /* sh is the number of zero bits in the low limb of the quotient */
931   MPFR_UNSIGNED_MINUS_MODULO(sh, MPFR_PREC(q));
932 
933   like_rndz = rnd_mode == MPFR_RNDZ ||
934     rnd_mode == (sign_quotient < 0 ? MPFR_RNDU : MPFR_RNDD);
935 
936   /**************************************************************************
937    *                                                                        *
938    *       We first try Mulders' short division (for large operands)        *
939    *                                                                        *
940    **************************************************************************/
941 
942   if (MPFR_UNLIKELY(q0size >= MPFR_DIV_THRESHOLD &&
943                     vsize >= MPFR_DIV_THRESHOLD))
944     {
945       mp_size_t n = q0size + 1; /* we will perform a short (2n)/n division */
946       mpfr_limb_ptr ap, bp, qp;
947       mpfr_prec_t p;
948 
949       /* since Mulders' short division clobbers the dividend, we have to
950          copy it */
951       ap = MPFR_TMP_LIMBS_ALLOC (n + n);
952       if (usize >= n + n) /* truncate the dividend */
953         MPN_COPY(ap, up + usize - (n + n), n + n);
954       else                /* zero-pad the dividend */
955         {
956           MPN_COPY(ap + (n + n) - usize, up, usize);
957           MPN_ZERO(ap, (n + n) - usize);
958         }
959 
960       if (vsize >= n) /* truncate the divisor */
961         bp = vp + vsize - n;
962       else            /* zero-pad the divisor */
963         {
964           bp = MPFR_TMP_LIMBS_ALLOC (n);
965           MPN_COPY(bp + n - vsize, vp, vsize);
966           MPN_ZERO(bp, n - vsize);
967         }
968 
969       qp = MPFR_TMP_LIMBS_ALLOC (n);
970       /* since n = q0size + 1, we have n >= 2 here */
971       qh = mpfr_divhigh_n (qp, ap, bp, n);
972       MPFR_ASSERTD (qh == 0 || qh == 1);
973       /* in all cases, the error is at most (2n+2) ulps on qh*B^n+{qp,n},
974          cf algorithms.tex */
975 
976       p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (2 * n + 2);
977       /* If rnd=RNDN, we need to be able to round with a directed rounding
978          and one more bit. */
979       if (qh == 1)
980         {
981           mpn_rshift (qp, qp, n, 1);
982           qp[n - 1] |= MPFR_LIMB_HIGHBIT;
983         }
984       if (MPFR_LIKELY (mpfr_round_p (qp, n, p,
985                                      MPFR_PREC(q) + (rnd_mode == MPFR_RNDN))))
986         {
987           /* we can round correctly whatever the rounding mode */
988           MPN_COPY (q0p, qp + 1, q0size);
989           q0p[0] &= ~MPFR_LIMB_MASK(sh); /* put to zero low sh bits */
990 
991           if (rnd_mode == MPFR_RNDN) /* round to nearest */
992             {
993               /* we know we can round, thus we are never in the even rule case:
994                  if the round bit is 0, we truncate
995                  if the round bit is 1, we add 1 */
996               if (sh > 0)
997                 round_bit = (qp[1] >> (sh - 1)) & 1;
998               else
999                 round_bit = qp[0] >> (GMP_NUMB_BITS - 1);
1000               /* TODO: add value coverage tests in tdiv to check that
1001                  we reach this part with different values of qh and
1002                  round_bit (4 cases). */
1003               if (round_bit == 0)
1004                 {
1005                   inex = -1;
1006                   goto truncate;
1007                 }
1008               else /* round_bit = 1 */
1009                 goto add_one_ulp;
1010             }
1011           else if (! like_rndz) /* round away */
1012             goto add_one_ulp;
1013           else /* round to zero: nothing to do */
1014             {
1015               inex = -1;
1016               goto truncate;
1017             }
1018         }
1019     }
1020 
1021   /**************************************************************************
1022    *                                                                        *
1023    *     Mulders' short division failed: we revert to integer division      *
1024    *                                                                        *
1025    **************************************************************************/
1026 
1027   if (MPFR_UNLIKELY(rnd_mode == MPFR_RNDN && sh == 0))
1028     { /* we compute the quotient with one more limb, in order to get
1029          the round bit in the quotient, and the remainder only contains
1030          sticky bits */
1031       qsize = q0size + 1;
1032       /* need to allocate memory for the quotient */
1033       qp = MPFR_TMP_LIMBS_ALLOC (qsize);
1034     }
1035   else
1036     {
1037       qsize = q0size;
1038       qp = q0p; /* directly put the quotient in the destination */
1039     }
1040   qqsize = qsize + qsize;
1041 
1042   /* prepare the dividend */
1043   ap = MPFR_TMP_LIMBS_ALLOC (qqsize);
1044   if (MPFR_LIKELY(qqsize > usize)) /* use the full dividend */
1045     {
1046       k = qqsize - usize; /* k > 0 */
1047       MPN_ZERO(ap, k);
1048       if (extra_bit)
1049         ap[k - 1] = mpn_rshift (ap + k, up, usize, 1);
1050       else
1051         MPN_COPY(ap + k, up, usize);
1052     }
1053   else /* truncate the dividend */
1054     {
1055       k = usize - qqsize;
1056       if (extra_bit)
1057         sticky_u = mpn_rshift (ap, up + k, qqsize, 1);
1058       else
1059         MPN_COPY(ap, up + k, qqsize);
1060       sticky_u = sticky_u || mpfr_mpn_cmpzero (up, k);
1061     }
1062   low_u = sticky_u;
1063 
1064   /* now sticky_u is non-zero iff the truncated part of u is non-zero */
1065 
1066   /* prepare the divisor */
1067   if (MPFR_LIKELY(vsize >= qsize))
1068     {
1069       k = vsize - qsize;
1070       if (qp != vp)
1071         bp = vp + k; /* avoid copying the divisor */
1072       else /* need to copy, since mpn_divrem doesn't allow overlap
1073               between quotient and divisor, necessarily k = 0
1074               since quotient and divisor are the same mpfr variable */
1075         {
1076           bp = MPFR_TMP_LIMBS_ALLOC (qsize);
1077           MPN_COPY(bp, vp, vsize);
1078         }
1079       sticky_v = sticky_v || mpfr_mpn_cmpzero (vp, k);
1080       k = 0;
1081     }
1082   else /* vsize < qsize: small divisor case */
1083     {
1084       bp = vp;
1085       k = qsize - vsize;
1086     }
1087 
1088   /**************************************************************************
1089    *                                                                        *
1090    *  Here we perform the real division of {ap+k,qqsize-k} by {bp,qsize-k}  *
1091    *                                                                        *
1092    **************************************************************************/
1093 
1094   /* In the general case (usize > 2*qsize and vsize > qsize), we have:
1095        ______________________________________
1096       |                          |           |   u1 has 2*qsize limbs
1097       |             u1           |     u0    |   u0 has usize-2*qsize limbs
1098       |__________________________|___________|
1099 
1100                       ____________________
1101                      |           |        |      v1 has qsize limbs
1102                      |    v1     |    v0  |      v0 has vsize-qsize limbs
1103                      |___________|________|
1104 
1105       We divide u1 by v1, with quotient in qh + {qp, qsize} and
1106       remainder (denoted r below) stored in place of the low qsize limbs of u1.
1107   */
1108 
1109   /* if Mulders' short division failed, we revert to division with remainder */
1110   qh = mpn_divrem (qp, 0, ap + k, qqsize - k, bp, qsize - k);
1111   /* let u1 be the upper part of u, and v1 the upper part of v (with sticky_u
1112      and sticky_v representing the lower parts), then the quotient of u1 by v1
1113      is now in {qp, qsize}, with possible carry in qh, and the remainder in
1114      {ap + k, qsize - k} */
1115   /* warning: qh may be 1 if u1 == v1, but u < v */
1116 
1117   k = qsize;
1118   sticky_u = sticky_u || mpfr_mpn_cmpzero (ap, k);
1119 
1120   sticky = sticky_u | sticky_v;
1121 
1122   /* now sticky is non-zero iff one of the following holds:
1123      (a) the truncated part of u is non-zero
1124      (b) the truncated part of v is non-zero
1125      (c) the remainder from division is non-zero */
1126 
1127   if (MPFR_LIKELY(qsize == q0size))
1128     {
1129       sticky3 = qp[0] & MPFR_LIMB_MASK(sh); /* does nothing when sh=0 */
1130       sh2 = sh;
1131     }
1132   else /* qsize = q0size + 1: only happens when rnd_mode=MPFR_RNDN and sh=0 */
1133     {
1134       MPN_COPY (q0p, qp + 1, q0size);
1135       sticky3 = qp[0];
1136       sh2 = GMP_NUMB_BITS;
1137     }
1138   qp[0] ^= sticky3;
1139   /* sticky3 contains the truncated bits from the quotient,
1140      including the round bit, and 1 <= sh2 <= GMP_NUMB_BITS
1141      is the number of bits in sticky3 */
1142   inex = (sticky != MPFR_LIMB_ZERO) || (sticky3 != MPFR_LIMB_ZERO);
1143 
1144   /* to round, we distinguish two cases:
1145      (a) vsize <= qsize: we used the full divisor
1146      (b) vsize > qsize: the divisor was truncated
1147   */
1148 
1149   if (MPFR_LIKELY(vsize <= qsize)) /* use the full divisor */
1150     {
1151       if (MPFR_LIKELY(rnd_mode == MPFR_RNDN))
1152         {
1153           round_bit = sticky3 & (MPFR_LIMB_ONE << (sh2 - 1));
1154           sticky = (sticky3 ^ round_bit) | sticky_u;
1155         }
1156       else if (like_rndz || inex == 0)
1157         sticky = (inex == 0) ? MPFR_LIMB_ZERO : MPFR_LIMB_ONE;
1158       else  /* round away from zero */
1159         sticky = MPFR_LIMB_ONE;
1160       goto case_1;
1161     }
1162   else /* vsize > qsize: need to truncate the divisor */
1163     {
1164       if (inex == 0)
1165         goto truncate;
1166       else
1167         {
1168           /* We know the estimated quotient is an upper bound of the exact
1169              quotient (with rounding toward zero), with a difference of at
1170              most 2 in qp[0].
1171              Thus we can round except when sticky3 is 000...000 or 000...001
1172              for directed rounding, and 100...000 or 100...001 for rounding
1173              to nearest. (For rounding to nearest, we cannot determine the
1174              inexact flag for 000...000 or 000...001.)
1175           */
1176           mp_limb_t sticky3orig = sticky3;
1177           if (rnd_mode == MPFR_RNDN)
1178             {
1179               round_bit = sticky3 & (MPFR_LIMB_ONE << (sh2 - 1));
1180               sticky3   = sticky3 ^ round_bit;
1181             }
1182           if (sticky3 != MPFR_LIMB_ZERO && sticky3 != MPFR_LIMB_ONE)
1183             {
1184               sticky = sticky3;
1185               goto case_1;
1186             }
1187           else /* hard case: we have to compare q1 * v0 and r + u0,
1188                  where q1 * v0 has qsize + (vsize-qsize) = vsize limbs, and
1189                  r + u0 has qsize + (usize-2*qsize) = usize-qsize limbs */
1190             {
1191               mp_size_t l;
1192               mpfr_limb_ptr sp;
1193               int cmp_s_r;
1194               mp_limb_t qh2;
1195 
1196               sp = MPFR_TMP_LIMBS_ALLOC (vsize);
1197               k = vsize - qsize;
1198               /* sp <- {qp, qsize} * {vp, vsize-qsize} */
1199               qp[0] ^= sticky3orig; /* restore original quotient */
1200               if (qsize >= k)
1201                 mpn_mul (sp, qp, qsize, vp, k);
1202               else
1203                 mpn_mul (sp, vp, k, qp, qsize);
1204               if (qh)
1205                 qh2 = mpn_add_n (sp + qsize, sp + qsize, vp, k);
1206               else
1207                 qh2 = MPFR_LIMB_ZERO;
1208               qp[0] ^= sticky3orig; /* restore truncated quotient */
1209 
1210               /* compare qh2 + {sp, k + qsize} to {ap, qsize} + u0 */
1211               cmp_s_r = (qh2 != 0) ? 1 : mpn_cmp (sp + k, ap, qsize);
1212               if (cmp_s_r == 0) /* compare {sp, k} and u0 */
1213                 {
1214                   cmp_s_r = (usize >= qqsize) ?
1215                     mpfr_mpn_cmp_aux (sp, k, up, usize - qqsize, extra_bit) :
1216                     mpfr_mpn_cmpzero (sp, k);
1217                 }
1218               /* now cmp_s_r > 0 if {sp, vsize} > {ap, qsize} + u0
1219                      cmp_s_r = 0 if {sp, vsize} = {ap, qsize} + u0
1220                      cmp_s_r < 0 if {sp, vsize} < {ap, qsize} + u0 */
1221               if (cmp_s_r <= 0) /* quotient is in [q1, q1+1) */
1222                 {
1223                   sticky = (cmp_s_r == 0) ? sticky3 : MPFR_LIMB_ONE;
1224                   goto case_1;
1225                 }
1226               else /* cmp_s_r > 0, quotient is < q1: to determine if it is
1227                       in [q1-2,q1-1] or in [q1-1,q1], we need to subtract
1228                       the low part u0 of the dividend from q*v0 */
1229                 {
1230                   mp_limb_t cy = MPFR_LIMB_ZERO;
1231 
1232                   /* subtract u0 >> extra_bit if non-zero */
1233                   if (qh2 != 0) /* whatever the value of {up, m + k}, it
1234                                    will be smaller than qh2 + {sp, k} */
1235                     cmp_s_r = 1;
1236                   else
1237                     {
1238                       if (low_u != MPFR_LIMB_ZERO)
1239                         {
1240                           mp_size_t m;
1241                           l = usize - qqsize; /* number of limbs in u0 */
1242                           m = (l > k) ? l - k : 0;
1243                           cy = (extra_bit) ?
1244                             (up[m] & MPFR_LIMB_ONE) : MPFR_LIMB_ZERO;
                          if (l >= k) /* u0 has at least as many limbs as s:
                                         first look if {up, m} is not zero,
                                         and compare {sp, k} and {up + m, k} */
1248                             {
1249                               cy = cy || mpfr_mpn_cmpzero (up, m);
1250                               low_u = cy;
1251                               cy = mpfr_mpn_sub_aux (sp, up + m, k,
1252                                                      cy, extra_bit);
1253                             }
1254                           else /* l < k: s has more limbs than u0 */
1255                             {
1256                               low_u = MPFR_LIMB_ZERO;
1257                               if (cy != MPFR_LIMB_ZERO)
1258                                 cy = mpn_sub_1 (sp + k - l - 1, sp + k - l - 1,
1259                                                 1, MPFR_LIMB_HIGHBIT);
1260                               cy = mpfr_mpn_sub_aux (sp + k - l, up, l,
1261                                                      cy, extra_bit);
1262                             }
1263                         }
1264                       MPFR_ASSERTD (cy <= 1);
1265                       cy = mpn_sub_1 (sp + k, sp + k, qsize, cy);
1266                       /* subtract r */
1267                       cy += mpn_sub_n (sp + k, sp + k, ap, qsize);
1268                       MPFR_ASSERTD (cy <= 1);
1269                       /* now compare {sp, ssize} to v */
1270                       cmp_s_r = mpn_cmp (sp, vp, vsize);
1271                       if (cmp_s_r == 0 && low_u != MPFR_LIMB_ZERO)
1272                         cmp_s_r = 1; /* since in fact we subtracted
1273                                         less than 1 */
1274                     }
1275                   if (cmp_s_r <= 0) /* q1-1 <= u/v < q1 */
1276                     {
1277                       if (sticky3 == MPFR_LIMB_ONE)
1278                         { /* q1-1 is either representable (directed rounding),
1279                              or the middle of two numbers (nearest) */
1280                           sticky = (cmp_s_r) ? MPFR_LIMB_ONE : MPFR_LIMB_ZERO;
1281                           goto case_1;
1282                         }
1283                       /* now necessarily sticky3=0 */
1284                       else if (round_bit == MPFR_LIMB_ZERO)
1285                         { /* round_bit=0, sticky3=0: q1-1 is exact only
1286                              when sh=0 */
1287                           inex = (cmp_s_r || sh) ? -1 : 0;
1288                           if (rnd_mode == MPFR_RNDN ||
1289                               (! like_rndz && inex != 0))
1290                             {
1291                               inex = 1;
1292                               goto truncate_check_qh;
1293                             }
1294                           else /* round down */
1295                             goto sub_one_ulp;
1296                         }
1297                       else /* sticky3=0, round_bit=1 ==> rounding to nearest */
1298                         {
1299                           inex = cmp_s_r;
1300                           goto truncate;
1301                         }
1302                     }
1303                   else /* q1-2 < u/v < q1-1 */
1304                     {
1305                       /* if rnd=MPFR_RNDN, the result is q1 when
1306                          q1-2 >= q1-2^(sh-1), i.e. sh >= 2,
1307                          otherwise (sh=1) it is q1-2 */
1308                       if (rnd_mode == MPFR_RNDN) /* sh > 0 */
1309                         {
1310                           /* Case sh=1: sb=0 always, and q1-rb is exactly
1311                              representable, like q1-rb-2.
1312                              rb action
1313                              0  subtract two ulps, inex=-1
1314                              1  truncate, inex=1
1315 
1316                              Case sh>1: one ulp is 2^(sh-1) >= 2
1317                              rb sb action
1318                              0  0  truncate, inex=1
1319                              0  1  truncate, inex=1
1320                              1  x  truncate, inex=-1
1321                            */
1322                           if (sh == 1)
1323                             {
1324                               if (round_bit == MPFR_LIMB_ZERO)
1325                                 {
1326                                   inex = -1;
1327                                   sh = 0;
1328                                   goto sub_two_ulp;
1329                                 }
1330                               else
1331                                 {
1332                                   inex = 1;
1333                                   goto truncate_check_qh;
1334                                 }
1335                             }
1336                           else /* sh > 1 */
1337                             {
1338                               inex = (round_bit == MPFR_LIMB_ZERO) ? 1 : -1;
1339                               goto truncate_check_qh;
1340                             }
1341                         }
1342                       else if (like_rndz)
1343                         {
1344                           /* the result is down(q1-2), i.e. subtract one
1345                              ulp if sh > 0, and two ulps if sh=0 */
1346                           inex = -1;
1347                           if (sh > 0)
1348                             goto sub_one_ulp;
1349                           else
1350                             goto sub_two_ulp;
1351                         }
1352                       /* if round away from zero, the result is up(q1-1),
1353                          which is q1 unless sh = 0, where it is q1-1 */
1354                       else
1355                         {
1356                           inex = 1;
1357                           if (sh > 0)
1358                             goto truncate_check_qh;
1359                           else /* sh = 0 */
1360                             goto sub_one_ulp;
1361                         }
1362                     }
1363                 }
1364             }
1365         }
1366     }
1367 
1368  case_1: /* quotient is in [q1, q1+1),
1369             round_bit is the round_bit (0 for directed rounding),
1370             sticky the sticky bit */
1371   if (like_rndz || (round_bit == MPFR_LIMB_ZERO && sticky == MPFR_LIMB_ZERO))
1372     {
1373       inex = round_bit == MPFR_LIMB_ZERO && sticky == MPFR_LIMB_ZERO ? 0 : -1;
1374       goto truncate;
1375     }
1376   else if (rnd_mode == MPFR_RNDN) /* sticky <> 0 or round <> 0 */
1377     {
1378       if (round_bit == MPFR_LIMB_ZERO) /* necessarily sticky <> 0 */
1379         {
1380           inex = -1;
1381           goto truncate;
1382         }
1383       /* round_bit = 1 */
1384       else if (sticky != MPFR_LIMB_ZERO)
1385         goto add_one_ulp; /* inex=1 */
1386       else /* round_bit=1, sticky=0 */
1387         goto even_rule;
1388     }
1389   else /* round away from zero, sticky <> 0 */
1390     goto add_one_ulp; /* with inex=1 */
1391 
1392  sub_two_ulp:
  /* we cannot subtract MPFR_LIMB_ONE << (sh+1) since this is
     undefined for sh = GMP_NUMB_BITS */
1395   qh -= mpn_sub_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh);
1396   /* go through */
1397 
1398  sub_one_ulp:
1399   qh -= mpn_sub_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh);
1400   /* go through truncate_check_qh */
1401 
1402  truncate_check_qh:
1403   if (qh)
1404     {
1405       if (MPFR_LIKELY (qexp < MPFR_EXP_MAX))
1406         qexp ++;
1407       /* else qexp is now incorrect, but one will still get an overflow */
1408       q0p[q0size - 1] = MPFR_LIMB_HIGHBIT;
1409     }
1410   goto truncate;
1411 
1412  even_rule: /* has to set inex */
1413   inex = (q0p[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
1414   if (inex < 0)
1415     goto truncate;
1416   /* else go through add_one_ulp */
1417 
1418  add_one_ulp:
1419   inex = 1; /* always here */
1420   if (mpn_add_1 (q0p, q0p, q0size, MPFR_LIMB_ONE << sh))
1421     {
1422       if (MPFR_LIKELY (qexp < MPFR_EXP_MAX))
1423         qexp ++;
1424       /* else qexp is now incorrect, but one will still get an overflow */
1425       q0p[q0size - 1] = MPFR_LIMB_HIGHBIT;
1426     }
1427 
1428  truncate: /* inex already set */
1429 
1430   MPFR_TMP_FREE(marker);
1431 
1432   /* check for underflow/overflow */
1433   if (MPFR_UNLIKELY(qexp > __gmpfr_emax))
1434     return mpfr_overflow (q, rnd_mode, sign_quotient);
1435   else if (MPFR_UNLIKELY(qexp < __gmpfr_emin))
1436     {
1437       if (rnd_mode == MPFR_RNDN && ((qexp < __gmpfr_emin - 1) ||
1438                                    (inex >= 0 && mpfr_powerof2_raw (q))))
1439         rnd_mode = MPFR_RNDZ;
1440       return mpfr_underflow (q, rnd_mode, sign_quotient);
1441     }
1442   MPFR_SET_EXP(q, qexp);
1443 
1444   inex *= sign_quotient;
1445   MPFR_RET (inex);
1446 }
1447