xref: /netbsd-src/external/lgpl3/mpfr/dist/src/sub1sp.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1 /* mpfr_sub1sp -- internal function to perform a "real" subtraction
2    All the operands must have the same precision
3 
4 Copyright 2003-2023 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6 
7 This file is part of the GNU MPFR Library.
8 
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #define MPFR_NEED_LONGLONG_H
25 #include "mpfr-impl.h"
26 
27 /* define MPFR_FULLSUB to use alternate code in mpfr_sub1sp2 and mpfr_sub1sp2n
28    (see comments in mpfr_sub1sp2) */
29 /* #define MPFR_FULLSUB */
30 
31 #if MPFR_WANT_ASSERT >= 2
32 /* Check the result of mpfr_sub1sp with mpfr_sub1.
33 
34    Note: mpfr_sub1sp itself has two algorithms: one always valid and one
35    faster for small precisions (up to 3 limbs). The latter one is disabled
36    if MPFR_GENERIC_ABI is defined. When MPFR_WANT_ASSERT >= 2, it could be
37    interesting to compare the results of these different algorithms. For
38    the time being, this is currently done by running the same code on the
39    same data with and without MPFR_GENERIC_ABI defined, where we have the
40    following comparisons in small precisions:
41      mpfr_sub1sp slow <-> mpfr_sub1 when MPFR_GENERIC_ABI is defined;
42      mpfr_sub1sp fast <-> mpfr_sub1 when MPFR_GENERIC_ABI is not defined.
43    By transitivity, the absence of failures implies that the 3 results are
44    the same.
45 */
46 
47 int mpfr_sub1sp_ref (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t);
mpfr_sub1sp(mpfr_ptr a,mpfr_srcptr b,mpfr_srcptr c,mpfr_rnd_t rnd_mode)48 int mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
49 {
50   mpfr_t tmpa, tmpb, tmpc;
51   mpfr_flags_t old_flags, flags, flags2;
52   int inexb, inexc, inexact, inexact2;
53 
54   if (rnd_mode == MPFR_RNDF)
55     return mpfr_sub1sp_ref (a, b, c, rnd_mode);
56 
57   old_flags = __gmpfr_flags;
58 
59   mpfr_init2 (tmpa, MPFR_PREC (a));
60   mpfr_init2 (tmpb, MPFR_PREC (b));
61   mpfr_init2 (tmpc, MPFR_PREC (c));
62 
63   inexb = mpfr_set (tmpb, b, MPFR_RNDN);
64   MPFR_ASSERTN (inexb == 0);
65 
66   inexc = mpfr_set (tmpc, c, MPFR_RNDN);
67   MPFR_ASSERTN (inexc == 0);
68 
69   MPFR_ASSERTN (__gmpfr_flags == old_flags);
70 
71   inexact2 = mpfr_sub1 (tmpa, tmpb, tmpc, rnd_mode);
72   flags2 = __gmpfr_flags;
73 
74   __gmpfr_flags = old_flags;
75   inexact = mpfr_sub1sp_ref (a, b, c, rnd_mode);
76   flags = __gmpfr_flags;
77 
78   /* Convert the ternary values to (-1,0,1). */
79   inexact2 = VSIGN (inexact2);
80   inexact = VSIGN (inexact);
81 
82   if (! mpfr_equal_p (tmpa, a) || inexact != inexact2 || flags != flags2)
83     {
84       fprintf (stderr, "sub1 & sub1sp return different values for %s\n"
85                "Prec_a = %lu, Prec_b = %lu, Prec_c = %lu\nB = ",
86                mpfr_print_rnd_mode (rnd_mode),
87                (unsigned long) MPFR_PREC (a),
88                (unsigned long) MPFR_PREC (b),
89                (unsigned long) MPFR_PREC (c));
90       mpfr_fdump (stderr, tmpb);
91       fprintf (stderr, "C = ");
92       mpfr_fdump (stderr, tmpc);
93       fprintf (stderr, "sub1  : ");
94       mpfr_fdump (stderr, tmpa);
95       fprintf (stderr, "sub1sp: ");
96       mpfr_fdump (stderr, a);
97       fprintf (stderr, "sub1  : ternary = %2d, flags =", inexact2);
98       flags_fout (stderr, flags2);
99       fprintf (stderr, "sub1sp: ternary = %2d, flags =", inexact);
100       flags_fout (stderr, flags);
101       MPFR_ASSERTN (0);
102     }
103   mpfr_clears (tmpa, tmpb, tmpc, (mpfr_ptr) 0);
104   return inexact;
105 }
106 # define mpfr_sub1sp mpfr_sub1sp_ref
107 #endif  /* MPFR_WANT_ASSERT >= 2 */
108 
109 #if !defined(MPFR_GENERIC_ABI)
110 
111 /* the sub1sp1_extracted.c is not ready yet */
112 
113 #if 0 && defined(MPFR_WANT_PROVEN_CODE) && GMP_NUMB_BITS == 64 && \
114   UINT_MAX == 0xffffffff && MPFR_PREC_BITS == 64 && \
115   _MPFR_PREC_FORMAT == 3 && _MPFR_EXP_FORMAT == _MPFR_PREC_FORMAT
116 
117 /* The code assumes that mp_limb_t has 64 bits exactly, unsigned int
118    has 32 bits exactly, mpfr_prec_t and mpfr_exp_t are of type long,
119    which has 64 bits exactly. */
120 
121 #include "sub1sp1_extracted.c"
122 
123 #else
124 
125 /* special code for p < GMP_NUMB_BITS */
126 static int
mpfr_sub1sp1(mpfr_ptr a,mpfr_srcptr b,mpfr_srcptr c,mpfr_rnd_t rnd_mode,mpfr_prec_t p)127 mpfr_sub1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
128               mpfr_prec_t p)
129 {
130   mpfr_exp_t bx = MPFR_GET_EXP (b);
131   mpfr_exp_t cx = MPFR_GET_EXP (c);
132   mp_limb_t *ap = MPFR_MANT(a);
133   mp_limb_t *bp = MPFR_MANT(b);
134   mp_limb_t *cp = MPFR_MANT(c);
135   mpfr_prec_t cnt, INITIALIZED(sh);
136   mp_limb_t rb; /* round bit */
137   mp_limb_t sb; /* sticky bit */
138   mp_limb_t a0;
139   mp_limb_t mask;
140   mpfr_uexp_t d;
141 
142   MPFR_ASSERTD(p < GMP_NUMB_BITS);
143 
144   if (bx == cx)
145     {
146       if (MPFR_UNLIKELY(bp[0] == cp[0])) /* result is zero */
147         {
148           if (rnd_mode == MPFR_RNDD)
149             MPFR_SET_NEG(a);
150           else
151             MPFR_SET_POS(a);
152           MPFR_SET_ZERO(a);
153           MPFR_RET (0);
154         }
155       else if (cp[0] > bp[0]) /* borrow: |c| > |b| */
156         {
157           a0 = cp[0] - bp[0];
158           MPFR_SET_OPPOSITE_SIGN (a, b);
159         }
160       else /* bp[0] > cp[0] */
161         {
162           a0 = bp[0] - cp[0];
163           MPFR_SET_SAME_SIGN (a, b);
164         }
165 
166       /* now a0 != 0 */
167       MPFR_ASSERTD(a0 != 0);
168       count_leading_zeros (cnt, a0);
169       ap[0] = a0 << cnt;
170       bx -= cnt;
171       rb = sb = 0;
172       /* Note: sh is not initialized, but will not be used in this case. */
173     }
174   else
175     {
176       if (bx < cx)  /* swap b and c */
177         {
178           mpfr_exp_t tx;
179           mp_limb_t *tp;
180           tx = bx; bx = cx; cx = tx;
181           tp = bp; bp = cp; cp = tp;
182           MPFR_SET_OPPOSITE_SIGN (a, b);
183         }
184       else
185         {
186           MPFR_SET_SAME_SIGN (a, b);
187         }
188       MPFR_ASSERTD (bx > cx);
189       d = (mpfr_uexp_t) bx - cx;
190       sh = GMP_NUMB_BITS - p;
191       mask = MPFR_LIMB_MASK(sh);
192       if (d < GMP_NUMB_BITS)
193         {
194           sb = - (cp[0] << (GMP_NUMB_BITS - d)); /* neglected part of -c */
195           /* Note that "a0 = bp[0] - (cp[0] >> d) - (sb != 0);" is faster
196              on some other machines and has no immediate dependencies for
197              the first subtraction. In the future, make sure that the code
198              is recognized as a *single* subtraction with borrow and/or use
199              a builtin when available (currently provided by Clang, but not
200              by GCC); create a new macro for that. See the TODO later.
201              Note also that with Clang, the constant 0 for the first
202              subtraction instead of a variable breaks the optimization:
203              https://llvm.org/bugs/show_bug.cgi?id=31754 */
204           a0 = bp[0] - (sb != 0) - (cp[0] >> d);
205           /* a0 cannot be zero here since:
206              a) if d >= 2, then a0 >= 2^(w-1) - (2^(w-2)-1) with
207                 w = GMP_NUMB_BITS, thus a0 - 1 >= 2^(w-2),
208              b) if d = 1, then since p < GMP_NUMB_BITS we have sb=0.
209           */
210           MPFR_ASSERTD(a0 > 0);
211           count_leading_zeros (cnt, a0);
212           if (cnt)
213             a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
214           sb <<= cnt;
215           bx -= cnt;
216           /* sh > 0 since p < GMP_NUMB_BITS */
217           MPFR_ASSERTD(sh > 0);
218           rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
219           sb |= (a0 & mask) ^ rb;
220           ap[0] = a0 & ~mask;
221         }
222       else /* d >= GMP_NUMB_BITS */
223         {
224           if (bp[0] > MPFR_LIMB_HIGHBIT)
225             {
226               /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies:
227                  1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1. */
228               ap[0] = bp[0] - (MPFR_LIMB_ONE << sh);
229               rb = 1;
230             }
231           else
232             {
233               /* Warning: since we have an exponent decrease, when
234                  p = GMP_NUMB_BITS - 1 and d = GMP_NUMB_BITS, the round bit
235                  corresponds to the upper bit of -c. In that case rb = 0 and
236                  sb = 1, except when c0 = MPFR_LIMB_HIGHBIT where rb = 1 and
237                  sb = 0. */
238               rb = sh > 1 || d > GMP_NUMB_BITS || cp[0] == MPFR_LIMB_HIGHBIT;
239               /* sb=1 below is incorrect when p = GMP_NUMB_BITS - 1,
240                  d = GMP_NUMB_BITS and c0 = MPFR_LIMB_HIGHBIT, but in
241                  that case the even rule wound round up too. */
242               ap[0] = ~mask;
243               bx --;
244               /* Warning: if d = GMP_NUMB_BITS and c0 = 1000...000, then
245                  b0 - c0 = |0111...111|1000...000|, which after the shift
246                  becomes |111...111|000...000| thus if p = GMP_NUMB_BITS-1
247                  we have rb = 1 but sb = 0. However, in this case the round
248                  even rule will round up, which is what we get with sb = 1:
249                  the final result will be correct, while sb is incorrect. */
250             }
251           sb = 1;
252         }
253     }
254 
255   /* now perform rounding */
256 
257   /* Warning: MPFR considers underflow *after* rounding with an unbounded
258      exponent range. However, since b and c have same precision p, they are
259      multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
260      subtraction (with an unbounded exponent range) is exact, so that bx is
261      also the exponent after rounding with an unbounded exponent range. */
262   if (MPFR_UNLIKELY(bx < __gmpfr_emin))
263     {
264       /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
265          we have to change to RNDZ. This corresponds to:
266          (a) either bx < emin - 1
267          (b) or bx = emin - 1 and ap[0] = 1000....000 (in this case necessarily
268              rb = sb = 0 since b-c is multiple of 2^(emin-p) */
269       if (rnd_mode == MPFR_RNDN &&
270           (bx < __gmpfr_emin - 1 || ap[0] == MPFR_LIMB_HIGHBIT))
271         {
272           MPFR_ASSERTD(rb == 0 && sb == 0);
273           rnd_mode = MPFR_RNDZ;
274         }
275       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
276     }
277 
278   MPFR_SET_EXP (a, bx);
279   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
280     MPFR_RET (0);
281   else if (rnd_mode == MPFR_RNDN)
282     {
283       if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
284         goto truncate;
285       else
286         goto add_one_ulp;
287     }
288   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
289     {
290     truncate:
291       MPFR_RET(-MPFR_SIGN(a));
292     }
293   else /* round away from zero */
294     {
295     add_one_ulp:
296       ap[0] += MPFR_LIMB_ONE << sh;
297       if (MPFR_UNLIKELY(ap[0] == 0))
298         {
299           ap[0] = MPFR_LIMB_HIGHBIT;
300           /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
301              bx+1 is at most equal to the original exponent of b. */
302           MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
303           MPFR_SET_EXP (a, bx + 1);
304         }
305       MPFR_RET(MPFR_SIGN(a));
306     }
307 }
308 
309 #endif /* MPFR_WANT_PROVEN_CODE */
310 
311 /* special code for p = GMP_NUMB_BITS */
312 static int
mpfr_sub1sp1n(mpfr_ptr a,mpfr_srcptr b,mpfr_srcptr c,mpfr_rnd_t rnd_mode)313 mpfr_sub1sp1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
314 {
315   mpfr_exp_t bx = MPFR_GET_EXP (b);
316   mpfr_exp_t cx = MPFR_GET_EXP (c);
317   mp_limb_t *ap = MPFR_MANT(a);
318   mp_limb_t *bp = MPFR_MANT(b);
319   mp_limb_t *cp = MPFR_MANT(c);
320   mpfr_prec_t cnt;
321   mp_limb_t rb; /* round bit */
322   mp_limb_t sb; /* sticky bit */
323   mp_limb_t a0;
324   mpfr_uexp_t d;
325 
326   MPFR_ASSERTD(MPFR_PREC(a) == GMP_NUMB_BITS);
327   MPFR_ASSERTD(MPFR_PREC(b) == GMP_NUMB_BITS);
328   MPFR_ASSERTD(MPFR_PREC(c) == GMP_NUMB_BITS);
329 
330   if (bx == cx)
331     {
332       a0 = bp[0] - cp[0];
333       if (a0 == 0) /* result is zero */
334         {
335           if (rnd_mode == MPFR_RNDD)
336             MPFR_SET_NEG(a);
337           else
338             MPFR_SET_POS(a);
339           MPFR_SET_ZERO(a);
340           MPFR_RET (0);
341         }
342       else if (a0 > bp[0]) /* borrow: |c| > |b| */
343         {
344           MPFR_SET_OPPOSITE_SIGN (a, b);
345           a0 = -a0;
346         }
347       else /* bp[0] > cp[0] */
348         MPFR_SET_SAME_SIGN (a, b);
349 
350       /* now a0 != 0 */
351       MPFR_ASSERTD(a0 != 0);
352       count_leading_zeros (cnt, a0);
353       ap[0] = a0 << cnt;
354       bx -= cnt;
355       rb = sb = 0;
356     }
357   else
358     {
359       if (bx < cx)  /* swap b and c */
360         {
361           mpfr_exp_t tx;
362           mp_limb_t *tp;
363           tx = bx; bx = cx; cx = tx;
364           tp = bp; bp = cp; cp = tp;
365           MPFR_SET_OPPOSITE_SIGN (a, b);
366         }
367       else
368         {
369           MPFR_SET_SAME_SIGN (a, b);
370         }
371       MPFR_ASSERTD (bx > cx);
372       d = (mpfr_uexp_t) bx - cx;
373       if (d < GMP_NUMB_BITS)
374         {
375           sb = - (cp[0] << (GMP_NUMB_BITS - d)); /* neglected part of -c */
376           a0 = bp[0] - (sb != 0) - (cp[0] >> d);
377           /* a0 can only be zero when d=1, b0 = B/2, and c0 = B-1, where
378              B = 2^GMP_NUMB_BITS, thus b0 - c0/2 = 1/2 */
379           if (a0 == MPFR_LIMB_ZERO)
380             {
381               bx -= GMP_NUMB_BITS;
382               ap[0] = MPFR_LIMB_HIGHBIT;
383               rb = sb = 0;
384             }
385           else
386             {
387               count_leading_zeros (cnt, a0);
388               if (cnt)
389                 a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
390               sb <<= cnt;
391               bx -= cnt;
392               rb = sb & MPFR_LIMB_HIGHBIT;
393               sb &= ~MPFR_LIMB_HIGHBIT;
394               ap[0] = a0;
395             }
396         }
397       else /* d >= GMP_NUMB_BITS */
398         {
399           /* We compute b - ulp(b) */
400           if (bp[0] > MPFR_LIMB_HIGHBIT)
401             {
402               /* If d = GMP_NUMB_BITS, rb = 0 and sb = 1,
403                  unless c0 = MPFR_LIMB_HIGHBIT in which case rb = 1 and sb = 0.
404                  If d > GMP_NUMB_BITS, rb = sb = 1. */
405               rb = d > GMP_NUMB_BITS || cp[0] == MPFR_LIMB_HIGHBIT;
406               sb = d > GMP_NUMB_BITS || cp[0] != MPFR_LIMB_HIGHBIT;
407               ap[0] = bp[0] - MPFR_LIMB_ONE;
408             }
409           else
410             {
411               /* Warning: in this case a0 is shifted by one!
412                  If d=GMP_NUMB_BITS:
413                    (a) if c0 = MPFR_LIMB_HIGHBIT, a0 = 111...111, rb = sb = 0
414                    (b) otherwise, a0 = 111...110, rb = -c0 >= 01000...000,
415                        sb = (-c0) << 2
416                  If d=GMP_NUMB_BITS+1: a0 = 111...111
417                    (c) if c0 = MPFR_LIMB_HIGHBIT, rb = 1 and sb = 0
418                    (d) otherwise rb = 0 and sb = 1
419                  If d > GMP_NUMB_BITS+1:
420                    (e) a0 = 111...111, rb = sb = 1
421               */
422               bx --;
423               if (d == GMP_NUMB_BITS && cp[0] > MPFR_LIMB_HIGHBIT)
424                 { /* case (b) */
425                   rb = MPFR_LIMB(-cp[0]) >= (MPFR_LIMB_HIGHBIT >> 1);
426                   sb = MPFR_LIMB(-cp[0]) << 2;
427                   ap[0] = -(MPFR_LIMB_ONE << 1);
428                 }
429               else /* cases (a), (c), (d) and (e) */
430                 {
431                   /* rb=1 in case (e) and case (c) */
432                   rb = d > GMP_NUMB_BITS + 1
433                     || (d == GMP_NUMB_BITS + 1 && cp[0] == MPFR_LIMB_HIGHBIT);
434                   /* sb = 1 in case (d) and (e) */
435                   sb = d > GMP_NUMB_BITS + 1
436                     || (d == GMP_NUMB_BITS + 1 && cp[0] > MPFR_LIMB_HIGHBIT);
437                   /* Warning: only set ap[0] last, otherwise in case ap=cp,
438                      the above comparisons involving cp[0] would be wrong */
439                   ap[0] = -MPFR_LIMB_ONE;
440                 }
441             }
442         }
443     }
444 
445   /* now perform rounding */
446 
447   /* Warning: MPFR considers underflow *after* rounding with an unbounded
448      exponent range. However, since b and c have same precision p, they are
449      multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
450      subtraction (with an unbounded exponent range) is exact, so that bx is
451      also the exponent after rounding with an unbounded exponent range. */
452   if (MPFR_UNLIKELY(bx < __gmpfr_emin))
453     {
454       /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
455          we have to change to RNDZ. This corresponds to:
456          (a) either bx < emin - 1
457          (b) or bx = emin - 1 and ap[0] = 1000....000 (in this case necessarily
458              rb = sb = 0 since b-c is multiple of 2^(emin-p) */
459       if (rnd_mode == MPFR_RNDN &&
460           (bx < __gmpfr_emin - 1 || ap[0] == MPFR_LIMB_HIGHBIT))
461         {
462           MPFR_ASSERTD(rb == 0 && sb == 0);
463           rnd_mode = MPFR_RNDZ;
464         }
465       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
466     }
467 
468   MPFR_SET_EXP (a, bx);
469   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
470     MPFR_RET (0);
471   else if (rnd_mode == MPFR_RNDN)
472     {
473       if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
474         goto truncate;
475       else
476         goto add_one_ulp;
477     }
478   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
479     {
480     truncate:
481       MPFR_RET(-MPFR_SIGN(a));
482     }
483   else /* round away from zero */
484     {
485     add_one_ulp:
486       ap[0] += MPFR_LIMB_ONE;
487       if (MPFR_UNLIKELY(ap[0] == 0))
488         {
489           ap[0] = MPFR_LIMB_HIGHBIT;
490           /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
491              bx+1 is at most equal to the original exponent of b. */
492           MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
493           MPFR_SET_EXP (a, bx + 1);
494         }
495       MPFR_RET(MPFR_SIGN(a));
496     }
497 }
498 
499 /* special code for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
500 static int
mpfr_sub1sp2(mpfr_ptr a,mpfr_srcptr b,mpfr_srcptr c,mpfr_rnd_t rnd_mode,mpfr_prec_t p)501 mpfr_sub1sp2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
502               mpfr_prec_t p)
503 {
504   mpfr_exp_t bx = MPFR_GET_EXP (b);
505   mpfr_exp_t cx = MPFR_GET_EXP (c);
506   mp_limb_t *ap = MPFR_MANT(a);
507   mp_limb_t *bp = MPFR_MANT(b);
508   mp_limb_t *cp = MPFR_MANT(c);
509   mpfr_prec_t cnt, INITIALIZED(sh);
510   mp_limb_t rb; /* round bit */
511   mp_limb_t sb; /* sticky bit */
512   mp_limb_t mask, a0, a1;
513   mpfr_uexp_t d;
514 
515   MPFR_ASSERTD(GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS);
516 
517   if (bx == cx) /* subtraction is exact in this case */
518     {
519       /* first compute a0: if the compiler is smart enough, it will use the generated
520          borrow to get for free the term (bp[0] < cp[0]) */
521       a0 = bp[0] - cp[0];
522       a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
523       if (a1 == 0 && a0 == 0) /* result is zero */
524         {
525           if (rnd_mode == MPFR_RNDD)
526             MPFR_SET_NEG(a);
527           else
528             MPFR_SET_POS(a);
529           MPFR_SET_ZERO(a);
530           MPFR_RET (0);
531         }
532       else if (a1 >= bp[1]) /* borrow: |c| > |b| */
533         {
534           MPFR_SET_OPPOSITE_SIGN (a, b);
535           /* a = b-c mod 2^(2*GMP_NUMB_BITS) */
536           a0 = -a0;
537           a1 = -a1 - (a0 != 0);
538         }
539       else /* bp[0] > cp[0] */
540         MPFR_SET_SAME_SIGN (a, b);
541 
542       if (a1 == 0)
543         {
544           a1 = a0;
545           a0 = 0;
546           bx -= GMP_NUMB_BITS;
547         }
548 
549       /* now a1 != 0 */
550       MPFR_ASSERTD(a1 != 0);
551       count_leading_zeros (cnt, a1);
552       if (cnt)
553         {
554           ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
555           ap[0] = a0 << cnt;
556           bx -= cnt;
557         }
558       else
559         {
560           ap[1] = a1;
561           ap[0] = a0;
562         }
563       rb = sb = 0;
564       /* Note: sh is not initialized, but will not be used in this case. */
565     }
566   else
567     {
568       mp_limb_t t;
569 
570       if (bx < cx)  /* swap b and c */
571         {
572           mpfr_exp_t tx;
573           mp_limb_t *tp;
574           tx = bx; bx = cx; cx = tx;
575           tp = bp; bp = cp; cp = tp;
576           MPFR_SET_OPPOSITE_SIGN (a, b);
577         }
578       else
579         {
580           MPFR_SET_SAME_SIGN (a, b);
581         }
582       MPFR_ASSERTD (bx > cx);
583       d = (mpfr_uexp_t) bx - cx;
584       sh =  2 * GMP_NUMB_BITS - p;
585       mask = MPFR_LIMB_MASK(sh);
586       if (d < GMP_NUMB_BITS)
587         {
588           t = (cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d);
589           /* TODO: Change the code to generate a full subtraction with borrow,
590              avoiding the test on sb and the corresponding correction. Note
591              that Clang has builtins:
592                https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins
593              but the generated code may not be good:
594                https://llvm.org/bugs/show_bug.cgi?id=20748
595              With the current source code, Clang generates on x86_64:
596                1. sub %rsi,%rbx for the first subtraction in a1;
597                2. sub %rdi,%rax for the subtraction in a0;
598                3. sbb $0x0,%rbx for the second subtraction in a1, i.e. just
599                   subtracting the borrow out from (2).
600              So, Clang recognizes the borrow, but doesn't merge (1) and (3).
601              Bug: https://llvm.org/bugs/show_bug.cgi?id=25858
602              For GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79173
603           */
604 #ifndef MPFR_FULLSUB
605           a0 = bp[0] - t;
606           a1 = bp[1] - (cp[1] >> d) - (bp[0] < t);
607           sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */
608           if (sb)
609             {
610               a1 -= (a0 == 0);
611               a0 --;
612               /* a = a1,a0 cannot become zero here, since:
613                  a) if d >= 2, then a1 >= 2^(w-1) - (2^(w-2)-1) with
614                     w = GMP_NUMB_BITS, thus a1 - 1 >= 2^(w-2),
615                  b) if d = 1, then since p < 2*GMP_NUMB_BITS we have sb=0. */
616               MPFR_ASSERTD(a1 > 0 || a0 > 0);
617               sb = -sb; /* 2^GMP_NUMB_BITS - sb */
618             }
619 #else
620           sb = - (cp[0] << (GMP_NUMB_BITS - d));
621           a0 = bp[0] - t - (sb != 0);
622           a1 = bp[1] - (cp[1] >> d) - (bp[0] < t || (bp[0] == t && sb != 0));
623 #endif
624           if (a1 == 0)
625             {
626               /* this implies d=1, which in turn implies sb=0 */
627               MPFR_ASSERTD(sb == 0);
628               a1 = a0;
629               a0 = 0; /* should be a0 = sb */
630               /* since sb=0 already, no need to set it to 0 */
631               bx -= GMP_NUMB_BITS;
632             }
633           /* now a1 != 0 */
634           MPFR_ASSERTD(a1 != 0);
635           count_leading_zeros (cnt, a1);
636           if (cnt)
637             {
638               ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
639               a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
640               sb <<= cnt;
641               bx -= cnt;
642             }
643           else
644             ap[1] = a1;
645           /* sh > 0 since p < 2*GMP_NUMB_BITS */
646           MPFR_ASSERTD(sh > 0);
647           rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
648           sb |= (a0 & mask) ^ rb;
649           ap[0] = a0 & ~mask;
650         }
651       else if (d < 2 * GMP_NUMB_BITS)
652         {  /* GMP_NUMB_BITS <= d < 2*GMP_NUMB_BITS */
653           /* warning: the most significant bit of sb might become the least
654              significant bit of a0 below */
655           sb = (d == GMP_NUMB_BITS) ? cp[0]
656             : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] != 0);
657           t = (cp[1] >> (d - GMP_NUMB_BITS)) + (sb != 0);
658           /* warning: t might overflow to 0 if d=GMP_NUMB_BITS and sb <> 0 */
659           a0 = bp[0] - t;
660           a1 = bp[1] - (bp[0] < t) - (t == 0 && sb != 0);
661           sb = -sb;
662           /* since bp[1] has its most significant bit set, we can have an
663              exponent decrease of at most one */
664           if (a1 < MPFR_LIMB_HIGHBIT)
665             {
666               ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
667               a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
668               sb <<= 1;
669               bx --;
670             }
671           else
672             ap[1] = a1;
673           rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
674           sb |= (a0 & mask) ^ rb;
675           ap[0] = a0 & ~mask;
676         }
677       else /* d >= 2*GMP_NUMB_BITS */
678         {
679           /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies:
680              1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1, unless we
681              had an exponent decrease. */
682           t = MPFR_LIMB_ONE << sh;
683           a0 = bp[0] - t;
684           a1 = bp[1] - (bp[0] < t);
685           if (a1 < MPFR_LIMB_HIGHBIT)
686             {
687               /* necessarily we had b = 1000...000 */
688               /* Warning: since we have an exponent decrease, when
689                  p = 2*GMP_NUMB_BITS - 1 and d = 2*GMP_NUMB_BITS, the round bit
690                  corresponds to the upper bit of -c. In that case rb = 0 and
691                  sb = 1, except when c = 1000...000 where rb = 1 and sb = 0. */
692               rb = sh > 1 || d > 2 * GMP_NUMB_BITS
693                 || (cp[1] == MPFR_LIMB_HIGHBIT && cp[0] == MPFR_LIMB_ZERO);
694               /* sb=1 below is incorrect when p = 2*GMP_NUMB_BITS - 1,
695                  d = 2*GMP_NUMB_BITS and c = 1000...000, but in
696                  that case the even rule wound round up too. */
697               ap[0] = ~mask;
698               ap[1] = MPFR_LIMB_MAX;
699               bx --;
700             }
701           else
702             {
703               ap[0] = a0;
704               ap[1] = a1;
705               rb = 1;
706             }
707           sb = 1;
708         }
709     }
710 
711   /* now perform rounding */
712 
713   /* Warning: MPFR considers underflow *after* rounding with an unbounded
714      exponent range. However, since b and c have same precision p, they are
715      multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
716      subtraction (with an unbounded exponent range) is exact, so that bx is
717      also the exponent after rounding with an unbounded exponent range. */
718   if (MPFR_UNLIKELY(bx < __gmpfr_emin))
719     {
720       /* for RNDN, mpfr_underflow always rounds away, thus for |a|<=2^(emin-2)
721          we have to change to RNDZ */
722       if (rnd_mode == MPFR_RNDN &&
723           (bx < __gmpfr_emin - 1 ||
724            (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0)))
725         rnd_mode = MPFR_RNDZ;
726       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
727     }
728 
729   MPFR_SET_EXP (a, bx);
730   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
731     MPFR_RET (0);
732   else if (rnd_mode == MPFR_RNDN)
733     {
734       if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
735         goto truncate;
736       else
737         goto add_one_ulp;
738     }
739   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
740     {
741     truncate:
742       MPFR_RET(-MPFR_SIGN(a));
743     }
744   else /* round away from zero */
745     {
746     add_one_ulp:
747       ap[0] += MPFR_LIMB_ONE << sh;
748       ap[1] += (ap[0] == 0);
749       if (MPFR_UNLIKELY(ap[1] == 0))
750         {
751           ap[1] = MPFR_LIMB_HIGHBIT;
752           /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
753              bx+1 is at most equal to the original exponent of b. */
754           MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
755           MPFR_SET_EXP (a, bx + 1);
756         }
757       MPFR_RET(MPFR_SIGN(a));
758     }
759 }
760 
761 /* special code for p = 2*GMP_NUMB_BITS */
762 static int
mpfr_sub1sp2n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  /* Compute a = b - c rounded according to rnd_mode, in the special case
     where a, b and c all have precision p = 2*GMP_NUMB_BITS (exactly two
     limbs).  b and c are pure FP numbers of the same sign (the caller,
     mpfr_sub1sp, guarantees this).  Returns the usual MPFR ternary value
     (0 iff the result is exact). */
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt;
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t a0, a1; /* low and high limbs of |b| - |c| */
  mpfr_uexp_t d;    /* exponent difference (always >= 0 after the swap) */

/* this function is inspired by mpfr_sub1sp2 (for the operations of the
   2-limb arrays) and by mpfr_sub1sp1n (for the different cases to handle) */

  if (bx == cx) /* subtraction is exact in this case */
    {
      a0 = bp[0] - cp[0];
      a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
      if (a1 == 0 && a0 == 0) /* result is zero */
        {
          /* the sign of an exact zero difference depends on the rounding
             mode: -0 for RNDD, +0 otherwise (IEEE 754 rule) */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      /* since B/2 <= bp[1], cp[1] < B with B=2^GMP_NUMB_BITS,
         if no borrow we have 0 <=  bp[1] - cp[1] - x < B/2
         where x = (bp[0] < cp[0]) is 0 or 1, thus a1 < B/2 <= bp[1] */
      else if (a1 >= bp[1]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          /* negate [a1,a0] (two's complement over the 2-limb value) */
          a0 = -a0;
          a1 = -a1 - (a0 != 0);
        }
      else /* no borrow */
        MPFR_SET_SAME_SIGN (a, b);

      /* now [a1,a0] is the absolute value of b - c,
         maybe not normalized */
      if (a1 == 0)
        {
          a1 = a0;
          a0 = 0;
          bx -= GMP_NUMB_BITS;
        }

      /* now a1 != 0 */
      MPFR_ASSERTD(a1 != 0);
      count_leading_zeros (cnt, a1);
      if (cnt)
        {
          /* shift [a1,a0] left by cnt bits and store in result */
          ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
          ap[0] = a0 << cnt;
          bx -= cnt;
        }
      else
        {
          ap[1] = a1;
          ap[0] = a0;
        }
      rb = sb = 0; /* the subtraction is exact */
    }
  else
    {
      mp_limb_t t;

      if (bx < cx)  /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      if (d < GMP_NUMB_BITS)
        {
          t = (cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d);
          /* t is the part that should be subtracted from bp[0]:
             |      a1       |      a0       |
             |     bp[1]     |     bp[0]     |
             |    cp[1]>>d   |      t        |     sb     | */
#ifndef MPFR_FULLSUB
          a0 = bp[0] - t;
          a1 = bp[1] - (cp[1] >> d) - (bp[0] < t);
          sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */
          /* now negate sb and subtract the borrow from [a1,a0] if sb <> 0 */
          if (sb)
            {
              a1 -= (a0 == 0);
              a0 --;
              /* a = a1,a0 can only be zero when d=1, b = 0.1000...000*2^bx,
                 and c = 0.111...111*2^(bx-1). In that case (where we have
                 sb = MPFR_LIMB_HIGHBIT below), the subtraction is exact, the
                 result is b/2^(2*GMP_NUMB_BITS). This case is dealt with
                 below. */
              sb = -sb;
            }
#else
          sb = - (cp[0] << (GMP_NUMB_BITS - d));
          a0 = bp[0] - t - (sb != 0);
          a1 = bp[1] - (cp[1] >> d) - (bp[0] < t || (bp[0] == t && sb != 0));
#endif
          /* now the result is formed of [a1,a0,sb], which might not be
             normalized */
          if (a1 == MPFR_LIMB_ZERO)
            {
              /* this implies d=1 */
              MPFR_ASSERTD(d == 1);
              a1 = a0;
              a0 = sb;
              sb = MPFR_LIMB_ZERO;
              bx -= GMP_NUMB_BITS;
            }
          if (a1 == MPFR_LIMB_ZERO) /* case a = a1,a0 = 0 mentioned above */
            {
              MPFR_ASSERTD(a0 == MPFR_LIMB_HIGHBIT); /* was sb above */
              a1 = a0;
              a0 = sb;
              bx -= GMP_NUMB_BITS;
              sb = MPFR_LIMB_ZERO;
            }
          else
            {
              count_leading_zeros (cnt, a1);
              if (cnt)
                {
                  /* shift [a1,a0,sb] left by cnt bits and adjust exponent */
                  a1 = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
                  a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
                  sb <<= cnt;
                  bx -= cnt;
                }
            }
          /* the round bit is the top bit of the neglected part sb,
             the rest of sb forms the sticky bit */
          rb = sb & MPFR_LIMB_HIGHBIT;
          sb = sb & ~MPFR_LIMB_HIGHBIT;
          ap[1] = a1;
          ap[0] = a0;
        }
      else if (d < 2 * GMP_NUMB_BITS)
        {  /* GMP_NUMB_BITS <= d < 2*GMP_NUMB_BITS:
              compute t, the part to be subtracted from bp[0],
              and sb, the neglected part of c:
             |      a1       |      a0       |
             |     bp[1]     |     bp[0]     |
                             |      t        |     sb     | */
          /* warning: we should not ignore the low bits from cp[0]
             in case d > GMP_NUMB_BITS */
          sb = (d == GMP_NUMB_BITS) ? cp[0]
            : (cp[1] << (2*GMP_NUMB_BITS - d))
              | (cp[0] >> (d - GMP_NUMB_BITS))
              | ((cp[0] << (2*GMP_NUMB_BITS - d)) != 0);
          t = (cp[1] >> (d - GMP_NUMB_BITS)) + (sb != 0);
          /* Warning: t might overflow to 0 if d=GMP_NUMB_BITS, sb <> 0,
             and cp[1] = 111...111 */
          a0 = bp[0] - t;
          a1 = bp[1] - (bp[0] < t) - (t == 0 && sb != 0);
          sb = -sb;
          /* now the result is [a1,a0,sb]. Since bp[1] has its most significant
             bit set, we can have an exponent decrease of at most one */
          if (a1 < MPFR_LIMB_HIGHBIT)
            {
              /* shift [a1,a0] left by 1 bit */
              a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              MPFR_ASSERTD(a1 >= MPFR_LIMB_HIGHBIT);
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          ap[1] = a1;
          ap[0] = a0;
          rb = sb & MPFR_LIMB_HIGHBIT;
          sb = sb & ~MPFR_LIMB_HIGHBIT;
        }
      else
        { /* d >= 2*GMP_NUMB_BITS: c is entirely below ulp(b), so
             b - ulp(b) < |b| - |c| < b:
             |      a1       |      a0       |
             |     bp[1]     |     bp[0]     |
                                                |   cp[1]   |   cp[0]   | */
          /* we mimic the case d >= GMP_NUMB_BITS of mpfr_sub1sp1n */
          int tst = cp[1] == MPFR_LIMB_HIGHBIT && cp[0] == MPFR_LIMB_ZERO;
          /* if d = 2 * GMP_NUMB_BITS and tst=1, c = 1/2*ulp(b) */
          if (bp[1] > MPFR_LIMB_HIGHBIT || bp[0] > MPFR_LIMB_ZERO)
            {
              /* no borrow in b - ulp(b) */
              rb = d > 2 * GMP_NUMB_BITS || tst;
              sb = d > 2 * GMP_NUMB_BITS || !tst;
              ap[1] = bp[1] - (bp[0] == MPFR_LIMB_ZERO);
              ap[0] = bp[0] - MPFR_LIMB_ONE;
            }
          else
            {
              /* b = 1000...000, thus subtracting c yields an exponent shift */
              bx --;
              if (d == 2 * GMP_NUMB_BITS && !tst) /* c > 1/2*ulp(b) */
                {
                  t = -cp[1] - (cp[0] > MPFR_LIMB_ZERO);
                  /* the rounding bit is the 2nd most significant bit of t
                     (where the most significant bit of t is necessarily 0),
                     and the sticky bit is formed by the remaining bits of t,
                     and those from -cp[0] */
                  rb = t >= (MPFR_LIMB_HIGHBIT >> 1);
                  sb = (t << 2) | cp[0];
                  ap[1] = MPFR_LIMB_MAX;
                  ap[0] = -(MPFR_LIMB_ONE << 1);
                }
              else /* c <= 1/2*ulp(b) */
                {
                  rb = d > 2 * GMP_NUMB_BITS + 1
                    || (d == 2 * GMP_NUMB_BITS + 1 && tst);
                  sb = d > 2 * GMP_NUMB_BITS + 1
                    || (d == 2 * GMP_NUMB_BITS + 1 && !tst);
                  ap[1] = -MPFR_LIMB_ONE;
                  ap[0] = -MPFR_LIMB_ONE;
                }
            }
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have same precision p, they are
     multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* for RNDN, mpfr_underflow always rounds away, thus for |a|<=2^(emin-2)
         we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      /* round to nearest, ties to even: the parity bit is the least
         significant kept bit ap[0] & 1 (here sh = 0 since p is a
         multiple of GMP_NUMB_BITS) */
      if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      /* rb/sb represent a neglected (subtracted) quantity, so truncation
         yields a value larger than the exact result in absolute value */
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE;
      ap[1] += (ap[0] == 0);
      if (MPFR_UNLIKELY(ap[1] == 0))
        {
          ap[1] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}
1041 
/* special code for 2*GMP_NUMB_BITS < p < 3*GMP_NUMB_BITS:
   compute a = b - c rounded according to rnd_mode, where the significands
   of a, b and c occupy exactly 3 limbs; returns the usual MPFR ternary
   value (0 iff exact) */
static int
mpfr_sub1sp3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
              mpfr_prec_t p)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt, INITIALIZED(sh); /* sh = number of unused low bits */
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t mask, a0, a1, a2; /* a2:a1:a0 = 3-limb value of |b| - |c| */
  mpfr_uexp_t d; /* exponent difference (always >= 0 after the swap) */

  MPFR_ASSERTD(2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS);

  if (bx == cx) /* subtraction is exact in this case */
    {
      a0 = bp[0] - cp[0];
      a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
      /* a borrow is generated for a when either bp[1] < cp[1],
         or bp[1] = cp[1] and bp[0] < cp[0] */
      a2 = bp[2] - cp[2]
        - (bp[1] < cp[1] || (bp[1] == cp[1] && bp[0] < cp[0]));
      if (a2 == 0 && a1 == 0 && a0 == 0) /* result is zero */
        {
          /* exact zero difference: sign is -0 for RNDD, +0 otherwise */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (a2 >= bp[2]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          /* a = b-c mod 2^(3*GMP_NUMB_BITS) */
          a0 = -a0;
          a1 = -a1 - (a0 != 0);
          a2 = -a2 - (a0 != 0 || a1 != 0);
        }
      else /* no borrow: |b| > |c| */
        MPFR_SET_SAME_SIGN (a, b);

      /* normalize: shift out zero high limbs (at most two, since the
         difference is nonzero here) */
      if (a2 == 0)
        {
          a2 = a1;
          a1 = a0;
          a0 = 0;
          bx -= GMP_NUMB_BITS;
          if (a2 == 0)
            {
              a2 = a1;
              a1 = 0;
              bx -= GMP_NUMB_BITS;
            }
        }

      /* now a2 != 0 */
      MPFR_ASSERTD(a2 != 0);
      count_leading_zeros (cnt, a2);
      if (cnt)
        {
          ap[2] = (a2 << cnt) | (a1 >> (GMP_NUMB_BITS - cnt));
          ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
          ap[0] = a0 << cnt;
          bx -= cnt;
        }
      else
        {
          ap[2] = a2;
          ap[1] = a1;
          ap[0] = a0;
        }
      rb = sb = 0;
      /* Note: sh is not initialized, but will not be used in this case. */
    }
  else
    {
      if (bx < cx)  /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      sh =  3 * GMP_NUMB_BITS - p;
      mask = MPFR_LIMB_MASK(sh);
      if (d < GMP_NUMB_BITS)
        {
          mp_limb_t cy;
          /* warning: we must have the most significant bit of sb correct
             since it might become the round bit below */
          sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */
          a0 = bp[0] - ((cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d));
          a1 = bp[1] - ((cp[2] << (GMP_NUMB_BITS - d)) | (cp[1] >> d))
            - (a0 > bp[0]);
          cy = a1 > bp[1] || (a1 == bp[1] && a0 > bp[0]); /* borrow in a1 */
          a2 = bp[2] - (cp[2] >> d) - cy;
          /* if sb is non-zero, subtract 1 from a2, a1, a0 since we want a
             non-negative neglected part */
          if (sb)
            {
              a2 -= (a1 == 0 && a0 == 0);
              a1 -= (a0 == 0);
              a0 --;
              /* a = a2,a1,a0 cannot become zero here, since:
                 a) if d >= 2, then a2 >= 2^(w-1) - (2^(w-2)-1) with
                    w = GMP_NUMB_BITS, thus a2 - 1 >= 2^(w-2),
                 b) if d = 1, then since p < 3*GMP_NUMB_BITS we have sb=0. */
              MPFR_ASSERTD(a2 > 0 || a1 > 0 || a0 > 0);
              sb = -sb; /* 2^GMP_NUMB_BITS - sb */
            }
          if (a2 == 0)
            {
              /* this implies d=1, which in turn implies sb=0 */
              MPFR_ASSERTD(sb == 0);
              a2 = a1;
              a1 = a0;
              a0 = 0; /* should be a0 = sb */
              /* since sb=0 already, no need to set it to 0 */
              bx -= GMP_NUMB_BITS;
              if (a2 == 0)
                {
                  a2 = a1;
                  a1 = 0; /* should be a1 = a0 */
                  bx -= GMP_NUMB_BITS;
                }
            }
          /* now a2 != 0 */
          MPFR_ASSERTD(a2 != 0);
          count_leading_zeros (cnt, a2);
          if (cnt)
            {
              ap[2] = (a2 << cnt) | (a1 >> (GMP_NUMB_BITS - cnt));
              ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
              a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
              sb <<= cnt;
              bx -= cnt;
            }
          else
            {
              ap[2] = a2;
              ap[1] = a1;
            }
          /* sh > 0 since p < 3*GMP_NUMB_BITS */
          MPFR_ASSERTD(sh > 0);
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else if (d < 2 * GMP_NUMB_BITS)
        {
          mp_limb_t c0shifted;
          /* warning: we must have the most significant bit of sb correct
             since it might become the round bit below */
          sb = (d == GMP_NUMB_BITS) ? cp[0]
            : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] != 0);
          c0shifted = (d == GMP_NUMB_BITS) ? cp[1]
            : (cp[2] << (2*GMP_NUMB_BITS-d)) | (cp[1] >> (d - GMP_NUMB_BITS));
          a0 = bp[0] - c0shifted;
          /* TODO: add a non-regression test for cp[2] == MPFR_LIMB_MAX,
             d == GMP_NUMB_BITS and a0 > bp[0]. */
          a1 = bp[1] - (cp[2] >> (d - GMP_NUMB_BITS)) - (a0 > bp[0]);
          a2 = bp[2] - (a1 > bp[1] || (a1 == bp[1] && a0 > bp[0]));
          /* if sb is non-zero, subtract 1 from a2, a1, a0 since we want a
             non-negative neglected part */
          if (sb)
            {
              a2 -= (a1 == 0 && a0 == 0);
              a1 -= (a0 == 0);
              a0 --;
              /* a = a2,a1,a0 cannot become zero here, since:
                 a) if d >= 2, then a2 >= 2^(w-1) - (2^(w-2)-1) with
                    w = GMP_NUMB_BITS, thus a2 - 1 >= 2^(w-2),
                 b) if d = 1, then since p < 3*GMP_NUMB_BITS we have sb=0. */
              MPFR_ASSERTD(a2 > 0 || a1 > 0 || a0 > 0);
              sb = -sb; /* 2^GMP_NUMB_BITS - sb */
            }
          /* since bp[2] has its most significant bit set, we can have an
             exponent decrease of at most one */
          if (a2 < MPFR_LIMB_HIGHBIT)
            {
              ap[2] = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
              ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          else
            {
              ap[2] = a2;
              ap[1] = a1;
            }
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else if (d < 3 * GMP_NUMB_BITS) /* 2*GMP_NUMB_BITS<=d<3*GMP_NUMB_BITS */
        {
          MPFR_ASSERTD (2*GMP_NUMB_BITS <= d && d < 3*GMP_NUMB_BITS);
          /* warning: we must have the most significant bit of sb correct
             since it might become the round bit below */
          if (d == 2 * GMP_NUMB_BITS)
            sb = cp[1] | (cp[0] != 0);
          else
            sb = cp[2] << (3*GMP_NUMB_BITS - d) | (cp[1] != 0) | (cp[0] != 0);
          sb = -sb;
          /* TODO: add a non-regression test for cp[2] == MPFR_LIMB_MAX,
             d == 2*GMP_NUMB_BITS and sb != 0. */
          a0 = bp[0] - (cp[2] >> (d - 2*GMP_NUMB_BITS)) - (sb != 0);
          a1 = bp[1] - (a0 > bp[0] || (a0 == bp[0] && sb != 0));
          a2 = bp[2] - (a1 > bp[1]);
          if (a2 < MPFR_LIMB_HIGHBIT)
            {
              ap[2] = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
              ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          else
            {
              ap[2] = a2;
              ap[1] = a1;
            }
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else /* d >= 3*GMP_NUMB_BITS */
        {
          /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies:
             1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1. */
          mp_limb_t t = MPFR_LIMB_ONE << sh;
          a0 = bp[0] - t;
          a1 = bp[1] - (bp[0] < t);
          a2 = bp[2] - (a1 > bp[1]);
          if (a2 < MPFR_LIMB_HIGHBIT)
            {
              /* necessarily we had b = 1000...000 */
              /* Warning: since we have an exponent decrease, when
                 p = 3*GMP_NUMB_BITS - 1 and d = 3*GMP_NUMB_BITS, the round bit
                 corresponds to the upper bit of -c. In that case rb = 0 and
                 sb = 1, except when c = 1000...000 where rb = 1 and sb = 0. */
              rb = sh > 1 || d > 3 * GMP_NUMB_BITS
                || (cp[2] == MPFR_LIMB_HIGHBIT && cp[1] == MPFR_LIMB_ZERO &&
                    cp[0] == MPFR_LIMB_ZERO);
              /* sb=1 below is incorrect when p = 3*GMP_NUMB_BITS - 1,
                 d = 3*GMP_NUMB_BITS and c = 1000...000, but in
                 that case the even rule would round up too. */
              ap[0] = ~mask;
              ap[1] = MPFR_LIMB_MAX;
              ap[2] = MPFR_LIMB_MAX;
              bx --;
            }
          else
            {
              ap[0] = a0;
              ap[1] = a1;
              ap[2] = a2;
              rb = 1;
            }
          sb = 1;
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have same precision p, they are
     multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* for RNDN, mpfr_underflow always rounds away, thus for |a|<=2^(emin-2)
         we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      /* round to nearest, ties to even: the parity bit is the least
         significant kept bit, i.e. bit sh of ap[0] */
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      /* rb/sb represent a neglected (subtracted) quantity, so truncation
         yields a value larger than the exact result in absolute value */
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      ap[1] += (ap[0] == 0);
      ap[2] += (ap[1] == 0 && ap[0] == 0);
      if (MPFR_UNLIKELY(ap[2] == 0))
        {
          ap[2] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}
1368 
1369 #endif /* !defined(MPFR_GENERIC_ABI) */
1370 
1371 /* Rounding Sub */
1372 
1373 /*
1374    compute sgn(b)*(|b| - |c|) if |b|>|c| else -sgn(b)*(|c| -|b|)
1375    Returns 0 iff result is exact,
1376    a negative value when the result is less than the exact value,
1377    a positive value otherwise.
1378 */
1379 
1380 /* A0...Ap-1
1381  *          Cp Cp+1 ....
1382  *             <- C'p+1 ->
1383  * Cp = -1 if calculated from c mantissa
1384  * Cp = 0  if 0 from a or c
1385  * Cp = 1  if calculated from a.
1386  * C'p+1 = First bit not null or 0 if there isn't one
1387  *
1388  * Can't have Cp=-1 and C'p+1=1*/
1389 
1390 /* RND = MPFR_RNDZ:
1391  *  + if Cp=0 and C'p+1=0,1,  Truncate.
1392  *  + if Cp=0 and C'p+1=-1,   SubOneUlp
1393  *  + if Cp=-1,               SubOneUlp
1394  *  + if Cp=1,                AddOneUlp
1395  * RND = MPFR_RNDA (Away)
1396  *  + if Cp=0 and C'p+1=0,-1, Truncate
1397  *  + if Cp=0 and C'p+1=1,    AddOneUlp
1398  *  + if Cp=1,                AddOneUlp
1399  *  + if Cp=-1,               Truncate
1400  * RND = MPFR_RNDN
1401  *  + if Cp=0,                Truncate
1402  *  + if Cp=1 and C'p+1=1,    AddOneUlp
1403  *  + if Cp=1 and C'p+1=-1,   Truncate
 *  + if Cp=1 and C'p+1=0,    Truncate if Ap-1=0, else AddOneUlp
 *  + if Cp=-1 and C'p+1=-1,  SubOneUlp
 *  + if Cp=-1 and C'p+1=0,   Truncate if Ap-1=0, else SubOneUlp
1407  *
1408  * If AddOneUlp:
1409  *   If carry, then it is 11111111111 + 1 = 10000000000000
 *      ap[n-1]=MPFR_LIMB_HIGHBIT
1411  * If SubOneUlp:
1412  *   If we lose one bit, it is 1000000000 - 1 = 0111111111111
1413  *      Then shift, and put as last bit x which is calculated
 *              according to Cp, Cp-1 and rnd_mode.
1415  * If Truncate,
1416  *    If it is a power of 2,
1417  *       we may have to suboneulp in some special cases.
1418  *
1419  * To simplify, we don't use Cp = 1.
1420  *
1421  */
1422 
1423 MPFR_HOT_FUNCTION_ATTR int
mpfr_sub1sp(mpfr_ptr a,mpfr_srcptr b,mpfr_srcptr c,mpfr_rnd_t rnd_mode)1424 mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
1425 {
1426   mpfr_exp_t bx, cx;
1427   mpfr_uexp_t d;
1428   mpfr_prec_t p, sh, cnt;
1429   mp_size_t n, k;
1430   mp_limb_t *ap = MPFR_MANT(a);
1431   mp_limb_t *bp = MPFR_MANT(b);
1432   mp_limb_t *cp = MPFR_MANT(c);
1433   mp_limb_t limb;
1434   int inexact;
1435   mp_limb_t rb, sb; /* round and sticky bits. They are interpreted as
1436                        negative, i.e., if rb <> 0, then we should subtract 1
1437                        at the round bit position, and of sb <> 0, we should
1438                        subtract something below the round bit position. */
1439   mp_limb_t rbb = MPFR_LIMB_MAX, sbb = MPFR_LIMB_MAX; /* rbb is the next bit
1440     after the round bit, and sbb the corresponding sticky bit.
1441     gcc claims that they might be used uninitialized. We fill them with invalid
1442     values, which should produce a failure if so. See README.dev file. */
1443   int pow2;
1444 
1445   MPFR_TMP_DECL(marker);
1446 
1447   MPFR_ASSERTD(MPFR_PREC(a) == MPFR_PREC(b) && MPFR_PREC(b) == MPFR_PREC(c));
1448   MPFR_ASSERTD(MPFR_IS_PURE_FP(b));
1449   MPFR_ASSERTD(MPFR_IS_PURE_FP(c));
1450 
1451   /* Read prec and num of limbs */
1452   p = MPFR_GET_PREC (b);
1453 
1454 #if !defined(MPFR_GENERIC_ABI)
1455   /* special case for p < GMP_NUMB_BITS */
1456   if (p < GMP_NUMB_BITS)
1457     return mpfr_sub1sp1 (a, b, c, rnd_mode, p);
1458 
1459   /* special case for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
1460   if (GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS)
1461     return mpfr_sub1sp2 (a, b, c, rnd_mode, p);
1462 
1463   /* special case for p = GMP_NUMB_BITS: we put it *after* mpfr_sub1sp2,
1464      in order not to slow down mpfr_sub1sp2, which should be more frequent */
1465   if (p == GMP_NUMB_BITS)
1466     return mpfr_sub1sp1n (a, b, c, rnd_mode);
1467 
1468   /* special case for 2*GMP_NUMB_BITS < p < 3*GMP_NUMB_BITS */
1469   if (2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS)
1470     return mpfr_sub1sp3 (a, b, c, rnd_mode, p);
1471 
1472   if (p == 2 * GMP_NUMB_BITS)
1473     return mpfr_sub1sp2n (a, b, c, rnd_mode);
1474 #endif
1475 
1476   n = MPFR_PREC2LIMBS (p);
1477   /* Fast cmp of |b| and |c| */
1478   bx = MPFR_GET_EXP (b);
1479   cx = MPFR_GET_EXP (c);
1480 
1481   MPFR_TMP_MARK(marker);
1482 
1483   k = n - 1;
1484 
1485   if (bx == cx)
1486     {
1487       /* Check mantissa since exponents are equal */
1488       while (k >= 0 && MPFR_UNLIKELY(bp[k] == cp[k]))
1489         k--;
1490       /* now k = - 1 if b == c, otherwise k is the largest integer < n such
1491          that bp[k] <> cp[k] */
1492       if (k < 0)
1493         /* b == c ! */
1494         {
1495           /* Return exact number 0 */
1496           if (rnd_mode == MPFR_RNDD)
1497             MPFR_SET_NEG(a);
1498           else
1499             MPFR_SET_POS(a);
1500           MPFR_SET_ZERO(a);
1501           MPFR_RET(0);
1502         }
1503       else if (bp[k] > cp[k])
1504         goto BGreater;
1505       else
1506         {
1507           MPFR_ASSERTD(bp[k] < cp[k]);
1508           goto CGreater;
1509         }
1510     }
1511   else if (bx < cx)
1512     {
1513       /* Swap b and c and set sign */
1514       mpfr_srcptr t;
1515       mpfr_exp_t tx;
1516       mp_limb_t *tp;
1517 
1518       tx = bx; bx = cx; cx = tx;
1519     CGreater:
1520       MPFR_SET_OPPOSITE_SIGN(a,b);
1521       t  = b;  b  = c;  c  = t;
1522       tp = bp; bp = cp; cp = tp;
1523     }
1524   else
1525     {
1526       /* |b| > |c| */
1527     BGreater:
1528       MPFR_SET_SAME_SIGN(a,b);
1529     }
1530 
1531   /* Now |b| > |c| */
1532   MPFR_ASSERTD(bx >= cx);
1533   d = (mpfr_uexp_t) bx - cx;
1534   /* printf ("New with diff=%lu\n", (unsigned long) d); */
1535 
1536   /* FIXME: The goto's below are too complex (long backward) and make
1537      the code unreadable. */
1538 
1539   if (d == 0)
1540     {
1541       /* <-- b -->
1542          <-- c --> : exact sub */
1543           mpn_sub_n (ap, bp, cp, n);
1544           /* Normalize */
1545         ExactNormalize:
1546           limb = ap[n-1];
1547           if (MPFR_LIKELY (limb != 0))
1548             {
1549               /* First limb is not zero. */
1550               count_leading_zeros (cnt, limb);
1551               /* Warning: cnt can be 0 when we come from the case SubD1Lose
1552                  with goto ExactNormalize */
1553               if (MPFR_LIKELY(cnt))
1554                 {
1555                   mpn_lshift (ap, ap, n, cnt); /* Normalize number */
1556                   bx -= cnt; /* Update final expo */
1557                 }
1558               /* Last limb should be OK */
1559               MPFR_ASSERTD(!(ap[0] & MPFR_LIMB_MASK((unsigned int) (-p)
1560                                                     % GMP_NUMB_BITS)));
1561             }
1562           else
1563             {
1564               /* First limb is zero: this can only occur for n >= 2 */
1565               mp_size_t len;
1566               /* Find the first limb not equal to zero. It necessarily exists
1567                  since |b| > |c|. We know that bp[k] > cp[k] and all upper
1568                  limbs are equal. */
1569               while (ap[k] == 0)
1570                 k--;
1571               limb = ap[k];
1572               /* ap[k] is the non-zero limb of largest index, thus we have
1573                  to consider the k+1 least significant limbs */
1574               MPFR_ASSERTD(limb != 0);
1575               count_leading_zeros(cnt, limb);
1576               k++;
1577               len = n - k; /* Number of most significant zero limbs */
1578               MPFR_ASSERTD(k > 0);
1579               if (cnt)
1580                 mpn_lshift (ap + len, ap, k, cnt); /* Normalize the High Limb*/
1581               else
1582                 /* Must use copyd since src and dst may overlap & dst>=src */
1583                 mpn_copyd (ap + len, ap, k);
1584               MPN_ZERO(ap, len); /* Zeroing the last limbs */
1585               bx -= cnt + len * GMP_NUMB_BITS; /* update exponent */
1586               /* ap[len] should have its low bits zero: it is bp[0]-cp[0] */
1587               MPFR_ASSERTD(!(ap[len] & MPFR_LIMB_MASK((unsigned int) (-p)
1588                                                       % GMP_NUMB_BITS)));
1589             }
1590           /* Check exponent underflow (no overflow can happen) */
1591           if (MPFR_UNLIKELY(bx < __gmpfr_emin))
1592             {
1593               MPFR_TMP_FREE(marker);
1594               /* since b and c have same sign, exponent and precision, the
1595                  subtraction is exact */
1596               /* printf("(D==0 Underflow)\n"); */
1597               /* for MPFR_RNDN, mpfr_underflow always rounds away from zero,
1598                  thus for |a| <= 2^(emin-2) we change to RNDZ. */
1599               if (rnd_mode == MPFR_RNDN &&
1600                   (bx < __gmpfr_emin - 1 || mpfr_powerof2_raw (a)))
1601                 rnd_mode = MPFR_RNDZ;
1602               return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
1603             }
1604           MPFR_SET_EXP (a, bx);
1605           /* No rounding is necessary since the result is exact */
1606           MPFR_ASSERTD(ap[n-1] & MPFR_LIMB_HIGHBIT);
1607           MPFR_TMP_FREE(marker);
1608           return 0;
1609     }
1610   else if (d == 1)
1611         {
1612           /* | <-- b -->
1613              |  <-- c --> */
1614           mp_limb_t c0, mask;
1615           MPFR_UNSIGNED_MINUS_MODULO(sh, p);
1616           /* If we lose at least one bit, compute 2*b-c (Exact)
1617            * else compute b-c/2 */
1618           limb = bp[k] - cp[k]/2;
1619           /* Let W = 2^GMP_NUMB_BITS:
1620              we have |b|-|c| >= limb*W^k - (2*W^k-1)/2 >= limb*W^k - W^k + 1/2
1621              thus if limb > W/2, |b|-|c| >= 1/2*W^n.
1622              Moreover if trunc(|c|) represents the first p-1 bits of |c|,
1623              minus the last significant bit called c0 below (in fact c0 is that
1624              bit shifted by sh bits), then we have
1625              |b|-trunc(|c|) >= 1/2*W^n+1, thus the two mpn_sub_n calls
1626              below necessarily yield a > 1/2*W^n. */
1627           if (limb > MPFR_LIMB_HIGHBIT) /* case limb > W/2 */
1628             {
1629               mp_limb_t *tp;
1630               /* The exponent cannot decrease: compute b-c/2 */
1631               /* Shift c in the allocated temporary block */
1632             SubD1NoLose:
1633               c0 = cp[0] & (MPFR_LIMB_ONE << sh);
1634               mask = ~MPFR_LIMB_MASK(sh);
1635               tp = MPFR_TMP_LIMBS_ALLOC (n);
1636               /* FIXME: it might be faster to have one function shifting c by 1
1637                  to the right and adding with b to a, which would read c once
1638                  only, and avoid a temporary allocation. */
1639               mpn_rshift (tp, cp, n, 1);
1640               tp[0] &= mask; /* Zero last bit of c if set */
1641               mpn_sub_n (ap, bp, tp, n);
1642               MPFR_SET_EXP(a, bx); /* No exponent overflow! */
1643               MPFR_ASSERTD(ap[n-1] & MPFR_LIMB_HIGHBIT);
1644               if (MPFR_LIKELY(c0 == 0))
1645                 {
1646                   /* Result is exact: no need of rounding! */
1647                   MPFR_TMP_FREE(marker);
1648                   return 0;
1649                 }
1650               /* c0 is non-zero, thus we have to subtract 1/2*ulp(a),
1651                  however, we know (see analysis above) that this cannot
1652                  make the exponent decrease */
1653               MPFR_ASSERTD( !(ap[0] & ~mask) );    /* Check last bits */
1654               /* No normalize is needed */
1655 
1656               /* Rounding is necessary since c0 is non-zero */
1657               /* we have to subtract 1 at the round bit position,
1658                  and 0 for the lower bits */
1659               rb = 1; rbb = sbb = 0;
1660             }
1661           else if (MPFR_LIKELY(limb < MPFR_LIMB_HIGHBIT))
1662             {
1663               mp_limb_t *tp;
1664               /* |b| - |c| <= (W/2-1)*W^k + W^k-1 = 1/2*W^n - 1 */
1665               /* The exponent decreases by one. */
1666             SubD1Lose:
1667               /* Compute 2*b-c (Exact) */
1668 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_RSBLSH1_N)
1669               /* {ap, n} = 2*{bp, n} - {cp, n} */
1670               /* mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] */
1671               __gmpn_rsblsh1_n (ap, cp, bp, n);
1672 #else
1673               tp = MPFR_TMP_LIMBS_ALLOC (n);
1674               /* Shift b in the allocated temporary block */
1675               mpn_lshift (tp, bp, n, 1);
1676               mpn_sub_n (ap, tp, cp, n);
1677 #endif
1678               bx--;
1679               MPFR_ASSERTD(k == n-1);
1680               goto ExactNormalize;
1681             }
1682           else /* limb = MPFR_LIMB_HIGHBIT */
1683             {
1684               /* Case: limb = 100000000000 */
1685               /* Check while b[l] == c'[l] (C' is C shifted by 1) */
1686               /* If b[l]<c'[l] => We lose at least one bit */
1687               /* If b[l]>c'[l] => We don't lose any bit */
1688               /* If l==-1 => We don't lose any bit
1689                  AND the result is 100000000000 0000000000 00000000000 */
1690               mp_size_t l = n - 1;
1691               mp_limb_t cl_shifted;
1692               do
1693                 {
1694                   /* the first loop will compare b[n-2] and c'[n-2] */
1695                   cl_shifted = cp[l] << (GMP_NUMB_BITS - 1);
1696                   if (--l < 0)
1697                     break;
1698                   cl_shifted += cp[l] >> 1;
1699                 }
1700               while (bp[l] == cl_shifted);
1701               if (MPFR_UNLIKELY(l < 0))
1702                 {
1703                   if (MPFR_UNLIKELY(cl_shifted))
1704                     {
1705                       /* Since cl_shifted is what should be subtracted
1706                          from ap[-1], if non-zero then necessarily the
1707                          precision is a multiple of GMP_NUMB_BITS, and we lose
1708                          one bit, thus the (exact) result is a power of 2
1709                          minus 1. */
1710                       memset (ap, -1, n * MPFR_BYTES_PER_MP_LIMB);
1711                       MPFR_SET_EXP (a, bx - 1);
1712                       /* No underflow is possible since cx = bx-1 is a valid
1713                          exponent. */
1714                     }
1715                   else
1716                     {
1717                       /* cl_shifted=0: result is a power of 2. */
1718                       MPN_ZERO (ap, n - 1);
1719                       ap[n-1] = MPFR_LIMB_HIGHBIT;
1720                       MPFR_SET_EXP (a, bx); /* No exponent overflow! */
1721                     }
1722                   /* No Normalize is needed */
1723                   /* No Rounding is needed */
1724                   MPFR_TMP_FREE (marker);
1725                   return 0;
1726                 }
1727               /* cl_shifted is the shifted value c'[l] */
1728               else if (bp[l] > cl_shifted)
1729                 goto SubD1NoLose; /* |b|-|c| >= 1/2*W^n */
1730               else
1731                 {
1732                   /* we cannot have bp[l] = cl_shifted since the only way we
1733                      can exit the while loop above is when bp[l] <> cl_shifted
1734                      or l < 0, and the case l < 0 was already treated above. */
1735                   MPFR_ASSERTD(bp[l] < cl_shifted);
1736                   goto SubD1Lose; /* |b|-|c| <= 1/2*W^n-1 and is exact */
1737                 }
1738             }
1739         }
1740   else if (MPFR_UNLIKELY(d >= p)) /* the difference of exponents is larger
1741                                      than the precision of all operands, thus
1742                                      the result is either b or b - 1 ulp,
1743                                      with a possible exact result when
1744                                      d = p, b = 2^e and c = 1/2 ulp(b) */
1745     {
1746       MPFR_UNSIGNED_MINUS_MODULO(sh, p);
1747       /* We can't set A before since we use cp for rounding... */
1748       /* Perform rounding: check if a=b or a=b-ulp(b) */
1749       if (MPFR_UNLIKELY(d == p))
1750         {
1751           /* since c is normalized, we need to subtract 1/2 ulp(b) */
1752           rb = 1;
1753           /* rbb is the bit of weight 1/4 ulp(b) in c. We assume a limb has
1754              at least 2 bits. If the precision is 1, we read in the unused
1755              bits, which should be zero, and this is what we want. */
1756           rbb = cp[n-1] & (MPFR_LIMB_HIGHBIT >> 1);
1757 
1758           /* We need also sbb */
1759           sbb = cp[n-1] & MPFR_LIMB_MASK(GMP_NUMB_BITS - 2);
1760           for (k = n-1; sbb == 0 && k > 0;)
1761             sbb = cp[--k];
1762         }
1763       else
1764         {
1765           rb = 0;
1766           if (d == p + 1)
1767             {
1768               rbb = 1;
1769               sbb = cp[n-1] & MPFR_LIMB_MASK(GMP_NUMB_BITS - 1);
1770               for (k = n-1; sbb == 0 && k > 0;)
1771                 sbb = cp[--k];
1772             }
1773           else
1774             {
1775               rbb = 0;
1776               sbb = 1; /* since C is non-zero */
1777             }
1778         }
1779       /* Copy mantissa B in A */
1780       MPN_COPY(ap, bp, n);
1781     }
1782   else /* case 2 <= d < p */
1783     {
1784       mpfr_uexp_t dm;
1785       mp_size_t m;
1786       mp_limb_t mask, *tp;
1787 
1788       MPFR_UNSIGNED_MINUS_MODULO(sh, p);
1789       tp = MPFR_TMP_LIMBS_ALLOC (n);
1790 
1791       /* Shift c in temporary allocated place */
1792       dm = d % GMP_NUMB_BITS;
1793       m = d / GMP_NUMB_BITS;
1794       if (MPFR_UNLIKELY(dm == 0))
1795         {
1796           /* dm = 0 and m > 0: Just copy */
1797           MPFR_ASSERTD(m != 0);
1798           MPN_COPY(tp, cp + m, n - m);
1799           MPN_ZERO(tp + n - m, m);
1800         }
1801       else if (MPFR_LIKELY(m == 0))
1802         {
1803           /* dm >=2 and m == 0: just shift */
1804           MPFR_ASSERTD(dm >= 2);
1805           mpn_rshift (tp, cp, n, dm);
1806         }
1807       else
1808         {
1809           /* dm > 0 and m > 0: shift and zero  */
1810           mpn_rshift (tp, cp + m, n - m, dm);
1811           MPN_ZERO (tp + n - m, m);
1812         }
1813       /* FIXME: Instead of doing "cp = tp;", keep using tp to avoid
1814          confusion? Thus in the block below, we don't need
1815          "mp_limb_t *cp = MPFR_MANT(c);". In short, cp should always
1816          be MPFR_MANT(c) defined earlier, possibly after the swap. */
1817       cp = tp;
1818 
1819       /* mpfr_print_mant_binary("Before", MPFR_MANT(c), p); */
1820       /* mpfr_print_mant_binary("B=    ", MPFR_MANT(b), p); */
1821       /* mpfr_print_mant_binary("After ", cp, p); */
1822 
1823       /* Compute rb=Cp and sb=C'p+1 */
1824       {
1825         /* Compute rb and rbb from C */
1826         mp_limb_t *cp = MPFR_MANT(c);
1827         /* The round bit is bit p-d in C, assuming the most significant bit
1828            of C is bit 0 */
1829         mpfr_prec_t  x = p - d;
1830         mp_size_t   kx = n - 1 - (x / GMP_NUMB_BITS);
1831         mpfr_prec_t sx = GMP_NUMB_BITS - 1 - (x % GMP_NUMB_BITS);
1832         /* the round bit is in cp[kx], at position sx */
1833         MPFR_ASSERTD(p >= d);
1834         rb = cp[kx] & (MPFR_LIMB_ONE << sx);
1835 
1836         /* Now compute rbb: since d >= 2 it always exists in C */
1837         if (sx == 0) /* rbb is in the next limb */
1838           {
1839             kx --;
1840             sx = GMP_NUMB_BITS - 1;
1841           }
1842         else
1843           sx --; /* rb and rbb are in the same limb */
1844         rbb = cp[kx] & (MPFR_LIMB_ONE << sx);
1845 
1846         /* Now look at the remaining low bits of C to determine sbb */
1847         sbb = cp[kx] & MPFR_LIMB_MASK(sx);
1848         while (sbb == 0 && kx > 0)
1849           sbb = cp[--kx];
1850       }
1851       /* printf("sh=%lu Cp=%d C'p+1=%d\n", sh, rb!=0, sb!=0); */
1852 
1853       /* Clean shifted C' */
1854       mask = ~MPFR_LIMB_MASK (sh);
1855       cp[0] &= mask;
1856 
1857       /* Subtract the mantissa c from b in a */
1858       mpn_sub_n (ap, bp, cp, n);
1859       /* mpfr_print_mant_binary("Sub=  ", ap, p); */
1860 
1861       /* Normalize: we lose at most one bit */
1862       if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
1863         {
1864           /* High bit is not set and we have to fix it! */
1865           /* Ap >= 010000xxx001 */
1866           mpn_lshift (ap, ap, n, 1);
1867           /* Ap >= 100000xxx010 */
1868           if (MPFR_UNLIKELY(rb != 0)) /* Check if Cp = -1 */
1869             /* Since Cp == -1, we have to subtract one more */
1870             {
1871               mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
1872               MPFR_ASSERTD(MPFR_LIMB_MSB(ap[n-1]) != 0);
1873             }
1874           /* Ap >= 10000xxx001 */
1875           /* Final exponent -1 since we have shifted the mantissa */
1876           bx--;
1877           /* Update rb and sb */
1878           rb  = rbb;
1879           rbb = sbb;
          /* We no longer have a valid Cp+1!
             But since Ap >= 100000xxx001, the final sub can't unnormalize! */
1882         }
1883       MPFR_ASSERTD( !(ap[0] & ~mask) );
1884     }
1885 
1886  rounding:
1887   /* at this point 'a' contains b - high(c), normalized,
1888      and we have to subtract rb * 1/2 ulp(a), rbb * 1/4 ulp(a),
1889      and sbb * 1/8 ulp(a), interpreting rb/rbb/sbb as 1 if non-zero. */
1890 
1891   sb = rbb | sbb;
1892 
1893   if (rb == 0 && sb == 0)
1894     {
1895       inexact = 0;
1896       goto end_of_sub;
1897     }
1898 
1899   pow2 = mpfr_powerof2_raw (a);
1900   if (pow2 && rb != 0) /* subtract 1 ulp */
1901     {
1902       mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
1903       ap[n-1] |= MPFR_LIMB_HIGHBIT;
1904       bx--;
1905       rb = rbb;
1906       rbb = sbb;
1907       sbb = 0;
1908       /* Note: for p=1 this case can only happen with d=1, but then we will
1909          have rb=sb=0 at the next round. */
1910       goto rounding;
1911     }
1912 
  /* now if a is a power of two, necessarily rb = 0,
1914      which means the exact result is always in (pred(a), a),
1915      and the bounds cannot be attained */
1916 
1917   if (rnd_mode == MPFR_RNDF)
1918     inexact = 0;
1919   else if (rnd_mode == MPFR_RNDN)
1920     {
1921       if (pow2)
1922         {
1923           MPFR_ASSERTD(rb == 0);
          /* since we are at the end of the binade, we have in fact rb = rbb
1925              and sb = sbb */
1926           rb = rbb;
1927           sb = sbb;
1928         }
1929       /* Warning: for p=1, the significand is always odd: the "even" rule
1930          rounds to the value with largest magnitude, thus we have to check
1931          that case separately */
1932       if (rb == 0 ||
1933           (rb != 0 && sb == 0 &&
1934            ((ap[0] & (MPFR_LIMB_ONE << sh)) == 0 || p == 1)))
1935         inexact = 1; /* round to a and return 1 */
1936       else /* round to pred(a) and return -1 */
1937         {
1938         subtract:
1939           mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
1940           if (pow2) /* deal with cancellation */
1941             {
1942               ap[n-1] |= MPFR_LIMB_HIGHBIT;
1943               bx--;
1944             }
1945           inexact = -1;
1946         }
1947     }
1948   else /* directed rounding */
1949     {
1950       MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
1951       if (rnd_mode == MPFR_RNDZ)
1952         goto subtract;
1953       else
1954         inexact = 1;
1955     }
1956 
1957  end_of_sub:
1958   /* Update Exponent */
1959   /* bx >= emin. Proof:
1960       If d==0      : Exact case. This is never called.
1961       if 1 < d < p : bx=MPFR_EXP(b) or MPFR_EXP(b)-1 > MPFR_EXP(c) > emin
1962       if d == 1    : bx=MPFR_EXP(b). If we could lose any bits, the exact
1963                      normalization is called.
1964       if d >=  p   : bx=MPFR_EXP(b) >= MPFR_EXP(c) + p > emin
1965      After SubOneUlp, we could have one bit less.
1966       if 1 < d < p : bx >= MPFR_EXP(b)-2 >= MPFR_EXP(c) > emin
1967       if d == 1    : bx >= MPFR_EXP(b)-1 = MPFR_EXP(c) > emin.
1968       if d >=  p   : bx >= MPFR_EXP(b)-1 > emin since p>=2.
1969   */
1970   MPFR_ASSERTD( bx >= __gmpfr_emin);
1971   MPFR_SET_EXP (a, bx);
1972 
1973   MPFR_TMP_FREE(marker);
1974   MPFR_RET (inexact * MPFR_INT_SIGN (a));
1975 }
1976