/* mpfr_sub1sp -- internal function to perform a "real" subtraction
   All the operands must have the same precision.

Copyright 2003-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* define MPFR_FULLSUB to use alternate code in mpfr_sub1sp2 and mpfr_sub1sp2n
   (see comments in mpfr_sub1sp2) */
/* #define MPFR_FULLSUB */

#if MPFR_WANT_ASSERT >= 2
/* Check the result of mpfr_sub1sp with mpfr_sub1.

   Note: mpfr_sub1sp itself has two algorithms: one always valid and one
   faster for small precisions (up to 3 limbs). The latter one is disabled
   if MPFR_GENERIC_ABI is defined. When MPFR_WANT_ASSERT >= 2, it could be
   interesting to compare the results of these different algorithms. For
   the time being, this is currently done by running the same code on the
   same data with and without MPFR_GENERIC_ABI defined, where we have the
   following comparisons in small precisions:
     mpfr_sub1sp slow <-> mpfr_sub1 when MPFR_GENERIC_ABI is defined;
     mpfr_sub1sp fast <-> mpfr_sub1 when MPFR_GENERIC_ABI is not defined.
   By transitivity, the absence of failures implies that the 3 results are
   the same.
*/
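
/* A minimal sketch of how to exercise this check (an assumption about the
   build procedure, not an official recipe): configure and build the library
   with the macro forced on, e.g.

     ./configure CFLAGS="-g -O2 -DMPFR_WANT_ASSERT=2"
     make && make check

   Every call to mpfr_sub1sp then goes through the wrapper below, which also
   computes the subtraction with mpfr_sub1 and aborts on any mismatch of the
   result, ternary value or flags. */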

int mpfr_sub1sp_ref (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t);
int mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_t tmpa, tmpb, tmpc;
  mpfr_flags_t old_flags, flags, flags2;
  int inexb, inexc, inexact, inexact2;

  if (rnd_mode == MPFR_RNDF)
    return mpfr_sub1sp_ref (a, b, c, rnd_mode);

  old_flags = __gmpfr_flags;

  mpfr_init2 (tmpa, MPFR_PREC (a));
  mpfr_init2 (tmpb, MPFR_PREC (b));
  mpfr_init2 (tmpc, MPFR_PREC (c));

  inexb = mpfr_set (tmpb, b, MPFR_RNDN);
  MPFR_ASSERTN (inexb == 0);

  inexc = mpfr_set (tmpc, c, MPFR_RNDN);
  MPFR_ASSERTN (inexc == 0);

  MPFR_ASSERTN (__gmpfr_flags == old_flags);

  inexact2 = mpfr_sub1 (tmpa, tmpb, tmpc, rnd_mode);
  flags2 = __gmpfr_flags;

  __gmpfr_flags = old_flags;
  inexact = mpfr_sub1sp_ref (a, b, c, rnd_mode);
  flags = __gmpfr_flags;

  /* Convert the ternary values to (-1,0,1). */
  inexact2 = VSIGN (inexact2);
  inexact = VSIGN (inexact);

  if (! mpfr_equal_p (tmpa, a) || inexact != inexact2 || flags != flags2)
    {
      fprintf (stderr, "sub1 & sub1sp return different values for %s\n"
               "Prec_a = %lu, Prec_b = %lu, Prec_c = %lu\nB = ",
               mpfr_print_rnd_mode (rnd_mode),
               (unsigned long) MPFR_PREC (a),
               (unsigned long) MPFR_PREC (b),
               (unsigned long) MPFR_PREC (c));
      mpfr_fdump (stderr, tmpb);
      fprintf (stderr, "C = ");
      mpfr_fdump (stderr, tmpc);
      fprintf (stderr, "sub1  : ");
      mpfr_fdump (stderr, tmpa);
      fprintf (stderr, "sub1sp: ");
      mpfr_fdump (stderr, a);
      fprintf (stderr, "sub1  : ternary = %2d, flags =", inexact2);
      flags_fout (stderr, flags2);
      fprintf (stderr, "sub1sp: ternary = %2d, flags =", inexact);
      flags_fout (stderr, flags);
      MPFR_ASSERTN (0);
    }
  mpfr_clears (tmpa, tmpb, tmpc, (mpfr_ptr) 0);
  return inexact;
}
# define mpfr_sub1sp mpfr_sub1sp_ref
#endif /* MPFR_WANT_ASSERT >= 2 */

#if !defined(MPFR_GENERIC_ABI)

/* the file sub1sp1_extracted.c is not ready yet */

#if 0 && defined(MPFR_WANT_PROVEN_CODE) && GMP_NUMB_BITS == 64 && \
    UINT_MAX == 0xffffffff && MPFR_PREC_BITS == 64 && \
    _MPFR_PREC_FORMAT == 3 && _MPFR_EXP_FORMAT == _MPFR_PREC_FORMAT

/* The code assumes that mp_limb_t has 64 bits exactly, unsigned int
   has 32 bits exactly, mpfr_prec_t and mpfr_exp_t are of type long,
   which has 64 bits exactly. */

#include "sub1sp1_extracted.c"

#else

/* special code for p < GMP_NUMB_BITS */
static int
mpfr_sub1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
              mpfr_prec_t p)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt, INITIALIZED(sh);
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t a0;
  mp_limb_t mask;
  mpfr_uexp_t d;

  MPFR_ASSERTD(p < GMP_NUMB_BITS);

  if (bx == cx)
    {
      if (MPFR_UNLIKELY(bp[0] == cp[0])) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (cp[0] > bp[0]) /* borrow: |c| > |b| */
        {
          a0 = cp[0] - bp[0];
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else /* bp[0] > cp[0] */
        {
          a0 = bp[0] - cp[0];
          MPFR_SET_SAME_SIGN (a, b);
        }

      /* now a0 != 0 */
      MPFR_ASSERTD(a0 != 0);
      count_leading_zeros (cnt, a0);
      ap[0] = a0 << cnt;
      bx -= cnt;
      rb = sb = 0;
      /* Note: sh is not initialized, but will not be used in this case. */
    }
  else
    {
      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      sh = GMP_NUMB_BITS - p;
      mask = MPFR_LIMB_MASK(sh);
      if (d < GMP_NUMB_BITS)
        {
          sb = - (cp[0] << (GMP_NUMB_BITS - d)); /* neglected part of -c */
          /* Note that "a0 = bp[0] - (cp[0] >> d) - (sb != 0);" is faster
             on some other machines and has no immediate dependencies for
             the first subtraction. In the future, make sure that the code
             is recognized as a *single* subtraction with borrow and/or use
             a builtin when available (currently provided by Clang, but not
             by GCC); create a new macro for that. See the TODO later.
             Note also that with Clang, the constant 0 for the first
             subtraction instead of a variable breaks the optimization:
             https://llvm.org/bugs/show_bug.cgi?id=31754 */
          a0 = bp[0] - (sb != 0) - (cp[0] >> d);
          /* a0 cannot be zero here since:
             a) if d >= 2, then a0 >= 2^(w-1) - (2^(w-2)-1) with
                w = GMP_NUMB_BITS, thus a0 - 1 >= 2^(w-2),
             b) if d = 1, then since p < GMP_NUMB_BITS we have sb = 0.
          */
          MPFR_ASSERTD(a0 > 0);
          count_leading_zeros (cnt, a0);
          if (cnt)
            a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
          sb <<= cnt;
          bx -= cnt;
          /* sh > 0 since p < GMP_NUMB_BITS */
          MPFR_ASSERTD(sh > 0);
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else /* d >= GMP_NUMB_BITS */
        {
          if (bp[0] > MPFR_LIMB_HIGHBIT)
            {
              /* We compute b - ulp(b), and the remainder ulp(b) - c
                 satisfies: 1/2 ulp(b) < ulp(b) - c < ulp(b), thus
                 rb = sb = 1. */
              ap[0] = bp[0] - (MPFR_LIMB_ONE << sh);
              rb = 1;
            }
          else
            {
              /* Warning: since we have an exponent decrease, when
                 p = GMP_NUMB_BITS - 1 and d = GMP_NUMB_BITS, the round bit
                 corresponds to the upper bit of -c. In that case rb = 0 and
                 sb = 1, except when c0 = MPFR_LIMB_HIGHBIT where rb = 1 and
                 sb = 0. */
              rb = sh > 1 || d > GMP_NUMB_BITS || cp[0] == MPFR_LIMB_HIGHBIT;
              /* sb = 1 below is incorrect when p = GMP_NUMB_BITS - 1,
                 d = GMP_NUMB_BITS and c0 = MPFR_LIMB_HIGHBIT, but in
                 that case the even rule would round up too. */
              ap[0] = ~mask;
              bx --;
              /* Warning: if d = GMP_NUMB_BITS and c0 = 1000...000, then
                 b0 - c0 = |0111...111|1000...000|, which after the shift
                 becomes |111...111|000...000| thus if p = GMP_NUMB_BITS-1
                 we have rb = 1 but sb = 0. However, in this case the round
                 even rule will round up, which is what we get with sb = 1:
                 the final result will be correct, while sb is incorrect. */
            }
          sb = 1;
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have the same precision p, they
     are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
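  /* Worked example of the exactness claim (illustrative; take emin = 0 and
     p = 4): b and c are multiples of 2^(emin-p) = 1/16, hence so is b-c.
     If bx < emin, then |b-c| < 2^(emin-1) = 1/2, so |b-c| = k/16 with
     1 <= k <= 7: it fits in 3 bits and is held exactly in the p-bit
     significand, consistently with rb = sb = 0 being asserted below. */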
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* For RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ. This corresponds to:
         (a) either bx < emin - 1,
         (b) or bx = emin - 1 and ap[0] = 1000....000 (in this case
             necessarily rb = sb = 0 since b-c is a multiple of
             2^(emin-p)). */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 || ap[0] == MPFR_LIMB_HIGHBIT))
        {
          MPFR_ASSERTD(rb == 0 && sb == 0);
          rnd_mode = MPFR_RNDZ;
        }
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      if (MPFR_UNLIKELY(ap[0] == 0))
        {
          ap[0] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

#endif /* MPFR_WANT_PROVEN_CODE */

/* special code for p = GMP_NUMB_BITS */
static int
mpfr_sub1sp1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt;
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t a0;
  mpfr_uexp_t d;

  MPFR_ASSERTD(MPFR_PREC(a) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(b) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(c) == GMP_NUMB_BITS);

  if (bx == cx)
    {
      a0 = bp[0] - cp[0];
      if (a0 == 0) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (a0 > bp[0]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          a0 = -a0;
        }
      else /* bp[0] > cp[0] */
        MPFR_SET_SAME_SIGN (a, b);

      /* now a0 != 0 */
      MPFR_ASSERTD(a0 != 0);
      count_leading_zeros (cnt, a0);
      ap[0] = a0 << cnt;
      bx -= cnt;
      rb = sb = 0;
    }
  else
    {
      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      if (d < GMP_NUMB_BITS)
        {
          sb = - (cp[0] << (GMP_NUMB_BITS - d)); /* neglected part of -c */
          a0 = bp[0] - (sb != 0) - (cp[0] >> d);
          /* a0 can only be zero when d = 1, b0 = B/2, and c0 = B-1, where
             B = 2^GMP_NUMB_BITS, thus b0 - c0/2 = 1/2 */
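          /* Numeric check of the above (with w = GMP_NUMB_BITS): if d = 1,
             b0 = 2^(w-1) and c0 = 2^w - 1, then sb = -(c0 << (w-1)) =
             2^(w-1) is non-zero and a0 = 2^(w-1) - 1 - (2^(w-1) - 1) = 0.
             The exact difference is 2^(bx-w-1), i.e. the power of 2
             produced by the branch below (ap[0] = MPFR_LIMB_HIGHBIT and
             exponent bx - GMP_NUMB_BITS). */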
          if (a0 == MPFR_LIMB_ZERO)
            {
              bx -= GMP_NUMB_BITS;
              ap[0] = MPFR_LIMB_HIGHBIT;
              rb = sb = 0;
            }
          else
            {
              count_leading_zeros (cnt, a0);
              if (cnt)
                a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
              sb <<= cnt;
              bx -= cnt;
              rb = sb & MPFR_LIMB_HIGHBIT;
              sb &= ~MPFR_LIMB_HIGHBIT;
              ap[0] = a0;
            }
        }
      else /* d >= GMP_NUMB_BITS */
        {
          /* We compute b - ulp(b) */
          if (bp[0] > MPFR_LIMB_HIGHBIT)
            {
              /* If d = GMP_NUMB_BITS, rb = 0 and sb = 1,
                 unless c0 = MPFR_LIMB_HIGHBIT in which case rb = 1 and
                 sb = 0. If d > GMP_NUMB_BITS, rb = sb = 1. */
              rb = d > GMP_NUMB_BITS || cp[0] == MPFR_LIMB_HIGHBIT;
              sb = d > GMP_NUMB_BITS || cp[0] != MPFR_LIMB_HIGHBIT;
              ap[0] = bp[0] - MPFR_LIMB_ONE;
            }
          else
            {
              /* Warning: in this case a0 is shifted by one!
                 If d = GMP_NUMB_BITS:
                 (a) if c0 = MPFR_LIMB_HIGHBIT, a0 = 111...111, rb = sb = 0
                 (b) otherwise, a0 = 111...110, rb = -c0 >= 01000...000,
                     sb = (-c0) << 2
                 If d = GMP_NUMB_BITS+1: a0 = 111...111
                 (c) if c0 = MPFR_LIMB_HIGHBIT, rb = 1 and sb = 0
                 (d) otherwise rb = 0 and sb = 1
                 If d > GMP_NUMB_BITS+1:
                 (e) a0 = 111...111, rb = sb = 1
              */
              bx --;
              if (d == GMP_NUMB_BITS && cp[0] > MPFR_LIMB_HIGHBIT)
                { /* case (b) */
                  rb = MPFR_LIMB(-cp[0]) >= (MPFR_LIMB_HIGHBIT >> 1);
                  sb = MPFR_LIMB(-cp[0]) << 2;
                  ap[0] = -(MPFR_LIMB_ONE << 1);
                }
              else /* cases (a), (c), (d) and (e) */
                {
                  /* rb = 1 in case (e) and case (c) */
                  rb = d > GMP_NUMB_BITS + 1
                    || (d == GMP_NUMB_BITS + 1 && cp[0] == MPFR_LIMB_HIGHBIT);
                  /* sb = 1 in case (d) and (e) */
                  sb = d > GMP_NUMB_BITS + 1
                    || (d == GMP_NUMB_BITS + 1 && cp[0] > MPFR_LIMB_HIGHBIT);
                  /* Warning: only set ap[0] last, otherwise in case ap=cp,
                     the above comparisons involving cp[0] would be wrong */
                  ap[0] = -MPFR_LIMB_ONE;
                }
            }
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have the same precision p, they
     are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* For RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ. This corresponds to:
         (a) either bx < emin - 1,
         (b) or bx = emin - 1 and ap[0] = 1000....000 (in this case
             necessarily rb = sb = 0 since b-c is a multiple of
             2^(emin-p)). */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 || ap[0] == MPFR_LIMB_HIGHBIT))
        {
          MPFR_ASSERTD(rb == 0 && sb == 0);
          rnd_mode = MPFR_RNDZ;
        }
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE;
      if (MPFR_UNLIKELY(ap[0] == 0))
        {
          ap[0] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

/* special code for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
static int
mpfr_sub1sp2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
              mpfr_prec_t p)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt, INITIALIZED(sh);
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t mask, a0, a1;
  mpfr_uexp_t d;

  MPFR_ASSERTD(GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS);

  if (bx == cx) /* subtraction is exact in this case */
    {
      /* first compute a0: if the compiler is smart enough, it will use the
         generated borrow to get for free the term (bp[0] < cp[0]) */
      a0 = bp[0] - cp[0];
      a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
      if (a1 == 0 && a0 == 0) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (a1 >= bp[1]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          /* a = b-c mod 2^(2*GMP_NUMB_BITS) */
          a0 = -a0;
          a1 = -a1 - (a0 != 0);
        }
      else /* no borrow */
        MPFR_SET_SAME_SIGN (a, b);

      if (a1 == 0)
        {
          a1 = a0;
          a0 = 0;
          bx -= GMP_NUMB_BITS;
        }

      /* now a1 != 0 */
      MPFR_ASSERTD(a1 != 0);
      count_leading_zeros (cnt, a1);
      if (cnt)
        {
          ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
          ap[0] = a0 << cnt;
          bx -= cnt;
        }
      else
        {
          ap[1] = a1;
          ap[0] = a0;
        }
      rb = sb = 0;
      /* Note: sh is not initialized, but will not be used in this case. */
    }
  else
    {
      mp_limb_t t;

      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      sh = 2 * GMP_NUMB_BITS - p;
      mask = MPFR_LIMB_MASK(sh);
      if (d < GMP_NUMB_BITS)
        {
          t = (cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d);
          /* TODO: Change the code to generate a full subtraction with
             borrow, avoiding the test on sb and the corresponding
             correction. Note that Clang has builtins:
             https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins
             but the generated code may not be good:
             https://llvm.org/bugs/show_bug.cgi?id=20748
             With the current source code, Clang generates on x86_64:
             1. sub %rsi,%rbx for the first subtraction in a1;
             2. sub %rdi,%rax for the subtraction in a0;
             3. sbb $0x0,%rbx for the second subtraction in a1, i.e. just
                subtracting the borrow out from (2).
             So, Clang recognizes the borrow, but doesn't merge (1) and (3).
             Bug: https://llvm.org/bugs/show_bug.cgi?id=25858
             For GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79173
          */
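          /* A possible shape for such a macro (a sketch only: it assumes a
             Clang providing __builtin_subcll and mp_limb_t being unsigned
             long long; the name MPFR_SUB_BORROW is hypothetical, not an
             existing MPFR macro):

               #define MPFR_SUB_BORROW(d,u,v,bin,bout) \
                 ((d) = __builtin_subcll ((u), (v), (bin), &(bout)))

               mp_limb_t bor0, bor1;
               MPFR_SUB_BORROW (a0, bp[0], t, 0, bor0);
               MPFR_SUB_BORROW (a1, bp[1], cp[1] >> d, bor0, bor1);

             which the compiler may then map to a single sub/sbb chain. */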
#ifndef MPFR_FULLSUB
          a0 = bp[0] - t;
          a1 = bp[1] - (cp[1] >> d) - (bp[0] < t);
          sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */
          if (sb)
            {
              a1 -= (a0 == 0);
              a0 --;
              /* a = a1,a0 cannot become zero here, since:
                 a) if d >= 2, then a1 >= 2^(w-1) - (2^(w-2)-1) with
                    w = GMP_NUMB_BITS, thus a1 - 1 >= 2^(w-2),
                 b) if d = 1, then since p < 2*GMP_NUMB_BITS we have
                    sb = 0. */
              MPFR_ASSERTD(a1 > 0 || a0 > 0);
              sb = -sb; /* 2^GMP_NUMB_BITS - sb */
            }
#else
          sb = - (cp[0] << (GMP_NUMB_BITS - d));
          a0 = bp[0] - t - (sb != 0);
          a1 = bp[1] - (cp[1] >> d) - (bp[0] < t || (bp[0] == t && sb != 0));
#endif
          if (a1 == 0)
            {
              /* this implies d = 1, which in turn implies sb = 0 */
              MPFR_ASSERTD(sb == 0);
              a1 = a0;
              a0 = 0; /* should be a0 = sb */
              /* since sb = 0 already, no need to set it to 0 */
              bx -= GMP_NUMB_BITS;
            }
          /* now a1 != 0 */
          MPFR_ASSERTD(a1 != 0);
          count_leading_zeros (cnt, a1);
          if (cnt)
            {
              ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
              a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
              sb <<= cnt;
              bx -= cnt;
            }
          else
            ap[1] = a1;
          /* sh > 0 since p < 2*GMP_NUMB_BITS */
          MPFR_ASSERTD(sh > 0);
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else if (d < 2 * GMP_NUMB_BITS)
        { /* GMP_NUMB_BITS <= d < 2*GMP_NUMB_BITS */
          /* warning: the most significant bit of sb might become the least
             significant bit of a0 below */
          sb = (d == GMP_NUMB_BITS) ? cp[0]
            : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] != 0);
          t = (cp[1] >> (d - GMP_NUMB_BITS)) + (sb != 0);
          /* warning: t might overflow to 0 if d = GMP_NUMB_BITS, sb <> 0,
             and cp[1] = 111...111 */
          a0 = bp[0] - t;
          a1 = bp[1] - (bp[0] < t) - (t == 0 && sb != 0);
          sb = -sb;
          /* since bp[1] has its most significant bit set, we can have an
             exponent decrease of at most one */
          if (a1 < MPFR_LIMB_HIGHBIT)
            {
              ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          else
            ap[1] = a1;
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else /* d >= 2*GMP_NUMB_BITS */
        {
          /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies:
             1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1, unless we
             had an exponent decrease. */
          t = MPFR_LIMB_ONE << sh;
          a0 = bp[0] - t;
          a1 = bp[1] - (bp[0] < t);
          if (a1 < MPFR_LIMB_HIGHBIT)
            {
              /* necessarily we had b = 1000...000 */
              /* Warning: since we have an exponent decrease, when
                 p = 2*GMP_NUMB_BITS - 1 and d = 2*GMP_NUMB_BITS, the round
                 bit corresponds to the upper bit of -c. In that case rb = 0
                 and sb = 1, except when c = 1000...000 where rb = 1 and
                 sb = 0. */
              rb = sh > 1 || d > 2 * GMP_NUMB_BITS
                || (cp[1] == MPFR_LIMB_HIGHBIT && cp[0] == MPFR_LIMB_ZERO);
              /* sb = 1 below is incorrect when p = 2*GMP_NUMB_BITS - 1,
                 d = 2*GMP_NUMB_BITS and c = 1000...000, but in
                 that case the even rule would round up too. */
              ap[0] = ~mask;
              ap[1] = MPFR_LIMB_MAX;
              bx --;
            }
          else
            {
              ap[0] = a0;
              ap[1] = a1;
              rb = 1;
            }
          sb = 1;
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have the same precision p, they
     are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* for RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      ap[1] += (ap[0] == 0);
      if (MPFR_UNLIKELY(ap[1] == 0))
        {
          ap[1] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

/* special code for p = 2*GMP_NUMB_BITS */
static int
mpfr_sub1sp2n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt;
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t a0, a1;
  mpfr_uexp_t d;

  /* this function is inspired by mpfr_sub1sp2 (for the operations on the
     2-limb arrays) and by mpfr_sub1sp1n (for the different cases to
     handle) */

  if (bx == cx) /* subtraction is exact in this case */
    {
      a0 = bp[0] - cp[0];
      a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
      if (a1 == 0 && a0 == 0) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      /* since B/2 <= bp[1], cp[1] < B with B=2^GMP_NUMB_BITS,
         if no borrow we have 0 <= bp[1] - cp[1] - x < B/2
         where x = (bp[0] < cp[0]) is 0 or 1, thus a1 < B/2 <= bp[1] */
      else if (a1 >= bp[1]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          /* negate [a1,a0] */
          a0 = -a0;
          a1 = -a1 - (a0 != 0);
        }
      else /* no borrow */
        MPFR_SET_SAME_SIGN (a, b);

      /* now [a1,a0] is the absolute value of b - c,
         maybe not normalized */
      if (a1 == 0)
        {
          a1 = a0;
          a0 = 0;
          bx -= GMP_NUMB_BITS;
        }

      /* now a1 != 0 */
      MPFR_ASSERTD(a1 != 0);
      count_leading_zeros (cnt, a1);
      if (cnt)
        {
          /* shift [a1,a0] left by cnt bits and store in the result */
          ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
          ap[0] = a0 << cnt;
          bx -= cnt;
        }
      else
        {
          ap[1] = a1;
          ap[0] = a0;
        }
      rb = sb = 0; /* the subtraction is exact */
    }
  else
    {
      mp_limb_t t;

      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      if (d < GMP_NUMB_BITS)
        {
          t = (cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d);
          /* t is the part that should be subtracted from bp[0]:
             |      a1      |      a0      |
             |     bp[1]    |     bp[0]    |
                  | cp[1]>>d |      t      |  sb  | */
#ifndef MPFR_FULLSUB
          a0 = bp[0] - t;
          a1 = bp[1] - (cp[1] >> d) - (bp[0] < t);
          sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */
          /* now negate sb and propagate the borrow into a0 if sb <> 0 */
          if (sb)
            {
              a1 -= (a0 == 0);
              a0 --;
              /* a = a1,a0 can only be zero when d = 1,
                 b = 0.1000...000*2^bx, and c = 0.111...111*2^(bx-1). In
                 that case (where we have sb = MPFR_LIMB_HIGHBIT below),
                 the subtraction is exact, and the result is
                 b/2^(2*GMP_NUMB_BITS). This case is dealt with below. */
              sb = -sb;
            }
#else
          sb = - (cp[0] << (GMP_NUMB_BITS - d));
          a0 = bp[0] - t - (sb != 0);
          a1 = bp[1] - (cp[1] >> d) - (bp[0] < t || (bp[0] == t && sb != 0));
#endif
          /* now the result is formed of [a1,a0,sb], which might not be
             normalized */
          if (a1 == MPFR_LIMB_ZERO)
            {
              /* this implies d = 1 */
              MPFR_ASSERTD(d == 1);
              a1 = a0;
              a0 = sb;
              sb = MPFR_LIMB_ZERO;
              bx -= GMP_NUMB_BITS;
            }
          if (a1 == MPFR_LIMB_ZERO) /* case a = a1,a0 = 0 mentioned above */
            {
              MPFR_ASSERTD(a0 == MPFR_LIMB_HIGHBIT); /* was sb above */
              a1 = a0;
              a0 = sb;
              bx -= GMP_NUMB_BITS;
              sb = MPFR_LIMB_ZERO;
            }
          else
            {
              count_leading_zeros (cnt, a1);
              if (cnt)
                {
                  /* shift [a1,a0,sb] left by cnt bits and adjust exponent */
                  a1 = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
                  a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
                  sb <<= cnt;
                  bx -= cnt;
                }
            }
          rb = sb & MPFR_LIMB_HIGHBIT;
          sb = sb & ~MPFR_LIMB_HIGHBIT;
          ap[1] = a1;
          ap[0] = a0;
        }
      else if (d < 2 * GMP_NUMB_BITS)
        { /* GMP_NUMB_BITS <= d < 2*GMP_NUMB_BITS:
             compute t, the part to be subtracted from bp[0],
             and sb, the neglected part of c:
             |      a1      |      a0      |
             |     bp[1]    |     bp[0]    |
                            |      t       |  sb  | */
          /* warning: we should not ignore the low bits from cp[0]
             in case d > GMP_NUMB_BITS */
          sb = (d == GMP_NUMB_BITS) ? cp[0]
            : (cp[1] << (2*GMP_NUMB_BITS - d))
              | (cp[0] >> (d - GMP_NUMB_BITS))
              | ((cp[0] << (2*GMP_NUMB_BITS - d)) != 0);
          t = (cp[1] >> (d - GMP_NUMB_BITS)) + (sb != 0);
          /* Warning: t might overflow to 0 if d = GMP_NUMB_BITS, sb <> 0,
             and cp[1] = 111...111 */
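          /* Check of this overflow case: if d = GMP_NUMB_BITS,
             cp[1] = ~(mp_limb_t) 0 and sb != 0, then t wraps to 0, so the
             borrow "bp[0] < t" is never seen; the missing borrow is
             restored by the (t == 0 && sb != 0) term in a1 below. */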
          a0 = bp[0] - t;
          a1 = bp[1] - (bp[0] < t) - (t == 0 && sb != 0);
          sb = -sb;
          /* now the result is [a1,a0,sb]. Since bp[1] has its most
             significant bit set, we can have an exponent decrease of at
             most one */
          if (a1 < MPFR_LIMB_HIGHBIT)
            {
              /* shift [a1,a0] left by 1 bit */
              a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              MPFR_ASSERTD(a1 >= MPFR_LIMB_HIGHBIT);
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          ap[1] = a1;
          ap[0] = a0;
          rb = sb & MPFR_LIMB_HIGHBIT;
          sb = sb & ~MPFR_LIMB_HIGHBIT;
        }
      else
        { /* d >= 2*GMP_NUMB_BITS:
             |      a1      |      a0      |
             |     bp[1]    |     bp[0]    |
                            |     cp[1]    |     cp[0]    | */
          /* we mimic the case d >= GMP_NUMB_BITS of mpfr_sub1sp1n */
          int tst = cp[1] == MPFR_LIMB_HIGHBIT && cp[0] == MPFR_LIMB_ZERO;
          /* if d = 2 * GMP_NUMB_BITS and tst = 1, c = 1/2*ulp(b) */
          if (bp[1] > MPFR_LIMB_HIGHBIT || bp[0] > MPFR_LIMB_ZERO)
            {
              /* no borrow in b - ulp(b) */
              rb = d > 2 * GMP_NUMB_BITS || tst;
              sb = d > 2 * GMP_NUMB_BITS || !tst;
              ap[1] = bp[1] - (bp[0] == MPFR_LIMB_ZERO);
              ap[0] = bp[0] - MPFR_LIMB_ONE;
            }
          else
            {
              /* b = 1000...000, thus subtracting c yields an exponent
                 shift */
              bx --;
              if (d == 2 * GMP_NUMB_BITS && !tst) /* c > 1/2*ulp(b) */
                {
                  t = -cp[1] - (cp[0] > MPFR_LIMB_ZERO);
                  /* the rounding bit is the 2nd most significant bit of t
                     (where the most significant bit of t is necessarily 0),
                     and the sticky bit is formed by the remaining bits of
                     t, and those from -cp[0] */
                  rb = t >= (MPFR_LIMB_HIGHBIT >> 1);
                  sb = (t << 2) | cp[0];
                  ap[1] = MPFR_LIMB_MAX;
                  ap[0] = -(MPFR_LIMB_ONE << 1);
                }
              else /* c <= 1/2*ulp(b) */
                {
                  rb = d > 2 * GMP_NUMB_BITS + 1
                    || (d == 2 * GMP_NUMB_BITS + 1 && tst);
                  sb = d > 2 * GMP_NUMB_BITS + 1
                    || (d == 2 * GMP_NUMB_BITS + 1 && !tst);
                  ap[1] = -MPFR_LIMB_ONE;
                  ap[0] = -MPFR_LIMB_ONE;
                }
            }
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have the same precision p, they
     are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* for RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE;
      ap[1] += (ap[0] == 0);
      if (MPFR_UNLIKELY(ap[1] == 0))
        {
          ap[1] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

/* special code for 2*GMP_NUMB_BITS < p < 3*GMP_NUMB_BITS */
static int
mpfr_sub1sp3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
              mpfr_prec_t p)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt, INITIALIZED(sh);
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t mask, a0, a1, a2;
  mpfr_uexp_t d;

  MPFR_ASSERTD(2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS);

  if (bx == cx) /* subtraction is exact in this case */
    {
      a0 = bp[0] - cp[0];
      a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
      /* a borrow is generated for a when either bp[1] < cp[1],
         or bp[1] = cp[1] and bp[0] < cp[0] */
      a2 = bp[2] - cp[2]
        - (bp[1] < cp[1] || (bp[1] == cp[1] && bp[0] < cp[0]));
      if (a2 == 0 && a1 == 0 && a0 == 0) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (a2 >= bp[2]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          /* a = b-c mod 2^(3*GMP_NUMB_BITS) */
          a0 = -a0;
          a1 = -a1 - (a0 != 0);
          a2 = -a2 - (a0 != 0 || a1 != 0);
        }
      else /* no borrow */
        MPFR_SET_SAME_SIGN (a, b);

      if (a2 == 0)
        {
          a2 = a1;
          a1 = a0;
          a0 = 0;
          bx -= GMP_NUMB_BITS;
          if (a2 == 0)
            {
              a2 = a1;
              a1 = 0;
              bx -= GMP_NUMB_BITS;
            }
        }

      /* now a2 != 0 */
      MPFR_ASSERTD(a2 != 0);
      count_leading_zeros (cnt, a2);
      if (cnt)
        {
          ap[2] = (a2 << cnt) | (a1 >> (GMP_NUMB_BITS - cnt));
          ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
          ap[0] = a0 << cnt;
          bx -= cnt;
        }
      else
        {
          ap[2] = a2;
          ap[1] = a1;
          ap[0] = a0;
        }
      rb = sb = 0;
      /* Note: sh is not initialized, but will not be used in this case. */
    }
  else
    {
      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      sh = 3 * GMP_NUMB_BITS - p;
      mask = MPFR_LIMB_MASK(sh);
      if (d < GMP_NUMB_BITS)
        {
          mp_limb_t cy;
          /* warning: we must have the most significant bit of sb correct
             since it might become the round bit below */
          sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */
          a0 = bp[0] - ((cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d));
          a1 = bp[1] - ((cp[2] << (GMP_NUMB_BITS - d)) | (cp[1] >> d))
            - (a0 > bp[0]);
          cy = a1 > bp[1] || (a1 == bp[1] && a0 > bp[0]); /* borrow in a1 */
          a2 = bp[2] - (cp[2] >> d) - cy;
          /* if sb is non-zero, subtract 1 from a2, a1, a0 since we want a
             non-negative neglected part */
          if (sb)
            {
              a2 -= (a1 == 0 && a0 == 0);
              a1 -= (a0 == 0);
              a0 --;
              /* a = a2,a1,a0 cannot become zero here, since:
                 a) if d >= 2, then a2 >= 2^(w-1) - (2^(w-2)-1) with
                    w = GMP_NUMB_BITS, thus a2 - 1 >= 2^(w-2),
                 b) if d = 1, then since p < 3*GMP_NUMB_BITS we have
                    sb = 0. */
              MPFR_ASSERTD(a2 > 0 || a1 > 0 || a0 > 0);
              sb = -sb; /* 2^GMP_NUMB_BITS - sb */
            }
          if (a2 == 0)
            {
              /* this implies d = 1, which in turn implies sb = 0 */
              MPFR_ASSERTD(sb == 0);
              a2 = a1;
              a1 = a0;
              a0 = 0; /* should be a0 = sb */
              /* since sb = 0 already, no need to set it to 0 */
              bx -= GMP_NUMB_BITS;
              if (a2 == 0)
                {
                  a2 = a1;
                  a1 = 0; /* should be a1 = a0 */
                  bx -= GMP_NUMB_BITS;
                }
            }
          /* now a2 != 0 */
          MPFR_ASSERTD(a2 != 0);
          count_leading_zeros (cnt, a2);
          if (cnt)
            {
              ap[2] = (a2 << cnt) | (a1 >> (GMP_NUMB_BITS - cnt));
              ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt));
              a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
              sb <<= cnt;
              bx -= cnt;
            }
          else
            {
              ap[2] = a2;
              ap[1] = a1;
            }
          /* sh > 0 since p < 3*GMP_NUMB_BITS */
          MPFR_ASSERTD(sh > 0);
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else if (d < 2 * GMP_NUMB_BITS)
        {
          mp_limb_t c0shifted;
          /* warning: we must have the most significant bit of sb correct
             since it might become the round bit below */
          sb = (d == GMP_NUMB_BITS) ? cp[0]
            : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] != 0);
          c0shifted = (d == GMP_NUMB_BITS) ? cp[1]
            : (cp[2] << (2*GMP_NUMB_BITS-d)) | (cp[1] >> (d - GMP_NUMB_BITS));
          a0 = bp[0] - c0shifted;
          /* TODO: add a non-regression test for cp[2] == MPFR_LIMB_MAX,
             d == GMP_NUMB_BITS and a0 > bp[0]. */
          a1 = bp[1] - (cp[2] >> (d - GMP_NUMB_BITS)) - (a0 > bp[0]);
          a2 = bp[2] - (a1 > bp[1] || (a1 == bp[1] && a0 > bp[0]));
          /* if sb is non-zero, subtract 1 from a2, a1, a0 since we want a
             non-negative neglected part */
          if (sb)
            {
              a2 -= (a1 == 0 && a0 == 0);
              a1 -= (a0 == 0);
              a0 --;
              /* a = a2,a1,a0 cannot become zero here, since:
                 a) if d >= 2, then a2 >= 2^(w-1) - (2^(w-2)-1) with
                    w = GMP_NUMB_BITS, thus a2 - 1 >= 2^(w-2),
                 b) if d = 1, then since p < 3*GMP_NUMB_BITS we have
                    sb = 0. */
              MPFR_ASSERTD(a2 > 0 || a1 > 0 || a0 > 0);
              sb = -sb; /* 2^GMP_NUMB_BITS - sb */
            }
          /* since bp[2] has its most significant bit set, we can have an
             exponent decrease of at most one */
          if (a2 < MPFR_LIMB_HIGHBIT)
            {
              ap[2] = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
              ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          else
            {
              ap[2] = a2;
              ap[1] = a1;
            }
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else if (d < 3 * GMP_NUMB_BITS) /* 2*GMP_NUMB_BITS<=d<3*GMP_NUMB_BITS */
        {
          MPFR_ASSERTD (2*GMP_NUMB_BITS <= d && d < 3*GMP_NUMB_BITS);
          /* warning: we must have the most significant bit of sb correct
             since it might become the round bit below */
          if (d == 2 * GMP_NUMB_BITS)
            sb = cp[1] | (cp[0] != 0);
          else
            sb = cp[2] << (3*GMP_NUMB_BITS - d) | (cp[1] != 0) | (cp[0] != 0);
          sb = -sb;
          /* TODO: add a non-regression test for cp[2] == MPFR_LIMB_MAX,
             d == 2*GMP_NUMB_BITS and sb != 0. */
          a0 = bp[0] - (cp[2] >> (d - 2*GMP_NUMB_BITS)) - (sb != 0);
          a1 = bp[1] - (a0 > bp[0] || (a0 == bp[0] && sb != 0));
          a2 = bp[2] - (a1 > bp[1]);
          if (a2 < MPFR_LIMB_HIGHBIT)
            {
              ap[2] = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
              ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
              a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
              sb <<= 1;
              bx --;
            }
          else
            {
              ap[2] = a2;
              ap[1] = a1;
            }
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else /* d >= 3*GMP_NUMB_BITS */
        {
          /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies:
             1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1. */
          mp_limb_t t = MPFR_LIMB_ONE << sh;
          a0 = bp[0] - t;
          a1 = bp[1] - (bp[0] < t);
          a2 = bp[2] - (a1 > bp[1]);
          if (a2 < MPFR_LIMB_HIGHBIT)
            {
              /* necessarily we had b = 1000...000 */
              /* Warning: since we have an exponent decrease, when
                 p = 3*GMP_NUMB_BITS - 1 and d = 3*GMP_NUMB_BITS, the round
                 bit corresponds to the upper bit of -c. In that case rb = 0
                 and sb = 1, except when c = 1000...000 where rb = 1 and
                 sb = 0. */
              rb = sh > 1 || d > 3 * GMP_NUMB_BITS
                || (cp[2] == MPFR_LIMB_HIGHBIT && cp[1] == MPFR_LIMB_ZERO &&
                    cp[0] == MPFR_LIMB_ZERO);
              /* sb = 1 below is incorrect when p = 3*GMP_NUMB_BITS - 1,
                 d = 3*GMP_NUMB_BITS and c = 1000...000, but in
                 that case the even rule would round up too. */
              ap[0] = ~mask;
              ap[1] = MPFR_LIMB_MAX;
              ap[2] = MPFR_LIMB_MAX;
              bx --;
            }
          else
            {
              ap[0] = a0;
              ap[1] = a1;
              ap[2] = a2;
              rb = 1;
            }
          sb = 1;
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range. However, since b and c have the same precision p, they
     are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* for RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 ||
           (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      ap[1] += (ap[0] == 0);
      ap[2] += (ap[1] == 0 && ap[0] == 0);
      if (MPFR_UNLIKELY(ap[2] == 0))
        {
          ap[2] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

#endif /* !defined(MPFR_GENERIC_ABI) */

/* Rounding Sub */

/*
   compute sgn(b) * (|b| - |c|) if |b| > |c|, else -sgn(b) * (|c| - |b|).
   Returns 0 iff the result is exact,
   a negative value when the result is less than the exact value,
   a positive value otherwise.
*/

/* A0...Ap-1
 *          Cp Cp+1 ....
 *             <- C'p+1 ->
 * Cp = -1 if calculated from c mantissa
 * Cp = 0  if 0 from a or c
 * Cp = 1  if calculated from a.
 * C'p+1 = first bit not null, or 0 if there isn't one
 *
 * Can't have Cp = -1 and C'p+1 = 1. */

/* RND = MPFR_RNDZ:
 *  + if Cp = 0 and C'p+1 = 0,1, Truncate.
 *  + if Cp = 0 and C'p+1 = -1,  SubOneUlp
 *  + if Cp = -1,                SubOneUlp
 *  + if Cp = 1,                 AddOneUlp
 * RND = MPFR_RNDA (Away)
 *  + if Cp = 0 and C'p+1 = 0,-1, Truncate
 *  + if Cp = 0 and C'p+1 = 1,    AddOneUlp
 *  + if Cp = 1,                  AddOneUlp
 *  + if Cp = -1,                 Truncate
 * RND = MPFR_RNDN
 *  + if Cp = 0,                  Truncate
 *  + if Cp = 1 and C'p+1 = 1,    AddOneUlp
 *  + if Cp = 1 and C'p+1 = -1,   Truncate
 *  + if Cp = 1 and C'p+1 = 0,    Truncate if Ap-1 = 0, AddOneUlp else
 *  + if Cp = -1 and C'p+1 = -1,  SubOneUlp
 *  + if Cp = -1 and C'p+1 = 0,   Truncate if Ap-1 = 0, SubOneUlp else
 *
 * If AddOneUlp:
 *   If carry, then it is 11111111111 + 1 = 10000000000000,
 *   ap[n-1] = MPFR_LIMB_HIGHBIT
 * If SubOneUlp:
 *   If we lose one bit, it is 1000000000 - 1 = 0111111111111.
 *   Then shift, and put as last bit x, which is calculated
 *   according to Cp, Cp-1 and rnd_mode.
 * If Truncate,
 *   If it is a power of 2,
 *   we may have to SubOneUlp in some special cases.
 *
 * To simplify, we don't use Cp = 1.
 *
 */
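
/* Worked example of the classification above (illustrative): with p = 4,
   b = 0.1010 * 2^2 and c = 0.1001 * 2^(-3), we have d = 5 = p + 1, so a is
   set to the bits of b and all of c remains to be subtracted. Since
   c < 1/2 ulp(a), Cp = 0, and the first non-null neglected bit comes from
   c, so C'p+1 = -1. The table then gives SubOneUlp for MPFR_RNDZ (the
   exact result lies strictly below a = b) and Truncate for MPFR_RNDA
   (a = b is already the value away from zero). */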

MPFR_HOT_FUNCTION_ATTR int
mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx, cx;
  mpfr_uexp_t d;
  mpfr_prec_t p, sh, cnt;
  mp_size_t n, k;
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mp_limb_t limb;
  int inexact;
  mp_limb_t rb, sb; /* round and sticky bits. They are interpreted as
                       negative, i.e., if rb <> 0, then we should subtract 1
                       at the round bit position, and if sb <> 0, we should
                       subtract something below the round bit position. */
  mp_limb_t rbb = MPFR_LIMB_MAX, sbb = MPFR_LIMB_MAX;
  /* rbb is the next bit after the round bit, and sbb the corresponding
     sticky bit. gcc claims that they might be used uninitialized. We fill
     them with invalid values, which should produce a failure if so. See
     the README.dev file. */
  int pow2;

  MPFR_TMP_DECL(marker);

  MPFR_ASSERTD(MPFR_PREC(a) == MPFR_PREC(b) && MPFR_PREC(b) == MPFR_PREC(c));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(b));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(c));

  /* Read prec and num of limbs */
  p = MPFR_GET_PREC (b);

#if !defined(MPFR_GENERIC_ABI)
  /* special case for p < GMP_NUMB_BITS */
  if (p < GMP_NUMB_BITS)
    return mpfr_sub1sp1 (a, b, c, rnd_mode, p);

  /* special case for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
  if (GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS)
    return mpfr_sub1sp2 (a, b, c, rnd_mode, p);

  /* special case for p = GMP_NUMB_BITS: we put it *after* mpfr_sub1sp2,
     in order not to slow down mpfr_sub1sp2, which should be more frequent */
  if (p == GMP_NUMB_BITS)
    return mpfr_sub1sp1n (a, b, c, rnd_mode);

  /* special case for 2*GMP_NUMB_BITS < p < 3*GMP_NUMB_BITS */
  if (2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS)
    return mpfr_sub1sp3 (a, b, c, rnd_mode, p);

  if (p == 2 * GMP_NUMB_BITS)
    return mpfr_sub1sp2n (a, b, c, rnd_mode);
#endif
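
  /* For instance, with GMP_NUMB_BITS = 64 the dispatch above resolves to:
     p < 64 -> mpfr_sub1sp1, p = 64 -> mpfr_sub1sp1n,
     64 < p < 128 -> mpfr_sub1sp2, p = 128 -> mpfr_sub1sp2n,
     128 < p < 192 -> mpfr_sub1sp3, and p >= 192 falls through to the
     generic code below. */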

  n = MPFR_PREC2LIMBS (p);
  /* Fast cmp of |b| and |c| */
  bx = MPFR_GET_EXP (b);
  cx = MPFR_GET_EXP (c);

  MPFR_TMP_MARK(marker);

  k = n - 1;

  if (bx == cx)
    {
      /* Check mantissa since exponents are equal */
      while (k >= 0 && MPFR_UNLIKELY(bp[k] == cp[k]))
        k--;
      /* now k = -1 if b == c, otherwise k is the largest integer < n such
         that bp[k] <> cp[k] */
      if (k < 0)
        /* b == c ! */
        {
          /* Return exact number 0 */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET(0);
        }
      else if (bp[k] > cp[k])
        goto BGreater;
      else
        {
          MPFR_ASSERTD(bp[k] < cp[k]);
          goto CGreater;
        }
    }
  else if (bx < cx)
    {
      /* Swap b and c and set sign */
      mpfr_srcptr t;
      mpfr_exp_t tx;
      mp_limb_t *tp;

      tx = bx; bx = cx; cx = tx;
    CGreater:
      MPFR_SET_OPPOSITE_SIGN(a,b);
      t = b; b = c; c = t;
      tp = bp; bp = cp; cp = tp;
    }
  else
    {
      /* |b| > |c| */
    BGreater:
      MPFR_SET_SAME_SIGN(a,b);
    }

  /* Now |b| > |c| */
  MPFR_ASSERTD(bx >= cx);
  d = (mpfr_uexp_t) bx - cx;
  /* printf ("New with diff=%lu\n", (unsigned long) d); */

  /* FIXME: The goto's below are too complex (long backward) and make
     the code unreadable. */

  if (d == 0)
    {
      /* <-- b -->
         <-- c --> : exact sub */
      mpn_sub_n (ap, bp, cp, n);
      /* Normalize */
    ExactNormalize:
      limb = ap[n-1];
      if (MPFR_LIKELY (limb != 0))
        {
          /* First limb is not zero. */
          count_leading_zeros (cnt, limb);
          /* Warning: cnt can be 0 when we come from the case SubD1Lose
             with goto ExactNormalize */
          if (MPFR_LIKELY(cnt))
            {
              mpn_lshift (ap, ap, n, cnt); /* Normalize number */
              bx -= cnt; /* Update final expo */
            }
          /* Last limb should be OK */
          MPFR_ASSERTD(!(ap[0] & MPFR_LIMB_MASK((unsigned int) (-p)
                                                % GMP_NUMB_BITS)));
        }
      else
        {
          /* First limb is zero: this can only occur for n >= 2 */
          mp_size_t len;
          /* Find the first limb not equal to zero. It necessarily exists
             since |b| > |c|. We know that bp[k] > cp[k] and all upper
             limbs are equal. */
          while (ap[k] == 0)
            k--;
          limb = ap[k];
          /* ap[k] is the non-zero limb of largest index, thus we have
             to consider the k+1 least significant limbs */
          MPFR_ASSERTD(limb != 0);
          count_leading_zeros(cnt, limb);
          k++;
          len = n - k; /* Number of most significant zero limbs */
          MPFR_ASSERTD(k > 0);
          if (cnt)
            mpn_lshift (ap + len, ap, k, cnt); /* Normalize the high limb */
          else
            /* Must use copyd since src and dst may overlap & dst >= src */
            mpn_copyd (ap + len, ap, k);
          MPN_ZERO(ap, len); /* Zeroing the last limbs */
          bx -= cnt + len * GMP_NUMB_BITS; /* update exponent */
          /* ap[len] should have its low bits zero: it is bp[0]-cp[0] */
          MPFR_ASSERTD(!(ap[len] & MPFR_LIMB_MASK((unsigned int) (-p)
                                                  % GMP_NUMB_BITS)));
        }
      /* Check exponent underflow (no overflow can happen) */
      if (MPFR_UNLIKELY(bx < __gmpfr_emin))
        {
          MPFR_TMP_FREE(marker);
          /* since b and c have the same sign, exponent and precision, the
             subtraction is exact */
          /* printf("(D==0 Underflow)\n"); */
          /* for MPFR_RNDN, mpfr_underflow always rounds away from zero,
             thus for |a| <= 2^(emin-2) we change to RNDZ. */
          if (rnd_mode == MPFR_RNDN &&
              (bx < __gmpfr_emin - 1 || mpfr_powerof2_raw (a)))
            rnd_mode = MPFR_RNDZ;
          return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
        }
      MPFR_SET_EXP (a, bx);
      /* No rounding is necessary since the result is exact */
      MPFR_ASSERTD(ap[n-1] & MPFR_LIMB_HIGHBIT);
      MPFR_TMP_FREE(marker);
      return 0;
    }
  else if (d == 1)
    {
      /* | <-- b -->
         |  <-- c --> */
      mp_limb_t c0, mask;
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      /* If we lose at least one bit, compute 2*b-c (exact),
       * else compute b-c/2 */
      limb = bp[k] - cp[k]/2;
      /* Let W = 2^GMP_NUMB_BITS:
         we have |b|-|c| >= limb*W^k - (2*W^k-1)/2 >= limb*W^k - W^k + 1/2,
         thus if limb > W/2, |b|-|c| >= 1/2*W^n.
         Moreover, if trunc(|c|) represents the first p-1 bits of |c|,
         minus the last significant bit called c0 below (in fact c0 is that
         bit shifted by sh bits), then we have
         |b|-trunc(|c|) >= 1/2*W^n+1, thus the two mpn_sub_n calls
         below necessarily yield a > 1/2*W^n. */
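      /* Worked example of this classification (illustrative, with w = 4,
         W = 16, n = 1, p = 4, sh = 0): for b0 = 1101 and c0 = 1001,
         limb = 13 - 4 = 9 > W/2, so the exponent cannot decrease
         (SubD1NoLose below); for b0 = 1100 and c0 = 1001, limb = 8 = W/2
         and the last branch below compares b with the shifted c word by
         word to decide. */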
      if (limb > MPFR_LIMB_HIGHBIT) /* case limb > W/2 */
        {
          mp_limb_t *tp;
          /* The exponent cannot decrease: compute b-c/2 */
          /* Shift c in the allocated temporary block */
        SubD1NoLose:
          c0 = cp[0] & (MPFR_LIMB_ONE << sh);
          mask = ~MPFR_LIMB_MASK(sh);
          tp = MPFR_TMP_LIMBS_ALLOC (n);
          /* FIXME: it might be faster to have one function shifting c by 1
             to the right and adding with b to a, which would read c once
             only, and avoid a temporary allocation. */
          mpn_rshift (tp, cp, n, 1);
          tp[0] &= mask; /* Zero last bit of c if set */
          mpn_sub_n (ap, bp, tp, n);
          MPFR_SET_EXP(a, bx); /* No exponent overflow! */
          MPFR_ASSERTD(ap[n-1] & MPFR_LIMB_HIGHBIT);
          if (MPFR_LIKELY(c0 == 0))
            {
              /* Result is exact: no need of rounding! */
              MPFR_TMP_FREE(marker);
              return 0;
            }
          /* c0 is non-zero, thus we have to subtract 1/2*ulp(a);
             however, we know (see analysis above) that this cannot
             make the exponent decrease */
          MPFR_ASSERTD( !(ap[0] & ~mask) ); /* Check last bits */
          /* No normalize is needed */

          /* Rounding is necessary since c0 is non-zero */
          /* we have to subtract 1 at the round bit position,
             and 0 for the lower bits */
          rb = 1; rbb = sbb = 0;
        }
      else if (MPFR_LIKELY(limb < MPFR_LIMB_HIGHBIT))
        {
          mp_limb_t *tp;
          /* |b| - |c| <= (W/2-1)*W^k + W^k-1 = 1/2*W^n - 1 */
          /* The exponent decreases by one. */
        SubD1Lose:
          /* Compute 2*b-c (exact) */
#if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_RSBLSH1_N)
          /* {ap, n} = 2*{bp, n} - {cp, n} */
          /* mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] */
          __gmpn_rsblsh1_n (ap, cp, bp, n);
#else
          tp = MPFR_TMP_LIMBS_ALLOC (n);
          /* Shift b in the allocated temporary block */
          mpn_lshift (tp, bp, n, 1);
          mpn_sub_n (ap, tp, cp, n);
#endif
          bx--;
          MPFR_ASSERTD(k == n-1);
          goto ExactNormalize;
        }
      else /* limb = MPFR_LIMB_HIGHBIT */
        {
          /* Case: limb = 100000000000 */
          /* Check while b[l] == c'[l] (C' is C shifted by 1) */
          /* If b[l] < c'[l] => We lose at least one bit */
          /* If b[l] > c'[l] => We don't lose any bit */
          /* If l == -1 => We don't lose any bit
             AND the result is 100000000000 0000000000 00000000000 */
          mp_size_t l = n - 1;
          mp_limb_t cl_shifted;
          do
            {
              /* the first loop will compare b[n-2] and c'[n-2] */
              cl_shifted = cp[l] << (GMP_NUMB_BITS - 1);
              if (--l < 0)
                break;
              cl_shifted += cp[l] >> 1;
            }
          while (bp[l] == cl_shifted);
          if (MPFR_UNLIKELY(l < 0))
            {
              if (MPFR_UNLIKELY(cl_shifted))
                {
                  /* Since cl_shifted is what should be subtracted
                     from ap[-1], if non-zero then necessarily the
                     precision is a multiple of GMP_NUMB_BITS, and we lose
                     one bit, thus the (exact) result is a power of 2
                     minus 1. */
                  memset (ap, -1, n * MPFR_BYTES_PER_MP_LIMB);
                  MPFR_SET_EXP (a, bx - 1);
                  /* No underflow is possible since cx = bx-1 is a valid
                     exponent. */
                }
              else
                {
                  /* cl_shifted = 0: result is a power of 2. */
                  MPN_ZERO (ap, n - 1);
                  ap[n-1] = MPFR_LIMB_HIGHBIT;
                  MPFR_SET_EXP (a, bx); /* No exponent overflow! */
                }
              /* No normalize is needed */
              /* No rounding is needed */
              MPFR_TMP_FREE (marker);
              return 0;
            }
          /* cl_shifted is the shifted value c'[l] */
          else if (bp[l] > cl_shifted)
            goto SubD1NoLose; /* |b|-|c| >= 1/2*W^n */
          else
            {
              /* we cannot have bp[l] = cl_shifted since the only way we
                 can exit the while loop above is when bp[l] <> cl_shifted
                 or l < 0, and the case l < 0 was already treated above. */
              MPFR_ASSERTD(bp[l] < cl_shifted);
              goto SubD1Lose; /* |b|-|c| <= 1/2*W^n-1 and is exact */
            }
        }
    }
  else if (MPFR_UNLIKELY(d >= p)) /* the difference of exponents is larger
                                     than the precision of all operands,
                                     thus the result is either b or
                                     b - 1 ulp, with a possible exact
                                     result when d = p, b = 2^e and
                                     c = 1/2 ulp(b) */
    {
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      /* We can't set A before since we use cp for rounding... */
      /* Perform rounding: check if a = b or a = b-ulp(b) */
      if (MPFR_UNLIKELY(d == p))
        {
          /* since c is normalized, we need to subtract 1/2 ulp(b) */
          rb = 1;
          /* rbb is the bit of weight 1/4 ulp(b) in c. We assume a limb has
             at least 2 bits. If the precision is 1, we read in the unused
             bits, which should be zero, and this is what we want. */
          rbb = cp[n-1] & (MPFR_LIMB_HIGHBIT >> 1);

          /* We also need sbb */
          sbb = cp[n-1] & MPFR_LIMB_MASK(GMP_NUMB_BITS - 2);
          for (k = n-1; sbb == 0 && k > 0;)
            sbb = cp[--k];
        }
      else
        {
          rb = 0;
          if (d == p + 1)
            {
              rbb = 1;
              sbb = cp[n-1] & MPFR_LIMB_MASK(GMP_NUMB_BITS - 1);
              for (k = n-1; sbb == 0 && k > 0;)
                sbb = cp[--k];
            }
          else
            {
              rbb = 0;
              sbb = 1; /* since C is non-zero */
            }
        }
      /* Copy mantissa B in A */
      MPN_COPY(ap, bp, n);
    }
  else /* case 2 <= d < p */
    {
      mpfr_uexp_t dm;
      mp_size_t m;
      mp_limb_t mask, *tp;

      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      tp = MPFR_TMP_LIMBS_ALLOC (n);

      /* Shift c in the temporary allocated place */
      dm = d % GMP_NUMB_BITS;
      m = d / GMP_NUMB_BITS;
      if (MPFR_UNLIKELY(dm == 0))
        {
          /* dm = 0 and m > 0: just copy */
          MPFR_ASSERTD(m != 0);
          MPN_COPY(tp, cp + m, n - m);
          MPN_ZERO(tp + n - m, m);
        }
      else if (MPFR_LIKELY(m == 0))
        {
          /* dm >= 2 and m == 0: just shift */
          MPFR_ASSERTD(dm >= 2);
          mpn_rshift (tp, cp, n, dm);
        }
      else
        {
          /* dm > 0 and m > 0: shift and zero */
          mpn_rshift (tp, cp + m, n - m, dm);
          MPN_ZERO (tp + n - m, m);
        }
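      /* Example of the decomposition (illustrative, with
         GMP_NUMB_BITS = 64): d = 70 gives m = 1 and dm = 6, i.e. a shift
         by one full limb and then by 6 bits (the third branch above);
         d = 128 gives m = 2 and dm = 0 (the pure copy branch). */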
      /* FIXME: Instead of doing "cp = tp;", keep using tp to avoid
         confusion? Thus in the block below, we don't need
         "mp_limb_t *cp = MPFR_MANT(c);". In short, cp should always
         be MPFR_MANT(c) defined earlier, possibly after the swap. */
      cp = tp;

      /* mpfr_print_mant_binary("Before", MPFR_MANT(c), p); */
      /* mpfr_print_mant_binary("B=    ", MPFR_MANT(b), p); */
      /* mpfr_print_mant_binary("After ", cp, p); */

      /* Compute rb=Cp and sb=C'p+1 */
      {
        /* Compute rb and rbb from C */
        mp_limb_t *cp = MPFR_MANT(c);
        /* The round bit is bit p-d in C, assuming the most significant bit
           of C is bit 0 */
        mpfr_prec_t x = p - d;
        mp_size_t kx = n - 1 - (x / GMP_NUMB_BITS);
        mpfr_prec_t sx = GMP_NUMB_BITS - 1 - (x % GMP_NUMB_BITS);
        /* the round bit is in cp[kx], at position sx */
        MPFR_ASSERTD(p >= d);
        rb = cp[kx] & (MPFR_LIMB_ONE << sx);

        /* Now compute rbb: since d >= 2 it always exists in C */
        if (sx == 0) /* rbb is in the next limb */
          {
            kx --;
            sx = GMP_NUMB_BITS - 1;
          }
        else
          sx --; /* rb and rbb are in the same limb */
        rbb = cp[kx] & (MPFR_LIMB_ONE << sx);

        /* Now look at the remaining low bits of C to determine sbb */
        sbb = cp[kx] & MPFR_LIMB_MASK(sx);
        while (sbb == 0 && kx > 0)
          sbb = cp[--kx];
      }
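      /* Example (illustrative, GMP_NUMB_BITS = 64, p = 192, n = 3, d = 5):
         x = 187, so kx = 2 - 187/64 = 0 and sx = 63 - (187 % 64) = 4,
         i.e. the round bit is bit 4 of cp[0], rbb is bit 3, and sbb
         collects bits 2..0 of cp[0]. */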
      /* printf("sh=%lu Cp=%d C'p+1=%d\n", sh, rb!=0, sb!=0); */

      /* Clean shifted C' */
      mask = ~MPFR_LIMB_MASK (sh);
      cp[0] &= mask;

      /* Subtract the mantissa c from b in a */
      mpn_sub_n (ap, bp, cp, n);
      /* mpfr_print_mant_binary("Sub=  ", ap, p); */

      /* Normalize: we lose at most one bit */
      if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
        {
          /* High bit is not set and we have to fix it! */
          /* Ap >= 010000xxx001 */
          mpn_lshift (ap, ap, n, 1);
          /* Ap >= 100000xxx010 */
          if (MPFR_UNLIKELY(rb != 0)) /* Check if Cp = -1 */
            /* Since Cp == -1, we have to subtract one more */
            {
              mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
              MPFR_ASSERTD(MPFR_LIMB_MSB(ap[n-1]) != 0);
            }
          /* Ap >= 10000xxx001 */
          /* Final exponent -1 since we have shifted the mantissa */
          bx--;
          /* Update rb and sb */
          rb = rbb;
          rbb = sbb;
          /* We no longer have a valid Cp+1!
             But since Ap >= 100000xxx001, the final sub can't
             unnormalize! */
        }
      MPFR_ASSERTD( !(ap[0] & ~mask) );
    }

 rounding:
  /* at this point 'a' contains b - high(c), normalized,
     and we have to subtract rb * 1/2 ulp(a), rbb * 1/4 ulp(a),
     and sbb * 1/8 ulp(a), interpreting rb/rbb/sbb as 1 if non-zero. */

  sb = rbb | sbb;

  if (rb == 0 && sb == 0)
    {
      inexact = 0;
      goto end_of_sub;
    }

  pow2 = mpfr_powerof2_raw (a);
  if (pow2 && rb != 0) /* subtract 1 ulp */
    {
      mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
      ap[n-1] |= MPFR_LIMB_HIGHBIT;
      bx--;
      rb = rbb;
      rbb = sbb;
      sbb = 0;
      /* Note: for p = 1 this case can only happen with d = 1, but then we
         will have rb = sb = 0 at the next round. */
      goto rounding;
    }

  /* now if a is a power of two, necessarily rb = 0,
     which means the exact result is always in (pred(a), a),
     and the bounds cannot be attained */

  if (rnd_mode == MPFR_RNDF)
    inexact = 0;
  else if (rnd_mode == MPFR_RNDN)
    {
      if (pow2)
        {
          MPFR_ASSERTD(rb == 0);
          /* since we are at the end of the binade, we have in fact rb = rbb
             and sb = sbb */
          rb = rbb;
          sb = sbb;
        }
      /* Warning: for p = 1, the significand is always odd: the "even" rule
         rounds to the value with the largest magnitude, thus we have to
         check that case separately */
      if (rb == 0 ||
          (rb != 0 && sb == 0 &&
           ((ap[0] & (MPFR_LIMB_ONE << sh)) == 0 || p == 1)))
        inexact = 1; /* round to a and return 1 */
      else /* round to pred(a) and return -1 */
        {
        subtract:
          mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
          if (pow2) /* deal with cancellation */
            {
              ap[n-1] |= MPFR_LIMB_HIGHBIT;
              bx--;
            }
          inexact = -1;
        }
    }
  else /* directed rounding */
    {
      MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
      if (rnd_mode == MPFR_RNDZ)
        goto subtract;
      else
        inexact = 1;
    }

 end_of_sub:
  /* Update exponent */
  /* bx >= emin. Proof:
     If d == 0    : Exact case. This is never called.
     if 1 < d < p : bx = MPFR_EXP(b) or MPFR_EXP(b)-1 > MPFR_EXP(c) > emin
     if d == 1    : bx = MPFR_EXP(b). If we could lose any bits, the exact
                    normalization is called.
     if d >= p    : bx = MPFR_EXP(b) >= MPFR_EXP(c) + p > emin
     After SubOneUlp, we could have one bit less.
     if 1 < d < p : bx >= MPFR_EXP(b)-2 >= MPFR_EXP(c) > emin
     if d == 1    : bx >= MPFR_EXP(b)-1 = MPFR_EXP(c) > emin.
     if d >= p    : bx >= MPFR_EXP(b)-1 > emin since p >= 2.
  */
  MPFR_ASSERTD( bx >= __gmpfr_emin);
  MPFR_SET_EXP (a, bx);

  MPFR_TMP_FREE(marker);
  MPFR_RET (inexact * MPFR_INT_SIGN (a));
}