1 /* mpfr_mul -- multiply two floating-point numbers
2
3 Copyright 1999-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26
27 /********* BEGINNING CHECK *************/
28
29 /* Check if we have to check the result of mpfr_mul.
30 TODO: Find a better (and faster?) check than using the old implementation. */
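/* When MPFR_WANT_ASSERT >= 2, the old generic implementation (mpfr_mul3
below) is kept and every mpfr_mul call is cross-checked against it: the
wrapper mpfr_mul defined in this block runs both mpfr_mul3 and the new
code (mpfr_mul2) and asserts that the results, ternary values and flags
agree. At the end of the block, mpfr_mul is #defined to mpfr_mul2, so
the real implementation further down in this file is compiled under the
name mpfr_mul2. */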
31 #if MPFR_WANT_ASSERT >= 2
32
33 int mpfr_mul2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode);
34 static int
35 mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
36 {
37 /* Old implementation */
38 int sign_product, cc, inexact;
39 mpfr_exp_t ax;
40 mp_limb_t *tmp;
41 mp_limb_t b1;
42 mpfr_prec_t bq, cq;
43 mp_size_t bn, cn, tn, k;
44 MPFR_TMP_DECL(marker);
45
46 /* deal with special cases */
47 if (MPFR_ARE_SINGULAR(b,c))
48 {
49 if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
50 {
51 MPFR_SET_NAN(a);
52 MPFR_RET_NAN;
53 }
54 sign_product = MPFR_MULT_SIGN(MPFR_SIGN(b), MPFR_SIGN(c));
55 if (MPFR_IS_INF(b))
56 {
57 if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
58 {
59 MPFR_SET_SIGN(a, sign_product);
60 MPFR_SET_INF(a);
61 MPFR_RET(0); /* exact */
62 }
63 else
64 {
65 MPFR_SET_NAN(a);
66 MPFR_RET_NAN;
67 }
68 }
69 else if (MPFR_IS_INF(c))
70 {
71 if (MPFR_NOTZERO(b))
72 {
73 MPFR_SET_SIGN(a, sign_product);
74 MPFR_SET_INF(a);
75 MPFR_RET(0); /* exact */
76 }
77 else
78 {
79 MPFR_SET_NAN(a);
80 MPFR_RET_NAN;
81 }
82 }
83 else
84 {
85 MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
86 MPFR_SET_SIGN(a, sign_product);
87 MPFR_SET_ZERO(a);
88 MPFR_RET(0); /* 0 * 0 is exact */
89 }
90 }
91 sign_product = MPFR_MULT_SIGN(MPFR_SIGN(b), MPFR_SIGN(c));
92
93 ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
94
95 bq = MPFR_PREC (b);
96 cq = MPFR_PREC (c);
97
98 MPFR_ASSERTN ((mpfr_uprec_t) bq + cq <= MPFR_PREC_MAX);
99
100 bn = MPFR_PREC2LIMBS (bq); /* number of limbs of b */
101 cn = MPFR_PREC2LIMBS (cq); /* number of limbs of c */
102 k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
103 tn = MPFR_PREC2LIMBS (bq + cq);
104 /* <= k, thus no int overflow */
105 MPFR_ASSERTD(tn <= k);
106
107 /* Check for no size_t overflow */
108 MPFR_ASSERTD((size_t) k <= ((size_t) -1) / MPFR_BYTES_PER_MP_LIMB);
109 MPFR_TMP_MARK(marker);
110 tmp = MPFR_TMP_LIMBS_ALLOC (k);
111
112 /* multiply the two mantissas in temporarily allocated space */
113 b1 = (MPFR_LIKELY(bn >= cn)) ?
114 mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
115 : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);
116
117 /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
118 with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
119 b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */
120 MPFR_ASSERTD (b1 == 0 || b1 == 1);
121
122 /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
123 then their product is in (1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
124 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
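/* Sketch of the derivation of 2*ln(2)-1: for X, Y independent and uniform
on (1/2, 1], P(X*Y <= 1/2) = int_{1/2}^{1} P(Y <= 1/(2x)) * 2 dx
= int_{1/2}^{1} (1/x - 1) * 2 dx = 2*(ln 2 - 1/2) = 2*ln(2) - 1. */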
125 tmp += k - tn;
126 if (MPFR_UNLIKELY(b1 == 0))
127 mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
128 cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
129 MPFR_IS_NEG_SIGN(sign_product),
130 MPFR_PREC (a), rnd_mode, &inexact);
131 MPFR_ASSERTD (cc == 0 || cc == 1);
132
133 /* cc = 1 ==> result is a power of two */
134 if (MPFR_UNLIKELY(cc))
135 MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;
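/* Here rounding propagated a carry out of the most significant limb, so
the significand is reset to 1000...000 (i.e. 1/2) and the exponent must
be increased by one; this is accounted for by adding cc to the exponent
below. */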
136
137 MPFR_TMP_FREE(marker);
138
139 {
140 /* We need to cast b1 to a signed integer type in order to use
141 signed integer arithmetic only, as the expression can involve
142 negative integers. Let's recall that both b1 and cc are 0 or 1,
143 and since cc is an int, let's choose int for this part. */
144 mpfr_exp_t ax2 = ax + ((int) b1 - 1 + cc);
145 if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
146 return mpfr_overflow (a, rnd_mode, sign_product);
147 if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
148 {
149 /* In the rounding to the nearest mode, if the exponent of the exact
150 result (i.e. before rounding, i.e. without taking cc into account)
151 is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
152 both arguments are powers of 2) in absolute value, then round to
153 zero. */
154 if (rnd_mode == MPFR_RNDN &&
155 (ax + (mpfr_exp_t) b1 < __gmpfr_emin ||
156 (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
157 rnd_mode = MPFR_RNDZ;
158 return mpfr_underflow (a, rnd_mode, sign_product);
159 }
160 MPFR_SET_EXP (a, ax2);
161 MPFR_SET_SIGN(a, sign_product);
162 }
163 MPFR_RET (inexact);
164 }
165
166 int
167 mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
168 {
169 mpfr_t ta, tb, tc;
170 mpfr_flags_t old_flags, flags1, flags2;
171 int inexact1, inexact2;
172
173 if (rnd_mode == MPFR_RNDF)
174 return mpfr_mul2 (a, b, c, rnd_mode);
175
176 old_flags = __gmpfr_flags;
177
178 mpfr_init2 (ta, MPFR_PREC (a));
179 mpfr_init2 (tb, MPFR_PREC (b));
180 mpfr_init2 (tc, MPFR_PREC (c));
181 MPFR_ASSERTN (mpfr_set (tb, b, MPFR_RNDN) == 0);
182 MPFR_ASSERTN (mpfr_set (tc, c, MPFR_RNDN) == 0);
183
184 /* Note: If b or c is NaN, then the NaN flag has been set by mpfr_set above.
185 Thus restore the old flags just below to make sure that mpfr_mul3 is
186 tested under the real conditions. */
187
188 __gmpfr_flags = old_flags;
189 inexact2 = mpfr_mul3 (ta, tb, tc, rnd_mode);
190 flags2 = __gmpfr_flags;
191
192 __gmpfr_flags = old_flags;
193 inexact1 = mpfr_mul2 (a, b, c, rnd_mode);
194 flags1 = __gmpfr_flags;
195
196 /* Convert the ternary values to (-1,0,1). */
197 inexact2 = VSIGN (inexact2);
198 inexact1 = VSIGN (inexact1);
199
200 if (! ((MPFR_IS_NAN (ta) && MPFR_IS_NAN (a)) || mpfr_equal_p (ta, a)) ||
201 inexact1 != inexact2 || flags1 != flags2)
202 {
203 /* We do not have MPFR_PREC_FSPEC, so let's use mpfr_eexp_t and
204 MPFR_EXP_FSPEC since mpfr_prec_t values are guaranteed to be
205 representable in mpfr_exp_t, thus in mpfr_eexp_t. */
206 fprintf (stderr, "mpfr_mul returns different values for %s\n"
207 "Prec_a = %" MPFR_EXP_FSPEC "d, "
208 "Prec_b = %" MPFR_EXP_FSPEC "d, "
209 "Prec_c = %" MPFR_EXP_FSPEC "d\n",
210 mpfr_print_rnd_mode (rnd_mode),
211 (mpfr_eexp_t) MPFR_PREC (a),
212 (mpfr_eexp_t) MPFR_PREC (b),
213 (mpfr_eexp_t) MPFR_PREC (c));
214 /* Note: We output tb and tc instead of b and c, in case a = b or c
215 (this is why tb and tc have been created in the first place). */
216 fprintf (stderr, "b = ");
217 mpfr_fdump (stderr, tb);
218 fprintf (stderr, "c = ");
219 mpfr_fdump (stderr, tc);
220 fprintf (stderr, "OldMul: ");
221 mpfr_fdump (stderr, ta);
222 fprintf (stderr, "NewMul: ");
223 mpfr_fdump (stderr, a);
224 fprintf (stderr, "OldMul: ternary = %2d, flags =", inexact2);
225 flags_fout (stderr, flags2);
226 fprintf (stderr, "NewMul: ternary = %2d, flags =", inexact1);
227 flags_fout (stderr, flags1);
228 MPFR_ASSERTN(0);
229 }
230
231 mpfr_clears (ta, tb, tc, (mpfr_ptr) 0);
232 return inexact1;
233 }
234
235 # define mpfr_mul mpfr_mul2
236
237 #endif /* MPFR_WANT_ASSERT >= 2 */
238
239 /****** END OF CHECK *******/
240
241 /* Multiply 2 mpfr_t */
242
243 #if !defined(MPFR_GENERIC_ABI)
244
245 /* Disabled for now since mul_1_extracted.c is not formally proven yet.
246 Once it is proven, replace MPFR_WANT_PROVEN_CODExxx by MPFR_WANT_PROVEN_CODE. */
247 #if defined(MPFR_WANT_PROVEN_CODExxx) && GMP_NUMB_BITS == 64 && \
248 UINT_MAX == 0xffffffff && MPFR_PREC_BITS == 64 && \
249 _MPFR_PREC_FORMAT == 3 && _MPFR_EXP_FORMAT == _MPFR_PREC_FORMAT
250
251 /* The code assumes that mp_limb_t has 64 bits exactly, unsigned int
252 has 32 bits exactly, mpfr_prec_t and mpfr_exp_t are of type long,
253 which has 64 bits exactly. */
254
255 #include "mul_1_extracted.c"
256
257 #else
258
259 /* Special code for prec(a) < GMP_NUMB_BITS and
260 prec(b), prec(c) <= GMP_NUMB_BITS.
261 Note: this code was copied into sqr.c, function mpfr_sqr_1 (this saves a few cycles
262 compared to having this function exported). As a consequence, any change here
263 should be mirrored in mpfr_sqr_1. */
264 static int
265 mpfr_mul_1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
266 mpfr_prec_t p)
267 {
268 mp_limb_t a0;
269 mpfr_limb_ptr ap = MPFR_MANT(a);
270 mp_limb_t b0 = MPFR_MANT(b)[0];
271 mp_limb_t c0 = MPFR_MANT(c)[0];
272 mpfr_exp_t ax;
273 mpfr_prec_t sh = GMP_NUMB_BITS - p;
274 mp_limb_t rb, sb, mask = MPFR_LIMB_MASK(sh);
275
276 /* When prec(b), prec(c) <= GMP_NUMB_BITS / 2, we could replace umul_ppmm
277 by a limb multiplication as follows, but we assume umul_ppmm is as fast
278 as a limb multiplication on modern processors:
279 a0 = (b0 >> (GMP_NUMB_BITS / 2)) * (c0 >> (GMP_NUMB_BITS / 2));
280 sb = 0;
281 */
282 ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
283 umul_ppmm (a0, sb, b0, c0);
284 if (a0 < MPFR_LIMB_HIGHBIT)
285 {
286 ax --;
287 /* TODO: This is actually an addition with carry (no shifts and no OR
288 needed in asm). Make sure that GCC generates optimized code once
289 it supports carry-in. */
290 a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
291 sb <<= 1;
292 }
293 rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
294 sb |= (a0 & mask) ^ rb;
295 ap[0] = a0 & ~mask;
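/* rb is the round bit (the most significant of the sh truncated bits) and
sb is the sticky bit (nonzero iff any other discarded bit, including the
low limb of the product, is nonzero). */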
296
297 MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
298
299 /* rounding */
300 if (MPFR_UNLIKELY(ax > __gmpfr_emax))
301 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
302
303 /* Warning: underflow should be checked *after* rounding, thus when rounding
304 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
305 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
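/* Example: if the unrounded significand is 0.111...1 (p ones) with
exponent emin-1 and we round away from zero, the rounded result is
0.100...0 * 2^emin, which is representable, hence no underflow even
though ax = emin - 1 < emin. This is exactly the case detected below. */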
306 if (MPFR_UNLIKELY(ax < __gmpfr_emin))
307 {
308 if (ax == __gmpfr_emin - 1 && ap[0] == MPFR_LIMB(~mask) &&
309 ((rnd_mode == MPFR_RNDN && rb) ||
310 (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
311 goto rounding; /* no underflow */
312 /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
313 we have to change to RNDZ. This corresponds to:
314 (a) either ax < emin - 1
315 (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
316 if (rnd_mode == MPFR_RNDN &&
317 (ax < __gmpfr_emin - 1 ||
318 (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
319 rnd_mode = MPFR_RNDZ;
320 return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
321 }
322
323 rounding:
324 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
325 in the cases "goto rounding" above. */
326 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
327 {
328 MPFR_ASSERTD(ax >= __gmpfr_emin);
329 MPFR_RET (0);
330 }
331 else if (rnd_mode == MPFR_RNDN)
332 {
333 if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
334 goto truncate;
335 else
336 goto add_one_ulp;
337 }
338 else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
339 {
340 truncate:
341 MPFR_ASSERTD(ax >= __gmpfr_emin);
342 MPFR_RET(-MPFR_SIGN(a));
343 }
344 else /* round away from zero */
345 {
346 add_one_ulp:
347 ap[0] += MPFR_LIMB_ONE << sh;
348 if (ap[0] == 0)
349 {
350 ap[0] = MPFR_LIMB_HIGHBIT;
351 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
352 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
353 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
354 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
355 MPFR_SET_EXP (a, ax + 1);
356 }
357 MPFR_RET(MPFR_SIGN(a));
358 }
359 }
360
361 #endif /* MPFR_WANT_PROVEN_CODE */
362
363 /* Special code for prec(a) = GMP_NUMB_BITS and
364 prec(b), prec(c) <= GMP_NUMB_BITS. */
365 static int
366 mpfr_mul_1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
367 {
368 mp_limb_t a0;
369 mpfr_limb_ptr ap = MPFR_MANT(a);
370 mp_limb_t b0 = MPFR_MANT(b)[0];
371 mp_limb_t c0 = MPFR_MANT(c)[0];
372 mpfr_exp_t ax;
373 mp_limb_t rb, sb;
374
375 ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
376 umul_ppmm (a0, sb, b0, c0);
377 if (a0 < MPFR_LIMB_HIGHBIT)
378 {
379 ax --;
380 /* TODO: This is actually an addition with carry (no shifts and no OR
381 needed in asm). Make sure that GCC generates optimized code once
382 it supports carry-in. */
383 a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
384 sb <<= 1;
385 }
386 rb = sb & MPFR_LIMB_HIGHBIT;
387 sb = sb & ~MPFR_LIMB_HIGHBIT;
388 ap[0] = a0;
389
390 MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
391
392 /* rounding */
393 if (MPFR_UNLIKELY(ax > __gmpfr_emax))
394 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
395
396 /* Warning: underflow should be checked *after* rounding, thus when rounding
397 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
398 a >= 0.111...111[1]*2^(emin-1), there is no underflow.
399 Note: this case can only occur when the initial a0 (after the umul_ppmm
400 call above) had its most significant bit 0, since the largest a0 is
401 obtained for b0 = c0 = B-1 where B=2^GMP_NUMB_BITS, thus b0*c0 <= (B-1)^2
402 thus a0 <= B-2. */
403 if (MPFR_UNLIKELY(ax < __gmpfr_emin))
404 {
405 if (ax == __gmpfr_emin - 1 && ap[0] == ~MPFR_LIMB_ZERO &&
406 ((rnd_mode == MPFR_RNDN && rb) ||
407 (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
408 goto rounding; /* no underflow */
409 /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
410 we have to change to RNDZ. This corresponds to:
411 (a) either ax < emin - 1
412 (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
413 if (rnd_mode == MPFR_RNDN &&
414 (ax < __gmpfr_emin - 1 ||
415 (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
416 rnd_mode = MPFR_RNDZ;
417 return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
418 }
419
420 rounding:
421 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
422 in the cases "goto rounding" above. */
423 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
424 {
425 MPFR_ASSERTD(ax >= __gmpfr_emin);
426 MPFR_RET (0);
427 }
428 else if (rnd_mode == MPFR_RNDN)
429 {
430 if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
431 goto truncate;
432 else
433 goto add_one_ulp;
434 }
435 else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
436 {
437 truncate:
438 MPFR_ASSERTD(ax >= __gmpfr_emin);
439 MPFR_RET(-MPFR_SIGN(a));
440 }
441 else /* round away from zero */
442 {
443 add_one_ulp:
444 ap[0] += MPFR_LIMB_ONE;
445 if (ap[0] == 0)
446 {
447 ap[0] = MPFR_LIMB_HIGHBIT;
448 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
449 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
450 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
451 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
452 MPFR_SET_EXP (a, ax + 1);
453 }
454 MPFR_RET(MPFR_SIGN(a));
455 }
456 }
457
458 /* Special code for GMP_NUMB_BITS < prec(a) < 2*GMP_NUMB_BITS and
459 GMP_NUMB_BITS < prec(b), prec(c) <= 2*GMP_NUMB_BITS.
460 Note: this code was copied into sqr.c, function mpfr_sqr_2 (this saves a few cycles
461 compared to having this function exported). As a consequence, any change here
462 should be mirrored in mpfr_sqr_2. */
463 static int
464 mpfr_mul_2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
465 mpfr_prec_t p)
466 {
467 mp_limb_t h, l, u, v, w;
468 mpfr_limb_ptr ap = MPFR_MANT(a);
469 mpfr_exp_t ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
470 mpfr_prec_t sh = 2 * GMP_NUMB_BITS - p;
471 mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
472 mp_limb_t *bp = MPFR_MANT(b), *cp = MPFR_MANT(c);
473
474 /* we store the 4-limb product in h=ap[1], l=ap[0], sb=ap[-1], sb2=ap[-2] */
475 umul_ppmm (h, l, bp[1], cp[1]);
476 umul_ppmm (u, v, bp[1], cp[0]);
477 l += u;
478 h += (l < u);
479 umul_ppmm (u, w, bp[0], cp[1]);
480 l += u;
481 h += (l < u);
482
483 /* now the full product is {h, l, v + w + high(b0*c0), low(b0*c0)},
484 where the lower part contributes less than 3 ulps to {h, l} */
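/* Indeed, the neglected part is (v + w)*B + b0*c0 < 2*(B-1)*B + (B-1)^2
< 3*B^2 with B = 2^GMP_NUMB_BITS, and one ulp of {h, l} is B^2. */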
485
486 /* If h has its most significant bit set and the low sh-1 bits of l are not
487 000...000 nor 111...111 nor 111...110, then we can round correctly;
488 if h has zero as most significant bit, we have to shift left h and l,
489 thus if the low sh-2 bits are not 000...000 nor 111...111 nor 111...110,
490 then we can round correctly. To avoid an extra test we consider the latter
491 case (if we can round, we can also round in the former case).
492 For sh <= 3, we have mask <= 7, thus (mask>>2) <= 1, and the approximation
493 cannot be sufficient. */
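/* The test below rejects exactly those three patterns: adding 2 maps
111...110 -> 0, 111...111 -> 1 and 000...000 -> 2 modulo 2^(sh-2),
all <= 2, while any other value of the low sh-2 bits gives > 2. */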
494 if (MPFR_LIKELY(((l + 2) & (mask >> 2)) > 2))
495 sb = sb2 = 1; /* result cannot be exact in that case */
496 else
497 {
498 umul_ppmm (sb, sb2, bp[0], cp[0]);
499 /* the full product is {h, l, sb + v + w, sb2} */
500 sb += v;
501 l += (sb < v);
502 h += (l == 0) && (sb < v);
503 sb += w;
504 l += (sb < w);
505 h += (l == 0) && (sb < w);
506 }
507 if (h < MPFR_LIMB_HIGHBIT)
508 {
509 ax --;
510 h = (h << 1) | (l >> (GMP_NUMB_BITS - 1));
511 l = (l << 1) | (sb >> (GMP_NUMB_BITS - 1));
512 sb <<= 1;
513 /* no need to shift sb2 since we only want to know if it is zero or not */
514 }
515 ap[1] = h;
516 rb = l & (MPFR_LIMB_ONE << (sh - 1));
517 sb |= ((l & mask) ^ rb) | sb2;
518 ap[0] = l & ~mask;
519
520 MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
521
522 /* rounding */
523 if (MPFR_UNLIKELY(ax > __gmpfr_emax))
524 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
525
526 /* Warning: underflow should be checked *after* rounding, thus when rounding
527 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
528 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
529 if (MPFR_UNLIKELY(ax < __gmpfr_emin))
530 {
531 if (ax == __gmpfr_emin - 1 &&
532 ap[1] == MPFR_LIMB_MAX &&
533 ap[0] == MPFR_LIMB(~mask) &&
534 ((rnd_mode == MPFR_RNDN && rb) ||
535 (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
536 goto rounding; /* no underflow */
537 /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
538 we have to change to RNDZ */
539 if (rnd_mode == MPFR_RNDN &&
540 (ax < __gmpfr_emin - 1 ||
541 (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0 && (rb | sb) == 0)))
542 rnd_mode = MPFR_RNDZ;
543 return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
544 }
545
546 rounding:
547 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
548 in the cases "goto rounding" above. */
549 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
550 {
551 MPFR_ASSERTD(ax >= __gmpfr_emin);
552 MPFR_RET (0);
553 }
554 else if (rnd_mode == MPFR_RNDN)
555 {
556 if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
557 goto truncate;
558 else
559 goto add_one_ulp;
560 }
561 else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
562 {
563 truncate:
564 MPFR_ASSERTD(ax >= __gmpfr_emin);
565 MPFR_RET(-MPFR_SIGN(a));
566 }
567 else /* round away from zero */
568 {
569 add_one_ulp:
570 ap[0] += MPFR_LIMB_ONE << sh;
571 ap[1] += (ap[0] == 0);
572 if (ap[1] == 0)
573 {
574 ap[1] = MPFR_LIMB_HIGHBIT;
575 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
576 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
577 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
578 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
579 MPFR_SET_EXP (a, ax + 1);
580 }
581 MPFR_RET(MPFR_SIGN(a));
582 }
583 }
584
585 /* Special code for 2*GMP_NUMB_BITS < prec(a) < 3*GMP_NUMB_BITS and
586 2*GMP_NUMB_BITS < prec(b), prec(c) <= 3*GMP_NUMB_BITS. */
587 static int
588 mpfr_mul_3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
589 mpfr_prec_t p)
590 {
591 mp_limb_t a0, a1, a2, h, l, cy;
592 mpfr_limb_ptr ap = MPFR_MANT(a);
593 mpfr_exp_t ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
594 mpfr_prec_t sh = 3 * GMP_NUMB_BITS - p;
595 mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
596 mp_limb_t *bp = MPFR_MANT(b), *cp = MPFR_MANT(c);
597
598 /* we store the upper 3-limb product in a2, a1, a0:
599 b2*c2, b2*c1+b1*c2, b2*c0+b1*c1+b0*c2 */
600 umul_ppmm (a2, a1, bp[2], cp[2]);
601 umul_ppmm (h, a0, bp[2], cp[1]);
602 a1 += h;
603 a2 += (a1 < h);
604 umul_ppmm (h, l, bp[1], cp[2]);
605 a1 += h;
606 a2 += (a1 < h);
607 a0 += l;
608 cy = a0 < l; /* carry in a1 */
609 umul_ppmm (h, l, bp[2], cp[0]);
610 a0 += h;
611 cy += (a0 < h);
612 umul_ppmm (h, l, bp[1], cp[1]);
613 a0 += h;
614 cy += (a0 < h);
615 umul_ppmm (h, l, bp[0], cp[2]);
616 a0 += h;
617 cy += (a0 < h);
618 /* now propagate cy */
619 a1 += cy;
620 a2 += (a1 < cy);
621
622 /* Now the approximate product {a2, a1, a0} has an error of less than
623 5 ulps (3 ulps for the ignored low limbs of b2*c0+b1*c1+b0*c2,
624 plus 2 ulps for the ignored b1*c0+b0*c1 (plus b0*c0)).
625 Since we might shift by 1 bit, we make sure the low sh-2 bits of a0
626 are not 0, -1, -2, -3 or -4. */
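/* As in mpfr_mul_2, the test below implements this: adding 4 maps the
five excluded residues 0, -1, -2, -3, -4 (mod 2^(sh-2)) to 4, 3, 2, 1, 0
respectively, all <= 4, so exactly those values fail the > 4 test. */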
627
628 if (MPFR_LIKELY(((a0 + 4) & (mask >> 2)) > 4))
629 sb = sb2 = 1; /* result cannot be exact in that case */
630 else
631 {
632 mp_limb_t p[6];
633 mpn_mul_n (p, bp, cp, 3);
634 a2 = p[5];
635 a1 = p[4];
636 a0 = p[3];
637 sb = p[2];
638 sb2 = p[1] | p[0];
639 }
640 if (a2 < MPFR_LIMB_HIGHBIT)
641 {
642 ax --;
643 a2 = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
644 a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
645 a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
646 sb <<= 1;
647 /* no need to shift sb2: we only need to know if it is zero or not */
648 }
649 ap[2] = a2;
650 ap[1] = a1;
651 rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
652 sb |= ((a0 & mask) ^ rb) | sb2;
653 ap[0] = a0 & ~mask;
654
655 MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
656
657 /* rounding */
658 if (MPFR_UNLIKELY(ax > __gmpfr_emax))
659 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
660
661 /* Warning: underflow should be checked *after* rounding, thus when rounding
662 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
663 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
664 if (MPFR_UNLIKELY(ax < __gmpfr_emin))
665 {
666 if (ax == __gmpfr_emin - 1 &&
667 ap[2] == MPFR_LIMB_MAX &&
668 ap[1] == MPFR_LIMB_MAX &&
669 ap[0] == MPFR_LIMB(~mask) &&
670 ((rnd_mode == MPFR_RNDN && rb) ||
671 (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
672 goto rounding; /* no underflow */
673 /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
674 we have to change to RNDZ */
675 if (rnd_mode == MPFR_RNDN &&
676 (ax < __gmpfr_emin - 1 ||
677 (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0
678 && (rb | sb) == 0)))
679 rnd_mode = MPFR_RNDZ;
680 return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
681 }
682
683 rounding:
684 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
685 in the cases "goto rounding" above. */
686 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
687 {
688 MPFR_ASSERTD(ax >= __gmpfr_emin);
689 MPFR_RET (0);
690 }
691 else if (rnd_mode == MPFR_RNDN)
692 {
693 if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
694 goto truncate;
695 else
696 goto add_one_ulp;
697 }
698 else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
699 {
700 truncate:
701 MPFR_ASSERTD(ax >= __gmpfr_emin);
702 MPFR_RET(-MPFR_SIGN(a));
703 }
704 else /* round away from zero */
705 {
706 add_one_ulp:
707 ap[0] += MPFR_LIMB_ONE << sh;
708 ap[1] += (ap[0] == 0);
709 ap[2] += (ap[1] == 0) && (ap[0] == 0);
710 if (ap[2] == 0)
711 {
712 ap[2] = MPFR_LIMB_HIGHBIT;
713 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
714 return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
715 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
716 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
717 MPFR_SET_EXP (a, ax + 1);
718 }
719 MPFR_RET(MPFR_SIGN(a));
720 }
721 }
722
723 #endif /* !defined(MPFR_GENERIC_ABI) */
724
725 /* Note: mpfr_sqr will call mpfr_mul if bn > MPFR_SQR_THRESHOLD,
726 in order to use Mulders' mulhigh, which is handled only here
727 to avoid partial code duplication. There is some overhead due
728 to the additional tests, but slowdown should not be noticeable
729 as this code is not executed in very small precisions. */
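/* Reminder: Mulders' mulhigh (mpfr_mulhigh_n) computes only an
approximation of the upper half of a product, which is cheaper than a
full mpn_mul for large operands; the price is a bounded number of
uncertain low bits, which is why a roundability check is performed
before using its result, with a fallback to the full product. */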
730
731 MPFR_HOT_FUNCTION_ATTR int
732 mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
733 {
734 int sign, inexact;
735 mpfr_exp_t ax, ax2;
736 mp_limb_t *tmp;
737 mp_limb_t b1;
738 mpfr_prec_t aq, bq, cq;
739 mp_size_t bn, cn, tn, k, threshold;
740 MPFR_TMP_DECL (marker);
741
742 MPFR_LOG_FUNC
743 (("b[%Pd]=%.*Rg c[%Pd]=%.*Rg rnd=%d",
744 mpfr_get_prec (b), mpfr_log_prec, b,
745 mpfr_get_prec (c), mpfr_log_prec, c, rnd_mode),
746 ("a[%Pd]=%.*Rg inexact=%d",
747 mpfr_get_prec (a), mpfr_log_prec, a, inexact));
748
749 /* deal with special cases */
750 if (MPFR_ARE_SINGULAR (b, c))
751 {
752 if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
753 {
754 MPFR_SET_NAN (a);
755 MPFR_RET_NAN;
756 }
757 sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
758 if (MPFR_IS_INF (b))
759 {
760 if (!MPFR_IS_ZERO (c))
761 {
762 MPFR_SET_SIGN (a, sign);
763 MPFR_SET_INF (a);
764 MPFR_RET (0);
765 }
766 else
767 {
768 MPFR_SET_NAN (a);
769 MPFR_RET_NAN;
770 }
771 }
772 else if (MPFR_IS_INF (c))
773 {
774 if (!MPFR_IS_ZERO (b))
775 {
776 MPFR_SET_SIGN (a, sign);
777 MPFR_SET_INF (a);
778 MPFR_RET(0);
779 }
780 else
781 {
782 MPFR_SET_NAN (a);
783 MPFR_RET_NAN;
784 }
785 }
786 else
787 {
788 MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
789 MPFR_SET_SIGN (a, sign);
790 MPFR_SET_ZERO (a);
791 MPFR_RET (0);
792 }
793 }
794
795 aq = MPFR_GET_PREC (a);
796 bq = MPFR_GET_PREC (b);
797 cq = MPFR_GET_PREC (c);
798
799 #if !defined(MPFR_GENERIC_ABI)
800 if (aq == bq && aq == cq)
801 {
802 if (aq < GMP_NUMB_BITS)
803 return mpfr_mul_1 (a, b, c, rnd_mode, aq);
804
805 if (GMP_NUMB_BITS < aq && aq < 2 * GMP_NUMB_BITS)
806 return mpfr_mul_2 (a, b, c, rnd_mode, aq);
807
808 if (aq == GMP_NUMB_BITS)
809 return mpfr_mul_1n (a, b, c, rnd_mode);
810
811 if (2 * GMP_NUMB_BITS < aq && aq < 3 * GMP_NUMB_BITS)
812 return mpfr_mul_3 (a, b, c, rnd_mode, aq);
813 }
814 #endif
815
816 sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
817
818 ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
819 /* Note: the exponent of the exact result will be e = bx + cx + ec with
820 ec in {-1,0,1} and the following assumes that e is representable. */
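/* Both significands lie in [1/2, 1), so the exact product lies in
[1/4, 1): its exponent is bx + cx or bx + cx - 1, and rounding up may
add one more, whence ec in {-1,0,1}. */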
821
822 /* FIXME: Is this useful, given the exponent check done afterwards?
823 * It is useful only if the precision is large, there is an overflow,
824 * and we are doing further multiplications... */
825 #ifdef HUGE
826 if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
827 return mpfr_overflow (a, rnd_mode, sign);
828 if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
829 return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
830 sign);
831 #endif
832
833 MPFR_ASSERTN ((mpfr_uprec_t) bq + cq <= MPFR_PREC_MAX);
834
835 bn = MPFR_PREC2LIMBS (bq); /* number of limbs of b */
836 cn = MPFR_PREC2LIMBS (cq); /* number of limbs of c */
837 k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
838 tn = MPFR_PREC2LIMBS (bq + cq);
839 MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */
840
841 /* Check for no size_t overflow. */
842 MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / MPFR_BYTES_PER_MP_LIMB);
843 MPFR_TMP_MARK (marker);
844 tmp = MPFR_TMP_LIMBS_ALLOC (k);
845
846 /* multiply the two mantissas in temporarily allocated space */
847 if (MPFR_UNLIKELY (bn < cn))
848 {
849 mpfr_srcptr z = b;
850 mp_size_t zn = bn;
851 b = c;
852 bn = cn;
853 c = z;
854 cn = zn;
855 }
856 MPFR_ASSERTD (bn >= cn);
857 if (bn <= 2)
858 {
859 /* The 3 cases perform the same first operation. */
860 umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
861 if (bn == 1)
862 {
863 /* 1 limb * 1 limb */
864 b1 = tmp[1];
865 }
866 else if (MPFR_UNLIKELY (cn == 1))
867 {
868 /* 2 limbs * 1 limb */
869 mp_limb_t t;
870 umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
871 add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
872 b1 = tmp[2];
873 }
874 else
875 {
876 /* 2 limbs * 2 limbs */
877 mp_limb_t t1, t2, t3;
878 /* First 2 limbs * 1 limb */
879 umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
880 add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
881 /* Second, the other 2 limbs * 1 limb product */
882 umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
883 umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
884 add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
885 /* Sum those two partial products */
886 add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
887 tmp[3] += (tmp[2] < t1);
888 b1 = tmp[3];
889 }
890 b1 >>= (GMP_NUMB_BITS - 1);
891 tmp += k - tn;
892 if (MPFR_UNLIKELY (b1 == 0))
893 mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
894 }
895 else /* bn >= cn and bn >= 3 */
896 /* Mulders' mulhigh. This code can also be used via mpfr_sqr,
897 hence the tests b != c. */
898 if (MPFR_UNLIKELY (cn > (threshold = b != c ?
899 MPFR_MUL_THRESHOLD : MPFR_SQR_THRESHOLD)))
900 {
901 mp_limb_t *bp, *cp;
902 mp_size_t n;
903 mpfr_prec_t p;
904
905 /* First check if we can reduce the precision of b or c:
906 exact values are a nightmare for the short product trick */
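/* Roughly: operands with zero low limbs can give a product that is exact
or that falls exactly on a rounding boundary, which a truncated short
product cannot detect (the roundability test would keep failing).
Stripping the trailing zero limbs below and calling mpfr_mul again
avoids that situation. */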
907 bp = MPFR_MANT (b);
908 cp = MPFR_MANT (c);
909 MPFR_STAT_STATIC_ASSERT (MPFR_MUL_THRESHOLD >= 1 &&
910 MPFR_SQR_THRESHOLD >= 1);
911 if (MPFR_UNLIKELY ((bp[0] == 0 && bp[1] == 0) ||
912 (cp[0] == 0 && cp[1] == 0)))
913 {
914 mpfr_t b_tmp, c_tmp;
915
916 MPFR_TMP_FREE (marker);
917 /* Check for b */
918 while (*bp == 0)
919 {
920 bp++;
921 bn--;
922 MPFR_ASSERTD (bn > 0);
923 } /* This must end since the most significant limb is != 0 */
924
925 /* Check for c too: if b == c, this will do nothing */
926 while (*cp == 0)
927 {
928 cp++;
929 cn--;
930 MPFR_ASSERTD (cn > 0);
931 } /* This must end since the most significant limb is != 0 */
932
933 /* It is not the fastest way, but it is safer. */
934 MPFR_SET_SAME_SIGN (b_tmp, b);
935 MPFR_SET_EXP (b_tmp, MPFR_GET_EXP (b));
936 MPFR_PREC (b_tmp) = bn * GMP_NUMB_BITS;
937 MPFR_MANT (b_tmp) = bp;
938
939 if (b != c)
940 {
941 MPFR_SET_SAME_SIGN (c_tmp, c);
942 MPFR_SET_EXP (c_tmp, MPFR_GET_EXP (c));
943 MPFR_PREC (c_tmp) = cn * GMP_NUMB_BITS;
944 MPFR_MANT (c_tmp) = cp;
945
946 /* Call again mpfr_mul with the fixed arguments */
947 return mpfr_mul (a, b_tmp, c_tmp, rnd_mode);
948 }
949 else
950 /* Call mpfr_mul instead of mpfr_sqr, as the precision is probably
951 still high enough. Calling mpfr_sqr would not give an infinite
952 loop either, but it is better to call mpfr_mul again. */
954 return mpfr_mul (a, b_tmp, b_tmp, rnd_mode);
955 }
956
957 /* Compute estimated precision of mulhigh.
958 We could use `+ (n < cn) + (n < bn)' instead of `+ 2',
959 but is it worth it? */
960 n = MPFR_LIMB_SIZE (a) + 1;
961 n = MIN (n, cn);
962 MPFR_ASSERTD (n >= 1 && 2*n <= k && n <= cn && n <= bn);
963 p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
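/* p is the number of bits of the n-limb approximation that can be
trusted: the terms neglected by the short product amount to at most
about n+2 units in the last limb computed, so about ceil(log2(n+2))
low bits of that limb are unreliable. */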
964 bp += bn - n;
965 cp += cn - n;
966
967 /* Check if MulHigh can produce a roundable result.
968 We may lose 1 bit due to RNDN, 1 due to final shift. */
969 if (MPFR_UNLIKELY (aq > p - 5))
970 {
971 if (MPFR_UNLIKELY (aq > p - 5 + GMP_NUMB_BITS
972 || bn <= threshold + 1))
973 {
974 /* MulHigh can't produce a roundable result. */
975 MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
976 aq, p));
977 goto full_multiply;
978 }
979 /* Add one extra limb to mantissa of b and c. */
980 if (bn > n)
981 bp --;
982 else
983 {
984 bp = MPFR_TMP_LIMBS_ALLOC (n + 1);
985 bp[0] = 0;
986 MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n);
987 }
988 if (b != c)
989 {
990 #if GMP_NUMB_BITS <= 32
991 if (cn > n)
992 cp --; /* This can only happen on a 32-bit computer,
993 and is very unlikely to happen.
994 Indeed, since n = MIN (an + 1, cn), with
995 an = MPFR_LIMB_SIZE(a), we can have cn > n
996 only when n = an + 1 < cn.
997 We are in the case aq > p - 5, with
998 aq = PREC(a) = an*W - sh, with W = GMP_NUMB_BITS
999 and 0 <= sh < W, and p = n*W - ceil(log2(n+2)),
1000 thus an*W - sh > n*W - ceil(log2(n+2)) - 5.
1001 Thus n < an + (ceil(log2(n+2)) + 5 - sh)/W.
1002 To get n = an + 1, we need
1003 ceil(log2(n+2)) + 5 - sh > W, thus since sh>=0
1004 we need ceil(log2(n+2)) + 5 > W.
1005 With W=32 this can only happen for n>=2^27-1,
1006 thus for a precision of 2^32-64 for a,
1007 and with W=64 for n>=2^59-1, which would give
1008 a precision >= 2^64. */
1009 else
1010 #endif
1011 {
1012 cp = MPFR_TMP_LIMBS_ALLOC (n + 1);
1013 cp[0] = 0;
1014 MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
1015 }
1016 }
1017 /* We will compute with one extra limb */
1018 n++;
1019 /* ceil(log2(n+2)) takes into account the lost bits due to
1020 Mulders' short product */
1021 p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
1022 /* Due to some nasty reasons we may have only 4 extra bits here */
1023 MPFR_ASSERTD (aq <= p - 4);
1024
1025 if (MPFR_LIKELY (k < 2*n))
1026 {
1027 tmp = MPFR_TMP_LIMBS_ALLOC (2 * n);
1028 tmp += 2*n-k; /* `tmp' still points to an area of `k' limbs */
1029 }
1030 }
1031 MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", aq, p));
1032 /* Compute an approximation of the product of b and c */
1033 if (b != c)
1034 mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
1035 else
1036 mpfr_sqrhigh_n (tmp + k - 2 * n, bp, n);
1037 /* now tmp[k-n]..tmp[k-1] contains an approximation of the n upper
1038 limbs of the product, with tmp[k-1] >= 2^(GMP_NUMB_BITS-2) */
1039 b1 = tmp[k-1] >> (GMP_NUMB_BITS - 1); /* msb from the product */
1040
1041 /* If the mantissas of b and c are uniformly distributed in (1/2, 1],
1042 then their product is in (1/4, 1/2] with probability 2*ln(2)-1
1043 ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
1044 if (MPFR_UNLIKELY (b1 == 0))
1045 /* Warning: the mpfr_mulhigh_n call above is only guaranteed to affect
1046 tmp[k-n-1..k-1], thus we shift only those limbs */
1047 mpn_lshift (tmp + k - n - 1, tmp + k - n - 1, n + 1, 1);
1048 tmp += k - tn;
1049 /* now the approximation is in tmp[tn-n]...tmp[tn-1] */
1050 MPFR_ASSERTD (MPFR_LIMB_MSB (tmp[tn-1]) != 0);
1051
1052 /* for RNDF, we simply use RNDZ, since anyway here we multiply numbers
1053 with large precisions, thus the overhead of RNDZ is small */
1054 if (rnd_mode == MPFR_RNDF)
1055 rnd_mode = MPFR_RNDZ;
1056
1057 /* if the most significant bit b1 is zero, we have only p-1 correct
1058 bits */
1059 if (MPFR_UNLIKELY (!mpfr_round_p (tmp, tn, p + b1 - 1,
1060 aq + (rnd_mode == MPFR_RNDN))))
1061 {
1062 tmp -= k - tn; /* tmp may have changed, FIX IT!!!!! */
1063 goto full_multiply;
1064 }
1065 }
1066 else
1067 {
1068 full_multiply:
1069 MPFR_LOG_MSG (("Use mpn_mul\n", 0));
1070 b1 = mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn);
1071
1072 /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
1073 with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
1074 b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */
1075
1076 /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
1077 then their product is in (1/4, 1/2] with probability 2*ln(2)-1
1078 ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
1079 tmp += k - tn;
1080 if (MPFR_UNLIKELY (b1 == 0))
1081 mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
1082 }
1083
1084 /* b1 is 0 or 1 (most significant bit from the raw product) */
1085 ax2 = ax + ((int) b1 - 1);
1086 MPFR_RNDRAW (inexact, a, tmp, bq + cq, rnd_mode, sign, ax2++);
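/* MPFR_RNDRAW rounds the raw significand tmp (of bq+cq bits) to PREC(a)
bits, stores the result in a, sets inexact, and executes its last
argument (here ax2++) when rounding propagates a carry out of the most
significant bit, i.e. when the rounded significand becomes a power
of 2, so that the exponent is adjusted accordingly. */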
1087 MPFR_TMP_FREE (marker);
1088 MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: Expo may be out of range */
1089 MPFR_SET_SIGN (a, sign);
1090 if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
1091 return mpfr_overflow (a, rnd_mode, sign);
1092 if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
1093 {
1094 /* In round-to-nearest mode, if the exponent of the exact result (i.e.
1095 before rounding, i.e. without taking a possible carry from rounding
1096 into account) is < __gmpfr_emin - 1, or the exact result is a power of 2
1097 (i.e. if both arguments are powers of 2), then round to zero. */
1098 if (rnd_mode == MPFR_RNDN
1099 && (ax + (mpfr_exp_t) b1 < __gmpfr_emin
1100 || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
1101 rnd_mode = MPFR_RNDZ;
1102 return mpfr_underflow (a, rnd_mode, sign);
1103 }
1104 MPFR_RET (inexact);
1105 }
1106