1 /* mpfr_mul -- multiply two floating-point numbers
2 
3 Copyright 1999-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 
27 /********* BEGINNING CHECK *************/
28 
29 /* Check whether we have to check the result of mpfr_mul.
30    TODO: Find a better (and faster?) check than using the old implementation */
31 #if MPFR_WANT_ASSERT >= 2
32 
33 int mpfr_mul2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode);
34 static int
35 mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
36 {
37   /* Old implementation */
38   int sign_product, cc, inexact;
39   mpfr_exp_t ax;
40   mp_limb_t *tmp;
41   mp_limb_t b1;
42   mpfr_prec_t bq, cq;
43   mp_size_t bn, cn, tn, k;
44   MPFR_TMP_DECL(marker);
45 
46   /* deal with special cases */
47   if (MPFR_ARE_SINGULAR(b,c))
48     {
49       if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
50         {
51           MPFR_SET_NAN(a);
52           MPFR_RET_NAN;
53         }
54       sign_product = MPFR_MULT_SIGN(MPFR_SIGN(b), MPFR_SIGN(c));
55       if (MPFR_IS_INF(b))
56         {
57           if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
58             {
59               MPFR_SET_SIGN(a, sign_product);
60               MPFR_SET_INF(a);
61               MPFR_RET(0); /* exact */
62             }
63           else
64             {
65               MPFR_SET_NAN(a);
66               MPFR_RET_NAN;
67             }
68         }
69       else if (MPFR_IS_INF(c))
70         {
71           if (MPFR_NOTZERO(b))
72             {
73               MPFR_SET_SIGN(a, sign_product);
74               MPFR_SET_INF(a);
75               MPFR_RET(0); /* exact */
76             }
77           else
78             {
79               MPFR_SET_NAN(a);
80               MPFR_RET_NAN;
81             }
82         }
83       else
84         {
85           MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
86           MPFR_SET_SIGN(a, sign_product);
87           MPFR_SET_ZERO(a);
88           MPFR_RET(0); /* 0 * 0 is exact */
89         }
90     }
91   sign_product = MPFR_MULT_SIGN(MPFR_SIGN(b), MPFR_SIGN(c));
92 
93   ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
94 
95   bq = MPFR_PREC (b);
96   cq = MPFR_PREC (c);
97 
98   MPFR_ASSERTN ((mpfr_uprec_t) bq + cq <= MPFR_PREC_MAX);
99 
100   bn = MPFR_PREC2LIMBS (bq); /* number of limbs of b */
101   cn = MPFR_PREC2LIMBS (cq); /* number of limbs of c */
102   k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
103   tn = MPFR_PREC2LIMBS (bq + cq);
104   /* <= k, thus no int overflow */
105   MPFR_ASSERTD(tn <= k);
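  /* Illustrative example, assuming 64-bit limbs: bq = cq = 100 gives
     bn = cn = 2, k = 4 and tn = ceil(200/64) = 4, while bq = cq = 65 gives
     bn = cn = 2, k = 4 but tn = ceil(130/64) = 3, i.e. bq+cq bits fit in
     one limb less than k. */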
106 
107   /* Check for no size_t overflow. */
108   MPFR_ASSERTD((size_t) k <= ((size_t) -1) / MPFR_BYTES_PER_MP_LIMB);
109   MPFR_TMP_MARK(marker);
110   tmp = MPFR_TMP_LIMBS_ALLOC (k);
111 
112   /* multiplies the two mantissas in temporarily allocated space */
113   b1 = (MPFR_LIKELY(bn >= cn)) ?
114     mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
115     : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);
116 
117   /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
118      with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
119   b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */
120   MPFR_ASSERTD (b1 == 0 || b1 == 1);
121 
122   /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
123      then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
124      and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
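  /* Quick justification of the figures above: for X and Y independent and
     uniform on ]1/2, 1], P(X*Y <= 1/2) = int_{1/2}^{1} 2*(1/x - 1) dx
     = 2*ln(2) - 1 ~ 0.386, and the complementary probability is
     2 - 2*ln(2) ~ 0.614. */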
125   tmp += k - tn;
126   if (MPFR_UNLIKELY(b1 == 0))
127     mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
128   cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
129                        MPFR_IS_NEG_SIGN(sign_product),
130                        MPFR_PREC (a), rnd_mode, &inexact);
131   MPFR_ASSERTD (cc == 0 || cc == 1);
132 
133   /* cc = 1 ==> result is a power of two */
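  /* Example with a 4-bit target precision: rounding the significand 0.1111
     upward gives 1.0000, which no longer fits below the binary point;
     mpfr_round_raw reports this with cc = 1, the assignment below resets
     the significand to the pattern 1000...0, and the exponent is increased
     by cc in the block further down. */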
134   if (MPFR_UNLIKELY(cc))
135     MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;
136 
137   MPFR_TMP_FREE(marker);
138 
139   {
140     /* We need to cast b1 to a signed integer type in order to use
141        signed integer arithmetic only, as the expression can involve
142        negative integers. Let's recall that both b1 and cc are 0 or 1,
143        and since cc is an int, let's choose int for this part. */
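    /* The significands of b and c are in [1/2, 1), so their product lies in
       [1/4, 1): b1 = 1 means the product is already in [1/2, 1) and has
       exponent ax, b1 = 0 means it is in [1/4, 1/2) and has exponent ax - 1,
       and cc = 1 adds one back when rounding carried up to the next power
       of two. */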
144     mpfr_exp_t ax2 = ax + ((int) b1 - 1 + cc);
145     if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
146       return mpfr_overflow (a, rnd_mode, sign_product);
147     if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
148       {
149         /* In the rounding to the nearest mode, if the exponent of the exact
150            result (i.e. before rounding, i.e. without taking cc into account)
151            is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
152            both arguments are powers of 2) in absolute value, then round to
153            zero. */
154         if (rnd_mode == MPFR_RNDN &&
155             (ax + (mpfr_exp_t) b1 < __gmpfr_emin ||
156              (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
157           rnd_mode = MPFR_RNDZ;
158         return mpfr_underflow (a, rnd_mode, sign_product);
159       }
160     MPFR_SET_EXP (a, ax2);
161     MPFR_SET_SIGN(a, sign_product);
162   }
163   MPFR_RET (inexact);
164 }
165 
166 int
167 mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
168 {
169   mpfr_t ta, tb, tc;
170   mpfr_flags_t old_flags, flags1, flags2;
171   int inexact1, inexact2;
172 
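  /* For MPFR_RNDF (faithful rounding), neither the returned value nor the
     ternary result is uniquely defined, so comparing the old and the new
     implementation would be meaningless; call the optimized code directly. */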
173   if (rnd_mode == MPFR_RNDF)
174     return mpfr_mul2 (a, b, c, rnd_mode);
175 
176   old_flags = __gmpfr_flags;
177 
178   mpfr_init2 (ta, MPFR_PREC (a));
179   mpfr_init2 (tb, MPFR_PREC (b));
180   mpfr_init2 (tc, MPFR_PREC (c));
181   MPFR_ASSERTN (mpfr_set (tb, b, MPFR_RNDN) == 0);
182   MPFR_ASSERTN (mpfr_set (tc, c, MPFR_RNDN) == 0);
183 
184   /* Note: If b or c is NaN, then the NaN flag has been set by mpfr_set above.
185      Thus restore the old flags just below to make sure that mpfr_mul3 is
186      tested under the real conditions. */
187 
188   __gmpfr_flags = old_flags;
189   inexact2 = mpfr_mul3 (ta, tb, tc, rnd_mode);
190   flags2 = __gmpfr_flags;
191 
192   __gmpfr_flags = old_flags;
193   inexact1 = mpfr_mul2 (a, b, c, rnd_mode);
194   flags1 = __gmpfr_flags;
195 
196   /* Convert the ternary values to (-1,0,1). */
197   inexact2 = VSIGN (inexact2);
198   inexact1 = VSIGN (inexact1);
199 
200   if (! ((MPFR_IS_NAN (ta) && MPFR_IS_NAN (a)) || mpfr_equal_p (ta, a)) ||
201       inexact1 != inexact2 || flags1 != flags2)
202     {
203       /* We do not have MPFR_PREC_FSPEC, so let's use mpfr_eexp_t and
204          MPFR_EXP_FSPEC since mpfr_prec_t values are guaranteed to be
205          representable in mpfr_exp_t, thus in mpfr_eexp_t. */
206       fprintf (stderr, "mpfr_mul returns different values for %s\n"
207                "Prec_a = %" MPFR_EXP_FSPEC "d, "
208                "Prec_b = %" MPFR_EXP_FSPEC "d, "
209                "Prec_c = %" MPFR_EXP_FSPEC "d\n",
210                mpfr_print_rnd_mode (rnd_mode),
211                (mpfr_eexp_t) MPFR_PREC (a),
212                (mpfr_eexp_t) MPFR_PREC (b),
213                (mpfr_eexp_t) MPFR_PREC (c));
214       /* Note: We output tb and tc instead of b and c, in case a = b or c
215          (this is why tb and tc have been created in the first place). */
216       fprintf (stderr, "b = ");
217       mpfr_fdump (stderr, tb);
218       fprintf (stderr, "c = ");
219       mpfr_fdump (stderr, tc);
220       fprintf (stderr, "OldMul: ");
221       mpfr_fdump (stderr, ta);
222       fprintf (stderr, "NewMul: ");
223       mpfr_fdump (stderr, a);
224       fprintf (stderr, "OldMul: ternary = %2d, flags =", inexact2);
225       flags_fout (stderr, flags2);
226       fprintf (stderr, "NewMul: ternary = %2d, flags =", inexact1);
227       flags_fout (stderr, flags1);
228       MPFR_ASSERTN(0);
229     }
230 
231   mpfr_clears (ta, tb, tc, (mpfr_ptr) 0);
232   return inexact1;
233 }
234 
235 # define mpfr_mul mpfr_mul2
236 
237 #endif  /* MPFR_WANT_ASSERT >= 2 */
238 
239 /****** END OF CHECK *******/
240 
241 /* Multiply 2 mpfr_t */
242 
243 #if !defined(MPFR_GENERIC_ABI)
244 
245 /* Disabled for now since the mul_1_extracted.c is not formally proven yet.
246    Once it is proven, replace MPFR_WANT_PROVEN_CODExxx by MPFR_WANT_PROVEN_CODE. */
247 #if defined(MPFR_WANT_PROVEN_CODExxx) && GMP_NUMB_BITS == 64 && \
248   UINT_MAX == 0xffffffff && MPFR_PREC_BITS == 64 && \
249   _MPFR_PREC_FORMAT == 3 && _MPFR_EXP_FORMAT == _MPFR_PREC_FORMAT
250 
251 /* The code assumes that mp_limb_t has 64 bits exactly, unsigned int
252    has 32 bits exactly, mpfr_prec_t and mpfr_exp_t are of type long,
253    which has 64 bits exactly. */
254 
255 #include "mul_1_extracted.c"
256 
257 #else
258 
259 /* Special code for prec(a) < GMP_NUMB_BITS and
260    prec(b), prec(c) <= GMP_NUMB_BITS.
261    Note: this code was copied into sqr.c, function mpfr_sqr_1 (this saves a few cycles
262    with respect to having this function exported). As a consequence, any change here
263    should be reported in mpfr_sqr_1. */
264 static int
265 mpfr_mul_1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
266             mpfr_prec_t p)
267 {
268   mp_limb_t a0;
269   mpfr_limb_ptr ap = MPFR_MANT(a);
270   mp_limb_t b0 = MPFR_MANT(b)[0];
271   mp_limb_t c0 = MPFR_MANT(c)[0];
272   mpfr_exp_t ax;
273   mpfr_prec_t sh = GMP_NUMB_BITS - p;
274   mp_limb_t rb, sb, mask = MPFR_LIMB_MASK(sh);
275 
276   /* When prec(b), prec(c) <= GMP_NUMB_BITS / 2, we could replace umul_ppmm
277      by a limb multiplication as follows, but we assume umul_ppmm is as fast
278      as a limb multiplication on modern processors:
279       a0 = (b0 >> (GMP_NUMB_BITS / 2)) * (c0 >> (GMP_NUMB_BITS / 2));
280       sb = 0;
281   */
282   ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
283   umul_ppmm (a0, sb, b0, c0);
284   if (a0 < MPFR_LIMB_HIGHBIT)
285     {
286       ax --;
287       /* TODO: This is actually an addition with carry (no shifts and no OR
288          needed in asm). Make sure that GCC generates optimized code once
289          it supports carry-in. */
290       a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
291       sb <<= 1;
292     }
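  /* Round bit and sticky bit: the target precision p leaves sh unused bits
     at the bottom of a0; rb is the highest of them and sb accumulates (by
     ORing) everything below, including the whole low limb of the product.
     E.g. with sh = 4 and the low bits of a0 equal to 1010, rb is nonzero
     and the trailing 010 is ORed into sb. */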
293   rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
294   sb |= (a0 & mask) ^ rb;
295   ap[0] = a0 & ~mask;
296 
297   MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
298 
299   /* rounding */
300   if (MPFR_UNLIKELY(ax > __gmpfr_emax))
301     return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
302 
303   /* Warning: underflow should be checked *after* rounding, thus when rounding
304      away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
305      a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
306   if (MPFR_UNLIKELY(ax < __gmpfr_emin))
307     {
308       if (ax == __gmpfr_emin - 1 && ap[0] == MPFR_LIMB(~mask) &&
309           ((rnd_mode == MPFR_RNDN && rb) ||
310            (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
311         goto rounding; /* no underflow */
312       /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
313          we have to change to RNDZ. This corresponds to:
314          (a) either ax < emin - 1
315          (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
316       if (rnd_mode == MPFR_RNDN &&
317           (ax < __gmpfr_emin - 1 ||
318            (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
319         rnd_mode = MPFR_RNDZ;
320       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
321     }
322 
323  rounding:
324   MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
325                         in the cases "goto rounding" above. */
326   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
327     {
328       MPFR_ASSERTD(ax >= __gmpfr_emin);
329       MPFR_RET (0);
330     }
331   else if (rnd_mode == MPFR_RNDN)
332     {
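      /* Round to nearest, ties to even: rb = 1 with sb = 0 means the exact
         value is exactly halfway between two representable numbers, and the
         tie is broken on the parity of the last kept bit (bit sh of ap[0]):
         truncate if it is 0, add one ulp if it is 1. */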
333       if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
334         goto truncate;
335       else
336         goto add_one_ulp;
337     }
338   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
339     {
340     truncate:
341       MPFR_ASSERTD(ax >= __gmpfr_emin);
342       MPFR_RET(-MPFR_SIGN(a));
343     }
344   else /* round away from zero */
345     {
346     add_one_ulp:
347       ap[0] += MPFR_LIMB_ONE << sh;
348       if (ap[0] == 0)
349         {
350           ap[0] = MPFR_LIMB_HIGHBIT;
351           if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
352             return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
353           MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
354           MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
355           MPFR_SET_EXP (a, ax + 1);
356         }
357       MPFR_RET(MPFR_SIGN(a));
358     }
359 }
360 
361 #endif /* MPFR_WANT_PROVEN_CODE */
362 
363 /* Special code for prec(a) = GMP_NUMB_BITS and
364    prec(b), prec(c) <= GMP_NUMB_BITS. */
365 static int
366 mpfr_mul_1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
367 {
368   mp_limb_t a0;
369   mpfr_limb_ptr ap = MPFR_MANT(a);
370   mp_limb_t b0 = MPFR_MANT(b)[0];
371   mp_limb_t c0 = MPFR_MANT(c)[0];
372   mpfr_exp_t ax;
373   mp_limb_t rb, sb;
374 
375   ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
376   umul_ppmm (a0, sb, b0, c0);
377   if (a0 < MPFR_LIMB_HIGHBIT)
378     {
379       ax --;
380       /* TODO: This is actually an addition with carry (no shifts and no OR
381          needed in asm). Make sure that GCC generates optimized code once
382          it supports carry-in. */
383       a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
384       sb <<= 1;
385     }
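  /* Since the precision of a is exactly one limb, every bit of the low
     product limb is discarded: its most significant bit becomes the round
     bit and the remaining bits form the sticky information. */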
386   rb = sb & MPFR_LIMB_HIGHBIT;
387   sb = sb & ~MPFR_LIMB_HIGHBIT;
388   ap[0] = a0;
389 
390   MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
391 
392   /* rounding */
393   if (MPFR_UNLIKELY(ax > __gmpfr_emax))
394     return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
395 
396   /* Warning: underflow should be checked *after* rounding, thus when rounding
397      away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
398      a >= 0.111...111[1]*2^(emin-1), there is no underflow.
399      Note: this case can only occur when the initial a0 (after the umul_ppmm
400      call above) had its most significant bit 0, since the largest a0 is
401      obtained for b0 = c0 = B-1 where B=2^GMP_NUMB_BITS, thus b0*c0 <= (B-1)^2
402      thus a0 <= B-2. */
403   if (MPFR_UNLIKELY(ax < __gmpfr_emin))
404     {
405       if (ax == __gmpfr_emin - 1 && ap[0] == ~MPFR_LIMB_ZERO &&
406           ((rnd_mode == MPFR_RNDN && rb) ||
407            (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
408         goto rounding; /* no underflow */
409       /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
410          we have to change to RNDZ. This corresponds to:
411          (a) either ax < emin - 1
412          (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
413       if (rnd_mode == MPFR_RNDN &&
414           (ax < __gmpfr_emin - 1 ||
415            (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
416         rnd_mode = MPFR_RNDZ;
417       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
418     }
419 
420  rounding:
421   MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
422                         in the cases "goto rounding" above. */
423   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
424     {
425       MPFR_ASSERTD(ax >= __gmpfr_emin);
426       MPFR_RET (0);
427     }
428   else if (rnd_mode == MPFR_RNDN)
429     {
430       if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
431         goto truncate;
432       else
433         goto add_one_ulp;
434     }
435   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
436     {
437     truncate:
438       MPFR_ASSERTD(ax >= __gmpfr_emin);
439       MPFR_RET(-MPFR_SIGN(a));
440     }
441   else /* round away from zero */
442     {
443     add_one_ulp:
444       ap[0] += MPFR_LIMB_ONE;
445       if (ap[0] == 0)
446         {
447           ap[0] = MPFR_LIMB_HIGHBIT;
448           if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
449             return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
450           MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
451           MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
452           MPFR_SET_EXP (a, ax + 1);
453         }
454       MPFR_RET(MPFR_SIGN(a));
455     }
456 }
457 
458 /* Special code for GMP_NUMB_BITS < prec(a) < 2*GMP_NUMB_BITS and
459    GMP_NUMB_BITS < prec(b), prec(c) <= 2*GMP_NUMB_BITS.
460    Note: this code was copied into sqr.c, function mpfr_sqr_2 (this saves a few cycles
461    with respect to having this function exported). As a consequence, any change here
462    should be reported in mpfr_sqr_2. */
463 static int
464 mpfr_mul_2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
465             mpfr_prec_t p)
466 {
467   mp_limb_t h, l, u, v, w;
468   mpfr_limb_ptr ap = MPFR_MANT(a);
469   mpfr_exp_t ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
470   mpfr_prec_t sh = 2 * GMP_NUMB_BITS - p;
471   mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
472   mp_limb_t *bp = MPFR_MANT(b), *cp = MPFR_MANT(c);
473 
474   /* we store the 4-limb product in h=ap[1], l=ap[0], sb=ap[-1], sb2=ap[-2] */
475   umul_ppmm (h, l, bp[1], cp[1]);
476   umul_ppmm (u, v, bp[1], cp[0]);
477   l += u;
478   h += (l < u);
479   umul_ppmm (u, w, bp[0], cp[1]);
480   l += u;
481   h += (l < u);
482 
483   /* now the full product is {h, l, v + w + high(b0*c0), low(b0*c0)},
484      where the lower part contributes less than 3 ulps to {h, l} */
485 
486   /* If h has its most significant bit set and the low sh-1 bits of l are not
487      000...000 nor 111...111 nor 111...110, then we can round correctly;
488      if h has zero as most significant bit, we have to shift left h and l,
489      thus if the low sh-2 bits are not 000...000 nor 111...111 nor 111...110,
490      then we can round correctly. To avoid an extra test we consider the latter
491      case (if we can round, we can also round in the former case).
492      For sh <= 3, we have mask <= 7, thus (mask>>2) <= 1, and the approximation
493      cannot be enough. */
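  /* About the test below: let t be the value of the low sh-2 bits of l;
     ((l + 2) & (mask >> 2)) <= 2 exactly when t is 0, mask>>2 or
     (mask>>2)-1, i.e. one of the three ambiguous patterns above.  For any
     other t, adding the at most 2 ulps of ignored low part can neither zero
     the sticky part nor carry into the round bit, so the rounding decision
     is already determined. */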
494   if (MPFR_LIKELY(((l + 2) & (mask >> 2)) > 2))
495     sb = sb2 = 1; /* result cannot be exact in that case */
496   else
497     {
498       umul_ppmm (sb, sb2, bp[0], cp[0]);
499       /* the full product is {h, l, sb + v + w, sb2} */
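      /* Multi-limb addition without a hardware carry flag: after x += y the
         carry out is (x < y), and it propagates into the limb above only if
         that limb wraps around to 0 when the carry is added.  Each
         three-line group below adds one partial-product limb to sb and
         ripples the possible carry through l into h. */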
500       sb += v;
501       l += (sb < v);
502       h += (l == 0) && (sb < v);
503       sb += w;
504       l += (sb < w);
505       h += (l == 0) && (sb < w);
506     }
507   if (h < MPFR_LIMB_HIGHBIT)
508     {
509       ax --;
510       h = (h << 1) | (l >> (GMP_NUMB_BITS - 1));
511       l = (l << 1) | (sb >> (GMP_NUMB_BITS - 1));
512       sb <<= 1;
513       /* no need to shift sb2 since we only want to know if it is zero or not */
514     }
515   ap[1] = h;
516   rb = l & (MPFR_LIMB_ONE << (sh - 1));
517   sb |= ((l & mask) ^ rb) | sb2;
518   ap[0] = l & ~mask;
519 
520   MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
521 
522   /* rounding */
523   if (MPFR_UNLIKELY(ax > __gmpfr_emax))
524     return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
525 
526   /* Warning: underflow should be checked *after* rounding, thus when rounding
527      away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
528      a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
529   if (MPFR_UNLIKELY(ax < __gmpfr_emin))
530     {
531       if (ax == __gmpfr_emin - 1 &&
532           ap[1] == MPFR_LIMB_MAX &&
533           ap[0] == MPFR_LIMB(~mask) &&
534           ((rnd_mode == MPFR_RNDN && rb) ||
535            (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
536         goto rounding; /* no underflow */
537       /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
538          we have to change to RNDZ */
539       if (rnd_mode == MPFR_RNDN &&
540           (ax < __gmpfr_emin - 1 ||
541            (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0 && (rb | sb) == 0)))
542             rnd_mode = MPFR_RNDZ;
543       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
544     }
545 
546  rounding:
547   MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
548                         in the cases "goto rounding" above. */
549   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
550     {
551       MPFR_ASSERTD(ax >= __gmpfr_emin);
552       MPFR_RET (0);
553     }
554   else if (rnd_mode == MPFR_RNDN)
555     {
556       if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
557         goto truncate;
558       else
559         goto add_one_ulp;
560     }
561   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
562     {
563     truncate:
564       MPFR_ASSERTD(ax >= __gmpfr_emin);
565       MPFR_RET(-MPFR_SIGN(a));
566     }
567   else /* round away from zero */
568     {
569     add_one_ulp:
570       ap[0] += MPFR_LIMB_ONE << sh;
571       ap[1] += (ap[0] == 0);
572       if (ap[1] == 0)
573         {
574           ap[1] = MPFR_LIMB_HIGHBIT;
575           if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
576             return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
577           MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
578           MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
579           MPFR_SET_EXP (a, ax + 1);
580         }
581       MPFR_RET(MPFR_SIGN(a));
582     }
583 }
584 
585 /* Special code for 2*GMP_NUMB_BITS < prec(a) < 3*GMP_NUMB_BITS and
586    2*GMP_NUMB_BITS < prec(b), prec(c) <= 3*GMP_NUMB_BITS. */
587 static int
588 mpfr_mul_3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
589             mpfr_prec_t p)
590 {
591   mp_limb_t a0, a1, a2, h, l, cy;
592   mpfr_limb_ptr ap = MPFR_MANT(a);
593   mpfr_exp_t ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
594   mpfr_prec_t sh = 3 * GMP_NUMB_BITS - p;
595   mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
596   mp_limb_t *bp = MPFR_MANT(b), *cp = MPFR_MANT(c);
597 
598   /* we store the upper 3-limb product in a2, a1, a0:
599      b2*c2, b2*c1+b1*c2, b2*c0+b1*c1+b0*c2 */
600   umul_ppmm (a2, a1, bp[2], cp[2]);
601   umul_ppmm (h, a0, bp[2], cp[1]);
602   a1 += h;
603   a2 += (a1 < h);
604   umul_ppmm (h, l, bp[1], cp[2]);
605   a1 += h;
606   a2 += (a1 < h);
607   a0 += l;
608   cy = a0 < l; /* carry in a1 */
609   umul_ppmm (h, l, bp[2], cp[0]);
610   a0 += h;
611   cy += (a0 < h);
612   umul_ppmm (h, l, bp[1], cp[1]);
613   a0 += h;
614   cy += (a0 < h);
615   umul_ppmm (h, l, bp[0], cp[2]);
616   a0 += h;
617   cy += (a0 < h);
618   /* now propagate cy */
619   a1 += cy;
620   a2 += (a1 < cy);
621 
622   /* Now the approximate product {a2, a1, a0} has an error of less than
623      5 ulps (3 ulps for the ignored low limbs of b2*c0+b1*c1+b0*c2,
624      plus 2 ulps for the ignored b1*c0+b0*c1 (plus b0*c0)).
625      Since we might shift by 1 bit, we make sure the low sh-2 bits of a0
626      are not 0, -1, -2, -3 or -4. */
627 
628   if (MPFR_LIKELY(((a0 + 4) & (mask >> 2)) > 4))
629     sb = sb2 = 1; /* result cannot be exact in that case */
630   else
631     {
632       mp_limb_t p[6];
633       mpn_mul_n (p, bp, cp, 3);
634       a2 = p[5];
635       a1 = p[4];
636       a0 = p[3];
637       sb = p[2];
638       sb2 = p[1] | p[0];
639     }
640   if (a2 < MPFR_LIMB_HIGHBIT)
641     {
642       ax --;
643       a2 = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
644       a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
645       a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
646       sb <<= 1;
647       /* no need to shift sb2: we only need to know if it is zero or not */
648     }
649   ap[2] = a2;
650   ap[1] = a1;
651   rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
652   sb |= ((a0 & mask) ^ rb) | sb2;
653   ap[0] = a0 & ~mask;
654 
655   MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
656 
657   /* rounding */
658   if (MPFR_UNLIKELY(ax > __gmpfr_emax))
659     return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
660 
661   /* Warning: underflow should be checked *after* rounding, thus when rounding
662      away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
663      a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
664   if (MPFR_UNLIKELY(ax < __gmpfr_emin))
665     {
666       if (ax == __gmpfr_emin - 1 &&
667           ap[2] == MPFR_LIMB_MAX &&
668           ap[1] == MPFR_LIMB_MAX &&
669           ap[0] == MPFR_LIMB(~mask) &&
670           ((rnd_mode == MPFR_RNDN && rb) ||
671            (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
672         goto rounding; /* no underflow */
673       /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
674          we have to change to RNDZ */
675       if (rnd_mode == MPFR_RNDN &&
676           (ax < __gmpfr_emin - 1 ||
677            (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0
678             && (rb | sb) == 0)))
679         rnd_mode = MPFR_RNDZ;
680       return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
681     }
682 
683  rounding:
684   MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
685                         in the cases "goto rounding" above. */
686   if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
687     {
688       MPFR_ASSERTD(ax >= __gmpfr_emin);
689       MPFR_RET (0);
690     }
691   else if (rnd_mode == MPFR_RNDN)
692     {
693       if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
694         goto truncate;
695       else
696         goto add_one_ulp;
697     }
698   else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
699     {
700     truncate:
701       MPFR_ASSERTD(ax >= __gmpfr_emin);
702       MPFR_RET(-MPFR_SIGN(a));
703     }
704   else /* round away from zero */
705     {
706     add_one_ulp:
707       ap[0] += MPFR_LIMB_ONE << sh;
708       ap[1] += (ap[0] == 0);
709       ap[2] += (ap[1] == 0) && (ap[0] == 0);
710       if (ap[2] == 0)
711         {
712           ap[2] = MPFR_LIMB_HIGHBIT;
713           if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
714             return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
715           MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
716           MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
717           MPFR_SET_EXP (a, ax + 1);
718         }
719       MPFR_RET(MPFR_SIGN(a));
720     }
721 }
722 
723 #endif /* !defined(MPFR_GENERIC_ABI) */
724 
725 /* Note: mpfr_sqr will call mpfr_mul if bn > MPFR_SQR_THRESHOLD,
726    in order to use Mulders' mulhigh, which is handled only here
727    to avoid partial code duplication. There is some overhead due
728    to the additional tests, but the slowdown should not be noticeable
729    as this code is not executed for very small precisions. */
730 
731 MPFR_HOT_FUNCTION_ATTR int
732 mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
733 {
734   int sign, inexact;
735   mpfr_exp_t ax, ax2;
736   mp_limb_t *tmp;
737   mp_limb_t b1;
738   mpfr_prec_t aq, bq, cq;
739   mp_size_t bn, cn, tn, k, threshold;
740   MPFR_TMP_DECL (marker);
741 
742   MPFR_LOG_FUNC
743     (("b[%Pd]=%.*Rg c[%Pd]=%.*Rg rnd=%d",
744       mpfr_get_prec (b), mpfr_log_prec, b,
745       mpfr_get_prec (c), mpfr_log_prec, c, rnd_mode),
746      ("a[%Pd]=%.*Rg inexact=%d",
747       mpfr_get_prec (a), mpfr_log_prec, a, inexact));
748 
749   /* deal with special cases */
750   if (MPFR_ARE_SINGULAR (b, c))
751     {
752       if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
753         {
754           MPFR_SET_NAN (a);
755           MPFR_RET_NAN;
756         }
757       sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
758       if (MPFR_IS_INF (b))
759         {
760           if (!MPFR_IS_ZERO (c))
761             {
762               MPFR_SET_SIGN (a, sign);
763               MPFR_SET_INF (a);
764               MPFR_RET (0);
765             }
766           else
767             {
768               MPFR_SET_NAN (a);
769               MPFR_RET_NAN;
770             }
771         }
772       else if (MPFR_IS_INF (c))
773         {
774           if (!MPFR_IS_ZERO (b))
775             {
776               MPFR_SET_SIGN (a, sign);
777               MPFR_SET_INF (a);
778               MPFR_RET(0);
779             }
780           else
781             {
782               MPFR_SET_NAN (a);
783               MPFR_RET_NAN;
784             }
785         }
786       else
787         {
788           MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
789           MPFR_SET_SIGN (a, sign);
790           MPFR_SET_ZERO (a);
791           MPFR_RET (0);
792         }
793     }
794 
795   aq = MPFR_GET_PREC (a);
796   bq = MPFR_GET_PREC (b);
797   cq = MPFR_GET_PREC (c);
798 
799 #if !defined(MPFR_GENERIC_ABI)
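  /* Fast paths: when a, b and c share the same small precision (up to about
     three limbs), dispatch to the specialized routines above, which avoid
     the temporary allocation and the generic limb loops below. */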
800   if (aq == bq && aq == cq)
801     {
802       if (aq < GMP_NUMB_BITS)
803         return mpfr_mul_1 (a, b, c, rnd_mode, aq);
804 
805       if (GMP_NUMB_BITS < aq && aq < 2 * GMP_NUMB_BITS)
806         return mpfr_mul_2 (a, b, c, rnd_mode, aq);
807 
808       if (aq == GMP_NUMB_BITS)
809         return mpfr_mul_1n (a, b, c, rnd_mode);
810 
811       if (2 * GMP_NUMB_BITS < aq && aq < 3 * GMP_NUMB_BITS)
812         return mpfr_mul_3 (a, b, c, rnd_mode, aq);
813     }
814 #endif
815 
816   sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
817 
818   ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
819   /* Note: the exponent of the exact result will be e = bx + cx + ec with
820      ec in {-1,0,1} and the following assumes that e is representable. */
821 
822   /* FIXME: Useful since we do an exponent check after?
823    * It is useful iff the precision is big, there is an overflow
824    * and we are doing further mults...*/
825 #ifdef HUGE
826   if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
827     return mpfr_overflow (a, rnd_mode, sign);
828   if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
829     return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
830                            sign);
831 #endif
832 
833   MPFR_ASSERTN ((mpfr_uprec_t) bq + cq <= MPFR_PREC_MAX);
834 
835   bn = MPFR_PREC2LIMBS (bq); /* number of limbs of b */
836   cn = MPFR_PREC2LIMBS (cq); /* number of limbs of c */
837   k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
838   tn = MPFR_PREC2LIMBS (bq + cq);
839   MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */
840 
841   /* Check for no size_t overflow. */
842   MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / MPFR_BYTES_PER_MP_LIMB);
843   MPFR_TMP_MARK (marker);
844   tmp = MPFR_TMP_LIMBS_ALLOC (k);
845 
846   /* multiplies the two mantissas in temporarily allocated space */
847   if (MPFR_UNLIKELY (bn < cn))
848     {
849       mpfr_srcptr z = b;
850       mp_size_t zn  = bn;
851       b = c;
852       bn = cn;
853       c = z;
854       cn = zn;
855     }
856   MPFR_ASSERTD (bn >= cn);
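  /* Three strategies follow, depending on the operand sizes: an inline
     schoolbook product for bn <= 2, Mulders' short product (mpfr_mulhigh_n
     or mpfr_sqrhigh_n) when cn exceeds the threshold and the approximation
     turns out to be roundable, and a plain mpn_mul otherwise. */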
857   if (bn <= 2)
858     {
859       /* The 3 cases perform the same first operation. */
860       umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
861       if (bn == 1)
862         {
863           /* 1 limb * 1 limb */
864           b1 = tmp[1];
865         }
866       else if (MPFR_UNLIKELY (cn == 1))
867         {
868           /* 2 limbs * 1 limb */
869           mp_limb_t t;
870           umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
871           add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
872           b1 = tmp[2];
873         }
874       else
875         {
876           /* 2 limbs * 2 limbs */
877           mp_limb_t t1, t2, t3;
878           /* First 2 limbs * 1 limb */
879           umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
880           add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
881           /* Second, the other 2 limbs * 1 limb product */
882           umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
883           umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
884           add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
885           /* Sum those two partial products */
886           add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
887           tmp[3] += (tmp[2] < t1);
888           b1 = tmp[3];
889         }
890       b1 >>= (GMP_NUMB_BITS - 1);
891       tmp += k - tn;
892       if (MPFR_UNLIKELY (b1 == 0))
893         mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
894     }
895   else /* bn >= cn and bn >= 3 */
896     /* Mulders' mulhigh. This code can also be used via mpfr_sqr,
897        hence the tests b != c. */
898     if (MPFR_UNLIKELY (cn > (threshold = b != c ?
899                              MPFR_MUL_THRESHOLD : MPFR_SQR_THRESHOLD)))
900       {
901         mp_limb_t *bp, *cp;
902         mp_size_t n;
903         mpfr_prec_t p;
904 
905         /* First check if we can reduce the precision of b or c:
906            exact values are a nightmare for the short product trick */
907         bp = MPFR_MANT (b);
908         cp = MPFR_MANT (c);
909         MPFR_STAT_STATIC_ASSERT (MPFR_MUL_THRESHOLD >= 1 &&
910                                  MPFR_SQR_THRESHOLD >= 1);
911         if (MPFR_UNLIKELY ((bp[0] == 0 && bp[1] == 0) ||
912                            (cp[0] == 0 && cp[1] == 0)))
913           {
914             mpfr_t b_tmp, c_tmp;
915 
916             MPFR_TMP_FREE (marker);
917             /* Check for b */
918             while (*bp == 0)
919               {
920                 bp++;
921                 bn--;
922                 MPFR_ASSERTD (bn > 0);
923               } /* This must end since the most significant limb is != 0 */
924 
925             /* Check for c too: if b == c, this will do nothing */
926             while (*cp == 0)
927               {
928                 cp++;
929                 cn--;
930                 MPFR_ASSERTD (cn > 0);
931               } /* This must end since the most significant limb is != 0 */
932 
933             /* It is not the fastest way, but it is safer. */
934             MPFR_SET_SAME_SIGN (b_tmp, b);
935             MPFR_SET_EXP (b_tmp, MPFR_GET_EXP (b));
936             MPFR_PREC (b_tmp) = bn * GMP_NUMB_BITS;
937             MPFR_MANT (b_tmp) = bp;
938 
939             if (b != c)
940               {
941                 MPFR_SET_SAME_SIGN (c_tmp, c);
942                 MPFR_SET_EXP (c_tmp, MPFR_GET_EXP (c));
943                 MPFR_PREC (c_tmp) = cn * GMP_NUMB_BITS;
944                 MPFR_MANT (c_tmp) = cp;
945 
946                 /* Call again mpfr_mul with the fixed arguments */
947                 return mpfr_mul (a, b_tmp, c_tmp, rnd_mode);
948               }
949             else
950               /* Call mpfr_mul instead of mpfr_sqr as the precision
951                  is probably still high enough. It is thus better to call
952                  mpfr_mul again, but it should not give an infinite loop
953                  if we call mpfr_sqr. */
954               return mpfr_mul (a, b_tmp, b_tmp, rnd_mode);
955           }
956 
957         /* Compute estimated precision of mulhigh.
958            We could use `+ (n < cn) + (n < bn)' instead of `+ 2',
959            but is it worth it? */
960         n = MPFR_LIMB_SIZE (a) + 1;
961         n = MIN (n, cn);
962         MPFR_ASSERTD (n >= 1 && 2*n <= k && n <= cn && n <= bn);
963         p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
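        /* For instance, with 64-bit limbs and n = 4 this gives
           p = 4*64 - ceil(log2(6)) = 253 bits that can be trusted in the
           short product. */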
964         bp += bn - n;
965         cp += cn - n;
966 
967         /* Check if MulHigh can produce a roundable result.
968            We may lose 1 bit due to RNDN, 1 due to final shift. */
969         if (MPFR_UNLIKELY (aq > p - 5))
970           {
971             if (MPFR_UNLIKELY (aq > p - 5 + GMP_NUMB_BITS
972                                || bn <= threshold + 1))
973               {
974                 /* MulHigh can't produce a roundable result. */
975                 MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
976                                aq, p));
977                 goto full_multiply;
978               }
979             /* Add one extra limb to mantissa of b and c. */
980             if (bn > n)
981               bp --;
982             else
983               {
984                 bp = MPFR_TMP_LIMBS_ALLOC (n + 1);
985                 bp[0] = 0;
986                 MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n);
987               }
988             if (b != c)
989               {
990 #if GMP_NUMB_BITS <= 32
991                 if (cn > n)
992                   cp --; /* This can only happen on a 32-bit computer,
993                             and is very unlikely to happen.
994                             Indeed, since n = MIN (an + 1, cn), with
995                             an = MPFR_LIMB_SIZE(a), we can have cn > n
996                             only when n = an + 1 < cn.
997                             We are in the case aq > p - 5, with
998                             aq = PREC(a) = an*W - sh, with W = GMP_NUMB_BITS
999                             and 0 <= sh < W, and p = n*W - ceil(log2(n+2)),
1000                             thus an*W - sh > n*W - ceil(log2(n+2)) - 5.
1001                             Thus n < an + (ceil(log2(n+2)) + 5 - sh)/W.
1002                             To get n = an + 1, we need
1003                             ceil(log2(n+2)) + 5 - sh > W, thus since sh>=0
1004                             we need ceil(log2(n+2)) + 5 > W.
1005                             With W=32 this can only happen for n>=2^27-1,
1006                             thus for a precision of 2^32-64 for a,
1007                             and with W=64 for n>=2^59-1, which would give
1008                             a precision >= 2^64. */
1009                 else
1010 #endif
1011                   {
1012                     cp = MPFR_TMP_LIMBS_ALLOC (n + 1);
1013                     cp[0] = 0;
1014                     MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
1015                   }
1016               }
1017             /* We will compute with one extra limb */
1018             n++;
1019             /* ceil(log2(n+2)) takes into account the lost bits due to
1020                Mulders' short product */
1021             p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
1022             /* Due to some nasty reasons we can have only 4 bits */
1023             MPFR_ASSERTD (aq <= p - 4);
1024 
1025             if (MPFR_LIKELY (k < 2*n))
1026               {
1027                 tmp = MPFR_TMP_LIMBS_ALLOC (2 * n);
1028                 tmp += 2*n-k; /* `tmp' still points to an area of `k' limbs */
1029               }
1030           }
1031         MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", aq, p));
1032         /* Compute an approximation of the product of b and c */
1033         if (b != c)
1034           mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
1035         else
1036           mpfr_sqrhigh_n (tmp + k - 2 * n, bp, n);
1037         /* now tmp[k-n]..tmp[k-1] contains an approximation of the n upper
1038            limbs of the product, with tmp[k-1] >= 2^(GMP_NUMB_BITS-2) */
1039         b1 = tmp[k-1] >> (GMP_NUMB_BITS - 1); /* msb from the product */
1040 
1041         /* If the mantissas of b and c are uniformly distributed in (1/2, 1],
1042            then their product is in (1/4, 1/2] with probability 2*ln(2)-1
1043            ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
1044         if (MPFR_UNLIKELY (b1 == 0))
1045           /* Warning: the mpfr_mulhigh_n call above only surely affects
1046              tmp[k-n-1..k-1], thus we shift only those limbs */
1047           mpn_lshift (tmp + k - n - 1, tmp + k - n - 1, n + 1, 1);
1048         tmp += k - tn;
1049         /* now the approximation is in tmp[tn-n]...tmp[tn-1] */
1050         MPFR_ASSERTD (MPFR_LIMB_MSB (tmp[tn-1]) != 0);
1051 
1052         /* for RNDF, we simply use RNDZ, since anyway here we multiply numbers
1053            with large precisions, thus the overhead of RNDZ is small */
1054         if (rnd_mode == MPFR_RNDF)
1055           rnd_mode = MPFR_RNDZ;
1056 
1057         /* if the most significant bit b1 is zero, we have only p-1 correct
1058            bits */
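        /* mpfr_round_p tells whether {tmp, tn}, correct to about p + b1 - 1
           bits, can safely be rounded to aq bits (one extra bit is requested
           for RNDN so that ties can be decided); if not, fall back to the
           full product. */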
1059         if (MPFR_UNLIKELY (!mpfr_round_p (tmp, tn, p + b1 - 1,
1060                                           aq + (rnd_mode == MPFR_RNDN))))
1061           {
1062             tmp -= k - tn; /* tmp may have changed, FIX IT!!!!! */
1063             goto full_multiply;
1064           }
1065       }
1066     else
1067       {
1068       full_multiply:
1069         MPFR_LOG_MSG (("Use mpn_mul\n", 0));
1070         b1 = mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn);
1071 
1072         /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
1073            with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
1074         b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */
1075 
1076         /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
1077            then their product is in (1/4, 1/2] with probability 2*ln(2)-1
1078            ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
1079         tmp += k - tn;
1080         if (MPFR_UNLIKELY (b1 == 0))
1081           mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
1082       }
1083 
1084   /* b1 is 0 or 1 (most significant bit from the raw product) */
1085   ax2 = ax + ((int) b1 - 1);
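  /* MPFR_RNDRAW rounds the bq+cq bits of the product in tmp[] to the
     precision of a; its last argument (here `ax2++') is executed when
     rounding carries up to the next power of two, keeping the exponent
     consistent. */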
1086   MPFR_RNDRAW (inexact, a, tmp, bq + cq, rnd_mode, sign, ax2++);
1087   MPFR_TMP_FREE (marker);
1088   MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: Expo may be out of range */
1089   MPFR_SET_SIGN (a, sign);
1090   if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
1091     return mpfr_overflow (a, rnd_mode, sign);
1092   if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
1093     {
1094       /* In the rounding to the nearest mode, if the exponent of the exact
1095          result (i.e. before rounding, i.e. without taking cc into account)
1096          is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
1097          both arguments are powers of 2), then round to zero. */
1098       if (rnd_mode == MPFR_RNDN
1099           && (ax + (mpfr_exp_t) b1 < __gmpfr_emin
1100               || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
1101         rnd_mode = MPFR_RNDZ;
1102       return mpfr_underflow (a, rnd_mode, sign);
1103     }
1104   MPFR_RET (inexact);
1105 }
1106