xref: /netbsd-src/external/lgpl3/mpfr/dist/src/fma.c (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
1 /* mpfr_fma -- Floating multiply-add
2 
3 Copyright 2001-2002, 2004, 2006-2016 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include "mpfr-impl.h"
24 
25 /* The fused-multiply-add (fma) of x, y and z is defined by:
26    fma(x,y,z)= x*y + z
27 */
28 
29 int
30 mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
31           mpfr_rnd_t rnd_mode)
32 {
33   int inexact;
34   mpfr_t u;
35   MPFR_SAVE_EXPO_DECL (expo);
36   MPFR_GROUP_DECL(group);
37 
38   MPFR_LOG_FUNC
39     (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
40       mpfr_get_prec (x), mpfr_log_prec, x,
41       mpfr_get_prec (y), mpfr_log_prec, y,
42       mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
43      ("s[%Pu]=%.*Rg inexact=%d",
44       mpfr_get_prec (s), mpfr_log_prec, s, inexact));
45 
46   /* particular cases */
47   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
48                      MPFR_IS_SINGULAR(y) ||
49                      MPFR_IS_SINGULAR(z) ))
50     {
51       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
52         {
53           MPFR_SET_NAN(s);
54           MPFR_RET_NAN;
55         }
56       /* now neither x, y or z is NaN */
57       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
58         {
59           /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
60           if ((MPFR_IS_ZERO(y)) ||
61               (MPFR_IS_ZERO(x)) ||
62               (MPFR_IS_INF(z) &&
63                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
64             {
65               MPFR_SET_NAN(s);
66               MPFR_RET_NAN;
67             }
68           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
69             {
70               MPFR_SET_INF(s);
71               MPFR_SET_SAME_SIGN(s, z);
72               MPFR_RET(0);
73             }
74           else /* z is finite */
75             {
76               MPFR_SET_INF(s);
77               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
78               MPFR_RET(0);
79             }
80         }
81       /* now x and y are finite */
82       else if (MPFR_IS_INF(z))
83         {
84           MPFR_SET_INF(s);
85           MPFR_SET_SAME_SIGN(s, z);
86           MPFR_RET(0);
87         }
88       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
89         {
90           if (MPFR_IS_ZERO(z))
91             {
92               int sign_p;
93               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
94               MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
95                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
96                                 ? -1 : 1) :
97                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
98                                 ? 1 : -1)));
99               MPFR_SET_ZERO(s);
100               MPFR_RET(0);
101             }
102           else
103             return mpfr_set (s, z, rnd_mode);
104         }
105       else /* necessarily z is zero here */
106         {
107           MPFR_ASSERTD(MPFR_IS_ZERO(z));
108           return mpfr_mul (s, x, y, rnd_mode);
109         }
110     }
111 
112   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
113      is exact, except in case of overflow or underflow. */
114   MPFR_SAVE_EXPO_MARK (expo);
115   MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
116 
117   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
118     {
119       /* overflow or underflow - this case is regarded as rare, thus
120          does not need to be very efficient (even if some tests below
121          could have been done earlier).
122          It is an overflow iff u is an infinity (since MPFR_RNDN was used).
123          Alternatively, we could test the overflow flag, but in this case,
124          mpfr_clear_flags would have been necessary. */
125 
126       if (MPFR_IS_INF (u))  /* overflow */
127         {
128           MPFR_LOG_MSG (("Overflow on x*y\n", 0));
129 
130           /* Let's eliminate the obvious case where x*y and z have the
131              same sign. No possible cancellation -> real overflow.
132              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
133              then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
134              is also an overflow. */
135           if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
136               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
137             {
138               MPFR_GROUP_CLEAR (group);
139               MPFR_SAVE_EXPO_FREE (expo);
140               return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
141             }
142 
143           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
144              (x/4)*y does not overflow (let's recall that the result
145              is exact with an unbounded exponent range). It does not
146              underflow either, because x*y overflows and the exponent
147              range is large enough. */
148           inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
149           MPFR_ASSERTN (inexact == 0);
150           inexact = mpfr_mul (u, u, y, MPFR_RNDN);
151           MPFR_ASSERTN (inexact == 0);
152 
153           /* Now, we need to add z/4... But it may underflow! */
154           {
155             mpfr_t zo4;
156             mpfr_srcptr zz;
157             MPFR_BLOCK_DECL (flags);
158 
159             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
160                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
161               {
162                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
163                 zz = z;
164               }
165             else
166               {
167                 mpfr_init2 (zo4, MPFR_PREC (z));
168                 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
169                   {
170                     /* The division by 4 underflowed! */
171                     MPFR_ASSERTN (0); /* TODO... */
172                   }
173                 zz = zo4;
174               }
175 
176             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
177                following addition would give the same result). */
178             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
179             /* u and zz have different signs, so that an overflow
180                is not possible. But an underflow is theoretically
181                possible! */
182             if (MPFR_UNDERFLOW (flags))
183               {
184                 MPFR_ASSERTN (zz != z);
185                 MPFR_ASSERTN (0); /* TODO... */
186                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
187               }
188             else
189               {
190                 int inex2;
191 
192                 if (zz != z)
193                   mpfr_clear (zo4);
194                 MPFR_GROUP_CLEAR (group);
195                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
196                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
197                 if (inex2)  /* overflow */
198                   {
199                     inexact = inex2;
200                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
201                   }
202                 goto end;
203               }
204           }
205         }
206       else  /* underflow: one has |xy| < 2^(emin-1). */
207         {
208           unsigned long scale = 0;
209           mpfr_t scaled_z;
210           mpfr_srcptr new_z;
211           mpfr_exp_t diffexp;
212           mpfr_prec_t pzs;
213           int xy_underflows;
214 
215           MPFR_LOG_MSG (("Underflow on x*y\n", 0));
216 
217           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
218              (the + 1 on MPFR_PREC (s) is necessary because the exponent
219              of the result can be EXP(z) - 1). */
220           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
221           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
222           MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d pzs=%Pd\n",
223                          diffexp, pzs));
224           if (diffexp <= pzs)
225             {
226               mpfr_uexp_t uscale;
227               mpfr_t scaled_v;
228               MPFR_BLOCK_DECL (flags);
229 
230               uscale = (mpfr_uexp_t) pzs - diffexp + 1;
231               MPFR_ASSERTN (uscale > 0);
232               MPFR_ASSERTN (uscale <= ULONG_MAX);
233               scale = uscale;
234               mpfr_init2 (scaled_z, MPFR_PREC (z));
235               inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
236               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
237               new_z = scaled_z;
238               /* Now we need to recompute u = xy * 2^scale. */
239               MPFR_BLOCK (flags,
240                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
241                             {
242                               mpfr_init2 (scaled_v, MPFR_PREC (x));
243                               mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
244                               mpfr_mul (u, scaled_v, y, MPFR_RNDN);
245                             }
246                           else
247                             {
248                               mpfr_init2 (scaled_v, MPFR_PREC (y));
249                               mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
250                               mpfr_mul (u, x, scaled_v, MPFR_RNDN);
251                             });
252               mpfr_clear (scaled_v);
253               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
254               xy_underflows = MPFR_UNDERFLOW (flags);
255             }
256           else
257             {
258               new_z = z;
259               xy_underflows = 1;
260             }
261 
262           MPFR_LOG_MSG (("scale=%lu xy_underflows=%d\n",
263                          scale, xy_underflows));
264 
265           if (xy_underflows)
266             {
267               /* Let's replace xy by sign(xy) * 2^(emin-1). */
268               MPFR_PREC (u) = MPFR_PREC_MIN;
269               mpfr_setmin (u, __gmpfr_emin);
270               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
271                                                 MPFR_SIGN (y)));
272             }
273 
274           {
275             MPFR_BLOCK_DECL (flags);
276 
277             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
278             MPFR_LOG_MSG (("inexact=%d\n", inexact));
279             MPFR_GROUP_CLEAR (group);
280             if (scale != 0)
281               {
282                 int inex2;
283 
284                 mpfr_clear (scaled_z);
285                 /* Here an overflow is theoretically possible, in which case
286                    the result may be wrong, hence the assert. An underflow
287                    is not possible, but let's check that anyway. */
288                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
289                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
290                 if (rnd_mode == MPFR_RNDN &&
291                     MPFR_GET_EXP (s) == __gmpfr_emin - 1 + scale &&
292                     mpfr_powerof2_raw (s))
293                   {
294                     MPFR_LOG_MSG (("Double rounding\n", 0));
295                     rnd_mode = (MPFR_IS_NEG (s) ? inexact <= 0 : inexact >= 0)
296                       ? MPFR_RNDZ : MPFR_RNDA;
297                   }
298                 inex2 = mpfr_div_2ui (s, s, scale, rnd_mode);
299                 MPFR_LOG_MSG (("inex2=%d\n", inex2));
300                 if (inex2)  /* underflow */
301                   inexact = inex2;
302               }
303           }
304 
305           /* FIXME/TODO: I'm not sure that the following is correct.
306              Check for possible spurious exceptions due to intermediate
307              computations. */
308           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
309           goto end;
310         }
311     }
312 
313   inexact = mpfr_add (s, u, z, rnd_mode);
314   MPFR_GROUP_CLEAR (group);
315   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
316  end:
317   MPFR_SAVE_EXPO_FREE (expo);
318   return mpfr_check_range (s, inexact, rnd_mode);
319 }
320