xref: /dflybsd-src/contrib/mpfr/src/fma.c (revision 2786097444a0124b5d33763854de247e230c6629)
14a238c70SJohn Marino /* mpfr_fma -- Floating multiply-add
24a238c70SJohn Marino 
3*ab6d115fSJohn Marino Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino 
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino 
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino 
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino 
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino 
234a238c70SJohn Marino #include "mpfr-impl.h"
244a238c70SJohn Marino 
254a238c70SJohn Marino /* The fused-multiply-add (fma) of x, y and z is defined by:
264a238c70SJohn Marino    fma(x,y,z)= x*y + z
274a238c70SJohn Marino */
284a238c70SJohn Marino 
294a238c70SJohn Marino int
mpfr_fma(mpfr_ptr s,mpfr_srcptr x,mpfr_srcptr y,mpfr_srcptr z,mpfr_rnd_t rnd_mode)304a238c70SJohn Marino mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
314a238c70SJohn Marino           mpfr_rnd_t rnd_mode)
324a238c70SJohn Marino {
334a238c70SJohn Marino   int inexact;
344a238c70SJohn Marino   mpfr_t u;
354a238c70SJohn Marino   MPFR_SAVE_EXPO_DECL (expo);
364a238c70SJohn Marino   MPFR_GROUP_DECL(group);
374a238c70SJohn Marino 
384a238c70SJohn Marino   MPFR_LOG_FUNC
394a238c70SJohn Marino     (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
404a238c70SJohn Marino       mpfr_get_prec (x), mpfr_log_prec, x,
414a238c70SJohn Marino       mpfr_get_prec (y), mpfr_log_prec, y,
424a238c70SJohn Marino       mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
434a238c70SJohn Marino      ("s[%Pu]=%.*Rg inexact=%d",
444a238c70SJohn Marino       mpfr_get_prec (s), mpfr_log_prec, s, inexact));
454a238c70SJohn Marino 
464a238c70SJohn Marino   /* particular cases */
474a238c70SJohn Marino   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
484a238c70SJohn Marino                      MPFR_IS_SINGULAR(y) ||
494a238c70SJohn Marino                      MPFR_IS_SINGULAR(z) ))
504a238c70SJohn Marino     {
514a238c70SJohn Marino       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
524a238c70SJohn Marino         {
534a238c70SJohn Marino           MPFR_SET_NAN(s);
544a238c70SJohn Marino           MPFR_RET_NAN;
554a238c70SJohn Marino         }
564a238c70SJohn Marino       /* now neither x, y or z is NaN */
574a238c70SJohn Marino       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
584a238c70SJohn Marino         {
594a238c70SJohn Marino           /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
604a238c70SJohn Marino           if ((MPFR_IS_ZERO(y)) ||
614a238c70SJohn Marino               (MPFR_IS_ZERO(x)) ||
624a238c70SJohn Marino               (MPFR_IS_INF(z) &&
634a238c70SJohn Marino                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
644a238c70SJohn Marino             {
654a238c70SJohn Marino               MPFR_SET_NAN(s);
664a238c70SJohn Marino               MPFR_RET_NAN;
674a238c70SJohn Marino             }
684a238c70SJohn Marino           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
694a238c70SJohn Marino             {
704a238c70SJohn Marino               MPFR_SET_INF(s);
714a238c70SJohn Marino               MPFR_SET_SAME_SIGN(s, z);
724a238c70SJohn Marino               MPFR_RET(0);
734a238c70SJohn Marino             }
744a238c70SJohn Marino           else /* z is finite */
754a238c70SJohn Marino             {
764a238c70SJohn Marino               MPFR_SET_INF(s);
774a238c70SJohn Marino               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
784a238c70SJohn Marino               MPFR_RET(0);
794a238c70SJohn Marino             }
804a238c70SJohn Marino         }
814a238c70SJohn Marino       /* now x and y are finite */
824a238c70SJohn Marino       else if (MPFR_IS_INF(z))
834a238c70SJohn Marino         {
844a238c70SJohn Marino           MPFR_SET_INF(s);
854a238c70SJohn Marino           MPFR_SET_SAME_SIGN(s, z);
864a238c70SJohn Marino           MPFR_RET(0);
874a238c70SJohn Marino         }
884a238c70SJohn Marino       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
894a238c70SJohn Marino         {
904a238c70SJohn Marino           if (MPFR_IS_ZERO(z))
914a238c70SJohn Marino             {
924a238c70SJohn Marino               int sign_p;
934a238c70SJohn Marino               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
944a238c70SJohn Marino               MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
954a238c70SJohn Marino                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
964a238c70SJohn Marino                                 ? -1 : 1) :
974a238c70SJohn Marino                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
984a238c70SJohn Marino                                 ? 1 : -1)));
994a238c70SJohn Marino               MPFR_SET_ZERO(s);
1004a238c70SJohn Marino               MPFR_RET(0);
1014a238c70SJohn Marino             }
1024a238c70SJohn Marino           else
1034a238c70SJohn Marino             return mpfr_set (s, z, rnd_mode);
1044a238c70SJohn Marino         }
1054a238c70SJohn Marino       else /* necessarily z is zero here */
1064a238c70SJohn Marino         {
1074a238c70SJohn Marino           MPFR_ASSERTD(MPFR_IS_ZERO(z));
1084a238c70SJohn Marino           return mpfr_mul (s, x, y, rnd_mode);
1094a238c70SJohn Marino         }
1104a238c70SJohn Marino     }
1114a238c70SJohn Marino 
1124a238c70SJohn Marino   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
1134a238c70SJohn Marino      is exact, except in case of overflow or underflow. */
1144a238c70SJohn Marino   MPFR_SAVE_EXPO_MARK (expo);
1154a238c70SJohn Marino   MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
1164a238c70SJohn Marino 
1174a238c70SJohn Marino   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
1184a238c70SJohn Marino     {
1194a238c70SJohn Marino       /* overflow or underflow - this case is regarded as rare, thus
1204a238c70SJohn Marino          does not need to be very efficient (even if some tests below
1214a238c70SJohn Marino          could have been done earlier).
1224a238c70SJohn Marino          It is an overflow iff u is an infinity (since MPFR_RNDN was used).
1234a238c70SJohn Marino          Alternatively, we could test the overflow flag, but in this case,
1244a238c70SJohn Marino          mpfr_clear_flags would have been necessary. */
1254a238c70SJohn Marino       if (MPFR_IS_INF (u))  /* overflow */
1264a238c70SJohn Marino         {
1274a238c70SJohn Marino           /* Let's eliminate the obvious case where x*y and z have the
1284a238c70SJohn Marino              same sign. No possible cancellation -> real overflow.
1294a238c70SJohn Marino              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
1304a238c70SJohn Marino              then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
1314a238c70SJohn Marino              is also an overflow. */
1324a238c70SJohn Marino           if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
1334a238c70SJohn Marino               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
1344a238c70SJohn Marino             {
1354a238c70SJohn Marino               MPFR_GROUP_CLEAR (group);
1364a238c70SJohn Marino               MPFR_SAVE_EXPO_FREE (expo);
1374a238c70SJohn Marino               return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
1384a238c70SJohn Marino             }
1394a238c70SJohn Marino 
1404a238c70SJohn Marino           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
1414a238c70SJohn Marino              (x/4)*y does not overflow (let's recall that the result
1424a238c70SJohn Marino              is exact with an unbounded exponent range). It does not
1434a238c70SJohn Marino              underflow either, because x*y overflows and the exponent
1444a238c70SJohn Marino              range is large enough. */
1454a238c70SJohn Marino           inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
1464a238c70SJohn Marino           MPFR_ASSERTN (inexact == 0);
1474a238c70SJohn Marino           inexact = mpfr_mul (u, u, y, MPFR_RNDN);
1484a238c70SJohn Marino           MPFR_ASSERTN (inexact == 0);
1494a238c70SJohn Marino 
1504a238c70SJohn Marino           /* Now, we need to add z/4... But it may underflow! */
1514a238c70SJohn Marino           {
1524a238c70SJohn Marino             mpfr_t zo4;
1534a238c70SJohn Marino             mpfr_srcptr zz;
1544a238c70SJohn Marino             MPFR_BLOCK_DECL (flags);
1554a238c70SJohn Marino 
1564a238c70SJohn Marino             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
1574a238c70SJohn Marino                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
1584a238c70SJohn Marino               {
1594a238c70SJohn Marino                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
1604a238c70SJohn Marino                 zz = z;
1614a238c70SJohn Marino               }
1624a238c70SJohn Marino             else
1634a238c70SJohn Marino               {
1644a238c70SJohn Marino                 mpfr_init2 (zo4, MPFR_PREC (z));
1654a238c70SJohn Marino                 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
1664a238c70SJohn Marino                   {
1674a238c70SJohn Marino                     /* The division by 4 underflowed! */
1684a238c70SJohn Marino                     MPFR_ASSERTN (0); /* TODO... */
1694a238c70SJohn Marino                   }
1704a238c70SJohn Marino                 zz = zo4;
1714a238c70SJohn Marino               }
1724a238c70SJohn Marino 
1734a238c70SJohn Marino             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
1744a238c70SJohn Marino                following addition would give the same result). */
1754a238c70SJohn Marino             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
1764a238c70SJohn Marino             /* u and zz have different signs, so that an overflow
1774a238c70SJohn Marino                is not possible. But an underflow is theoretically
1784a238c70SJohn Marino                possible! */
1794a238c70SJohn Marino             if (MPFR_UNDERFLOW (flags))
1804a238c70SJohn Marino               {
1814a238c70SJohn Marino                 MPFR_ASSERTN (zz != z);
1824a238c70SJohn Marino                 MPFR_ASSERTN (0); /* TODO... */
1834a238c70SJohn Marino                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
1844a238c70SJohn Marino               }
1854a238c70SJohn Marino             else
1864a238c70SJohn Marino               {
1874a238c70SJohn Marino                 int inex2;
1884a238c70SJohn Marino 
1894a238c70SJohn Marino                 if (zz != z)
1904a238c70SJohn Marino                   mpfr_clear (zo4);
1914a238c70SJohn Marino                 MPFR_GROUP_CLEAR (group);
1924a238c70SJohn Marino                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
1934a238c70SJohn Marino                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
1944a238c70SJohn Marino                 if (inex2)  /* overflow */
1954a238c70SJohn Marino                   {
1964a238c70SJohn Marino                     inexact = inex2;
1974a238c70SJohn Marino                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
1984a238c70SJohn Marino                   }
1994a238c70SJohn Marino                 goto end;
2004a238c70SJohn Marino               }
2014a238c70SJohn Marino           }
2024a238c70SJohn Marino         }
2034a238c70SJohn Marino       else  /* underflow: one has |xy| < 2^(emin-1). */
2044a238c70SJohn Marino         {
2054a238c70SJohn Marino           unsigned long scale = 0;
2064a238c70SJohn Marino           mpfr_t scaled_z;
2074a238c70SJohn Marino           mpfr_srcptr new_z;
2084a238c70SJohn Marino           mpfr_exp_t diffexp;
2094a238c70SJohn Marino           mpfr_prec_t pzs;
2104a238c70SJohn Marino           int xy_underflows;
2114a238c70SJohn Marino 
2124a238c70SJohn Marino           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
2134a238c70SJohn Marino              (the + 1 on MPFR_PREC (s) is necessary because the exponent
2144a238c70SJohn Marino              of the result can be EXP(z) - 1). */
2154a238c70SJohn Marino           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
2164a238c70SJohn Marino           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
2174a238c70SJohn Marino           if (diffexp <= pzs)
2184a238c70SJohn Marino             {
2194a238c70SJohn Marino               mpfr_uexp_t uscale;
2204a238c70SJohn Marino               mpfr_t scaled_v;
2214a238c70SJohn Marino               MPFR_BLOCK_DECL (flags);
2224a238c70SJohn Marino 
2234a238c70SJohn Marino               uscale = (mpfr_uexp_t) pzs - diffexp + 1;
2244a238c70SJohn Marino               MPFR_ASSERTN (uscale > 0);
2254a238c70SJohn Marino               MPFR_ASSERTN (uscale <= ULONG_MAX);
2264a238c70SJohn Marino               scale = uscale;
2274a238c70SJohn Marino               mpfr_init2 (scaled_z, MPFR_PREC (z));
2284a238c70SJohn Marino               inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
2294a238c70SJohn Marino               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
2304a238c70SJohn Marino               new_z = scaled_z;
2314a238c70SJohn Marino               /* Now we need to recompute u = xy * 2^scale. */
2324a238c70SJohn Marino               MPFR_BLOCK (flags,
2334a238c70SJohn Marino                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
2344a238c70SJohn Marino                             {
2354a238c70SJohn Marino                               mpfr_init2 (scaled_v, MPFR_PREC (x));
2364a238c70SJohn Marino                               mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
2374a238c70SJohn Marino                               mpfr_mul (u, scaled_v, y, MPFR_RNDN);
2384a238c70SJohn Marino                             }
2394a238c70SJohn Marino                           else
2404a238c70SJohn Marino                             {
2414a238c70SJohn Marino                               mpfr_init2 (scaled_v, MPFR_PREC (y));
2424a238c70SJohn Marino                               mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
2434a238c70SJohn Marino                               mpfr_mul (u, x, scaled_v, MPFR_RNDN);
2444a238c70SJohn Marino                             });
2454a238c70SJohn Marino               mpfr_clear (scaled_v);
2464a238c70SJohn Marino               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
2474a238c70SJohn Marino               xy_underflows = MPFR_UNDERFLOW (flags);
2484a238c70SJohn Marino             }
2494a238c70SJohn Marino           else
2504a238c70SJohn Marino             {
2514a238c70SJohn Marino               new_z = z;
2524a238c70SJohn Marino               xy_underflows = 1;
2534a238c70SJohn Marino             }
2544a238c70SJohn Marino 
2554a238c70SJohn Marino           if (xy_underflows)
2564a238c70SJohn Marino             {
2574a238c70SJohn Marino               /* Let's replace xy by sign(xy) * 2^(emin-1). */
2584a238c70SJohn Marino               MPFR_PREC (u) = MPFR_PREC_MIN;
2594a238c70SJohn Marino               mpfr_setmin (u, __gmpfr_emin);
2604a238c70SJohn Marino               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
2614a238c70SJohn Marino                                                 MPFR_SIGN (y)));
2624a238c70SJohn Marino             }
2634a238c70SJohn Marino 
2644a238c70SJohn Marino           {
2654a238c70SJohn Marino             MPFR_BLOCK_DECL (flags);
2664a238c70SJohn Marino 
2674a238c70SJohn Marino             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
2684a238c70SJohn Marino             MPFR_GROUP_CLEAR (group);
2694a238c70SJohn Marino             if (scale != 0)
2704a238c70SJohn Marino               {
2714a238c70SJohn Marino                 int inex2;
2724a238c70SJohn Marino 
2734a238c70SJohn Marino                 mpfr_clear (scaled_z);
2744a238c70SJohn Marino                 /* Here an overflow is theoretically possible, in which case
2754a238c70SJohn Marino                    the result may be wrong, hence the assert. An underflow
2764a238c70SJohn Marino                    is not possible, but let's check that anyway. */
2774a238c70SJohn Marino                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
2784a238c70SJohn Marino                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
2794a238c70SJohn Marino                 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
2804a238c70SJohn Marino                 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
2814a238c70SJohn Marino                    Also, handle the double rounding case:
2824a238c70SJohn Marino                    s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
2834a238c70SJohn Marino                 if (inex2)  /* underflow */
2844a238c70SJohn Marino                   inexact = inex2;
2854a238c70SJohn Marino               }
2864a238c70SJohn Marino           }
2874a238c70SJohn Marino 
2884a238c70SJohn Marino           /* FIXME/TODO: I'm not sure that the following is correct.
2894a238c70SJohn Marino              Check for possible spurious exceptions due to intermediate
2904a238c70SJohn Marino              computations. */
2914a238c70SJohn Marino           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
2924a238c70SJohn Marino           goto end;
2934a238c70SJohn Marino         }
2944a238c70SJohn Marino     }
2954a238c70SJohn Marino 
2964a238c70SJohn Marino   inexact = mpfr_add (s, u, z, rnd_mode);
2974a238c70SJohn Marino   MPFR_GROUP_CLEAR (group);
2984a238c70SJohn Marino   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
2994a238c70SJohn Marino  end:
3004a238c70SJohn Marino   MPFR_SAVE_EXPO_FREE (expo);
3014a238c70SJohn Marino   return mpfr_check_range (s, inexact, rnd_mode);
3024a238c70SJohn Marino }
303