14a238c70SJohn Marino /* mpfr_fma -- Floating multiply-add
24a238c70SJohn Marino
3*ab6d115fSJohn Marino Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino
234a238c70SJohn Marino #include "mpfr-impl.h"
244a238c70SJohn Marino
254a238c70SJohn Marino /* The fused-multiply-add (fma) of x, y and z is defined by:
264a238c70SJohn Marino fma(x,y,z)= x*y + z
274a238c70SJohn Marino */
284a238c70SJohn Marino
294a238c70SJohn Marino int
mpfr_fma(mpfr_ptr s,mpfr_srcptr x,mpfr_srcptr y,mpfr_srcptr z,mpfr_rnd_t rnd_mode)304a238c70SJohn Marino mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
314a238c70SJohn Marino mpfr_rnd_t rnd_mode)
324a238c70SJohn Marino {
334a238c70SJohn Marino int inexact;
344a238c70SJohn Marino mpfr_t u;
354a238c70SJohn Marino MPFR_SAVE_EXPO_DECL (expo);
364a238c70SJohn Marino MPFR_GROUP_DECL(group);
374a238c70SJohn Marino
384a238c70SJohn Marino MPFR_LOG_FUNC
394a238c70SJohn Marino (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
404a238c70SJohn Marino mpfr_get_prec (x), mpfr_log_prec, x,
414a238c70SJohn Marino mpfr_get_prec (y), mpfr_log_prec, y,
424a238c70SJohn Marino mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
434a238c70SJohn Marino ("s[%Pu]=%.*Rg inexact=%d",
444a238c70SJohn Marino mpfr_get_prec (s), mpfr_log_prec, s, inexact));
454a238c70SJohn Marino
464a238c70SJohn Marino /* particular cases */
474a238c70SJohn Marino if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
484a238c70SJohn Marino MPFR_IS_SINGULAR(y) ||
494a238c70SJohn Marino MPFR_IS_SINGULAR(z) ))
504a238c70SJohn Marino {
514a238c70SJohn Marino if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
524a238c70SJohn Marino {
534a238c70SJohn Marino MPFR_SET_NAN(s);
544a238c70SJohn Marino MPFR_RET_NAN;
554a238c70SJohn Marino }
564a238c70SJohn Marino /* now neither x, y or z is NaN */
574a238c70SJohn Marino else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
584a238c70SJohn Marino {
594a238c70SJohn Marino /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
604a238c70SJohn Marino if ((MPFR_IS_ZERO(y)) ||
614a238c70SJohn Marino (MPFR_IS_ZERO(x)) ||
624a238c70SJohn Marino (MPFR_IS_INF(z) &&
634a238c70SJohn Marino ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
644a238c70SJohn Marino {
654a238c70SJohn Marino MPFR_SET_NAN(s);
664a238c70SJohn Marino MPFR_RET_NAN;
674a238c70SJohn Marino }
684a238c70SJohn Marino else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
694a238c70SJohn Marino {
704a238c70SJohn Marino MPFR_SET_INF(s);
714a238c70SJohn Marino MPFR_SET_SAME_SIGN(s, z);
724a238c70SJohn Marino MPFR_RET(0);
734a238c70SJohn Marino }
744a238c70SJohn Marino else /* z is finite */
754a238c70SJohn Marino {
764a238c70SJohn Marino MPFR_SET_INF(s);
774a238c70SJohn Marino MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
784a238c70SJohn Marino MPFR_RET(0);
794a238c70SJohn Marino }
804a238c70SJohn Marino }
814a238c70SJohn Marino /* now x and y are finite */
824a238c70SJohn Marino else if (MPFR_IS_INF(z))
834a238c70SJohn Marino {
844a238c70SJohn Marino MPFR_SET_INF(s);
854a238c70SJohn Marino MPFR_SET_SAME_SIGN(s, z);
864a238c70SJohn Marino MPFR_RET(0);
874a238c70SJohn Marino }
884a238c70SJohn Marino else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
894a238c70SJohn Marino {
904a238c70SJohn Marino if (MPFR_IS_ZERO(z))
914a238c70SJohn Marino {
924a238c70SJohn Marino int sign_p;
934a238c70SJohn Marino sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
944a238c70SJohn Marino MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
954a238c70SJohn Marino ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
964a238c70SJohn Marino ? -1 : 1) :
974a238c70SJohn Marino ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
984a238c70SJohn Marino ? 1 : -1)));
994a238c70SJohn Marino MPFR_SET_ZERO(s);
1004a238c70SJohn Marino MPFR_RET(0);
1014a238c70SJohn Marino }
1024a238c70SJohn Marino else
1034a238c70SJohn Marino return mpfr_set (s, z, rnd_mode);
1044a238c70SJohn Marino }
1054a238c70SJohn Marino else /* necessarily z is zero here */
1064a238c70SJohn Marino {
1074a238c70SJohn Marino MPFR_ASSERTD(MPFR_IS_ZERO(z));
1084a238c70SJohn Marino return mpfr_mul (s, x, y, rnd_mode);
1094a238c70SJohn Marino }
1104a238c70SJohn Marino }
1114a238c70SJohn Marino
1124a238c70SJohn Marino /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
1134a238c70SJohn Marino is exact, except in case of overflow or underflow. */
1144a238c70SJohn Marino MPFR_SAVE_EXPO_MARK (expo);
1154a238c70SJohn Marino MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
1164a238c70SJohn Marino
1174a238c70SJohn Marino if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
1184a238c70SJohn Marino {
1194a238c70SJohn Marino /* overflow or underflow - this case is regarded as rare, thus
1204a238c70SJohn Marino does not need to be very efficient (even if some tests below
1214a238c70SJohn Marino could have been done earlier).
1224a238c70SJohn Marino It is an overflow iff u is an infinity (since MPFR_RNDN was used).
1234a238c70SJohn Marino Alternatively, we could test the overflow flag, but in this case,
1244a238c70SJohn Marino mpfr_clear_flags would have been necessary. */
1254a238c70SJohn Marino if (MPFR_IS_INF (u)) /* overflow */
1264a238c70SJohn Marino {
1274a238c70SJohn Marino /* Let's eliminate the obvious case where x*y and z have the
1284a238c70SJohn Marino same sign. No possible cancellation -> real overflow.
1294a238c70SJohn Marino Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
1304a238c70SJohn Marino then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
1314a238c70SJohn Marino is also an overflow. */
1324a238c70SJohn Marino if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
1334a238c70SJohn Marino MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
1344a238c70SJohn Marino {
1354a238c70SJohn Marino MPFR_GROUP_CLEAR (group);
1364a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo);
1374a238c70SJohn Marino return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
1384a238c70SJohn Marino }
1394a238c70SJohn Marino
1404a238c70SJohn Marino /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
1414a238c70SJohn Marino (x/4)*y does not overflow (let's recall that the result
1424a238c70SJohn Marino is exact with an unbounded exponent range). It does not
1434a238c70SJohn Marino underflow either, because x*y overflows and the exponent
1444a238c70SJohn Marino range is large enough. */
1454a238c70SJohn Marino inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
1464a238c70SJohn Marino MPFR_ASSERTN (inexact == 0);
1474a238c70SJohn Marino inexact = mpfr_mul (u, u, y, MPFR_RNDN);
1484a238c70SJohn Marino MPFR_ASSERTN (inexact == 0);
1494a238c70SJohn Marino
1504a238c70SJohn Marino /* Now, we need to add z/4... But it may underflow! */
1514a238c70SJohn Marino {
1524a238c70SJohn Marino mpfr_t zo4;
1534a238c70SJohn Marino mpfr_srcptr zz;
1544a238c70SJohn Marino MPFR_BLOCK_DECL (flags);
1554a238c70SJohn Marino
1564a238c70SJohn Marino if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
1574a238c70SJohn Marino MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
1584a238c70SJohn Marino {
1594a238c70SJohn Marino /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
1604a238c70SJohn Marino zz = z;
1614a238c70SJohn Marino }
1624a238c70SJohn Marino else
1634a238c70SJohn Marino {
1644a238c70SJohn Marino mpfr_init2 (zo4, MPFR_PREC (z));
1654a238c70SJohn Marino if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
1664a238c70SJohn Marino {
1674a238c70SJohn Marino /* The division by 4 underflowed! */
1684a238c70SJohn Marino MPFR_ASSERTN (0); /* TODO... */
1694a238c70SJohn Marino }
1704a238c70SJohn Marino zz = zo4;
1714a238c70SJohn Marino }
1724a238c70SJohn Marino
1734a238c70SJohn Marino /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
1744a238c70SJohn Marino following addition would give the same result). */
1754a238c70SJohn Marino MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
1764a238c70SJohn Marino /* u and zz have different signs, so that an overflow
1774a238c70SJohn Marino is not possible. But an underflow is theoretically
1784a238c70SJohn Marino possible! */
1794a238c70SJohn Marino if (MPFR_UNDERFLOW (flags))
1804a238c70SJohn Marino {
1814a238c70SJohn Marino MPFR_ASSERTN (zz != z);
1824a238c70SJohn Marino MPFR_ASSERTN (0); /* TODO... */
1834a238c70SJohn Marino mpfr_clears (zo4, u, (mpfr_ptr) 0);
1844a238c70SJohn Marino }
1854a238c70SJohn Marino else
1864a238c70SJohn Marino {
1874a238c70SJohn Marino int inex2;
1884a238c70SJohn Marino
1894a238c70SJohn Marino if (zz != z)
1904a238c70SJohn Marino mpfr_clear (zo4);
1914a238c70SJohn Marino MPFR_GROUP_CLEAR (group);
1924a238c70SJohn Marino MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
1934a238c70SJohn Marino inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
1944a238c70SJohn Marino if (inex2) /* overflow */
1954a238c70SJohn Marino {
1964a238c70SJohn Marino inexact = inex2;
1974a238c70SJohn Marino MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
1984a238c70SJohn Marino }
1994a238c70SJohn Marino goto end;
2004a238c70SJohn Marino }
2014a238c70SJohn Marino }
2024a238c70SJohn Marino }
2034a238c70SJohn Marino else /* underflow: one has |xy| < 2^(emin-1). */
2044a238c70SJohn Marino {
2054a238c70SJohn Marino unsigned long scale = 0;
2064a238c70SJohn Marino mpfr_t scaled_z;
2074a238c70SJohn Marino mpfr_srcptr new_z;
2084a238c70SJohn Marino mpfr_exp_t diffexp;
2094a238c70SJohn Marino mpfr_prec_t pzs;
2104a238c70SJohn Marino int xy_underflows;
2114a238c70SJohn Marino
2124a238c70SJohn Marino /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
2134a238c70SJohn Marino (the + 1 on MPFR_PREC (s) is necessary because the exponent
2144a238c70SJohn Marino of the result can be EXP(z) - 1). */
2154a238c70SJohn Marino diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
2164a238c70SJohn Marino pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
2174a238c70SJohn Marino if (diffexp <= pzs)
2184a238c70SJohn Marino {
2194a238c70SJohn Marino mpfr_uexp_t uscale;
2204a238c70SJohn Marino mpfr_t scaled_v;
2214a238c70SJohn Marino MPFR_BLOCK_DECL (flags);
2224a238c70SJohn Marino
2234a238c70SJohn Marino uscale = (mpfr_uexp_t) pzs - diffexp + 1;
2244a238c70SJohn Marino MPFR_ASSERTN (uscale > 0);
2254a238c70SJohn Marino MPFR_ASSERTN (uscale <= ULONG_MAX);
2264a238c70SJohn Marino scale = uscale;
2274a238c70SJohn Marino mpfr_init2 (scaled_z, MPFR_PREC (z));
2284a238c70SJohn Marino inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
2294a238c70SJohn Marino MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
2304a238c70SJohn Marino new_z = scaled_z;
2314a238c70SJohn Marino /* Now we need to recompute u = xy * 2^scale. */
2324a238c70SJohn Marino MPFR_BLOCK (flags,
2334a238c70SJohn Marino if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
2344a238c70SJohn Marino {
2354a238c70SJohn Marino mpfr_init2 (scaled_v, MPFR_PREC (x));
2364a238c70SJohn Marino mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
2374a238c70SJohn Marino mpfr_mul (u, scaled_v, y, MPFR_RNDN);
2384a238c70SJohn Marino }
2394a238c70SJohn Marino else
2404a238c70SJohn Marino {
2414a238c70SJohn Marino mpfr_init2 (scaled_v, MPFR_PREC (y));
2424a238c70SJohn Marino mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
2434a238c70SJohn Marino mpfr_mul (u, x, scaled_v, MPFR_RNDN);
2444a238c70SJohn Marino });
2454a238c70SJohn Marino mpfr_clear (scaled_v);
2464a238c70SJohn Marino MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
2474a238c70SJohn Marino xy_underflows = MPFR_UNDERFLOW (flags);
2484a238c70SJohn Marino }
2494a238c70SJohn Marino else
2504a238c70SJohn Marino {
2514a238c70SJohn Marino new_z = z;
2524a238c70SJohn Marino xy_underflows = 1;
2534a238c70SJohn Marino }
2544a238c70SJohn Marino
2554a238c70SJohn Marino if (xy_underflows)
2564a238c70SJohn Marino {
2574a238c70SJohn Marino /* Let's replace xy by sign(xy) * 2^(emin-1). */
2584a238c70SJohn Marino MPFR_PREC (u) = MPFR_PREC_MIN;
2594a238c70SJohn Marino mpfr_setmin (u, __gmpfr_emin);
2604a238c70SJohn Marino MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
2614a238c70SJohn Marino MPFR_SIGN (y)));
2624a238c70SJohn Marino }
2634a238c70SJohn Marino
2644a238c70SJohn Marino {
2654a238c70SJohn Marino MPFR_BLOCK_DECL (flags);
2664a238c70SJohn Marino
2674a238c70SJohn Marino MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
2684a238c70SJohn Marino MPFR_GROUP_CLEAR (group);
2694a238c70SJohn Marino if (scale != 0)
2704a238c70SJohn Marino {
2714a238c70SJohn Marino int inex2;
2724a238c70SJohn Marino
2734a238c70SJohn Marino mpfr_clear (scaled_z);
2744a238c70SJohn Marino /* Here an overflow is theoretically possible, in which case
2754a238c70SJohn Marino the result may be wrong, hence the assert. An underflow
2764a238c70SJohn Marino is not possible, but let's check that anyway. */
2774a238c70SJohn Marino MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
2784a238c70SJohn Marino MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
2794a238c70SJohn Marino inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
2804a238c70SJohn Marino /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
2814a238c70SJohn Marino Also, handle the double rounding case:
2824a238c70SJohn Marino s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
2834a238c70SJohn Marino if (inex2) /* underflow */
2844a238c70SJohn Marino inexact = inex2;
2854a238c70SJohn Marino }
2864a238c70SJohn Marino }
2874a238c70SJohn Marino
2884a238c70SJohn Marino /* FIXME/TODO: I'm not sure that the following is correct.
2894a238c70SJohn Marino Check for possible spurious exceptions due to intermediate
2904a238c70SJohn Marino computations. */
2914a238c70SJohn Marino MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
2924a238c70SJohn Marino goto end;
2934a238c70SJohn Marino }
2944a238c70SJohn Marino }
2954a238c70SJohn Marino
2964a238c70SJohn Marino inexact = mpfr_add (s, u, z, rnd_mode);
2974a238c70SJohn Marino MPFR_GROUP_CLEAR (group);
2984a238c70SJohn Marino MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
2994a238c70SJohn Marino end:
3004a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo);
3014a238c70SJohn Marino return mpfr_check_range (s, inexact, rnd_mode);
3024a238c70SJohn Marino }
303