xref: /dflybsd-src/contrib/mpfr/src/fms.c (revision 2786097444a0124b5d33763854de247e230c6629)
14a238c70SJohn Marino /* mpfr_fms -- Floating multiply-subtract
24a238c70SJohn Marino 
3*ab6d115fSJohn Marino Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino 
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino 
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino 
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino 
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino 
234a238c70SJohn Marino #include "mpfr-impl.h"
244a238c70SJohn Marino 
254a238c70SJohn Marino /* The fused-multiply-subtract (fms) of x, y and z is defined by:
264a238c70SJohn Marino    fms(x,y,z)= x*y - z
274a238c70SJohn Marino    Note: this is neither in IEEE754R, nor in LIA-2, but both the
284a238c70SJohn Marino    PowerPC and the Itanium define fms as x*y - z.
294a238c70SJohn Marino */
304a238c70SJohn Marino 
314a238c70SJohn Marino int
mpfr_fms(mpfr_ptr s,mpfr_srcptr x,mpfr_srcptr y,mpfr_srcptr z,mpfr_rnd_t rnd_mode)324a238c70SJohn Marino mpfr_fms (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
334a238c70SJohn Marino           mpfr_rnd_t rnd_mode)
344a238c70SJohn Marino {
354a238c70SJohn Marino   int inexact;
364a238c70SJohn Marino   mpfr_t u;
374a238c70SJohn Marino   MPFR_SAVE_EXPO_DECL (expo);
384a238c70SJohn Marino   MPFR_GROUP_DECL(group);
394a238c70SJohn Marino 
404a238c70SJohn Marino   MPFR_LOG_FUNC
414a238c70SJohn Marino     (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
424a238c70SJohn Marino       mpfr_get_prec (x), mpfr_log_prec, x,
434a238c70SJohn Marino       mpfr_get_prec (y), mpfr_log_prec, y,
444a238c70SJohn Marino       mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
454a238c70SJohn Marino      ("s[%Pu]=%.*Rg inexact=%d",
464a238c70SJohn Marino       mpfr_get_prec (s), mpfr_log_prec, s, inexact));
474a238c70SJohn Marino 
484a238c70SJohn Marino   /* particular cases */
494a238c70SJohn Marino   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
504a238c70SJohn Marino                      MPFR_IS_SINGULAR(y) ||
514a238c70SJohn Marino                      MPFR_IS_SINGULAR(z) ))
524a238c70SJohn Marino     {
534a238c70SJohn Marino       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
544a238c70SJohn Marino         {
554a238c70SJohn Marino           MPFR_SET_NAN(s);
564a238c70SJohn Marino           MPFR_RET_NAN;
574a238c70SJohn Marino         }
584a238c70SJohn Marino       /* now neither x, y or z is NaN */
594a238c70SJohn Marino       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
604a238c70SJohn Marino         {
614a238c70SJohn Marino           /* cases Inf*0-z, 0*Inf-z, Inf-Inf */
624a238c70SJohn Marino           if ((MPFR_IS_ZERO(y)) ||
634a238c70SJohn Marino               (MPFR_IS_ZERO(x)) ||
644a238c70SJohn Marino               (MPFR_IS_INF(z) &&
654a238c70SJohn Marino                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) == MPFR_SIGN(z))))
664a238c70SJohn Marino             {
674a238c70SJohn Marino               MPFR_SET_NAN(s);
684a238c70SJohn Marino               MPFR_RET_NAN;
694a238c70SJohn Marino             }
704a238c70SJohn Marino           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
714a238c70SJohn Marino             {
724a238c70SJohn Marino               MPFR_SET_INF(s);
734a238c70SJohn Marino               MPFR_SET_OPPOSITE_SIGN(s, z);
744a238c70SJohn Marino               MPFR_RET(0);
754a238c70SJohn Marino             }
764a238c70SJohn Marino           else /* z is finite */
774a238c70SJohn Marino             {
784a238c70SJohn Marino               MPFR_SET_INF(s);
794a238c70SJohn Marino               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
804a238c70SJohn Marino               MPFR_RET(0);
814a238c70SJohn Marino             }
824a238c70SJohn Marino         }
834a238c70SJohn Marino       /* now x and y are finite */
844a238c70SJohn Marino       else if (MPFR_IS_INF(z))
854a238c70SJohn Marino         {
864a238c70SJohn Marino           MPFR_SET_INF(s);
874a238c70SJohn Marino           MPFR_SET_OPPOSITE_SIGN(s, z);
884a238c70SJohn Marino           MPFR_RET(0);
894a238c70SJohn Marino         }
904a238c70SJohn Marino       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
914a238c70SJohn Marino         {
924a238c70SJohn Marino           if (MPFR_IS_ZERO(z))
934a238c70SJohn Marino             {
944a238c70SJohn Marino               int sign_p;
954a238c70SJohn Marino               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
964a238c70SJohn Marino               MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
974a238c70SJohn Marino                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_POS(z))
984a238c70SJohn Marino                                 ? -1 : 1) :
994a238c70SJohn Marino                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_NEG(z))
1004a238c70SJohn Marino                                 ? 1 : -1)));
1014a238c70SJohn Marino               MPFR_SET_ZERO(s);
1024a238c70SJohn Marino               MPFR_RET(0);
1034a238c70SJohn Marino             }
1044a238c70SJohn Marino           else
1054a238c70SJohn Marino             return mpfr_neg (s, z, rnd_mode);
1064a238c70SJohn Marino         }
1074a238c70SJohn Marino       else /* necessarily z is zero here */
1084a238c70SJohn Marino         {
1094a238c70SJohn Marino           MPFR_ASSERTD(MPFR_IS_ZERO(z));
1104a238c70SJohn Marino           return mpfr_mul (s, x, y, rnd_mode);
1114a238c70SJohn Marino         }
1124a238c70SJohn Marino     }
1134a238c70SJohn Marino 
1144a238c70SJohn Marino   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
1154a238c70SJohn Marino      is exact, except in case of overflow or underflow. */
1164a238c70SJohn Marino   MPFR_SAVE_EXPO_MARK (expo);
1174a238c70SJohn Marino   MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
1184a238c70SJohn Marino 
1194a238c70SJohn Marino   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
1204a238c70SJohn Marino     {
1214a238c70SJohn Marino       /* overflow or underflow - this case is regarded as rare, thus
1224a238c70SJohn Marino          does not need to be very efficient (even if some tests below
1234a238c70SJohn Marino          could have been done earlier).
1244a238c70SJohn Marino          It is an overflow iff u is an infinity (since MPFR_RNDN was used).
1254a238c70SJohn Marino          Alternatively, we could test the overflow flag, but in this case,
1264a238c70SJohn Marino          mpfr_clear_flags would have been necessary. */
1274a238c70SJohn Marino       if (MPFR_IS_INF (u))  /* overflow */
1284a238c70SJohn Marino         {
1294a238c70SJohn Marino           /* Let's eliminate the obvious case where x*y and z have the
1304a238c70SJohn Marino              same sign. No possible cancellation -> real overflow.
1314a238c70SJohn Marino              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
1324a238c70SJohn Marino              then |x*y| >= 2^(emax+1), and |x*y - z| >= 2^emax. This case
1334a238c70SJohn Marino              is also an overflow. */
1344a238c70SJohn Marino           if (MPFR_SIGN (u) != MPFR_SIGN (z) ||
1354a238c70SJohn Marino               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
1364a238c70SJohn Marino             {
1374a238c70SJohn Marino               MPFR_GROUP_CLEAR (group);
1384a238c70SJohn Marino               MPFR_SAVE_EXPO_FREE (expo);
1394a238c70SJohn Marino               return mpfr_overflow (s, rnd_mode, - MPFR_SIGN (z));
1404a238c70SJohn Marino             }
1414a238c70SJohn Marino 
1424a238c70SJohn Marino           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
1434a238c70SJohn Marino              (x/4)*y does not overflow (let's recall that the result
1444a238c70SJohn Marino              is exact with an unbounded exponent range). It does not
1454a238c70SJohn Marino              underflow either, because x*y overflows and the exponent
1464a238c70SJohn Marino              range is large enough. */
1474a238c70SJohn Marino           inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
1484a238c70SJohn Marino           MPFR_ASSERTN (inexact == 0);
1494a238c70SJohn Marino           inexact = mpfr_mul (u, u, y, MPFR_RNDN);
1504a238c70SJohn Marino           MPFR_ASSERTN (inexact == 0);
1514a238c70SJohn Marino 
1524a238c70SJohn Marino           /* Now, we need to subtract z/4... But it may underflow! */
1534a238c70SJohn Marino           {
1544a238c70SJohn Marino             mpfr_t zo4;
1554a238c70SJohn Marino             mpfr_srcptr zz;
1564a238c70SJohn Marino             MPFR_BLOCK_DECL (flags);
1574a238c70SJohn Marino 
1584a238c70SJohn Marino             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
1594a238c70SJohn Marino                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
1604a238c70SJohn Marino               {
1614a238c70SJohn Marino                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
1624a238c70SJohn Marino                 zz = z;
1634a238c70SJohn Marino               }
1644a238c70SJohn Marino             else
1654a238c70SJohn Marino               {
1664a238c70SJohn Marino                 mpfr_init2 (zo4, MPFR_PREC (z));
1674a238c70SJohn Marino                 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
1684a238c70SJohn Marino                   {
1694a238c70SJohn Marino                     /* The division by 4 underflowed! */
1704a238c70SJohn Marino                     MPFR_ASSERTN (0); /* TODO... */
1714a238c70SJohn Marino                   }
1724a238c70SJohn Marino                 zz = zo4;
1734a238c70SJohn Marino               }
1744a238c70SJohn Marino 
1754a238c70SJohn Marino             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
1764a238c70SJohn Marino                following subtraction would give the same result). */
1774a238c70SJohn Marino             MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, zz, rnd_mode));
1784a238c70SJohn Marino             /* u and zz have the same sign, so that an overflow
1794a238c70SJohn Marino                is not possible. But an underflow is theoretically
1804a238c70SJohn Marino                possible! */
1814a238c70SJohn Marino             if (MPFR_UNDERFLOW (flags))
1824a238c70SJohn Marino               {
1834a238c70SJohn Marino                 MPFR_ASSERTN (zz != z);
1844a238c70SJohn Marino                 MPFR_ASSERTN (0); /* TODO... */
1854a238c70SJohn Marino                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
1864a238c70SJohn Marino               }
1874a238c70SJohn Marino             else
1884a238c70SJohn Marino               {
1894a238c70SJohn Marino                 int inex2;
1904a238c70SJohn Marino 
1914a238c70SJohn Marino                 if (zz != z)
1924a238c70SJohn Marino                   mpfr_clear (zo4);
1934a238c70SJohn Marino                 MPFR_GROUP_CLEAR (group);
1944a238c70SJohn Marino                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
1954a238c70SJohn Marino                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
1964a238c70SJohn Marino                 if (inex2)  /* overflow */
1974a238c70SJohn Marino                   {
1984a238c70SJohn Marino                     inexact = inex2;
1994a238c70SJohn Marino                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
2004a238c70SJohn Marino                   }
2014a238c70SJohn Marino                 goto end;
2024a238c70SJohn Marino               }
2034a238c70SJohn Marino           }
2044a238c70SJohn Marino         }
2054a238c70SJohn Marino       else  /* underflow: one has |xy| < 2^(emin-1). */
2064a238c70SJohn Marino         {
2074a238c70SJohn Marino           unsigned long scale = 0;
2084a238c70SJohn Marino           mpfr_t scaled_z;
2094a238c70SJohn Marino           mpfr_srcptr new_z;
2104a238c70SJohn Marino           mpfr_exp_t diffexp;
2114a238c70SJohn Marino           mpfr_prec_t pzs;
2124a238c70SJohn Marino           int xy_underflows;
2134a238c70SJohn Marino 
2144a238c70SJohn Marino           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
2154a238c70SJohn Marino              (the + 1 on MPFR_PREC (s) is necessary because the exponent
2164a238c70SJohn Marino              of the result can be EXP(z) - 1). */
2174a238c70SJohn Marino           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
2184a238c70SJohn Marino           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
2194a238c70SJohn Marino           if (diffexp <= pzs)
2204a238c70SJohn Marino             {
2214a238c70SJohn Marino               mpfr_uexp_t uscale;
2224a238c70SJohn Marino               mpfr_t scaled_v;
2234a238c70SJohn Marino               MPFR_BLOCK_DECL (flags);
2244a238c70SJohn Marino 
2254a238c70SJohn Marino               uscale = (mpfr_uexp_t) pzs - diffexp + 1;
2264a238c70SJohn Marino               MPFR_ASSERTN (uscale > 0);
2274a238c70SJohn Marino               MPFR_ASSERTN (uscale <= ULONG_MAX);
2284a238c70SJohn Marino               scale = uscale;
2294a238c70SJohn Marino               mpfr_init2 (scaled_z, MPFR_PREC (z));
2304a238c70SJohn Marino               inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
2314a238c70SJohn Marino               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
2324a238c70SJohn Marino               new_z = scaled_z;
2334a238c70SJohn Marino               /* Now we need to recompute u = xy * 2^scale. */
2344a238c70SJohn Marino               MPFR_BLOCK (flags,
2354a238c70SJohn Marino                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
2364a238c70SJohn Marino                             {
2374a238c70SJohn Marino                               mpfr_init2 (scaled_v, MPFR_PREC (x));
2384a238c70SJohn Marino                               mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
2394a238c70SJohn Marino                               mpfr_mul (u, scaled_v, y, MPFR_RNDN);
2404a238c70SJohn Marino                             }
2414a238c70SJohn Marino                           else
2424a238c70SJohn Marino                             {
2434a238c70SJohn Marino                               mpfr_init2 (scaled_v, MPFR_PREC (y));
2444a238c70SJohn Marino                               mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
2454a238c70SJohn Marino                               mpfr_mul (u, x, scaled_v, MPFR_RNDN);
2464a238c70SJohn Marino                             });
2474a238c70SJohn Marino               mpfr_clear (scaled_v);
2484a238c70SJohn Marino               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
2494a238c70SJohn Marino               xy_underflows = MPFR_UNDERFLOW (flags);
2504a238c70SJohn Marino             }
2514a238c70SJohn Marino           else
2524a238c70SJohn Marino             {
2534a238c70SJohn Marino               new_z = z;
2544a238c70SJohn Marino               xy_underflows = 1;
2554a238c70SJohn Marino             }
2564a238c70SJohn Marino 
2574a238c70SJohn Marino           if (xy_underflows)
2584a238c70SJohn Marino             {
2594a238c70SJohn Marino               /* Let's replace xy by sign(xy) * 2^(emin-1). */
2604a238c70SJohn Marino               MPFR_PREC (u) = MPFR_PREC_MIN;
2614a238c70SJohn Marino               mpfr_setmin (u, __gmpfr_emin);
2624a238c70SJohn Marino               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
2634a238c70SJohn Marino                                                 MPFR_SIGN (y)));
2644a238c70SJohn Marino             }
2654a238c70SJohn Marino 
2664a238c70SJohn Marino           {
2674a238c70SJohn Marino             MPFR_BLOCK_DECL (flags);
2684a238c70SJohn Marino 
2694a238c70SJohn Marino             MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, new_z, rnd_mode));
2704a238c70SJohn Marino             MPFR_GROUP_CLEAR (group);
2714a238c70SJohn Marino             if (scale != 0)
2724a238c70SJohn Marino               {
2734a238c70SJohn Marino                 int inex2;
2744a238c70SJohn Marino 
2754a238c70SJohn Marino                 mpfr_clear (scaled_z);
2764a238c70SJohn Marino                 /* Here an overflow is theoretically possible, in which case
2774a238c70SJohn Marino                    the result may be wrong, hence the assert. An underflow
2784a238c70SJohn Marino                    is not possible, but let's check that anyway. */
2794a238c70SJohn Marino                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
2804a238c70SJohn Marino                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
2814a238c70SJohn Marino                 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
2824a238c70SJohn Marino                 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
2834a238c70SJohn Marino                    Also, handle the double rounding case:
2844a238c70SJohn Marino                    s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
2854a238c70SJohn Marino                 if (inex2)  /* underflow */
2864a238c70SJohn Marino                   inexact = inex2;
2874a238c70SJohn Marino               }
2884a238c70SJohn Marino           }
2894a238c70SJohn Marino 
2904a238c70SJohn Marino           /* FIXME/TODO: I'm not sure that the following is correct.
2914a238c70SJohn Marino              Check for possible spurious exceptions due to intermediate
2924a238c70SJohn Marino              computations. */
2934a238c70SJohn Marino           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
2944a238c70SJohn Marino           goto end;
2954a238c70SJohn Marino         }
2964a238c70SJohn Marino     }
2974a238c70SJohn Marino 
2984a238c70SJohn Marino   inexact = mpfr_sub (s, u, z, rnd_mode);
2994a238c70SJohn Marino   MPFR_GROUP_CLEAR (group);
3004a238c70SJohn Marino   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
3014a238c70SJohn Marino  end:
3024a238c70SJohn Marino   MPFR_SAVE_EXPO_FREE (expo);
3034a238c70SJohn Marino   return mpfr_check_range (s, inexact, rnd_mode);
3044a238c70SJohn Marino }
305