mpfr/src/fma.c

/* mpfr_fma -- Floating multiply-add

Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
4a238c70SJohn Marino
4a238c70SJohn Marino#include "mpfr-impl.h"
4a238c70SJohn Marino
/* The fused multiply-add (fma) of x, y and z is defined by:
   fma(x,y,z) = x*y + z
*/
4a238c70SJohn Marino
4a238c70SJohn Marinoint
4a238c70SJohn Marinompfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
4a238c70SJohn Marino          mpfr_rnd_t rnd_mode)
4a238c70SJohn Marino{
4a238c70SJohn Marino  int inexact;
4a238c70SJohn Marino  mpfr_t u;
4a238c70SJohn Marino  MPFR_SAVE_EXPO_DECL (expo);
4a238c70SJohn Marino  MPFR_GROUP_DECL(group);
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_LOG_FUNC
4a238c70SJohn Marino    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
4a238c70SJohn Marino      mpfr_get_prec (x), mpfr_log_prec, x,
4a238c70SJohn Marino      mpfr_get_prec (y), mpfr_log_prec, y,
4a238c70SJohn Marino      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
4a238c70SJohn Marino     ("s[%Pu]=%.*Rg inexact=%d",
4a238c70SJohn Marino      mpfr_get_prec (s), mpfr_log_prec, s, inexact));
4a238c70SJohn Marino
4a238c70SJohn Marino  /* particular cases */
4a238c70SJohn Marino  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
4a238c70SJohn Marino                     MPFR_IS_SINGULAR(y) ||
4a238c70SJohn Marino                     MPFR_IS_SINGULAR(z) ))
4a238c70SJohn Marino    {
4a238c70SJohn Marino      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          MPFR_SET_NAN(s);
4a238c70SJohn Marino          MPFR_RET_NAN;
4a238c70SJohn Marino        }
4a238c70SJohn Marino      /* now neither x, y or z is NaN */
4a238c70SJohn Marino      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
4a238c70SJohn Marino          if ((MPFR_IS_ZERO(y)) ||
4a238c70SJohn Marino              (MPFR_IS_ZERO(x)) ||
4a238c70SJohn Marino              (MPFR_IS_INF(z) &&
4a238c70SJohn Marino               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
4a238c70SJohn Marino            {
4a238c70SJohn Marino              MPFR_SET_NAN(s);
4a238c70SJohn Marino              MPFR_RET_NAN;
4a238c70SJohn Marino            }
4a238c70SJohn Marino          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
4a238c70SJohn Marino            {
4a238c70SJohn Marino              MPFR_SET_INF(s);
4a238c70SJohn Marino              MPFR_SET_SAME_SIGN(s, z);
4a238c70SJohn Marino              MPFR_RET(0);
4a238c70SJohn Marino            }
4a238c70SJohn Marino          else /* z is finite */
4a238c70SJohn Marino            {
4a238c70SJohn Marino              MPFR_SET_INF(s);
4a238c70SJohn Marino              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
4a238c70SJohn Marino              MPFR_RET(0);
4a238c70SJohn Marino            }
4a238c70SJohn Marino        }
4a238c70SJohn Marino      /* now x and y are finite */
4a238c70SJohn Marino      else if (MPFR_IS_INF(z))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          MPFR_SET_INF(s);
4a238c70SJohn Marino          MPFR_SET_SAME_SIGN(s, z);
4a238c70SJohn Marino          MPFR_RET(0);
4a238c70SJohn Marino        }
4a238c70SJohn Marino      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          if (MPFR_IS_ZERO(z))
4a238c70SJohn Marino            {
4a238c70SJohn Marino              int sign_p;
4a238c70SJohn Marino              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
4a238c70SJohn Marino              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
4a238c70SJohn Marino                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
4a238c70SJohn Marino                                ? -1 : 1) :
4a238c70SJohn Marino                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
4a238c70SJohn Marino                                ? 1 : -1)));
4a238c70SJohn Marino              MPFR_SET_ZERO(s);
4a238c70SJohn Marino              MPFR_RET(0);
4a238c70SJohn Marino            }
4a238c70SJohn Marino          else
4a238c70SJohn Marino            return mpfr_set (s, z, rnd_mode);
4a238c70SJohn Marino        }
4a238c70SJohn Marino      else /* necessarily z is zero here */
4a238c70SJohn Marino        {
4a238c70SJohn Marino          MPFR_ASSERTD(MPFR_IS_ZERO(z));
4a238c70SJohn Marino          return mpfr_mul (s, x, y, rnd_mode);
4a238c70SJohn Marino        }
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
4a238c70SJohn Marino     is exact, except in case of overflow or underflow. */
4a238c70SJohn Marino  MPFR_SAVE_EXPO_MARK (expo);
4a238c70SJohn Marino  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
4a238c70SJohn Marino
4a238c70SJohn Marino  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
4a238c70SJohn Marino    {
4a238c70SJohn Marino      /* overflow or underflow - this case is regarded as rare, thus
4a238c70SJohn Marino         does not need to be very efficient (even if some tests below
4a238c70SJohn Marino         could have been done earlier).
4a238c70SJohn Marino         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
4a238c70SJohn Marino         Alternatively, we could test the overflow flag, but in this case,
4a238c70SJohn Marino         mpfr_clear_flags would have been necessary. */
4a238c70SJohn Marino      if (MPFR_IS_INF (u))  /* overflow */
4a238c70SJohn Marino        {
4a238c70SJohn Marino          /* Let's eliminate the obvious case where x*y and z have the
4a238c70SJohn Marino             same sign. No possible cancellation -> real overflow.
4a238c70SJohn Marino             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
4a238c70SJohn Marino             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
4a238c70SJohn Marino             is also an overflow. */
4a238c70SJohn Marino          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
4a238c70SJohn Marino              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
4a238c70SJohn Marino            {
4a238c70SJohn Marino              MPFR_GROUP_CLEAR (group);
4a238c70SJohn Marino              MPFR_SAVE_EXPO_FREE (expo);
4a238c70SJohn Marino              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
4a238c70SJohn Marino            }
4a238c70SJohn Marino
4a238c70SJohn Marino          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
4a238c70SJohn Marino             (x/4)*y does not overflow (let's recall that the result
4a238c70SJohn Marino             is exact with an unbounded exponent range). It does not
4a238c70SJohn Marino             underflow either, because x*y overflows and the exponent
4a238c70SJohn Marino             range is large enough. */
4a238c70SJohn Marino          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
4a238c70SJohn Marino          MPFR_ASSERTN (inexact == 0);
4a238c70SJohn Marino          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
4a238c70SJohn Marino          MPFR_ASSERTN (inexact == 0);
4a238c70SJohn Marino
4a238c70SJohn Marino          /* Now, we need to add z/4... But it may underflow! */
4a238c70SJohn Marino          {
4a238c70SJohn Marino            mpfr_t zo4;
4a238c70SJohn Marino            mpfr_srcptr zz;
4a238c70SJohn Marino            MPFR_BLOCK_DECL (flags);
4a238c70SJohn Marino
4a238c70SJohn Marino            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
4a238c70SJohn Marino                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
4a238c70SJohn Marino              {
4a238c70SJohn Marino                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
4a238c70SJohn Marino                zz = z;
4a238c70SJohn Marino              }
4a238c70SJohn Marino            else
4a238c70SJohn Marino              {
4a238c70SJohn Marino                mpfr_init2 (zo4, MPFR_PREC (z));
4a238c70SJohn Marino                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
4a238c70SJohn Marino                  {
4a238c70SJohn Marino                    /* The division by 4 underflowed! */
4a238c70SJohn Marino                    MPFR_ASSERTN (0); /* TODO... */
4a238c70SJohn Marino                  }
4a238c70SJohn Marino                zz = zo4;
4a238c70SJohn Marino              }
4a238c70SJohn Marino
4a238c70SJohn Marino            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
4a238c70SJohn Marino               following addition would give the same result). */
4a238c70SJohn Marino            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
4a238c70SJohn Marino            /* u and zz have different signs, so that an overflow
4a238c70SJohn Marino               is not possible. But an underflow is theoretically
4a238c70SJohn Marino               possible! */
4a238c70SJohn Marino            if (MPFR_UNDERFLOW (flags))
4a238c70SJohn Marino              {
4a238c70SJohn Marino                MPFR_ASSERTN (zz != z);
4a238c70SJohn Marino                MPFR_ASSERTN (0); /* TODO... */
4a238c70SJohn Marino                mpfr_clears (zo4, u, (mpfr_ptr) 0);
4a238c70SJohn Marino              }
4a238c70SJohn Marino            else
4a238c70SJohn Marino              {
4a238c70SJohn Marino                int inex2;
4a238c70SJohn Marino
4a238c70SJohn Marino                if (zz != z)
4a238c70SJohn Marino                  mpfr_clear (zo4);
4a238c70SJohn Marino                MPFR_GROUP_CLEAR (group);
4a238c70SJohn Marino                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
4a238c70SJohn Marino                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
4a238c70SJohn Marino                if (inex2)  /* overflow */
4a238c70SJohn Marino                  {
4a238c70SJohn Marino                    inexact = inex2;
4a238c70SJohn Marino                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
4a238c70SJohn Marino                  }
4a238c70SJohn Marino                goto end;
4a238c70SJohn Marino              }
4a238c70SJohn Marino          }
4a238c70SJohn Marino        }
4a238c70SJohn Marino      else  /* underflow: one has |xy| < 2^(emin-1). */
4a238c70SJohn Marino        {
4a238c70SJohn Marino          unsigned long scale = 0;
4a238c70SJohn Marino          mpfr_t scaled_z;
4a238c70SJohn Marino          mpfr_srcptr new_z;
4a238c70SJohn Marino          mpfr_exp_t diffexp;
4a238c70SJohn Marino          mpfr_prec_t pzs;
4a238c70SJohn Marino          int xy_underflows;
4a238c70SJohn Marino
4a238c70SJohn Marino          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
4a238c70SJohn Marino             (the + 1 on MPFR_PREC (s) is necessary because the exponent
4a238c70SJohn Marino             of the result can be EXP(z) - 1). */
4a238c70SJohn Marino          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
4a238c70SJohn Marino          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
4a238c70SJohn Marino          if (diffexp <= pzs)
4a238c70SJohn Marino            {
4a238c70SJohn Marino              mpfr_uexp_t uscale;
4a238c70SJohn Marino              mpfr_t scaled_v;
4a238c70SJohn Marino              MPFR_BLOCK_DECL (flags);
4a238c70SJohn Marino
4a238c70SJohn Marino              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
4a238c70SJohn Marino              MPFR_ASSERTN (uscale > 0);
4a238c70SJohn Marino              MPFR_ASSERTN (uscale <= ULONG_MAX);
4a238c70SJohn Marino              scale = uscale;
4a238c70SJohn Marino              mpfr_init2 (scaled_z, MPFR_PREC (z));
4a238c70SJohn Marino              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
4a238c70SJohn Marino              MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
4a238c70SJohn Marino              new_z = scaled_z;
4a238c70SJohn Marino              /* Now we need to recompute u = xy * 2^scale. */
4a238c70SJohn Marino              MPFR_BLOCK (flags,
4a238c70SJohn Marino                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
4a238c70SJohn Marino                            {
4a238c70SJohn Marino                              mpfr_init2 (scaled_v, MPFR_PREC (x));
4a238c70SJohn Marino                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
4a238c70SJohn Marino                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
4a238c70SJohn Marino                            }
4a238c70SJohn Marino                          else
4a238c70SJohn Marino                            {
4a238c70SJohn Marino                              mpfr_init2 (scaled_v, MPFR_PREC (y));
4a238c70SJohn Marino                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
4a238c70SJohn Marino                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
4a238c70SJohn Marino                            });
4a238c70SJohn Marino              mpfr_clear (scaled_v);
4a238c70SJohn Marino              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
4a238c70SJohn Marino              xy_underflows = MPFR_UNDERFLOW (flags);
4a238c70SJohn Marino            }
4a238c70SJohn Marino          else
4a238c70SJohn Marino            {
4a238c70SJohn Marino              new_z = z;
4a238c70SJohn Marino              xy_underflows = 1;
4a238c70SJohn Marino            }
4a238c70SJohn Marino
4a238c70SJohn Marino          if (xy_underflows)
4a238c70SJohn Marino            {
4a238c70SJohn Marino              /* Let's replace xy by sign(xy) * 2^(emin-1). */
4a238c70SJohn Marino              MPFR_PREC (u) = MPFR_PREC_MIN;
4a238c70SJohn Marino              mpfr_setmin (u, __gmpfr_emin);
4a238c70SJohn Marino              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
4a238c70SJohn Marino                                                MPFR_SIGN (y)));
4a238c70SJohn Marino            }
4a238c70SJohn Marino
4a238c70SJohn Marino          {
4a238c70SJohn Marino            MPFR_BLOCK_DECL (flags);
4a238c70SJohn Marino
4a238c70SJohn Marino            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
4a238c70SJohn Marino            MPFR_GROUP_CLEAR (group);
4a238c70SJohn Marino            if (scale != 0)
4a238c70SJohn Marino              {
4a238c70SJohn Marino                int inex2;
4a238c70SJohn Marino
4a238c70SJohn Marino                mpfr_clear (scaled_z);
4a238c70SJohn Marino                /* Here an overflow is theoretically possible, in which case
4a238c70SJohn Marino                   the result may be wrong, hence the assert. An underflow
4a238c70SJohn Marino                   is not possible, but let's check that anyway. */
4a238c70SJohn Marino                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
4a238c70SJohn Marino                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
4a238c70SJohn Marino                inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
4a238c70SJohn Marino                /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
4a238c70SJohn Marino                   Also, handle the double rounding case:
4a238c70SJohn Marino                   s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
4a238c70SJohn Marino                if (inex2)  /* underflow */
4a238c70SJohn Marino                  inexact = inex2;
4a238c70SJohn Marino              }
4a238c70SJohn Marino          }
4a238c70SJohn Marino
4a238c70SJohn Marino          /* FIXME/TODO: I'm not sure that the following is correct.
4a238c70SJohn Marino             Check for possible spurious exceptions due to intermediate
4a238c70SJohn Marino             computations. */
4a238c70SJohn Marino          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
4a238c70SJohn Marino          goto end;
4a238c70SJohn Marino        }
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  inexact = mpfr_add (s, u, z, rnd_mode);
4a238c70SJohn Marino  MPFR_GROUP_CLEAR (group);
4a238c70SJohn Marino  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
4a238c70SJohn Marino end:
4a238c70SJohn Marino  MPFR_SAVE_EXPO_FREE (expo);
4a238c70SJohn Marino  return mpfr_check_range (s, inexact, rnd_mode);
4a238c70SJohn Marino}