mpfr/src/agm.c

/* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
4a238c70SJohn Marino
4a238c70SJohn Marino#define MPFR_NEED_LONGLONG_H
4a238c70SJohn Marino#include "mpfr-impl.h"
4a238c70SJohn Marino
4a238c70SJohn Marino/* agm(x,y) is between x and y, so we don't need to save exponent range */
4a238c70SJohn Marinoint
4a238c70SJohn Marinompfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
4a238c70SJohn Marino{
4a238c70SJohn Marino  int compare, inexact;
4a238c70SJohn Marino  mp_size_t s;
4a238c70SJohn Marino  mpfr_prec_t p, q;
4a238c70SJohn Marino  mp_limb_t *up, *vp, *ufp, *vfp;
4a238c70SJohn Marino  mpfr_t u, v, uf, vf, sc1, sc2;
4a238c70SJohn Marino  mpfr_exp_t scaleop = 0, scaleit;
4a238c70SJohn Marino  unsigned long n; /* number of iterations */
4a238c70SJohn Marino  MPFR_ZIV_DECL (loop);
4a238c70SJohn Marino  MPFR_TMP_DECL(marker);
4a238c70SJohn Marino  MPFR_SAVE_EXPO_DECL (expo);
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_LOG_FUNC
4a238c70SJohn Marino    (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
4a238c70SJohn Marino      mpfr_get_prec (op2), mpfr_log_prec, op2,
4a238c70SJohn Marino      mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
4a238c70SJohn Marino     ("r[%Pu]=%.*Rg inexact=%d",
4a238c70SJohn Marino      mpfr_get_prec (r), mpfr_log_prec, r, inexact));
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Deal with special values */
4a238c70SJohn Marino  if (MPFR_ARE_SINGULAR (op1, op2))
4a238c70SJohn Marino    {
4a238c70SJohn Marino      /* If a or b is NaN, the result is NaN */
4a238c70SJohn Marino      if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          MPFR_SET_NAN(r);
4a238c70SJohn Marino          MPFR_RET_NAN;
4a238c70SJohn Marino        }
4a238c70SJohn Marino      /* now one of a or b is Inf or 0 */
4a238c70SJohn Marino      /* If a and b is +Inf, the result is +Inf.
4a238c70SJohn Marino         Otherwise if a or b is -Inf or 0, the result is NaN */
4a238c70SJohn Marino      else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
4a238c70SJohn Marino            {
4a238c70SJohn Marino              MPFR_SET_INF(r);
4a238c70SJohn Marino              MPFR_SET_SAME_SIGN(r, op1);
4a238c70SJohn Marino              MPFR_RET(0); /* exact */
4a238c70SJohn Marino            }
4a238c70SJohn Marino          else
4a238c70SJohn Marino            {
4a238c70SJohn Marino              MPFR_SET_NAN(r);
4a238c70SJohn Marino              MPFR_RET_NAN;
4a238c70SJohn Marino            }
4a238c70SJohn Marino        }
4a238c70SJohn Marino      else /* a and b are neither NaN nor Inf, and one is zero */
4a238c70SJohn Marino        {  /* If a or b is 0, the result is +0 since a sqrt is positive */
4a238c70SJohn Marino          MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
4a238c70SJohn Marino          MPFR_SET_POS (r);
4a238c70SJohn Marino          MPFR_SET_ZERO (r);
4a238c70SJohn Marino          MPFR_RET (0); /* exact */
4a238c70SJohn Marino        }
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  /* If a or b is negative (excluding -Infinity), the result is NaN */
4a238c70SJohn Marino  if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
4a238c70SJohn Marino    {
4a238c70SJohn Marino      MPFR_SET_NAN(r);
4a238c70SJohn Marino      MPFR_RET_NAN;
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Precision of the following calculus */
4a238c70SJohn Marino  q = MPFR_PREC(r);
4a238c70SJohn Marino  p = q + MPFR_INT_CEIL_LOG2(q) + 15;
4a238c70SJohn Marino  MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
*ab6d115fSJohn Marino  s = MPFR_PREC2LIMBS (p);
4a238c70SJohn Marino
4a238c70SJohn Marino  /* b (op2) and a (op1) are the 2 operands but we want b >= a */
4a238c70SJohn Marino  compare = mpfr_cmp (op1, op2);
4a238c70SJohn Marino  if (MPFR_UNLIKELY( compare == 0 ))
4a238c70SJohn Marino    {
4a238c70SJohn Marino      mpfr_set (r, op1, rnd_mode);
4a238c70SJohn Marino      MPFR_RET (0); /* exact */
4a238c70SJohn Marino    }
4a238c70SJohn Marino  else if (compare > 0)
4a238c70SJohn Marino    {
4a238c70SJohn Marino      mpfr_srcptr t = op1;
4a238c70SJohn Marino      op1 = op2;
4a238c70SJohn Marino      op2 = t;
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Now b (=op2) > a (=op1) */
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_SAVE_EXPO_MARK (expo);
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_TMP_MARK(marker);
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Main loop */
4a238c70SJohn Marino  MPFR_ZIV_INIT (loop, p);
4a238c70SJohn Marino  for (;;)
4a238c70SJohn Marino    {
4a238c70SJohn Marino      mpfr_prec_t eq;
4a238c70SJohn Marino      unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
4a238c70SJohn Marino      MPFR_BLOCK_DECL (flags);
4a238c70SJohn Marino
4a238c70SJohn Marino      /* Init temporary vars */
4a238c70SJohn Marino      MPFR_TMP_INIT (up, u, p, s);
4a238c70SJohn Marino      MPFR_TMP_INIT (vp, v, p, s);
4a238c70SJohn Marino      MPFR_TMP_INIT (ufp, uf, p, s);
4a238c70SJohn Marino      MPFR_TMP_INIT (vfp, vf, p, s);
4a238c70SJohn Marino
4a238c70SJohn Marino      /* Calculus of un and vn */
4a238c70SJohn Marino    retry:
4a238c70SJohn Marino      MPFR_BLOCK (flags,
4a238c70SJohn Marino                  mpfr_mul (u, op1, op2, MPFR_RNDN);
4a238c70SJohn Marino                  /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
4a238c70SJohn Marino                  mpfr_add (v, op1, op2, MPFR_RNDN);
4a238c70SJohn Marino                  /* mpfr_add with !=prec is still good */);
4a238c70SJohn Marino      if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          mpfr_exp_t e1 , e2;
4a238c70SJohn Marino
4a238c70SJohn Marino          MPFR_ASSERTN (scaleop == 0);
4a238c70SJohn Marino          e1 = MPFR_GET_EXP (op1);
4a238c70SJohn Marino          e2 = MPFR_GET_EXP (op2);
4a238c70SJohn Marino
4a238c70SJohn Marino          /* Let's determine scaleop to avoid an overflow/underflow. */
4a238c70SJohn Marino          if (MPFR_OVERFLOW (flags))
4a238c70SJohn Marino            {
4a238c70SJohn Marino              /* Let's recall that emin <= e1 <= e2 <= emax.
4a238c70SJohn Marino                 There has been an overflow. Thus e2 >= emax/2.
4a238c70SJohn Marino                 If the mpfr_mul overflowed, then e1 + e2 > emax.
4a238c70SJohn Marino                 If the mpfr_add overflowed, then e2 = emax.
4a238c70SJohn Marino                 We want: (e1 + scale) + (e2 + scale) <= emax,
4a238c70SJohn Marino                 i.e. scale <= (emax - e1 - e2) / 2. Let's take
4a238c70SJohn Marino                 scale = min(floor((emax - e1 - e2) / 2), -1).
4a238c70SJohn Marino                 This is OK, as:
4a238c70SJohn Marino                 1. emin <= scale <= -1.
4a238c70SJohn Marino                 2. e1 + scale >= emin. Indeed:
4a238c70SJohn Marino                    * If e1 + e2 > emax, then
4a238c70SJohn Marino                      e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
4a238c70SJohn Marino                                 >= (emax + e1 - emax) / 2 - 1
4a238c70SJohn Marino                                 >= e1 / 2 - 1 >= emin.
4a238c70SJohn Marino                    * Otherwise, mpfr_mul didn't overflow, therefore
4a238c70SJohn Marino                      mpfr_add overflowed and e2 = emax, so that
4a238c70SJohn Marino                      e1 > emin (see restriction below).
4a238c70SJohn Marino                      e1 + scale > emin - 1, thus e1 + scale >= emin.
4a238c70SJohn Marino                 3. e2 + scale <= emax, since scale < 0. */
4a238c70SJohn Marino              if (e1 + e2 > MPFR_EXT_EMAX)
4a238c70SJohn Marino                {
4a238c70SJohn Marino                  scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2);
4a238c70SJohn Marino                  MPFR_ASSERTN (scaleop < 0);
4a238c70SJohn Marino                }
4a238c70SJohn Marino              else
4a238c70SJohn Marino                {
4a238c70SJohn Marino                  /* The addition necessarily overflowed. */
4a238c70SJohn Marino                  MPFR_ASSERTN (e2 == MPFR_EXT_EMAX);
4a238c70SJohn Marino                  /* The case where e1 = emin and e2 = emax is not supported
4a238c70SJohn Marino                     here. This would mean that the precision of e2 would be
4a238c70SJohn Marino                     huge (and possibly not supported in practice anyway). */
4a238c70SJohn Marino                  MPFR_ASSERTN (e1 > MPFR_EXT_EMIN);
4a238c70SJohn Marino                  scaleop = -1;
4a238c70SJohn Marino                }
4a238c70SJohn Marino
4a238c70SJohn Marino            }
4a238c70SJohn Marino          else  /* underflow only (in the multiplication) */
4a238c70SJohn Marino            {
4a238c70SJohn Marino              /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
4a238c70SJohn Marino                 We want: (e1 + scale) + (e2 + scale) >= emin + 1,
4a238c70SJohn Marino                 i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
4a238c70SJohn Marino                 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
4a238c70SJohn Marino                 1. 1 <= scale <= emax.
4a238c70SJohn Marino                 2. e1 + scale >= emin + 1 >= emin.
4a238c70SJohn Marino                 3. e2 + scale <= scale <= emax. */
4a238c70SJohn Marino              MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
4a238c70SJohn Marino              scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2;
4a238c70SJohn Marino              MPFR_ASSERTN (scaleop > 0);
4a238c70SJohn Marino            }
4a238c70SJohn Marino
4a238c70SJohn Marino          MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
4a238c70SJohn Marino          MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
4a238c70SJohn Marino          op1 = sc1;
4a238c70SJohn Marino          op2 = sc2;
4a238c70SJohn Marino          MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
4a238c70SJohn Marino                         MPFR_EXP_FSPEC "d\n", scaleop));
4a238c70SJohn Marino          goto retry;
4a238c70SJohn Marino        }
4a238c70SJohn Marino
4a238c70SJohn Marino      mpfr_clear_flags ();
4a238c70SJohn Marino      mpfr_sqrt (u, u, MPFR_RNDN);
4a238c70SJohn Marino      mpfr_div_2ui (v, v, 1, MPFR_RNDN);
4a238c70SJohn Marino
4a238c70SJohn Marino      scaleit = 0;
4a238c70SJohn Marino      n = 1;
4a238c70SJohn Marino      while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
4a238c70SJohn Marino        {
4a238c70SJohn Marino          MPFR_BLOCK_DECL (flags2);
4a238c70SJohn Marino
4a238c70SJohn Marino          MPFR_LOG_MSG (("Iteration n = %lu\n", n));
4a238c70SJohn Marino
4a238c70SJohn Marino        retry2:
4a238c70SJohn Marino          mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
4a238c70SJohn Marino          mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
4a238c70SJohn Marino          /* See proof in algorithms.tex */
4a238c70SJohn Marino          if (4*eq > p)
4a238c70SJohn Marino            {
4a238c70SJohn Marino              mpfr_t w;
4a238c70SJohn Marino              MPFR_BLOCK_DECL (flags3);
4a238c70SJohn Marino
4a238c70SJohn Marino              MPFR_LOG_MSG (("4*eq > p\n", 0));
4a238c70SJohn Marino
4a238c70SJohn Marino              /* vf = V(k) */
4a238c70SJohn Marino              mpfr_init2 (w, (p + 1) / 2);
4a238c70SJohn Marino              MPFR_BLOCK
4a238c70SJohn Marino                (flags3,
4a238c70SJohn Marino                 mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
4a238c70SJohn Marino                 mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
4a238c70SJohn Marino                 mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
4a238c70SJohn Marino                 mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
4a238c70SJohn Marino                 );
4a238c70SJohn Marino              if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
4a238c70SJohn Marino                {
4a238c70SJohn Marino                  mpfr_sub (v, vf, w, MPFR_RNDN);
4a238c70SJohn Marino                  err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
4a238c70SJohn Marino                  mpfr_clear (w);
4a238c70SJohn Marino                  break;
4a238c70SJohn Marino                }
4a238c70SJohn Marino              /* There has been an underflow because of the cancellation
4a238c70SJohn Marino                 between V(k-1) and U(k-1). Let's use the conventional
4a238c70SJohn Marino                 method. */
4a238c70SJohn Marino              MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
4a238c70SJohn Marino              mpfr_clear (w);
4a238c70SJohn Marino              mpfr_clear_underflow ();
4a238c70SJohn Marino            }
4a238c70SJohn Marino          /* U(k) increases, so that U.V can overflow (but not underflow). */
4a238c70SJohn Marino          MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
4a238c70SJohn Marino          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
4a238c70SJohn Marino            {
4a238c70SJohn Marino              mpfr_exp_t scale2;
4a238c70SJohn Marino
4a238c70SJohn Marino              scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
4a238c70SJohn Marino                           - MPFR_EXT_EMAX + 1) / 2);
4a238c70SJohn Marino              MPFR_EXP (u) += scale2;
4a238c70SJohn Marino              MPFR_EXP (v) += scale2;
4a238c70SJohn Marino              scaleit += scale2;
4a238c70SJohn Marino              MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
4a238c70SJohn Marino                             MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
4a238c70SJohn Marino                             n, scaleit, scale2));
4a238c70SJohn Marino              mpfr_clear_overflow ();
4a238c70SJohn Marino              goto retry2;
4a238c70SJohn Marino            }
4a238c70SJohn Marino          mpfr_sqrt (u, uf, MPFR_RNDN);
4a238c70SJohn Marino          mpfr_swap (v, vf);
4a238c70SJohn Marino          n ++;
4a238c70SJohn Marino        }
4a238c70SJohn Marino
4a238c70SJohn Marino      MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
4a238c70SJohn Marino
4a238c70SJohn Marino      /* the error on v is bounded by (18n+51) ulps, or twice if there
4a238c70SJohn Marino         was an exponent loss in the final subtraction */
4a238c70SJohn Marino      err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
4a238c70SJohn Marino                                                 since n is about log(p) */
4a238c70SJohn Marino      /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
4a238c70SJohn Marino      if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
4a238c70SJohn Marino                       MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
4a238c70SJohn Marino        break; /* Stop the loop */
4a238c70SJohn Marino
4a238c70SJohn Marino      /* Next iteration */
4a238c70SJohn Marino      MPFR_ZIV_NEXT (loop, p);
*ab6d115fSJohn Marino      s = MPFR_PREC2LIMBS (p);
4a238c70SJohn Marino    }
4a238c70SJohn Marino  MPFR_ZIV_FREE (loop);
4a238c70SJohn Marino
4a238c70SJohn Marino  if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
4a238c70SJohn Marino                     != 0))
4a238c70SJohn Marino    {
4a238c70SJohn Marino      MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
4a238c70SJohn Marino      MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
4a238c70SJohn Marino      MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
4a238c70SJohn Marino      MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Setting of the result */
4a238c70SJohn Marino  inexact = mpfr_set (r, v, rnd_mode);
4a238c70SJohn Marino  MPFR_EXP (r) -= scaleop + scaleit;
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Let's clean */
4a238c70SJohn Marino  MPFR_TMP_FREE(marker);
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_SAVE_EXPO_FREE (expo);
4a238c70SJohn Marino  /* From the definition of the AGM, underflow and overflow
4a238c70SJohn Marino     are not possible. */
4a238c70SJohn Marino  return mpfr_check_range (r, inexact, rnd_mode);
4a238c70SJohn Marino  /* agm(u,v) can be exact for u, v rational only for u=v.
4a238c70SJohn Marino     Proof (due to Nicolas Brisebarre): it suffices to consider
4a238c70SJohn Marino     u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
4a238c70SJohn Marino     and a theorem due to G.V. Chudnovsky states that for x a
4a238c70SJohn Marino     non-zero algebraic number with |x|<1, then
4a238c70SJohn Marino     2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
4a238c70SJohn Marino     independent over Q. */
4a238c70SJohn Marino}