14a238c70SJohn Marino /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers
24a238c70SJohn Marino
3*ab6d115fSJohn Marino Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino
234a238c70SJohn Marino #define MPFR_NEED_LONGLONG_H
244a238c70SJohn Marino #include "mpfr-impl.h"
254a238c70SJohn Marino
264a238c70SJohn Marino /* agm(x,y) is between x and y, so we don't need to save exponent range */
274a238c70SJohn Marino int
mpfr_agm(mpfr_ptr r,mpfr_srcptr op2,mpfr_srcptr op1,mpfr_rnd_t rnd_mode)284a238c70SJohn Marino mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
294a238c70SJohn Marino {
304a238c70SJohn Marino int compare, inexact;
314a238c70SJohn Marino mp_size_t s;
324a238c70SJohn Marino mpfr_prec_t p, q;
334a238c70SJohn Marino mp_limb_t *up, *vp, *ufp, *vfp;
344a238c70SJohn Marino mpfr_t u, v, uf, vf, sc1, sc2;
354a238c70SJohn Marino mpfr_exp_t scaleop = 0, scaleit;
364a238c70SJohn Marino unsigned long n; /* number of iterations */
374a238c70SJohn Marino MPFR_ZIV_DECL (loop);
384a238c70SJohn Marino MPFR_TMP_DECL(marker);
394a238c70SJohn Marino MPFR_SAVE_EXPO_DECL (expo);
404a238c70SJohn Marino
414a238c70SJohn Marino MPFR_LOG_FUNC
424a238c70SJohn Marino (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
434a238c70SJohn Marino mpfr_get_prec (op2), mpfr_log_prec, op2,
444a238c70SJohn Marino mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
454a238c70SJohn Marino ("r[%Pu]=%.*Rg inexact=%d",
464a238c70SJohn Marino mpfr_get_prec (r), mpfr_log_prec, r, inexact));
474a238c70SJohn Marino
484a238c70SJohn Marino /* Deal with special values */
494a238c70SJohn Marino if (MPFR_ARE_SINGULAR (op1, op2))
504a238c70SJohn Marino {
514a238c70SJohn Marino /* If a or b is NaN, the result is NaN */
524a238c70SJohn Marino if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
534a238c70SJohn Marino {
544a238c70SJohn Marino MPFR_SET_NAN(r);
554a238c70SJohn Marino MPFR_RET_NAN;
564a238c70SJohn Marino }
574a238c70SJohn Marino /* now one of a or b is Inf or 0 */
584a238c70SJohn Marino /* If a and b is +Inf, the result is +Inf.
594a238c70SJohn Marino Otherwise if a or b is -Inf or 0, the result is NaN */
604a238c70SJohn Marino else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
614a238c70SJohn Marino {
624a238c70SJohn Marino if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
634a238c70SJohn Marino {
644a238c70SJohn Marino MPFR_SET_INF(r);
654a238c70SJohn Marino MPFR_SET_SAME_SIGN(r, op1);
664a238c70SJohn Marino MPFR_RET(0); /* exact */
674a238c70SJohn Marino }
684a238c70SJohn Marino else
694a238c70SJohn Marino {
704a238c70SJohn Marino MPFR_SET_NAN(r);
714a238c70SJohn Marino MPFR_RET_NAN;
724a238c70SJohn Marino }
734a238c70SJohn Marino }
744a238c70SJohn Marino else /* a and b are neither NaN nor Inf, and one is zero */
754a238c70SJohn Marino { /* If a or b is 0, the result is +0 since a sqrt is positive */
764a238c70SJohn Marino MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
774a238c70SJohn Marino MPFR_SET_POS (r);
784a238c70SJohn Marino MPFR_SET_ZERO (r);
794a238c70SJohn Marino MPFR_RET (0); /* exact */
804a238c70SJohn Marino }
814a238c70SJohn Marino }
824a238c70SJohn Marino
834a238c70SJohn Marino /* If a or b is negative (excluding -Infinity), the result is NaN */
844a238c70SJohn Marino if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
854a238c70SJohn Marino {
864a238c70SJohn Marino MPFR_SET_NAN(r);
874a238c70SJohn Marino MPFR_RET_NAN;
884a238c70SJohn Marino }
894a238c70SJohn Marino
904a238c70SJohn Marino /* Precision of the following calculus */
914a238c70SJohn Marino q = MPFR_PREC(r);
924a238c70SJohn Marino p = q + MPFR_INT_CEIL_LOG2(q) + 15;
934a238c70SJohn Marino MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
94*ab6d115fSJohn Marino s = MPFR_PREC2LIMBS (p);
954a238c70SJohn Marino
964a238c70SJohn Marino /* b (op2) and a (op1) are the 2 operands but we want b >= a */
974a238c70SJohn Marino compare = mpfr_cmp (op1, op2);
984a238c70SJohn Marino if (MPFR_UNLIKELY( compare == 0 ))
994a238c70SJohn Marino {
1004a238c70SJohn Marino mpfr_set (r, op1, rnd_mode);
1014a238c70SJohn Marino MPFR_RET (0); /* exact */
1024a238c70SJohn Marino }
1034a238c70SJohn Marino else if (compare > 0)
1044a238c70SJohn Marino {
1054a238c70SJohn Marino mpfr_srcptr t = op1;
1064a238c70SJohn Marino op1 = op2;
1074a238c70SJohn Marino op2 = t;
1084a238c70SJohn Marino }
1094a238c70SJohn Marino
1104a238c70SJohn Marino /* Now b (=op2) > a (=op1) */
1114a238c70SJohn Marino
1124a238c70SJohn Marino MPFR_SAVE_EXPO_MARK (expo);
1134a238c70SJohn Marino
1144a238c70SJohn Marino MPFR_TMP_MARK(marker);
1154a238c70SJohn Marino
1164a238c70SJohn Marino /* Main loop */
1174a238c70SJohn Marino MPFR_ZIV_INIT (loop, p);
1184a238c70SJohn Marino for (;;)
1194a238c70SJohn Marino {
1204a238c70SJohn Marino mpfr_prec_t eq;
1214a238c70SJohn Marino unsigned long err = 0; /* must be set to 0 at each Ziv iteration */
1224a238c70SJohn Marino MPFR_BLOCK_DECL (flags);
1234a238c70SJohn Marino
1244a238c70SJohn Marino /* Init temporary vars */
1254a238c70SJohn Marino MPFR_TMP_INIT (up, u, p, s);
1264a238c70SJohn Marino MPFR_TMP_INIT (vp, v, p, s);
1274a238c70SJohn Marino MPFR_TMP_INIT (ufp, uf, p, s);
1284a238c70SJohn Marino MPFR_TMP_INIT (vfp, vf, p, s);
1294a238c70SJohn Marino
1304a238c70SJohn Marino /* Calculus of un and vn */
1314a238c70SJohn Marino retry:
1324a238c70SJohn Marino MPFR_BLOCK (flags,
1334a238c70SJohn Marino mpfr_mul (u, op1, op2, MPFR_RNDN);
1344a238c70SJohn Marino /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
1354a238c70SJohn Marino mpfr_add (v, op1, op2, MPFR_RNDN);
1364a238c70SJohn Marino /* mpfr_add with !=prec is still good */);
1374a238c70SJohn Marino if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
1384a238c70SJohn Marino {
1394a238c70SJohn Marino mpfr_exp_t e1 , e2;
1404a238c70SJohn Marino
1414a238c70SJohn Marino MPFR_ASSERTN (scaleop == 0);
1424a238c70SJohn Marino e1 = MPFR_GET_EXP (op1);
1434a238c70SJohn Marino e2 = MPFR_GET_EXP (op2);
1444a238c70SJohn Marino
1454a238c70SJohn Marino /* Let's determine scaleop to avoid an overflow/underflow. */
1464a238c70SJohn Marino if (MPFR_OVERFLOW (flags))
1474a238c70SJohn Marino {
1484a238c70SJohn Marino /* Let's recall that emin <= e1 <= e2 <= emax.
1494a238c70SJohn Marino There has been an overflow. Thus e2 >= emax/2.
1504a238c70SJohn Marino If the mpfr_mul overflowed, then e1 + e2 > emax.
1514a238c70SJohn Marino If the mpfr_add overflowed, then e2 = emax.
1524a238c70SJohn Marino We want: (e1 + scale) + (e2 + scale) <= emax,
1534a238c70SJohn Marino i.e. scale <= (emax - e1 - e2) / 2. Let's take
1544a238c70SJohn Marino scale = min(floor((emax - e1 - e2) / 2), -1).
1554a238c70SJohn Marino This is OK, as:
1564a238c70SJohn Marino 1. emin <= scale <= -1.
1574a238c70SJohn Marino 2. e1 + scale >= emin. Indeed:
1584a238c70SJohn Marino * If e1 + e2 > emax, then
1594a238c70SJohn Marino e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
1604a238c70SJohn Marino >= (emax + e1 - emax) / 2 - 1
1614a238c70SJohn Marino >= e1 / 2 - 1 >= emin.
1624a238c70SJohn Marino * Otherwise, mpfr_mul didn't overflow, therefore
1634a238c70SJohn Marino mpfr_add overflowed and e2 = emax, so that
1644a238c70SJohn Marino e1 > emin (see restriction below).
1654a238c70SJohn Marino e1 + scale > emin - 1, thus e1 + scale >= emin.
1664a238c70SJohn Marino 3. e2 + scale <= emax, since scale < 0. */
1674a238c70SJohn Marino if (e1 + e2 > MPFR_EXT_EMAX)
1684a238c70SJohn Marino {
1694a238c70SJohn Marino scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2);
1704a238c70SJohn Marino MPFR_ASSERTN (scaleop < 0);
1714a238c70SJohn Marino }
1724a238c70SJohn Marino else
1734a238c70SJohn Marino {
1744a238c70SJohn Marino /* The addition necessarily overflowed. */
1754a238c70SJohn Marino MPFR_ASSERTN (e2 == MPFR_EXT_EMAX);
1764a238c70SJohn Marino /* The case where e1 = emin and e2 = emax is not supported
1774a238c70SJohn Marino here. This would mean that the precision of e2 would be
1784a238c70SJohn Marino huge (and possibly not supported in practice anyway). */
1794a238c70SJohn Marino MPFR_ASSERTN (e1 > MPFR_EXT_EMIN);
1804a238c70SJohn Marino scaleop = -1;
1814a238c70SJohn Marino }
1824a238c70SJohn Marino
1834a238c70SJohn Marino }
1844a238c70SJohn Marino else /* underflow only (in the multiplication) */
1854a238c70SJohn Marino {
1864a238c70SJohn Marino /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
1874a238c70SJohn Marino We want: (e1 + scale) + (e2 + scale) >= emin + 1,
1884a238c70SJohn Marino i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
1894a238c70SJohn Marino scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
1904a238c70SJohn Marino 1. 1 <= scale <= emax.
1914a238c70SJohn Marino 2. e1 + scale >= emin + 1 >= emin.
1924a238c70SJohn Marino 3. e2 + scale <= scale <= emax. */
1934a238c70SJohn Marino MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
1944a238c70SJohn Marino scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2;
1954a238c70SJohn Marino MPFR_ASSERTN (scaleop > 0);
1964a238c70SJohn Marino }
1974a238c70SJohn Marino
1984a238c70SJohn Marino MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
1994a238c70SJohn Marino MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
2004a238c70SJohn Marino op1 = sc1;
2014a238c70SJohn Marino op2 = sc2;
2024a238c70SJohn Marino MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
2034a238c70SJohn Marino MPFR_EXP_FSPEC "d\n", scaleop));
2044a238c70SJohn Marino goto retry;
2054a238c70SJohn Marino }
2064a238c70SJohn Marino
2074a238c70SJohn Marino mpfr_clear_flags ();
2084a238c70SJohn Marino mpfr_sqrt (u, u, MPFR_RNDN);
2094a238c70SJohn Marino mpfr_div_2ui (v, v, 1, MPFR_RNDN);
2104a238c70SJohn Marino
2114a238c70SJohn Marino scaleit = 0;
2124a238c70SJohn Marino n = 1;
2134a238c70SJohn Marino while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
2144a238c70SJohn Marino {
2154a238c70SJohn Marino MPFR_BLOCK_DECL (flags2);
2164a238c70SJohn Marino
2174a238c70SJohn Marino MPFR_LOG_MSG (("Iteration n = %lu\n", n));
2184a238c70SJohn Marino
2194a238c70SJohn Marino retry2:
2204a238c70SJohn Marino mpfr_add (vf, u, v, MPFR_RNDN); /* No overflow? */
2214a238c70SJohn Marino mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
2224a238c70SJohn Marino /* See proof in algorithms.tex */
2234a238c70SJohn Marino if (4*eq > p)
2244a238c70SJohn Marino {
2254a238c70SJohn Marino mpfr_t w;
2264a238c70SJohn Marino MPFR_BLOCK_DECL (flags3);
2274a238c70SJohn Marino
2284a238c70SJohn Marino MPFR_LOG_MSG (("4*eq > p\n", 0));
2294a238c70SJohn Marino
2304a238c70SJohn Marino /* vf = V(k) */
2314a238c70SJohn Marino mpfr_init2 (w, (p + 1) / 2);
2324a238c70SJohn Marino MPFR_BLOCK
2334a238c70SJohn Marino (flags3,
2344a238c70SJohn Marino mpfr_sub (w, v, u, MPFR_RNDN); /* e = V(k-1)-U(k-1) */
2354a238c70SJohn Marino mpfr_sqr (w, w, MPFR_RNDN); /* e = e^2 */
2364a238c70SJohn Marino mpfr_div_2ui (w, w, 4, MPFR_RNDN); /* e*= (1/2)^2*1/4 */
2374a238c70SJohn Marino mpfr_div (w, w, vf, MPFR_RNDN); /* 1/4*e^2/V(k) */
2384a238c70SJohn Marino );
2394a238c70SJohn Marino if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
2404a238c70SJohn Marino {
2414a238c70SJohn Marino mpfr_sub (v, vf, w, MPFR_RNDN);
2424a238c70SJohn Marino err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
2434a238c70SJohn Marino mpfr_clear (w);
2444a238c70SJohn Marino break;
2454a238c70SJohn Marino }
2464a238c70SJohn Marino /* There has been an underflow because of the cancellation
2474a238c70SJohn Marino between V(k-1) and U(k-1). Let's use the conventional
2484a238c70SJohn Marino method. */
2494a238c70SJohn Marino MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
2504a238c70SJohn Marino mpfr_clear (w);
2514a238c70SJohn Marino mpfr_clear_underflow ();
2524a238c70SJohn Marino }
2534a238c70SJohn Marino /* U(k) increases, so that U.V can overflow (but not underflow). */
2544a238c70SJohn Marino MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
2554a238c70SJohn Marino if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
2564a238c70SJohn Marino {
2574a238c70SJohn Marino mpfr_exp_t scale2;
2584a238c70SJohn Marino
2594a238c70SJohn Marino scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
2604a238c70SJohn Marino - MPFR_EXT_EMAX + 1) / 2);
2614a238c70SJohn Marino MPFR_EXP (u) += scale2;
2624a238c70SJohn Marino MPFR_EXP (v) += scale2;
2634a238c70SJohn Marino scaleit += scale2;
2644a238c70SJohn Marino MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
2654a238c70SJohn Marino MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
2664a238c70SJohn Marino n, scaleit, scale2));
2674a238c70SJohn Marino mpfr_clear_overflow ();
2684a238c70SJohn Marino goto retry2;
2694a238c70SJohn Marino }
2704a238c70SJohn Marino mpfr_sqrt (u, uf, MPFR_RNDN);
2714a238c70SJohn Marino mpfr_swap (v, vf);
2724a238c70SJohn Marino n ++;
2734a238c70SJohn Marino }
2744a238c70SJohn Marino
2754a238c70SJohn Marino MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
2764a238c70SJohn Marino
2774a238c70SJohn Marino /* the error on v is bounded by (18n+51) ulps, or twice if there
2784a238c70SJohn Marino was an exponent loss in the final subtraction */
2794a238c70SJohn Marino err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
2804a238c70SJohn Marino since n is about log(p) */
2814a238c70SJohn Marino /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
2824a238c70SJohn Marino if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
2834a238c70SJohn Marino MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
2844a238c70SJohn Marino break; /* Stop the loop */
2854a238c70SJohn Marino
2864a238c70SJohn Marino /* Next iteration */
2874a238c70SJohn Marino MPFR_ZIV_NEXT (loop, p);
288*ab6d115fSJohn Marino s = MPFR_PREC2LIMBS (p);
2894a238c70SJohn Marino }
2904a238c70SJohn Marino MPFR_ZIV_FREE (loop);
2914a238c70SJohn Marino
2924a238c70SJohn Marino if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
2934a238c70SJohn Marino != 0))
2944a238c70SJohn Marino {
2954a238c70SJohn Marino MPFR_ASSERTN (! mpfr_overflow_p ()); /* since mpfr_clear_flags */
2964a238c70SJohn Marino MPFR_ASSERTN (! mpfr_underflow_p ()); /* since mpfr_clear_flags */
2974a238c70SJohn Marino MPFR_ASSERTN (! mpfr_divby0_p ()); /* since mpfr_clear_flags */
2984a238c70SJohn Marino MPFR_ASSERTN (! mpfr_nanflag_p ()); /* since mpfr_clear_flags */
2994a238c70SJohn Marino }
3004a238c70SJohn Marino
3014a238c70SJohn Marino /* Setting of the result */
3024a238c70SJohn Marino inexact = mpfr_set (r, v, rnd_mode);
3034a238c70SJohn Marino MPFR_EXP (r) -= scaleop + scaleit;
3044a238c70SJohn Marino
3054a238c70SJohn Marino /* Let's clean */
3064a238c70SJohn Marino MPFR_TMP_FREE(marker);
3074a238c70SJohn Marino
3084a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo);
3094a238c70SJohn Marino /* From the definition of the AGM, underflow and overflow
3104a238c70SJohn Marino are not possible. */
3114a238c70SJohn Marino return mpfr_check_range (r, inexact, rnd_mode);
3124a238c70SJohn Marino /* agm(u,v) can be exact for u, v rational only for u=v.
3134a238c70SJohn Marino Proof (due to Nicolas Brisebarre): it suffices to consider
3144a238c70SJohn Marino u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
3154a238c70SJohn Marino and a theorem due to G.V. Chudnovsky states that for x a
3164a238c70SJohn Marino non-zero algebraic number with |x|<1, then
3174a238c70SJohn Marino 2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
3184a238c70SJohn Marino independent over Q. */
3194a238c70SJohn Marino }
320