xref: /dflybsd-src/contrib/mpfr/src/agm.c (revision 2786097444a0124b5d33763854de247e230c6629)
14a238c70SJohn Marino /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers
24a238c70SJohn Marino 
3*ab6d115fSJohn Marino Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino 
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino 
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino 
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino 
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino 
234a238c70SJohn Marino #define MPFR_NEED_LONGLONG_H
244a238c70SJohn Marino #include "mpfr-impl.h"
254a238c70SJohn Marino 
264a238c70SJohn Marino /* agm(x,y) is between x and y, so we don't need to save exponent range */
274a238c70SJohn Marino int
mpfr_agm(mpfr_ptr r,mpfr_srcptr op2,mpfr_srcptr op1,mpfr_rnd_t rnd_mode)284a238c70SJohn Marino mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
294a238c70SJohn Marino {
304a238c70SJohn Marino   int compare, inexact;
314a238c70SJohn Marino   mp_size_t s;
324a238c70SJohn Marino   mpfr_prec_t p, q;
334a238c70SJohn Marino   mp_limb_t *up, *vp, *ufp, *vfp;
344a238c70SJohn Marino   mpfr_t u, v, uf, vf, sc1, sc2;
354a238c70SJohn Marino   mpfr_exp_t scaleop = 0, scaleit;
364a238c70SJohn Marino   unsigned long n; /* number of iterations */
374a238c70SJohn Marino   MPFR_ZIV_DECL (loop);
384a238c70SJohn Marino   MPFR_TMP_DECL(marker);
394a238c70SJohn Marino   MPFR_SAVE_EXPO_DECL (expo);
404a238c70SJohn Marino 
414a238c70SJohn Marino   MPFR_LOG_FUNC
424a238c70SJohn Marino     (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
434a238c70SJohn Marino       mpfr_get_prec (op2), mpfr_log_prec, op2,
444a238c70SJohn Marino       mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
454a238c70SJohn Marino      ("r[%Pu]=%.*Rg inexact=%d",
464a238c70SJohn Marino       mpfr_get_prec (r), mpfr_log_prec, r, inexact));
474a238c70SJohn Marino 
484a238c70SJohn Marino   /* Deal with special values */
494a238c70SJohn Marino   if (MPFR_ARE_SINGULAR (op1, op2))
504a238c70SJohn Marino     {
514a238c70SJohn Marino       /* If a or b is NaN, the result is NaN */
524a238c70SJohn Marino       if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
534a238c70SJohn Marino         {
544a238c70SJohn Marino           MPFR_SET_NAN(r);
554a238c70SJohn Marino           MPFR_RET_NAN;
564a238c70SJohn Marino         }
574a238c70SJohn Marino       /* now one of a or b is Inf or 0 */
584a238c70SJohn Marino       /* If a and b is +Inf, the result is +Inf.
594a238c70SJohn Marino          Otherwise if a or b is -Inf or 0, the result is NaN */
604a238c70SJohn Marino       else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
614a238c70SJohn Marino         {
624a238c70SJohn Marino           if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
634a238c70SJohn Marino             {
644a238c70SJohn Marino               MPFR_SET_INF(r);
654a238c70SJohn Marino               MPFR_SET_SAME_SIGN(r, op1);
664a238c70SJohn Marino               MPFR_RET(0); /* exact */
674a238c70SJohn Marino             }
684a238c70SJohn Marino           else
694a238c70SJohn Marino             {
704a238c70SJohn Marino               MPFR_SET_NAN(r);
714a238c70SJohn Marino               MPFR_RET_NAN;
724a238c70SJohn Marino             }
734a238c70SJohn Marino         }
744a238c70SJohn Marino       else /* a and b are neither NaN nor Inf, and one is zero */
754a238c70SJohn Marino         {  /* If a or b is 0, the result is +0 since a sqrt is positive */
764a238c70SJohn Marino           MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
774a238c70SJohn Marino           MPFR_SET_POS (r);
784a238c70SJohn Marino           MPFR_SET_ZERO (r);
794a238c70SJohn Marino           MPFR_RET (0); /* exact */
804a238c70SJohn Marino         }
814a238c70SJohn Marino     }
824a238c70SJohn Marino 
834a238c70SJohn Marino   /* If a or b is negative (excluding -Infinity), the result is NaN */
844a238c70SJohn Marino   if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
854a238c70SJohn Marino     {
864a238c70SJohn Marino       MPFR_SET_NAN(r);
874a238c70SJohn Marino       MPFR_RET_NAN;
884a238c70SJohn Marino     }
894a238c70SJohn Marino 
904a238c70SJohn Marino   /* Precision of the following calculus */
914a238c70SJohn Marino   q = MPFR_PREC(r);
924a238c70SJohn Marino   p = q + MPFR_INT_CEIL_LOG2(q) + 15;
934a238c70SJohn Marino   MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
94*ab6d115fSJohn Marino   s = MPFR_PREC2LIMBS (p);
954a238c70SJohn Marino 
964a238c70SJohn Marino   /* b (op2) and a (op1) are the 2 operands but we want b >= a */
974a238c70SJohn Marino   compare = mpfr_cmp (op1, op2);
984a238c70SJohn Marino   if (MPFR_UNLIKELY( compare == 0 ))
994a238c70SJohn Marino     {
1004a238c70SJohn Marino       mpfr_set (r, op1, rnd_mode);
1014a238c70SJohn Marino       MPFR_RET (0); /* exact */
1024a238c70SJohn Marino     }
1034a238c70SJohn Marino   else if (compare > 0)
1044a238c70SJohn Marino     {
1054a238c70SJohn Marino       mpfr_srcptr t = op1;
1064a238c70SJohn Marino       op1 = op2;
1074a238c70SJohn Marino       op2 = t;
1084a238c70SJohn Marino     }
1094a238c70SJohn Marino 
1104a238c70SJohn Marino   /* Now b (=op2) > a (=op1) */
1114a238c70SJohn Marino 
1124a238c70SJohn Marino   MPFR_SAVE_EXPO_MARK (expo);
1134a238c70SJohn Marino 
1144a238c70SJohn Marino   MPFR_TMP_MARK(marker);
1154a238c70SJohn Marino 
1164a238c70SJohn Marino   /* Main loop */
1174a238c70SJohn Marino   MPFR_ZIV_INIT (loop, p);
1184a238c70SJohn Marino   for (;;)
1194a238c70SJohn Marino     {
1204a238c70SJohn Marino       mpfr_prec_t eq;
1214a238c70SJohn Marino       unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
1224a238c70SJohn Marino       MPFR_BLOCK_DECL (flags);
1234a238c70SJohn Marino 
1244a238c70SJohn Marino       /* Init temporary vars */
1254a238c70SJohn Marino       MPFR_TMP_INIT (up, u, p, s);
1264a238c70SJohn Marino       MPFR_TMP_INIT (vp, v, p, s);
1274a238c70SJohn Marino       MPFR_TMP_INIT (ufp, uf, p, s);
1284a238c70SJohn Marino       MPFR_TMP_INIT (vfp, vf, p, s);
1294a238c70SJohn Marino 
1304a238c70SJohn Marino       /* Calculus of un and vn */
1314a238c70SJohn Marino     retry:
1324a238c70SJohn Marino       MPFR_BLOCK (flags,
1334a238c70SJohn Marino                   mpfr_mul (u, op1, op2, MPFR_RNDN);
1344a238c70SJohn Marino                   /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
1354a238c70SJohn Marino                   mpfr_add (v, op1, op2, MPFR_RNDN);
1364a238c70SJohn Marino                   /* mpfr_add with !=prec is still good */);
1374a238c70SJohn Marino       if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
1384a238c70SJohn Marino         {
1394a238c70SJohn Marino           mpfr_exp_t e1 , e2;
1404a238c70SJohn Marino 
1414a238c70SJohn Marino           MPFR_ASSERTN (scaleop == 0);
1424a238c70SJohn Marino           e1 = MPFR_GET_EXP (op1);
1434a238c70SJohn Marino           e2 = MPFR_GET_EXP (op2);
1444a238c70SJohn Marino 
1454a238c70SJohn Marino           /* Let's determine scaleop to avoid an overflow/underflow. */
1464a238c70SJohn Marino           if (MPFR_OVERFLOW (flags))
1474a238c70SJohn Marino             {
1484a238c70SJohn Marino               /* Let's recall that emin <= e1 <= e2 <= emax.
1494a238c70SJohn Marino                  There has been an overflow. Thus e2 >= emax/2.
1504a238c70SJohn Marino                  If the mpfr_mul overflowed, then e1 + e2 > emax.
1514a238c70SJohn Marino                  If the mpfr_add overflowed, then e2 = emax.
1524a238c70SJohn Marino                  We want: (e1 + scale) + (e2 + scale) <= emax,
1534a238c70SJohn Marino                  i.e. scale <= (emax - e1 - e2) / 2. Let's take
1544a238c70SJohn Marino                  scale = min(floor((emax - e1 - e2) / 2), -1).
1554a238c70SJohn Marino                  This is OK, as:
1564a238c70SJohn Marino                  1. emin <= scale <= -1.
1574a238c70SJohn Marino                  2. e1 + scale >= emin. Indeed:
1584a238c70SJohn Marino                     * If e1 + e2 > emax, then
1594a238c70SJohn Marino                       e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
1604a238c70SJohn Marino                                  >= (emax + e1 - emax) / 2 - 1
1614a238c70SJohn Marino                                  >= e1 / 2 - 1 >= emin.
1624a238c70SJohn Marino                     * Otherwise, mpfr_mul didn't overflow, therefore
1634a238c70SJohn Marino                       mpfr_add overflowed and e2 = emax, so that
1644a238c70SJohn Marino                       e1 > emin (see restriction below).
1654a238c70SJohn Marino                       e1 + scale > emin - 1, thus e1 + scale >= emin.
1664a238c70SJohn Marino                  3. e2 + scale <= emax, since scale < 0. */
1674a238c70SJohn Marino               if (e1 + e2 > MPFR_EXT_EMAX)
1684a238c70SJohn Marino                 {
1694a238c70SJohn Marino                   scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2);
1704a238c70SJohn Marino                   MPFR_ASSERTN (scaleop < 0);
1714a238c70SJohn Marino                 }
1724a238c70SJohn Marino               else
1734a238c70SJohn Marino                 {
1744a238c70SJohn Marino                   /* The addition necessarily overflowed. */
1754a238c70SJohn Marino                   MPFR_ASSERTN (e2 == MPFR_EXT_EMAX);
1764a238c70SJohn Marino                   /* The case where e1 = emin and e2 = emax is not supported
1774a238c70SJohn Marino                      here. This would mean that the precision of e2 would be
1784a238c70SJohn Marino                      huge (and possibly not supported in practice anyway). */
1794a238c70SJohn Marino                   MPFR_ASSERTN (e1 > MPFR_EXT_EMIN);
1804a238c70SJohn Marino                   scaleop = -1;
1814a238c70SJohn Marino                 }
1824a238c70SJohn Marino 
1834a238c70SJohn Marino             }
1844a238c70SJohn Marino           else  /* underflow only (in the multiplication) */
1854a238c70SJohn Marino             {
1864a238c70SJohn Marino               /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
1874a238c70SJohn Marino                  We want: (e1 + scale) + (e2 + scale) >= emin + 1,
1884a238c70SJohn Marino                  i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
1894a238c70SJohn Marino                  scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
1904a238c70SJohn Marino                  1. 1 <= scale <= emax.
1914a238c70SJohn Marino                  2. e1 + scale >= emin + 1 >= emin.
1924a238c70SJohn Marino                  3. e2 + scale <= scale <= emax. */
1934a238c70SJohn Marino               MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
1944a238c70SJohn Marino               scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2;
1954a238c70SJohn Marino               MPFR_ASSERTN (scaleop > 0);
1964a238c70SJohn Marino             }
1974a238c70SJohn Marino 
1984a238c70SJohn Marino           MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
1994a238c70SJohn Marino           MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
2004a238c70SJohn Marino           op1 = sc1;
2014a238c70SJohn Marino           op2 = sc2;
2024a238c70SJohn Marino           MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
2034a238c70SJohn Marino                          MPFR_EXP_FSPEC "d\n", scaleop));
2044a238c70SJohn Marino           goto retry;
2054a238c70SJohn Marino         }
2064a238c70SJohn Marino 
2074a238c70SJohn Marino       mpfr_clear_flags ();
2084a238c70SJohn Marino       mpfr_sqrt (u, u, MPFR_RNDN);
2094a238c70SJohn Marino       mpfr_div_2ui (v, v, 1, MPFR_RNDN);
2104a238c70SJohn Marino 
2114a238c70SJohn Marino       scaleit = 0;
2124a238c70SJohn Marino       n = 1;
2134a238c70SJohn Marino       while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
2144a238c70SJohn Marino         {
2154a238c70SJohn Marino           MPFR_BLOCK_DECL (flags2);
2164a238c70SJohn Marino 
2174a238c70SJohn Marino           MPFR_LOG_MSG (("Iteration n = %lu\n", n));
2184a238c70SJohn Marino 
2194a238c70SJohn Marino         retry2:
2204a238c70SJohn Marino           mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
2214a238c70SJohn Marino           mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
2224a238c70SJohn Marino           /* See proof in algorithms.tex */
2234a238c70SJohn Marino           if (4*eq > p)
2244a238c70SJohn Marino             {
2254a238c70SJohn Marino               mpfr_t w;
2264a238c70SJohn Marino               MPFR_BLOCK_DECL (flags3);
2274a238c70SJohn Marino 
2284a238c70SJohn Marino               MPFR_LOG_MSG (("4*eq > p\n", 0));
2294a238c70SJohn Marino 
2304a238c70SJohn Marino               /* vf = V(k) */
2314a238c70SJohn Marino               mpfr_init2 (w, (p + 1) / 2);
2324a238c70SJohn Marino               MPFR_BLOCK
2334a238c70SJohn Marino                 (flags3,
2344a238c70SJohn Marino                  mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
2354a238c70SJohn Marino                  mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
2364a238c70SJohn Marino                  mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
2374a238c70SJohn Marino                  mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
2384a238c70SJohn Marino                  );
2394a238c70SJohn Marino               if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
2404a238c70SJohn Marino                 {
2414a238c70SJohn Marino                   mpfr_sub (v, vf, w, MPFR_RNDN);
2424a238c70SJohn Marino                   err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
2434a238c70SJohn Marino                   mpfr_clear (w);
2444a238c70SJohn Marino                   break;
2454a238c70SJohn Marino                 }
2464a238c70SJohn Marino               /* There has been an underflow because of the cancellation
2474a238c70SJohn Marino                  between V(k-1) and U(k-1). Let's use the conventional
2484a238c70SJohn Marino                  method. */
2494a238c70SJohn Marino               MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
2504a238c70SJohn Marino               mpfr_clear (w);
2514a238c70SJohn Marino               mpfr_clear_underflow ();
2524a238c70SJohn Marino             }
2534a238c70SJohn Marino           /* U(k) increases, so that U.V can overflow (but not underflow). */
2544a238c70SJohn Marino           MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
2554a238c70SJohn Marino           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
2564a238c70SJohn Marino             {
2574a238c70SJohn Marino               mpfr_exp_t scale2;
2584a238c70SJohn Marino 
2594a238c70SJohn Marino               scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
2604a238c70SJohn Marino                            - MPFR_EXT_EMAX + 1) / 2);
2614a238c70SJohn Marino               MPFR_EXP (u) += scale2;
2624a238c70SJohn Marino               MPFR_EXP (v) += scale2;
2634a238c70SJohn Marino               scaleit += scale2;
2644a238c70SJohn Marino               MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
2654a238c70SJohn Marino                              MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
2664a238c70SJohn Marino                              n, scaleit, scale2));
2674a238c70SJohn Marino               mpfr_clear_overflow ();
2684a238c70SJohn Marino               goto retry2;
2694a238c70SJohn Marino             }
2704a238c70SJohn Marino           mpfr_sqrt (u, uf, MPFR_RNDN);
2714a238c70SJohn Marino           mpfr_swap (v, vf);
2724a238c70SJohn Marino           n ++;
2734a238c70SJohn Marino         }
2744a238c70SJohn Marino 
2754a238c70SJohn Marino       MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
2764a238c70SJohn Marino 
2774a238c70SJohn Marino       /* the error on v is bounded by (18n+51) ulps, or twice if there
2784a238c70SJohn Marino          was an exponent loss in the final subtraction */
2794a238c70SJohn Marino       err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
2804a238c70SJohn Marino                                                  since n is about log(p) */
2814a238c70SJohn Marino       /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
2824a238c70SJohn Marino       if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
2834a238c70SJohn Marino                        MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
2844a238c70SJohn Marino         break; /* Stop the loop */
2854a238c70SJohn Marino 
2864a238c70SJohn Marino       /* Next iteration */
2874a238c70SJohn Marino       MPFR_ZIV_NEXT (loop, p);
288*ab6d115fSJohn Marino       s = MPFR_PREC2LIMBS (p);
2894a238c70SJohn Marino     }
2904a238c70SJohn Marino   MPFR_ZIV_FREE (loop);
2914a238c70SJohn Marino 
2924a238c70SJohn Marino   if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
2934a238c70SJohn Marino                      != 0))
2944a238c70SJohn Marino     {
2954a238c70SJohn Marino       MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
2964a238c70SJohn Marino       MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
2974a238c70SJohn Marino       MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
2984a238c70SJohn Marino       MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
2994a238c70SJohn Marino     }
3004a238c70SJohn Marino 
3014a238c70SJohn Marino   /* Setting of the result */
3024a238c70SJohn Marino   inexact = mpfr_set (r, v, rnd_mode);
3034a238c70SJohn Marino   MPFR_EXP (r) -= scaleop + scaleit;
3044a238c70SJohn Marino 
3054a238c70SJohn Marino   /* Let's clean */
3064a238c70SJohn Marino   MPFR_TMP_FREE(marker);
3074a238c70SJohn Marino 
3084a238c70SJohn Marino   MPFR_SAVE_EXPO_FREE (expo);
3094a238c70SJohn Marino   /* From the definition of the AGM, underflow and overflow
3104a238c70SJohn Marino      are not possible. */
3114a238c70SJohn Marino   return mpfr_check_range (r, inexact, rnd_mode);
3124a238c70SJohn Marino   /* agm(u,v) can be exact for u, v rational only for u=v.
3134a238c70SJohn Marino      Proof (due to Nicolas Brisebarre): it suffices to consider
3144a238c70SJohn Marino      u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
3154a238c70SJohn Marino      and a theorem due to G.V. Chudnovsky states that for x a
3164a238c70SJohn Marino      non-zero algebraic number with |x|<1, then
3174a238c70SJohn Marino      2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
3184a238c70SJohn Marino      independent over Q. */
3194a238c70SJohn Marino }
320