xref: /dflybsd-src/contrib/mpfr/src/exp_2.c (revision 2786097444a0124b5d33763854de247e230c6629)
14a238c70SJohn Marino /* mpfr_exp_2 -- exponential of a floating-point number
24a238c70SJohn Marino                  using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
34a238c70SJohn Marino 
4*ab6d115fSJohn Marino Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
5*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
64a238c70SJohn Marino 
74a238c70SJohn Marino This file is part of the GNU MPFR Library.
84a238c70SJohn Marino 
94a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
104a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
114a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
124a238c70SJohn Marino option) any later version.
134a238c70SJohn Marino 
144a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
154a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
164a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
174a238c70SJohn Marino License for more details.
184a238c70SJohn Marino 
194a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
204a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
214a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
224a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
234a238c70SJohn Marino 
244a238c70SJohn Marino /* #define DEBUG */
254a238c70SJohn Marino #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
264a238c70SJohn Marino #include "mpfr-impl.h"
274a238c70SJohn Marino 
284a238c70SJohn Marino static unsigned long
294a238c70SJohn Marino mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
304a238c70SJohn Marino static unsigned long
314a238c70SJohn Marino mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
324a238c70SJohn Marino static mpfr_exp_t
334a238c70SJohn Marino mpz_normalize  (mpz_t, mpz_t, mpfr_exp_t);
344a238c70SJohn Marino static mpfr_exp_t
354a238c70SJohn Marino mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
364a238c70SJohn Marino 
374a238c70SJohn Marino /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
384a238c70SJohn Marino    Otherwise do nothing and return 0.
394a238c70SJohn Marino  */
404a238c70SJohn Marino static mpfr_exp_t
mpz_normalize(mpz_t rop,mpz_t z,mpfr_exp_t q)414a238c70SJohn Marino mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
424a238c70SJohn Marino {
434a238c70SJohn Marino   size_t k;
444a238c70SJohn Marino 
454a238c70SJohn Marino   MPFR_MPZ_SIZEINBASE2 (k, z);
464a238c70SJohn Marino   MPFR_ASSERTD (k == (mpfr_uexp_t) k);
474a238c70SJohn Marino   if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q)
484a238c70SJohn Marino     {
494a238c70SJohn Marino       mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
504a238c70SJohn Marino       return (mpfr_exp_t) k - q;
514a238c70SJohn Marino     }
524a238c70SJohn Marino   if (MPFR_UNLIKELY(rop != z))
534a238c70SJohn Marino     mpz_set (rop, z);
544a238c70SJohn Marino   return 0;
554a238c70SJohn Marino }
564a238c70SJohn Marino 
574a238c70SJohn Marino /* if expz > target, shift z by (expz-target) bits to the left.
584a238c70SJohn Marino    if expz < target, shift z by (target-expz) bits to the right.
594a238c70SJohn Marino    Returns target.
604a238c70SJohn Marino */
614a238c70SJohn Marino static mpfr_exp_t
mpz_normalize2(mpz_t rop,mpz_t z,mpfr_exp_t expz,mpfr_exp_t target)624a238c70SJohn Marino mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
634a238c70SJohn Marino {
644a238c70SJohn Marino   if (target > expz)
654a238c70SJohn Marino     mpz_fdiv_q_2exp (rop, z, target - expz);
664a238c70SJohn Marino   else
674a238c70SJohn Marino     mpz_mul_2exp (rop, z, expz - target);
684a238c70SJohn Marino   return target;
694a238c70SJohn Marino }
704a238c70SJohn Marino 
714a238c70SJohn Marino /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
724a238c70SJohn Marino    where x = n*log(2)+(2^K)*r
734a238c70SJohn Marino    together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
744a238c70SJohn Marino    evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
754a238c70SJohn Marino    This function returns with the exact flags due to exp.
764a238c70SJohn Marino */
774a238c70SJohn Marino int
mpfr_exp_2(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)784a238c70SJohn Marino mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
794a238c70SJohn Marino {
804a238c70SJohn Marino   long n;
814a238c70SJohn Marino   unsigned long K, k, l, err; /* FIXME: Which type ? */
824a238c70SJohn Marino   int error_r;
834a238c70SJohn Marino   mpfr_exp_t exps, expx;
844a238c70SJohn Marino   mpfr_prec_t q, precy;
854a238c70SJohn Marino   int inexact;
864a238c70SJohn Marino   mpfr_t r, s;
874a238c70SJohn Marino   mpz_t ss;
884a238c70SJohn Marino   MPFR_ZIV_DECL (loop);
894a238c70SJohn Marino 
904a238c70SJohn Marino   MPFR_LOG_FUNC
914a238c70SJohn Marino     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
924a238c70SJohn Marino      ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
934a238c70SJohn Marino       inexact));
944a238c70SJohn Marino 
954a238c70SJohn Marino   expx = MPFR_GET_EXP (x);
964a238c70SJohn Marino   precy = MPFR_PREC(y);
974a238c70SJohn Marino 
984a238c70SJohn Marino   /* Warning: we cannot use the 'double' type here, since on 64-bit machines
994a238c70SJohn Marino      x may be as large as 2^62*log(2) without overflow, and then x/log(2)
1004a238c70SJohn Marino      is about 2^62: not every integer of that size can be represented as a
1014a238c70SJohn Marino      'double', thus the argument reduction would fail. */
1024a238c70SJohn Marino   if (expx <= -2)
1034a238c70SJohn Marino     /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
1044a238c70SJohn Marino     n = 0;
1054a238c70SJohn Marino   else
1064a238c70SJohn Marino     {
1074a238c70SJohn Marino       mpfr_init2 (r, sizeof (long) * CHAR_BIT);
1084a238c70SJohn Marino       mpfr_const_log2 (r, MPFR_RNDZ);
1094a238c70SJohn Marino       mpfr_div (r, x, r, MPFR_RNDN);
1104a238c70SJohn Marino       n = mpfr_get_si (r, MPFR_RNDN);
1114a238c70SJohn Marino       mpfr_clear (r);
1124a238c70SJohn Marino     }
1134a238c70SJohn Marino   /* we have |x| <= (|n|+1)*log(2) */
1144a238c70SJohn Marino   MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
1154a238c70SJohn Marino 
1164a238c70SJohn Marino   /* error_r bounds the cancelled bits in x - n*log(2) */
1174a238c70SJohn Marino   if (MPFR_UNLIKELY (n == 0))
1184a238c70SJohn Marino     error_r = 0;
1194a238c70SJohn Marino   else
1204a238c70SJohn Marino     {
1214a238c70SJohn Marino       count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n) + 1);
1224a238c70SJohn Marino       error_r = GMP_NUMB_BITS - error_r;
1234a238c70SJohn Marino       /* we have |x| <= 2^error_r * log(2) */
1244a238c70SJohn Marino     }
1254a238c70SJohn Marino 
1264a238c70SJohn Marino   /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
1274a238c70SJohn Marino      n/K terms costs about n/(2K) multiplications when computed in fixed
1284a238c70SJohn Marino      point */
1294a238c70SJohn Marino   K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2)
1304a238c70SJohn Marino     : __gmpfr_cuberoot (4*precy);
1314a238c70SJohn Marino   l = (precy - 1) / K + 1;
1324a238c70SJohn Marino   err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
1334a238c70SJohn Marino   /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
1344a238c70SJohn Marino   q = precy + err + K + 8;
1354a238c70SJohn Marino   /* if |x| >> 1, take into account the cancelled bits */
1364a238c70SJohn Marino   if (expx > 0)
1374a238c70SJohn Marino     q += expx;
1384a238c70SJohn Marino 
1394a238c70SJohn Marino   /* Note: due to the mpfr_prec_round below, it is not possible to use
1404a238c70SJohn Marino      the MPFR_GROUP_* macros here. */
1414a238c70SJohn Marino 
1424a238c70SJohn Marino   mpfr_init2 (r, q + error_r);
1434a238c70SJohn Marino   mpfr_init2 (s, q + error_r);
1444a238c70SJohn Marino 
1454a238c70SJohn Marino   /* the algorithm consists in computing an upper bound of exp(x) using
1464a238c70SJohn Marino      a precision of q bits, and see if we can round to MPFR_PREC(y) taking
1474a238c70SJohn Marino      into account the maximal error. Otherwise we increase q. */
1484a238c70SJohn Marino   MPFR_ZIV_INIT (loop, q);
1494a238c70SJohn Marino   for (;;)
1504a238c70SJohn Marino     {
1514a238c70SJohn Marino       MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
1524a238c70SJohn Marino                      n, K, l, (unsigned long) q, error_r));
1534a238c70SJohn Marino 
1544a238c70SJohn Marino       /* First reduce the argument to r = x - n * log(2),
1554a238c70SJohn Marino          so that r is small in absolute value. We want an upper
1564a238c70SJohn Marino          bound on r to get an upper bound on exp(x). */
1574a238c70SJohn Marino 
1584a238c70SJohn Marino       /* if n<0, we have to get an upper bound of log(2)
1594a238c70SJohn Marino          in order to get an upper bound of r = x-n*log(2) */
1604a238c70SJohn Marino       mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
1614a238c70SJohn Marino       /* s is within 1 ulp(s) of log(2) */
1624a238c70SJohn Marino 
1634a238c70SJohn Marino       mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
1644a238c70SJohn Marino       /* r is within 3 ulps of |n|*log(2) */
1654a238c70SJohn Marino       if (n < 0)
1664a238c70SJohn Marino         MPFR_CHANGE_SIGN (r);
1674a238c70SJohn Marino       /* r <= n*log(2), within 3 ulps */
1684a238c70SJohn Marino 
1694a238c70SJohn Marino       MPFR_LOG_VAR (x);
1704a238c70SJohn Marino       MPFR_LOG_VAR (r);
1714a238c70SJohn Marino 
1724a238c70SJohn Marino       mpfr_sub (r, x, r, MPFR_RNDU);
1734a238c70SJohn Marino 
1744a238c70SJohn Marino       if (MPFR_IS_PURE_FP (r))
1754a238c70SJohn Marino         {
1764a238c70SJohn Marino           while (MPFR_IS_NEG (r))
1774a238c70SJohn Marino             { /* initial approximation n was too large */
1784a238c70SJohn Marino               n--;
1794a238c70SJohn Marino               mpfr_add (r, r, s, MPFR_RNDU);
1804a238c70SJohn Marino             }
1814a238c70SJohn Marino 
1824a238c70SJohn Marino           /* since there was a cancellation in x - n*log(2), the low error_r
1834a238c70SJohn Marino              bits from r are zero and thus non significant, thus we can reduce
1844a238c70SJohn Marino              the working precision */
1854a238c70SJohn Marino           if (error_r > 0)
1864a238c70SJohn Marino             mpfr_prec_round (r, q, MPFR_RNDU);
1874a238c70SJohn Marino           /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
1884a238c70SJohn Marino              and 1 + 3/2 if error_r > 0) */
1894a238c70SJohn Marino           MPFR_LOG_VAR (r);
1904a238c70SJohn Marino           MPFR_ASSERTD (MPFR_IS_POS (r));
1914a238c70SJohn Marino           mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
1924a238c70SJohn Marino 
1934a238c70SJohn Marino           mpz_init (ss);
1944a238c70SJohn Marino           exps = mpfr_get_z_2exp (ss, s);
1954a238c70SJohn Marino           /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
1964a238c70SJohn Marino           MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
1974a238c70SJohn Marino           l = (precy < MPFR_EXP_2_THRESHOLD)
1984a238c70SJohn Marino             ? mpfr_exp2_aux (ss, r, q, &exps)   /* naive method */
1994a238c70SJohn Marino             : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
2004a238c70SJohn Marino 
2014a238c70SJohn Marino           MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
2024a238c70SJohn Marino                          l, (unsigned long) q, (K + l) * (double) q * q));
2034a238c70SJohn Marino 
2044a238c70SJohn Marino           for (k = 0; k < K; k++)
2054a238c70SJohn Marino             {
2064a238c70SJohn Marino               mpz_mul (ss, ss, ss);
2074a238c70SJohn Marino               exps <<= 1;
2084a238c70SJohn Marino               exps += mpz_normalize (ss, ss, q);
2094a238c70SJohn Marino             }
2104a238c70SJohn Marino           mpfr_set_z (s, ss, MPFR_RNDN);
2114a238c70SJohn Marino 
2124a238c70SJohn Marino           MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
2134a238c70SJohn Marino           mpz_clear (ss);
2144a238c70SJohn Marino 
2154a238c70SJohn Marino           /* error is at most 2^K*l, plus 2 to take into account of
2164a238c70SJohn Marino              the error of 3 ulps on r */
2174a238c70SJohn Marino           err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
2184a238c70SJohn Marino 
2194a238c70SJohn Marino           MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
2204a238c70SJohn Marino           MPFR_LOG_VAR (s);
2214a238c70SJohn Marino           MPFR_LOG_MSG (("err=%lu bits\n", K));
2224a238c70SJohn Marino 
2234a238c70SJohn Marino           if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
2244a238c70SJohn Marino             {
2254a238c70SJohn Marino               mpfr_clear_flags ();
2264a238c70SJohn Marino               inexact = mpfr_mul_2si (y, s, n, rnd_mode);
2274a238c70SJohn Marino               break;
2284a238c70SJohn Marino             }
2294a238c70SJohn Marino         }
2304a238c70SJohn Marino 
2314a238c70SJohn Marino       MPFR_ZIV_NEXT (loop, q);
2324a238c70SJohn Marino       mpfr_set_prec (r, q + error_r);
2334a238c70SJohn Marino       mpfr_set_prec (s, q + error_r);
2344a238c70SJohn Marino     }
2354a238c70SJohn Marino   MPFR_ZIV_FREE (loop);
2364a238c70SJohn Marino 
2374a238c70SJohn Marino   mpfr_clear (r);
2384a238c70SJohn Marino   mpfr_clear (s);
2394a238c70SJohn Marino 
2404a238c70SJohn Marino   return inexact;
2414a238c70SJohn Marino }
2424a238c70SJohn Marino 
2434a238c70SJohn Marino /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
2444a238c70SJohn Marino    using naive method with O(l) multiplications.
2454a238c70SJohn Marino    Return the number of iterations l.
2464a238c70SJohn Marino    The absolute error on s is less than 3*l*(l+1)*2^(-q).
2474a238c70SJohn Marino    Version using fixed-point arithmetic with mpz instead
2484a238c70SJohn Marino    of mpfr for internal computations.
2494a238c70SJohn Marino    NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ
2504a238c70SJohn Marino    is no longer used (r6919); qn was the number of limbs of q.
2514a238c70SJohn Marino    s must have at least qn+1 limbs (qn should be enough, but currently fails
2524a238c70SJohn Marino    since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
2534a238c70SJohn Marino */
2544a238c70SJohn Marino static unsigned long
mpfr_exp2_aux(mpz_t s,mpfr_srcptr r,mpfr_prec_t q,mpfr_exp_t * exps)2554a238c70SJohn Marino mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
2564a238c70SJohn Marino {
2574a238c70SJohn Marino   unsigned long l;
2584a238c70SJohn Marino   mpfr_exp_t dif, expt, expr;
2594a238c70SJohn Marino   mpz_t t, rr;
2604a238c70SJohn Marino   mp_size_t sbit, tbit;
2614a238c70SJohn Marino 
2624a238c70SJohn Marino   MPFR_ASSERTN (MPFR_IS_PURE_FP (r));
2634a238c70SJohn Marino 
2644a238c70SJohn Marino   expt = 0;
2654a238c70SJohn Marino   *exps = 1 - (mpfr_exp_t) q;                   /* s = 2^(q-1) */
2664a238c70SJohn Marino   mpz_init (t);
2674a238c70SJohn Marino   mpz_init (rr);
2684a238c70SJohn Marino   mpz_set_ui(t, 1);
2694a238c70SJohn Marino   mpz_set_ui(s, 1);
2704a238c70SJohn Marino   mpz_mul_2exp(s, s, q-1);
2714a238c70SJohn Marino   expr = mpfr_get_z_2exp(rr, r);               /* no error here */
2724a238c70SJohn Marino 
2734a238c70SJohn Marino   l = 0;
2744a238c70SJohn Marino   for (;;) {
2754a238c70SJohn Marino     l++;
2764a238c70SJohn Marino     mpz_mul(t, t, rr);
2774a238c70SJohn Marino     expt += expr;
2784a238c70SJohn Marino     MPFR_MPZ_SIZEINBASE2 (sbit, s);
2794a238c70SJohn Marino     MPFR_MPZ_SIZEINBASE2 (tbit, t);
2804a238c70SJohn Marino     dif = *exps + sbit - expt - tbit;
2814a238c70SJohn Marino     /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
2824a238c70SJohn Marino     expt += mpz_normalize(t, t, (mpfr_exp_t) q-dif); /* error at most 2^(1-q) */
2834a238c70SJohn Marino     mpz_fdiv_q_ui (t, t, l);                   /* error at most 2^(1-q) */
2844a238c70SJohn Marino     /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
2854a238c70SJohn Marino     MPFR_ASSERTD (expt == *exps);
2864a238c70SJohn Marino     if (mpz_sgn (t) == 0)
2874a238c70SJohn Marino       break;
2884a238c70SJohn Marino     mpz_add(s, s, t);                      /* no error here: exact */
2894a238c70SJohn Marino     /* ensures rr has the same size as t: after several shifts, the error
2904a238c70SJohn Marino        on rr is still at most ulp(t)=ulp(s) */
2914a238c70SJohn Marino     MPFR_MPZ_SIZEINBASE2 (tbit, t);
2924a238c70SJohn Marino     expr += mpz_normalize(rr, rr, tbit);
2934a238c70SJohn Marino   }
2944a238c70SJohn Marino 
2954a238c70SJohn Marino   mpz_clear (t);
2964a238c70SJohn Marino   mpz_clear (rr);
2974a238c70SJohn Marino 
2984a238c70SJohn Marino   return 3 * l * (l + 1);
2994a238c70SJohn Marino }
3004a238c70SJohn Marino 
3014a238c70SJohn Marino /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
3024a238c70SJohn Marino    using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
3034a238c70SJohn Marino    Return l.
3044a238c70SJohn Marino    Uses m multiplications of full size and 2l/m of decreasing size,
3054a238c70SJohn Marino    i.e. a total equivalent to about m+l/m full multiplications,
3064a238c70SJohn Marino    i.e. 2*sqrt(l) for m=sqrt(l).
3074a238c70SJohn Marino    NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
3084a238c70SJohn Marino    is no longer used (r6919); sizer was the number of limbs of r.
3094a238c70SJohn Marino    Version using mpz. ss must have at least (sizer+1) limbs.
3104a238c70SJohn Marino    The error is bounded by (l^2+4*l) ulps where l is the return value.
3114a238c70SJohn Marino */
3124a238c70SJohn Marino static unsigned long
mpfr_exp2_aux2(mpz_t s,mpfr_srcptr r,mpfr_prec_t q,mpfr_exp_t * exps)3134a238c70SJohn Marino mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
3144a238c70SJohn Marino {
3154a238c70SJohn Marino   mpfr_exp_t expr, *expR, expt;
3164a238c70SJohn Marino   mpfr_prec_t ql;
3174a238c70SJohn Marino   unsigned long l, m, i;
3184a238c70SJohn Marino   mpz_t t, *R, rr, tmp;
3194a238c70SJohn Marino   mp_size_t sbit, rrbit;
3204a238c70SJohn Marino   MPFR_TMP_DECL(marker);
3214a238c70SJohn Marino 
3224a238c70SJohn Marino   /* estimate value of l */
3234a238c70SJohn Marino   MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
3244a238c70SJohn Marino   l = q / (- MPFR_GET_EXP (r));
3254a238c70SJohn Marino   m = __gmpfr_isqrt (l);
3264a238c70SJohn Marino   /* we access R[2], thus we need m >= 2 */
3274a238c70SJohn Marino   if (m < 2)
3284a238c70SJohn Marino     m = 2;
3294a238c70SJohn Marino 
3304a238c70SJohn Marino   MPFR_TMP_MARK(marker);
3314a238c70SJohn Marino   R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t));     /* R[i] is r^i */
3324a238c70SJohn Marino   expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
3334a238c70SJohn Marino   /* expR[i] is the exponent for R[i] */
3344a238c70SJohn Marino   mpz_init (tmp);
3354a238c70SJohn Marino   mpz_init (rr);
3364a238c70SJohn Marino   mpz_init (t);
3374a238c70SJohn Marino   mpz_set_ui (s, 0);
3384a238c70SJohn Marino   *exps = 1 - q;                        /* 1 ulp = 2^(1-q) */
3394a238c70SJohn Marino   for (i = 0 ; i <= m ; i++)
3404a238c70SJohn Marino     mpz_init (R[i]);
3414a238c70SJohn Marino   expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
3424a238c70SJohn Marino   expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
3434a238c70SJohn Marino   mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
3444a238c70SJohn Marino   mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
3454a238c70SJohn Marino   expR[2] = 1 - q;
3464a238c70SJohn Marino   for (i = 3 ; i <= m ; i++)
3474a238c70SJohn Marino     {
3484a238c70SJohn Marino       if ((i & 1) == 1)
3494a238c70SJohn Marino         mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
3504a238c70SJohn Marino       else
3514a238c70SJohn Marino         mpz_mul (t, R[i/2], R[i/2]);
3524a238c70SJohn Marino       mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
3534a238c70SJohn Marino       expR[i] = 1 - q;
3544a238c70SJohn Marino     }
3554a238c70SJohn Marino   mpz_set_ui (R[0], 1);
3564a238c70SJohn Marino   mpz_mul_2exp (R[0], R[0], q-1);
3574a238c70SJohn Marino   expR[0] = 1-q; /* R[0]=1 */
3584a238c70SJohn Marino   mpz_set_ui (rr, 1);
3594a238c70SJohn Marino   expr = 0; /* rr contains r^l/l! */
3604a238c70SJohn Marino   /* by induction: err(rr) <= 2*l ulps */
3614a238c70SJohn Marino 
3624a238c70SJohn Marino   l = 0;
3634a238c70SJohn Marino   ql = q; /* precision used for current giant step */
3644a238c70SJohn Marino   do
3654a238c70SJohn Marino     {
3664a238c70SJohn Marino       /* all R[i] must have exponent 1-ql */
3674a238c70SJohn Marino       if (l != 0)
3684a238c70SJohn Marino         for (i = 0 ; i < m ; i++)
3694a238c70SJohn Marino           expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
3704a238c70SJohn Marino       /* the absolute error on R[i]*rr is still 2*i-1 ulps */
3714a238c70SJohn Marino       expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
3724a238c70SJohn Marino       /* err(t) <= 2*m-1 ulps */
3734a238c70SJohn Marino       /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
3744a238c70SJohn Marino          using Horner's scheme */
3754a238c70SJohn Marino       for (i = m-1 ; i-- != 0 ; )
3764a238c70SJohn Marino         {
3774a238c70SJohn Marino           mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
3784a238c70SJohn Marino           mpz_add (t, t, R[i]);
3794a238c70SJohn Marino         }
3804a238c70SJohn Marino       /* now err(t) <= (3m-2) ulps */
3814a238c70SJohn Marino 
3824a238c70SJohn Marino       /* now multiplies t by r^l/l! and adds to s */
3834a238c70SJohn Marino       mpz_mul (t, t, rr);
3844a238c70SJohn Marino       expt += expr;
3854a238c70SJohn Marino       expt = mpz_normalize2 (t, t, expt, *exps);
3864a238c70SJohn Marino       /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
3874a238c70SJohn Marino       MPFR_ASSERTD (expt == *exps);
3884a238c70SJohn Marino       mpz_add (s, s, t); /* no error here */
3894a238c70SJohn Marino 
3904a238c70SJohn Marino       /* updates rr, the multiplication of the factors l+i could be done
3914a238c70SJohn Marino          using binary splitting too, but it is not sure it would save much */
3924a238c70SJohn Marino       mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
3934a238c70SJohn Marino       expr += expR[m];
3944a238c70SJohn Marino       mpz_set_ui (tmp, 1);
3954a238c70SJohn Marino       for (i = 1 ; i <= m ; i++)
3964a238c70SJohn Marino         mpz_mul_ui (tmp, tmp, l + i);
3974a238c70SJohn Marino       mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
3984a238c70SJohn Marino       l += m;
3994a238c70SJohn Marino       if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
4004a238c70SJohn Marino         break;
4014a238c70SJohn Marino       expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
4024a238c70SJohn Marino       if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
4034a238c70SJohn Marino         rrbit = 1;
4044a238c70SJohn Marino       else
4054a238c70SJohn Marino         MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
4064a238c70SJohn Marino       MPFR_MPZ_SIZEINBASE2 (sbit, s);
4074a238c70SJohn Marino       ql = q - *exps - sbit + expr + rrbit;
4084a238c70SJohn Marino       /* TODO: Wrong cast. I don't want what is right, but this is
4094a238c70SJohn Marino          certainly wrong */
4104a238c70SJohn Marino     }
4114a238c70SJohn Marino   while ((size_t) expr + rrbit > (size_t) -q);
4124a238c70SJohn Marino 
4134a238c70SJohn Marino   for (i = 0 ; i <= m ; i++)
4144a238c70SJohn Marino     mpz_clear (R[i]);
4154a238c70SJohn Marino   MPFR_TMP_FREE(marker);
4164a238c70SJohn Marino   mpz_clear (rr);
4174a238c70SJohn Marino   mpz_clear (t);
4184a238c70SJohn Marino   mpz_clear (tmp);
4194a238c70SJohn Marino 
4204a238c70SJohn Marino   return l * (l + 4);
4214a238c70SJohn Marino }
422