mpfr/src/exp_2.c

/* mpfr_exp_2 -- exponential of a floating-point number
                 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

4a238c70SJohn Marino/* #define DEBUG */
4a238c70SJohn Marino#define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
4a238c70SJohn Marino#include "mpfr-impl.h"
4a238c70SJohn Marino
4a238c70SJohn Marinostatic unsigned long
4a238c70SJohn Marinompfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
4a238c70SJohn Marinostatic unsigned long
4a238c70SJohn Marinompfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
4a238c70SJohn Marinostatic mpfr_exp_t
4a238c70SJohn Marinompz_normalize  (mpz_t, mpz_t, mpfr_exp_t);
4a238c70SJohn Marinostatic mpfr_exp_t
4a238c70SJohn Marinompz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
4a238c70SJohn Marino
4a238c70SJohn Marino/* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
4a238c70SJohn Marino   Otherwise do nothing and return 0.
4a238c70SJohn Marino */
4a238c70SJohn Marinostatic mpfr_exp_t
4a238c70SJohn Marinompz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
4a238c70SJohn Marino{
4a238c70SJohn Marino  size_t k;
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_MPZ_SIZEINBASE2 (k, z);
4a238c70SJohn Marino  MPFR_ASSERTD (k == (mpfr_uexp_t) k);
4a238c70SJohn Marino  if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q)
4a238c70SJohn Marino    {
4a238c70SJohn Marino      mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
4a238c70SJohn Marino      return (mpfr_exp_t) k - q;
4a238c70SJohn Marino    }
4a238c70SJohn Marino  if (MPFR_UNLIKELY(rop != z))
4a238c70SJohn Marino    mpz_set (rop, z);
4a238c70SJohn Marino  return 0;
4a238c70SJohn Marino}
4a238c70SJohn Marino
4a238c70SJohn Marino/* if expz > target, shift z by (expz-target) bits to the left.
4a238c70SJohn Marino   if expz < target, shift z by (target-expz) bits to the right.
4a238c70SJohn Marino   Returns target.
4a238c70SJohn Marino*/
4a238c70SJohn Marinostatic mpfr_exp_t
4a238c70SJohn Marinompz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
4a238c70SJohn Marino{
4a238c70SJohn Marino  if (target > expz)
4a238c70SJohn Marino    mpz_fdiv_q_2exp (rop, z, target - expz);
4a238c70SJohn Marino  else
4a238c70SJohn Marino    mpz_mul_2exp (rop, z, expz - target);
4a238c70SJohn Marino  return target;
4a238c70SJohn Marino}
4a238c70SJohn Marino
4a238c70SJohn Marino/* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
4a238c70SJohn Marino   where x = n*log(2)+(2^K)*r
4a238c70SJohn Marino   together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
4a238c70SJohn Marino   evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
4a238c70SJohn Marino   This function returns with the exact flags due to exp.
4a238c70SJohn Marino*/
4a238c70SJohn Marinoint
4a238c70SJohn Marinompfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
4a238c70SJohn Marino{
4a238c70SJohn Marino  long n;
4a238c70SJohn Marino  unsigned long K, k, l, err; /* FIXME: Which type ? */
4a238c70SJohn Marino  int error_r;
4a238c70SJohn Marino  mpfr_exp_t exps, expx;
4a238c70SJohn Marino  mpfr_prec_t q, precy;
4a238c70SJohn Marino  int inexact;
4a238c70SJohn Marino  mpfr_t r, s;
4a238c70SJohn Marino  mpz_t ss;
4a238c70SJohn Marino  MPFR_ZIV_DECL (loop);
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_LOG_FUNC
4a238c70SJohn Marino    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
4a238c70SJohn Marino     ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
4a238c70SJohn Marino      inexact));
4a238c70SJohn Marino
4a238c70SJohn Marino  expx = MPFR_GET_EXP (x);
4a238c70SJohn Marino  precy = MPFR_PREC(y);
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Warning: we cannot use the 'double' type here, since on 64-bit machines
4a238c70SJohn Marino     x may be as large as 2^62*log(2) without overflow, and then x/log(2)
4a238c70SJohn Marino     is about 2^62: not every integer of that size can be represented as a
4a238c70SJohn Marino     'double', thus the argument reduction would fail. */
4a238c70SJohn Marino  if (expx <= -2)
4a238c70SJohn Marino    /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
4a238c70SJohn Marino    n = 0;
4a238c70SJohn Marino  else
4a238c70SJohn Marino    {
4a238c70SJohn Marino      mpfr_init2 (r, sizeof (long) * CHAR_BIT);
4a238c70SJohn Marino      mpfr_const_log2 (r, MPFR_RNDZ);
4a238c70SJohn Marino      mpfr_div (r, x, r, MPFR_RNDN);
4a238c70SJohn Marino      n = mpfr_get_si (r, MPFR_RNDN);
4a238c70SJohn Marino      mpfr_clear (r);
4a238c70SJohn Marino    }
4a238c70SJohn Marino  /* we have |x| <= (|n|+1)*log(2) */
4a238c70SJohn Marino  MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
4a238c70SJohn Marino
4a238c70SJohn Marino  /* error_r bounds the cancelled bits in x - n*log(2) */
4a238c70SJohn Marino  if (MPFR_UNLIKELY (n == 0))
4a238c70SJohn Marino    error_r = 0;
4a238c70SJohn Marino  else
4a238c70SJohn Marino    {
4a238c70SJohn Marino      count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n) + 1);
4a238c70SJohn Marino      error_r = GMP_NUMB_BITS - error_r;
4a238c70SJohn Marino      /* we have |x| <= 2^error_r * log(2) */
4a238c70SJohn Marino    }
4a238c70SJohn Marino
4a238c70SJohn Marino  /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
4a238c70SJohn Marino     n/K terms costs about n/(2K) multiplications when computed in fixed
4a238c70SJohn Marino     point */
4a238c70SJohn Marino  K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2)
4a238c70SJohn Marino    : __gmpfr_cuberoot (4*precy);
4a238c70SJohn Marino  l = (precy - 1) / K + 1;
4a238c70SJohn Marino  err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
4a238c70SJohn Marino  /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
4a238c70SJohn Marino  q = precy + err + K + 8;
4a238c70SJohn Marino  /* if |x| >> 1, take into account the cancelled bits */
4a238c70SJohn Marino  if (expx > 0)
4a238c70SJohn Marino    q += expx;
4a238c70SJohn Marino
4a238c70SJohn Marino  /* Note: due to the mpfr_prec_round below, it is not possible to use
4a238c70SJohn Marino     the MPFR_GROUP_* macros here. */
4a238c70SJohn Marino
4a238c70SJohn Marino  mpfr_init2 (r, q + error_r);
4a238c70SJohn Marino  mpfr_init2 (s, q + error_r);
4a238c70SJohn Marino
4a238c70SJohn Marino  /* the algorithm consists in computing an upper bound of exp(x) using
4a238c70SJohn Marino     a precision of q bits, and see if we can round to MPFR_PREC(y) taking
4a238c70SJohn Marino     into account the maximal error. Otherwise we increase q. */
4a238c70SJohn Marino  MPFR_ZIV_INIT (loop, q);
4a238c70SJohn Marino  for (;;)
4a238c70SJohn Marino    {
4a238c70SJohn Marino      MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
4a238c70SJohn Marino                     n, K, l, (unsigned long) q, error_r));
4a238c70SJohn Marino
4a238c70SJohn Marino      /* First reduce the argument to r = x - n * log(2),
4a238c70SJohn Marino         so that r is small in absolute value. We want an upper
4a238c70SJohn Marino         bound on r to get an upper bound on exp(x). */
4a238c70SJohn Marino
4a238c70SJohn Marino      /* if n<0, we have to get an upper bound of log(2)
4a238c70SJohn Marino         in order to get an upper bound of r = x-n*log(2) */
4a238c70SJohn Marino      mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
4a238c70SJohn Marino      /* s is within 1 ulp(s) of log(2) */
4a238c70SJohn Marino
4a238c70SJohn Marino      mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
4a238c70SJohn Marino      /* r is within 3 ulps of |n|*log(2) */
4a238c70SJohn Marino      if (n < 0)
4a238c70SJohn Marino        MPFR_CHANGE_SIGN (r);
4a238c70SJohn Marino      /* r <= n*log(2), within 3 ulps */
4a238c70SJohn Marino
4a238c70SJohn Marino      MPFR_LOG_VAR (x);
4a238c70SJohn Marino      MPFR_LOG_VAR (r);
4a238c70SJohn Marino
4a238c70SJohn Marino      mpfr_sub (r, x, r, MPFR_RNDU);
4a238c70SJohn Marino
4a238c70SJohn Marino      if (MPFR_IS_PURE_FP (r))
4a238c70SJohn Marino        {
4a238c70SJohn Marino          while (MPFR_IS_NEG (r))
4a238c70SJohn Marino            { /* initial approximation n was too large */
4a238c70SJohn Marino              n--;
4a238c70SJohn Marino              mpfr_add (r, r, s, MPFR_RNDU);
4a238c70SJohn Marino            }
4a238c70SJohn Marino
4a238c70SJohn Marino          /* since there was a cancellation in x - n*log(2), the low error_r
4a238c70SJohn Marino             bits from r are zero and thus non significant, thus we can reduce
4a238c70SJohn Marino             the working precision */
4a238c70SJohn Marino          if (error_r > 0)
4a238c70SJohn Marino            mpfr_prec_round (r, q, MPFR_RNDU);
4a238c70SJohn Marino          /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
4a238c70SJohn Marino             and 1 + 3/2 if error_r > 0) */
4a238c70SJohn Marino          MPFR_LOG_VAR (r);
4a238c70SJohn Marino          MPFR_ASSERTD (MPFR_IS_POS (r));
4a238c70SJohn Marino          mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
4a238c70SJohn Marino
4a238c70SJohn Marino          mpz_init (ss);
4a238c70SJohn Marino          exps = mpfr_get_z_2exp (ss, s);
4a238c70SJohn Marino          /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
4a238c70SJohn Marino          MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
4a238c70SJohn Marino          l = (precy < MPFR_EXP_2_THRESHOLD)
4a238c70SJohn Marino            ? mpfr_exp2_aux (ss, r, q, &exps)   /* naive method */
4a238c70SJohn Marino            : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
4a238c70SJohn Marino
4a238c70SJohn Marino          MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
4a238c70SJohn Marino                         l, (unsigned long) q, (K + l) * (double) q * q));
4a238c70SJohn Marino
4a238c70SJohn Marino          for (k = 0; k < K; k++)
4a238c70SJohn Marino            {
4a238c70SJohn Marino              mpz_mul (ss, ss, ss);
4a238c70SJohn Marino              exps <<= 1;
4a238c70SJohn Marino              exps += mpz_normalize (ss, ss, q);
4a238c70SJohn Marino            }
4a238c70SJohn Marino          mpfr_set_z (s, ss, MPFR_RNDN);
4a238c70SJohn Marino
4a238c70SJohn Marino          MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
4a238c70SJohn Marino          mpz_clear (ss);
4a238c70SJohn Marino
4a238c70SJohn Marino          /* error is at most 2^K*l, plus 2 to take into account of
4a238c70SJohn Marino             the error of 3 ulps on r */
4a238c70SJohn Marino          err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
4a238c70SJohn Marino
4a238c70SJohn Marino          MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
4a238c70SJohn Marino          MPFR_LOG_VAR (s);
4a238c70SJohn Marino          MPFR_LOG_MSG (("err=%lu bits\n", K));
4a238c70SJohn Marino
4a238c70SJohn Marino          if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
4a238c70SJohn Marino            {
4a238c70SJohn Marino              mpfr_clear_flags ();
4a238c70SJohn Marino              inexact = mpfr_mul_2si (y, s, n, rnd_mode);
4a238c70SJohn Marino              break;
4a238c70SJohn Marino            }
4a238c70SJohn Marino        }
4a238c70SJohn Marino
4a238c70SJohn Marino      MPFR_ZIV_NEXT (loop, q);
4a238c70SJohn Marino      mpfr_set_prec (r, q + error_r);
4a238c70SJohn Marino      mpfr_set_prec (s, q + error_r);
4a238c70SJohn Marino    }
4a238c70SJohn Marino  MPFR_ZIV_FREE (loop);
4a238c70SJohn Marino
4a238c70SJohn Marino  mpfr_clear (r);
4a238c70SJohn Marino  mpfr_clear (s);
4a238c70SJohn Marino
4a238c70SJohn Marino  return inexact;
4a238c70SJohn Marino}
4a238c70SJohn Marino
4a238c70SJohn Marino/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
4a238c70SJohn Marino   using naive method with O(l) multiplications.
4a238c70SJohn Marino   Return the number of iterations l.
4a238c70SJohn Marino   The absolute error on s is less than 3*l*(l+1)*2^(-q).
4a238c70SJohn Marino   Version using fixed-point arithmetic with mpz instead
4a238c70SJohn Marino   of mpfr for internal computations.
4a238c70SJohn Marino   NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ
4a238c70SJohn Marino   is no longer used (r6919); qn was the number of limbs of q.
4a238c70SJohn Marino   s must have at least qn+1 limbs (qn should be enough, but currently fails
4a238c70SJohn Marino   since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
4a238c70SJohn Marino*/
4a238c70SJohn Marinostatic unsigned long
4a238c70SJohn Marinompfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
4a238c70SJohn Marino{
4a238c70SJohn Marino  unsigned long l;
4a238c70SJohn Marino  mpfr_exp_t dif, expt, expr;
4a238c70SJohn Marino  mpz_t t, rr;
4a238c70SJohn Marino  mp_size_t sbit, tbit;
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_ASSERTN (MPFR_IS_PURE_FP (r));
4a238c70SJohn Marino
4a238c70SJohn Marino  expt = 0;
4a238c70SJohn Marino  *exps = 1 - (mpfr_exp_t) q;                   /* s = 2^(q-1) */
4a238c70SJohn Marino  mpz_init (t);
4a238c70SJohn Marino  mpz_init (rr);
4a238c70SJohn Marino  mpz_set_ui(t, 1);
4a238c70SJohn Marino  mpz_set_ui(s, 1);
4a238c70SJohn Marino  mpz_mul_2exp(s, s, q-1);
4a238c70SJohn Marino  expr = mpfr_get_z_2exp(rr, r);               /* no error here */
4a238c70SJohn Marino
4a238c70SJohn Marino  l = 0;
4a238c70SJohn Marino  for (;;) {
4a238c70SJohn Marino    l++;
4a238c70SJohn Marino    mpz_mul(t, t, rr);
4a238c70SJohn Marino    expt += expr;
4a238c70SJohn Marino    MPFR_MPZ_SIZEINBASE2 (sbit, s);
4a238c70SJohn Marino    MPFR_MPZ_SIZEINBASE2 (tbit, t);
4a238c70SJohn Marino    dif = *exps + sbit - expt - tbit;
4a238c70SJohn Marino    /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
4a238c70SJohn Marino    expt += mpz_normalize(t, t, (mpfr_exp_t) q-dif); /* error at most 2^(1-q) */
4a238c70SJohn Marino    mpz_fdiv_q_ui (t, t, l);                   /* error at most 2^(1-q) */
4a238c70SJohn Marino    /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
4a238c70SJohn Marino    MPFR_ASSERTD (expt == *exps);
4a238c70SJohn Marino    if (mpz_sgn (t) == 0)
4a238c70SJohn Marino      break;
4a238c70SJohn Marino    mpz_add(s, s, t);                      /* no error here: exact */
4a238c70SJohn Marino    /* ensures rr has the same size as t: after several shifts, the error
4a238c70SJohn Marino       on rr is still at most ulp(t)=ulp(s) */
4a238c70SJohn Marino    MPFR_MPZ_SIZEINBASE2 (tbit, t);
4a238c70SJohn Marino    expr += mpz_normalize(rr, rr, tbit);
4a238c70SJohn Marino  }
4a238c70SJohn Marino
4a238c70SJohn Marino  mpz_clear (t);
4a238c70SJohn Marino  mpz_clear (rr);
4a238c70SJohn Marino
4a238c70SJohn Marino  return 3 * l * (l + 1);
4a238c70SJohn Marino}
4a238c70SJohn Marino
4a238c70SJohn Marino/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
4a238c70SJohn Marino   using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
4a238c70SJohn Marino   Return l.
4a238c70SJohn Marino   Uses m multiplications of full size and 2l/m of decreasing size,
4a238c70SJohn Marino   i.e. a total equivalent to about m+l/m full multiplications,
4a238c70SJohn Marino   i.e. 2*sqrt(l) for m=sqrt(l).
4a238c70SJohn Marino   NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
4a238c70SJohn Marino   is no longer used (r6919); sizer was the number of limbs of r.
4a238c70SJohn Marino   Version using mpz. ss must have at least (sizer+1) limbs.
4a238c70SJohn Marino   The error is bounded by (l^2+4*l) ulps where l is the return value.
4a238c70SJohn Marino*/
4a238c70SJohn Marinostatic unsigned long
4a238c70SJohn Marinompfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
4a238c70SJohn Marino{
4a238c70SJohn Marino  mpfr_exp_t expr, *expR, expt;
4a238c70SJohn Marino  mpfr_prec_t ql;
4a238c70SJohn Marino  unsigned long l, m, i;
4a238c70SJohn Marino  mpz_t t, *R, rr, tmp;
4a238c70SJohn Marino  mp_size_t sbit, rrbit;
4a238c70SJohn Marino  MPFR_TMP_DECL(marker);
4a238c70SJohn Marino
4a238c70SJohn Marino  /* estimate value of l */
4a238c70SJohn Marino  MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
4a238c70SJohn Marino  l = q / (- MPFR_GET_EXP (r));
4a238c70SJohn Marino  m = __gmpfr_isqrt (l);
4a238c70SJohn Marino  /* we access R[2], thus we need m >= 2 */
4a238c70SJohn Marino  if (m < 2)
4a238c70SJohn Marino    m = 2;
4a238c70SJohn Marino
4a238c70SJohn Marino  MPFR_TMP_MARK(marker);
4a238c70SJohn Marino  R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t));     /* R[i] is r^i */
4a238c70SJohn Marino  expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
4a238c70SJohn Marino  /* expR[i] is the exponent for R[i] */
4a238c70SJohn Marino  mpz_init (tmp);
4a238c70SJohn Marino  mpz_init (rr);
4a238c70SJohn Marino  mpz_init (t);
4a238c70SJohn Marino  mpz_set_ui (s, 0);
4a238c70SJohn Marino  *exps = 1 - q;                        /* 1 ulp = 2^(1-q) */
4a238c70SJohn Marino  for (i = 0 ; i <= m ; i++)
4a238c70SJohn Marino    mpz_init (R[i]);
4a238c70SJohn Marino  expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
4a238c70SJohn Marino  expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
4a238c70SJohn Marino  mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
4a238c70SJohn Marino  mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
4a238c70SJohn Marino  expR[2] = 1 - q;
4a238c70SJohn Marino  for (i = 3 ; i <= m ; i++)
4a238c70SJohn Marino    {
4a238c70SJohn Marino      if ((i & 1) == 1)
4a238c70SJohn Marino        mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
4a238c70SJohn Marino      else
4a238c70SJohn Marino        mpz_mul (t, R[i/2], R[i/2]);
4a238c70SJohn Marino      mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
4a238c70SJohn Marino      expR[i] = 1 - q;
4a238c70SJohn Marino    }
4a238c70SJohn Marino  mpz_set_ui (R[0], 1);
4a238c70SJohn Marino  mpz_mul_2exp (R[0], R[0], q-1);
4a238c70SJohn Marino  expR[0] = 1-q; /* R[0]=1 */
4a238c70SJohn Marino  mpz_set_ui (rr, 1);
4a238c70SJohn Marino  expr = 0; /* rr contains r^l/l! */
4a238c70SJohn Marino  /* by induction: err(rr) <= 2*l ulps */
4a238c70SJohn Marino
4a238c70SJohn Marino  l = 0;
4a238c70SJohn Marino  ql = q; /* precision used for current giant step */
4a238c70SJohn Marino  do
4a238c70SJohn Marino    {
4a238c70SJohn Marino      /* all R[i] must have exponent 1-ql */
4a238c70SJohn Marino      if (l != 0)
4a238c70SJohn Marino        for (i = 0 ; i < m ; i++)
4a238c70SJohn Marino          expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
4a238c70SJohn Marino      /* the absolute error on R[i]*rr is still 2*i-1 ulps */
4a238c70SJohn Marino      expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
4a238c70SJohn Marino      /* err(t) <= 2*m-1 ulps */
4a238c70SJohn Marino      /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
4a238c70SJohn Marino         using Horner's scheme */
4a238c70SJohn Marino      for (i = m-1 ; i-- != 0 ; )
4a238c70SJohn Marino        {
4a238c70SJohn Marino          mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
4a238c70SJohn Marino          mpz_add (t, t, R[i]);
4a238c70SJohn Marino        }
4a238c70SJohn Marino      /* now err(t) <= (3m-2) ulps */
4a238c70SJohn Marino
4a238c70SJohn Marino      /* now multiplies t by r^l/l! and adds to s */
4a238c70SJohn Marino      mpz_mul (t, t, rr);
4a238c70SJohn Marino      expt += expr;
4a238c70SJohn Marino      expt = mpz_normalize2 (t, t, expt, *exps);
4a238c70SJohn Marino      /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
4a238c70SJohn Marino      MPFR_ASSERTD (expt == *exps);
4a238c70SJohn Marino      mpz_add (s, s, t); /* no error here */
4a238c70SJohn Marino
4a238c70SJohn Marino      /* updates rr, the multiplication of the factors l+i could be done
4a238c70SJohn Marino         using binary splitting too, but it is not sure it would save much */
4a238c70SJohn Marino      mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
4a238c70SJohn Marino      expr += expR[m];
4a238c70SJohn Marino      mpz_set_ui (tmp, 1);
4a238c70SJohn Marino      for (i = 1 ; i <= m ; i++)
4a238c70SJohn Marino        mpz_mul_ui (tmp, tmp, l + i);
4a238c70SJohn Marino      mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
4a238c70SJohn Marino      l += m;
4a238c70SJohn Marino      if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
4a238c70SJohn Marino        break;
4a238c70SJohn Marino      expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
4a238c70SJohn Marino      if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
4a238c70SJohn Marino        rrbit = 1;
4a238c70SJohn Marino      else
4a238c70SJohn Marino        MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
4a238c70SJohn Marino      MPFR_MPZ_SIZEINBASE2 (sbit, s);
4a238c70SJohn Marino      ql = q - *exps - sbit + expr + rrbit;
4a238c70SJohn Marino      /* TODO: Wrong cast. I don't want what is right, but this is
4a238c70SJohn Marino         certainly wrong */
4a238c70SJohn Marino    }
4a238c70SJohn Marino  while ((size_t) expr + rrbit > (size_t) -q);
4a238c70SJohn Marino
4a238c70SJohn Marino  for (i = 0 ; i <= m ; i++)
4a238c70SJohn Marino    mpz_clear (R[i]);
4a238c70SJohn Marino  MPFR_TMP_FREE(marker);
4a238c70SJohn Marino  mpz_clear (rr);
4a238c70SJohn Marino  mpz_clear (t);
4a238c70SJohn Marino  mpz_clear (tmp);
4a238c70SJohn Marino
4a238c70SJohn Marino  return l * (l + 4);
4a238c70SJohn Marino}