dist/src/log_ui.c

299c6f0cSmrg/* mpfr_log_ui -- compute natural logarithm of an unsigned long
299c6f0cSmrg
*ba125506SmrgCopyright 2014-2023 Free Software Foundation, Inc.
299c6f0cSmrgContributed by the AriC and Caramba projects, INRIA.
299c6f0cSmrg
299c6f0cSmrgThis file is part of the GNU MPFR Library.
299c6f0cSmrg
299c6f0cSmrgThe GNU MPFR Library is free software; you can redistribute it and/or modify
299c6f0cSmrgit under the terms of the GNU Lesser General Public License as published by
299c6f0cSmrgthe Free Software Foundation; either version 3 of the License, or (at your
299c6f0cSmrgoption) any later version.
299c6f0cSmrg
299c6f0cSmrgThe GNU MPFR Library is distributed in the hope that it will be useful, but
299c6f0cSmrgWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
299c6f0cSmrgor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
299c6f0cSmrgLicense for more details.
299c6f0cSmrg
299c6f0cSmrgYou should have received a copy of the GNU Lesser General Public License
299c6f0cSmrgalong with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
2ba2404bSmrghttps://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
299c6f0cSmrg51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
299c6f0cSmrg
299c6f0cSmrg#define MPFR_NEED_LONGLONG_H
299c6f0cSmrg#include "mpfr-impl.h"
299c6f0cSmrg
299c6f0cSmrg/* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
299c6f0cSmrg   e.g. about 4 times as slow for n around ULONG_MAX/3 on an
299c6f0cSmrg   x86_64 Linux machine, for 10^6 bits of precision. The reason is that
299c6f0cSmrg   for say n=6148914691236517205 and prec=10^6, the value of T computed
299c6f0cSmrg   has more than 50M bits, which is much more than needed. Indeed the
299c6f0cSmrg   binary splitting algorithm for series with a finite radius of convergence
299c6f0cSmrg   gives rationals of size n*log(n) for a target precision n. One might
299c6f0cSmrg   truncate the rationals inside the algorithm, but then the error analysis
299c6f0cSmrg   should be redone. */
299c6f0cSmrg
*ba125506Smrg/* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x)
299c6f0cSmrg   up to order N for x=p/2^k is T/(B*Q).
299c6f0cSmrg   P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
299c6f0cSmrg   q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
299c6f0cSmrg   B[0] <- n1 * (n1+1) * ... * (n2-1)
299c6f0cSmrg   T[0] <- B[0]*Q[0] * S(n1,n2)
299c6f0cSmrg   where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
299c6f0cSmrg   Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
299c6f0cSmrg*/
299c6f0cSmrgstatic void
299c6f0cSmrgS (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
299c6f0cSmrg   unsigned long n2, long p, unsigned long k, int need_P)
299c6f0cSmrg{
299c6f0cSmrg  MPFR_ASSERTD (n1 < n2);
299c6f0cSmrg  MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
299c6f0cSmrg  if (n2 == n1 + 1)
299c6f0cSmrg    {
299c6f0cSmrg      mpz_set_si (P[0], (n1 == 1) ? p : -p);
299c6f0cSmrg      *q = k;
299c6f0cSmrg      mpz_set_ui (B[0], n1);
299c6f0cSmrg      /* T = B*Q*S where S = P/(B*Q) thus T = P */
299c6f0cSmrg      mpz_set (T[0], P[0]);
299c6f0cSmrg      /* since p is odd (or zero), there is no common factor 2 between
299c6f0cSmrg         P and Q, or T and B */
299c6f0cSmrg    }
299c6f0cSmrg  else
299c6f0cSmrg    {
299c6f0cSmrg      unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
299c6f0cSmrg      /* m = floor((n1+n2)/2) */
299c6f0cSmrg
299c6f0cSmrg      MPFR_ASSERTD (n1 < m && m < n2);
299c6f0cSmrg      S (P, q, B, T, n1, m, p, k, 1);
299c6f0cSmrg      S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
299c6f0cSmrg
299c6f0cSmrg      /* T0 <- T0*B1*Q1 + P0*B0*T1 */
299c6f0cSmrg      mpz_mul (T[1], T[1], P[0]);
299c6f0cSmrg      mpz_mul (T[1], T[1], B[0]);
299c6f0cSmrg      mpz_mul (T[0], T[0], B[1]);
299c6f0cSmrg      /* Q[1] = 2^q1 */
299c6f0cSmrg      mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
299c6f0cSmrg      mpz_add (T[0], T[0], T[1]);
299c6f0cSmrg      if (need_P)
299c6f0cSmrg        mpz_mul (P[0], P[0], P[1]);
299c6f0cSmrg      *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
299c6f0cSmrg      mpz_mul (B[0], B[0], B[1]);
299c6f0cSmrg
299c6f0cSmrg      /* there should be no common factors 2 between P, Q and T,
299c6f0cSmrg         since P is odd (or zero) */
299c6f0cSmrg    }
299c6f0cSmrg}
299c6f0cSmrg
299c6f0cSmrgint
299c6f0cSmrgmpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
299c6f0cSmrg{
299c6f0cSmrg  unsigned long k;
299c6f0cSmrg  mpfr_prec_t w; /* working precision */
299c6f0cSmrg  mpz_t three_n, *P, *B, *T;
299c6f0cSmrg  mpfr_t t, q;
299c6f0cSmrg  int inexact;
299c6f0cSmrg  unsigned long N, lgN, i, kk;
299c6f0cSmrg  long p;
299c6f0cSmrg  MPFR_GROUP_DECL(group);
299c6f0cSmrg  MPFR_TMP_DECL(marker);
299c6f0cSmrg  MPFR_ZIV_DECL(loop);
299c6f0cSmrg  MPFR_SAVE_EXPO_DECL (expo);
299c6f0cSmrg
299c6f0cSmrg  if (n <= 2)
299c6f0cSmrg    {
299c6f0cSmrg      if (n == 0)
299c6f0cSmrg        {
299c6f0cSmrg          MPFR_SET_INF (x);
299c6f0cSmrg          MPFR_SET_NEG (x);
299c6f0cSmrg          MPFR_SET_DIVBY0 ();
299c6f0cSmrg          MPFR_RET (0); /* log(0) is an exact -infinity */
299c6f0cSmrg        }
299c6f0cSmrg      else if (n == 1)
299c6f0cSmrg        {
299c6f0cSmrg          MPFR_SET_ZERO (x);
299c6f0cSmrg          MPFR_SET_POS (x);
299c6f0cSmrg          MPFR_RET (0); /* only "normal" case where the result is exact */
299c6f0cSmrg        }
299c6f0cSmrg      /* now n=2 */
299c6f0cSmrg      return mpfr_const_log2 (x, rnd_mode);
299c6f0cSmrg    }
299c6f0cSmrg
299c6f0cSmrg  /* here n >= 3 */
299c6f0cSmrg
*ba125506Smrg  /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3,
*ba125506Smrg     i.e., 2^(k+1) < 3n < 2^(k+2).
299c6f0cSmrg
299c6f0cSmrg     FIXME: we could do better by considering n/(2^k*3^i*5^j),
299c6f0cSmrg     which reduces the maximal distance to 1 from 1/3 to 1/8,
299c6f0cSmrg     thus needing about 1.89 less terms in the Taylor expansion of
299c6f0cSmrg     the reduced argument. Then log(2^k*3^i*5^j) can be computed
299c6f0cSmrg     using a combination of log(16/15), log(25/24) and log(81/80),
299c6f0cSmrg     see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
299c6f0cSmrg     Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
299c6f0cSmrg
299c6f0cSmrg  mpz_init_set_ui (three_n, n);
299c6f0cSmrg  mpz_mul_ui (three_n, three_n, 3);
299c6f0cSmrg  k = mpz_sizeinbase (three_n, 2) - 2;
299c6f0cSmrg  MPFR_ASSERTD (k >= 2);
299c6f0cSmrg  mpz_clear (three_n);
299c6f0cSmrg
299c6f0cSmrg  /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
299c6f0cSmrg     Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
299c6f0cSmrg     so that p and -p both fit in a long. */
*ba125506Smrg  if (k < sizeof (unsigned long) * CHAR_BIT)  /* assume no padding bits */
299c6f0cSmrg    n -= 1UL << k;
*ba125506Smrg  /* n is now the value of p mod ULONG_MAX+1.
*ba125506Smrg     Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and
*ba125506Smrg     -n as an unsigned long value is at most LONG_MAX, thus fits in a
*ba125506Smrg     long. */
*ba125506Smrg  p = ULONG2LONG (n);
299c6f0cSmrg
299c6f0cSmrg  MPFR_TMP_MARK(marker);
299c6f0cSmrg  w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
299c6f0cSmrg  MPFR_GROUP_INIT_2(group, w, t, q);
299c6f0cSmrg  MPFR_SAVE_EXPO_MARK (expo);
299c6f0cSmrg
299c6f0cSmrg  kk = k;
299c6f0cSmrg  if (p != 0)
299c6f0cSmrg    while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
299c6f0cSmrg      {
299c6f0cSmrg        p /= 2;
299c6f0cSmrg        kk --;
299c6f0cSmrg      }
299c6f0cSmrg
299c6f0cSmrg  MPFR_ZIV_INIT (loop, w);
299c6f0cSmrg  for (;;)
299c6f0cSmrg    {
299c6f0cSmrg      mpfr_t tmp;
299c6f0cSmrg      unsigned int err;
299c6f0cSmrg      unsigned long q0;
299c6f0cSmrg
299c6f0cSmrg      /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
299c6f0cSmrg      mpfr_init2 (tmp, 32);
299c6f0cSmrg      mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
299c6f0cSmrg      mpfr_log2 (tmp, tmp, MPFR_RNDU);
299c6f0cSmrg      mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
299c6f0cSmrg      MPFR_ASSERTN (w <= ULONG_MAX);
299c6f0cSmrg      mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
299c6f0cSmrg      N = mpfr_get_ui (tmp, MPFR_RNDU);
299c6f0cSmrg      if (N < 2)
299c6f0cSmrg        N = 2;
299c6f0cSmrg      lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
299c6f0cSmrg      mpfr_clear (tmp);
299c6f0cSmrg      P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
299c6f0cSmrg      B = P + lgN;
299c6f0cSmrg      T = B + lgN;
299c6f0cSmrg      for (i = 0; i < lgN; i++)
299c6f0cSmrg        {
299c6f0cSmrg          mpz_init (P[i]);
299c6f0cSmrg          mpz_init (B[i]);
299c6f0cSmrg          mpz_init (T[i]);
299c6f0cSmrg        }
299c6f0cSmrg
299c6f0cSmrg      S (P, &q0, B, T, 1, N, p, kk, 0);
299c6f0cSmrg      /* mpz_mul (Q[0], B[0], Q[0]); */
299c6f0cSmrg      /* mpz_mul_2exp (B[0], B[0], q0); */
299c6f0cSmrg
299c6f0cSmrg      mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
299c6f0cSmrg      mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
2ba2404bSmrg      mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
299c6f0cSmrg      mpfr_div (t, t, q, MPFR_RNDN);   /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
299c6f0cSmrg                                            = log(n/2^k) * (1 + theta_4)^4
299c6f0cSmrg                                            for |theta_i| < 2^(-w) */
299c6f0cSmrg
299c6f0cSmrg      /* argument reconstruction: add k*log(2) */
299c6f0cSmrg      mpfr_const_log2 (q, MPFR_RNDN);
299c6f0cSmrg      mpfr_mul_ui (q, q, k, MPFR_RNDN);
299c6f0cSmrg      mpfr_add (t, t, q, MPFR_RNDN);
299c6f0cSmrg      for (i = 0; i < lgN; i++)
299c6f0cSmrg        {
299c6f0cSmrg          mpz_clear (P[i]);
299c6f0cSmrg          mpz_clear (B[i]);
299c6f0cSmrg          mpz_clear (T[i]);
299c6f0cSmrg        }
299c6f0cSmrg      /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
299c6f0cSmrg         for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
299c6f0cSmrg         thus at most k+6 ulps.
299c6f0cSmrg         Note that there might be some cancellation in the addition: the worst
299c6f0cSmrg         case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
299c6f0cSmrg         gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
299c6f0cSmrg         have an exponent decrease of 1, which accounts for +1 in the error. */
299c6f0cSmrg      err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
299c6f0cSmrg      if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
299c6f0cSmrg        break;
299c6f0cSmrg
299c6f0cSmrg      MPFR_ZIV_NEXT (loop, w);
299c6f0cSmrg      MPFR_GROUP_REPREC_2(group, w, t, q);
299c6f0cSmrg    }
299c6f0cSmrg  MPFR_ZIV_FREE (loop);
299c6f0cSmrg
299c6f0cSmrg  inexact = mpfr_set (x, t, rnd_mode);
299c6f0cSmrg
299c6f0cSmrg  MPFR_GROUP_CLEAR(group);
299c6f0cSmrg  MPFR_TMP_FREE(marker);
299c6f0cSmrg
299c6f0cSmrg  MPFR_SAVE_EXPO_FREE (expo);
299c6f0cSmrg  return mpfr_check_range (x, inexact, rnd_mode);
299c6f0cSmrg}