xref: /netbsd-src/external/lgpl3/mpfr/dist/src/log_ui.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1299c6f0cSmrg /* mpfr_log_ui -- compute natural logarithm of an unsigned long
2299c6f0cSmrg 
3*ba125506Smrg Copyright 2014-2023 Free Software Foundation, Inc.
4299c6f0cSmrg Contributed by the AriC and Caramba projects, INRIA.
5299c6f0cSmrg 
6299c6f0cSmrg This file is part of the GNU MPFR Library.
7299c6f0cSmrg 
8299c6f0cSmrg The GNU MPFR Library is free software; you can redistribute it and/or modify
9299c6f0cSmrg it under the terms of the GNU Lesser General Public License as published by
10299c6f0cSmrg the Free Software Foundation; either version 3 of the License, or (at your
11299c6f0cSmrg option) any later version.
12299c6f0cSmrg 
13299c6f0cSmrg The GNU MPFR Library is distributed in the hope that it will be useful, but
14299c6f0cSmrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15299c6f0cSmrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16299c6f0cSmrg License for more details.
17299c6f0cSmrg 
18299c6f0cSmrg You should have received a copy of the GNU Lesser General Public License
19299c6f0cSmrg along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
202ba2404bSmrg https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21299c6f0cSmrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22299c6f0cSmrg 
23299c6f0cSmrg #define MPFR_NEED_LONGLONG_H
24299c6f0cSmrg #include "mpfr-impl.h"
25299c6f0cSmrg 
26299c6f0cSmrg /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
27299c6f0cSmrg    e.g. about 4 times as slow for n around ULONG_MAX/3 on an
28299c6f0cSmrg    x86_64 Linux machine, for 10^6 bits of precision. The reason is that
29299c6f0cSmrg    for say n=6148914691236517205 and prec=10^6, the value of T computed
30299c6f0cSmrg    has more than 50M bits, which is much more than needed. Indeed the
31299c6f0cSmrg    binary splitting algorithm for series with a finite radius of convergence
32299c6f0cSmrg    gives rationals of size n*log(n) for a target precision n. One might
33299c6f0cSmrg    truncate the rationals inside the algorithm, but then the error analysis
34299c6f0cSmrg    should be redone. */
35299c6f0cSmrg 
36*ba125506Smrg /* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x)
37299c6f0cSmrg    up to order N for x=p/2^k is T/(B*Q).
38299c6f0cSmrg    P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
39299c6f0cSmrg    q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
40299c6f0cSmrg    B[0] <- n1 * (n1+1) * ... * (n2-1)
41299c6f0cSmrg    T[0] <- B[0]*Q[0] * S(n1,n2)
42299c6f0cSmrg    where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
43299c6f0cSmrg    Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
44299c6f0cSmrg */
45299c6f0cSmrg static void
S(mpz_t * P,unsigned long * q,mpz_t * B,mpz_t * T,unsigned long n1,unsigned long n2,long p,unsigned long k,int need_P)46299c6f0cSmrg S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
47299c6f0cSmrg    unsigned long n2, long p, unsigned long k, int need_P)
48299c6f0cSmrg {
49299c6f0cSmrg   MPFR_ASSERTD (n1 < n2);
50299c6f0cSmrg   MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
51299c6f0cSmrg   if (n2 == n1 + 1)
52299c6f0cSmrg     {
53299c6f0cSmrg       mpz_set_si (P[0], (n1 == 1) ? p : -p);
54299c6f0cSmrg       *q = k;
55299c6f0cSmrg       mpz_set_ui (B[0], n1);
56299c6f0cSmrg       /* T = B*Q*S where S = P/(B*Q) thus T = P */
57299c6f0cSmrg       mpz_set (T[0], P[0]);
58299c6f0cSmrg       /* since p is odd (or zero), there is no common factor 2 between
59299c6f0cSmrg          P and Q, or T and B */
60299c6f0cSmrg     }
61299c6f0cSmrg   else
62299c6f0cSmrg     {
63299c6f0cSmrg       unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
64299c6f0cSmrg       /* m = floor((n1+n2)/2) */
65299c6f0cSmrg 
66299c6f0cSmrg       MPFR_ASSERTD (n1 < m && m < n2);
67299c6f0cSmrg       S (P, q, B, T, n1, m, p, k, 1);
68299c6f0cSmrg       S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
69299c6f0cSmrg 
70299c6f0cSmrg       /* T0 <- T0*B1*Q1 + P0*B0*T1 */
71299c6f0cSmrg       mpz_mul (T[1], T[1], P[0]);
72299c6f0cSmrg       mpz_mul (T[1], T[1], B[0]);
73299c6f0cSmrg       mpz_mul (T[0], T[0], B[1]);
74299c6f0cSmrg       /* Q[1] = 2^q1 */
75299c6f0cSmrg       mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
76299c6f0cSmrg       mpz_add (T[0], T[0], T[1]);
77299c6f0cSmrg       if (need_P)
78299c6f0cSmrg         mpz_mul (P[0], P[0], P[1]);
79299c6f0cSmrg       *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
80299c6f0cSmrg       mpz_mul (B[0], B[0], B[1]);
81299c6f0cSmrg 
82299c6f0cSmrg       /* there should be no common factors 2 between P, Q and T,
83299c6f0cSmrg          since P is odd (or zero) */
84299c6f0cSmrg     }
85299c6f0cSmrg }
86299c6f0cSmrg 
87299c6f0cSmrg int
mpfr_log_ui(mpfr_ptr x,unsigned long n,mpfr_rnd_t rnd_mode)88299c6f0cSmrg mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
89299c6f0cSmrg {
90299c6f0cSmrg   unsigned long k;
91299c6f0cSmrg   mpfr_prec_t w; /* working precision */
92299c6f0cSmrg   mpz_t three_n, *P, *B, *T;
93299c6f0cSmrg   mpfr_t t, q;
94299c6f0cSmrg   int inexact;
95299c6f0cSmrg   unsigned long N, lgN, i, kk;
96299c6f0cSmrg   long p;
97299c6f0cSmrg   MPFR_GROUP_DECL(group);
98299c6f0cSmrg   MPFR_TMP_DECL(marker);
99299c6f0cSmrg   MPFR_ZIV_DECL(loop);
100299c6f0cSmrg   MPFR_SAVE_EXPO_DECL (expo);
101299c6f0cSmrg 
102299c6f0cSmrg   if (n <= 2)
103299c6f0cSmrg     {
104299c6f0cSmrg       if (n == 0)
105299c6f0cSmrg         {
106299c6f0cSmrg           MPFR_SET_INF (x);
107299c6f0cSmrg           MPFR_SET_NEG (x);
108299c6f0cSmrg           MPFR_SET_DIVBY0 ();
109299c6f0cSmrg           MPFR_RET (0); /* log(0) is an exact -infinity */
110299c6f0cSmrg         }
111299c6f0cSmrg       else if (n == 1)
112299c6f0cSmrg         {
113299c6f0cSmrg           MPFR_SET_ZERO (x);
114299c6f0cSmrg           MPFR_SET_POS (x);
115299c6f0cSmrg           MPFR_RET (0); /* only "normal" case where the result is exact */
116299c6f0cSmrg         }
117299c6f0cSmrg       /* now n=2 */
118299c6f0cSmrg       return mpfr_const_log2 (x, rnd_mode);
119299c6f0cSmrg     }
120299c6f0cSmrg 
121299c6f0cSmrg   /* here n >= 3 */
122299c6f0cSmrg 
123*ba125506Smrg   /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3,
124*ba125506Smrg      i.e., 2^(k+1) < 3n < 2^(k+2).
125299c6f0cSmrg 
126299c6f0cSmrg      FIXME: we could do better by considering n/(2^k*3^i*5^j),
127299c6f0cSmrg      which reduces the maximal distance to 1 from 1/3 to 1/8,
128299c6f0cSmrg      thus needing about 1.89 less terms in the Taylor expansion of
129299c6f0cSmrg      the reduced argument. Then log(2^k*3^i*5^j) can be computed
130299c6f0cSmrg      using a combination of log(16/15), log(25/24) and log(81/80),
131299c6f0cSmrg      see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
132299c6f0cSmrg      Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
133299c6f0cSmrg 
134299c6f0cSmrg   mpz_init_set_ui (three_n, n);
135299c6f0cSmrg   mpz_mul_ui (three_n, three_n, 3);
136299c6f0cSmrg   k = mpz_sizeinbase (three_n, 2) - 2;
137299c6f0cSmrg   MPFR_ASSERTD (k >= 2);
138299c6f0cSmrg   mpz_clear (three_n);
139299c6f0cSmrg 
140299c6f0cSmrg   /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
141299c6f0cSmrg      Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
142299c6f0cSmrg      so that p and -p both fit in a long. */
143*ba125506Smrg   if (k < sizeof (unsigned long) * CHAR_BIT)  /* assume no padding bits */
144299c6f0cSmrg     n -= 1UL << k;
145*ba125506Smrg   /* n is now the value of p mod ULONG_MAX+1.
146*ba125506Smrg      Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and
147*ba125506Smrg      -n as an unsigned long value is at most LONG_MAX, thus fits in a
148*ba125506Smrg      long. */
149*ba125506Smrg   p = ULONG2LONG (n);
150299c6f0cSmrg 
151299c6f0cSmrg   MPFR_TMP_MARK(marker);
152299c6f0cSmrg   w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
153299c6f0cSmrg   MPFR_GROUP_INIT_2(group, w, t, q);
154299c6f0cSmrg   MPFR_SAVE_EXPO_MARK (expo);
155299c6f0cSmrg 
156299c6f0cSmrg   kk = k;
157299c6f0cSmrg   if (p != 0)
158299c6f0cSmrg     while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
159299c6f0cSmrg       {
160299c6f0cSmrg         p /= 2;
161299c6f0cSmrg         kk --;
162299c6f0cSmrg       }
163299c6f0cSmrg 
164299c6f0cSmrg   MPFR_ZIV_INIT (loop, w);
165299c6f0cSmrg   for (;;)
166299c6f0cSmrg     {
167299c6f0cSmrg       mpfr_t tmp;
168299c6f0cSmrg       unsigned int err;
169299c6f0cSmrg       unsigned long q0;
170299c6f0cSmrg 
171299c6f0cSmrg       /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
172299c6f0cSmrg       mpfr_init2 (tmp, 32);
173299c6f0cSmrg       mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
174299c6f0cSmrg       mpfr_log2 (tmp, tmp, MPFR_RNDU);
175299c6f0cSmrg       mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
176299c6f0cSmrg       MPFR_ASSERTN (w <= ULONG_MAX);
177299c6f0cSmrg       mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
178299c6f0cSmrg       N = mpfr_get_ui (tmp, MPFR_RNDU);
179299c6f0cSmrg       if (N < 2)
180299c6f0cSmrg         N = 2;
181299c6f0cSmrg       lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
182299c6f0cSmrg       mpfr_clear (tmp);
183299c6f0cSmrg       P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
184299c6f0cSmrg       B = P + lgN;
185299c6f0cSmrg       T = B + lgN;
186299c6f0cSmrg       for (i = 0; i < lgN; i++)
187299c6f0cSmrg         {
188299c6f0cSmrg           mpz_init (P[i]);
189299c6f0cSmrg           mpz_init (B[i]);
190299c6f0cSmrg           mpz_init (T[i]);
191299c6f0cSmrg         }
192299c6f0cSmrg 
193299c6f0cSmrg       S (P, &q0, B, T, 1, N, p, kk, 0);
194299c6f0cSmrg       /* mpz_mul (Q[0], B[0], Q[0]); */
195299c6f0cSmrg       /* mpz_mul_2exp (B[0], B[0], q0); */
196299c6f0cSmrg 
197299c6f0cSmrg       mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
198299c6f0cSmrg       mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
1992ba2404bSmrg       mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
200299c6f0cSmrg       mpfr_div (t, t, q, MPFR_RNDN);   /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
201299c6f0cSmrg                                             = log(n/2^k) * (1 + theta_4)^4
202299c6f0cSmrg                                             for |theta_i| < 2^(-w) */
203299c6f0cSmrg 
204299c6f0cSmrg       /* argument reconstruction: add k*log(2) */
205299c6f0cSmrg       mpfr_const_log2 (q, MPFR_RNDN);
206299c6f0cSmrg       mpfr_mul_ui (q, q, k, MPFR_RNDN);
207299c6f0cSmrg       mpfr_add (t, t, q, MPFR_RNDN);
208299c6f0cSmrg       for (i = 0; i < lgN; i++)
209299c6f0cSmrg         {
210299c6f0cSmrg           mpz_clear (P[i]);
211299c6f0cSmrg           mpz_clear (B[i]);
212299c6f0cSmrg           mpz_clear (T[i]);
213299c6f0cSmrg         }
214299c6f0cSmrg       /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
215299c6f0cSmrg          for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
216299c6f0cSmrg          thus at most k+6 ulps.
217299c6f0cSmrg          Note that there might be some cancellation in the addition: the worst
218299c6f0cSmrg          case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
219299c6f0cSmrg          gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
220299c6f0cSmrg          have an exponent decrease of 1, which accounts for +1 in the error. */
221299c6f0cSmrg       err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
222299c6f0cSmrg       if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
223299c6f0cSmrg         break;
224299c6f0cSmrg 
225299c6f0cSmrg       MPFR_ZIV_NEXT (loop, w);
226299c6f0cSmrg       MPFR_GROUP_REPREC_2(group, w, t, q);
227299c6f0cSmrg     }
228299c6f0cSmrg   MPFR_ZIV_FREE (loop);
229299c6f0cSmrg 
230299c6f0cSmrg   inexact = mpfr_set (x, t, rnd_mode);
231299c6f0cSmrg 
232299c6f0cSmrg   MPFR_GROUP_CLEAR(group);
233299c6f0cSmrg   MPFR_TMP_FREE(marker);
234299c6f0cSmrg 
235299c6f0cSmrg   MPFR_SAVE_EXPO_FREE (expo);
236299c6f0cSmrg   return mpfr_check_range (x, inexact, rnd_mode);
237299c6f0cSmrg }
238