1299c6f0cSmrg /* mpfr_log_ui -- compute natural logarithm of an unsigned long
2299c6f0cSmrg
3*ba125506Smrg Copyright 2014-2023 Free Software Foundation, Inc.
4299c6f0cSmrg Contributed by the AriC and Caramba projects, INRIA.
5299c6f0cSmrg
6299c6f0cSmrg This file is part of the GNU MPFR Library.
7299c6f0cSmrg
8299c6f0cSmrg The GNU MPFR Library is free software; you can redistribute it and/or modify
9299c6f0cSmrg it under the terms of the GNU Lesser General Public License as published by
10299c6f0cSmrg the Free Software Foundation; either version 3 of the License, or (at your
11299c6f0cSmrg option) any later version.
12299c6f0cSmrg
13299c6f0cSmrg The GNU MPFR Library is distributed in the hope that it will be useful, but
14299c6f0cSmrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15299c6f0cSmrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16299c6f0cSmrg License for more details.
17299c6f0cSmrg
18299c6f0cSmrg You should have received a copy of the GNU Lesser General Public License
19299c6f0cSmrg along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
202ba2404bSmrg https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21299c6f0cSmrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22299c6f0cSmrg
23299c6f0cSmrg #define MPFR_NEED_LONGLONG_H
24299c6f0cSmrg #include "mpfr-impl.h"
25299c6f0cSmrg
26299c6f0cSmrg /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
27299c6f0cSmrg e.g. about 4 times as slow for n around ULONG_MAX/3 on an
28299c6f0cSmrg x86_64 Linux machine, for 10^6 bits of precision. The reason is that
29299c6f0cSmrg for say n=6148914691236517205 and prec=10^6, the value of T computed
30299c6f0cSmrg has more than 50M bits, which is much more than needed. Indeed the
31299c6f0cSmrg binary splitting algorithm for series with a finite radius of convergence
32299c6f0cSmrg gives rationals of size n*log(n) for a target precision n. One might
33299c6f0cSmrg truncate the rationals inside the algorithm, but then the error analysis
34299c6f0cSmrg should be redone. */
35299c6f0cSmrg
36*ba125506Smrg /* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x)
37299c6f0cSmrg up to order N for x=p/2^k is T/(B*Q).
38299c6f0cSmrg P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
39299c6f0cSmrg q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
40299c6f0cSmrg B[0] <- n1 * (n1+1) * ... * (n2-1)
41299c6f0cSmrg T[0] <- B[0]*Q[0] * S(n1,n2)
42299c6f0cSmrg where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
43299c6f0cSmrg Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
44299c6f0cSmrg */
45299c6f0cSmrg static void
S(mpz_t * P,unsigned long * q,mpz_t * B,mpz_t * T,unsigned long n1,unsigned long n2,long p,unsigned long k,int need_P)46299c6f0cSmrg S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
47299c6f0cSmrg unsigned long n2, long p, unsigned long k, int need_P)
48299c6f0cSmrg {
49299c6f0cSmrg MPFR_ASSERTD (n1 < n2);
50299c6f0cSmrg MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
51299c6f0cSmrg if (n2 == n1 + 1)
52299c6f0cSmrg {
53299c6f0cSmrg mpz_set_si (P[0], (n1 == 1) ? p : -p);
54299c6f0cSmrg *q = k;
55299c6f0cSmrg mpz_set_ui (B[0], n1);
56299c6f0cSmrg /* T = B*Q*S where S = P/(B*Q) thus T = P */
57299c6f0cSmrg mpz_set (T[0], P[0]);
58299c6f0cSmrg /* since p is odd (or zero), there is no common factor 2 between
59299c6f0cSmrg P and Q, or T and B */
60299c6f0cSmrg }
61299c6f0cSmrg else
62299c6f0cSmrg {
63299c6f0cSmrg unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
64299c6f0cSmrg /* m = floor((n1+n2)/2) */
65299c6f0cSmrg
66299c6f0cSmrg MPFR_ASSERTD (n1 < m && m < n2);
67299c6f0cSmrg S (P, q, B, T, n1, m, p, k, 1);
68299c6f0cSmrg S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
69299c6f0cSmrg
70299c6f0cSmrg /* T0 <- T0*B1*Q1 + P0*B0*T1 */
71299c6f0cSmrg mpz_mul (T[1], T[1], P[0]);
72299c6f0cSmrg mpz_mul (T[1], T[1], B[0]);
73299c6f0cSmrg mpz_mul (T[0], T[0], B[1]);
74299c6f0cSmrg /* Q[1] = 2^q1 */
75299c6f0cSmrg mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
76299c6f0cSmrg mpz_add (T[0], T[0], T[1]);
77299c6f0cSmrg if (need_P)
78299c6f0cSmrg mpz_mul (P[0], P[0], P[1]);
79299c6f0cSmrg *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
80299c6f0cSmrg mpz_mul (B[0], B[0], B[1]);
81299c6f0cSmrg
82299c6f0cSmrg /* there should be no common factors 2 between P, Q and T,
83299c6f0cSmrg since P is odd (or zero) */
84299c6f0cSmrg }
85299c6f0cSmrg }
86299c6f0cSmrg
87299c6f0cSmrg int
mpfr_log_ui(mpfr_ptr x,unsigned long n,mpfr_rnd_t rnd_mode)88299c6f0cSmrg mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
89299c6f0cSmrg {
90299c6f0cSmrg unsigned long k;
91299c6f0cSmrg mpfr_prec_t w; /* working precision */
92299c6f0cSmrg mpz_t three_n, *P, *B, *T;
93299c6f0cSmrg mpfr_t t, q;
94299c6f0cSmrg int inexact;
95299c6f0cSmrg unsigned long N, lgN, i, kk;
96299c6f0cSmrg long p;
97299c6f0cSmrg MPFR_GROUP_DECL(group);
98299c6f0cSmrg MPFR_TMP_DECL(marker);
99299c6f0cSmrg MPFR_ZIV_DECL(loop);
100299c6f0cSmrg MPFR_SAVE_EXPO_DECL (expo);
101299c6f0cSmrg
102299c6f0cSmrg if (n <= 2)
103299c6f0cSmrg {
104299c6f0cSmrg if (n == 0)
105299c6f0cSmrg {
106299c6f0cSmrg MPFR_SET_INF (x);
107299c6f0cSmrg MPFR_SET_NEG (x);
108299c6f0cSmrg MPFR_SET_DIVBY0 ();
109299c6f0cSmrg MPFR_RET (0); /* log(0) is an exact -infinity */
110299c6f0cSmrg }
111299c6f0cSmrg else if (n == 1)
112299c6f0cSmrg {
113299c6f0cSmrg MPFR_SET_ZERO (x);
114299c6f0cSmrg MPFR_SET_POS (x);
115299c6f0cSmrg MPFR_RET (0); /* only "normal" case where the result is exact */
116299c6f0cSmrg }
117299c6f0cSmrg /* now n=2 */
118299c6f0cSmrg return mpfr_const_log2 (x, rnd_mode);
119299c6f0cSmrg }
120299c6f0cSmrg
121299c6f0cSmrg /* here n >= 3 */
122299c6f0cSmrg
123*ba125506Smrg /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3,
124*ba125506Smrg i.e., 2^(k+1) < 3n < 2^(k+2).
125299c6f0cSmrg
126299c6f0cSmrg FIXME: we could do better by considering n/(2^k*3^i*5^j),
127299c6f0cSmrg which reduces the maximal distance to 1 from 1/3 to 1/8,
128299c6f0cSmrg thus needing about 1.89 less terms in the Taylor expansion of
129299c6f0cSmrg the reduced argument. Then log(2^k*3^i*5^j) can be computed
130299c6f0cSmrg using a combination of log(16/15), log(25/24) and log(81/80),
131299c6f0cSmrg see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
132299c6f0cSmrg Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
133299c6f0cSmrg
134299c6f0cSmrg mpz_init_set_ui (three_n, n);
135299c6f0cSmrg mpz_mul_ui (three_n, three_n, 3);
136299c6f0cSmrg k = mpz_sizeinbase (three_n, 2) - 2;
137299c6f0cSmrg MPFR_ASSERTD (k >= 2);
138299c6f0cSmrg mpz_clear (three_n);
139299c6f0cSmrg
140299c6f0cSmrg /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
141299c6f0cSmrg Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
142299c6f0cSmrg so that p and -p both fit in a long. */
143*ba125506Smrg if (k < sizeof (unsigned long) * CHAR_BIT) /* assume no padding bits */
144299c6f0cSmrg n -= 1UL << k;
145*ba125506Smrg /* n is now the value of p mod ULONG_MAX+1.
146*ba125506Smrg Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and
147*ba125506Smrg -n as an unsigned long value is at most LONG_MAX, thus fits in a
148*ba125506Smrg long. */
149*ba125506Smrg p = ULONG2LONG (n);
150299c6f0cSmrg
151299c6f0cSmrg MPFR_TMP_MARK(marker);
152299c6f0cSmrg w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
153299c6f0cSmrg MPFR_GROUP_INIT_2(group, w, t, q);
154299c6f0cSmrg MPFR_SAVE_EXPO_MARK (expo);
155299c6f0cSmrg
156299c6f0cSmrg kk = k;
157299c6f0cSmrg if (p != 0)
158299c6f0cSmrg while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
159299c6f0cSmrg {
160299c6f0cSmrg p /= 2;
161299c6f0cSmrg kk --;
162299c6f0cSmrg }
163299c6f0cSmrg
164299c6f0cSmrg MPFR_ZIV_INIT (loop, w);
165299c6f0cSmrg for (;;)
166299c6f0cSmrg {
167299c6f0cSmrg mpfr_t tmp;
168299c6f0cSmrg unsigned int err;
169299c6f0cSmrg unsigned long q0;
170299c6f0cSmrg
171299c6f0cSmrg /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
172299c6f0cSmrg mpfr_init2 (tmp, 32);
173299c6f0cSmrg mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
174299c6f0cSmrg mpfr_log2 (tmp, tmp, MPFR_RNDU);
175299c6f0cSmrg mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
176299c6f0cSmrg MPFR_ASSERTN (w <= ULONG_MAX);
177299c6f0cSmrg mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
178299c6f0cSmrg N = mpfr_get_ui (tmp, MPFR_RNDU);
179299c6f0cSmrg if (N < 2)
180299c6f0cSmrg N = 2;
181299c6f0cSmrg lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
182299c6f0cSmrg mpfr_clear (tmp);
183299c6f0cSmrg P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
184299c6f0cSmrg B = P + lgN;
185299c6f0cSmrg T = B + lgN;
186299c6f0cSmrg for (i = 0; i < lgN; i++)
187299c6f0cSmrg {
188299c6f0cSmrg mpz_init (P[i]);
189299c6f0cSmrg mpz_init (B[i]);
190299c6f0cSmrg mpz_init (T[i]);
191299c6f0cSmrg }
192299c6f0cSmrg
193299c6f0cSmrg S (P, &q0, B, T, 1, N, p, kk, 0);
194299c6f0cSmrg /* mpz_mul (Q[0], B[0], Q[0]); */
195299c6f0cSmrg /* mpz_mul_2exp (B[0], B[0], q0); */
196299c6f0cSmrg
197299c6f0cSmrg mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
198299c6f0cSmrg mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
1992ba2404bSmrg mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
200299c6f0cSmrg mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
201299c6f0cSmrg = log(n/2^k) * (1 + theta_4)^4
202299c6f0cSmrg for |theta_i| < 2^(-w) */
203299c6f0cSmrg
204299c6f0cSmrg /* argument reconstruction: add k*log(2) */
205299c6f0cSmrg mpfr_const_log2 (q, MPFR_RNDN);
206299c6f0cSmrg mpfr_mul_ui (q, q, k, MPFR_RNDN);
207299c6f0cSmrg mpfr_add (t, t, q, MPFR_RNDN);
208299c6f0cSmrg for (i = 0; i < lgN; i++)
209299c6f0cSmrg {
210299c6f0cSmrg mpz_clear (P[i]);
211299c6f0cSmrg mpz_clear (B[i]);
212299c6f0cSmrg mpz_clear (T[i]);
213299c6f0cSmrg }
214299c6f0cSmrg /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
215299c6f0cSmrg for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
216299c6f0cSmrg thus at most k+6 ulps.
217299c6f0cSmrg Note that there might be some cancellation in the addition: the worst
218299c6f0cSmrg case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
219299c6f0cSmrg gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
220299c6f0cSmrg have an exponent decrease of 1, which accounts for +1 in the error. */
221299c6f0cSmrg err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
222299c6f0cSmrg if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
223299c6f0cSmrg break;
224299c6f0cSmrg
225299c6f0cSmrg MPFR_ZIV_NEXT (loop, w);
226299c6f0cSmrg MPFR_GROUP_REPREC_2(group, w, t, q);
227299c6f0cSmrg }
228299c6f0cSmrg MPFR_ZIV_FREE (loop);
229299c6f0cSmrg
230299c6f0cSmrg inexact = mpfr_set (x, t, rnd_mode);
231299c6f0cSmrg
232299c6f0cSmrg MPFR_GROUP_CLEAR(group);
233299c6f0cSmrg MPFR_TMP_FREE(marker);
234299c6f0cSmrg
235299c6f0cSmrg MPFR_SAVE_EXPO_FREE (expo);
236299c6f0cSmrg return mpfr_check_range (x, inexact, rnd_mode);
237299c6f0cSmrg }
238