1 /* mpfr_log_ui -- compute natural logarithm of an unsigned long
2
3 Copyright 2014-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
27 e.g. about 4 times as slow for n around ULONG_MAX/3 on an
28 x86_64 Linux machine, for 10^6 bits of precision. The reason is that
29 for say n=6148914691236517205 and prec=10^6, the value of T computed
30 has more than 50M bits, which is much more than needed. Indeed the
31 binary splitting algorithm for series with a finite radius of convergence
32 gives rationals of size n*log(n) for a target precision n. One might
33 truncate the rationals inside the algorithm, but then the error analysis
34 should be redone. */
35
36 /* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x)
37 up to order N for x=p/2^k is T/(B*Q).
38 P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
39 q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
40 B[0] <- n1 * (n1+1) * ... * (n2-1)
41 T[0] <- B[0]*Q[0] * S(n1,n2)
42 where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
43 Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
44 */
45 static void
S(mpz_t * P,unsigned long * q,mpz_t * B,mpz_t * T,unsigned long n1,unsigned long n2,long p,unsigned long k,int need_P)46 S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
47 unsigned long n2, long p, unsigned long k, int need_P)
48 {
49 MPFR_ASSERTD (n1 < n2);
50 MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
51 if (n2 == n1 + 1)
52 {
53 mpz_set_si (P[0], (n1 == 1) ? p : -p);
54 *q = k;
55 mpz_set_ui (B[0], n1);
56 /* T = B*Q*S where S = P/(B*Q) thus T = P */
57 mpz_set (T[0], P[0]);
58 /* since p is odd (or zero), there is no common factor 2 between
59 P and Q, or T and B */
60 }
61 else
62 {
63 unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
64 /* m = floor((n1+n2)/2) */
65
66 MPFR_ASSERTD (n1 < m && m < n2);
67 S (P, q, B, T, n1, m, p, k, 1);
68 S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
69
70 /* T0 <- T0*B1*Q1 + P0*B0*T1 */
71 mpz_mul (T[1], T[1], P[0]);
72 mpz_mul (T[1], T[1], B[0]);
73 mpz_mul (T[0], T[0], B[1]);
74 /* Q[1] = 2^q1 */
75 mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
76 mpz_add (T[0], T[0], T[1]);
77 if (need_P)
78 mpz_mul (P[0], P[0], P[1]);
79 *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
80 mpz_mul (B[0], B[0], B[1]);
81
82 /* there should be no common factors 2 between P, Q and T,
83 since P is odd (or zero) */
84 }
85 }
86
87 int
mpfr_log_ui(mpfr_ptr x,unsigned long n,mpfr_rnd_t rnd_mode)88 mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
89 {
90 unsigned long k;
91 mpfr_prec_t w; /* working precision */
92 mpz_t three_n, *P, *B, *T;
93 mpfr_t t, q;
94 int inexact;
95 unsigned long N, lgN, i, kk;
96 long p;
97 MPFR_GROUP_DECL(group);
98 MPFR_TMP_DECL(marker);
99 MPFR_ZIV_DECL(loop);
100 MPFR_SAVE_EXPO_DECL (expo);
101
102 if (n <= 2)
103 {
104 if (n == 0)
105 {
106 MPFR_SET_INF (x);
107 MPFR_SET_NEG (x);
108 MPFR_SET_DIVBY0 ();
109 MPFR_RET (0); /* log(0) is an exact -infinity */
110 }
111 else if (n == 1)
112 {
113 MPFR_SET_ZERO (x);
114 MPFR_SET_POS (x);
115 MPFR_RET (0); /* only "normal" case where the result is exact */
116 }
117 /* now n=2 */
118 return mpfr_const_log2 (x, rnd_mode);
119 }
120
121 /* here n >= 3 */
122
123 /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3,
124 i.e., 2^(k+1) < 3n < 2^(k+2).
125
126 FIXME: we could do better by considering n/(2^k*3^i*5^j),
127 which reduces the maximal distance to 1 from 1/3 to 1/8,
128 thus needing about 1.89 less terms in the Taylor expansion of
129 the reduced argument. Then log(2^k*3^i*5^j) can be computed
130 using a combination of log(16/15), log(25/24) and log(81/80),
131 see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
132 Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
133
134 mpz_init_set_ui (three_n, n);
135 mpz_mul_ui (three_n, three_n, 3);
136 k = mpz_sizeinbase (three_n, 2) - 2;
137 MPFR_ASSERTD (k >= 2);
138 mpz_clear (three_n);
139
140 /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
141 Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
142 so that p and -p both fit in a long. */
143 if (k < sizeof (unsigned long) * CHAR_BIT) /* assume no padding bits */
144 n -= 1UL << k;
145 /* n is now the value of p mod ULONG_MAX+1.
146 Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and
147 -n as an unsigned long value is at most LONG_MAX, thus fits in a
148 long. */
149 p = ULONG2LONG (n);
150
151 MPFR_TMP_MARK(marker);
152 w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
153 MPFR_GROUP_INIT_2(group, w, t, q);
154 MPFR_SAVE_EXPO_MARK (expo);
155
156 kk = k;
157 if (p != 0)
158 while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
159 {
160 p /= 2;
161 kk --;
162 }
163
164 MPFR_ZIV_INIT (loop, w);
165 for (;;)
166 {
167 mpfr_t tmp;
168 unsigned int err;
169 unsigned long q0;
170
171 /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
172 mpfr_init2 (tmp, 32);
173 mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
174 mpfr_log2 (tmp, tmp, MPFR_RNDU);
175 mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
176 MPFR_ASSERTN (w <= ULONG_MAX);
177 mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
178 N = mpfr_get_ui (tmp, MPFR_RNDU);
179 if (N < 2)
180 N = 2;
181 lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
182 mpfr_clear (tmp);
183 P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
184 B = P + lgN;
185 T = B + lgN;
186 for (i = 0; i < lgN; i++)
187 {
188 mpz_init (P[i]);
189 mpz_init (B[i]);
190 mpz_init (T[i]);
191 }
192
193 S (P, &q0, B, T, 1, N, p, kk, 0);
194 /* mpz_mul (Q[0], B[0], Q[0]); */
195 /* mpz_mul_2exp (B[0], B[0], q0); */
196
197 mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
198 mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
199 mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
200 mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
201 = log(n/2^k) * (1 + theta_4)^4
202 for |theta_i| < 2^(-w) */
203
204 /* argument reconstruction: add k*log(2) */
205 mpfr_const_log2 (q, MPFR_RNDN);
206 mpfr_mul_ui (q, q, k, MPFR_RNDN);
207 mpfr_add (t, t, q, MPFR_RNDN);
208 for (i = 0; i < lgN; i++)
209 {
210 mpz_clear (P[i]);
211 mpz_clear (B[i]);
212 mpz_clear (T[i]);
213 }
214 /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
215 for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
216 thus at most k+6 ulps.
217 Note that there might be some cancellation in the addition: the worst
218 case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
219 gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
220 have an exponent decrease of 1, which accounts for +1 in the error. */
221 err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
222 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
223 break;
224
225 MPFR_ZIV_NEXT (loop, w);
226 MPFR_GROUP_REPREC_2(group, w, t, q);
227 }
228 MPFR_ZIV_FREE (loop);
229
230 inexact = mpfr_set (x, t, rnd_mode);
231
232 MPFR_GROUP_CLEAR(group);
233 MPFR_TMP_FREE(marker);
234
235 MPFR_SAVE_EXPO_FREE (expo);
236 return mpfr_check_range (x, inexact, rnd_mode);
237 }
238