1 /* mpfr_log_ui -- compute natural logarithm of an unsigned long 2 3 Copyright 2014-2020 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n, 27 e.g. about 4 times as slow for n around ULONG_MAX/3 on an 28 x86_64 Linux machine, for 10^6 bits of precision. The reason is that 29 for say n=6148914691236517205 and prec=10^6, the value of T computed 30 has more than 50M bits, which is much more than needed. Indeed the 31 binary splitting algorithm for series with a finite radius of convergence 32 gives rationals of size n*log(n) for a target precision n. One might 33 truncate the rationals inside the algorithm, but then the error analysis 34 should be redone. */ 35 36 /* Cf http://www.ginac.de/CLN/binsplit.pdf: the Taylor series of log(1+x) 37 up to order N for x=p/2^k is T/(B*Q). 38 P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1] 39 q <- k*(n2-n1) [corresponding to Q[0] = 2^q] 40 B[0] <- n1 * (n1+1) * ... * (n2-1) 41 T[0] <- B[0]*Q[0] * S(n1,n2) 42 where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1) 43 Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3. 44 */ 45 static void 46 S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1, 47 unsigned long n2, long p, unsigned long k, int need_P) 48 { 49 MPFR_ASSERTD (n1 < n2); 50 MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0); 51 if (n2 == n1 + 1) 52 { 53 mpz_set_si (P[0], (n1 == 1) ? p : -p); 54 *q = k; 55 mpz_set_ui (B[0], n1); 56 /* T = B*Q*S where S = P/(B*Q) thus T = P */ 57 mpz_set (T[0], P[0]); 58 /* since p is odd (or zero), there is no common factor 2 between 59 P and Q, or T and B */ 60 } 61 else 62 { 63 unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1; 64 /* m = floor((n1+n2)/2) */ 65 66 MPFR_ASSERTD (n1 < m && m < n2); 67 S (P, q, B, T, n1, m, p, k, 1); 68 S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P); 69 70 /* T0 <- T0*B1*Q1 + P0*B0*T1 */ 71 mpz_mul (T[1], T[1], P[0]); 72 mpz_mul (T[1], T[1], B[0]); 73 mpz_mul (T[0], T[0], B[1]); 74 /* Q[1] = 2^q1 */ 75 mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */ 76 mpz_add (T[0], T[0], T[1]); 77 if (need_P) 78 mpz_mul (P[0], P[0], P[1]); 79 *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */ 80 mpz_mul (B[0], B[0], B[1]); 81 82 /* there should be no common factors 2 between P, Q and T, 83 since P is odd (or zero) */ 84 } 85 } 86 87 int 88 mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode) 89 { 90 unsigned long k; 91 mpfr_prec_t w; /* working precision */ 92 mpz_t three_n, *P, *B, *T; 93 mpfr_t t, q; 94 int inexact; 95 unsigned long N, lgN, i, kk; 96 long p; 97 MPFR_GROUP_DECL(group); 98 MPFR_TMP_DECL(marker); 99 MPFR_ZIV_DECL(loop); 100 MPFR_SAVE_EXPO_DECL (expo); 101 102 if (n <= 2) 103 { 104 if (n == 0) 105 { 106 MPFR_SET_INF (x); 107 MPFR_SET_NEG (x); 108 MPFR_SET_DIVBY0 (); 109 MPFR_RET (0); /* log(0) is an exact -infinity */ 110 } 111 else if (n == 1) 112 { 113 MPFR_SET_ZERO (x); 114 MPFR_SET_POS (x); 115 MPFR_RET (0); /* only "normal" case where the result is exact */ 116 } 117 /* now n=2 */ 118 return mpfr_const_log2 (x, rnd_mode); 119 } 120 121 /* here n >= 3 */ 122 123 /* Argument reduction: compute k such that 2/3 <= n/2^k < 4/3, 124 i.e., 2^(k+1) <= 3n < 2^(k+2). 125 126 FIXME: we could do better by considering n/(2^k*3^i*5^j), 127 which reduces the maximal distance to 1 from 1/3 to 1/8, 128 thus needing about 1.89 less terms in the Taylor expansion of 129 the reduced argument. Then log(2^k*3^i*5^j) can be computed 130 using a combination of log(16/15), log(25/24) and log(81/80), 131 see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package", 132 Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */ 133 134 mpz_init_set_ui (three_n, n); 135 mpz_mul_ui (three_n, three_n, 3); 136 k = mpz_sizeinbase (three_n, 2) - 2; 137 MPFR_ASSERTD (k >= 2); 138 mpz_clear (three_n); 139 140 /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k. 141 Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX, 142 so that p and -p both fit in a long. */ 143 if (k < sizeof (unsigned long) * CHAR_BIT) 144 n -= 1UL << k; 145 /* n is now the value of p mod ULONG_MAX+1 */ 146 p = n > LONG_MAX ? - (long) - n : (long) n; 147 148 MPFR_TMP_MARK(marker); 149 w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10; 150 MPFR_GROUP_INIT_2(group, w, t, q); 151 MPFR_SAVE_EXPO_MARK (expo); 152 153 kk = k; 154 if (p != 0) 155 while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */ 156 { 157 p /= 2; 158 kk --; 159 } 160 161 MPFR_ZIV_INIT (loop, w); 162 for (;;) 163 { 164 mpfr_t tmp; 165 unsigned int err; 166 unsigned long q0; 167 168 /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */ 169 mpfr_init2 (tmp, 32); 170 mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU); 171 mpfr_log2 (tmp, tmp, MPFR_RNDU); 172 mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD); 173 MPFR_ASSERTN (w <= ULONG_MAX); 174 mpfr_ui_div (tmp, w, tmp, MPFR_RNDU); 175 N = mpfr_get_ui (tmp, MPFR_RNDU); 176 if (N < 2) 177 N = 2; 178 lgN = MPFR_INT_CEIL_LOG2 (N) + 1; 179 mpfr_clear (tmp); 180 P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t)); 181 B = P + lgN; 182 T = B + lgN; 183 for (i = 0; i < lgN; i++) 184 { 185 mpz_init (P[i]); 186 mpz_init (B[i]); 187 mpz_init (T[i]); 188 } 189 190 S (P, &q0, B, T, 1, N, p, kk, 0); 191 /* mpz_mul (Q[0], B[0], Q[0]); */ 192 /* mpz_mul_2exp (B[0], B[0], q0); */ 193 194 mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */ 195 mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */ 196 mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */ 197 mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3 198 = log(n/2^k) * (1 + theta_4)^4 199 for |theta_i| < 2^(-w) */ 200 201 /* argument reconstruction: add k*log(2) */ 202 mpfr_const_log2 (q, MPFR_RNDN); 203 mpfr_mul_ui (q, q, k, MPFR_RNDN); 204 mpfr_add (t, t, q, MPFR_RNDN); 205 for (i = 0; i < lgN; i++) 206 { 207 mpz_clear (P[i]); 208 mpz_clear (B[i]); 209 mpz_clear (T[i]); 210 } 211 /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u 212 for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition, 213 thus at most k+6 ulps. 214 Note that there might be some cancellation in the addition: the worst 215 case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which 216 gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we 217 have an exponent decrease of 1, which accounts for +1 in the error. */ 218 err = MPFR_INT_CEIL_LOG2 (k + 6) + 1; 219 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode))) 220 break; 221 222 MPFR_ZIV_NEXT (loop, w); 223 MPFR_GROUP_REPREC_2(group, w, t, q); 224 } 225 MPFR_ZIV_FREE (loop); 226 227 inexact = mpfr_set (x, t, rnd_mode); 228 229 MPFR_GROUP_CLEAR(group); 230 MPFR_TMP_FREE(marker); 231 232 MPFR_SAVE_EXPO_FREE (expo); 233 return mpfr_check_range (x, inexact, rnd_mode); 234 } 235