xref: /netbsd-src/external/lgpl3/mpfr/dist/src/log_ui.c (revision 122b5006ee1bd67145794b4cde92f4fe4781a5ec)
1 /* mpfr_log_ui -- compute natural logarithm of an unsigned long
2 
3 Copyright 2014-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
27    e.g. about 4 times as slow for n around ULONG_MAX/3 on an
28    x86_64 Linux machine, for 10^6 bits of precision. The reason is that
29    for say n=6148914691236517205 and prec=10^6, the value of T computed
30    has more than 50M bits, which is much more than needed. Indeed the
31    binary splitting algorithm for series with a finite radius of convergence
32    gives rationals of size n*log(n) for a target precision n. One might
33    truncate the rationals inside the algorithm, but then the error analysis
34    should be redone. */
35 
36 /* Cf http://www.ginac.de/CLN/binsplit.pdf: the Taylor series of log(1+x)
37    up to order N for x=p/2^k is T/(B*Q).
38    P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
39    q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
40    B[0] <- n1 * (n1+1) * ... * (n2-1)
41    T[0] <- B[0]*Q[0] * S(n1,n2)
42    where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
43    Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
44 */
45 static void
46 S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
47    unsigned long n2, long p, unsigned long k, int need_P)
48 {
49   MPFR_ASSERTD (n1 < n2);
50   MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
51   if (n2 == n1 + 1)
52     {
53       mpz_set_si (P[0], (n1 == 1) ? p : -p);
54       *q = k;
55       mpz_set_ui (B[0], n1);
56       /* T = B*Q*S where S = P/(B*Q) thus T = P */
57       mpz_set (T[0], P[0]);
58       /* since p is odd (or zero), there is no common factor 2 between
59          P and Q, or T and B */
60     }
61   else
62     {
63       unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
64       /* m = floor((n1+n2)/2) */
65 
66       MPFR_ASSERTD (n1 < m && m < n2);
67       S (P, q, B, T, n1, m, p, k, 1);
68       S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
69 
70       /* T0 <- T0*B1*Q1 + P0*B0*T1 */
71       mpz_mul (T[1], T[1], P[0]);
72       mpz_mul (T[1], T[1], B[0]);
73       mpz_mul (T[0], T[0], B[1]);
74       /* Q[1] = 2^q1 */
75       mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
76       mpz_add (T[0], T[0], T[1]);
77       if (need_P)
78         mpz_mul (P[0], P[0], P[1]);
79       *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
80       mpz_mul (B[0], B[0], B[1]);
81 
82       /* there should be no common factors 2 between P, Q and T,
83          since P is odd (or zero) */
84     }
85 }
86 
87 int
88 mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
89 {
90   unsigned long k;
91   mpfr_prec_t w; /* working precision */
92   mpz_t three_n, *P, *B, *T;
93   mpfr_t t, q;
94   int inexact;
95   unsigned long N, lgN, i, kk;
96   long p;
97   MPFR_GROUP_DECL(group);
98   MPFR_TMP_DECL(marker);
99   MPFR_ZIV_DECL(loop);
100   MPFR_SAVE_EXPO_DECL (expo);
101 
102   if (n <= 2)
103     {
104       if (n == 0)
105         {
106           MPFR_SET_INF (x);
107           MPFR_SET_NEG (x);
108           MPFR_SET_DIVBY0 ();
109           MPFR_RET (0); /* log(0) is an exact -infinity */
110         }
111       else if (n == 1)
112         {
113           MPFR_SET_ZERO (x);
114           MPFR_SET_POS (x);
115           MPFR_RET (0); /* only "normal" case where the result is exact */
116         }
117       /* now n=2 */
118       return mpfr_const_log2 (x, rnd_mode);
119     }
120 
121   /* here n >= 3 */
122 
123   /* Argument reduction: compute k such that 2/3 <= n/2^k < 4/3,
124      i.e., 2^(k+1) <= 3n < 2^(k+2).
125 
126      FIXME: we could do better by considering n/(2^k*3^i*5^j),
127      which reduces the maximal distance to 1 from 1/3 to 1/8,
128      thus needing about 1.89 less terms in the Taylor expansion of
129      the reduced argument. Then log(2^k*3^i*5^j) can be computed
130      using a combination of log(16/15), log(25/24) and log(81/80),
131      see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
132      Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
133 
134   mpz_init_set_ui (three_n, n);
135   mpz_mul_ui (three_n, three_n, 3);
136   k = mpz_sizeinbase (three_n, 2) - 2;
137   MPFR_ASSERTD (k >= 2);
138   mpz_clear (three_n);
139 
140   /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
141      Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
142      so that p and -p both fit in a long. */
143   if (k < sizeof (unsigned long) * CHAR_BIT)
144     n -= 1UL << k;
145   /* n is now the value of p mod ULONG_MAX+1 */
146   p = n > LONG_MAX ? - (long) - n : (long) n;
147 
148   MPFR_TMP_MARK(marker);
149   w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
150   MPFR_GROUP_INIT_2(group, w, t, q);
151   MPFR_SAVE_EXPO_MARK (expo);
152 
153   kk = k;
154   if (p != 0)
155     while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
156       {
157         p /= 2;
158         kk --;
159       }
160 
161   MPFR_ZIV_INIT (loop, w);
162   for (;;)
163     {
164       mpfr_t tmp;
165       unsigned int err;
166       unsigned long q0;
167 
168       /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
169       mpfr_init2 (tmp, 32);
170       mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
171       mpfr_log2 (tmp, tmp, MPFR_RNDU);
172       mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
173       MPFR_ASSERTN (w <= ULONG_MAX);
174       mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
175       N = mpfr_get_ui (tmp, MPFR_RNDU);
176       if (N < 2)
177         N = 2;
178       lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
179       mpfr_clear (tmp);
180       P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
181       B = P + lgN;
182       T = B + lgN;
183       for (i = 0; i < lgN; i++)
184         {
185           mpz_init (P[i]);
186           mpz_init (B[i]);
187           mpz_init (T[i]);
188         }
189 
190       S (P, &q0, B, T, 1, N, p, kk, 0);
191       /* mpz_mul (Q[0], B[0], Q[0]); */
192       /* mpz_mul_2exp (B[0], B[0], q0); */
193 
194       mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
195       mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
196       mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
197       mpfr_div (t, t, q, MPFR_RNDN);   /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
198                                             = log(n/2^k) * (1 + theta_4)^4
199                                             for |theta_i| < 2^(-w) */
200 
201       /* argument reconstruction: add k*log(2) */
202       mpfr_const_log2 (q, MPFR_RNDN);
203       mpfr_mul_ui (q, q, k, MPFR_RNDN);
204       mpfr_add (t, t, q, MPFR_RNDN);
205       for (i = 0; i < lgN; i++)
206         {
207           mpz_clear (P[i]);
208           mpz_clear (B[i]);
209           mpz_clear (T[i]);
210         }
211       /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
212          for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
213          thus at most k+6 ulps.
214          Note that there might be some cancellation in the addition: the worst
215          case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
216          gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
217          have an exponent decrease of 1, which accounts for +1 in the error. */
218       err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
219       if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
220         break;
221 
222       MPFR_ZIV_NEXT (loop, w);
223       MPFR_GROUP_REPREC_2(group, w, t, q);
224     }
225   MPFR_ZIV_FREE (loop);
226 
227   inexact = mpfr_set (x, t, rnd_mode);
228 
229   MPFR_GROUP_CLEAR(group);
230   MPFR_TMP_FREE(marker);
231 
232   MPFR_SAVE_EXPO_FREE (expo);
233   return mpfr_check_range (x, inexact, rnd_mode);
234 }
235