xref: /netbsd-src/external/lgpl3/mpfr/dist/src/log_ui.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1 /* mpfr_log_ui -- compute natural logarithm of an unsigned long
2 
3 Copyright 2014-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
27    e.g. about 4 times as slow for n around ULONG_MAX/3 on an
28    x86_64 Linux machine, for 10^6 bits of precision. The reason is that
29    for say n=6148914691236517205 and prec=10^6, the value of T computed
30    has more than 50M bits, which is much more than needed. Indeed the
31    binary splitting algorithm for series with a finite radius of convergence
32    gives rationals of size n*log(n) for a target precision n. One might
33    truncate the rationals inside the algorithm, but then the error analysis
34    should be redone. */
35 
36 /* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x)
37    up to order N for x=p/2^k is T/(B*Q).
38    P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
39    q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
40    B[0] <- n1 * (n1+1) * ... * (n2-1)
41    T[0] <- B[0]*Q[0] * S(n1,n2)
42    where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
43    Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
44 */
45 static void
S(mpz_t * P,unsigned long * q,mpz_t * B,mpz_t * T,unsigned long n1,unsigned long n2,long p,unsigned long k,int need_P)46 S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
47    unsigned long n2, long p, unsigned long k, int need_P)
48 {
49   MPFR_ASSERTD (n1 < n2);
50   MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
51   if (n2 == n1 + 1)
52     {
53       mpz_set_si (P[0], (n1 == 1) ? p : -p);
54       *q = k;
55       mpz_set_ui (B[0], n1);
56       /* T = B*Q*S where S = P/(B*Q) thus T = P */
57       mpz_set (T[0], P[0]);
58       /* since p is odd (or zero), there is no common factor 2 between
59          P and Q, or T and B */
60     }
61   else
62     {
63       unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
64       /* m = floor((n1+n2)/2) */
65 
66       MPFR_ASSERTD (n1 < m && m < n2);
67       S (P, q, B, T, n1, m, p, k, 1);
68       S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
69 
70       /* T0 <- T0*B1*Q1 + P0*B0*T1 */
71       mpz_mul (T[1], T[1], P[0]);
72       mpz_mul (T[1], T[1], B[0]);
73       mpz_mul (T[0], T[0], B[1]);
74       /* Q[1] = 2^q1 */
75       mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
76       mpz_add (T[0], T[0], T[1]);
77       if (need_P)
78         mpz_mul (P[0], P[0], P[1]);
79       *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
80       mpz_mul (B[0], B[0], B[1]);
81 
82       /* there should be no common factors 2 between P, Q and T,
83          since P is odd (or zero) */
84     }
85 }
86 
87 int
mpfr_log_ui(mpfr_ptr x,unsigned long n,mpfr_rnd_t rnd_mode)88 mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
89 {
90   unsigned long k;
91   mpfr_prec_t w; /* working precision */
92   mpz_t three_n, *P, *B, *T;
93   mpfr_t t, q;
94   int inexact;
95   unsigned long N, lgN, i, kk;
96   long p;
97   MPFR_GROUP_DECL(group);
98   MPFR_TMP_DECL(marker);
99   MPFR_ZIV_DECL(loop);
100   MPFR_SAVE_EXPO_DECL (expo);
101 
102   if (n <= 2)
103     {
104       if (n == 0)
105         {
106           MPFR_SET_INF (x);
107           MPFR_SET_NEG (x);
108           MPFR_SET_DIVBY0 ();
109           MPFR_RET (0); /* log(0) is an exact -infinity */
110         }
111       else if (n == 1)
112         {
113           MPFR_SET_ZERO (x);
114           MPFR_SET_POS (x);
115           MPFR_RET (0); /* only "normal" case where the result is exact */
116         }
117       /* now n=2 */
118       return mpfr_const_log2 (x, rnd_mode);
119     }
120 
121   /* here n >= 3 */
122 
123   /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3,
124      i.e., 2^(k+1) < 3n < 2^(k+2).
125 
126      FIXME: we could do better by considering n/(2^k*3^i*5^j),
127      which reduces the maximal distance to 1 from 1/3 to 1/8,
128      thus needing about 1.89 less terms in the Taylor expansion of
129      the reduced argument. Then log(2^k*3^i*5^j) can be computed
130      using a combination of log(16/15), log(25/24) and log(81/80),
131      see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
132      Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
133 
134   mpz_init_set_ui (three_n, n);
135   mpz_mul_ui (three_n, three_n, 3);
136   k = mpz_sizeinbase (three_n, 2) - 2;
137   MPFR_ASSERTD (k >= 2);
138   mpz_clear (three_n);
139 
140   /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
141      Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
142      so that p and -p both fit in a long. */
143   if (k < sizeof (unsigned long) * CHAR_BIT)  /* assume no padding bits */
144     n -= 1UL << k;
145   /* n is now the value of p mod ULONG_MAX+1.
146      Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and
147      -n as an unsigned long value is at most LONG_MAX, thus fits in a
148      long. */
149   p = ULONG2LONG (n);
150 
151   MPFR_TMP_MARK(marker);
152   w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
153   MPFR_GROUP_INIT_2(group, w, t, q);
154   MPFR_SAVE_EXPO_MARK (expo);
155 
156   kk = k;
157   if (p != 0)
158     while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
159       {
160         p /= 2;
161         kk --;
162       }
163 
164   MPFR_ZIV_INIT (loop, w);
165   for (;;)
166     {
167       mpfr_t tmp;
168       unsigned int err;
169       unsigned long q0;
170 
171       /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
172       mpfr_init2 (tmp, 32);
173       mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
174       mpfr_log2 (tmp, tmp, MPFR_RNDU);
175       mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
176       MPFR_ASSERTN (w <= ULONG_MAX);
177       mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
178       N = mpfr_get_ui (tmp, MPFR_RNDU);
179       if (N < 2)
180         N = 2;
181       lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
182       mpfr_clear (tmp);
183       P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
184       B = P + lgN;
185       T = B + lgN;
186       for (i = 0; i < lgN; i++)
187         {
188           mpz_init (P[i]);
189           mpz_init (B[i]);
190           mpz_init (T[i]);
191         }
192 
193       S (P, &q0, B, T, 1, N, p, kk, 0);
194       /* mpz_mul (Q[0], B[0], Q[0]); */
195       /* mpz_mul_2exp (B[0], B[0], q0); */
196 
197       mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
198       mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
199       mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
200       mpfr_div (t, t, q, MPFR_RNDN);   /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
201                                             = log(n/2^k) * (1 + theta_4)^4
202                                             for |theta_i| < 2^(-w) */
203 
204       /* argument reconstruction: add k*log(2) */
205       mpfr_const_log2 (q, MPFR_RNDN);
206       mpfr_mul_ui (q, q, k, MPFR_RNDN);
207       mpfr_add (t, t, q, MPFR_RNDN);
208       for (i = 0; i < lgN; i++)
209         {
210           mpz_clear (P[i]);
211           mpz_clear (B[i]);
212           mpz_clear (T[i]);
213         }
214       /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
215          for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
216          thus at most k+6 ulps.
217          Note that there might be some cancellation in the addition: the worst
218          case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
219          gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
220          have an exponent decrease of 1, which accounts for +1 in the error. */
221       err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
222       if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
223         break;
224 
225       MPFR_ZIV_NEXT (loop, w);
226       MPFR_GROUP_REPREC_2(group, w, t, q);
227     }
228   MPFR_ZIV_FREE (loop);
229 
230   inexact = mpfr_set (x, t, rnd_mode);
231 
232   MPFR_GROUP_CLEAR(group);
233   MPFR_TMP_FREE(marker);
234 
235   MPFR_SAVE_EXPO_FREE (expo);
236   return mpfr_check_range (x, inexact, rnd_mode);
237 }
238