1 /* mpfr_exp_2 -- exponential of a floating-point number
2 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
3
4 Copyright 1999-2023 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6
7 This file is part of the GNU MPFR Library.
8
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 License for more details.
18
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23
24 #define MPFR_NEED_LONGLONG_H /* MPFR_INT_CEIL_LOG2 */
25 #include "mpfr-impl.h"
26
27 static unsigned long
28 mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
29 static unsigned long
30 mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
31 static mpfr_exp_t
32 mpz_normalize (mpz_t, mpz_t, mpfr_exp_t);
33 static mpfr_exp_t
34 mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
35
36 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
37 Otherwise do nothing and return 0.
38 */
39 static mpfr_exp_t
mpz_normalize(mpz_t rop,mpz_t z,mpfr_exp_t q)40 mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
41 {
42 size_t k;
43
44 MPFR_MPZ_SIZEINBASE2 (k, z);
45 MPFR_ASSERTD (k == (mpfr_uexp_t) k);
46 if (MPFR_LIKELY(q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q))
47 {
48 mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
49 return (mpfr_exp_t) k - q;
50 }
51 mpz_set (rop, z);
52 return 0;
53 }
54
55 /* if expz > target, shift z by (expz-target) bits to the left.
56 if expz < target, shift z by (target-expz) bits to the right.
57 Returns target.
58 */
59 static mpfr_exp_t
mpz_normalize2(mpz_t rop,mpz_t z,mpfr_exp_t expz,mpfr_exp_t target)60 mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
61 {
62 if (MPFR_LIKELY(target > expz))
63 mpz_fdiv_q_2exp (rop, z, target - expz);
64 else
65 mpz_mul_2exp (rop, z, expz - target);
66 return target;
67 }
68
69 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
70 where x = n*log(2)+(2^K)*r
71 together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
72 evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
73 This function returns with the exact flags due to exp.
74 */
75 int
mpfr_exp_2(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)76 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
77 {
78 long n;
79 unsigned long K, k, l, err; /* FIXME: Which type ? */
80 int error_r;
81 mpfr_exp_t exps, expx;
82 mpfr_prec_t q, precy;
83 int inexact;
84 mpfr_t s, r;
85 mpz_t ss;
86 MPFR_GROUP_DECL(group);
87 MPFR_ZIV_DECL (loop);
88
89 MPFR_LOG_FUNC
90 (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
91 ("y[%Pd]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
92 inexact));
93
94 expx = MPFR_GET_EXP (x);
95 precy = MPFR_PREC(y);
96
97 /* First perform argument reduction: if x' = x - n*log(2) we have
98 exp(x) = exp(x)*2^n. We should take n near from x/log(2) but it does not
99 need to be exact.
100 Warning: we cannot use the 'double' type here, since on 64-bit machines
101 x may be as large as 2^62*log(2) without overflow, and then x/log(2)
102 is about 2^62: not every integer of that size can be represented as a
103 'double', thus the argument reduction would fail. */
104 if (MPFR_UNLIKELY(expx <= -2))
105 /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
106 n = 0;
107 else
108 {
109 mp_limb_t r_limb[(sizeof (long) -1) / sizeof(mp_limb_t) + 1];
110 /* Note: we use precision sizeof (long) * CHAR_BIT - 1 here since it is
111 more efficient that full limb precision.
112 The value of n will depend on whether MPFR_LONG_WITHIN_LIMB is
113 defined or not. For instance, for r = 0.111E0, one gets n = 0
114 in the former case and n = 1 in the latter case. */
115 MPFR_TMP_INIT1(r_limb, r, sizeof (long) * CHAR_BIT - 1);
116 mpfr_div (r, x, __gmpfr_const_log2_RNDD, MPFR_RNDN);
117 #ifdef MPFR_LONG_WITHIN_LIMB
118 /* The following code assume an unsigned long can fit in a mp_limb_t */
119 {
120 mp_limb_t a;
121 mpfr_exp_t exp;
122 /* Read the long directly (faster than using mpfr_get_si
123 since it fits, it is not singular, it can't be zero
124 and there is no conversion to do) */
125 MPFR_ASSERTD (MPFR_NOTZERO (r));
126 exp = MPFR_GET_EXP (r);
127 MPFR_ASSERTD (exp <= GMP_NUMB_BITS);
128 if (exp >= 1)
129 {
130 a = MPFR_MANT(r)[0] >> (GMP_NUMB_BITS - exp);
131 n = MPFR_IS_POS (r) ? a : a <= LONG_MAX ? - (long) a : LONG_MIN;
132 }
133 else
134 n = 0;
135 }
136 #else
137 /* Use generic way to get the long */
138 n = mpfr_get_si (r, MPFR_RNDN);
139 #endif
140 }
141 /* we have |x| <= (|n|+1)*log(2) */
142 MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
143
144 /* error_r bounds the cancelled bits in x - n*log(2) */
145 if (MPFR_UNLIKELY (n == 0))
146 error_r = 0;
147 else
148 {
149 error_r = mpfr_nbits_ulong (SAFE_ABS (unsigned long, n) + 1);
150 /* we have |x| <= 2^error_r * log(2) */
151 }
152
153 /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
154 n/K terms costs about n/(2K) multiplications when computed in fixed
155 point */
156 K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2) + 3
157 : __gmpfr_cuberoot (4*precy);
158 l = (precy - 1) / K + 1;
159 err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
160 /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
161 q = precy + err + K + 10;
162 /* if |x| >> 1, take into account the cancelled bits */
163 if (expx > 0)
164 q += expx;
165
166 /* Even with to the mpfr_prec_round below, it is possible to use
167 the MPFR_GROUP_* macros here because mpfr_prec_round is only
168 called in a special case. */
169 MPFR_GROUP_INIT_2(group, q + error_r, r, s);
170 mpz_init (ss);
171
172 /* the algorithm consists in computing an upper bound of exp(x) using
173 a precision of q bits, and see if we can round to MPFR_PREC(y) taking
174 into account the maximal error. Otherwise we increase q. */
175 MPFR_ZIV_INIT (loop, q);
176 for (;;)
177 {
178 MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
179 n, K, l, (unsigned long) q, error_r));
180
181 /* First reduce the argument to r = x - n * log(2),
182 so that r is small in absolute value. We want an upper
183 bound on r to get an upper bound on exp(x). */
184
185 /* if n<0, we have to get an upper bound of log(2)
186 in order to get an upper bound of r = x-n*log(2) */
187 mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
188 /* s is within 1 ulp(s) of log(2) */
189
190 mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
191 /* r is within 3 ulps of |n|*log(2) */
192 if (n < 0)
193 MPFR_CHANGE_SIGN (r);
194 /* r <= n*log(2), within 3 ulps */
195
196 MPFR_LOG_VAR (x);
197 MPFR_LOG_VAR (r);
198
199 mpfr_sub (r, x, r, MPFR_RNDU);
200
201 while (MPFR_IS_PURE_FP(r) && MPFR_IS_NEG (r))
202 { /* initial approximation n was too large */
203 n--;
204 mpfr_add (r, r, s, MPFR_RNDU);
205 }
206
207 /* if r is 0, we cannot round correctly */
208 if (MPFR_LIKELY(MPFR_IS_PURE_FP (r)))
209 {
210 /* since there was a cancellation in x - n*log(2), the low error_r
211 bits from r are zero and thus non significant, thus we can reduce
212 the working precision */
213 if (MPFR_LIKELY(error_r > 0))
214 {
215 MPFR_ASSERTD( MPFR_PREC(r) > q);
216 /* since MPFR_PREC(r) > q, there is no reallocation to do,
217 and it is safe to use it with MPFR_GROUP functions */
218 mpfr_prec_round (r, q, MPFR_RNDU);
219 }
220 /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
221 and 1 + 3/2 if error_r > 0) */
222 MPFR_LOG_VAR (r);
223 MPFR_ASSERTD (MPFR_IS_POS (r));
224 mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
225
226 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
227 MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
228 l = (precy < MPFR_EXP_2_THRESHOLD)
229 ? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */
230 : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
231
232 MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
233 l, (unsigned long) q, (K + l) * (double) q * q));
234
235 for (k = 0; k < K; k++)
236 {
237 mpz_mul (ss, ss, ss);
238 exps *= 2;
239 exps += mpz_normalize (ss, ss, q);
240 }
241 mpfr_set_z_2exp (s, ss, exps, MPFR_RNDN);
242
243 /* error is at most 2^K*l, plus 2 to take into account of
244 the error of 3 ulps on r */
245 err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
246
247 MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
248 MPFR_LOG_VAR (s);
249 MPFR_LOG_MSG (("err=%lu bits\n", K));
250
251 if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
252 {
253 MPFR_CLEAR_FLAGS ();
254 inexact = mpfr_mul_2si (y, s, n, rnd_mode);
255 break;
256 }
257 }
258 MPFR_ZIV_NEXT (loop, q);
259 MPFR_GROUP_REPREC_2(group, q+error_r, r, s);
260 }
261 MPFR_ZIV_FREE (loop);
262 mpz_clear (ss);
263 MPFR_GROUP_CLEAR (group);
264
265 return inexact;
266 }
267
268 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
269 using naive method with O(l) multiplications.
270 Return the number of iterations l.
271 The absolute error on s is less than 3*l*(l+1)*2^(-q).
272 Version using fixed-point arithmetic with mpz instead
273 of mpfr for internal computations.
274 */
275 static unsigned long
mpfr_exp2_aux(mpz_t s,mpfr_srcptr r,mpfr_prec_t q,mpfr_exp_t * exps)276 mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
277 {
278 unsigned long l;
279 mpfr_exp_t dif, expt, expr;
280 mpz_t t, rr;
281 mp_size_t sbit, tbit;
282
283 MPFR_ASSERTD (MPFR_IS_PURE_FP (r));
284
285 expt = 0;
286 *exps = 1 - (mpfr_exp_t) q; /* s = 2^(q-1) */
287 mpz_init (t);
288 mpz_init (rr);
289 mpz_set_ui (t, 1);
290 mpz_set_ui (s, 1);
291 mpz_mul_2exp (s, s, q-1);
292 expr = mpfr_get_z_2exp (rr, r); /* no error here */
293
294 l = 0;
295 for (;;)
296 {
297 l++;
298 mpz_mul(t, t, rr);
299 expt += expr;
300 MPFR_MPZ_SIZEINBASE2 (sbit, s);
301 MPFR_MPZ_SIZEINBASE2 (tbit, t);
302 dif = *exps + sbit - expt - tbit;
303 /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
304 expt += mpz_normalize (t, t, (mpfr_exp_t) q - dif);
305 /* error at most 2^(1-q) */
306 if (l > 1)
307 {
308 /* GMP doesn't optimize the case of power of 2 */
309 if (IS_POW2(l))
310 {
311 int bits = MPFR_INT_CEIL_LOG2(l);
312 mpz_fdiv_q_2exp (t, t, bits); /* error at most 2^(1-q) */
313 }
314 else
315 {
316 mpz_fdiv_q_ui (t, t, l); /* error at most 2^(1-q) */
317 }
318 /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
319 MPFR_ASSERTD (expt == *exps);
320 }
321 if (mpz_sgn (t) == 0)
322 break;
323 mpz_add (s, s, t); /* no error here: exact */
324 /* ensures rr has the same size as t: after several shifts, the error
325 on rr is still at most ulp(t)=ulp(s) */
326 MPFR_MPZ_SIZEINBASE2 (tbit, t);
327 expr += mpz_normalize (rr, rr, tbit);
328 }
329
330 mpz_clear (t);
331 mpz_clear (rr);
332
333 return 3 * l * (l + 1);
334 }
335
336 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
337 using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
338 Return l.
339 Uses m multiplications of full size and 2l/m of decreasing size,
340 i.e. a total equivalent to about m+l/m full multiplications,
341 i.e. 2*sqrt(l) for m=sqrt(l).
342 NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
343 is no longer used (r6919); sizer was the number of limbs of r.
344 Version using mpz. ss must have at least (sizer+1) limbs.
345 The error is bounded by (l^2+4*l) ulps where l is the return value.
346 */
347 static unsigned long
mpfr_exp2_aux2(mpz_t s,mpfr_srcptr r,mpfr_prec_t q,mpfr_exp_t * exps)348 mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
349 {
350 mpfr_exp_t expr, *expR, expt;
351 mpfr_prec_t ql;
352 unsigned long l, m, i;
353 mpz_t t, *R, rr, tmp;
354 mp_size_t sbit, rrbit;
355 MPFR_TMP_DECL(marker);
356
357 /* estimate value of l */
358 MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
359 l = q / (- MPFR_GET_EXP (r));
360 m = __gmpfr_isqrt (l);
361 /* we access R[2], thus we need m >= 2 */
362 if (m < 2)
363 m = 2;
364
365 MPFR_TMP_MARK(marker);
366 R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t)); /* R[i] is r^i */
367 expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
368 /* expR[i] is the exponent for R[i] */
369 mpz_init (tmp);
370 mpz_init (rr);
371 mpz_init (t);
372 mpz_set_ui (s, 0);
373 *exps = 1 - q; /* 1 ulp = 2^(1-q) */
374 for (i = 0 ; i <= m ; i++)
375 mpz_init (R[i]);
376 expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
377 expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
378 mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
379 mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
380 expR[2] = 1 - q;
381 for (i = 3 ; i <= m ; i++)
382 {
383 if ((i & 1) == 1)
384 mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
385 else
386 mpz_mul (t, R[i/2], R[i/2]);
387 mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
388 expR[i] = 1 - q;
389 }
390 mpz_set_ui (R[0], 1);
391 mpz_mul_2exp (R[0], R[0], q-1);
392 expR[0] = 1-q; /* R[0]=1 */
393 mpz_set_ui (rr, 1);
394 expr = 0; /* rr contains r^l/l! */
395 /* by induction: err(rr) <= 2*l ulps */
396
397 l = 0;
398 ql = q; /* precision used for current giant step */
399 do
400 {
401 /* all R[i] must have exponent 1-ql */
402 if (l != 0)
403 for (i = 0 ; i < m ; i++)
404 expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
405 /* the absolute error on R[i]*rr is still 2*i-1 ulps */
406 expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
407 /* err(t) <= 2*m-1 ulps */
408 /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
409 using Horner's scheme */
410 for (i = m-1 ; i-- != 0 ; )
411 {
412 mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
413 mpz_add (t, t, R[i]);
414 }
415 /* now err(t) <= (3m-2) ulps */
416
417 /* now multiplies t by r^l/l! and adds to s */
418 mpz_mul (t, t, rr);
419 expt += expr;
420 expt = mpz_normalize2 (t, t, expt, *exps);
421 /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
422 MPFR_ASSERTD (expt == *exps);
423 mpz_add (s, s, t); /* no error here */
424
425 /* updates rr, the multiplication of the factors l+i could be done
426 using binary splitting too, but it is not sure it would save much */
427 mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
428 expr += expR[m];
429 mpz_set_ui (tmp, 1);
430 for (i = 1 ; i <= m ; i++)
431 mpz_mul_ui (tmp, tmp, l + i);
432 mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
433 l += m;
434 if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
435 break;
436 expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
437 if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
438 rrbit = 1;
439 else
440 MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
441 MPFR_MPZ_SIZEINBASE2 (sbit, s);
442 ql = q - *exps - sbit + expr + rrbit;
443 /* TODO: Wrong cast. I don't want what is right, but this is
444 certainly wrong */
445 }
446 while ((size_t) expr + rrbit > (size_t) -q);
447
448 for (i = 0 ; i <= m ; i++)
449 mpz_clear (R[i]);
450 MPFR_TMP_FREE(marker);
451 mpz_clear (rr);
452 mpz_clear (t);
453 mpz_clear (tmp);
454
455 return l * (l + 4);
456 }
457