1 /* mpfr_root, mpfr_rootn_ui, mpfr_rootn_si -- kth root.
2
3 Copyright 2005-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* The computation of y = x^(1/k) is done as follows, except for large
27 values of k, for which this would be inefficient or yield internal
28 integer overflows:
29
30 Let x = sign * m * 2^(k*e) where m is an integer
31
32 with 2^(k*(n-1)) <= m < 2^(k*n) where n = PREC(y)
33
34 and m = s^k + t where 0 <= t and m < (s+1)^k
35
36 we want that s has n bits i.e. s >= 2^(n-1), or m >= 2^(k*(n-1))
37 i.e. m must have at least k*(n-1)+1 bits
38
39 then, not taking into account the sign, the result will be
40 x^(1/k) = s * 2^e or (s+1) * 2^e according to the rounding mode.
41 */
42
43 static int
44 mpfr_root_aux (mpfr_ptr y, mpfr_srcptr x, unsigned long k,
45 mpfr_rnd_t rnd_mode);
46
47 int
mpfr_rootn_ui(mpfr_ptr y,mpfr_srcptr x,unsigned long k,mpfr_rnd_t rnd_mode)48 mpfr_rootn_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long k, mpfr_rnd_t rnd_mode)
49 {
50 mpz_t m;
51 mpfr_exp_t e, r, sh, f;
52 mpfr_prec_t n, size_m, tmp;
53 int inexact, negative;
54 MPFR_SAVE_EXPO_DECL (expo);
55
56 MPFR_LOG_FUNC
57 (("x[%Pd]=%.*Rg k=%lu rnd=%d",
58 mpfr_get_prec (x), mpfr_log_prec, x, k, rnd_mode),
59 ("y[%Pd]=%.*Rg inexact=%d",
60 mpfr_get_prec (y), mpfr_log_prec, y, inexact));
61
62 if (MPFR_UNLIKELY (k <= 1))
63 {
64 if (k == 0)
65 {
66 /* rootn(x,0) is NaN (IEEE 754-2008). */
67 MPFR_SET_NAN (y);
68 MPFR_RET_NAN;
69 }
70 else /* y = x^(1/1) = x */
71 return mpfr_set (y, x, rnd_mode);
72 }
73
74 /* Singular values */
75 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
76 {
77 if (MPFR_IS_NAN (x))
78 {
79 MPFR_SET_NAN (y); /* NaN^(1/k) = NaN */
80 MPFR_RET_NAN;
81 }
82
83 if (MPFR_IS_INF (x)) /* (+Inf)^(1/k) = +Inf
84 (-Inf)^(1/k) = -Inf if k odd
85 (-Inf)^(1/k) = NaN if k even */
86 {
87 if (MPFR_IS_NEG (x) && (k & 1) == 0)
88 {
89 MPFR_SET_NAN (y);
90 MPFR_RET_NAN;
91 }
92 MPFR_SET_INF (y);
93 MPFR_SET_SAME_SIGN (y, x);
94 }
95 else /* x is necessarily 0: (+0)^(1/k) = +0
96 (-0)^(1/k) = +0 if k even
97 (-0)^(1/k) = -0 if k odd */
98 {
99 MPFR_ASSERTD (MPFR_IS_ZERO (x));
100 MPFR_SET_ZERO (y);
101 if (MPFR_IS_POS (x) || (k & 1) == 0)
102 MPFR_SET_POS (y);
103 else
104 MPFR_SET_NEG (y);
105 }
106 MPFR_RET (0);
107 }
108
109 /* Returns NAN for x < 0 and k even */
110 if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && (k & 1) == 0))
111 {
112 MPFR_SET_NAN (y);
113 MPFR_RET_NAN;
114 }
115
116 /* Special case |x| = 1. Note that if x = -1, then k is odd
117 (NaN results have already been filtered), so that y = -1. */
118 if (mpfr_cmpabs (x, __gmpfr_one) == 0)
119 return mpfr_set (y, x, rnd_mode);
120
121 /* General case */
122
123 /* For large k, use exp(log(x)/k). The threshold of 100 seems to be quite
124 good when the precision goes to infinity. */
125 if (k > 100)
126 return mpfr_root_aux (y, x, k, rnd_mode);
127
128 MPFR_SAVE_EXPO_MARK (expo);
129 mpz_init (m);
130
131 e = mpfr_get_z_2exp (m, x); /* x = m * 2^e */
132 if ((negative = MPFR_IS_NEG(x)))
133 mpz_neg (m, m);
134 r = e % (mpfr_exp_t) k;
135 if (r < 0)
136 r += k; /* now r = e (mod k) with 0 <= r < k */
137 MPFR_ASSERTD (0 <= r && r < k);
138 /* x = (m*2^r) * 2^(e-r) where e-r is a multiple of k */
139
140 MPFR_MPZ_SIZEINBASE2 (size_m, m);
141 /* for rounding to nearest, we want the round bit to be in the root */
142 n = MPFR_PREC (y) + (rnd_mode == MPFR_RNDN);
143
144 /* we now multiply m by 2^sh so that root(m,k) will give
145 exactly n bits: we want k*(n-1)+1 <= size_m + sh <= k*n
146 i.e. sh = k*f + r with f = max(floor((k*n-size_m-r)/k),0) */
147 if ((mpfr_exp_t) size_m + r >= k * (mpfr_exp_t) n)
148 f = 0; /* we already have too many bits */
149 else
150 f = (k * (mpfr_exp_t) n - (mpfr_exp_t) size_m - r) / k;
151 sh = k * f + r;
152 mpz_mul_2exp (m, m, sh);
153 e = e - sh;
154
155 /* invariant: x = m*2^e, with e divisible by k */
156
157 /* we reuse the variable m to store the kth root, since it is not needed
158 any more: we just need to know if the root is exact */
159 inexact = mpz_root (m, m, k) == 0;
160
161 MPFR_MPZ_SIZEINBASE2 (tmp, m);
162 sh = tmp - n;
163 if (sh > 0) /* we have to flush to 0 the last sh bits from m */
164 {
165 inexact = inexact || (mpz_scan1 (m, 0) < sh);
166 mpz_fdiv_q_2exp (m, m, sh);
167 e += k * sh;
168 }
169
170 if (inexact)
171 {
172 if (negative)
173 rnd_mode = MPFR_INVERT_RND (rnd_mode);
174 if (rnd_mode == MPFR_RNDU || rnd_mode == MPFR_RNDA
175 || (rnd_mode == MPFR_RNDN && mpz_tstbit (m, 0)))
176 inexact = 1, mpz_add_ui (m, m, 1);
177 else
178 inexact = -1;
179 }
180
181 /* either inexact is not zero, and the conversion is exact, i.e. inexact
182 is not changed; or inexact=0, and inexact is set only when
183 rnd_mode=MPFR_RNDN and bit (n+1) from m is 1 */
184 inexact += mpfr_set_z (y, m, MPFR_RNDN);
185 MPFR_SET_EXP (y, MPFR_GET_EXP (y) + e / (mpfr_exp_t) k);
186
187 if (negative)
188 {
189 MPFR_CHANGE_SIGN (y);
190 inexact = -inexact;
191 }
192
193 mpz_clear (m);
194 MPFR_SAVE_EXPO_FREE (expo);
195 return mpfr_check_range (y, inexact, rnd_mode);
196 }
197
198 /* Compute y <- x^(1/k) using exp(log(x)/k).
199 Assume all special cases have been eliminated before.
200 In the extended exponent range, overflows/underflows are not possible.
201 Assume x > 0, or x < 0 and k odd.
202 Also assume |x| <> 1 because log(1) = 0, which does not have an exponent
203 and would yield a failure in the error bound computation. A priori, this
204 constraint is quite artificial because if |x| is close enough to 1, then
205 the exponent of log|x| does not need to be used (in the code, err would
206 be 1 in such a domain). So this constraint |x| <> 1 could be avoided in
207 the code. However, this is an exact case easy to detect, so that such a
208 change would be useless. Values very close to 1 are not an issue, since
209 an underflow is not possible before the MPFR_GET_EXP.
210 */
211 static int
mpfr_root_aux(mpfr_ptr y,mpfr_srcptr x,unsigned long k,mpfr_rnd_t rnd_mode)212 mpfr_root_aux (mpfr_ptr y, mpfr_srcptr x, unsigned long k, mpfr_rnd_t rnd_mode)
213 {
214 int inexact, exact_root = 0;
215 mpfr_prec_t w; /* working precision */
216 mpfr_t absx, t;
217 MPFR_GROUP_DECL(group);
218 MPFR_TMP_DECL(marker);
219 MPFR_ZIV_DECL(loop);
220 MPFR_SAVE_EXPO_DECL (expo);
221
222 MPFR_TMP_INIT_ABS (absx, x);
223
224 MPFR_TMP_MARK(marker);
225 w = MPFR_PREC(y) + 10;
226 /* Take some guard bits to prepare for the 'expt' lost bits below.
227 If |x| < 2^k, then log|x| < k, thus taking log2(k) bits should be fine. */
228 if (MPFR_GET_EXP(x) > 0)
229 w += MPFR_INT_CEIL_LOG2 (MPFR_GET_EXP(x));
230 MPFR_GROUP_INIT_1(group, w, t);
231 MPFR_SAVE_EXPO_MARK (expo);
232 MPFR_ZIV_INIT (loop, w);
233 for (;;)
234 {
235 mpfr_exp_t expt;
236 unsigned int err;
237
238 mpfr_log (t, absx, MPFR_RNDN);
239 /* t = log|x| * (1 + theta) with |theta| <= 2^(-w) */
240 mpfr_div_ui (t, t, k, MPFR_RNDN);
241 /* No possible underflow in mpfr_log and mpfr_div_ui. */
242 expt = MPFR_GET_EXP (t); /* assumes t <> 0 */
243 /* t = log|x|/k * (1 + theta) + eps with |theta| <= 2^(-w)
244 and |eps| <= 1/2 ulp(t), thus the total error is bounded
245 by 1.5 * 2^(expt - w) */
246 mpfr_exp (t, t, MPFR_RNDN);
247 /* t = |x|^(1/k) * exp(tau) * (1 + theta1) with
248 |tau| <= 1.5 * 2^(expt - w) and |theta1| <= 2^(-w).
249 For |tau| <= 0.5 we have |exp(tau)-1| < 4/3*tau, thus
250 for w >= expt + 2 we have:
251 t = |x|^(1/k) * (1 + 2^(expt+2)*theta2) * (1 + theta1) with
252 |theta1|, |theta2| <= 2^(-w).
253 If expt+2 > 0, as long as w >= 1, we have:
254 t = |x|^(1/k) * (1 + 2^(expt+3)*theta3) with |theta3| < 2^(-w).
255 For expt+2 = 0, we have:
256 t = |x|^(1/k) * (1 + 2^2*theta3) with |theta3| < 2^(-w).
257 Finally for expt+2 < 0 we have:
258 t = |x|^(1/k) * (1 + 2*theta3) with |theta3| < 2^(-w).
259 */
260 err = (expt + 2 > 0) ? expt + 3
261 : (expt + 2 == 0) ? 2 : 1;
262 /* now t = |x|^(1/k) * (1 + 2^(err-w)) thus the error is at most
263 2^(EXP(t) - w + err) */
264 if (MPFR_LIKELY (MPFR_CAN_ROUND(t, w - err, MPFR_PREC(y), rnd_mode)))
265 break;
266
267 /* If we fail to round correctly, check for an exact result or a
268 midpoint result with MPFR_RNDN (regarded as hard-to-round in
269 all precisions in order to determine the ternary value). */
270 {
271 mpfr_t z, zk;
272
273 mpfr_init2 (z, MPFR_PREC(y) + (rnd_mode == MPFR_RNDN));
274 mpfr_init2 (zk, MPFR_PREC(x));
275 mpfr_set (z, t, MPFR_RNDN);
276 inexact = mpfr_pow_ui (zk, z, k, MPFR_RNDN);
277 exact_root = !inexact && mpfr_equal_p (zk, absx);
278 if (exact_root) /* z is the exact root, thus round z directly */
279 inexact = mpfr_set4 (y, z, rnd_mode, MPFR_SIGN (x));
280 mpfr_clear (zk);
281 mpfr_clear (z);
282 if (exact_root)
283 break;
284 }
285
286 MPFR_ZIV_NEXT (loop, w);
287 MPFR_GROUP_REPREC_1(group, w, t);
288 }
289 MPFR_ZIV_FREE (loop);
290
291 if (!exact_root)
292 inexact = mpfr_set4 (y, t, rnd_mode, MPFR_SIGN (x));
293
294 MPFR_GROUP_CLEAR(group);
295 MPFR_TMP_FREE(marker);
296 MPFR_SAVE_EXPO_FREE (expo);
297
298 return mpfr_check_range (y, inexact, rnd_mode);
299 }
300
301 int
mpfr_rootn_si(mpfr_ptr y,mpfr_srcptr x,long k,mpfr_rnd_t rnd_mode)302 mpfr_rootn_si (mpfr_ptr y, mpfr_srcptr x, long k, mpfr_rnd_t rnd_mode)
303 {
304 int inexact;
305 MPFR_ZIV_DECL(loop);
306 MPFR_SAVE_EXPO_DECL (expo);
307
308 MPFR_LOG_FUNC
309 (("x[%Pd]=%.*Rg k=%lu rnd=%d",
310 mpfr_get_prec (x), mpfr_log_prec, x, k, rnd_mode),
311 ("y[%Pd]=%.*Rg inexact=%d",
312 mpfr_get_prec (y), mpfr_log_prec, y, inexact));
313
314 if (k >= 0)
315 return mpfr_rootn_ui (y, x, k, rnd_mode);
316
317 /* Singular values for k < 0 */
318 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
319 {
320 if (MPFR_IS_NAN (x))
321 {
322 MPFR_SET_NAN (y); /* NaN^(1/k) = NaN */
323 MPFR_RET_NAN;
324 }
325
326 if (MPFR_IS_INF (x)) /* (+Inf)^(1/k) = +0
327 (-Inf)^(1/k) = -0 if k odd
328 (-Inf)^(1/k) = NaN if k even */
329 {
330 /* Cast k to an unsigned type so that this is well-defined. */
331 if (MPFR_IS_NEG (x) && ((unsigned long) k & 1) == 0)
332 {
333 MPFR_SET_NAN (y);
334 MPFR_RET_NAN;
335 }
336 MPFR_SET_ZERO (y);
337 MPFR_SET_SAME_SIGN (y, x);
338 }
339 else /* x is necessarily 0: (+0)^(1/k) = +Inf
340 (-0)^(1/k) = +Inf if k even
341 (-0)^(1/k) = -Inf if k odd */
342 {
343 MPFR_ASSERTD (MPFR_IS_ZERO (x));
344 MPFR_SET_INF (y);
345 /* Cast k to an unsigned type so that this is well-defined. */
346 if (MPFR_IS_POS (x) || ((unsigned long) k & 1) == 0)
347 MPFR_SET_POS (y);
348 else
349 MPFR_SET_NEG (y);
350 MPFR_SET_DIVBY0 ();
351 }
352 MPFR_RET (0);
353 }
354
355 /* Returns NAN for x < 0 and k even */
356 /* Cast k to an unsigned type so that this is well-defined. */
357 if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && ((unsigned long) k & 1) == 0))
358 {
359 MPFR_SET_NAN (y);
360 MPFR_RET_NAN;
361 }
362
363 /* Special case |x| = 1. Note that if x = -1, then k is odd
364 (NaN results have already been filtered), so that y = -1. */
365 if (mpfr_cmpabs (x, __gmpfr_one) == 0)
366 return mpfr_set (y, x, rnd_mode);
367
368 /* The case k = -1 is probably rare in practice (the user would directly
369 do a division if k is a constant, and even mpfr_pow_si is more natural).
370 But let's take it into account here, so that in the general case below,
371 overflows and underflows will be impossible, and we won't need to test
372 and handle the corresponding flags. And let's take the opportunity to
373 handle k = -2 as well since mpfr_rec_sqrt is faster than the generic
374 mpfr_rootn_si (this is visible when running the trec_sqrt tests with
375 mpfr_rootn_si + generic code for k = -2 instead of mpfr_rec_sqrt). */
376 /* TODO: If MPFR_WANT_ASSERT >= 2, define a new mpfr_rootn_si function
377 so that for k = -2, compute the result with both mpfr_rec_sqrt and
378 the generic code, and compare (ditto for mpfr_rec_sqrt), like what
379 is done in add1sp.c (mpfr_add1sp and mpfr_add1 results compared). */
380 if (k >= -2)
381 {
382 if (k == -1)
383 return mpfr_ui_div (y, 1, x, rnd_mode);
384 else
385 return mpfr_rec_sqrt (y, x, rnd_mode);
386 }
387
388 /* TODO: Should we expand mpfr_root_aux to negative values of k
389 and call it if k < -100, a bit like in mpfr_rootn_ui? */
390
391 /* General case */
392 {
393 mpfr_t t;
394 mpfr_prec_t Ny; /* target precision */
395 mpfr_prec_t Nt; /* working precision */
396
397 /* initial working precision */
398 Ny = MPFR_PREC (y);
399 Nt = Ny + 10;
400
401 MPFR_SAVE_EXPO_MARK (expo);
402
403 mpfr_init2 (t, Nt);
404
405 MPFR_ZIV_INIT (loop, Nt);
406 for (;;)
407 {
408 /* Compute the root before the division, in particular to avoid
409 overflows and underflows.
410 Moreover, midpoints are impossible. And an exact case implies
411 that |x| is a power of 2; such a case is not the most common
412 one, so that we detect it only after MPFR_CAN_ROUND. */
413
414 /* Let's use MPFR_RNDF to avoid the potentially costly detection
415 of exact cases in mpfr_rootn_ui (we just lose one bit in the
416 final approximation). */
417 mpfr_rootn_ui (t, x, - (unsigned long) k, MPFR_RNDF);
418 inexact = mpfr_ui_div (t, 1, t, rnd_mode);
419
420 /* The final error is bounded by 5 ulp (see algorithms.tex,
421 "Generic error of inverse"), which is <= 2^3 ulp. */
422 MPFR_ASSERTD (! MPFR_IS_SINGULAR (t));
423 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - 3, Ny, rnd_mode) ||
424 (inexact == 0 && mpfr_powerof2_raw (x))))
425 break;
426
427 MPFR_ZIV_NEXT (loop, Nt);
428 mpfr_set_prec (t, Nt);
429 }
430 MPFR_ZIV_FREE (loop);
431
432 inexact = mpfr_set (y, t, rnd_mode);
433 mpfr_clear (t);
434
435 MPFR_SAVE_EXPO_FREE (expo);
436 return mpfr_check_range (y, inexact, rnd_mode);
437 }
438 }
439
440 int
mpfr_root(mpfr_ptr y,mpfr_srcptr x,unsigned long k,mpfr_rnd_t rnd_mode)441 mpfr_root (mpfr_ptr y, mpfr_srcptr x, unsigned long k, mpfr_rnd_t rnd_mode)
442 {
443 MPFR_LOG_FUNC
444 (("x[%Pd]=%.*Rg k=%lu rnd=%d",
445 mpfr_get_prec (x), mpfr_log_prec, x, k, rnd_mode),
446 ("y[%Pd]=%.*Rg",
447 mpfr_get_prec (y), mpfr_log_prec, y));
448
449 /* Like mpfr_rootn_ui... */
450 if (MPFR_UNLIKELY (k <= 1))
451 {
452 if (k == 0)
453 {
454 /* rootn(x,0) is NaN (IEEE 754-2008). */
455 MPFR_SET_NAN (y);
456 MPFR_RET_NAN;
457 }
458 else /* y = x^(1/1) = x */
459 return mpfr_set (y, x, rnd_mode);
460 }
461
462 if (MPFR_UNLIKELY (MPFR_IS_ZERO (x)))
463 {
464 /* The only case that may differ from mpfr_rootn_ui. */
465 MPFR_SET_ZERO (y);
466 MPFR_SET_SAME_SIGN (y, x);
467 MPFR_RET (0);
468 }
469 else
470 return mpfr_rootn_ui (y, x, k, rnd_mode);
471 }
472