xref: /netbsd-src/external/lgpl3/mpfr/dist/src/root.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_root, mpfr_rootn_ui, mpfr_rootn_si -- kth root.
2 
3 Copyright 2005-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26  /* The computation of y = x^(1/k) is done as follows, except for large
27     values of k, for which this would be inefficient or yield internal
28     integer overflows:
29 
30     Let x = sign * m * 2^(k*e) where m is an integer
31 
32     with 2^(k*(n-1)) <= m < 2^(k*n) where n = PREC(y)
33 
34     and m = s^k + t where 0 <= t and m < (s+1)^k
35 
36     we want that s has n bits i.e. s >= 2^(n-1), or m >= 2^(k*(n-1))
37     i.e. m must have at least k*(n-1)+1 bits
38 
39     then, not taking into account the sign, the result will be
40     x^(1/k) = s * 2^e or (s+1) * 2^e according to the rounding mode.
41  */
42 
43 static int
44 mpfr_root_aux (mpfr_ptr y, mpfr_srcptr x, unsigned long k,
45                mpfr_rnd_t rnd_mode);
46 
47 int
mpfr_rootn_ui(mpfr_ptr y,mpfr_srcptr x,unsigned long k,mpfr_rnd_t rnd_mode)48 mpfr_rootn_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long k, mpfr_rnd_t rnd_mode)
49 {
50   mpz_t m;
51   mpfr_exp_t e, r, sh, f;
52   mpfr_prec_t n, size_m, tmp;
53   int inexact, negative;
54   MPFR_SAVE_EXPO_DECL (expo);
55 
56   MPFR_LOG_FUNC
57     (("x[%Pd]=%.*Rg k=%lu rnd=%d",
58       mpfr_get_prec (x), mpfr_log_prec, x, k, rnd_mode),
59      ("y[%Pd]=%.*Rg inexact=%d",
60       mpfr_get_prec (y), mpfr_log_prec, y, inexact));
61 
62   if (MPFR_UNLIKELY (k <= 1))
63     {
64       if (k == 0)
65         {
66           /* rootn(x,0) is NaN (IEEE 754-2008). */
67           MPFR_SET_NAN (y);
68           MPFR_RET_NAN;
69         }
70       else /* y = x^(1/1) = x */
71         return mpfr_set (y, x, rnd_mode);
72     }
73 
74   /* Singular values */
75   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
76     {
77       if (MPFR_IS_NAN (x))
78         {
79           MPFR_SET_NAN (y); /* NaN^(1/k) = NaN */
80           MPFR_RET_NAN;
81         }
82 
83       if (MPFR_IS_INF (x)) /* (+Inf)^(1/k) = +Inf
84                               (-Inf)^(1/k) = -Inf if k odd
85                               (-Inf)^(1/k) = NaN if k even */
86         {
87           if (MPFR_IS_NEG (x) && (k & 1) == 0)
88             {
89               MPFR_SET_NAN (y);
90               MPFR_RET_NAN;
91             }
92           MPFR_SET_INF (y);
93           MPFR_SET_SAME_SIGN (y, x);
94         }
95       else /* x is necessarily 0: (+0)^(1/k) = +0
96                                   (-0)^(1/k) = +0 if k even
97                                   (-0)^(1/k) = -0 if k odd */
98         {
99           MPFR_ASSERTD (MPFR_IS_ZERO (x));
100           MPFR_SET_ZERO (y);
101           if (MPFR_IS_POS (x) || (k & 1) == 0)
102             MPFR_SET_POS (y);
103           else
104             MPFR_SET_NEG (y);
105         }
106       MPFR_RET (0);
107     }
108 
109   /* Returns NAN for x < 0 and k even */
110   if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && (k & 1) == 0))
111     {
112       MPFR_SET_NAN (y);
113       MPFR_RET_NAN;
114     }
115 
116   /* Special case |x| = 1. Note that if x = -1, then k is odd
117      (NaN results have already been filtered), so that y = -1. */
118   if (mpfr_cmpabs (x, __gmpfr_one) == 0)
119     return mpfr_set (y, x, rnd_mode);
120 
121   /* General case */
122 
123   /* For large k, use exp(log(x)/k). The threshold of 100 seems to be quite
124      good when the precision goes to infinity. */
125   if (k > 100)
126     return mpfr_root_aux (y, x, k, rnd_mode);
127 
128   MPFR_SAVE_EXPO_MARK (expo);
129   mpz_init (m);
130 
131   e = mpfr_get_z_2exp (m, x);                /* x = m * 2^e */
132   if ((negative = MPFR_IS_NEG(x)))
133     mpz_neg (m, m);
134   r = e % (mpfr_exp_t) k;
135   if (r < 0)
136     r += k; /* now r = e (mod k) with 0 <= r < k */
137   MPFR_ASSERTD (0 <= r && r < k);
138   /* x = (m*2^r) * 2^(e-r) where e-r is a multiple of k */
139 
140   MPFR_MPZ_SIZEINBASE2 (size_m, m);
141   /* for rounding to nearest, we want the round bit to be in the root */
142   n = MPFR_PREC (y) + (rnd_mode == MPFR_RNDN);
143 
144   /* we now multiply m by 2^sh so that root(m,k) will give
145      exactly n bits: we want k*(n-1)+1 <= size_m + sh <= k*n
146      i.e. sh = k*f + r with f = max(floor((k*n-size_m-r)/k),0) */
147   if ((mpfr_exp_t) size_m + r >= k * (mpfr_exp_t) n)
148     f = 0; /* we already have too many bits */
149   else
150     f = (k * (mpfr_exp_t) n - (mpfr_exp_t) size_m - r) / k;
151   sh = k * f + r;
152   mpz_mul_2exp (m, m, sh);
153   e = e - sh;
154 
155   /* invariant: x = m*2^e, with e divisible by k */
156 
157   /* we reuse the variable m to store the kth root, since it is not needed
158      any more: we just need to know if the root is exact */
159   inexact = mpz_root (m, m, k) == 0;
160 
161   MPFR_MPZ_SIZEINBASE2 (tmp, m);
162   sh = tmp - n;
163   if (sh > 0) /* we have to flush to 0 the last sh bits from m */
164     {
165       inexact = inexact || (mpz_scan1 (m, 0) < sh);
166       mpz_fdiv_q_2exp (m, m, sh);
167       e += k * sh;
168     }
169 
170   if (inexact)
171     {
172       if (negative)
173         rnd_mode = MPFR_INVERT_RND (rnd_mode);
174       if (rnd_mode == MPFR_RNDU || rnd_mode == MPFR_RNDA
175           || (rnd_mode == MPFR_RNDN && mpz_tstbit (m, 0)))
176         inexact = 1, mpz_add_ui (m, m, 1);
177       else
178         inexact = -1;
179     }
180 
181   /* either inexact is not zero, and the conversion is exact, i.e. inexact
182      is not changed; or inexact=0, and inexact is set only when
183      rnd_mode=MPFR_RNDN and bit (n+1) from m is 1 */
184   inexact += mpfr_set_z (y, m, MPFR_RNDN);
185   MPFR_SET_EXP (y, MPFR_GET_EXP (y) + e / (mpfr_exp_t) k);
186 
187   if (negative)
188     {
189       MPFR_CHANGE_SIGN (y);
190       inexact = -inexact;
191     }
192 
193   mpz_clear (m);
194   MPFR_SAVE_EXPO_FREE (expo);
195   return mpfr_check_range (y, inexact, rnd_mode);
196 }
197 
198 /* Compute y <- x^(1/k) using exp(log(x)/k).
199    Assume all special cases have been eliminated before.
200    In the extended exponent range, overflows/underflows are not possible.
201    Assume x > 0, or x < 0 and k odd.
202    Also assume |x| <> 1 because log(1) = 0, which does not have an exponent
203    and would yield a failure in the error bound computation. A priori, this
204    constraint is quite artificial because if |x| is close enough to 1, then
205    the exponent of log|x| does not need to be used (in the code, err would
206    be 1 in such a domain). So this constraint |x| <> 1 could be avoided in
207    the code. However, this is an exact case easy to detect, so that such a
208    change would be useless. Values very close to 1 are not an issue, since
209    an underflow is not possible before the MPFR_GET_EXP.
210 */
211 static int
mpfr_root_aux(mpfr_ptr y,mpfr_srcptr x,unsigned long k,mpfr_rnd_t rnd_mode)212 mpfr_root_aux (mpfr_ptr y, mpfr_srcptr x, unsigned long k, mpfr_rnd_t rnd_mode)
213 {
214   int inexact, exact_root = 0;
215   mpfr_prec_t w; /* working precision */
216   mpfr_t absx, t;
217   MPFR_GROUP_DECL(group);
218   MPFR_TMP_DECL(marker);
219   MPFR_ZIV_DECL(loop);
220   MPFR_SAVE_EXPO_DECL (expo);
221 
222   MPFR_TMP_INIT_ABS (absx, x);
223 
224   MPFR_TMP_MARK(marker);
225   w = MPFR_PREC(y) + 10;
226   /* Take some guard bits to prepare for the 'expt' lost bits below.
227      If |x| < 2^k, then log|x| < k, thus taking log2(k) bits should be fine. */
228   if (MPFR_GET_EXP(x) > 0)
229     w += MPFR_INT_CEIL_LOG2 (MPFR_GET_EXP(x));
230   MPFR_GROUP_INIT_1(group, w, t);
231   MPFR_SAVE_EXPO_MARK (expo);
232   MPFR_ZIV_INIT (loop, w);
233   for (;;)
234     {
235       mpfr_exp_t expt;
236       unsigned int err;
237 
238       mpfr_log (t, absx, MPFR_RNDN);
239       /* t = log|x| * (1 + theta) with |theta| <= 2^(-w) */
240       mpfr_div_ui (t, t, k, MPFR_RNDN);
241       /* No possible underflow in mpfr_log and mpfr_div_ui. */
242       expt = MPFR_GET_EXP (t);  /* assumes t <> 0 */
243       /* t = log|x|/k * (1 + theta) + eps with |theta| <= 2^(-w)
244          and |eps| <= 1/2 ulp(t), thus the total error is bounded
245          by 1.5 * 2^(expt - w) */
246       mpfr_exp (t, t, MPFR_RNDN);
247       /* t = |x|^(1/k) * exp(tau) * (1 + theta1) with
248          |tau| <= 1.5 * 2^(expt - w) and |theta1| <= 2^(-w).
249          For |tau| <= 0.5 we have |exp(tau)-1| < 4/3*tau, thus
250          for w >= expt + 2 we have:
251          t = |x|^(1/k) * (1 + 2^(expt+2)*theta2) * (1 + theta1) with
252          |theta1|, |theta2| <= 2^(-w).
253          If expt+2 > 0, as long as w >= 1, we have:
254          t = |x|^(1/k) * (1 + 2^(expt+3)*theta3) with |theta3| < 2^(-w).
255          For expt+2 = 0, we have:
256          t = |x|^(1/k) * (1 + 2^2*theta3) with |theta3| < 2^(-w).
257          Finally for expt+2 < 0 we have:
258          t = |x|^(1/k) * (1 + 2*theta3) with |theta3| < 2^(-w).
259       */
260       err = (expt + 2 > 0) ? expt + 3
261         : (expt + 2 == 0) ? 2 : 1;
262       /* now t = |x|^(1/k) * (1 + 2^(err-w)) thus the error is at most
263          2^(EXP(t) - w + err) */
264       if (MPFR_LIKELY (MPFR_CAN_ROUND(t, w - err, MPFR_PREC(y), rnd_mode)))
265         break;
266 
267       /* If we fail to round correctly, check for an exact result or a
268          midpoint result with MPFR_RNDN (regarded as hard-to-round in
269          all precisions in order to determine the ternary value). */
270       {
271         mpfr_t z, zk;
272 
273         mpfr_init2 (z, MPFR_PREC(y) + (rnd_mode == MPFR_RNDN));
274         mpfr_init2 (zk, MPFR_PREC(x));
275         mpfr_set (z, t, MPFR_RNDN);
276         inexact = mpfr_pow_ui (zk, z, k, MPFR_RNDN);
277         exact_root = !inexact && mpfr_equal_p (zk, absx);
278         if (exact_root) /* z is the exact root, thus round z directly */
279           inexact = mpfr_set4 (y, z, rnd_mode, MPFR_SIGN (x));
280         mpfr_clear (zk);
281         mpfr_clear (z);
282         if (exact_root)
283           break;
284       }
285 
286       MPFR_ZIV_NEXT (loop, w);
287       MPFR_GROUP_REPREC_1(group, w, t);
288     }
289   MPFR_ZIV_FREE (loop);
290 
291   if (!exact_root)
292     inexact = mpfr_set4 (y, t, rnd_mode, MPFR_SIGN (x));
293 
294   MPFR_GROUP_CLEAR(group);
295   MPFR_TMP_FREE(marker);
296   MPFR_SAVE_EXPO_FREE (expo);
297 
298   return mpfr_check_range (y, inexact, rnd_mode);
299 }
300 
301 int
mpfr_rootn_si(mpfr_ptr y,mpfr_srcptr x,long k,mpfr_rnd_t rnd_mode)302 mpfr_rootn_si (mpfr_ptr y, mpfr_srcptr x, long k, mpfr_rnd_t rnd_mode)
303 {
304   int inexact;
305   MPFR_ZIV_DECL(loop);
306   MPFR_SAVE_EXPO_DECL (expo);
307 
308   MPFR_LOG_FUNC
309     (("x[%Pd]=%.*Rg k=%lu rnd=%d",
310       mpfr_get_prec (x), mpfr_log_prec, x, k, rnd_mode),
311      ("y[%Pd]=%.*Rg inexact=%d",
312       mpfr_get_prec (y), mpfr_log_prec, y, inexact));
313 
314   if (k >= 0)
315     return mpfr_rootn_ui (y, x, k, rnd_mode);
316 
317   /* Singular values for k < 0 */
318   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
319     {
320       if (MPFR_IS_NAN (x))
321         {
322           MPFR_SET_NAN (y); /* NaN^(1/k) = NaN */
323           MPFR_RET_NAN;
324         }
325 
326       if (MPFR_IS_INF (x)) /* (+Inf)^(1/k) = +0
327                               (-Inf)^(1/k) = -0 if k odd
328                               (-Inf)^(1/k) = NaN if k even */
329         {
330           /* Cast k to an unsigned type so that this is well-defined. */
331           if (MPFR_IS_NEG (x) && ((unsigned long) k & 1) == 0)
332             {
333               MPFR_SET_NAN (y);
334               MPFR_RET_NAN;
335             }
336           MPFR_SET_ZERO (y);
337           MPFR_SET_SAME_SIGN (y, x);
338         }
339       else /* x is necessarily 0: (+0)^(1/k) = +Inf
340                                   (-0)^(1/k) = +Inf if k even
341                                   (-0)^(1/k) = -Inf if k odd */
342         {
343           MPFR_ASSERTD (MPFR_IS_ZERO (x));
344           MPFR_SET_INF (y);
345           /* Cast k to an unsigned type so that this is well-defined. */
346           if (MPFR_IS_POS (x) || ((unsigned long) k & 1) == 0)
347             MPFR_SET_POS (y);
348           else
349             MPFR_SET_NEG (y);
350           MPFR_SET_DIVBY0 ();
351         }
352       MPFR_RET (0);
353     }
354 
355   /* Returns NAN for x < 0 and k even */
356   /* Cast k to an unsigned type so that this is well-defined. */
357   if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && ((unsigned long) k & 1) == 0))
358     {
359       MPFR_SET_NAN (y);
360       MPFR_RET_NAN;
361     }
362 
363   /* Special case |x| = 1. Note that if x = -1, then k is odd
364      (NaN results have already been filtered), so that y = -1. */
365   if (mpfr_cmpabs (x, __gmpfr_one) == 0)
366     return mpfr_set (y, x, rnd_mode);
367 
368   /* The case k = -1 is probably rare in practice (the user would directly
369      do a division if k is a constant, and even mpfr_pow_si is more natural).
370      But let's take it into account here, so that in the general case below,
371      overflows and underflows will be impossible, and we won't need to test
372      and handle the corresponding flags. And let's take the opportunity to
373      handle k = -2 as well since mpfr_rec_sqrt is faster than the generic
374      mpfr_rootn_si (this is visible when running the trec_sqrt tests with
375      mpfr_rootn_si + generic code for k = -2 instead of mpfr_rec_sqrt). */
376   /* TODO: If MPFR_WANT_ASSERT >= 2, define a new mpfr_rootn_si function
377      so that for k = -2, compute the result with both mpfr_rec_sqrt and
378      the generic code, and compare (ditto for mpfr_rec_sqrt), like what
379      is done in add1sp.c (mpfr_add1sp and mpfr_add1 results compared). */
380   if (k >= -2)
381     {
382       if (k == -1)
383         return mpfr_ui_div (y, 1, x, rnd_mode);
384       else
385         return mpfr_rec_sqrt (y, x, rnd_mode);
386     }
387 
388   /* TODO: Should we expand mpfr_root_aux to negative values of k
389      and call it if k < -100, a bit like in mpfr_rootn_ui? */
390 
391   /* General case */
392   {
393     mpfr_t t;
394     mpfr_prec_t Ny;  /* target precision */
395     mpfr_prec_t Nt;  /* working precision */
396 
397     /* initial working precision */
398     Ny = MPFR_PREC (y);
399     Nt = Ny + 10;
400 
401     MPFR_SAVE_EXPO_MARK (expo);
402 
403     mpfr_init2 (t, Nt);
404 
405     MPFR_ZIV_INIT (loop, Nt);
406     for (;;)
407       {
408         /* Compute the root before the division, in particular to avoid
409            overflows and underflows.
410            Moreover, midpoints are impossible. And an exact case implies
411            that |x| is a power of 2; such a case is not the most common
412            one, so that we detect it only after MPFR_CAN_ROUND. */
413 
414         /* Let's use MPFR_RNDF to avoid the potentially costly detection
415            of exact cases in mpfr_rootn_ui (we just lose one bit in the
416            final approximation). */
417         mpfr_rootn_ui (t, x, - (unsigned long) k, MPFR_RNDF);
418         inexact = mpfr_ui_div (t, 1, t, rnd_mode);
419 
420         /* The final error is bounded by 5 ulp (see algorithms.tex,
421            "Generic error of inverse"), which is <= 2^3 ulp. */
422         MPFR_ASSERTD (! MPFR_IS_SINGULAR (t));
423         if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - 3, Ny, rnd_mode) ||
424                          (inexact == 0 && mpfr_powerof2_raw (x))))
425           break;
426 
427         MPFR_ZIV_NEXT (loop, Nt);
428         mpfr_set_prec (t, Nt);
429       }
430     MPFR_ZIV_FREE (loop);
431 
432     inexact = mpfr_set (y, t, rnd_mode);
433     mpfr_clear (t);
434 
435     MPFR_SAVE_EXPO_FREE (expo);
436     return mpfr_check_range (y, inexact, rnd_mode);
437   }
438 }
439 
440 int
mpfr_root(mpfr_ptr y,mpfr_srcptr x,unsigned long k,mpfr_rnd_t rnd_mode)441 mpfr_root (mpfr_ptr y, mpfr_srcptr x, unsigned long k, mpfr_rnd_t rnd_mode)
442 {
443   MPFR_LOG_FUNC
444     (("x[%Pd]=%.*Rg k=%lu rnd=%d",
445       mpfr_get_prec (x), mpfr_log_prec, x, k, rnd_mode),
446      ("y[%Pd]=%.*Rg",
447       mpfr_get_prec (y), mpfr_log_prec, y));
448 
449   /* Like mpfr_rootn_ui... */
450   if (MPFR_UNLIKELY (k <= 1))
451     {
452       if (k == 0)
453         {
454           /* rootn(x,0) is NaN (IEEE 754-2008). */
455           MPFR_SET_NAN (y);
456           MPFR_RET_NAN;
457         }
458       else /* y = x^(1/1) = x */
459         return mpfr_set (y, x, rnd_mode);
460     }
461 
462   if (MPFR_UNLIKELY (MPFR_IS_ZERO (x)))
463     {
464       /* The only case that may differ from mpfr_rootn_ui. */
465       MPFR_SET_ZERO (y);
466       MPFR_SET_SAME_SIGN (y, x);
467       MPFR_RET (0);
468     }
469   else
470     return mpfr_rootn_ui (y, x, k, rnd_mode);
471 }
472