xref: /netbsd-src/external/lgpl3/mpfr/dist/src/agm.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers
2 
3 Copyright 1999-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* agm(x,y) is between x and y, so we don't need to save exponent range */
27 int
mpfr_agm(mpfr_ptr r,mpfr_srcptr op2,mpfr_srcptr op1,mpfr_rnd_t rnd_mode)28 mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
29 {
30   int compare, inexact;
31   mp_size_t s;
32   mpfr_prec_t p, q;
33   mp_limb_t *up, *vp, *ufp, *vfp;
34   mpfr_t u, v, uf, vf, sc1, sc2;
35   mpfr_exp_t scaleop = 0, scaleit;
36   unsigned long n; /* number of iterations */
37   MPFR_ZIV_DECL (loop);
38   MPFR_TMP_DECL(marker);
39   MPFR_SAVE_EXPO_DECL (expo);
40 
41   MPFR_LOG_FUNC
42     (("op2[%Pd]=%.*Rg op1[%Pd]=%.*Rg rnd=%d",
43       mpfr_get_prec (op2), mpfr_log_prec, op2,
44       mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
45      ("r[%Pd]=%.*Rg inexact=%d",
46       mpfr_get_prec (r), mpfr_log_prec, r, inexact));
47 
48   /* Deal with special values */
49   if (MPFR_ARE_SINGULAR (op1, op2))
50     {
51       /* If a or b is NaN, the result is NaN */
52       if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
53         {
54           MPFR_SET_NAN(r);
55           MPFR_RET_NAN;
56         }
57       /* now one of a or b is Inf or 0 */
58       /* If a and b is +Inf, the result is +Inf.
59          Otherwise if a or b is -Inf or 0, the result is NaN */
60       else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
61         {
62           if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
63             {
64               MPFR_SET_INF(r);
65               MPFR_SET_SAME_SIGN(r, op1);
66               MPFR_RET(0); /* exact */
67             }
68           else
69             {
70               MPFR_SET_NAN(r);
71               MPFR_RET_NAN;
72             }
73         }
74       else /* a and b are neither NaN nor Inf, and one is zero */
75         {  /* If a or b is 0, the result is +0, in particular because the
76               result is always >= 0 with our definition (Maple sometimes
77               chooses a different sign for GaussAGM, but it uses another
78               definition, with possible negative results). */
79           MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
80           MPFR_SET_POS (r);
81           MPFR_SET_ZERO (r);
82           MPFR_RET (0); /* exact */
83         }
84     }
85 
86   /* If a or b is negative (excluding -Infinity), the result is NaN */
87   if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
88     {
89       MPFR_SET_NAN(r);
90       MPFR_RET_NAN;
91     }
92 
93   /* Precision of the following calculus */
94   q = MPFR_PREC(r);
95   p = q + MPFR_INT_CEIL_LOG2(q) + 15;
96   MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
97   s = MPFR_PREC2LIMBS (p);
98 
99   /* b (op2) and a (op1) are the 2 operands but we want b >= a */
100   compare = mpfr_cmp (op1, op2);
101   if (MPFR_UNLIKELY( compare == 0 ))
102     return mpfr_set (r, op1, rnd_mode);
103   else if (compare > 0)
104     {
105       mpfr_srcptr t = op1;
106       op1 = op2;
107       op2 = t;
108     }
109 
110   /* Now b (=op2) > a (=op1) */
111 
112   MPFR_SAVE_EXPO_MARK (expo);
113 
114   MPFR_TMP_MARK(marker);
115 
116   /* Main loop */
117   MPFR_ZIV_INIT (loop, p);
118   for (;;)
119     {
120       mpfr_prec_t eq;
121       unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
122       MPFR_BLOCK_DECL (flags);
123 
124       /* Init temporary vars */
125       MPFR_TMP_INIT (up, u, p, s);
126       MPFR_TMP_INIT (vp, v, p, s);
127       MPFR_TMP_INIT (ufp, uf, p, s);
128       MPFR_TMP_INIT (vfp, vf, p, s);
129 
130       /* Calculus of un and vn */
131     retry:
132       MPFR_BLOCK (flags,
133                   mpfr_mul (u, op1, op2, MPFR_RNDN);
134                   /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
135                   mpfr_add (v, op1, op2, MPFR_RNDN);
136                   /* mpfr_add with !=prec is still good */);
137       if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
138         {
139           mpfr_exp_t e1, e2;
140 
141           MPFR_ASSERTN (scaleop == 0);
142           e1 = MPFR_GET_EXP (op1);
143           e2 = MPFR_GET_EXP (op2);
144 
145           /* Let's determine scaleop to avoid an overflow/underflow. */
146           if (MPFR_OVERFLOW (flags))
147             {
148               /* Let's recall that emin <= e1 <= e2 <= emax.
149                  There has been an overflow. Thus e2 >= emax/2.
150                  If the mpfr_mul overflowed, then e1 + e2 > emax.
151                  If the mpfr_add overflowed, then e2 = emax.
152                  We want: (e1 + scale) + (e2 + scale) <= emax,
153                  i.e. scale <= (emax - e1 - e2) / 2. Let's take
154                  scale = min(floor((emax - e1 - e2) / 2), -1).
155                  This is OK, as:
156                  1. emin <= scale <= -1.
157                  2. e1 + scale >= emin. Indeed:
158                     * If e1 + e2 > emax, then
159                       e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
160                                  >= (emax + e1 - emax) / 2 - 1
161                                  >= e1 / 2 - 1 >= emin.
162                     * Otherwise, mpfr_mul didn't overflow, therefore
163                       mpfr_add overflowed and e2 = emax, so that
164                       e1 > emin (see restriction below).
165                       e1 + scale > emin - 1, thus e1 + scale >= emin.
166                  3. e2 + scale <= emax, since scale < 0. */
167               if (e1 + e2 > MPFR_EMAX_MAX)
168                 {
169                   scaleop = - (((e1 + e2) - MPFR_EMAX_MAX + 1) / 2);
170                   MPFR_ASSERTN (scaleop < 0);
171                 }
172               else
173                 {
174                   /* The addition necessarily overflowed. */
175                   MPFR_ASSERTN (e2 == MPFR_EMAX_MAX);
176                   /* The case where e1 = emin and e2 = emax is not supported
177                      here. This would mean that the precision of e2 would be
178                      huge (and possibly not supported in practice anyway). */
179                   MPFR_ASSERTN (e1 > MPFR_EMIN_MIN);
180                   /* Note: this case is probably impossible to have in practice
181                      since we need e2 = emax, and no overflow in the product.
182                      Since the product is >= 2^(e1+e2-2), it implies
183                      e1 + e2 - 2 <= emax, thus e1 <= 2. Now to get an overflow
184                      we need op1 >= 1/2 ulp(op2), which implies that the
185                      precision of op2 should be at least emax-2. On a 64-bit
186                      computer this is impossible to have, and would require
187                      a huge amount of memory on a 32-bit computer. */
188                   scaleop = -1;
189                 }
190 
191             }
192           else  /* underflow only (in the multiplication) */
193             {
194               /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
195                  We want: (e1 + scale) + (e2 + scale) >= emin + 1,
196                  i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
197                  scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
198                  1. 1 <= scale <= emax.
199                  2. e1 + scale >= emin + 1 >= emin.
200                  3. e2 + scale <= scale <= emax. */
201               MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
202               scaleop = (MPFR_EMIN_MIN + 2 - e1 - e2) / 2;
203               MPFR_ASSERTN (scaleop > 0);
204             }
205 
206           MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
207           MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
208           op1 = sc1;
209           op2 = sc2;
210           MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
211                          MPFR_EXP_FSPEC "d\n", scaleop));
212           goto retry;
213         }
214 
215       MPFR_CLEAR_FLAGS ();
216       mpfr_sqrt (u, u, MPFR_RNDN);
217       mpfr_div_2ui (v, v, 1, MPFR_RNDN);
218 
219       scaleit = 0;
220       n = 1;
221       while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
222         {
223           MPFR_BLOCK_DECL (flags2);
224 
225           MPFR_LOG_MSG (("Iteration n = %lu\n", n));
226 
227         retry2:
228           mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
229           mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
230           /* See proof in algorithms.tex */
231           if (eq > p / 4)
232             {
233               mpfr_t w;
234               MPFR_BLOCK_DECL (flags3);
235 
236               MPFR_LOG_MSG (("4*eq > p\n", 0));
237 
238               /* vf = V(k) */
239               mpfr_init2 (w, (p + 1) / 2);
240               MPFR_BLOCK
241                 (flags3,
242                  mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
243                  mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
244                  mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
245                  mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
246                  );
247               if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
248                 {
249                   mpfr_sub (v, vf, w, MPFR_RNDN);
250                   err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
251                   mpfr_clear (w);
252                   break;
253                 }
254               /* There has been an underflow because of the cancellation
255                  between V(k-1) and U(k-1). Let's use the conventional
256                  method. */
257               MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
258               mpfr_clear (w);
259               MPFR_CLEAR_UNDERFLOW ();
260             }
261           /* U(k) increases, so that U.V can overflow (but not underflow). */
262           MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
263           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
264             {
265               mpfr_exp_t scale2;
266 
267               scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
268                            - MPFR_EMAX_MAX + 1) / 2);
269               MPFR_EXP (u) += scale2;
270               MPFR_EXP (v) += scale2;
271               scaleit += scale2;
272               MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
273                              MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
274                              n, scaleit, scale2));
275               MPFR_CLEAR_OVERFLOW ();
276               goto retry2;
277             }
278           mpfr_sqrt (u, uf, MPFR_RNDN);
279           mpfr_swap (v, vf);
280           n ++;
281         }
282 
283       MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
284 
285       /* the error on v is bounded by (18n+51) ulps, or twice if there
286          was an exponent loss in the final subtraction */
287       err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
288                                                  since n is about log(p) */
289       /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
290       if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
291                        MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
292         break; /* Stop the loop */
293 
294       /* Next iteration */
295       MPFR_ZIV_NEXT (loop, p);
296       s = MPFR_PREC2LIMBS (p);
297     }
298   MPFR_ZIV_FREE (loop);
299 
300   if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
301                      != 0))
302     {
303       MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
304       MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
305       MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
306       MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
307     }
308 
309   /* Setting of the result */
310   inexact = mpfr_set (r, v, rnd_mode);
311   MPFR_EXP (r) -= scaleop + scaleit;
312 
313   /* Let's clean */
314   MPFR_TMP_FREE(marker);
315 
316   MPFR_SAVE_EXPO_FREE (expo);
317   /* From the definition of the AGM, underflow and overflow
318      are not possible. */
319   return mpfr_check_range (r, inexact, rnd_mode);
320   /* agm(u,v) can be exact for u, v rational only for u=v.
321      Proof (due to Nicolas Brisebarre): it suffices to consider
322      u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
323      and a theorem due to G.V. Chudnovsky states that for x a
324      non-zero algebraic number with |x|<1, then
325      2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
326      independent over Q. */
327 }
328