1 /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers
2
3 Copyright 1999-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* agm(x,y) is between x and y, so we don't need to save exponent range */
27 int
mpfr_agm(mpfr_ptr r,mpfr_srcptr op2,mpfr_srcptr op1,mpfr_rnd_t rnd_mode)28 mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
29 {
30 int compare, inexact;
31 mp_size_t s;
32 mpfr_prec_t p, q;
33 mp_limb_t *up, *vp, *ufp, *vfp;
34 mpfr_t u, v, uf, vf, sc1, sc2;
35 mpfr_exp_t scaleop = 0, scaleit;
36 unsigned long n; /* number of iterations */
37 MPFR_ZIV_DECL (loop);
38 MPFR_TMP_DECL(marker);
39 MPFR_SAVE_EXPO_DECL (expo);
40
41 MPFR_LOG_FUNC
42 (("op2[%Pd]=%.*Rg op1[%Pd]=%.*Rg rnd=%d",
43 mpfr_get_prec (op2), mpfr_log_prec, op2,
44 mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
45 ("r[%Pd]=%.*Rg inexact=%d",
46 mpfr_get_prec (r), mpfr_log_prec, r, inexact));
47
48 /* Deal with special values */
49 if (MPFR_ARE_SINGULAR (op1, op2))
50 {
51 /* If a or b is NaN, the result is NaN */
52 if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
53 {
54 MPFR_SET_NAN(r);
55 MPFR_RET_NAN;
56 }
57 /* now one of a or b is Inf or 0 */
58 /* If a and b is +Inf, the result is +Inf.
59 Otherwise if a or b is -Inf or 0, the result is NaN */
60 else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
61 {
62 if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
63 {
64 MPFR_SET_INF(r);
65 MPFR_SET_SAME_SIGN(r, op1);
66 MPFR_RET(0); /* exact */
67 }
68 else
69 {
70 MPFR_SET_NAN(r);
71 MPFR_RET_NAN;
72 }
73 }
74 else /* a and b are neither NaN nor Inf, and one is zero */
75 { /* If a or b is 0, the result is +0, in particular because the
76 result is always >= 0 with our definition (Maple sometimes
77 chooses a different sign for GaussAGM, but it uses another
78 definition, with possible negative results). */
79 MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
80 MPFR_SET_POS (r);
81 MPFR_SET_ZERO (r);
82 MPFR_RET (0); /* exact */
83 }
84 }
85
86 /* If a or b is negative (excluding -Infinity), the result is NaN */
87 if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
88 {
89 MPFR_SET_NAN(r);
90 MPFR_RET_NAN;
91 }
92
93 /* Precision of the following calculus */
94 q = MPFR_PREC(r);
95 p = q + MPFR_INT_CEIL_LOG2(q) + 15;
96 MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
97 s = MPFR_PREC2LIMBS (p);
98
99 /* b (op2) and a (op1) are the 2 operands but we want b >= a */
100 compare = mpfr_cmp (op1, op2);
101 if (MPFR_UNLIKELY( compare == 0 ))
102 return mpfr_set (r, op1, rnd_mode);
103 else if (compare > 0)
104 {
105 mpfr_srcptr t = op1;
106 op1 = op2;
107 op2 = t;
108 }
109
110 /* Now b (=op2) > a (=op1) */
111
112 MPFR_SAVE_EXPO_MARK (expo);
113
114 MPFR_TMP_MARK(marker);
115
116 /* Main loop */
117 MPFR_ZIV_INIT (loop, p);
118 for (;;)
119 {
120 mpfr_prec_t eq;
121 unsigned long err = 0; /* must be set to 0 at each Ziv iteration */
122 MPFR_BLOCK_DECL (flags);
123
124 /* Init temporary vars */
125 MPFR_TMP_INIT (up, u, p, s);
126 MPFR_TMP_INIT (vp, v, p, s);
127 MPFR_TMP_INIT (ufp, uf, p, s);
128 MPFR_TMP_INIT (vfp, vf, p, s);
129
130 /* Calculus of un and vn */
131 retry:
132 MPFR_BLOCK (flags,
133 mpfr_mul (u, op1, op2, MPFR_RNDN);
134 /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
135 mpfr_add (v, op1, op2, MPFR_RNDN);
136 /* mpfr_add with !=prec is still good */);
137 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
138 {
139 mpfr_exp_t e1, e2;
140
141 MPFR_ASSERTN (scaleop == 0);
142 e1 = MPFR_GET_EXP (op1);
143 e2 = MPFR_GET_EXP (op2);
144
145 /* Let's determine scaleop to avoid an overflow/underflow. */
146 if (MPFR_OVERFLOW (flags))
147 {
148 /* Let's recall that emin <= e1 <= e2 <= emax.
149 There has been an overflow. Thus e2 >= emax/2.
150 If the mpfr_mul overflowed, then e1 + e2 > emax.
151 If the mpfr_add overflowed, then e2 = emax.
152 We want: (e1 + scale) + (e2 + scale) <= emax,
153 i.e. scale <= (emax - e1 - e2) / 2. Let's take
154 scale = min(floor((emax - e1 - e2) / 2), -1).
155 This is OK, as:
156 1. emin <= scale <= -1.
157 2. e1 + scale >= emin. Indeed:
158 * If e1 + e2 > emax, then
159 e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
160 >= (emax + e1 - emax) / 2 - 1
161 >= e1 / 2 - 1 >= emin.
162 * Otherwise, mpfr_mul didn't overflow, therefore
163 mpfr_add overflowed and e2 = emax, so that
164 e1 > emin (see restriction below).
165 e1 + scale > emin - 1, thus e1 + scale >= emin.
166 3. e2 + scale <= emax, since scale < 0. */
167 if (e1 + e2 > MPFR_EMAX_MAX)
168 {
169 scaleop = - (((e1 + e2) - MPFR_EMAX_MAX + 1) / 2);
170 MPFR_ASSERTN (scaleop < 0);
171 }
172 else
173 {
174 /* The addition necessarily overflowed. */
175 MPFR_ASSERTN (e2 == MPFR_EMAX_MAX);
176 /* The case where e1 = emin and e2 = emax is not supported
177 here. This would mean that the precision of e2 would be
178 huge (and possibly not supported in practice anyway). */
179 MPFR_ASSERTN (e1 > MPFR_EMIN_MIN);
180 /* Note: this case is probably impossible to have in practice
181 since we need e2 = emax, and no overflow in the product.
182 Since the product is >= 2^(e1+e2-2), it implies
183 e1 + e2 - 2 <= emax, thus e1 <= 2. Now to get an overflow
184 we need op1 >= 1/2 ulp(op2), which implies that the
185 precision of op2 should be at least emax-2. On a 64-bit
186 computer this is impossible to have, and would require
187 a huge amount of memory on a 32-bit computer. */
188 scaleop = -1;
189 }
190
191 }
192 else /* underflow only (in the multiplication) */
193 {
194 /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
195 We want: (e1 + scale) + (e2 + scale) >= emin + 1,
196 i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
197 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
198 1. 1 <= scale <= emax.
199 2. e1 + scale >= emin + 1 >= emin.
200 3. e2 + scale <= scale <= emax. */
201 MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
202 scaleop = (MPFR_EMIN_MIN + 2 - e1 - e2) / 2;
203 MPFR_ASSERTN (scaleop > 0);
204 }
205
206 MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
207 MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
208 op1 = sc1;
209 op2 = sc2;
210 MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
211 MPFR_EXP_FSPEC "d\n", scaleop));
212 goto retry;
213 }
214
215 MPFR_CLEAR_FLAGS ();
216 mpfr_sqrt (u, u, MPFR_RNDN);
217 mpfr_div_2ui (v, v, 1, MPFR_RNDN);
218
219 scaleit = 0;
220 n = 1;
221 while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
222 {
223 MPFR_BLOCK_DECL (flags2);
224
225 MPFR_LOG_MSG (("Iteration n = %lu\n", n));
226
227 retry2:
228 mpfr_add (vf, u, v, MPFR_RNDN); /* No overflow? */
229 mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
230 /* See proof in algorithms.tex */
231 if (eq > p / 4)
232 {
233 mpfr_t w;
234 MPFR_BLOCK_DECL (flags3);
235
236 MPFR_LOG_MSG (("4*eq > p\n", 0));
237
238 /* vf = V(k) */
239 mpfr_init2 (w, (p + 1) / 2);
240 MPFR_BLOCK
241 (flags3,
242 mpfr_sub (w, v, u, MPFR_RNDN); /* e = V(k-1)-U(k-1) */
243 mpfr_sqr (w, w, MPFR_RNDN); /* e = e^2 */
244 mpfr_div_2ui (w, w, 4, MPFR_RNDN); /* e*= (1/2)^2*1/4 */
245 mpfr_div (w, w, vf, MPFR_RNDN); /* 1/4*e^2/V(k) */
246 );
247 if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
248 {
249 mpfr_sub (v, vf, w, MPFR_RNDN);
250 err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
251 mpfr_clear (w);
252 break;
253 }
254 /* There has been an underflow because of the cancellation
255 between V(k-1) and U(k-1). Let's use the conventional
256 method. */
257 MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
258 mpfr_clear (w);
259 MPFR_CLEAR_UNDERFLOW ();
260 }
261 /* U(k) increases, so that U.V can overflow (but not underflow). */
262 MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
263 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
264 {
265 mpfr_exp_t scale2;
266
267 scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
268 - MPFR_EMAX_MAX + 1) / 2);
269 MPFR_EXP (u) += scale2;
270 MPFR_EXP (v) += scale2;
271 scaleit += scale2;
272 MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
273 MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
274 n, scaleit, scale2));
275 MPFR_CLEAR_OVERFLOW ();
276 goto retry2;
277 }
278 mpfr_sqrt (u, uf, MPFR_RNDN);
279 mpfr_swap (v, vf);
280 n ++;
281 }
282
283 MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
284
285 /* the error on v is bounded by (18n+51) ulps, or twice if there
286 was an exponent loss in the final subtraction */
287 err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
288 since n is about log(p) */
289 /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
290 if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
291 MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
292 break; /* Stop the loop */
293
294 /* Next iteration */
295 MPFR_ZIV_NEXT (loop, p);
296 s = MPFR_PREC2LIMBS (p);
297 }
298 MPFR_ZIV_FREE (loop);
299
300 if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
301 != 0))
302 {
303 MPFR_ASSERTN (! mpfr_overflow_p ()); /* since mpfr_clear_flags */
304 MPFR_ASSERTN (! mpfr_underflow_p ()); /* since mpfr_clear_flags */
305 MPFR_ASSERTN (! mpfr_divby0_p ()); /* since mpfr_clear_flags */
306 MPFR_ASSERTN (! mpfr_nanflag_p ()); /* since mpfr_clear_flags */
307 }
308
309 /* Setting of the result */
310 inexact = mpfr_set (r, v, rnd_mode);
311 MPFR_EXP (r) -= scaleop + scaleit;
312
313 /* Let's clean */
314 MPFR_TMP_FREE(marker);
315
316 MPFR_SAVE_EXPO_FREE (expo);
317 /* From the definition of the AGM, underflow and overflow
318 are not possible. */
319 return mpfr_check_range (r, inexact, rnd_mode);
320 /* agm(u,v) can be exact for u, v rational only for u=v.
321 Proof (due to Nicolas Brisebarre): it suffices to consider
322 u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
323 and a theorem due to G.V. Chudnovsky states that for x a
324 non-zero algebraic number with |x|<1, then
325 2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
326 independent over Q. */
327 }
328