xref: /netbsd-src/external/lgpl3/mpfr/dist/src/hypot.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_hypot -- Euclidean distance
2 
3 Copyright 2001-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* The computation of hypot of x and y is done by  *
27  *    hypot(x,y)= sqrt(x^2+y^2) = z                */
28 
29 int
mpfr_hypot(mpfr_ptr z,mpfr_srcptr x,mpfr_srcptr y,mpfr_rnd_t rnd_mode)30 mpfr_hypot (mpfr_ptr z, mpfr_srcptr x, mpfr_srcptr y, mpfr_rnd_t rnd_mode)
31 {
32   int inexact;
33   unsigned int exact;  /* Warning: 0 will mean "exact" */
34   mpfr_t t, te, ti; /* auxiliary variables */
35   mpfr_prec_t N, Nz; /* size variables */
36   mpfr_prec_t Nt;   /* precision of the intermediary variable */
37   mpfr_prec_t threshold;
38   mpfr_exp_t Ex, sh;
39   mpfr_uexp_t diff_exp;
40 
41   MPFR_SAVE_EXPO_DECL (expo);
42   MPFR_ZIV_DECL (loop);
43   MPFR_BLOCK_DECL (flags);
44 
45   MPFR_LOG_FUNC
46     (("x[%Pd]=%.*Rg y[%Pd]=%.*Rg rnd=%d",
47       mpfr_get_prec (x), mpfr_log_prec, x,
48       mpfr_get_prec (y), mpfr_log_prec, y, rnd_mode),
49      ("z[%Pd]=%.*Rg inexact=%d",
50       mpfr_get_prec (z), mpfr_log_prec, z, inexact));
51 
52   /* particular cases */
53   if (MPFR_ARE_SINGULAR (x, y))
54     {
55       if (MPFR_IS_INF (x) || MPFR_IS_INF (y))
56         {
57           /* Return +inf, even when the other number is NaN. */
58           MPFR_SET_INF (z);
59           MPFR_SET_POS (z);
60           MPFR_RET (0);
61         }
62       else if (MPFR_IS_NAN (x) || MPFR_IS_NAN (y))
63         {
64           MPFR_SET_NAN (z);
65           MPFR_RET_NAN;
66         }
67       else if (MPFR_IS_ZERO (x))
68         return mpfr_abs (z, y, rnd_mode);
69       else /* y is necessarily 0 */
70         return mpfr_abs (z, x, rnd_mode);
71     }
72 
73   /* TODO: It may be sufficient to just compare the exponents.
74      The error analysis would need to be updated. */
75   if (mpfr_cmpabs (x, y) < 0)
76     {
77       mpfr_srcptr u;
78       u = x;
79       x = y;
80       y = u;
81     }
82 
83   /* now |x| >= |y| */
84 
85   Ex = MPFR_GET_EXP (x);
86   diff_exp = (mpfr_uexp_t) Ex - MPFR_GET_EXP (y);
87 
88   N = MPFR_PREC (x);   /* Precision of input variable */
89   Nz = MPFR_PREC (z);   /* Precision of output variable */
90   threshold = (MAX (N, Nz) + (rnd_mode == MPFR_RNDN ? 1 : 0)) << 1;
91   if (rnd_mode == MPFR_RNDA)
92     rnd_mode = MPFR_RNDU; /* since the result is positive, RNDA = RNDU */
93 
94   /* Is |x| a suitable approximation to the precision Nz ?
95      (see algorithms.tex for explanations) */
96   if (diff_exp > threshold)
97     /* result is |x| or |x|+ulp(|x|,Nz) */
98     {
99       if (MPFR_UNLIKELY (rnd_mode == MPFR_RNDU))
100         {
101           /* If z > abs(x), then it was already rounded up; otherwise
102              z = abs(x), and we need to add one ulp due to y. */
103           if (mpfr_abs (z, x, rnd_mode) == 0)
104             {
105               mpfr_nexttoinf (z);
106               /* since mpfr_nexttoinf does not set the overflow flag,
107                  we have to check manually for overflow */
108               if (MPFR_UNLIKELY (MPFR_IS_INF (z)))
109                 MPFR_SET_OVERFLOW ();
110             }
111           MPFR_RET (1);
112         }
113       else /* MPFR_RNDZ, MPFR_RNDD, MPFR_RNDN */
114         {
115           if (MPFR_LIKELY (Nz >= N))
116             {
117               mpfr_abs (z, x, rnd_mode);  /* exact */
118               MPFR_RET (-1);
119             }
120           else
121             {
122               MPFR_SET_EXP (z, Ex);
123               MPFR_SET_SIGN (z, MPFR_SIGN_POS);
124               MPFR_RNDRAW_GEN (inexact, z, MPFR_MANT (x), N, rnd_mode, 1,
125                                goto addoneulp,
126                                if (MPFR_UNLIKELY (++ MPFR_EXP (z) >
127                                                   __gmpfr_emax))
128                                  return mpfr_overflow (z, rnd_mode, 1);
129                                );
130 
131               if (MPFR_UNLIKELY (inexact == 0))
132                 inexact = -1;
133               MPFR_RET (inexact);
134             }
135         }
136     }
137 
138   /* General case */
139 
140   N = MAX (MPFR_PREC (x), MPFR_PREC (y));
141 
142   /* working precision */
143   Nt = Nz + MPFR_INT_CEIL_LOG2 (Nz) + 4;
144 
145   mpfr_init2 (t, Nt);
146   mpfr_init2 (te, Nt);
147   mpfr_init2 (ti, Nt);
148 
149   MPFR_SAVE_EXPO_MARK (expo);
150 
151   /* Scale x and y to avoid any overflow and underflow in x^2
152      (as |x| >= |y|), and to limit underflow cases for y or y^2.
153      We have: x = Mx * 2^Ex with 1/2 <= |Mx| < 1, and we choose:
154      sh = floor((Emax - 1) / 2) - Ex.
155      Thus (x * 2^sh)^2 = Mx^2 * 2^(2*floor((Emax-1)/2)), which has an
156      exponent of at most Emax - 1. Therefore (x * 2^sh)^2 + (y * 2^sh)^2
157      will have an exponent of at most Emax, even after rounding as we
158      round toward zero. Using a larger sh wouldn't guarantee an absence
159      of overflow. Note that the scaling of y can underflow only when the
160      target precision is huge, otherwise the case would already have been
161      handled by the diff_exp > threshold code; but this case is avoided
162      thanks to a FMA (this problem is transferred to the FMA code). */
163   sh = (mpfr_get_emax () - 1) / 2 - Ex;
164 
165   /* FIXME: ti is subject to underflow. Solution: x and y could be
166      aliased with MPFR_ALIAS, and if need be, the aliases be pre-scaled
167      exactly as UBF, so that x^2 + y^2 is in range. Then call mpfr_fmma
168      and the square root, and scale the result. The error analysis would
169      be simpler.
170      Note: mpfr_fmma is currently not optimized. */
171 
172   MPFR_ZIV_INIT (loop, Nt);
173   for (;;)
174     {
175       mpfr_prec_t err;
176 
177       exact = mpfr_mul_2si (te, x, sh, MPFR_RNDZ);
178       exact |= mpfr_mul_2si (ti, y, sh, MPFR_RNDZ);
179       exact |= mpfr_sqr (te, te, MPFR_RNDZ);
180       /* Use fma in order to avoid underflow when diff_exp<=MPFR_EMAX_MAX-2 */
181       exact |= mpfr_fma (t, ti, ti, te, MPFR_RNDZ);
182       exact |= mpfr_sqrt (t, t, MPFR_RNDZ);
183 
184       err = Nt < N ? 4 : 2;
185       if (MPFR_LIKELY (exact == 0
186                        || MPFR_CAN_ROUND (t, Nt-err, Nz, rnd_mode)))
187         break;
188 
189       MPFR_ZIV_NEXT (loop, Nt);
190       mpfr_set_prec (t, Nt);
191       mpfr_set_prec (te, Nt);
192       mpfr_set_prec (ti, Nt);
193     }
194   MPFR_ZIV_FREE (loop);
195 
196   MPFR_BLOCK (flags, inexact = mpfr_div_2si (z, t, sh, rnd_mode));
197   MPFR_ASSERTD (exact == 0 || inexact != 0 || rnd_mode == MPFR_RNDF);
198 
199   mpfr_clear (t);
200   mpfr_clear (ti);
201   mpfr_clear (te);
202 
203   /*
204     exact  inexact
205     0         0         result is exact, ternary flag is 0
206     0       non zero    t is exact, ternary flag given by inexact
207     1         0         impossible (see above)
208     1       non zero    ternary flag given by inexact
209   */
210 
211   MPFR_SAVE_EXPO_FREE (expo);
212 
213   if (MPFR_OVERFLOW (flags))
214     MPFR_SET_OVERFLOW ();
215   /* hypot(x,y) >= |x|, thus underflow is not possible. */
216 
217   return mpfr_check_range (z, inexact, rnd_mode);
218 }
219