1 /* mpfr_hypot -- Euclidean distance 2 3 Copyright 2001-2023 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* The computation of hypot of x and y is done by * 27 * hypot(x,y)= sqrt(x^2+y^2) = z */ 28 29 int 30 mpfr_hypot (mpfr_ptr z, mpfr_srcptr x, mpfr_srcptr y, mpfr_rnd_t rnd_mode) 31 { 32 int inexact; 33 unsigned int exact; /* Warning: 0 will mean "exact" */ 34 mpfr_t t, te, ti; /* auxiliary variables */ 35 mpfr_prec_t N, Nz; /* size variables */ 36 mpfr_prec_t Nt; /* precision of the intermediary variable */ 37 mpfr_prec_t threshold; 38 mpfr_exp_t Ex, sh; 39 mpfr_uexp_t diff_exp; 40 41 MPFR_SAVE_EXPO_DECL (expo); 42 MPFR_ZIV_DECL (loop); 43 MPFR_BLOCK_DECL (flags); 44 45 MPFR_LOG_FUNC 46 (("x[%Pd]=%.*Rg y[%Pd]=%.*Rg rnd=%d", 47 mpfr_get_prec (x), mpfr_log_prec, x, 48 mpfr_get_prec (y), mpfr_log_prec, y, rnd_mode), 49 ("z[%Pd]=%.*Rg inexact=%d", 50 mpfr_get_prec (z), mpfr_log_prec, z, inexact)); 51 52 /* particular cases */ 53 if (MPFR_ARE_SINGULAR (x, y)) 54 { 55 if (MPFR_IS_INF (x) || MPFR_IS_INF (y)) 56 { 57 /* Return +inf, even when the other number is NaN. */ 58 MPFR_SET_INF (z); 59 MPFR_SET_POS (z); 60 MPFR_RET (0); 61 } 62 else if (MPFR_IS_NAN (x) || MPFR_IS_NAN (y)) 63 { 64 MPFR_SET_NAN (z); 65 MPFR_RET_NAN; 66 } 67 else if (MPFR_IS_ZERO (x)) 68 return mpfr_abs (z, y, rnd_mode); 69 else /* y is necessarily 0 */ 70 return mpfr_abs (z, x, rnd_mode); 71 } 72 73 /* TODO: It may be sufficient to just compare the exponents. 74 The error analysis would need to be updated. */ 75 if (mpfr_cmpabs (x, y) < 0) 76 { 77 mpfr_srcptr u; 78 u = x; 79 x = y; 80 y = u; 81 } 82 83 /* now |x| >= |y| */ 84 85 Ex = MPFR_GET_EXP (x); 86 diff_exp = (mpfr_uexp_t) Ex - MPFR_GET_EXP (y); 87 88 N = MPFR_PREC (x); /* Precision of input variable */ 89 Nz = MPFR_PREC (z); /* Precision of output variable */ 90 threshold = (MAX (N, Nz) + (rnd_mode == MPFR_RNDN ? 1 : 0)) << 1; 91 if (rnd_mode == MPFR_RNDA) 92 rnd_mode = MPFR_RNDU; /* since the result is positive, RNDA = RNDU */ 93 94 /* Is |x| a suitable approximation to the precision Nz ? 95 (see algorithms.tex for explanations) */ 96 if (diff_exp > threshold) 97 /* result is |x| or |x|+ulp(|x|,Nz) */ 98 { 99 if (MPFR_UNLIKELY (rnd_mode == MPFR_RNDU)) 100 { 101 /* If z > abs(x), then it was already rounded up; otherwise 102 z = abs(x), and we need to add one ulp due to y. */ 103 if (mpfr_abs (z, x, rnd_mode) == 0) 104 { 105 mpfr_nexttoinf (z); 106 /* since mpfr_nexttoinf does not set the overflow flag, 107 we have to check manually for overflow */ 108 if (MPFR_UNLIKELY (MPFR_IS_INF (z))) 109 MPFR_SET_OVERFLOW (); 110 } 111 MPFR_RET (1); 112 } 113 else /* MPFR_RNDZ, MPFR_RNDD, MPFR_RNDN */ 114 { 115 if (MPFR_LIKELY (Nz >= N)) 116 { 117 mpfr_abs (z, x, rnd_mode); /* exact */ 118 MPFR_RET (-1); 119 } 120 else 121 { 122 MPFR_SET_EXP (z, Ex); 123 MPFR_SET_SIGN (z, MPFR_SIGN_POS); 124 MPFR_RNDRAW_GEN (inexact, z, MPFR_MANT (x), N, rnd_mode, 1, 125 goto addoneulp, 126 if (MPFR_UNLIKELY (++ MPFR_EXP (z) > 127 __gmpfr_emax)) 128 return mpfr_overflow (z, rnd_mode, 1); 129 ); 130 131 if (MPFR_UNLIKELY (inexact == 0)) 132 inexact = -1; 133 MPFR_RET (inexact); 134 } 135 } 136 } 137 138 /* General case */ 139 140 N = MAX (MPFR_PREC (x), MPFR_PREC (y)); 141 142 /* working precision */ 143 Nt = Nz + MPFR_INT_CEIL_LOG2 (Nz) + 4; 144 145 mpfr_init2 (t, Nt); 146 mpfr_init2 (te, Nt); 147 mpfr_init2 (ti, Nt); 148 149 MPFR_SAVE_EXPO_MARK (expo); 150 151 /* Scale x and y to avoid any overflow and underflow in x^2 152 (as |x| >= |y|), and to limit underflow cases for y or y^2. 153 We have: x = Mx * 2^Ex with 1/2 <= |Mx| < 1, and we choose: 154 sh = floor((Emax - 1) / 2) - Ex. 155 Thus (x * 2^sh)^2 = Mx^2 * 2^(2*floor((Emax-1)/2)), which has an 156 exponent of at most Emax - 1. Therefore (x * 2^sh)^2 + (y * 2^sh)^2 157 will have an exponent of at most Emax, even after rounding as we 158 round toward zero. Using a larger sh wouldn't guarantee an absence 159 of overflow. Note that the scaling of y can underflow only when the 160 target precision is huge, otherwise the case would already have been 161 handled by the diff_exp > threshold code; but this case is avoided 162 thanks to a FMA (this problem is transferred to the FMA code). */ 163 sh = (mpfr_get_emax () - 1) / 2 - Ex; 164 165 /* FIXME: ti is subject to underflow. Solution: x and y could be 166 aliased with MPFR_ALIAS, and if need be, the aliases be pre-scaled 167 exactly as UBF, so that x^2 + y^2 is in range. Then call mpfr_fmma 168 and the square root, and scale the result. The error analysis would 169 be simpler. 170 Note: mpfr_fmma is currently not optimized. */ 171 172 MPFR_ZIV_INIT (loop, Nt); 173 for (;;) 174 { 175 mpfr_prec_t err; 176 177 exact = mpfr_mul_2si (te, x, sh, MPFR_RNDZ); 178 exact |= mpfr_mul_2si (ti, y, sh, MPFR_RNDZ); 179 exact |= mpfr_sqr (te, te, MPFR_RNDZ); 180 /* Use fma in order to avoid underflow when diff_exp<=MPFR_EMAX_MAX-2 */ 181 exact |= mpfr_fma (t, ti, ti, te, MPFR_RNDZ); 182 exact |= mpfr_sqrt (t, t, MPFR_RNDZ); 183 184 err = Nt < N ? 4 : 2; 185 if (MPFR_LIKELY (exact == 0 186 || MPFR_CAN_ROUND (t, Nt-err, Nz, rnd_mode))) 187 break; 188 189 MPFR_ZIV_NEXT (loop, Nt); 190 mpfr_set_prec (t, Nt); 191 mpfr_set_prec (te, Nt); 192 mpfr_set_prec (ti, Nt); 193 } 194 MPFR_ZIV_FREE (loop); 195 196 MPFR_BLOCK (flags, inexact = mpfr_div_2si (z, t, sh, rnd_mode)); 197 MPFR_ASSERTD (exact == 0 || inexact != 0 || rnd_mode == MPFR_RNDF); 198 199 mpfr_clear (t); 200 mpfr_clear (ti); 201 mpfr_clear (te); 202 203 /* 204 exact inexact 205 0 0 result is exact, ternary flag is 0 206 0 non zero t is exact, ternary flag given by inexact 207 1 0 impossible (see above) 208 1 non zero ternary flag given by inexact 209 */ 210 211 MPFR_SAVE_EXPO_FREE (expo); 212 213 if (MPFR_OVERFLOW (flags)) 214 MPFR_SET_OVERFLOW (); 215 /* hypot(x,y) >= |x|, thus underflow is not possible. */ 216 217 return mpfr_check_range (z, inexact, rnd_mode); 218 } 219