hypot.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd) - OpenGrok cross reference for /netbsd-src/external/lgpl3/mpfr/dist/src/hypot.c

/* mpfr_hypot -- Euclidean distance

Copyright 2001-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* The computation of hypot of x and y is done by  *
 *    hypot(x,y)= sqrt(x^2+y^2) = z                */

int
mpfr_hypot (mpfr_ptr z, mpfr_srcptr x, mpfr_srcptr y, mpfr_rnd_t rnd_mode)
{
  int inexact;
  unsigned int exact;  /* Warning: 0 will mean "exact" */
  mpfr_t t, te, ti; /* auxiliary variables */
  mpfr_prec_t N, Nz; /* size variables */
  mpfr_prec_t Nt;   /* precision of the intermediary variable */
  mpfr_prec_t threshold;
  mpfr_exp_t Ex, sh;
  mpfr_uexp_t diff_exp;

  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_ZIV_DECL (loop);
  MPFR_BLOCK_DECL (flags);

  MPFR_LOG_FUNC
    (("x[%Pd]=%.*Rg y[%Pd]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y, rnd_mode),
     ("z[%Pd]=%.*Rg inexact=%d",
      mpfr_get_prec (z), mpfr_log_prec, z, inexact));

  /* particular cases */
  if (MPFR_ARE_SINGULAR (x, y))
    {
      if (MPFR_IS_INF (x) || MPFR_IS_INF (y))
        {
          /* Return +inf, even when the other number is NaN. */
          MPFR_SET_INF (z);
          MPFR_SET_POS (z);
          MPFR_RET (0);
        }
      else if (MPFR_IS_NAN (x) || MPFR_IS_NAN (y))
        {
          MPFR_SET_NAN (z);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_ZERO (x))
        return mpfr_abs (z, y, rnd_mode);
      else /* y is necessarily 0 */
        return mpfr_abs (z, x, rnd_mode);
    }

  /* TODO: It may be sufficient to just compare the exponents.
     The error analysis would need to be updated. */
  if (mpfr_cmpabs (x, y) < 0)
    {
      mpfr_srcptr u;
      u = x;
      x = y;
      y = u;
    }

  /* now |x| >= |y| */

  Ex = MPFR_GET_EXP (x);
  diff_exp = (mpfr_uexp_t) Ex - MPFR_GET_EXP (y);

  N = MPFR_PREC (x);   /* Precision of input variable */
  Nz = MPFR_PREC (z);   /* Precision of output variable */
  threshold = (MAX (N, Nz) + (rnd_mode == MPFR_RNDN ? 1 : 0)) << 1;
  if (rnd_mode == MPFR_RNDA)
    rnd_mode = MPFR_RNDU; /* since the result is positive, RNDA = RNDU */

  /* Is |x| a suitable approximation to the precision Nz ?
     (see algorithms.tex for explanations) */
  if (diff_exp > threshold)
    /* result is |x| or |x|+ulp(|x|,Nz) */
    {
      if (MPFR_UNLIKELY (rnd_mode == MPFR_RNDU))
        {
          /* If z > abs(x), then it was already rounded up; otherwise
             z = abs(x), and we need to add one ulp due to y. */
          if (mpfr_abs (z, x, rnd_mode) == 0)
            {
              mpfr_nexttoinf (z);
              /* since mpfr_nexttoinf does not set the overflow flag,
                 we have to check manually for overflow */
              if (MPFR_UNLIKELY (MPFR_IS_INF (z)))
                MPFR_SET_OVERFLOW ();
            }
          MPFR_RET (1);
        }
      else /* MPFR_RNDZ, MPFR_RNDD, MPFR_RNDN */
        {
          if (MPFR_LIKELY (Nz >= N))
            {
              mpfr_abs (z, x, rnd_mode);  /* exact */
              MPFR_RET (-1);
            }
          else
            {
              MPFR_SET_EXP (z, Ex);
              MPFR_SET_SIGN (z, MPFR_SIGN_POS);
              MPFR_RNDRAW_GEN (inexact, z, MPFR_MANT (x), N, rnd_mode, 1,
                               goto addoneulp,
                               if (MPFR_UNLIKELY (++ MPFR_EXP (z) >
                                                  __gmpfr_emax))
                                 return mpfr_overflow (z, rnd_mode, 1);
                               );

              if (MPFR_UNLIKELY (inexact == 0))
                inexact = -1;
              MPFR_RET (inexact);
            }
        }
    }

  /* General case */

  N = MAX (MPFR_PREC (x), MPFR_PREC (y));

  /* working precision */
  Nt = Nz + MPFR_INT_CEIL_LOG2 (Nz) + 4;

  mpfr_init2 (t, Nt);
  mpfr_init2 (te, Nt);
  mpfr_init2 (ti, Nt);

  MPFR_SAVE_EXPO_MARK (expo);

  /* Scale x and y to avoid any overflow and underflow in x^2
     (as |x| >= |y|), and to limit underflow cases for y or y^2.
     We have: x = Mx * 2^Ex with 1/2 <= |Mx| < 1, and we choose:
     sh = floor((Emax - 1) / 2) - Ex.
     Thus (x * 2^sh)^2 = Mx^2 * 2^(2*floor((Emax-1)/2)), which has an
     exponent of at most Emax - 1. Therefore (x * 2^sh)^2 + (y * 2^sh)^2
     will have an exponent of at most Emax, even after rounding as we
     round toward zero. Using a larger sh wouldn't guarantee an absence
     of overflow. Note that the scaling of y can underflow only when the
     target precision is huge, otherwise the case would already have been
     handled by the diff_exp > threshold code; but this case is avoided
     thanks to a FMA (this problem is transferred to the FMA code). */
  sh = (mpfr_get_emax () - 1) / 2 - Ex;

  /* FIXME: ti is subject to underflow. Solution: x and y could be
     aliased with MPFR_ALIAS, and if need be, the aliases be pre-scaled
     exactly as UBF, so that x^2 + y^2 is in range. Then call mpfr_fmma
     and the square root, and scale the result. The error analysis would
     be simpler.
     Note: mpfr_fmma is currently not optimized. */

  MPFR_ZIV_INIT (loop, Nt);
  for (;;)
    {
      mpfr_prec_t err;

      exact = mpfr_mul_2si (te, x, sh, MPFR_RNDZ);
      exact |= mpfr_mul_2si (ti, y, sh, MPFR_RNDZ);
      exact |= mpfr_sqr (te, te, MPFR_RNDZ);
      /* Use fma in order to avoid underflow when diff_exp<=MPFR_EMAX_MAX-2 */
      exact |= mpfr_fma (t, ti, ti, te, MPFR_RNDZ);
      exact |= mpfr_sqrt (t, t, MPFR_RNDZ);

      err = Nt < N ? 4 : 2;
      if (MPFR_LIKELY (exact == 0
                       || MPFR_CAN_ROUND (t, Nt-err, Nz, rnd_mode)))
        break;

      MPFR_ZIV_NEXT (loop, Nt);
      mpfr_set_prec (t, Nt);
      mpfr_set_prec (te, Nt);
      mpfr_set_prec (ti, Nt);
    }
  MPFR_ZIV_FREE (loop);

  MPFR_BLOCK (flags, inexact = mpfr_div_2si (z, t, sh, rnd_mode));
  MPFR_ASSERTD (exact == 0 || inexact != 0 || rnd_mode == MPFR_RNDF);

  mpfr_clear (t);
  mpfr_clear (ti);
  mpfr_clear (te);

  /*
    exact  inexact
    0         0         result is exact, ternary flag is 0
    0       non zero    t is exact, ternary flag given by inexact
    1         0         impossible (see above)
    1       non zero    ternary flag given by inexact
  */

  MPFR_SAVE_EXPO_FREE (expo);

  if (MPFR_OVERFLOW (flags))
    MPFR_SET_OVERFLOW ();
  /* hypot(x,y) >= |x|, thus underflow is not possible. */

  return mpfr_check_range (z, inexact, rnd_mode);
}