1bbb75554SSiva Chandra //===-- Implementation of hypotf function ---------------------------------===// 2bbb75554SSiva Chandra // 3bbb75554SSiva Chandra // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bbb75554SSiva Chandra // See https://llvm.org/LICENSE.txt for license information. 5bbb75554SSiva Chandra // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bbb75554SSiva Chandra // 7bbb75554SSiva Chandra //===----------------------------------------------------------------------===// 8bbb75554SSiva Chandra #include "src/math/hypotf.h" 9*9da9127fSlntue #include "src/__support/FPUtil/FEnvImpl.h" 10f1ec99f9STue Ly #include "src/__support/FPUtil/FPBits.h" 11*9da9127fSlntue #include "src/__support/FPUtil/double_double.h" 12*9da9127fSlntue #include "src/__support/FPUtil/multiply_add.h" 13f1ec99f9STue Ly #include "src/__support/FPUtil/sqrt.h" 14bbb75554SSiva Chandra #include "src/__support/common.h" 155ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 16*9da9127fSlntue #include "src/__support/macros/optimization.h" 17bbb75554SSiva Chandra 185ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 19bbb75554SSiva Chandra 20bbb75554SSiva Chandra LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { 21f1ec99f9STue Ly using DoubleBits = fputil::FPBits<double>; 22f1ec99f9STue Ly using FPBits = fputil::FPBits<float>; 23f1ec99f9STue Ly 24*9da9127fSlntue FPBits x_abs = FPBits(x).abs(); 25*9da9127fSlntue FPBits y_abs = FPBits(y).abs(); 26f1ec99f9STue Ly 27*9da9127fSlntue bool x_abs_larger = x_abs.uintval() >= y_abs.uintval(); 28f1ec99f9STue Ly 29*9da9127fSlntue FPBits a_bits = x_abs_larger ? x_abs : y_abs; 30*9da9127fSlntue FPBits b_bits = x_abs_larger ? y_abs : x_abs; 31*9da9127fSlntue 32*9da9127fSlntue uint32_t a_u = a_bits.uintval(); 33*9da9127fSlntue uint32_t b_u = b_bits.uintval(); 34*9da9127fSlntue 35*9da9127fSlntue // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()` 36*9da9127fSlntue // generates extra exponent bit masking instructions on x86-64. 37*9da9127fSlntue if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) { 38*9da9127fSlntue // x or y is inf or nan 39*9da9127fSlntue if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) { 40*9da9127fSlntue fputil::raise_except_if_required(FE_INVALID); 41*9da9127fSlntue return FPBits::quiet_nan().get_val(); 42*9da9127fSlntue } 43*9da9127fSlntue if (a_bits.is_inf() || b_bits.is_inf()) 44*9da9127fSlntue return FPBits::inf().get_val(); 45*9da9127fSlntue return a_bits.get_val(); 46f1ec99f9STue Ly } 47f1ec99f9STue Ly 48*9da9127fSlntue if (LIBC_UNLIKELY(a_u - b_u >= 49*9da9127fSlntue static_cast<uint32_t>((FPBits::FRACTION_LEN + 2) 50*9da9127fSlntue << FPBits::FRACTION_LEN))) 51*9da9127fSlntue return x_abs.get_val() + y_abs.get_val(); 52*9da9127fSlntue 53*9da9127fSlntue double ad = static_cast<double>(a_bits.get_val()); 54*9da9127fSlntue double bd = static_cast<double>(b_bits.get_val()); 55f1ec99f9STue Ly 56f1ec99f9STue Ly // These squares are exact. 57*9da9127fSlntue double a_sq = ad * ad; 58*9da9127fSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA 59*9da9127fSlntue double sum_sq = fputil::multiply_add(bd, bd, a_sq); 60*9da9127fSlntue #else 61*9da9127fSlntue double b_sq = bd * bd; 62*9da9127fSlntue double sum_sq = a_sq + b_sq; 63*9da9127fSlntue #endif 64f1ec99f9STue Ly 65f1ec99f9STue Ly // Take sqrt in double precision. 66a2393435SOverMighty DoubleBits result(fputil::sqrt<double>(sum_sq)); 67*9da9127fSlntue uint64_t r_u = result.uintval(); 68f1ec99f9STue Ly 69*9da9127fSlntue // If any of the sticky bits of the result are non-zero, except the LSB, then 70*9da9127fSlntue // the rounded result is correct. 71*9da9127fSlntue if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0000'0FFF'FFFE) == 0)) { 72*9da9127fSlntue double r_d = result.get_val(); 73f1ec99f9STue Ly 74*9da9127fSlntue // Perform rounding correction. 75*9da9127fSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA 76*9da9127fSlntue double sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq); 77*9da9127fSlntue double err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq); 78*9da9127fSlntue #else 79*9da9127fSlntue fputil::DoubleDouble r_sq = fputil::exact_mult(r_d, r_d); 80*9da9127fSlntue double sum_sq_lo = b_sq - (sum_sq - a_sq); 81*9da9127fSlntue double err = (sum_sq - r_sq.hi) + (sum_sq_lo - r_sq.lo); 82*9da9127fSlntue #endif 83*9da9127fSlntue 84*9da9127fSlntue if (err > 0) { 85*9da9127fSlntue r_u |= 1; 86*9da9127fSlntue } else if ((err < 0) && (r_u & 1) == 0) { 87*9da9127fSlntue r_u -= 1; 88*9da9127fSlntue } else if ((r_u & 0x0000'0000'1FFF'FFFF) == 0) { 89*9da9127fSlntue // The rounded result is exact. 90*9da9127fSlntue fputil::clear_except_if_required(FE_INEXACT); 91f1ec99f9STue Ly } 92*9da9127fSlntue return static_cast<float>(DoubleBits(r_u).get_val()); 93f1ec99f9STue Ly } 94f1ec99f9STue Ly 952856db0dSGuillaume Chatelet return static_cast<float>(result.get_val()); 96bbb75554SSiva Chandra } 97bbb75554SSiva Chandra 985ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 99