1 //===-- Square root of x86 long double numbers ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_SQRT_80_BIT_LONG_DOUBLE_H 10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_SQRT_80_BIT_LONG_DOUBLE_H 11 12 #include "src/__support/CPP/bit.h" 13 #include "src/__support/FPUtil/FEnvImpl.h" 14 #include "src/__support/FPUtil/FPBits.h" 15 #include "src/__support/FPUtil/rounding_mode.h" 16 #include "src/__support/common.h" 17 #include "src/__support/macros/config.h" 18 #include "src/__support/uint128.h" 19 20 namespace LIBC_NAMESPACE_DECL { 21 namespace fputil { 22 namespace x86 { 23 24 LIBC_INLINE void normalize(int &exponent, 25 FPBits<long double>::StorageType &mantissa) { 26 const unsigned int shift = static_cast<unsigned int>( 27 cpp::countl_zero(static_cast<uint64_t>(mantissa)) - 28 (8 * sizeof(uint64_t) - 1 - FPBits<long double>::FRACTION_LEN)); 29 exponent -= shift; 30 mantissa <<= shift; 31 } 32 33 // if constexpr statement in sqrt.h still requires x86::sqrt to be declared 34 // even when it's not used. 35 LIBC_INLINE long double sqrt(long double x); 36 37 // Correctly rounded SQRT for all rounding modes. 38 // Shift-and-add algorithm. 39 #if defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80) 40 LIBC_INLINE long double sqrt(long double x) { 41 using LDBits = FPBits<long double>; 42 using StorageType = typename LDBits::StorageType; 43 constexpr StorageType ONE = StorageType(1) << int(LDBits::FRACTION_LEN); 44 constexpr auto LDNAN = LDBits::quiet_nan().get_val(); 45 46 LDBits bits(x); 47 48 if (bits == LDBits::inf(Sign::POS) || bits.is_zero() || bits.is_nan()) { 49 // sqrt(+Inf) = +Inf 50 // sqrt(+0) = +0 51 // sqrt(-0) = -0 52 // sqrt(NaN) = NaN 53 // sqrt(-NaN) = -NaN 54 return x; 55 } else if (bits.is_neg()) { 56 // sqrt(-Inf) = NaN 57 // sqrt(-x) = NaN 58 return LDNAN; 59 } else { 60 int x_exp = bits.get_explicit_exponent(); 61 StorageType x_mant = bits.get_mantissa(); 62 63 // Step 1a: Normalize denormal input 64 if (bits.get_implicit_bit()) { 65 x_mant |= ONE; 66 } else if (bits.is_subnormal()) { 67 normalize(x_exp, x_mant); 68 } 69 70 // Step 1b: Make sure the exponent is even. 71 if (x_exp & 1) { 72 --x_exp; 73 x_mant <<= 1; 74 } 75 76 // After step 1b, x = 2^(x_exp) * x_mant, where x_exp is even, and 77 // 1 <= x_mant < 4. So sqrt(x) = 2^(x_exp / 2) * y, with 1 <= y < 2. 78 // Notice that the output of sqrt is always in the normal range. 79 // To perform shift-and-add algorithm to find y, let denote: 80 // y(n) = 1.y_1 y_2 ... y_n, we can define the nth residue to be: 81 // r(n) = 2^n ( x_mant - y(n)^2 ). 82 // That leads to the following recurrence formula: 83 // r(n) = 2*r(n-1) - y_n*[ 2*y(n-1) + 2^(-n-1) ] 84 // with the initial conditions: y(0) = 1, and r(0) = x - 1. 85 // So the nth digit y_n of the mantissa of sqrt(x) can be found by: 86 // y_n = 1 if 2*r(n-1) >= 2*y(n - 1) + 2^(-n-1) 87 // 0 otherwise. 88 StorageType y = ONE; 89 StorageType r = x_mant - ONE; 90 91 for (StorageType current_bit = ONE >> 1; current_bit; current_bit >>= 1) { 92 r <<= 1; 93 StorageType tmp = (y << 1) + current_bit; // 2*y(n - 1) + 2^(-n-1) 94 if (r >= tmp) { 95 r -= tmp; 96 y += current_bit; 97 } 98 } 99 100 // We compute one more iteration in order to round correctly. 101 bool lsb = static_cast<bool>(y & 1); // Least significant bit 102 bool rb = false; // Round bit 103 r <<= 2; 104 StorageType tmp = (y << 2) + 1; 105 if (r >= tmp) { 106 r -= tmp; 107 rb = true; 108 } 109 110 // Append the exponent field. 111 x_exp = ((x_exp >> 1) + LDBits::EXP_BIAS); 112 y |= (static_cast<StorageType>(x_exp) << (LDBits::FRACTION_LEN + 1)); 113 114 switch (quick_get_round()) { 115 case FE_TONEAREST: 116 // Round to nearest, ties to even 117 if (rb && (lsb || (r != 0))) 118 ++y; 119 break; 120 case FE_UPWARD: 121 if (rb || (r != 0)) 122 ++y; 123 break; 124 } 125 126 // Extract output 127 FPBits<long double> out(0.0L); 128 out.set_biased_exponent(x_exp); 129 out.set_implicit_bit(1); 130 out.set_mantissa((y & (ONE - 1))); 131 132 return out.get_val(); 133 } 134 } 135 #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 136 137 } // namespace x86 138 } // namespace fputil 139 } // namespace LIBC_NAMESPACE_DECL 140 141 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_SQRT_80_BIT_LONG_DOUBLE_H 142