17fc9fb9fSlntue //===-- Implementation of cbrt function -----------------------------------===// 27fc9fb9fSlntue // 37fc9fb9fSlntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 47fc9fb9fSlntue // See https://llvm.org/LICENSE.txt for license information. 57fc9fb9fSlntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67fc9fb9fSlntue // 77fc9fb9fSlntue //===----------------------------------------------------------------------===// 87fc9fb9fSlntue 97fc9fb9fSlntue #include "src/math/cbrt.h" 107fc9fb9fSlntue #include "hdr/fenv_macros.h" 117fc9fb9fSlntue #include "src/__support/FPUtil/FEnvImpl.h" 127fc9fb9fSlntue #include "src/__support/FPUtil/FPBits.h" 137fc9fb9fSlntue #include "src/__support/FPUtil/PolyEval.h" 147fc9fb9fSlntue #include "src/__support/FPUtil/double_double.h" 157fc9fb9fSlntue #include "src/__support/FPUtil/dyadic_float.h" 167fc9fb9fSlntue #include "src/__support/FPUtil/multiply_add.h" 177fc9fb9fSlntue #include "src/__support/common.h" 187fc9fb9fSlntue #include "src/__support/integer_literals.h" 197fc9fb9fSlntue #include "src/__support/macros/config.h" 207fc9fb9fSlntue #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY 217fc9fb9fSlntue 227fc9fb9fSlntue #if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) 237fc9fb9fSlntue #define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS 247fc9fb9fSlntue #endif 257fc9fb9fSlntue 267fc9fb9fSlntue namespace LIBC_NAMESPACE_DECL { 277fc9fb9fSlntue 287fc9fb9fSlntue using DoubleDouble = fputil::DoubleDouble; 297fc9fb9fSlntue using Float128 = fputil::DyadicFloat<128>; 307fc9fb9fSlntue 317fc9fb9fSlntue namespace { 327fc9fb9fSlntue 337fc9fb9fSlntue // Initial approximation of x^(-2/3) for 1 <= x < 2. 
347fc9fb9fSlntue // Polynomial generated by Sollya with: 357fc9fb9fSlntue // > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]); 367fc9fb9fSlntue // > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]); 377fc9fb9fSlntue // 0x1.28...p-21 38ac1d5facSlntue double intial_approximation(double x) { 397fc9fb9fSlntue constexpr double COEFFS[8] = { 407fc9fb9fSlntue 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3, 417fc9fb9fSlntue -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0, 427fc9fb9fSlntue 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6, 437fc9fb9fSlntue }; 447fc9fb9fSlntue 457fc9fb9fSlntue double x_sq = x * x; 467fc9fb9fSlntue 477fc9fb9fSlntue double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]); 487fc9fb9fSlntue double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]); 497fc9fb9fSlntue double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]); 507fc9fb9fSlntue double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]); 517fc9fb9fSlntue 527fc9fb9fSlntue double x_4 = x_sq * x_sq; 537fc9fb9fSlntue double d0 = fputil::multiply_add(x_sq, c1, c0); 547fc9fb9fSlntue double d1 = fputil::multiply_add(x_sq, c3, c2); 557fc9fb9fSlntue 567fc9fb9fSlntue return fputil::multiply_add(x_4, d1, d0); 577fc9fb9fSlntue } 587fc9fb9fSlntue 597fc9fb9fSlntue // Get the error term for Newton iteration: 607fc9fb9fSlntue // h(x) = x^3 * a^2 - 1, 617fc9fb9fSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA 62ac1d5facSlntue double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { 637fc9fb9fSlntue return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) + 647fc9fb9fSlntue fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo); 657fc9fb9fSlntue } 667fc9fb9fSlntue #else 67ac1d5facSlntue double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { 687fc9fb9fSlntue DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3); 697fc9fb9fSlntue return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; 707fc9fb9fSlntue } 717fc9fb9fSlntue #endif 727fc9fb9fSlntue 737fc9fb9fSlntue } // anonymous namespace 

// Correctly rounded cbrt algorithm:
//
// === Step 1 - Range reduction ===
// For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as:
//   x_r = 1.m
//   a   = (-1)^s * 2^(e % 3) * (1.m)
// Then cbrt(x) = x^(1/3) can be computed as:
//   x^(1/3) = 2^(e / 3) * a^(1/3).
//
// In order to avoid division, we compute a^(-2/3) using Newton method and then
// multiply the results by a:
//   a^(1/3) = a * a^(-2/3).
//
// === Step 2 - First approximation to a^(-2/3) ===
// First, we use a degree-7 minimax polynomial generated by Sollya to
// approximate x_r^(-2/3) for 1 <= x_r < 2.
//   p = P(x_r) ~ x_r^(-2/3),
// with relative errors bounded by:
//   | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21.
//
// Then we multiply with 2^(-2*(e % 3)/3) from a small lookup table to get:
//   x_0 = 2^(-2*(e % 3)/3) * p
//       ~ 2^(-2*(e % 3)/3) * x_r^(-2/3)
//       = a^(-2/3)
// With relative errors:
//   | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21.
// This step is done in double precision.
//
// === Step 3 - First Newton iteration ===
// We follow the method described in:
//   Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation
//   in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf
// to derive multiplicative Newton iterations as below:
// Let x_n be the nth approximation to a^(-2/3).  Define the n^th error as:
//   h_n = x_n^3 * a^2 - 1
// Then:
//   a^(-2/3) = x_n / (1 + h_n)^(1/3)
//            = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...)
// using the Taylor series expansion of (1 + h_n)^(-1/3).
//
// Apply to x_0 above:
//   h_0 = x_0^3 * a^2 - 1
//       = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)),
// it's bounded by:
//   |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17.
// So in the first iteration step, we use:
//   x_1 = x_0 * (1 - (1/3) * h_0 + (2/9) * h_0^2 - (14/81) * h_0^3)
// Its relative error is bounded by:
//   | x_1 / a^(-2/3) - 1 | < 35/243 * |h_0|^4 < 2^-70.
// Then we perform Ziv's rounding test and check if the answer is exact.
// This step is done in double-double precision.
//
// === Step 4 - Second Newton iteration ===
// If the Ziv's rounding test from the previous step fails, we define the error
// term:
//   h_1 = x_1^3 * a^2 - 1,
// And perform another iteration:
//   x_2 = x_1 * (1 - h_1 / 3)
// with relative errors exceeding the precision of double-double.
// We then check the Ziv's accuracy test with relative errors < 2^-102 to
// compensate for rounding errors.
//
// === Step 5 - Final iteration ===
// If the Ziv's accuracy test from the previous step fails, we perform another
// iteration in 128-bit precision and check for exact outputs.
1407fc9fb9fSlntue // 1417fc9fb9fSlntue // TODO: It is possible to replace this costly computation step with special 1427fc9fb9fSlntue // exceptional handling, similar to what was done in the CORE-MATH project: 1437fc9fb9fSlntue // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c 1447fc9fb9fSlntue 1457fc9fb9fSlntue LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { 1467fc9fb9fSlntue using FPBits = fputil::FPBits<double>; 1477fc9fb9fSlntue 1487fc9fb9fSlntue uint64_t x_abs = FPBits(x).abs().uintval(); 1497fc9fb9fSlntue 1507fc9fb9fSlntue unsigned exp_bias_correction = 682; // 1023 * 2/3 1517fc9fb9fSlntue 1527fc9fb9fSlntue if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || 1537fc9fb9fSlntue x_abs >= FPBits::inf().uintval())) { 154*0f4b3c40Slntue if (x == 0.0 || x_abs >= FPBits::inf().uintval()) 1557fc9fb9fSlntue // x is 0, Inf, or NaN. 156*0f4b3c40Slntue // Make sure it works for FTZ/DAZ modes. 157*0f4b3c40Slntue return static_cast<double>(x + x); 1587fc9fb9fSlntue 1597fc9fb9fSlntue // x is non-zero denormal number. 1607fc9fb9fSlntue // Normalize x. 1617fc9fb9fSlntue x *= 0x1.0p60; 1627fc9fb9fSlntue exp_bias_correction -= 20; 1637fc9fb9fSlntue } 1647fc9fb9fSlntue 1657fc9fb9fSlntue FPBits x_bits(x); 1667fc9fb9fSlntue 1677fc9fb9fSlntue // When using biased exponent of x in double precision, 1687fc9fb9fSlntue // x_e = real_exponent_of_x + 1023 1697fc9fb9fSlntue // Then: 1707fc9fb9fSlntue // x_e / 3 = real_exponent_of_x / 3 + 1023/3 1717fc9fb9fSlntue // = real_exponent_of_x / 3 + 341 1727fc9fb9fSlntue // So to make it the correct biased exponent of x^(1/3), we add 1737fc9fb9fSlntue // 1023 - 341 = 682 1747fc9fb9fSlntue // to the quotient x_e / 3. 
1757fc9fb9fSlntue unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent()); 1767fc9fb9fSlntue unsigned out_e = (x_e / 3 + exp_bias_correction); 1777fc9fb9fSlntue unsigned shift_e = x_e % 3; 1787fc9fb9fSlntue 1797fc9fb9fSlntue // Set x_r = 1.mantissa 1807fc9fb9fSlntue double x_r = 1817fc9fb9fSlntue FPBits(x_bits.get_mantissa() | 1827fc9fb9fSlntue (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN)) 1837fc9fb9fSlntue .get_val(); 1847fc9fb9fSlntue 1857fc9fb9fSlntue // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa) 1867fc9fb9fSlntue uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF; 1877fc9fb9fSlntue a_bits |= 1887fc9fb9fSlntue (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS)) 1897fc9fb9fSlntue << FPBits::FRACTION_LEN); 1907fc9fb9fSlntue double a = FPBits(a_bits).get_val(); 1917fc9fb9fSlntue 1927fc9fb9fSlntue // Initial approximation of x_r^(-2/3). 1937fc9fb9fSlntue double p = intial_approximation(x_r); 1947fc9fb9fSlntue 1957fc9fb9fSlntue // Look up for 2^(-2*n/3) used for first approximation step. 1967fc9fb9fSlntue constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1, 1977fc9fb9fSlntue 0x1.965fea53d6e3dp-2}; 1987fc9fb9fSlntue 1997fc9fb9fSlntue // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8. 2007fc9fb9fSlntue // Relative error: < 1.16 * 2^(-21). 2017fc9fb9fSlntue double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p); 2027fc9fb9fSlntue 2037fc9fb9fSlntue // First iteration in double precision. 
2047fc9fb9fSlntue DoubleDouble a_sq = fputil::exact_mult(a, a); 2057fc9fb9fSlntue 2067fc9fb9fSlntue // h0 = x0^3 * a^2 - 1 2077fc9fb9fSlntue DoubleDouble x0_sq = fputil::exact_mult(x0, x0); 2087fc9fb9fSlntue DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq); 2097fc9fb9fSlntue 2107fc9fb9fSlntue double h0 = get_error(x0_3, a_sq); 2117fc9fb9fSlntue 2127fc9fb9fSlntue #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS 2137fc9fb9fSlntue constexpr double REL_ERROR = 0; 2147fc9fb9fSlntue #else 2157fc9fb9fSlntue constexpr double REL_ERROR = 0x1.0p-51; 2167fc9fb9fSlntue #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS 2177fc9fb9fSlntue 2187fc9fb9fSlntue // Taylor polynomial of (1 + h)^(-1/3): 2197fc9fb9fSlntue // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ... 2207fc9fb9fSlntue constexpr double ERR_COEFFS[3] = { 2217fc9fb9fSlntue -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error 2227fc9fb9fSlntue 0x1.c71c71c71c71cp-3, // 2/9 2237fc9fb9fSlntue -0x1.61f9add3c0ca4p-3, // -14/81 2247fc9fb9fSlntue }; 2257fc9fb9fSlntue // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error. 2267fc9fb9fSlntue double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]); 2277fc9fb9fSlntue double x0_h0 = x0 * h0; 2287fc9fb9fSlntue 2297fc9fb9fSlntue // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81) 2307fc9fb9fSlntue // x1 approximate a^(-2/3) with relative errors bounded by: 2317fc9fb9fSlntue // | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR 2327fc9fb9fSlntue DoubleDouble x1_dd{x0_h0 * e0, x0}; 2337fc9fb9fSlntue 2347fc9fb9fSlntue // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3). 2357fc9fb9fSlntue DoubleDouble r1 = fputil::quick_mult(a, x1_dd); 2367fc9fb9fSlntue 2377fc9fb9fSlntue // Lambda function to update the exponent of the result. 2387fc9fb9fSlntue auto update_exponent = [=](double r) -> double { 239a8790734Slntue uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000; 2407fc9fb9fSlntue // Adjust exponent and sign. 
2417fc9fb9fSlntue uint64_t r_bits = 242a8790734Slntue r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN); 2437fc9fb9fSlntue return FPBits(r_bits).get_val(); 2447fc9fb9fSlntue }; 2457fc9fb9fSlntue 2467fc9fb9fSlntue #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS 2477fc9fb9fSlntue // TODO: We probably don't need to use double-double if accurate tests and 2487fc9fb9fSlntue // passes are skipped. 2497fc9fb9fSlntue return update_exponent(r1.hi + r1.lo); 2507fc9fb9fSlntue #else 2517fc9fb9fSlntue // Accurate checks and passes. 2527fc9fb9fSlntue double r1_lower = r1.hi + r1.lo; 2537fc9fb9fSlntue double r1_upper = 2547fc9fb9fSlntue r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo); 2557fc9fb9fSlntue 2567fc9fb9fSlntue // Ziv's accuracy test. 2577fc9fb9fSlntue if (LIBC_LIKELY(r1_upper == r1_lower)) { 2587fc9fb9fSlntue // Test for exact outputs. 2597fc9fb9fSlntue // Check if lower (52 - 17 = 35) bits are 0's. 2607fc9fb9fSlntue if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) == 2617fc9fb9fSlntue 0)) { 2627fc9fb9fSlntue double r1_err = (r1_lower - r1.hi) - r1.lo; 2637fc9fb9fSlntue if (FPBits(r1_err).abs().get_val() < 0x1.0p69) 2647fc9fb9fSlntue fputil::clear_except_if_required(FE_INEXACT); 2657fc9fb9fSlntue } 2667fc9fb9fSlntue 2677fc9fb9fSlntue return update_exponent(r1_lower); 2687fc9fb9fSlntue } 2697fc9fb9fSlntue 2707fc9fb9fSlntue // Accuracy test failed, perform another Newton iteration. 2717fc9fb9fSlntue double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0; 2727fc9fb9fSlntue 2737fc9fb9fSlntue // Second iteration in double-double precision. 2747fc9fb9fSlntue // h1 = x1^3 * a^2 - 1. 2757fc9fb9fSlntue DoubleDouble x1_sq = fputil::exact_mult(x1, x1); 2767fc9fb9fSlntue DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq); 2777fc9fb9fSlntue double h1 = get_error(x1_3, a_sq); 2787fc9fb9fSlntue 2797fc9fb9fSlntue // e1 = -x1*h1/3. 
2807fc9fb9fSlntue double e1 = h1 * (x1 * -0x1.5555555555555p-2); 2817fc9fb9fSlntue // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101. 2827fc9fb9fSlntue DoubleDouble x2 = fputil::exact_add(x1, e1); 2837fc9fb9fSlntue // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100. 2847fc9fb9fSlntue DoubleDouble r2 = fputil::quick_mult(a, x2); 2857fc9fb9fSlntue 2867fc9fb9fSlntue double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo); 2877fc9fb9fSlntue double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo); 2887fc9fb9fSlntue 2897fc9fb9fSlntue // Ziv's accuracy test. 2907fc9fb9fSlntue if (LIBC_LIKELY(r2_upper == r2_lower)) 2917fc9fb9fSlntue return update_exponent(r2_upper); 2927fc9fb9fSlntue 2937fc9fb9fSlntue // TODO: Investigate removing float128 and just list exceptional cases. 2947fc9fb9fSlntue // Apply another Newton iteration with ~126-bit accuracy. 2957fc9fb9fSlntue Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo)); 2967fc9fb9fSlntue // x2^3 2977fc9fb9fSlntue Float128 x2_3 = 2987fc9fb9fSlntue fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128); 2997fc9fb9fSlntue // a^2 3007fc9fb9fSlntue Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a)); 3017fc9fb9fSlntue // x2^3 * a^2 3027fc9fb9fSlntue Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128); 3037fc9fb9fSlntue // h2 = x2^3 * a^2 - 1 3047fc9fb9fSlntue Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0)); 3057fc9fb9fSlntue double h2 = static_cast<double>(h2_f128); 3067fc9fb9fSlntue // t2 = 1 - h2 / 3 3077fc9fb9fSlntue Float128 t2 = 3087fc9fb9fSlntue fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2))); 3097fc9fb9fSlntue // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3) 3107fc9fb9fSlntue Float128 x3 = fputil::quick_mul(x2_f128, t2); 3117fc9fb9fSlntue // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3) 3127fc9fb9fSlntue Float128 r3 = fputil::quick_mul(Float128(a), x3); 3137fc9fb9fSlntue 3147fc9fb9fSlntue // 
Check for exact cases: 3157fc9fb9fSlntue Float128::MantissaType rounding_bits = 3167fc9fb9fSlntue r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128; 3177fc9fb9fSlntue 3187fc9fb9fSlntue double result = static_cast<double>(r3); 3197fc9fb9fSlntue if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) || 3207fc9fb9fSlntue (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) { 3217fc9fb9fSlntue // Output is exact. 3227fc9fb9fSlntue r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128; 3237fc9fb9fSlntue 3247fc9fb9fSlntue if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) { 3257fc9fb9fSlntue Float128 tmp{r3.sign, r3.exponent - 123, 3267fc9fb9fSlntue 0x8000'0000'0000'0000'0000'0000'0000'0000_u128}; 3277fc9fb9fSlntue Float128 r4 = fputil::quick_add(r3, tmp); 3287fc9fb9fSlntue result = static_cast<double>(r4); 3297fc9fb9fSlntue } else { 3307fc9fb9fSlntue result = static_cast<double>(r3); 3317fc9fb9fSlntue } 3327fc9fb9fSlntue 3337fc9fb9fSlntue fputil::clear_except_if_required(FE_INEXACT); 3347fc9fb9fSlntue } 3357fc9fb9fSlntue 3367fc9fb9fSlntue return update_exponent(result); 3377fc9fb9fSlntue #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS 3387fc9fb9fSlntue } 3397fc9fb9fSlntue 3407fc9fb9fSlntue } // namespace LIBC_NAMESPACE_DECL 341