xref: /llvm-project/libc/src/math/generic/cbrt.cpp (revision 0f4b3c409fbd756d826c89d5539d9ea22bcc56aa)
17fc9fb9fSlntue //===-- Implementation of cbrt function -----------------------------------===//
27fc9fb9fSlntue //
37fc9fb9fSlntue // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47fc9fb9fSlntue // See https://llvm.org/LICENSE.txt for license information.
57fc9fb9fSlntue // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67fc9fb9fSlntue //
77fc9fb9fSlntue //===----------------------------------------------------------------------===//
87fc9fb9fSlntue 
97fc9fb9fSlntue #include "src/math/cbrt.h"
107fc9fb9fSlntue #include "hdr/fenv_macros.h"
117fc9fb9fSlntue #include "src/__support/FPUtil/FEnvImpl.h"
127fc9fb9fSlntue #include "src/__support/FPUtil/FPBits.h"
137fc9fb9fSlntue #include "src/__support/FPUtil/PolyEval.h"
147fc9fb9fSlntue #include "src/__support/FPUtil/double_double.h"
157fc9fb9fSlntue #include "src/__support/FPUtil/dyadic_float.h"
167fc9fb9fSlntue #include "src/__support/FPUtil/multiply_add.h"
177fc9fb9fSlntue #include "src/__support/common.h"
187fc9fb9fSlntue #include "src/__support/integer_literals.h"
197fc9fb9fSlntue #include "src/__support/macros/config.h"
207fc9fb9fSlntue #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
217fc9fb9fSlntue 
227fc9fb9fSlntue #if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
237fc9fb9fSlntue #define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
247fc9fb9fSlntue #endif
257fc9fb9fSlntue 
267fc9fb9fSlntue namespace LIBC_NAMESPACE_DECL {
277fc9fb9fSlntue 
287fc9fb9fSlntue using DoubleDouble = fputil::DoubleDouble;
297fc9fb9fSlntue using Float128 = fputil::DyadicFloat<128>;
307fc9fb9fSlntue 
317fc9fb9fSlntue namespace {
327fc9fb9fSlntue 
337fc9fb9fSlntue // Initial approximation of x^(-2/3) for 1 <= x < 2.
347fc9fb9fSlntue // Polynomial generated by Sollya with:
357fc9fb9fSlntue // > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]);
367fc9fb9fSlntue // > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]);
377fc9fb9fSlntue // 0x1.28...p-21
38ac1d5facSlntue double intial_approximation(double x) {
397fc9fb9fSlntue   constexpr double COEFFS[8] = {
407fc9fb9fSlntue       0x1.bc52aedead5c6p1,  -0x1.b52bfebf110b3p2,  0x1.1d8d71d53d126p3,
417fc9fb9fSlntue       -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2,   -0x1.5973c66ee6da7p0,
427fc9fb9fSlntue       0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6,
437fc9fb9fSlntue   };
447fc9fb9fSlntue 
457fc9fb9fSlntue   double x_sq = x * x;
467fc9fb9fSlntue 
477fc9fb9fSlntue   double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]);
487fc9fb9fSlntue   double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]);
497fc9fb9fSlntue   double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]);
507fc9fb9fSlntue   double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]);
517fc9fb9fSlntue 
527fc9fb9fSlntue   double x_4 = x_sq * x_sq;
537fc9fb9fSlntue   double d0 = fputil::multiply_add(x_sq, c1, c0);
547fc9fb9fSlntue   double d1 = fputil::multiply_add(x_sq, c3, c2);
557fc9fb9fSlntue 
567fc9fb9fSlntue   return fputil::multiply_add(x_4, d1, d0);
577fc9fb9fSlntue }
587fc9fb9fSlntue 
597fc9fb9fSlntue // Get the error term for Newton iteration:
607fc9fb9fSlntue //   h(x) = x^3 * a^2 - 1,
617fc9fb9fSlntue #ifdef LIBC_TARGET_CPU_HAS_FMA
62ac1d5facSlntue double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) {
637fc9fb9fSlntue   return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) +
647fc9fb9fSlntue          fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo);
657fc9fb9fSlntue }
667fc9fb9fSlntue #else
67ac1d5facSlntue double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) {
687fc9fb9fSlntue   DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3);
697fc9fb9fSlntue   return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo;
707fc9fb9fSlntue }
717fc9fb9fSlntue #endif
727fc9fb9fSlntue 
737fc9fb9fSlntue } // anonymous namespace
747fc9fb9fSlntue 
757fc9fb9fSlntue // Correctly rounded cbrt algorithm:
767fc9fb9fSlntue //
777fc9fb9fSlntue // === Step 1 - Range reduction ===
787fc9fb9fSlntue // For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as:
797fc9fb9fSlntue //   x_r = 1.m
807fc9fb9fSlntue //   a   = (-1)^s * 2^(e % 3) * (1.m)
817fc9fb9fSlntue // Then cbrt(x) = x^(1/3) can be computed as:
827fc9fb9fSlntue //   x^(1/3) = 2^(e / 3) * a^(1/3).
837fc9fb9fSlntue //
847fc9fb9fSlntue // In order to avoid division, we compute a^(-2/3) using Newton method and then
857fc9fb9fSlntue // multiply the results by a:
867fc9fb9fSlntue //   a^(1/3) = a * a^(-2/3).
877fc9fb9fSlntue //
887fc9fb9fSlntue // === Step 2 - First approximation to a^(-2/3) ===
897fc9fb9fSlntue // First, we use a degree-7 minimax polynomial generated by Sollya to
907fc9fb9fSlntue // approximate x_r^(-2/3) for 1 <= x_r < 2.
917fc9fb9fSlntue //   p = P(x_r) ~ x_r^(-2/3),
927fc9fb9fSlntue // with relative errors bounded by:
937fc9fb9fSlntue //   | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21.
947fc9fb9fSlntue //
// Then we multiply by 2^(-2*(e % 3)/3) from a small lookup table to get:
967fc9fb9fSlntue //   x_0 = 2^(-2*(e % 3)/3) * p
977fc9fb9fSlntue //       ~ 2^(-2*(e % 3)/3) * x_r^(-2/3)
987fc9fb9fSlntue //       = a^(-2/3)
997fc9fb9fSlntue // With relative errors:
1007fc9fb9fSlntue //   | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21.
1017fc9fb9fSlntue // This step is done in double precision.
1027fc9fb9fSlntue //
1037fc9fb9fSlntue // === Step 3 - First Newton iteration ===
1047fc9fb9fSlntue // We follow the method described in:
1057fc9fb9fSlntue //   Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation
1067fc9fb9fSlntue //   in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf
1077fc9fb9fSlntue // to derive multiplicative Newton iterations as below:
1087fc9fb9fSlntue // Let x_n be the nth approximation to a^(-2/3).  Define the n^th error as:
1097fc9fb9fSlntue //   h_n = x_n^3 * a^2 - 1
1107fc9fb9fSlntue // Then:
1117fc9fb9fSlntue //   a^(-2/3) = x_n / (1 + h_n)^(1/3)
1127fc9fb9fSlntue //            = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...)
1137fc9fb9fSlntue // using the Taylor series expansion of (1 + h_n)^(-1/3).
1147fc9fb9fSlntue //
1157fc9fb9fSlntue // Apply to x_0 above:
1167fc9fb9fSlntue //   h_0 = x_0^3 * a^2 - 1
1177fc9fb9fSlntue //       = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)),
1187fc9fb9fSlntue // it's bounded by:
1197fc9fb9fSlntue //   |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17.
1207fc9fb9fSlntue // So in the first iteration step, we use:
//   x_1 = x_0 * (1 - (1/3) * h_0 + (2/9) * h_0^2 - (14/81) * h_0^3)
// Its relative error is bounded by:
//   | x_1 / a^(-2/3) - 1 | < 35/243 * |h_0|^4 < 2^-70.
1247fc9fb9fSlntue // Then we perform Ziv's rounding test and check if the answer is exact.
1257fc9fb9fSlntue // This step is done in double-double precision.
1267fc9fb9fSlntue //
1277fc9fb9fSlntue // === Step 4 - Second Newton iteration ===
1287fc9fb9fSlntue // If the Ziv's rounding test from the previous step fails, we define the error
1297fc9fb9fSlntue // term:
1307fc9fb9fSlntue //   h_1 = x_1^3 * a^2 - 1,
1317fc9fb9fSlntue // And perform another iteration:
1327fc9fb9fSlntue //   x_2 = x_1 * (1 - h_1 / 3)
// with relative errors that exceed the precision of double-double.
1347fc9fb9fSlntue // We then check the Ziv's accuracy test with relative errors < 2^-102 to
1357fc9fb9fSlntue // compensate for rounding errors.
1367fc9fb9fSlntue //
1377fc9fb9fSlntue // === Step 5 - Final iteration ===
1387fc9fb9fSlntue // If the Ziv's accuracy test from the previous step fails, we perform another
1397fc9fb9fSlntue // iteration in 128-bit precision and check for exact outputs.
1407fc9fb9fSlntue //
1417fc9fb9fSlntue // TODO: It is possible to replace this costly computation step with special
1427fc9fb9fSlntue // exceptional handling, similar to what was done in the CORE-MATH project:
1437fc9fb9fSlntue // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c
1447fc9fb9fSlntue 
1457fc9fb9fSlntue LLVM_LIBC_FUNCTION(double, cbrt, (double x)) {
1467fc9fb9fSlntue   using FPBits = fputil::FPBits<double>;
1477fc9fb9fSlntue 
1487fc9fb9fSlntue   uint64_t x_abs = FPBits(x).abs().uintval();
1497fc9fb9fSlntue 
1507fc9fb9fSlntue   unsigned exp_bias_correction = 682; // 1023 * 2/3
1517fc9fb9fSlntue 
1527fc9fb9fSlntue   if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() ||
1537fc9fb9fSlntue                     x_abs >= FPBits::inf().uintval())) {
154*0f4b3c40Slntue     if (x == 0.0 || x_abs >= FPBits::inf().uintval())
1557fc9fb9fSlntue       // x is 0, Inf, or NaN.
156*0f4b3c40Slntue       // Make sure it works for FTZ/DAZ modes.
157*0f4b3c40Slntue       return static_cast<double>(x + x);
1587fc9fb9fSlntue 
1597fc9fb9fSlntue     // x is non-zero denormal number.
1607fc9fb9fSlntue     // Normalize x.
1617fc9fb9fSlntue     x *= 0x1.0p60;
1627fc9fb9fSlntue     exp_bias_correction -= 20;
1637fc9fb9fSlntue   }
1647fc9fb9fSlntue 
1657fc9fb9fSlntue   FPBits x_bits(x);
1667fc9fb9fSlntue 
1677fc9fb9fSlntue   // When using biased exponent of x in double precision,
1687fc9fb9fSlntue   //   x_e = real_exponent_of_x + 1023
1697fc9fb9fSlntue   // Then:
1707fc9fb9fSlntue   //   x_e / 3 = real_exponent_of_x / 3 + 1023/3
1717fc9fb9fSlntue   //           = real_exponent_of_x / 3 + 341
1727fc9fb9fSlntue   // So to make it the correct biased exponent of x^(1/3), we add
1737fc9fb9fSlntue   //   1023 - 341 = 682
1747fc9fb9fSlntue   // to the quotient x_e / 3.
1757fc9fb9fSlntue   unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent());
1767fc9fb9fSlntue   unsigned out_e = (x_e / 3 + exp_bias_correction);
1777fc9fb9fSlntue   unsigned shift_e = x_e % 3;
1787fc9fb9fSlntue 
1797fc9fb9fSlntue   // Set x_r = 1.mantissa
1807fc9fb9fSlntue   double x_r =
1817fc9fb9fSlntue       FPBits(x_bits.get_mantissa() |
1827fc9fb9fSlntue              (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN))
1837fc9fb9fSlntue           .get_val();
1847fc9fb9fSlntue 
1857fc9fb9fSlntue   // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa)
1867fc9fb9fSlntue   uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF;
1877fc9fb9fSlntue   a_bits |=
1887fc9fb9fSlntue       (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS))
1897fc9fb9fSlntue        << FPBits::FRACTION_LEN);
1907fc9fb9fSlntue   double a = FPBits(a_bits).get_val();
1917fc9fb9fSlntue 
1927fc9fb9fSlntue   // Initial approximation of x_r^(-2/3).
1937fc9fb9fSlntue   double p = intial_approximation(x_r);
1947fc9fb9fSlntue 
1957fc9fb9fSlntue   // Look up for 2^(-2*n/3) used for first approximation step.
1967fc9fb9fSlntue   constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1,
1977fc9fb9fSlntue                                         0x1.965fea53d6e3dp-2};
1987fc9fb9fSlntue 
1997fc9fb9fSlntue   // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8.
2007fc9fb9fSlntue   // Relative error: < 1.16 * 2^(-21).
2017fc9fb9fSlntue   double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p);
2027fc9fb9fSlntue 
2037fc9fb9fSlntue   // First iteration in double precision.
2047fc9fb9fSlntue   DoubleDouble a_sq = fputil::exact_mult(a, a);
2057fc9fb9fSlntue 
2067fc9fb9fSlntue   // h0 = x0^3 * a^2 - 1
2077fc9fb9fSlntue   DoubleDouble x0_sq = fputil::exact_mult(x0, x0);
2087fc9fb9fSlntue   DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq);
2097fc9fb9fSlntue 
2107fc9fb9fSlntue   double h0 = get_error(x0_3, a_sq);
2117fc9fb9fSlntue 
2127fc9fb9fSlntue #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
2137fc9fb9fSlntue   constexpr double REL_ERROR = 0;
2147fc9fb9fSlntue #else
2157fc9fb9fSlntue   constexpr double REL_ERROR = 0x1.0p-51;
2167fc9fb9fSlntue #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
2177fc9fb9fSlntue 
2187fc9fb9fSlntue   // Taylor polynomial of (1 + h)^(-1/3):
2197fc9fb9fSlntue   //   (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ...
2207fc9fb9fSlntue   constexpr double ERR_COEFFS[3] = {
2217fc9fb9fSlntue       -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error
2227fc9fb9fSlntue       0x1.c71c71c71c71cp-3,              // 2/9
2237fc9fb9fSlntue       -0x1.61f9add3c0ca4p-3,             // -14/81
2247fc9fb9fSlntue   };
2257fc9fb9fSlntue   // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error.
2267fc9fb9fSlntue   double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]);
2277fc9fb9fSlntue   double x0_h0 = x0 * h0;
2287fc9fb9fSlntue 
2297fc9fb9fSlntue   // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81)
2307fc9fb9fSlntue   // x1 approximate a^(-2/3) with relative errors bounded by:
2317fc9fb9fSlntue   //   | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR
2327fc9fb9fSlntue   DoubleDouble x1_dd{x0_h0 * e0, x0};
2337fc9fb9fSlntue 
2347fc9fb9fSlntue   // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3).
2357fc9fb9fSlntue   DoubleDouble r1 = fputil::quick_mult(a, x1_dd);
2367fc9fb9fSlntue 
2377fc9fb9fSlntue   // Lambda function to update the exponent of the result.
2387fc9fb9fSlntue   auto update_exponent = [=](double r) -> double {
239a8790734Slntue     uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000;
2407fc9fb9fSlntue     // Adjust exponent and sign.
2417fc9fb9fSlntue     uint64_t r_bits =
242a8790734Slntue         r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN);
2437fc9fb9fSlntue     return FPBits(r_bits).get_val();
2447fc9fb9fSlntue   };
2457fc9fb9fSlntue 
2467fc9fb9fSlntue #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
2477fc9fb9fSlntue   // TODO: We probably don't need to use double-double if accurate tests and
2487fc9fb9fSlntue   // passes are skipped.
2497fc9fb9fSlntue   return update_exponent(r1.hi + r1.lo);
2507fc9fb9fSlntue #else
2517fc9fb9fSlntue   // Accurate checks and passes.
2527fc9fb9fSlntue   double r1_lower = r1.hi + r1.lo;
2537fc9fb9fSlntue   double r1_upper =
2547fc9fb9fSlntue       r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo);
2557fc9fb9fSlntue 
2567fc9fb9fSlntue   // Ziv's accuracy test.
2577fc9fb9fSlntue   if (LIBC_LIKELY(r1_upper == r1_lower)) {
2587fc9fb9fSlntue     // Test for exact outputs.
2597fc9fb9fSlntue     // Check if lower (52 - 17 = 35) bits are 0's.
2607fc9fb9fSlntue     if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) ==
2617fc9fb9fSlntue                       0)) {
2627fc9fb9fSlntue       double r1_err = (r1_lower - r1.hi) - r1.lo;
2637fc9fb9fSlntue       if (FPBits(r1_err).abs().get_val() < 0x1.0p69)
2647fc9fb9fSlntue         fputil::clear_except_if_required(FE_INEXACT);
2657fc9fb9fSlntue     }
2667fc9fb9fSlntue 
2677fc9fb9fSlntue     return update_exponent(r1_lower);
2687fc9fb9fSlntue   }
2697fc9fb9fSlntue 
2707fc9fb9fSlntue   // Accuracy test failed, perform another Newton iteration.
2717fc9fb9fSlntue   double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0;
2727fc9fb9fSlntue 
2737fc9fb9fSlntue   // Second iteration in double-double precision.
2747fc9fb9fSlntue   // h1 = x1^3 * a^2 - 1.
2757fc9fb9fSlntue   DoubleDouble x1_sq = fputil::exact_mult(x1, x1);
2767fc9fb9fSlntue   DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq);
2777fc9fb9fSlntue   double h1 = get_error(x1_3, a_sq);
2787fc9fb9fSlntue 
2797fc9fb9fSlntue   // e1 = -x1*h1/3.
2807fc9fb9fSlntue   double e1 = h1 * (x1 * -0x1.5555555555555p-2);
2817fc9fb9fSlntue   // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101.
2827fc9fb9fSlntue   DoubleDouble x2 = fputil::exact_add(x1, e1);
2837fc9fb9fSlntue   // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100.
2847fc9fb9fSlntue   DoubleDouble r2 = fputil::quick_mult(a, x2);
2857fc9fb9fSlntue 
2867fc9fb9fSlntue   double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo);
2877fc9fb9fSlntue   double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo);
2887fc9fb9fSlntue 
2897fc9fb9fSlntue   // Ziv's accuracy test.
2907fc9fb9fSlntue   if (LIBC_LIKELY(r2_upper == r2_lower))
2917fc9fb9fSlntue     return update_exponent(r2_upper);
2927fc9fb9fSlntue 
2937fc9fb9fSlntue   // TODO: Investigate removing float128 and just list exceptional cases.
2947fc9fb9fSlntue   // Apply another Newton iteration with ~126-bit accuracy.
2957fc9fb9fSlntue   Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo));
2967fc9fb9fSlntue   // x2^3
2977fc9fb9fSlntue   Float128 x2_3 =
2987fc9fb9fSlntue       fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128);
2997fc9fb9fSlntue   // a^2
3007fc9fb9fSlntue   Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a));
3017fc9fb9fSlntue   // x2^3 * a^2
3027fc9fb9fSlntue   Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128);
3037fc9fb9fSlntue   // h2 = x2^3 * a^2 - 1
3047fc9fb9fSlntue   Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0));
3057fc9fb9fSlntue   double h2 = static_cast<double>(h2_f128);
3067fc9fb9fSlntue   // t2 = 1 - h2 / 3
3077fc9fb9fSlntue   Float128 t2 =
3087fc9fb9fSlntue       fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2)));
3097fc9fb9fSlntue   // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3)
3107fc9fb9fSlntue   Float128 x3 = fputil::quick_mul(x2_f128, t2);
3117fc9fb9fSlntue   // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3)
3127fc9fb9fSlntue   Float128 r3 = fputil::quick_mul(Float128(a), x3);
3137fc9fb9fSlntue 
3147fc9fb9fSlntue   // Check for exact cases:
3157fc9fb9fSlntue   Float128::MantissaType rounding_bits =
3167fc9fb9fSlntue       r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128;
3177fc9fb9fSlntue 
3187fc9fb9fSlntue   double result = static_cast<double>(r3);
3197fc9fb9fSlntue   if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) ||
3207fc9fb9fSlntue       (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) {
3217fc9fb9fSlntue     // Output is exact.
3227fc9fb9fSlntue     r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128;
3237fc9fb9fSlntue 
3247fc9fb9fSlntue     if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) {
3257fc9fb9fSlntue       Float128 tmp{r3.sign, r3.exponent - 123,
3267fc9fb9fSlntue                    0x8000'0000'0000'0000'0000'0000'0000'0000_u128};
3277fc9fb9fSlntue       Float128 r4 = fputil::quick_add(r3, tmp);
3287fc9fb9fSlntue       result = static_cast<double>(r4);
3297fc9fb9fSlntue     } else {
3307fc9fb9fSlntue       result = static_cast<double>(r3);
3317fc9fb9fSlntue     }
3327fc9fb9fSlntue 
3337fc9fb9fSlntue     fputil::clear_except_if_required(FE_INEXACT);
3347fc9fb9fSlntue   }
3357fc9fb9fSlntue 
3367fc9fb9fSlntue   return update_exponent(result);
3377fc9fb9fSlntue #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
3387fc9fb9fSlntue }
3397fc9fb9fSlntue 
3407fc9fb9fSlntue } // namespace LIBC_NAMESPACE_DECL
341