1 //===-- Utilities for double-double data type. ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H 10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H 11 12 #include "multiply_add.h" 13 #include "src/__support/common.h" 14 #include "src/__support/macros/config.h" 15 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA 16 #include "src/__support/number_pair.h" 17 18 namespace LIBC_NAMESPACE_DECL { 19 namespace fputil { 20 21 #define DEFAULT_DOUBLE_SPLIT 27 22 23 using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>; 24 25 // The output of Dekker's FastTwoSum algorithm is correct, i.e.: 26 // r.hi + r.lo = a + b exactly 27 // and |r.lo| < eps(r.lo) 28 // Assumption: |a| >= |b|, or a = 0. 29 template <bool FAST2SUM = true> 30 LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) { 31 DoubleDouble r{0.0, 0.0}; 32 if constexpr (FAST2SUM) { 33 r.hi = a + b; 34 double t = r.hi - a; 35 r.lo = b - t; 36 } else { 37 r.hi = a + b; 38 double t1 = r.hi - a; 39 double t2 = r.hi - t1; 40 double t3 = b - t1; 41 double t4 = a - t2; 42 r.lo = t3 + t4; 43 } 44 return r; 45 } 46 47 // Assumption: |a.hi| >= |b.hi| 48 LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, 49 const DoubleDouble &b) { 50 DoubleDouble r = exact_add(a.hi, b.hi); 51 double lo = a.lo + b.lo; 52 return exact_add(r.hi, r.lo + lo); 53 } 54 55 // Assumption: |a.hi| >= |b| 56 LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) { 57 DoubleDouble r = exact_add<false>(a.hi, b); 58 return exact_add(r.hi, r.lo + a.lo); 59 } 60 61 // Veltkamp's Splitting for double precision. 62 // Note: This is proved to be correct for all rounding modes: 63 // Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed 64 // Roundings," https://inria.hal.science/hal-04480440. 65 // Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1. 66 template <size_t N = DEFAULT_DOUBLE_SPLIT> 67 LIBC_INLINE constexpr DoubleDouble split(double a) { 68 DoubleDouble r{0.0, 0.0}; 69 // CN = 2^N. 70 constexpr double CN = static_cast<double>(1 << N); 71 constexpr double C = CN + 1.0; 72 double t1 = C * a; 73 double t2 = a - t1; 74 r.hi = t1 + t2; 75 r.lo = a - r.hi; 76 return r; 77 } 78 79 // Helper for non-fma exact mult where the first number is already split. 80 template <size_t SPLIT_B = DEFAULT_DOUBLE_SPLIT> 81 LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a, 82 double b) { 83 DoubleDouble bs = split<SPLIT_B>(b); 84 DoubleDouble r{0.0, 0.0}; 85 86 r.hi = a * b; 87 double t1 = as.hi * bs.hi - r.hi; 88 double t2 = as.hi * bs.lo + t1; 89 double t3 = as.lo * bs.hi + t2; 90 r.lo = as.lo * bs.lo + t3; 91 92 return r; 93 } 94 95 // Note: When FMA instruction is not available, the `exact_mult` function is 96 // only correct for round-to-nearest mode. See: 97 // Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed 98 // Roundings," https://inria.hal.science/hal-04480440. 99 // Using Theorem 1 in the paper above, without FMA instruction, if we restrict 100 // the generated constants to precision <= 51, and splitting it by 2^28 + 1, 101 // then a * b = r.hi + r.lo is exact for all rounding modes. 102 template <size_t SPLIT_B = 27> 103 LIBC_INLINE DoubleDouble exact_mult(double a, double b) { 104 DoubleDouble r{0.0, 0.0}; 105 106 #ifdef LIBC_TARGET_CPU_HAS_FMA 107 r.hi = a * b; 108 r.lo = fputil::multiply_add(a, b, -r.hi); 109 #else 110 // Dekker's Product. 111 DoubleDouble as = split(a); 112 113 r = exact_mult<SPLIT_B>(as, a, b); 114 #endif // LIBC_TARGET_CPU_HAS_FMA 115 116 return r; 117 } 118 119 LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) { 120 DoubleDouble r = exact_mult(a, b.hi); 121 r.lo = multiply_add(a, b.lo, r.lo); 122 return r; 123 } 124 125 template <size_t SPLIT_B = 27> 126 LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a, 127 const DoubleDouble &b) { 128 DoubleDouble r = exact_mult<SPLIT_B>(a.hi, b.hi); 129 double t1 = multiply_add(a.hi, b.lo, r.lo); 130 double t2 = multiply_add(a.lo, b.hi, t1); 131 r.lo = t2; 132 return r; 133 } 134 135 // Assuming |c| >= |a * b|. 136 template <> 137 LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a, 138 const DoubleDouble &b, 139 const DoubleDouble &c) { 140 return add(c, quick_mult(a, b)); 141 } 142 143 // Accurate double-double division, following Karp-Markstein's trick for 144 // division, implemented in the CORE-MATH project at: 145 // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/tan/tan.c#L1855 146 // 147 // Error bounds: 148 // Let a = ah + al, b = bh + bl. 149 // Let r = rh + rl be the approximation of (ah + al) / (bh + bl). 150 // Then: 151 // (ah + al) / (bh + bl) - rh = 152 // = ((ah - bh * rh) + (al - bl * rh)) / (bh + bl) 153 // = (1 + O(bl/bh)) * ((ah - bh * rh) + (al - bl * rh)) / bh 154 // Let q = round(1/bh), then the above expressions are approximately: 155 // = (1 + O(bl / bh)) * (1 + O(2^-52)) * q * ((ah - bh * rh) + (al - bl * rh)) 156 // So we can compute: 157 // rl = q * (ah - bh * rh) + q * (al - bl * rh) 158 // as accurate as possible, then the error is bounded by: 159 // |(ah + al) / (bh + bl) - (rh + rl)| < O(bl/bh) * (2^-52 + al/ah + bl/bh) 160 LIBC_INLINE DoubleDouble div(const DoubleDouble &a, const DoubleDouble &b) { 161 DoubleDouble r; 162 double q = 1.0 / b.hi; 163 r.hi = a.hi * q; 164 165 #ifdef LIBC_TARGET_CPU_HAS_FMA 166 double e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi); 167 double e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo); 168 #else 169 DoubleDouble b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi); 170 DoubleDouble b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi); 171 double e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo; 172 double e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo; 173 #endif // LIBC_TARGET_CPU_HAS_FMA 174 175 r.lo = q * (e_hi + e_lo); 176 return r; 177 } 178 179 } // namespace fputil 180 } // namespace LIBC_NAMESPACE_DECL 181 182 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H 183