1 /* $NetBSD: s_fma.c,v 1.6 2013/02/14 09:24:50 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if 0 31 __FBSDID("$FreeBSD: src/lib/msun/src/s_fma.c,v 1.8 2011/10/21 06:30:43 das Exp $"); 32 #else 33 __RCSID("$NetBSD: s_fma.c,v 1.6 2013/02/14 09:24:50 matt Exp $"); 34 #endif 35 36 #include <machine/ieee.h> 37 #include <fenv.h> 38 #include <float.h> 39 #include <math.h> 40 41 #include "math_private.h" 42 43 #ifndef __HAVE_LONG_DOUBLE 44 __strong_alias(fmal, fma) 45 #endif 46 47 /* 48 * A struct dd represents a floating-point number with twice the precision 49 * of a double. We maintain the invariant that "hi" stores the 53 high-order 50 * bits of the result. 51 */ 52 struct dd { 53 double hi; 54 double lo; 55 }; 56 57 /* 58 * Compute a+b exactly, returning the exact result in a struct dd. We assume 59 * that both a and b are finite, but make no assumptions about their relative 60 * magnitudes. 61 */ 62 static inline struct dd 63 dd_add(double a, double b) 64 { 65 struct dd ret; 66 double s; 67 68 ret.hi = a + b; 69 s = ret.hi - a; 70 ret.lo = (a - (ret.hi - s)) + (b - s); 71 return (ret); 72 } 73 74 /* 75 * Compute a+b, with a small tweak: The least significant bit of the 76 * result is adjusted into a sticky bit summarizing all the bits that 77 * were lost to rounding. This adjustment negates the effects of double 78 * rounding when the result is added to another number with a higher 79 * exponent. For an explanation of round and sticky bits, see any reference 80 * on FPU design, e.g., 81 * 82 * J. Coonen. An Implementation Guide to a Proposed Standard for 83 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 84 */ 85 static inline double 86 add_adjusted(double a, double b) 87 { 88 struct dd sum; 89 uint64_t hibits, lobits; 90 91 sum = dd_add(a, b); 92 if (sum.lo != 0) { 93 EXTRACT_WORD64(hibits, sum.hi); 94 if ((hibits & 1) == 0) { 95 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ 96 EXTRACT_WORD64(lobits, sum.lo); 97 hibits += 1 - ((hibits ^ lobits) >> 62); 98 INSERT_WORD64(sum.hi, hibits); 99 } 100 } 101 return (sum.hi); 102 } 103 104 /* 105 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 106 * that the result will be subnormal, and care is taken to ensure that 107 * double rounding does not occur. 108 */ 109 static inline double 110 add_and_denormalize(double a, double b, int scale) 111 { 112 struct dd sum; 113 uint64_t hibits, lobits; 114 int bits_lost; 115 116 sum = dd_add(a, b); 117 118 /* 119 * If we are losing at least two bits of accuracy to denormalization, 120 * then the first lost bit becomes a round bit, and we adjust the 121 * lowest bit of sum.hi to make it a sticky bit summarizing all the 122 * bits in sum.lo. With the sticky bit adjusted, the hardware will 123 * break any ties in the correct direction. 124 * 125 * If we are losing only one bit to denormalization, however, we must 126 * break the ties manually. 127 */ 128 if (sum.lo != 0) { 129 EXTRACT_WORD64(hibits, sum.hi); 130 bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; 131 if ((bits_lost != 1) ^ (int)(hibits & 1)) { 132 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ 133 EXTRACT_WORD64(lobits, sum.lo); 134 hibits += 1 - (((hibits ^ lobits) >> 62) & 2); 135 INSERT_WORD64(sum.hi, hibits); 136 } 137 } 138 return (ldexp(sum.hi, scale)); 139 } 140 141 /* 142 * Compute a*b exactly, returning the exact result in a struct dd. We assume 143 * that both a and b are normalized, so no underflow or overflow will occur. 144 * The current rounding mode must be round-to-nearest. 145 */ 146 static inline struct dd 147 dd_mul(double a, double b) 148 { 149 static const double split = 0x1p27 + 1.0; 150 struct dd ret; 151 double ha, hb, la, lb, p, q; 152 153 p = a * split; 154 ha = a - p; 155 ha += p; 156 la = a - ha; 157 158 p = b * split; 159 hb = b - p; 160 hb += p; 161 lb = b - hb; 162 163 p = ha * hb; 164 q = ha * lb + la * hb; 165 166 ret.hi = p + q; 167 ret.lo = p - ret.hi + q + la * lb; 168 return (ret); 169 } 170 171 /* 172 * Fused multiply-add: Compute x * y + z with a single rounding error. 173 * 174 * We use scaling to avoid overflow/underflow, along with the 175 * canonical precision-doubling technique adapted from: 176 * 177 * Dekker, T. A Floating-Point Technique for Extending the 178 * Available Precision. Numer. Math. 18, 224-242 (1971). 179 * 180 * This algorithm is sensitive to the rounding precision. FPUs such 181 * as the i387 must be set in double-precision mode if variables are 182 * to be stored in FP registers in order to avoid incorrect results. 183 * This is the default on FreeBSD, but not on many other systems. 184 * 185 * Hardware instructions should be used on architectures that support it, 186 * since this implementation will likely be several times slower. 187 */ 188 double 189 fma(double x, double y, double z) 190 { 191 double xs, ys, zs, adj; 192 struct dd xy, r; 193 int oround; 194 int ex, ey, ez; 195 int spread; 196 197 /* 198 * Handle special cases. The order of operations and the particular 199 * return values here are crucial in handling special cases involving 200 * infinities, NaNs, overflows, and signed zeroes correctly. 201 */ 202 if (x == 0.0 || y == 0.0) 203 return (x * y + z); 204 if (z == 0.0) 205 return (x * y); 206 if (!isfinite(x) || !isfinite(y)) 207 return (x * y + z); 208 if (!isfinite(z)) 209 return (z); 210 211 xs = frexp(x, &ex); 212 ys = frexp(y, &ey); 213 zs = frexp(z, &ez); 214 oround = fegetround(); 215 spread = ex + ey - ez; 216 217 /* 218 * If x * y and z are many orders of magnitude apart, the scaling 219 * will overflow, so we handle these cases specially. Rounding 220 * modes other than FE_TONEAREST are painful. 221 */ 222 if (spread < -DBL_MANT_DIG) { 223 feraiseexcept(FE_INEXACT); 224 if (!isnormal(z)) 225 feraiseexcept(FE_UNDERFLOW); 226 switch (oround) { 227 case FE_TONEAREST: 228 return (z); 229 case FE_TOWARDZERO: 230 if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) 231 return (z); 232 else 233 return (nextafter(z, 0)); 234 case FE_DOWNWARD: 235 if ((x > 0.0) ^ (y < 0.0)) 236 return (z); 237 else 238 return (nextafter(z, -INFINITY)); 239 default: /* FE_UPWARD */ 240 if ((x > 0.0) ^ (y < 0.0)) 241 return (nextafter(z, INFINITY)); 242 else 243 return (z); 244 } 245 } 246 if (spread <= DBL_MANT_DIG * 2) 247 zs = ldexp(zs, -spread); 248 else 249 zs = copysign(DBL_MIN, zs); 250 251 fesetround(FE_TONEAREST); 252 253 /* 254 * Basic approach for round-to-nearest: 255 * 256 * (xy.hi, xy.lo) = x * y (exact) 257 * (r.hi, r.lo) = xy.hi + z (exact) 258 * adj = xy.lo + r.lo (inexact; low bit is sticky) 259 * result = r.hi + adj (correctly rounded) 260 */ 261 xy = dd_mul(xs, ys); 262 r = dd_add(xy.hi, zs); 263 264 spread = ex + ey; 265 266 if (r.hi == 0.0) { 267 /* 268 * When the addends cancel to 0, ensure that the result has 269 * the correct sign. 270 */ 271 fesetround(oround); 272 { 273 volatile double vzs = zs; /* XXX gcc CSE bug workaround */ 274 return (xy.hi + vzs + ldexp(xy.lo, spread)); 275 } 276 } 277 278 if (oround != FE_TONEAREST) { 279 /* 280 * There is no need to worry about double rounding in directed 281 * rounding modes. 282 */ 283 fesetround(oround); 284 adj = r.lo + xy.lo; 285 return (ldexp(r.hi + adj, spread)); 286 } 287 288 adj = add_adjusted(r.lo, xy.lo); 289 if (spread + ilogb(r.hi) > -1023) 290 return (ldexp(r.hi + adj, spread)); 291 else 292 return (add_and_denormalize(r.hi, adj, spread)); 293 } 294