/* mpfr_fma -- Floating multiply-add

Copyright 2001-2002, 2004, 2006-2016 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#include "mpfr-impl.h"

/* The fused-multiply-add (fma) of x, y and z is defined by:
   fma(x,y,z) = x*y + z
*/

int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;
  mpfr_t u;
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL (group);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
                     MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    {
      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
        {
          MPFR_SET_NAN(s);
          MPFR_RET_NAN;
        }
      /* now neither x, y nor z is NaN */
      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
        {
          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
          if ((MPFR_IS_ZERO(y)) ||
              (MPFR_IS_ZERO(x)) ||
              (MPFR_IS_INF(z) &&
               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
            {
              MPFR_SET_NAN(s);
              MPFR_RET_NAN;
            }
          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
            {
              MPFR_SET_INF(s);
              MPFR_SET_SAME_SIGN(s, z);
              MPFR_RET(0);
            }
          else /* z is finite */
            {
              MPFR_SET_INF(s);
              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y)));
              MPFR_RET(0);
            }
        }
      /* now x and y are finite */
      else if (MPFR_IS_INF(z))
        {
          MPFR_SET_INF(s);
          MPFR_SET_SAME_SIGN(s, z);
          MPFR_RET(0);
        }
      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
        {
          if (MPFR_IS_ZERO(z))
            {
              int sign_p;
              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
              MPFR_SET_SIGN(s, (rnd_mode != MPFR_RNDD ?
                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
                                 ? -1 : 1) :
                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
                                 ? 1 : -1)));
              MPFR_SET_ZERO(s);
              MPFR_RET(0);
            }
          else
            return mpfr_set (s, z, rnd_mode);
        }
      else /* necessarily z is zero here */
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(z));
          return mpfr_mul (s, x, y, rnd_mode);
        }
    }

  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
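  /* (Indeed, the significands of x and y fit in prec(x) and prec(y) bits
     respectively, and (2^p - 1) * (2^q - 1) < 2^(p+q), so the full product
     always fits in prec(x) + prec(y) bits.) */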
  MPFR_SAVE_EXPO_MARK (expo);
  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);

  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */

      if (MPFR_IS_INF (u)) /* overflow */
        {
          MPFR_LOG_MSG (("Overflow on x*y\n", 0));

          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
             is also an overflow. */
          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
            {
              MPFR_GROUP_CLEAR (group);
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
            }

          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
             (x/4)*y does not overflow (let's recall that the result
             is exact with an unbounded exponent range). It does not
             underflow either, because x*y overflows and the exponent
             range is large enough. */
          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);

          /* Now, we need to add z/4... But it may underflow! */
          {
            mpfr_t zo4;
            mpfr_srcptr zz;
            MPFR_BLOCK_DECL (flags);

            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
              {
                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
                zz = z;
              }
            else
              {
                mpfr_init2 (zo4, MPFR_PREC (z));
                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
                  {
                    /* The division by 4 underflowed! */
                    MPFR_ASSERTN (0); /* TODO... */
                  }
                zz = zo4;
              }

            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
               following addition would give the same result). */
            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
            /* u and zz have different signs, so that an overflow
               is not possible. But an underflow is theoretically
               possible! */
            if (MPFR_UNDERFLOW (flags))
              {
                MPFR_ASSERTN (zz != z);
                MPFR_ASSERTN (0); /* TODO... */
                mpfr_clears (zo4, u, (mpfr_ptr) 0);
              }
            else
              {
                int inex2;

                if (zz != z)
                  mpfr_clear (zo4);
                MPFR_GROUP_CLEAR (group);
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
                if (inex2) /* overflow */
                  {
                    inexact = inex2;
                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
                  }
                goto end;
              }
          }
        }
      else /* underflow: one has |xy| < 2^(emin-1). */
        {
          unsigned long scale = 0;
          mpfr_t scaled_z;
          mpfr_srcptr new_z;
          mpfr_exp_t diffexp;
          mpfr_prec_t pzs;
          int xy_underflows;

          MPFR_LOG_MSG (("Underflow on x*y\n", 0));

          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
             (the + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1). */
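          /* (With MPFR's convention ulp(t) = 2^(EXP(t) - prec(t)), these
             two conditions amount to EXP(z) - emin > MAX (PREC(z),
             PREC(s) + 1), i.e. diffexp > pzs below; when diffexp <= pzs,
             multiplying z by 2^scale with scale = pzs - diffexp + 1 makes
             the scaled exponent difference equal to pzs + 1.) */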
          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
          MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d pzs=%Pd\n",
                         diffexp, pzs));
          if (diffexp <= pzs)
            {
              mpfr_uexp_t uscale;
              mpfr_t scaled_v;
              MPFR_BLOCK_DECL (flags);

              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
              MPFR_ASSERTN (uscale > 0);
              MPFR_ASSERTN (uscale <= ULONG_MAX);
              scale = uscale;
              mpfr_init2 (scaled_z, MPFR_PREC (z));
              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
              MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
              new_z = scaled_z;
              /* Now we need to recompute u = xy * 2^scale. */
              MPFR_BLOCK (flags,
                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (x));
                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
                            }
                          else
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (y));
                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
                            });
              mpfr_clear (scaled_v);
              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
              xy_underflows = MPFR_UNDERFLOW (flags);
            }
          else
            {
              new_z = z;
              xy_underflows = 1;
            }

          MPFR_LOG_MSG (("scale=%lu xy_underflows=%d\n",
                         scale, xy_underflows));

          if (xy_underflows)
            {
              /* Let's replace xy by sign(xy) * 2^(emin-1). */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
            }

          {
            MPFR_BLOCK_DECL (flags);

            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
            MPFR_LOG_MSG (("inexact=%d\n", inexact));
            MPFR_GROUP_CLEAR (group);
            if (scale != 0)
              {
                int inex2;

                mpfr_clear (scaled_z);
                /* Here an overflow is theoretically possible, in which case
                   the result may be wrong, hence the assert. An underflow
                   is not possible, but let's check that anyway. */
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
                if (rnd_mode == MPFR_RNDN &&
                    MPFR_GET_EXP (s) == __gmpfr_emin - 1 + scale &&
                    mpfr_powerof2_raw (s))
                  {
                    MPFR_LOG_MSG (("Double rounding\n", 0));
                    rnd_mode = (MPFR_IS_NEG (s) ? inexact <= 0 : inexact >= 0)
                      ? MPFR_RNDZ : MPFR_RNDA;
                  }
                inex2 = mpfr_div_2ui (s, s, scale, rnd_mode);
                MPFR_LOG_MSG (("inex2=%d\n", inex2));
                if (inex2) /* underflow */
                  inexact = inex2;
              }
          }

          /* FIXME/TODO: I'm not sure that the following is correct.
             Check for possible spurious exceptions due to intermediate
             computations. */
          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
          goto end;
        }
    }

  inexact = mpfr_add (s, u, z, rnd_mode);
  MPFR_GROUP_CLEAR (group);
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 end:
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (s, inexact, rnd_mode);
}
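
/* A minimal usage sketch for the function above (illustrative only, not
   part of the library sources; it assumes a standard MPFR installation
   and is compiled against <mpfr.h> with -lmpfr):

   #include <stdio.h>
   #include <mpfr.h>

   int main (void)
   {
     mpfr_t s, x, y, z;
     mpfr_inits2 (53, s, x, y, z, (mpfr_ptr) 0);
     mpfr_set_d (x, 1.5, MPFR_RNDN);
     mpfr_set_d (y, 2.5, MPFR_RNDN);
     mpfr_set_d (z, -3.0, MPFR_RNDN);
     mpfr_fma (s, x, y, z, MPFR_RNDN);   // s = 1.5 * 2.5 - 3.0 = 0.75,
                                         // with a single final rounding
     mpfr_printf ("fma = %Rg\n", s);
     mpfr_clears (s, x, y, z, (mpfr_ptr) 0);
     return 0;
   }
*/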