1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free 7 Software Foundation; either version 3, or (at your option) any later 8 version. 9 10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 /***************************************************************************** 25 * BID64 fma 26 ***************************************************************************** 27 * 28 * Algorithm description: 29 * 30 * if multiplication is guranteed exact (short coefficients) 31 * call the unpacked arg. equivalent of bid64_add(x*y, z) 32 * else 33 * get full coefficient_x*coefficient_y product 34 * call subroutine to perform addition of 64-bit argument 35 * to 128-bit product 36 * 37 ****************************************************************************/ 38 39 #include "bid_inline_add.h" 40 41 #if DECIMAL_CALL_BY_REFERENCE 42 extern void bid64_mul (UINT64 * pres, UINT64 * px, 43 UINT64 * 44 py _RND_MODE_PARAM _EXC_FLAGS_PARAM 45 _EXC_MASKS_PARAM _EXC_INFO_PARAM); 46 #else 47 48 extern UINT64 bid64_mul (UINT64 x, 49 UINT64 y _RND_MODE_PARAM 50 _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 51 _EXC_INFO_PARAM); 52 #endif 53 54 #if DECIMAL_CALL_BY_REFERENCE 55 56 void 57 bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py, 58 UINT64 * 59 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 60 _EXC_INFO_PARAM) { 61 UINT64 x, y, z; 62 #else 63 64 UINT64 65 bid64_fma (UINT64 x, UINT64 y, 66 UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM 67 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { 68 #endif 69 UINT128 P, PU, CT, CZ; 70 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z, 71 coefficient_z; 72 UINT64 C64, remainder_y, res; 73 UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z; 74 int_double tempx, tempy; 75 int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy, 76 bin_expon_product, rmode; 77 int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey, 78 scale_z, uf_status; 79 80 #if DECIMAL_CALL_BY_REFERENCE 81 #if !DECIMAL_GLOBAL_ROUNDING 82 _IDEC_round rnd_mode = *prnd_mode; 83 #endif 84 x = *px; 85 y = *py; 86 z = *pz; 87 #endif 88 89 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x); 90 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y); 91 valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z); 92 93 // unpack arguments, check for NaN, Infinity, or 0 94 if (!valid_x || !valid_y || !valid_z) { 95 96 if ((y & MASK_NAN) == MASK_NAN) { // y is NAN 97 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) 98 // check first for non-canonical NaN payload 99 y = y & 0xfe03ffffffffffffull; // clear G6-G12 100 if ((y & 0x0003ffffffffffffull) > 999999999999999ull) { 101 y = y & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 102 } 103 if ((y & MASK_SNAN) == MASK_SNAN) { // y is SNAN 104 // set invalid flag 105 *pfpsf |= INVALID_EXCEPTION; 106 // return quiet (y) 107 res = y & 0xfdffffffffffffffull; 108 } else { // y is QNaN 109 // return y 110 res = y; 111 // if z = SNaN or x = SNaN signal invalid exception 112 if ((z & MASK_SNAN) == MASK_SNAN 113 || (x & MASK_SNAN) == MASK_SNAN) { 114 // set invalid flag 115 *pfpsf |= INVALID_EXCEPTION; 116 } 117 } 118 BID_RETURN (res) 119 } else if ((z & MASK_NAN) == MASK_NAN) { // z is NAN 120 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) 121 // check first for non-canonical NaN payload 122 z = z & 0xfe03ffffffffffffull; // clear G6-G12 123 if ((z & 0x0003ffffffffffffull) > 999999999999999ull) { 124 z = z & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 125 } 126 if ((z & MASK_SNAN) == MASK_SNAN) { // z is SNAN 127 // set invalid flag 128 *pfpsf |= INVALID_EXCEPTION; 129 // return quiet (z) 130 res = z & 0xfdffffffffffffffull; 131 } else { // z is QNaN 132 // return z 133 res = z; 134 // if x = SNaN signal invalid exception 135 if ((x & MASK_SNAN) == MASK_SNAN) { 136 // set invalid flag 137 *pfpsf |= INVALID_EXCEPTION; 138 } 139 } 140 BID_RETURN (res) 141 } else if ((x & MASK_NAN) == MASK_NAN) { // x is NAN 142 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) 143 // check first for non-canonical NaN payload 144 x = x & 0xfe03ffffffffffffull; // clear G6-G12 145 if ((x & 0x0003ffffffffffffull) > 999999999999999ull) { 146 x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 147 } 148 if ((x & MASK_SNAN) == MASK_SNAN) { // x is SNAN 149 // set invalid flag 150 *pfpsf |= INVALID_EXCEPTION; 151 // return quiet (x) 152 res = x & 0xfdffffffffffffffull; 153 } else { // x is QNaN 154 // return x 155 res = x; // clear out G[6]-G[16] 156 } 157 BID_RETURN (res) 158 } 159 160 if (!valid_x) { 161 // x is Inf. or 0 162 163 // x is Infinity? 164 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { 165 // check if y is 0 166 if (!coefficient_y) { 167 // y==0, return NaN 168 #ifdef SET_STATUS_FLAGS 169 if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull) 170 __set_status_flags (pfpsf, INVALID_EXCEPTION); 171 #endif 172 BID_RETURN (0x7c00000000000000ull); 173 } 174 // test if z is Inf of oposite sign 175 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull) 176 && (((x ^ y) ^ z) & 0x8000000000000000ull)) { 177 // return NaN 178 #ifdef SET_STATUS_FLAGS 179 __set_status_flags (pfpsf, INVALID_EXCEPTION); 180 #endif 181 BID_RETURN (0x7c00000000000000ull); 182 } 183 // otherwise return +/-Inf 184 BID_RETURN (((x ^ y) & 0x8000000000000000ull) | 185 0x7800000000000000ull); 186 } 187 // x is 0 188 if (((y & 0x7800000000000000ull) != 0x7800000000000000ull) 189 && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) { 190 191 if (coefficient_z) { 192 exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y; 193 194 sign_z = z & 0x8000000000000000ull; 195 196 if (exponent_y >= exponent_z) 197 BID_RETURN (z); 198 res = 199 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z, 200 &rnd_mode, pfpsf); 201 BID_RETURN (res); 202 } 203 } 204 } 205 if (!valid_y) { 206 // y is Inf. or 0 207 208 // y is Infinity? 209 if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) { 210 // check if x is 0 211 if (!coefficient_x) { 212 // y==0, return NaN 213 #ifdef SET_STATUS_FLAGS 214 __set_status_flags (pfpsf, INVALID_EXCEPTION); 215 #endif 216 BID_RETURN (0x7c00000000000000ull); 217 } 218 // test if z is Inf of oposite sign 219 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull) 220 && (((x ^ y) ^ z) & 0x8000000000000000ull)) { 221 #ifdef SET_STATUS_FLAGS 222 __set_status_flags (pfpsf, INVALID_EXCEPTION); 223 #endif 224 // return NaN 225 BID_RETURN (0x7c00000000000000ull); 226 } 227 // otherwise return +/-Inf 228 BID_RETURN (((x ^ y) & 0x8000000000000000ull) | 229 0x7800000000000000ull); 230 } 231 // y is 0 232 if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) { 233 234 if (coefficient_z) { 235 exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS; 236 237 sign_z = z & 0x8000000000000000ull; 238 239 if (exponent_y >= exponent_z) 240 BID_RETURN (z); 241 res = 242 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z, 243 &rnd_mode, pfpsf); 244 BID_RETURN (res); 245 } 246 } 247 } 248 249 if (!valid_z) { 250 // y is Inf. or 0 251 252 // test if y is NaN/Inf 253 if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) { 254 BID_RETURN (coefficient_z & QUIET_MASK64); 255 } 256 // z is 0, return x*y 257 if ((!coefficient_x) || (!coefficient_y)) { 258 //0+/-0 259 exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS; 260 if (exponent_x > DECIMAL_MAX_EXPON_64) 261 exponent_x = DECIMAL_MAX_EXPON_64; 262 else if (exponent_x < 0) 263 exponent_x = 0; 264 if (exponent_x <= exponent_z) 265 res = ((UINT64) exponent_x) << 53; 266 else 267 res = ((UINT64) exponent_z) << 53; 268 if ((sign_x ^ sign_y) == sign_z) 269 res |= sign_z; 270 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 271 #ifndef IEEE_ROUND_NEAREST 272 else if (rnd_mode == ROUNDING_DOWN) 273 res |= 0x8000000000000000ull; 274 #endif 275 #endif 276 BID_RETURN (res); 277 } 278 } 279 } 280 281 /* get binary coefficients of x and y */ 282 283 //--- get number of bits in the coefficients of x and y --- 284 // version 2 (original) 285 tempx.d = (double) coefficient_x; 286 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52); 287 288 tempy.d = (double) coefficient_y; 289 bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52); 290 291 // magnitude estimate for coefficient_x*coefficient_y is 292 // 2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx) 293 bin_expon_product = bin_expon_cx + bin_expon_cy; 294 295 // check if coefficient_x*coefficient_y<2^(10*k+3) 296 // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1 297 if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) { 298 // easy multiply 299 C64 = coefficient_x * coefficient_y; 300 final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS; 301 if ((final_exponent > 0) || (!coefficient_z)) { 302 res = 303 get_add64 (sign_x ^ sign_y, 304 final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf); 305 BID_RETURN (res); 306 } else { 307 P.w[0] = C64; 308 P.w[1] = 0; 309 extra_digits = 0; 310 } 311 } else { 312 if (!coefficient_z) { 313 #if DECIMAL_CALL_BY_REFERENCE 314 bid64_mul (&res, px, 315 py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 316 _EXC_INFO_ARG); 317 #else 318 res = 319 bid64_mul (x, 320 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 321 _EXC_INFO_ARG); 322 #endif 323 BID_RETURN (res); 324 } 325 // get 128-bit product: coefficient_x*coefficient_y 326 __mul_64x64_to_128 (P, coefficient_x, coefficient_y); 327 328 // tighten binary range of P: leading bit is 2^bp 329 // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1 330 bin_expon_product -= 2 * BINARY_EXPONENT_BIAS; 331 __tight_bin_range_128 (bp, P, bin_expon_product); 332 333 // get number of decimal digits in the product 334 digits_p = estimate_decimal_digits[bp]; 335 if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P))) 336 digits_p++; // if power10_table_128[digits_p] <= P 337 338 // determine number of decimal digits to be rounded out 339 extra_digits = digits_p - MAX_FORMAT_DIGITS; 340 final_exponent = 341 exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS; 342 } 343 344 if (((unsigned) final_exponent) >= 3 * 256) { 345 if (final_exponent < 0) { 346 //--- get number of bits in the coefficients of z --- 347 tempx.d = (double) coefficient_z; 348 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 349 // get number of decimal digits in the coeff_x 350 digits_z = estimate_decimal_digits[bin_expon_cx]; 351 if (coefficient_z >= power10_table_128[digits_z].w[0]) 352 digits_z++; 353 // underflow 354 if ((final_exponent + 16 < 0) 355 || (exponent_z + digits_z > 33 + final_exponent)) { 356 res = 357 BID_normalize (sign_z, exponent_z, coefficient_z, 358 sign_x ^ sign_y, 1, rnd_mode, pfpsf); 359 BID_RETURN (res); 360 } 361 362 ez = exponent_z + digits_z - 16; 363 if (ez < 0) 364 ez = 0; 365 scale_z = exponent_z - ez; 366 coefficient_z *= power10_table_128[scale_z].w[0]; 367 ey = final_exponent - extra_digits; 368 extra_digits = ez - ey; 369 if (extra_digits > 33) { 370 res = 371 BID_normalize (sign_z, exponent_z, coefficient_z, 372 sign_x ^ sign_y, 1, rnd_mode, pfpsf); 373 BID_RETURN (res); 374 } 375 //else // extra_digits<=32 376 377 if (extra_digits > 17) { 378 CYh = __truncate (P, 16); 379 // get remainder 380 T = power10_table_128[16].w[0]; 381 __mul_64x64_to_64 (CY0L, CYh, T); 382 remainder_y = P.w[0] - CY0L; 383 384 extra_digits -= 16; 385 P.w[0] = CYh; 386 P.w[1] = 0; 387 } else 388 remainder_y = 0; 389 390 // align coeff_x, CYh 391 __mul_64x64_to_128 (CZ, coefficient_z, 392 power10_table_128[extra_digits].w[0]); 393 394 if (sign_z == (sign_y ^ sign_x)) { 395 __add_128_128 (CT, CZ, P); 396 if (__unsigned_compare_ge_128 397 (CT, power10_table_128[16 + extra_digits])) { 398 extra_digits++; 399 ez++; 400 } 401 } else { 402 if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) { 403 P.w[0]++; 404 if (!P.w[0]) 405 P.w[1]++; 406 } 407 __sub_128_128 (CT, CZ, P); 408 if (((SINT64) CT.w[1]) < 0) { 409 sign_z = sign_y ^ sign_x; 410 CT.w[0] = 0 - CT.w[0]; 411 CT.w[1] = 0 - CT.w[1]; 412 if (CT.w[0]) 413 CT.w[1]--; 414 } else if(!(CT.w[1]|CT.w[0])) 415 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull; 416 if (ez 417 && 418 (__unsigned_compare_gt_128 419 (power10_table_128[15 + extra_digits], CT))) { 420 extra_digits--; 421 ez--; 422 } 423 } 424 425 #ifdef SET_STATUS_FLAGS 426 uf_status = 0; 427 if ((!ez) 428 && 429 __unsigned_compare_gt_128 (power10_table_128 430 [extra_digits + 15], CT)) { 431 rmode = rnd_mode; 432 if (sign_z && (unsigned) (rmode - 1) < 2) 433 rmode = 3 - rmode; 434 //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]); 435 PU = power10_table_128[extra_digits + 15]; 436 PU.w[0]--; 437 if (__unsigned_compare_gt_128 (PU, CT) 438 || (rmode == ROUNDING_DOWN) 439 || (rmode == ROUNDING_TO_ZERO)) 440 uf_status = UNDERFLOW_EXCEPTION; 441 else if (extra_digits < 2) { 442 if ((rmode == ROUNDING_UP)) { 443 if (!extra_digits) 444 uf_status = UNDERFLOW_EXCEPTION; 445 else { 446 if (remainder_y && (sign_z != (sign_y ^ sign_x))) 447 remainder_y = power10_table_128[16].w[0] - remainder_y; 448 449 if (power10_table_128[15].w[0] > remainder_y) 450 uf_status = UNDERFLOW_EXCEPTION; 451 } 452 } else // RN or RN_away 453 { 454 if (remainder_y && (sign_z != (sign_y ^ sign_x))) 455 remainder_y = power10_table_128[16].w[0] - remainder_y; 456 457 if (!extra_digits) { 458 remainder_y += round_const_table[rmode][15]; 459 if (remainder_y < power10_table_128[16].w[0]) 460 uf_status = UNDERFLOW_EXCEPTION; 461 } else { 462 if (remainder_y < round_const_table[rmode][16]) 463 uf_status = UNDERFLOW_EXCEPTION; 464 } 465 } 466 //__set_status_flags (pfpsf, uf_status); 467 } 468 } 469 #endif 470 res = 471 __bid_full_round64_remainder (sign_z, ez - extra_digits, CT, 472 extra_digits, remainder_y, 473 rnd_mode, pfpsf, uf_status); 474 BID_RETURN (res); 475 476 } else { 477 if ((sign_z == (sign_x ^ sign_y)) 478 || (final_exponent > 3 * 256 + 15)) { 479 res = 480 fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent, 481 1000000000000000ull, rnd_mode, 482 pfpsf); 483 BID_RETURN (res); 484 } 485 } 486 } 487 488 489 if (extra_digits > 0) { 490 res = 491 get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y, 492 final_exponent, P, extra_digits, rnd_mode, pfpsf); 493 BID_RETURN (res); 494 } 495 // go to convert_format and exit 496 else { 497 C64 = __low_64 (P); 498 499 res = 500 get_add64 (sign_x ^ sign_y, 501 exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64, 502 sign_z, exponent_z, coefficient_z, 503 rnd_mode, pfpsf); 504 BID_RETURN (res); 505 } 506 } 507