1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free 7 Software Foundation; either version 3, or (at your option) any later 8 version. 9 10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 /***************************************************************************** 25 * BID64 add 26 ***************************************************************************** 27 * 28 * Algorithm description: 29 * 30 * if(exponent_a < exponent_b) 31 * switch a, b 32 * diff_expon = exponent_a - exponent_b 33 * if(diff_expon > 16) 34 * return normalize(a) 35 * if(coefficient_a*10^diff_expon guaranteed below 2^62) 36 * S = sign_a*coefficient_a*10^diff_expon + sign_b*coefficient_b 37 * if(|S|<10^16) 38 * return get_BID64(sign(S),exponent_b,|S|) 39 * else 40 * determine number of extra digits in S (1, 2, or 3) 41 * return rounded result 42 * else // large exponent difference 43 * if(number_digits(coefficient_a*10^diff_expon) +/- 10^16) 44 * guaranteed the same as 45 * number_digits(coefficient_a*10^diff_expon) ) 46 * S = normalize(coefficient_a + (sign_a^sign_b)*10^(16-diff_expon)) 47 * corr = 10^16 + (sign_a^sign_b)*coefficient_b 48 * corr*10^exponent_b is rounded so it aligns with S*10^exponent_S 49 * return get_BID64(sign_a,exponent(S),S+rounded(corr)) 50 * else 51 * add sign_a*coefficient_a*10^diff_expon, sign_b*coefficient_b 52 * in 128-bit integer arithmetic, then round to 16 decimal digits 53 * 54 * 55 ****************************************************************************/ 56 57 #include "bid_internal.h" 58 59 #if DECIMAL_CALL_BY_REFERENCE 60 void bid64_add (UINT64 * pres, UINT64 * px, 61 UINT64 * 62 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 63 _EXC_INFO_PARAM); 64 #else 65 UINT64 bid64_add (UINT64 x, 66 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM 67 _EXC_MASKS_PARAM _EXC_INFO_PARAM); 68 #endif 69 70 #if DECIMAL_CALL_BY_REFERENCE 71 72 void 73 bid64_sub (UINT64 * pres, UINT64 * px, 74 UINT64 * 75 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 76 _EXC_INFO_PARAM) { 77 UINT64 y = *py; 78 #if !DECIMAL_GLOBAL_ROUNDING 79 _IDEC_round rnd_mode = *prnd_mode; 80 #endif 81 // check if y is not NaN 82 if (((y & NAN_MASK64) != NAN_MASK64)) 83 y ^= 0x8000000000000000ull; 84 bid64_add (pres, px, 85 &y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 86 _EXC_INFO_ARG); 87 } 88 #else 89 90 UINT64 91 bid64_sub (UINT64 x, 92 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM 93 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { 94 // check if y is not NaN 95 if (((y & NAN_MASK64) != NAN_MASK64)) 96 y ^= 0x8000000000000000ull; 97 98 return bid64_add (x, 99 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 100 _EXC_INFO_ARG); 101 } 102 #endif 103 104 105 106 #if DECIMAL_CALL_BY_REFERENCE 107 108 void 109 bid64_add (UINT64 * pres, UINT64 * px, 110 UINT64 * 111 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 112 _EXC_INFO_PARAM) { 113 UINT64 x, y; 114 #else 115 116 UINT64 117 bid64_add (UINT64 x, 118 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM 119 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { 120 #endif 121 122 UINT128 CA, CT, CT_new; 123 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, C64_new; 124 UINT64 valid_x, valid_y; 125 UINT64 res; 126 UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab, 127 rem_a; 128 UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp; 129 int_double tempx; 130 int exponent_x, exponent_y, exponent_a, exponent_b, diff_dec_expon; 131 int bin_expon_ca, extra_digits, amount, scale_k, scale_ca; 132 unsigned rmode, status; 133 134 #if DECIMAL_CALL_BY_REFERENCE 135 #if !DECIMAL_GLOBAL_ROUNDING 136 _IDEC_round rnd_mode = *prnd_mode; 137 #endif 138 x = *px; 139 y = *py; 140 #endif 141 142 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x); 143 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y); 144 145 // unpack arguments, check for NaN or Infinity 146 if (!valid_x) { 147 // x is Inf. or NaN 148 149 // test if x is NaN 150 if ((x & NAN_MASK64) == NAN_MASK64) { 151 #ifdef SET_STATUS_FLAGS 152 if (((x & SNAN_MASK64) == SNAN_MASK64) // sNaN 153 || ((y & SNAN_MASK64) == SNAN_MASK64)) 154 __set_status_flags (pfpsf, INVALID_EXCEPTION); 155 #endif 156 res = coefficient_x & QUIET_MASK64; 157 BID_RETURN (res); 158 } 159 // x is Infinity? 160 if ((x & INFINITY_MASK64) == INFINITY_MASK64) { 161 // check if y is Inf 162 if (((y & NAN_MASK64) == INFINITY_MASK64)) { 163 if (sign_x == (y & 0x8000000000000000ull)) { 164 res = coefficient_x; 165 BID_RETURN (res); 166 } 167 // return NaN 168 { 169 #ifdef SET_STATUS_FLAGS 170 __set_status_flags (pfpsf, INVALID_EXCEPTION); 171 #endif 172 res = NAN_MASK64; 173 BID_RETURN (res); 174 } 175 } 176 // check if y is NaN 177 if (((y & NAN_MASK64) == NAN_MASK64)) { 178 res = coefficient_y & QUIET_MASK64; 179 #ifdef SET_STATUS_FLAGS 180 if (((y & SNAN_MASK64) == SNAN_MASK64)) 181 __set_status_flags (pfpsf, INVALID_EXCEPTION); 182 #endif 183 BID_RETURN (res); 184 } 185 // otherwise return +/-Inf 186 { 187 res = coefficient_x; 188 BID_RETURN (res); 189 } 190 } 191 // x is 0 192 { 193 if (((y & INFINITY_MASK64) != INFINITY_MASK64) && coefficient_y) { 194 if (exponent_y <= exponent_x) { 195 res = y; 196 BID_RETURN (res); 197 } 198 } 199 } 200 201 } 202 if (!valid_y) { 203 // y is Inf. or NaN? 204 if (((y & INFINITY_MASK64) == INFINITY_MASK64)) { 205 #ifdef SET_STATUS_FLAGS 206 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN 207 __set_status_flags (pfpsf, INVALID_EXCEPTION); 208 #endif 209 res = coefficient_y & QUIET_MASK64; 210 BID_RETURN (res); 211 } 212 // y is 0 213 if (!coefficient_x) { // x==0 214 if (exponent_x <= exponent_y) 215 res = ((UINT64) exponent_x) << 53; 216 else 217 res = ((UINT64) exponent_y) << 53; 218 if (sign_x == sign_y) 219 res |= sign_x; 220 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 221 #ifndef IEEE_ROUND_NEAREST 222 if (rnd_mode == ROUNDING_DOWN && sign_x != sign_y) 223 res |= 0x8000000000000000ull; 224 #endif 225 #endif 226 BID_RETURN (res); 227 } else if (exponent_y >= exponent_x) { 228 res = x; 229 BID_RETURN (res); 230 } 231 } 232 // sort arguments by exponent 233 if (exponent_x < exponent_y) { 234 sign_a = sign_y; 235 exponent_a = exponent_y; 236 coefficient_a = coefficient_y; 237 sign_b = sign_x; 238 exponent_b = exponent_x; 239 coefficient_b = coefficient_x; 240 } else { 241 sign_a = sign_x; 242 exponent_a = exponent_x; 243 coefficient_a = coefficient_x; 244 sign_b = sign_y; 245 exponent_b = exponent_y; 246 coefficient_b = coefficient_y; 247 } 248 249 // exponent difference 250 diff_dec_expon = exponent_a - exponent_b; 251 252 /* get binary coefficients of x and y */ 253 254 //--- get number of bits in the coefficients of x and y --- 255 256 // version 2 (original) 257 tempx.d = (double) coefficient_a; 258 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 259 260 if (diff_dec_expon > MAX_FORMAT_DIGITS) { 261 // normalize a to a 16-digit coefficient 262 263 scale_ca = estimate_decimal_digits[bin_expon_ca]; 264 if (coefficient_a >= power10_table_128[scale_ca].w[0]) 265 scale_ca++; 266 267 scale_k = 16 - scale_ca; 268 269 coefficient_a *= power10_table_128[scale_k].w[0]; 270 271 diff_dec_expon -= scale_k; 272 exponent_a -= scale_k; 273 274 /* get binary coefficients of x and y */ 275 276 //--- get number of bits in the coefficients of x and y --- 277 tempx.d = (double) coefficient_a; 278 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 279 280 if (diff_dec_expon > MAX_FORMAT_DIGITS) { 281 #ifdef SET_STATUS_FLAGS 282 if (coefficient_b) { 283 __set_status_flags (pfpsf, INEXACT_EXCEPTION); 284 } 285 #endif 286 287 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 288 #ifndef IEEE_ROUND_NEAREST 289 if (((rnd_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST 290 { 291 switch (rnd_mode) { 292 case ROUNDING_DOWN: 293 if (sign_b) { 294 coefficient_a -= ((((SINT64) sign_a) >> 63) | 1); 295 if (coefficient_a < 1000000000000000ull) { 296 exponent_a--; 297 coefficient_a = 9999999999999999ull; 298 } else if (coefficient_a >= 10000000000000000ull) { 299 exponent_a++; 300 coefficient_a = 1000000000000000ull; 301 } 302 } 303 break; 304 case ROUNDING_UP: 305 if (!sign_b) { 306 coefficient_a += ((((SINT64) sign_a) >> 63) | 1); 307 if (coefficient_a < 1000000000000000ull) { 308 exponent_a--; 309 coefficient_a = 9999999999999999ull; 310 } else if (coefficient_a >= 10000000000000000ull) { 311 exponent_a++; 312 coefficient_a = 1000000000000000ull; 313 } 314 } 315 break; 316 default: // RZ 317 if (sign_a != sign_b) { 318 coefficient_a--; 319 if (coefficient_a < 1000000000000000ull) { 320 exponent_a--; 321 coefficient_a = 9999999999999999ull; 322 } 323 } 324 break; 325 } 326 } else 327 #endif 328 #endif 329 // check special case here 330 if ((coefficient_a == 1000000000000000ull) 331 && (diff_dec_expon == MAX_FORMAT_DIGITS + 1) 332 && (sign_a ^ sign_b) 333 && (coefficient_b > 5000000000000000ull)) { 334 coefficient_a = 9999999999999999ull; 335 exponent_a--; 336 } 337 338 res = 339 fast_get_BID64_check_OF (sign_a, exponent_a, coefficient_a, 340 rnd_mode, pfpsf); 341 BID_RETURN (res); 342 } 343 } 344 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62 345 if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) { 346 // coefficient_a*10^(exponent_a-exponent_b)<2^63 347 348 // multiply by 10^(exponent_a-exponent_b) 349 coefficient_a *= power10_table_128[diff_dec_expon].w[0]; 350 351 // sign mask 352 sign_b = ((SINT64) sign_b) >> 63; 353 // apply sign to coeff. of b 354 coefficient_b = (coefficient_b + sign_b) ^ sign_b; 355 356 // apply sign to coefficient a 357 sign_a = ((SINT64) sign_a) >> 63; 358 coefficient_a = (coefficient_a + sign_a) ^ sign_a; 359 360 coefficient_a += coefficient_b; 361 // get sign 362 sign_s = ((SINT64) coefficient_a) >> 63; 363 coefficient_a = (coefficient_a + sign_s) ^ sign_s; 364 sign_s &= 0x8000000000000000ull; 365 366 // coefficient_a < 10^16 ? 367 if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) { 368 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 369 #ifndef IEEE_ROUND_NEAREST 370 if (rnd_mode == ROUNDING_DOWN && (!coefficient_a) 371 && sign_a != sign_b) 372 sign_s = 0x8000000000000000ull; 373 #endif 374 #endif 375 res = very_fast_get_BID64 (sign_s, exponent_b, coefficient_a); 376 BID_RETURN (res); 377 } 378 // otherwise rounding is necessary 379 380 // already know coefficient_a<10^19 381 // coefficient_a < 10^17 ? 382 if (coefficient_a < power10_table_128[17].w[0]) 383 extra_digits = 1; 384 else if (coefficient_a < power10_table_128[18].w[0]) 385 extra_digits = 2; 386 else 387 extra_digits = 3; 388 389 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 390 #ifndef IEEE_ROUND_NEAREST 391 rmode = rnd_mode; 392 if (sign_s && (unsigned) (rmode - 1) < 2) 393 rmode = 3 - rmode; 394 #else 395 rmode = 0; 396 #endif 397 #else 398 rmode = 0; 399 #endif 400 coefficient_a += round_const_table[rmode][extra_digits]; 401 402 // get P*(2^M[extra_digits])/10^extra_digits 403 __mul_64x64_to_128 (CT, coefficient_a, 404 reciprocals10_64[extra_digits]); 405 406 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 407 amount = short_recip_scale[extra_digits]; 408 C64 = CT.w[1] >> amount; 409 410 } else { 411 // coefficient_a*10^(exponent_a-exponent_b) is large 412 sign_s = sign_a; 413 414 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 415 #ifndef IEEE_ROUND_NEAREST 416 rmode = rnd_mode; 417 if (sign_s && (unsigned) (rmode - 1) < 2) 418 rmode = 3 - rmode; 419 #else 420 rmode = 0; 421 #endif 422 #else 423 rmode = 0; 424 #endif 425 426 // check whether we can take faster path 427 scale_ca = estimate_decimal_digits[bin_expon_ca]; 428 429 sign_ab = sign_a ^ sign_b; 430 sign_ab = ((SINT64) sign_ab) >> 63; 431 432 // T1 = 10^(16-diff_dec_expon) 433 T1 = power10_table_128[16 - diff_dec_expon].w[0]; 434 435 // get number of digits in coefficient_a 436 if (coefficient_a >= power10_table_128[scale_ca].w[0]) { 437 scale_ca++; 438 } 439 440 scale_k = 16 - scale_ca; 441 442 // addition 443 saved_ca = coefficient_a - T1; 444 coefficient_a = 445 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0]; 446 extra_digits = diff_dec_expon - scale_k; 447 448 // apply sign 449 saved_cb = (coefficient_b + sign_ab) ^ sign_ab; 450 // add 10^16 and rounding constant 451 coefficient_b = 452 saved_cb + 10000000000000000ull + 453 round_const_table[rmode][extra_digits]; 454 455 // get P*(2^M[extra_digits])/10^extra_digits 456 __mul_64x64_to_128 (CT, coefficient_b, 457 reciprocals10_64[extra_digits]); 458 459 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 460 amount = short_recip_scale[extra_digits]; 461 C0_64 = CT.w[1] >> amount; 462 463 // result coefficient 464 C64 = C0_64 + coefficient_a; 465 // filter out difficult (corner) cases 466 // this test ensures the number of digits in coefficient_a does not change 467 // after adding (the appropriately scaled and rounded) coefficient_b 468 if ((UINT64) (C64 - 1000000000000000ull - 1) > 469 9000000000000000ull - 2) { 470 if (C64 >= 10000000000000000ull) { 471 // result has more than 16 digits 472 if (!scale_k) { 473 // must divide coeff_a by 10 474 saved_ca = saved_ca + T1; 475 __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull); 476 //reciprocals10_64[1]); 477 coefficient_a = CA.w[1] >> 1; 478 rem_a = 479 saved_ca - (coefficient_a << 3) - (coefficient_a << 1); 480 coefficient_a = coefficient_a - T1; 481 482 saved_cb += rem_a * power10_table_128[diff_dec_expon].w[0]; 483 } else 484 coefficient_a = 485 (SINT64) (saved_ca - T1 - 486 (T1 << 3)) * (SINT64) power10_table_128[scale_k - 487 1].w[0]; 488 489 extra_digits++; 490 coefficient_b = 491 saved_cb + 100000000000000000ull + 492 round_const_table[rmode][extra_digits]; 493 494 // get P*(2^M[extra_digits])/10^extra_digits 495 __mul_64x64_to_128 (CT, coefficient_b, 496 reciprocals10_64[extra_digits]); 497 498 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 499 amount = short_recip_scale[extra_digits]; 500 C0_64 = CT.w[1] >> amount; 501 502 // result coefficient 503 C64 = C0_64 + coefficient_a; 504 } else if (C64 <= 1000000000000000ull) { 505 // less than 16 digits in result 506 coefficient_a = 507 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k + 508 1].w[0]; 509 //extra_digits --; 510 exponent_b--; 511 coefficient_b = 512 (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull + 513 round_const_table[rmode][extra_digits]; 514 515 // get P*(2^M[extra_digits])/10^extra_digits 516 __mul_64x64_to_128 (CT_new, coefficient_b, 517 reciprocals10_64[extra_digits]); 518 519 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 520 amount = short_recip_scale[extra_digits]; 521 C0_64 = CT_new.w[1] >> amount; 522 523 // result coefficient 524 C64_new = C0_64 + coefficient_a; 525 if (C64_new < 10000000000000000ull) { 526 C64 = C64_new; 527 #ifdef SET_STATUS_FLAGS 528 CT = CT_new; 529 #endif 530 } else 531 exponent_b++; 532 } 533 534 } 535 536 } 537 538 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 539 #ifndef IEEE_ROUND_NEAREST 540 if (rmode == 0) //ROUNDING_TO_NEAREST 541 #endif 542 if (C64 & 1) { 543 // check whether fractional part of initial_P/10^extra_digits is 544 // exactly .5 545 // this is the same as fractional part of 546 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero 547 548 // get remainder 549 remainder_h = CT.w[1] << (64 - amount); 550 551 // test whether fractional part is 0 552 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) { 553 C64--; 554 } 555 } 556 #endif 557 558 #ifdef SET_STATUS_FLAGS 559 status = INEXACT_EXCEPTION; 560 561 // get remainder 562 remainder_h = CT.w[1] << (64 - amount); 563 564 switch (rmode) { 565 case ROUNDING_TO_NEAREST: 566 case ROUNDING_TIES_AWAY: 567 // test whether fractional part is 0 568 if ((remainder_h == 0x8000000000000000ull) 569 && (CT.w[0] < reciprocals10_64[extra_digits])) 570 status = EXACT_STATUS; 571 break; 572 case ROUNDING_DOWN: 573 case ROUNDING_TO_ZERO: 574 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) 575 status = EXACT_STATUS; 576 //if(!C64 && rmode==ROUNDING_DOWN) sign_s=sign_y; 577 break; 578 default: 579 // round up 580 __add_carry_out (tmp, carry, CT.w[0], 581 reciprocals10_64[extra_digits]); 582 if ((remainder_h >> (64 - amount)) + carry >= 583 (((UINT64) 1) << amount)) 584 status = EXACT_STATUS; 585 break; 586 } 587 __set_status_flags (pfpsf, status); 588 589 #endif 590 591 res = 592 fast_get_BID64_check_OF (sign_s, exponent_b + extra_digits, C64, 593 rnd_mode, pfpsf); 594 BID_RETURN (res); 595 } 596