/* Copyright (C) 2007-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*****************************************************************************
 *
 *  BID128 fma   x * y + z
 *
 ****************************************************************************/

#include "bid_internal.h"

// Adjust *ptrres - whose sign and coefficient are already correct for
// rounding to nearest-even - to the actual rounding mode rnd_mode, using
// the four midpoint/inexact indicators that were determined for the
// round-to-nearest result.  The exponent field of *ptrres is ignored; the
// unbiased exponent unbexp is used instead, and may be larger than expmax,
// in which case the rnd_mode-specific overflow result (inf or MAXFP) is
// produced.  Sets INEXACT / UNDERFLOW / OVERFLOW in *ptrfpsf as needed.
static void
rounding_correction (unsigned int rnd_mode,
		     unsigned int is_inexact_lt_midpoint,
		     unsigned int is_inexact_gt_midpoint,
		     unsigned int is_midpoint_lt_even,
		     unsigned int is_midpoint_gt_even,
		     int unbexp,
		     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
  // unbiased true exponent unbexp may be larger than emax

  UINT128 res = *ptrres; // expected to have the correct sign and coefficient
  // (the exponent field is ignored, as unbexp is used instead)
  UINT64 sign, exp;
  UINT64 C_hi, C_lo;

  // general correction from RN to RA, RM, RP, RZ
  // Note: if the result is negative, then is_inexact_lt_midpoint,
  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
  // have to be considered as if determined for the absolute value of the
  // result (so they seem to be reversed)

  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
      is_midpoint_lt_even || is_midpoint_gt_even) {
    *ptrfpsf |= INEXACT_EXCEPTION;
  }
  // apply correction to result calculated with unbounded exponent
  sign = res.w[1] & MASK_SIGN;
  exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
  C_hi = res.w[1] & MASK_COEFF;
  C_lo = res.w[0];
  // correction up: the RN result was truncated below the exact value, and
  // the mode rounds away from zero (for this sign) or ties away from zero
  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
      is_midpoint_gt_even))) ||
      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
      is_midpoint_gt_even)))) {
    // C = C + 1
    C_lo = C_lo + 1;
    if (C_lo == 0) // carry out of the low 64 bits
      C_hi = C_hi + 1;
    if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
      // C = 10^34 => rounding overflow; renormalize to 34 digits
      C_hi = 0x0000314dc6448d93ull;
      C_lo = 0x38c15b0a00000000ull; // 10^33
      // exp = exp + EXP_P1;
      unbexp = unbexp + 1;
      exp = (UINT64) (unbexp + 6176) << 49;
    }
  } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
      (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
    // correction down: the RN result was rounded above the exact value, and
    // the mode rounds toward zero (for this sign)
    // C = C - 1
    C_lo = C_lo - 1;
    if (C_lo == 0xffffffffffffffffull) // borrow from the high 64 bits
      C_hi--;
    // check if we crossed into the lower decade
    if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
      // C = 10^33 - 1; renormalize to 34 digits if the exponent allows it
      if (exp > 0) {
	C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
	C_lo = 0x378d8e63ffffffffull;
	// exp = exp - EXP_P1;
	unbexp = unbexp - 1;
	exp = (UINT64) (unbexp + 6176) << 49;
      } else { // if exp = 0 the result is tiny & inexact
	*ptrfpsf |= UNDERFLOW_EXCEPTION;
      }
    }
  } else {
    ; // the result is already correct
  }
  if (unbexp > expmax) { // 6111
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    exp = 0;
    if (!sign) { // result is positive
      if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
	C_hi = 0x7800000000000000ull;
	C_lo = 0x0000000000000000ull;
      } else { // res = +MAXFP = (10^34-1) * 10^emax
	C_hi = 0x5fffed09bead87c0ull;
	C_lo = 0x378d8e63ffffffffull;
      }
    } else { // result is negative
      if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
	C_hi = 0xf800000000000000ull;
	C_lo = 0x0000000000000000ull;
      } else { // res = -MAXFP = -(10^34-1) * 10^emax
	C_hi = 0xdfffed09bead87c0ull;
	C_lo = 0x378d8e63ffffffffull;
      }
    }
  }
  // assemble the result
  res.w[1] = sign | exp | C_hi;
  res.w[0] = C_lo;
  *ptrres = res;
}

// *pz = x + y for 256-bit unsigned integers stored as four 64-bit words,
// little-endian word order (w[0] least significant); the caller guarantees
// that the sum fits in 256 bits, so any final carry out of w[3] is dropped.
static void
add256 (UINT256 x, UINT256 y, UINT256 * pz) {
  // *z = x + y; assume the sum fits in 256 bits
  UINT256 z;
  z.w[0] = x.w[0] + y.w[0];
  if (z.w[0] < x.w[0]) { // carry out of the low word; propagate through x
    x.w[1]++;
    if (x.w[1] == 0x0000000000000000ull) {
      x.w[2]++;
      if (x.w[2] == 0x0000000000000000ull) {
	x.w[3]++;
      }
    }
  }
  z.w[1] = x.w[1] + y.w[1];
  if (z.w[1] < x.w[1]) { // carry out of word 1
    x.w[2]++;
    if (x.w[2] == 0x0000000000000000ull) {
      x.w[3]++;
    }
  }
  z.w[2] = x.w[2] + y.w[2];
  if (z.w[2] < x.w[2]) { // carry out of word 2
    x.w[3]++;
  }
  z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
  *pz = z;
}

// *pz = x - y for 256-bit unsigned integers (same word layout as add256);
// the caller guarantees x >= y, so no final borrow out of w[3] can occur.
static void
sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
  // *z = x - y; assume x >= y
  UINT256 z;
  z.w[0] = x.w[0] - y.w[0];
  if (z.w[0] > x.w[0]) { // borrow from the low word; propagate through x
    x.w[1]--;
    if (x.w[1] == 0xffffffffffffffffull) {
      x.w[2]--;
      if (x.w[2] == 0xffffffffffffffffull) {
	x.w[3]--;
      }
    }
  }
  z.w[1] = x.w[1] - y.w[1];
  if (z.w[1] > x.w[1]) { // borrow from word 1
    x.w[2]--;
    if (x.w[2] == 0xffffffffffffffffull) {
      x.w[3]--;
    }
  }
  z.w[2] = x.w[2] - y.w[2];
  if (z.w[2] > x.w[2]) { // borrow from word 2
    x.w[3]--;
  }
  z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
  *pz = z;
}


// Return the number of decimal digits (between 1 and 68) in the 256-bit
// unsigned integer R256, found by comparison against the power-of-ten
// tables ten2k64 (10^1..10^19), ten2k128 (10^19..10^38) and ten2k256.
static int
nr_digits256 (UINT256 R256) {
  int ind;
  // determine the number of decimal digits in R256
  if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
    // between 1 and 19 digits
    for (ind = 1; ind <= 19; ind++) {
      if (R256.w[0] < ten2k64[ind]) {
	break;
      }
    }
    // ind digits
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
      (R256.w[1] < ten2k128[0].w[1] ||
      (R256.w[1] == ten2k128[0].w[1]
      && R256.w[0] < ten2k128[0].w[0]))) {
    // 20 digits
    ind = 20;
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
    // between 21 and 38 digits
    for (ind = 1; ind <= 18; ind++) {
      if (R256.w[1] < ten2k128[ind].w[1] ||
	  (R256.w[1] == ten2k128[ind].w[1] &&
	  R256.w[0] < ten2k128[ind].w[0])) {
	break;
      }
    }
    // ind + 20 digits
    ind = ind + 20;
  } else if (R256.w[3] == 0x0 &&
      (R256.w[2] < ten2k256[0].w[2] ||
      (R256.w[2] == ten2k256[0].w[2] &&
      R256.w[1] < ten2k256[0].w[1]) ||
      (R256.w[2] == ten2k256[0].w[2] &&
      R256.w[1] == ten2k256[0].w[1] &&
      R256.w[0] < ten2k256[0].w[0]))) {
    // 39 digits
    ind = 39;
  } else {
    // between 40 and 68 digits
    for (ind = 1; ind <= 29; ind++) {
      if (R256.w[3] < ten2k256[ind].w[3] ||
	  (R256.w[3] == ten2k256[ind].w[3] &&
	  R256.w[2] < ten2k256[ind].w[2]) ||
	  (R256.w[3] == ten2k256[ind].w[3] &&
	  R256.w[2] == ten2k256[ind].w[2] &&
	  R256.w[1] < ten2k256[ind].w[1]) ||
	  (R256.w[3] == ten2k256[ind].w[3] &&
	  R256.w[2] == ten2k256[ind].w[2] &&
	  R256.w[1] == ten2k256[ind].w[1] &&
	  R256.w[0] < ten2k256[ind].w[0])) {
	break;
      }
    }
    // ind + 39 digits
    ind = ind + 39;
  }
  return (ind);
}

// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
// use the rounding information from ptr_is_* to avoid a double rounding error
static void
add_and_round (int
	       q3,
	       int q4,
	       int e4,
	       int delta,
	       int p34,
	       UINT64 z_sign,
	       UINT64 p_sign,
	       UINT128 C3,
	       UINT256 C4,
	       int rnd_mode,
	       int *ptr_is_midpoint_lt_even,
	       int *ptr_is_midpoint_gt_even,
	       int *ptr_is_inexact_lt_midpoint,
	       int *ptr_is_inexact_gt_midpoint,
	       _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
  // q3 and q4 are the decimal digit counts of C3 and C4; e4 is the
  // (unbiased) exponent of C4; the result is stored in *ptrres and the
  // final midpoint/inexact indicators in *ptr_is_*.

  int scale;
  int x0;
  int ind;
  UINT64 R64;
  UINT128 P128, R128;
  UINT192 P192, R192;
  UINT256 R256;
  int is_midpoint_lt_even = 0;
  int is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0;
  int is_inexact_gt_midpoint = 0;
  int is_midpoint_lt_even0 = 0;
  int is_midpoint_gt_even0 = 0;
  int is_inexact_lt_midpoint0 = 0;
  int is_inexact_gt_midpoint0 = 0;
  int incr_exp = 0;
  int is_tiny = 0;
  int lt_half_ulp = 0;
  int eq_half_ulp = 0;
  // int gt_half_ulp = 0;
  UINT128 res = *ptrres;

  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1

  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
  if (scale == 0) {
    R256.w[3] = 0x0ull;
    R256.w[2] = 0x0ull;
    R256.w[1] = C3.w[1];
    R256.w[0] = C3.w[0];
  } else if (scale <= 19) { // 10^scale fits in 64 bits
    P128.w[1] = 0;
    P128.w[0] = ten2k64[scale];
    __mul_128x128_to_256 (R256, P128, C3);
  } else if (scale <= 38) { // 10^scale fits in 128 bits
    __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
  } else if (scale <= 57) { // 39 <= scale <= 57
    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
    // (10^67 has 223 bits; 10^69 has 230 bits);
    // must split the computation:
    // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
    __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
    // now multiply R128 by 10^38
    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
  } else { // 58 <= scale <= 66
    // 10^scale takes between 193 and 220 bits,
    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
    // must split the computation:
    // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
    __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
    // now calculate 10^38 * 10^(scale-38) * C3
    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
  }
  // C3 * 10^scale is now in R256

  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
  // possible
  // add/subtract C4 and C3 * 10^scale; the exponent is e4
  if (p_sign == z_sign) { // R256 = C4 + R256
    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
    // but may require rounding
    add256 (C4, R256, &R256);
  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
    // but may require rounding

    // compare first R256 = C3 * 10^scale and C4
    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
	(R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
	(R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
	R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
      // but may require rounding
      sub256 (R256, C4, &R256);
      // flip p_sign too, because the result has the sign of z
      p_sign = z_sign;
    } else { // if C4 > C3 * 10^scale
      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
      // but may require rounding
      sub256 (C4, R256, &R256);
    }
    // if the result is pure zero, the sign depends on the rounding mode
    // (x*y and z had opposite signs)
    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
	R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
      if (rnd_mode != ROUNDING_DOWN)
	p_sign = 0x0000000000000000ull;
      else
	p_sign = 0x8000000000000000ull;
      // the exponent is max (e4, expmin)
      if (e4 < -6176)
	e4 = expmin;
      // assemble result
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
      res.w[0] = 0x0;
      *ptrres = res;
      return;
    }
  }

  // determine the number of decimal digits in R256
  ind = nr_digits256 (R256);

  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
  // round to the destination precision, with unbounded exponent

  if (ind <= p34) {
    // result rounded to the destination precision with unbounded exponent
    // is exact
    if (ind + e4 < p34 + expmin) {
      is_tiny = 1; // applies to all rounding modes
    }
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
    res.w[0] = R256.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
  } else { // if (ind > p34)
    // if more than P digits, round to nearest to P digits
    // round R256 to p34 digits
    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
    if (ind <= 38) {
      P128.w[1] = R256.w[1];
      P128.w[0] = R256.w[0];
      round128_19_38 (ind, x0, P128, &R128, &incr_exp,
		      &is_midpoint_lt_even, &is_midpoint_gt_even,
		      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    } else if (ind <= 57) {
      P192.w[2] = R256.w[2];
      P192.w[1] = R256.w[1];
      P192.w[0] = R256.w[0];
      round192_39_57 (ind, x0, P192, &R192, &incr_exp,
		      &is_midpoint_lt_even, &is_midpoint_gt_even,
		      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R192.w[1];
      R128.w[0] = R192.w[0];
    } else { // if (ind <= 68)
      round256_58_76 (ind, x0, R256, &R256, &incr_exp,
		      &is_midpoint_lt_even, &is_midpoint_gt_even,
		      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R256.w[1];
      R128.w[0] = R256.w[0];
    }
    // the rounded result has p34 = 34 digits
    e4 = e4 + x0 + incr_exp;
    if (rnd_mode == ROUNDING_TO_NEAREST) {
      if (e4 < expmin) {
	is_tiny = 1; // for other rounding modes apply correction
      }
    } else {
      // for RM, RP, RZ, RA apply correction in order to determine tininess
      // but do not save the result; apply the correction to
      // (-1)^p_sign * significand * 10^0
      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
      P128.w[0] = R128.w[0];
      rounding_correction (rnd_mode,
			   is_inexact_lt_midpoint,
			   is_inexact_gt_midpoint, is_midpoint_lt_even,
			   is_midpoint_gt_even, 0, &P128, ptrfpsf);
      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
      // the number of digits in the significand is p34 = 34
      if (e4 + scale < expmin) {
	is_tiny = 1;
      }
    }
    ind = p34; // the number of decimal digits in the significand of res
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
    res.w[0] = R128.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
    // set the inexact flag after rounding with bounded exponent, if any
  }
  // at this point we have the result rounded with unbounded exponent in
  // res and we know its tininess:
  // res = (-1)^p_sign * significand * 10^e4,
  // where q (significand) = ind <= p34
  // Note: res is correct only if expmin <= e4 <= expmax

  // check for overflow if RN
  if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
    res.w[1] = p_sign | 0x7800000000000000ull;
    res.w[0] = 0x0000000000000000ull;
    *ptrres = res;
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    return; // BID_RETURN (res)
  } // else not overflow or not RN, so continue

  // if (e4 >= expmin) we have the result rounded with bounded exponent
  if (e4 < expmin) {
    x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
    // where the result rounded [at most] once is
    // (-1)^p_sign * significand_res * 10^e4

    // avoid double rounding error: save the indicators from the first
    // rounding, then clear them before the second (underflow) rounding
    is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
    is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
    is_midpoint_lt_even0 = is_midpoint_lt_even;
    is_midpoint_gt_even0 = is_midpoint_gt_even;
    is_inexact_lt_midpoint = 0;
    is_inexact_gt_midpoint = 0;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;

    if (x0 > ind) {
      // nothing is left of res when moving the decimal point left x0 digits
      is_inexact_lt_midpoint = 1;
      res.w[1] = p_sign | 0x0000000000000000ull;
      res.w[0] = 0x0000000000000000ull;
      e4 = expmin;
    } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
      // this is <, =, or > 1/2 ulp
      // compare the ind-digit value in the significand of res with
      // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
      // less than, equal to, or greater than 1/2 ulp (significand of res)
      R128.w[1] = res.w[1] & MASK_COEFF;
      R128.w[0] = res.w[0];
      if (ind <= 19) {
	if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
	  lt_half_ulp = 1;
	  is_inexact_lt_midpoint = 1;
	} else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
	  eq_half_ulp = 1;
	  is_midpoint_gt_even = 1;
	} else { // > 1/2 ulp
	  // gt_half_ulp = 1;
	  is_inexact_gt_midpoint = 1;
	}
      } else { // if (ind <= 38) {
	if (R128.w[1] < midpoint128[ind - 20].w[1] ||
	    (R128.w[1] == midpoint128[ind - 20].w[1] &&
	    R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
	  lt_half_ulp = 1;
	  is_inexact_lt_midpoint = 1;
	} else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
	    R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
	  eq_half_ulp = 1;
	  is_midpoint_gt_even = 1;
	} else { // > 1/2 ulp
	  // gt_half_ulp = 1;
	  is_inexact_gt_midpoint = 1;
	}
      }
      if (lt_half_ulp || eq_half_ulp) {
	// res = +0.0 * 10^expmin
	res.w[1] = 0x0000000000000000ull;
	res.w[0] = 0x0000000000000000ull;
      } else { // if (gt_half_ulp)
	// res = +1 * 10^expmin
	res.w[1] = 0x0000000000000000ull;
	res.w[0] = 0x0000000000000001ull;
      }
      res.w[1] = p_sign | res.w[1];
      e4 = expmin;
    } else { // if (1 <= x0 <= ind - 1 <= 33)
      // round the ind-digit result to ind - x0 digits

      if (ind <= 18) { // 2 <= ind <= 18
	round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
		      &is_midpoint_lt_even, &is_midpoint_gt_even,
		      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
	res.w[1] = 0x0;
	res.w[0] = R64;
      } else if (ind <= 38) {
	P128.w[1] = res.w[1] & MASK_COEFF;
	P128.w[0] = res.w[0];
	round128_19_38 (ind, x0, P128, &res, &incr_exp,
			&is_midpoint_lt_even, &is_midpoint_gt_even,
			&is_inexact_lt_midpoint,
			&is_inexact_gt_midpoint);
      }
      e4 = e4 + x0; // expmin
      // we want the exponent to be expmin, so if incr_exp = 1 then
      // multiply the rounded result by 10 - it will still fit in 113 bits
      if (incr_exp) {
	// 64 x 128 -> 128
	P128.w[1] = res.w[1] & MASK_COEFF;
	P128.w[0] = res.w[0];
	__mul_64x128_to_128 (res, ten2k64[1], P128);
      }
      res.w[1] =
	p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
      // avoid a double rounding error
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
	  is_midpoint_lt_even) { // double rounding error upward
	// res = res - 1
	res.w[0]--;
	if (res.w[0] == 0xffffffffffffffffull)
	  res.w[1]--;
	// Note: a double rounding error upward is not possible; for this
	// the result after the first rounding would have to be 99...95
	// (35 digits in all), possibly followed by a number of zeros; this
	// is not possible in Cases (2)-(6) or (15)-(17) which may get here
	is_midpoint_lt_even = 0;
	is_inexact_lt_midpoint = 1;
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
	  is_midpoint_gt_even) { // double rounding error downward
	// res = res + 1
	res.w[0]++;
	if (res.w[0] == 0)
	  res.w[1]++;
	is_midpoint_gt_even = 0;
	is_inexact_gt_midpoint = 1;
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
	// if this second rounding was exact the result may still be
	// inexact because of the first rounding
	if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
	  is_inexact_gt_midpoint = 1;
	}
	if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
	  is_inexact_lt_midpoint = 1;
	}
      } else if (is_midpoint_gt_even &&
	  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
	// pulled up to a midpoint
	is_inexact_lt_midpoint = 1;
	is_inexact_gt_midpoint = 0;
	is_midpoint_lt_even = 0;
	is_midpoint_gt_even = 0;
      } else if (is_midpoint_lt_even &&
	  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
	// pulled down to a midpoint
	is_inexact_lt_midpoint = 0;
	is_inexact_gt_midpoint = 1;
	is_midpoint_lt_even = 0;
	is_midpoint_gt_even = 0;
      } else {
	;
      }
    }
  }
  // res contains the correct result
  // apply correction if not rounding to nearest
  if (rnd_mode != ROUNDING_TO_NEAREST) {
    rounding_correction (rnd_mode,
			 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
			 is_midpoint_lt_even, is_midpoint_gt_even,
			 e4, &res, ptrfpsf);
  }
  if (is_midpoint_lt_even || is_midpoint_gt_even ||
      is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
    // set the inexact flag
    *ptrfpsf |= INEXACT_EXCEPTION;
    if (is_tiny)
      *ptrfpsf |= UNDERFLOW_EXCEPTION;
  }

  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
  *ptrres = res;
  return;
}


#if DECIMAL_CALL_BY_REFERENCE
static void
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
		int *ptr_is_midpoint_gt_even,
		int *ptr_is_inexact_lt_midpoint,
		int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
		UINT128 * px, UINT128 * py,
		UINT128 *
		pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		_EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
static UINT128
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
		int *ptr_is_midpoint_gt_even,
		int *ptr_is_inexact_lt_midpoint,
		int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
		UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
		_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif

  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
  UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
  int true_p_exp;
  UINT128 C1, C2, C3;
  UINT256 C4;
int q1 = 0, q2 = 0, q3 = 0, q4; 663 int e1, e2, e3, e4; 664 int scale, ind, delta, x0; 665 int p34 = P34; // used to modify the limit on the number of digits 666 BID_UI64DOUBLE tmp; 667 int x_nr_bits, y_nr_bits, z_nr_bits; 668 unsigned int save_fpsf; 669 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; 670 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 671 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; 672 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; 673 int incr_exp = 0; 674 int lsb; 675 int lt_half_ulp = 0; 676 int eq_half_ulp = 0; 677 int gt_half_ulp = 0; 678 int is_tiny = 0; 679 UINT64 R64, tmp64; 680 UINT128 P128, R128; 681 UINT192 P192, R192; 682 UINT256 R256; 683 684 // the following are based on the table of special cases for fma; the NaN 685 // behavior is similar to that of the IA-64 Architecture fma 686 687 // identify cases where at least one operand is NaN 688 689 BID_SWAP128 (x); 690 BID_SWAP128 (y); 691 BID_SWAP128 (z); 692 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN 693 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) 694 // check first for non-canonical NaN payload 695 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 696 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 697 (y.w[0] > 0x38c15b09ffffffffull))) { 698 y.w[1] = y.w[1] & 0xffffc00000000000ull; 699 y.w[0] = 0x0ull; 700 } 701 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN 702 // set invalid flag 703 *pfpsf |= INVALID_EXCEPTION; 704 // return quiet (y) 705 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 706 res.w[0] = y.w[0]; 707 } else { // y is QNaN 708 // return y 709 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 710 res.w[0] = y.w[0]; 711 // if z = SNaN or x = SNaN signal invalid exception 712 if ((z.w[1] & MASK_SNAN) == MASK_SNAN || 713 (x.w[1] & MASK_SNAN) == MASK_SNAN) { 714 // set invalid flag 715 *pfpsf |= INVALID_EXCEPTION; 
716 } 717 } 718 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 719 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 720 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 721 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 722 BID_SWAP128 (res); 723 BID_RETURN (res) 724 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN 725 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) 726 // check first for non-canonical NaN payload 727 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 728 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 729 (z.w[0] > 0x38c15b09ffffffffull))) { 730 z.w[1] = z.w[1] & 0xffffc00000000000ull; 731 z.w[0] = 0x0ull; 732 } 733 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN 734 // set invalid flag 735 *pfpsf |= INVALID_EXCEPTION; 736 // return quiet (z) 737 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 738 res.w[0] = z.w[0]; 739 } else { // z is QNaN 740 // return z 741 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 742 res.w[0] = z.w[0]; 743 // if x = SNaN signal invalid exception 744 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { 745 // set invalid flag 746 *pfpsf |= INVALID_EXCEPTION; 747 } 748 } 749 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 750 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 751 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 752 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 753 BID_SWAP128 (res); 754 BID_RETURN (res) 755 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN 756 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) 757 // check first for non-canonical NaN payload 758 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 759 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 760 (x.w[0] > 0x38c15b09ffffffffull))) { 761 x.w[1] = x.w[1] & 0xffffc00000000000ull; 762 x.w[0] = 0x0ull; 763 } 764 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN 
765 // set invalid flag 766 *pfpsf |= INVALID_EXCEPTION; 767 // return quiet (x) 768 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 769 res.w[0] = x.w[0]; 770 } else { // x is QNaN 771 // return x 772 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 773 res.w[0] = x.w[0]; 774 } 775 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 776 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 777 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 778 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 779 BID_SWAP128 (res); 780 BID_RETURN (res) 781 } 782 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check 783 // for non-canonical values 784 785 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 786 C1.w[1] = x.w[1] & MASK_COEFF; 787 C1.w[0] = x.w[0]; 788 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf 789 // if x is not infinity check for non-canonical values - treated as zero 790 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 791 // non-canonical 792 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 793 C1.w[1] = 0; // significand high 794 C1.w[0] = 0; // significand low 795 } else { // G0_G1 != 11 796 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits 797 if (C1.w[1] > 0x0001ed09bead87c0ull || 798 (C1.w[1] == 0x0001ed09bead87c0ull && 799 C1.w[0] > 0x378d8e63ffffffffull)) { 800 // x is non-canonical if coefficient is larger than 10^34 -1 801 C1.w[1] = 0; 802 C1.w[0] = 0; 803 } else { // canonical 804 ; 805 } 806 } 807 } 808 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 809 C2.w[1] = y.w[1] & MASK_COEFF; 810 C2.w[0] = y.w[0]; 811 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf 812 // if y is not infinity check for non-canonical values - treated as zero 813 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 814 // non-canonical 815 y_exp = (y.w[1] << 2) & MASK_EXP; // 
biased and shifted left 49 bits 816 C2.w[1] = 0; // significand high 817 C2.w[0] = 0; // significand low 818 } else { // G0_G1 != 11 819 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits 820 if (C2.w[1] > 0x0001ed09bead87c0ull || 821 (C2.w[1] == 0x0001ed09bead87c0ull && 822 C2.w[0] > 0x378d8e63ffffffffull)) { 823 // y is non-canonical if coefficient is larger than 10^34 -1 824 C2.w[1] = 0; 825 C2.w[0] = 0; 826 } else { // canonical 827 ; 828 } 829 } 830 } 831 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 832 C3.w[1] = z.w[1] & MASK_COEFF; 833 C3.w[0] = z.w[0]; 834 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf 835 // if z is not infinity check for non-canonical values - treated as zero 836 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 837 // non-canonical 838 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 839 C3.w[1] = 0; // significand high 840 C3.w[0] = 0; // significand low 841 } else { // G0_G1 != 11 842 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits 843 if (C3.w[1] > 0x0001ed09bead87c0ull || 844 (C3.w[1] == 0x0001ed09bead87c0ull && 845 C3.w[0] > 0x378d8e63ffffffffull)) { 846 // z is non-canonical if coefficient is larger than 10^34 -1 847 C3.w[1] = 0; 848 C3.w[0] = 0; 849 } else { // canonical 850 ; 851 } 852 } 853 } 854 855 p_sign = x_sign ^ y_sign; // sign of the product 856 857 // identify cases where at least one operand is infinity 858 859 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf 860 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 861 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 862 if (p_sign == z_sign) { 863 res.w[1] = z_sign | MASK_INF; 864 res.w[0] = 0x0; 865 } else { 866 // return QNaN Indefinite 867 res.w[1] = 0x7c00000000000000ull; 868 res.w[0] = 0x0000000000000000ull; 869 // set invalid flag 870 *pfpsf |= INVALID_EXCEPTION; 871 } 872 } else { // z = 0 or z = f 873 res.w[1] = p_sign | MASK_INF; 874 res.w[0] = 
0x0; 875 } 876 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f 877 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 878 if (p_sign == z_sign) { 879 res.w[1] = z_sign | MASK_INF; 880 res.w[0] = 0x0; 881 } else { 882 // return QNaN Indefinite 883 res.w[1] = 0x7c00000000000000ull; 884 res.w[0] = 0x0000000000000000ull; 885 // set invalid flag 886 *pfpsf |= INVALID_EXCEPTION; 887 } 888 } else { // z = 0 or z = f 889 res.w[1] = p_sign | MASK_INF; 890 res.w[0] = 0x0; 891 } 892 } else { // y = 0 893 // return QNaN Indefinite 894 res.w[1] = 0x7c00000000000000ull; 895 res.w[0] = 0x0000000000000000ull; 896 // set invalid flag 897 *pfpsf |= INVALID_EXCEPTION; 898 } 899 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 900 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 901 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 902 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 903 BID_SWAP128 (res); 904 BID_RETURN (res) 905 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 906 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 907 // x = f, necessarily 908 if ((p_sign != z_sign) 909 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { 910 // return QNaN Indefinite 911 res.w[1] = 0x7c00000000000000ull; 912 res.w[0] = 0x0000000000000000ull; 913 // set invalid flag 914 *pfpsf |= INVALID_EXCEPTION; 915 } else { 916 res.w[1] = z_sign | MASK_INF; 917 res.w[0] = 0x0; 918 } 919 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 920 // z = 0, f, inf 921 // return QNaN Indefinite 922 res.w[1] = 0x7c00000000000000ull; 923 res.w[0] = 0x0000000000000000ull; 924 // set invalid flag 925 *pfpsf |= INVALID_EXCEPTION; 926 } else { 927 // x = f and z = 0, f, necessarily 928 res.w[1] = p_sign | MASK_INF; 929 res.w[0] = 0x0; 930 } 931 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 932 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 933 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 934 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 935 BID_SWAP128 (res); 
936 BID_RETURN (res) 937 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 938 // x = 0, f and y = 0, f, necessarily 939 res.w[1] = z_sign | MASK_INF; 940 res.w[0] = 0x0; 941 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 942 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 943 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 944 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 945 BID_SWAP128 (res); 946 BID_RETURN (res) 947 } 948 949 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; 950 if (true_p_exp < -6176) 951 p_exp = 0; // cannot be less than EXP_MIN 952 else 953 p_exp = (UINT64) (true_p_exp + 6176) << 49; 954 955 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 956 // the result is 0 957 if (p_exp < z_exp) 958 res.w[1] = p_exp; // preferred exponent 959 else 960 res.w[1] = z_exp; // preferred exponent 961 if (p_sign == z_sign) { 962 res.w[1] |= z_sign; 963 res.w[0] = 0x0; 964 } else { // x * y and z have opposite signs 965 if (rnd_mode == ROUNDING_DOWN) { 966 // res = -0.0 967 res.w[1] |= MASK_SIGN; 968 res.w[0] = 0x0; 969 } else { 970 // res = +0.0 971 // res.w[1] |= 0x0; 972 res.w[0] = 0x0; 973 } 974 } 975 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 976 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 977 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 978 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 979 BID_SWAP128 (res); 980 BID_RETURN (res) 981 } 982 // from this point on, we may need to know the number of decimal digits 983 // in the significands of x, y, z when x, y, z != 0 984 985 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) 986 // q1 = nr. of decimal digits in x 987 // determine first the nr. 
of bits in x 988 if (C1.w[1] == 0) { 989 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 990 // split the 64-bit value in two 32-bit halves to avoid rounding errors 991 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 992 tmp.d = (double) (C1.w[0] >> 32); // exact conversion 993 x_nr_bits = 994 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 995 } else { // x < 2^32 996 tmp.d = (double) (C1.w[0]); // exact conversion 997 x_nr_bits = 998 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 999 } 1000 } else { // if x < 2^53 1001 tmp.d = (double) C1.w[0]; // exact conversion 1002 x_nr_bits = 1003 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1004 } 1005 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) 1006 tmp.d = (double) C1.w[1]; // exact conversion 1007 x_nr_bits = 1008 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1009 } 1010 q1 = nr_digits[x_nr_bits - 1].digits; 1011 if (q1 == 0) { 1012 q1 = nr_digits[x_nr_bits - 1].digits1; 1013 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || 1014 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && 1015 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) 1016 q1++; 1017 } 1018 } 1019 1020 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) 1021 if (C2.w[1] == 0) { 1022 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 1023 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1024 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 1025 tmp.d = (double) (C2.w[0] >> 32); // exact conversion 1026 y_nr_bits = 1027 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1028 } else { // y < 2^32 1029 tmp.d = (double) C2.w[0]; // exact conversion 1030 y_nr_bits = 1031 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1032 } 1033 } else { // if y < 2^53 1034 tmp.d = (double) C2.w[0]; // exact conversion 1035 y_nr_bits = 1036 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1037 } 1038 } else { // C2.w[1] != 0 => 
nr. bits = 64 + nr_bits (C2.w[1]) 1039 tmp.d = (double) C2.w[1]; // exact conversion 1040 y_nr_bits = 1041 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1042 } 1043 1044 q2 = nr_digits[y_nr_bits].digits; 1045 if (q2 == 0) { 1046 q2 = nr_digits[y_nr_bits].digits1; 1047 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || 1048 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && 1049 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) 1050 q2++; 1051 } 1052 } 1053 1054 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) 1055 if (C3.w[1] == 0) { 1056 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 1057 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1058 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 1059 tmp.d = (double) (C3.w[0] >> 32); // exact conversion 1060 z_nr_bits = 1061 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1062 } else { // z < 2^32 1063 tmp.d = (double) C3.w[0]; // exact conversion 1064 z_nr_bits = 1065 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1066 } 1067 } else { // if z < 2^53 1068 tmp.d = (double) C3.w[0]; // exact conversion 1069 z_nr_bits = 1070 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1071 } 1072 } else { // C3.w[1] != 0 => nr. 
bits = 64 + nr_bits (C3.w[1]) 1073 tmp.d = (double) C3.w[1]; // exact conversion 1074 z_nr_bits = 1075 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1076 } 1077 1078 q3 = nr_digits[z_nr_bits].digits; 1079 if (q3 == 0) { 1080 q3 = nr_digits[z_nr_bits].digits1; 1081 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || 1082 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && 1083 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) 1084 q3++; 1085 } 1086 } 1087 1088 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 1089 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { 1090 // x = 0 or y = 0 1091 // z = f, necessarily; for 0 + z return z, with the preferred exponent 1092 // the result is z, but need to get the preferred exponent 1093 if (z_exp <= p_exp) { // the preferred exponent is z_exp 1094 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; 1095 res.w[0] = C3.w[0]; 1096 } else { // if (p_exp < z_exp) the preferred exponent is p_exp 1097 // return (C3 * 10^scale) * 10^(z_exp - scale) 1098 // where scale = min (p34-q3, (z_exp-p_exp) >> 49) 1099 scale = p34 - q3; 1100 ind = (z_exp - p_exp) >> 49; 1101 if (ind < scale) 1102 scale = ind; 1103 if (scale == 0) { 1104 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant 1105 res.w[0] = z.w[0]; 1106 } else if (q3 <= 19) { // z fits in 64 bits 1107 if (scale <= 19) { // 10^scale fits in 64 bits 1108 // 64 x 64 C3.w[0] * ten2k64[scale] 1109 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1110 } else { // 10^scale fits in 128 bits 1111 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1112 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1113 } 1114 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1115 // 64 x 128 ten2k64[scale] * C3 1116 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1117 } 1118 // subtract scale from the exponent 1119 z_exp = z_exp - ((UINT64) scale << 49); 1120 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1121 } 1122 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1123 
*ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1124 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1125 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1126 BID_SWAP128 (res); 1127 BID_RETURN (res) 1128 } else { 1129 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f 1130 } 1131 1132 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x 1133 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y 1134 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z 1135 e4 = e1 + e2; // unbiased exponent of the exact x * y 1136 1137 // calculate C1 * C2 and its number of decimal digits, q4 1138 1139 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits 1140 // where 2 <= q1 + q2 <= 68 1141 // calculate C4 = C1 * C2 and determine q 1142 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; 1143 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits 1144 C4.w[0] = C1.w[0] * C2.w[0]; 1145 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1146 if (C4.w[0] < ten2k64[q1 + q2 - 1]) 1147 q4 = q1 + q2 - 1; // q4 in [1, 18] 1148 else 1149 q4 = q1 + q2; // q4 in [2, 19] 1150 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1151 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits 1152 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits 1153 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); 1154 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 1155 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 1156 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1157 q4 = 19; // 19 = q1 + q2 - 1 1158 } else { 1159 // if (C4.w[1] == 0) 1160 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1161 // else 1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1163 q4 = 20; // 20 = q1 + q2 1164 } 1165 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 1166 // C4 = C1 * C2 fits in 64 or 128 bits 1167 // (64 
bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) 1168 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits 1169 if (q1 <= 19) { 1170 __mul_128x64_to_128 (C4, C1.w[0], C2); 1171 } else { // q2 <= 19 1172 __mul_128x64_to_128 (C4, C2.w[0], C1); 1173 } 1174 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1175 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || 1176 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && 1177 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { 1178 // if (C4.w[1] == 0) // q4 = 20, necessarily 1179 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1180 // else 1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1182 q4 = q1 + q2 - 1; // q4 in [20, 37] 1183 } else { 1184 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1185 q4 = q1 + q2; // q4 in [21, 38] 1186 } 1187 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits 1188 // both C1 and C2 fit in 128 bits (actually in 113 bits) 1189 // may replace this by 128x128_to192 1190 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 1191 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 1192 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || 1193 (C4.w[1] == ten2k128[18].w[1] 1194 && C4.w[0] < ten2k128[18].w[0]))) { 1195 // 18 = 38 - 20 = q1+q2-1 - 20 1196 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1197 q4 = 38; // 38 = q1 + q2 - 1 1198 } else { 1199 // if (C4.w[2] == 0) 1200 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1201 // else 1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1203 q4 = 39; // 39 = q1 + q2 1204 } 1205 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 1206 // C4 = C1 * C2 fits in 128 or 192 bits 1207 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) 1208 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one 1209 // may fit in 64 
bits 1210 if (C1.w[1] == 0) { // C1 fits in 64 bits 1211 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1212 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); 1213 } else if (C2.w[1] == 0) { // C2 fits in 64 bits 1214 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1215 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); 1216 } else { // both C1 and C2 require 128 bits 1217 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1218 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1219 } 1220 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1221 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || 1222 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && 1223 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || 1224 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && 1225 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { 1226 // if (C4.w[2] == 0) // q4 = 39, necessarily 1227 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1228 // else 1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1230 q4 = q1 + q2 - 1; // q4 in [39, 56] 1231 } else { 1232 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1233 q4 = q1 + q2; // q4 in [40, 57] 1234 } 1235 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits 1236 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one 1237 // may fit in 64 bits 1238 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits 1239 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 1240 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits 1241 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 1242 } else { // C1 * C2 will fit in 192 bits or in 256 bits 1243 __mul_128x128_to_256 (C4, C1, C2); 1244 } 1245 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 1246 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || 1247 (C4.w[2] == ten2k256[18].w[2] 1248 && (C4.w[1] < ten2k256[18].w[1] 1249 || (C4.w[1] == 
ten2k256[18].w[1] 1250 && C4.w[0] < ten2k256[18].w[0]))))) { 1251 // 18 = 57 - 39 = q1+q2-1 - 39 1252 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1253 q4 = 57; // 57 = q1 + q2 - 1 1254 } else { 1255 // if (C4.w[3] == 0) 1256 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1257 // else 1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1259 q4 = 58; // 58 = q1 + q2 1260 } 1261 } else { // if 59 <= q1 + q2 <= 68 1262 // C4 = C1 * C2 fits in 192 or 256 bits 1263 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) 1264 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in 1265 // 64 bits 1266 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1267 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1268 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1269 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || 1270 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && 1271 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || 1272 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && 1273 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || 1274 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && 1275 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { 1276 // if (C4.w[3] == 0) // q4 = 58, necessarily 1277 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1278 // else 1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1280 q4 = q1 + q2 - 1; // q4 in [58, 67] 1281 } else { 1282 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1283 q4 = q1 + q2; // q4 in [59, 68] 1284 } 1285 } 1286 1287 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 1288 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved 1289 *pfpsf = 0; 1290 1291 if (q4 > p34) { 1292 1293 // truncate C4 to p34 digits into res 1294 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 1295 x0 = q4 - p34; 1296 if (q4 <= 38) { 1297 P128.w[1] = C4.w[1]; 1298 
P128.w[0] = C4.w[0]; 1299 round128_19_38 (q4, x0, P128, &res, &incr_exp, 1300 &is_midpoint_lt_even, &is_midpoint_gt_even, 1301 &is_inexact_lt_midpoint, 1302 &is_inexact_gt_midpoint); 1303 } else if (q4 <= 57) { // 35 <= q4 <= 57 1304 P192.w[2] = C4.w[2]; 1305 P192.w[1] = C4.w[1]; 1306 P192.w[0] = C4.w[0]; 1307 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 1308 &is_midpoint_lt_even, &is_midpoint_gt_even, 1309 &is_inexact_lt_midpoint, 1310 &is_inexact_gt_midpoint); 1311 res.w[0] = R192.w[0]; 1312 res.w[1] = R192.w[1]; 1313 } else { // if (q4 <= 68) 1314 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 1315 &is_midpoint_lt_even, &is_midpoint_gt_even, 1316 &is_inexact_lt_midpoint, 1317 &is_inexact_gt_midpoint); 1318 res.w[0] = R256.w[0]; 1319 res.w[1] = R256.w[1]; 1320 } 1321 e4 = e4 + x0; 1322 if (incr_exp) { 1323 e4 = e4 + 1; 1324 } 1325 q4 = p34; 1326 // res is now the coefficient of the result rounded to the destination 1327 // precision, with unbounded exponent; the exponent is e4; q4=digits(res) 1328 } else { // if (q4 <= p34) 1329 // C4 * 10^e4 is the result rounded to the destination precision, with 1330 // unbounded exponent (which is exact) 1331 1332 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { 1333 // e4 is too large, but can be brought within range by scaling up C4 1334 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 1335 // res = (C4 * 10^scale) * 10^expmax 1336 if (q4 <= 19) { // C4 fits in 64 bits 1337 if (scale <= 19) { // 10^scale fits in 64 bits 1338 // 64 x 64 C4.w[0] * ten2k64[scale] 1339 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); 1340 } else { // 10^scale fits in 128 bits 1341 // 64 x 128 C4.w[0] * ten2k128[scale - 20] 1342 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); 1343 } 1344 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 1345 // 64 x 128 ten2k64[scale] * CC43 1346 __mul_128x64_to_128 (res, ten2k64[scale], C4); 1347 } 1348 e4 = e4 - scale; // expmax 1349 q4 = q4 + scale; 1350 
} else { 1351 res.w[1] = C4.w[1]; 1352 res.w[0] = C4.w[0]; 1353 } 1354 // res is the coefficient of the result rounded to the destination 1355 // precision, with unbounded exponent (it has q4 digits); the exponent 1356 // is e4 (exact result) 1357 } 1358 1359 // check for overflow 1360 if (q4 + e4 > p34 + expmax) { 1361 if (rnd_mode == ROUNDING_TO_NEAREST) { 1362 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf 1363 res.w[0] = 0x0000000000000000ull; 1364 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 1365 } else { 1366 res.w[1] = p_sign | res.w[1]; 1367 rounding_correction (rnd_mode, 1368 is_inexact_lt_midpoint, 1369 is_inexact_gt_midpoint, 1370 is_midpoint_lt_even, is_midpoint_gt_even, 1371 e4, &res, pfpsf); 1372 } 1373 *pfpsf |= save_fpsf; 1374 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1375 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1376 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1377 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1378 BID_SWAP128 (res); 1379 BID_RETURN (res) 1380 } 1381 // check for underflow 1382 if (q4 + e4 < expmin + P34) { 1383 is_tiny = 1; // the result is tiny 1384 if (e4 < expmin) { 1385 // if e4 < expmin, we must truncate more of res 1386 x0 = expmin - e4; // x0 >= 1 1387 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 1388 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 1389 is_midpoint_lt_even0 = is_midpoint_lt_even; 1390 is_midpoint_gt_even0 = is_midpoint_gt_even; 1391 is_inexact_lt_midpoint = 0; 1392 is_inexact_gt_midpoint = 0; 1393 is_midpoint_lt_even = 0; 1394 is_midpoint_gt_even = 0; 1395 // the number of decimal digits in res is q4 1396 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits 1397 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 1398 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, 1399 &is_midpoint_lt_even, &is_midpoint_gt_even, 1400 &is_inexact_lt_midpoint, 1401 &is_inexact_gt_midpoint); 1402 if (incr_exp) { 1403 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= 
q4 - x0 <= 17 1404 R64 = ten2k64[q4 - x0]; 1405 } 1406 // res.w[1] = 0; (from above) 1407 res.w[0] = R64; 1408 } else { // if (q4 <= 34) 1409 // 19 <= q4 <= 38 1410 P128.w[1] = res.w[1]; 1411 P128.w[0] = res.w[0]; 1412 round128_19_38 (q4, x0, P128, &res, &incr_exp, 1413 &is_midpoint_lt_even, &is_midpoint_gt_even, 1414 &is_inexact_lt_midpoint, 1415 &is_inexact_gt_midpoint); 1416 if (incr_exp) { 1417 // increase coefficient by a factor of 10; this will be <= 10^33 1418 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 1419 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 1420 // res.w[1] = 0; 1421 res.w[0] = ten2k64[q4 - x0]; 1422 } else { // 20 <= q4 - x0 <= 37 1423 res.w[0] = ten2k128[q4 - x0 - 20].w[0]; 1424 res.w[1] = ten2k128[q4 - x0 - 20].w[1]; 1425 } 1426 } 1427 } 1428 e4 = e4 + x0; // expmin 1429 } else if (x0 == q4) { 1430 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin 1431 // determine relationship with 1/2 ulp 1432 if (q4 <= 19) { 1433 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp 1434 lt_half_ulp = 1; 1435 is_inexact_lt_midpoint = 1; 1436 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp 1437 eq_half_ulp = 1; 1438 is_midpoint_gt_even = 1; 1439 } else { // > 1/2 ulp 1440 // gt_half_ulp = 1; 1441 is_inexact_gt_midpoint = 1; 1442 } 1443 } else { // if (q4 <= 34) 1444 if (res.w[1] < midpoint128[q4 - 20].w[1] || 1445 (res.w[1] == midpoint128[q4 - 20].w[1] && 1446 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp 1447 lt_half_ulp = 1; 1448 is_inexact_lt_midpoint = 1; 1449 } else if (res.w[1] == midpoint128[q4 - 20].w[1] && 1450 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1451 eq_half_ulp = 1; 1452 is_midpoint_gt_even = 1; 1453 } else { // > 1/2 ulp 1454 // gt_half_ulp = 1; 1455 is_inexact_gt_midpoint = 1; 1456 } 1457 } 1458 if (lt_half_ulp || eq_half_ulp) { 1459 // res = +0.0 * 10^expmin 1460 res.w[1] = 0x0000000000000000ull; 1461 res.w[0] = 0x0000000000000000ull; 1462 } else { // if (gt_half_ulp) 1463 // res = 
+1 * 10^expmin 1464 res.w[1] = 0x0000000000000000ull; 1465 res.w[0] = 0x0000000000000001ull; 1466 } 1467 e4 = expmin; 1468 } else { // if (x0 > q4) 1469 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin 1470 res.w[1] = 0; 1471 res.w[0] = 0; 1472 e4 = expmin; 1473 is_inexact_lt_midpoint = 1; 1474 } 1475 // avoid a double rounding error 1476 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 1477 is_midpoint_lt_even) { // double rounding error upward 1478 // res = res - 1 1479 res.w[0]--; 1480 if (res.w[0] == 0xffffffffffffffffull) 1481 res.w[1]--; 1482 // Note: a double rounding error upward is not possible; for this 1483 // the result after the first rounding would have to be 99...95 1484 // (35 digits in all), possibly followed by a number of zeros; this 1485 // not possible for f * f + 0 1486 is_midpoint_lt_even = 0; 1487 is_inexact_lt_midpoint = 1; 1488 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 1489 is_midpoint_gt_even) { // double rounding error downward 1490 // res = res + 1 1491 res.w[0]++; 1492 if (res.w[0] == 0) 1493 res.w[1]++; 1494 is_midpoint_gt_even = 0; 1495 is_inexact_gt_midpoint = 1; 1496 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 1497 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 1498 // if this second rounding was exact the result may still be 1499 // inexact because of the first rounding 1500 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 1501 is_inexact_gt_midpoint = 1; 1502 } 1503 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 1504 is_inexact_lt_midpoint = 1; 1505 } 1506 } else if (is_midpoint_gt_even && 1507 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 1508 // pulled up to a midpoint 1509 is_inexact_lt_midpoint = 1; 1510 is_inexact_gt_midpoint = 0; 1511 is_midpoint_lt_even = 0; 1512 is_midpoint_gt_even = 0; 1513 } else if (is_midpoint_lt_even && 1514 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 1515 // pulled down to a midpoint 1516 
is_inexact_lt_midpoint = 0; 1517 is_inexact_gt_midpoint = 1; 1518 is_midpoint_lt_even = 0; 1519 is_midpoint_gt_even = 0; 1520 } else { 1521 ; 1522 } 1523 } else { // if e4 >= emin then q4 < P and the result is tiny and exact 1524 if (e3 < e4) { 1525 // if (e3 < e4) the preferred exponent is e3 1526 // return (C4 * 10^scale) * 10^(e4 - scale) 1527 // where scale = min (p34-q4, (e4 - e3)) 1528 scale = p34 - q4; 1529 ind = e4 - e3; 1530 if (ind < scale) 1531 scale = ind; 1532 if (scale == 0) { 1533 ; // res and e4 are unchanged 1534 } else if (q4 <= 19) { // C4 fits in 64 bits 1535 if (scale <= 19) { // 10^scale fits in 64 bits 1536 // 64 x 64 res.w[0] * ten2k64[scale] 1537 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); 1538 } else { // 10^scale fits in 128 bits 1539 // 64 x 128 res.w[0] * ten2k128[scale - 20] 1540 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); 1541 } 1542 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits 1543 // 64 x 128 ten2k64[scale] * C3 1544 __mul_128x64_to_128 (res, ten2k64[scale], res); 1545 } 1546 // subtract scale from the exponent 1547 e4 = e4 - scale; 1548 } 1549 } 1550 1551 // check for inexact result 1552 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1553 is_midpoint_lt_even || is_midpoint_gt_even) { 1554 // set the inexact flag and the underflow flag 1555 *pfpsf |= INEXACT_EXCEPTION; 1556 *pfpsf |= UNDERFLOW_EXCEPTION; 1557 } 1558 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; 1559 if (rnd_mode != ROUNDING_TO_NEAREST) { 1560 rounding_correction (rnd_mode, 1561 is_inexact_lt_midpoint, 1562 is_inexact_gt_midpoint, 1563 is_midpoint_lt_even, is_midpoint_gt_even, 1564 e4, &res, pfpsf); 1565 } 1566 *pfpsf |= save_fpsf; 1567 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1568 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1569 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1570 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1571 BID_SWAP128 (res); 1572 BID_RETURN 
(res) 1573 } 1574 // no overflow, and no underflow for rounding to nearest 1575 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; 1576 1577 if (rnd_mode != ROUNDING_TO_NEAREST) { 1578 rounding_correction (rnd_mode, 1579 is_inexact_lt_midpoint, 1580 is_inexact_gt_midpoint, 1581 is_midpoint_lt_even, is_midpoint_gt_even, 1582 e4, &res, pfpsf); 1583 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) 1584 if (e4 == expmin) { 1585 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 1586 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 1587 res.w[0] < 0x38c15b0a00000000ull)) { 1588 is_tiny = 1; 1589 } 1590 } 1591 } 1592 1593 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1594 is_midpoint_lt_even || is_midpoint_gt_even) { 1595 // set the inexact flag 1596 *pfpsf |= INEXACT_EXCEPTION; 1597 if (is_tiny) 1598 *pfpsf |= UNDERFLOW_EXCEPTION; 1599 } 1600 1601 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact 1602 // need to ensure that the result has the preferred exponent 1603 p_exp = res.w[1] & MASK_EXP; 1604 if (z_exp < p_exp) { // the preferred exponent is z_exp 1605 // signficand of res in C3 1606 C3.w[1] = res.w[1] & MASK_COEFF; 1607 C3.w[0] = res.w[0]; 1608 // the number of decimal digits of x * y is q4 <= 34 1609 // Note: the coefficient fits in 128 bits 1610 1611 // return (C3 * 10^scale) * 10^(p_exp - scale) 1612 // where scale = min (p34-q4, (p_exp-z_exp) >> 49) 1613 scale = p34 - q4; 1614 ind = (p_exp - z_exp) >> 49; 1615 if (ind < scale) 1616 scale = ind; 1617 // subtract scale from the exponent 1618 p_exp = p_exp - ((UINT64) scale << 49); 1619 if (scale == 0) { 1620 ; // leave res unchanged 1621 } else if (q4 <= 19) { // x * y fits in 64 bits 1622 if (scale <= 19) { // 10^scale fits in 64 bits 1623 // 64 x 64 C3.w[0] * ten2k64[scale] 1624 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1625 } else { // 10^scale fits in 128 bits 1626 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1627 __mul_128x64_to_128 
(res, C3.w[0], ten2k128[scale - 20]); 1628 } 1629 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1630 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits 1631 // 64 x 128 ten2k64[scale] * C3 1632 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1633 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1634 } 1635 } // else leave the result as it is, because p_exp <= z_exp 1636 } 1637 *pfpsf |= save_fpsf; 1638 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1639 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1640 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1641 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1642 BID_SWAP128 (res); 1643 BID_RETURN (res) 1644 } // else we have f * f + f 1645 1646 // continue with x = f, y = f, z = f 1647 1648 delta = q3 + e3 - q4 - e4; 1649 delta_ge_zero: 1650 if (delta >= 0) { 1651 1652 if (p34 <= delta - 1 || // Case (1') 1653 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) 1654 // check for overflow, which can occur only in Case (1') 1655 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { 1656 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary 1657 // condition for (q3 + e3) > (p34 + expmax) 1658 if (rnd_mode == ROUNDING_TO_NEAREST) { 1659 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 1660 res.w[0] = 0x0000000000000000ull; 1661 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 1662 } else { 1663 if (p_sign == z_sign) { 1664 is_inexact_lt_midpoint = 1; 1665 } else { 1666 is_inexact_gt_midpoint = 1; 1667 } 1668 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) 1669 scale = p34 - q3; 1670 if (scale == 0) { 1671 res.w[1] = z_sign | C3.w[1]; 1672 res.w[0] = C3.w[0]; 1673 } else { 1674 if (q3 <= 19) { // C3 fits in 64 bits 1675 if (scale <= 19) { // 10^scale fits in 64 bits 1676 // 64 x 64 C3.w[0] * ten2k64[scale] 1677 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1678 } else { // 10^scale fits in 128 bits 1679 // 64 x 128 C3.w[0] * ten2k128[scale - 
20] 1680 __mul_128x64_to_128 (res, C3.w[0], 1681 ten2k128[scale - 20]); 1682 } 1683 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits 1684 // 64 x 128 ten2k64[scale] * C3 1685 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1686 } 1687 // the coefficient in res has q3 + scale = p34 digits 1688 } 1689 e3 = e3 - scale; 1690 res.w[1] = z_sign | res.w[1]; 1691 rounding_correction (rnd_mode, 1692 is_inexact_lt_midpoint, 1693 is_inexact_gt_midpoint, 1694 is_midpoint_lt_even, is_midpoint_gt_even, 1695 e3, &res, pfpsf); 1696 } 1697 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1698 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1699 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1700 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1701 BID_SWAP128 (res); 1702 BID_RETURN (res) 1703 } 1704 // res = z 1705 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) 1706 // return (C3 * 10^scale) * 10^(z_exp - scale) 1707 // where scale = min (p34-q3, z_exp-EMIN) 1708 scale = p34 - q3; 1709 ind = e3 + 6176; 1710 if (ind < scale) 1711 scale = ind; 1712 if (scale == 0) { 1713 res.w[1] = C3.w[1]; 1714 res.w[0] = C3.w[0]; 1715 } else if (q3 <= 19) { // z fits in 64 bits 1716 if (scale <= 19) { // 10^scale fits in 64 bits 1717 // 64 x 64 C3.w[0] * ten2k64[scale] 1718 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1719 } else { // 10^scale fits in 128 bits 1720 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1721 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1722 } 1723 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1724 // 64 x 128 ten2k64[scale] * C3 1725 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1726 } 1727 // the coefficient in res has q3 + scale digits 1728 // subtract scale from the exponent 1729 z_exp = z_exp - ((UINT64) scale << 49); 1730 e3 = e3 - scale; 1731 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1732 if (scale + q3 < p34) 1733 *pfpsf |= UNDERFLOW_EXCEPTION; 1734 } else { 1735 scale = 0; 
1736 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; 1737 res.w[0] = C3.w[0]; 1738 } 1739 1740 // use the following to avoid double rounding errors when operating on 1741 // mixed formats in rounding to nearest, and for correcting the result 1742 // if not rounding to nearest 1743 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { 1744 // there is a gap of exactly one digit between the scaled C3 and C4 1745 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case 1746 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || 1747 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || 1748 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || 1749 C3.w[0] != ten2k128[q3 - 21].w[0]))) { 1750 // C3 * 10^ scale != 10^(q3-1) 1751 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || 1752 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 1753 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value 1754 } else { // if C3 * 10^scale = 10^(q3+scale-1) 1755 // ok from above e3 = (z_exp >> 49) - 6176; 1756 // the result is always inexact 1757 if (q4 == 1) { 1758 R64 = C4.w[0]; 1759 } else { 1760 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 1761 // x = q4-1, 1 <= x <= 67 and check if this operation is exact 1762 if (q4 <= 18) { // 2 <= q4 <= 18 1763 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 1764 &is_midpoint_lt_even, &is_midpoint_gt_even, 1765 &is_inexact_lt_midpoint, 1766 &is_inexact_gt_midpoint); 1767 } else if (q4 <= 38) { 1768 P128.w[1] = C4.w[1]; 1769 P128.w[0] = C4.w[0]; 1770 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 1771 &is_midpoint_lt_even, 1772 &is_midpoint_gt_even, 1773 &is_inexact_lt_midpoint, 1774 &is_inexact_gt_midpoint); 1775 R64 = R128.w[0]; // one decimal digit 1776 } else if (q4 <= 57) { 1777 P192.w[2] = C4.w[2]; 1778 P192.w[1] = C4.w[1]; 1779 P192.w[0] = C4.w[0]; 1780 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 1781 &is_midpoint_lt_even, 1782 &is_midpoint_gt_even, 
1783 &is_inexact_lt_midpoint, 1784 &is_inexact_gt_midpoint); 1785 R64 = R192.w[0]; // one decimal digit 1786 } else { // if (q4 <= 68) 1787 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 1788 &is_midpoint_lt_even, 1789 &is_midpoint_gt_even, 1790 &is_inexact_lt_midpoint, 1791 &is_inexact_gt_midpoint); 1792 R64 = R256.w[0]; // one decimal digit 1793 } 1794 if (incr_exp) { 1795 R64 = 10; 1796 } 1797 } 1798 if (q4 == 1 && C4.w[0] == 5) { 1799 is_inexact_lt_midpoint = 0; 1800 is_inexact_gt_midpoint = 0; 1801 is_midpoint_lt_even = 1; 1802 is_midpoint_gt_even = 0; 1803 } else if ((e3 == expmin) || 1804 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { 1805 // result does not change 1806 is_inexact_lt_midpoint = 0; 1807 is_inexact_gt_midpoint = 1; 1808 is_midpoint_lt_even = 0; 1809 is_midpoint_gt_even = 0; 1810 } else { 1811 is_inexact_lt_midpoint = 1; 1812 is_inexact_gt_midpoint = 0; 1813 is_midpoint_lt_even = 0; 1814 is_midpoint_gt_even = 0; 1815 // result decremented is 10^(q3+scale) - 1 1816 if ((q3 + scale) <= 19) { 1817 res.w[1] = 0; 1818 res.w[0] = ten2k64[q3 + scale]; 1819 } else { // if ((q3 + scale + 1) <= 35) 1820 res.w[1] = ten2k128[q3 + scale - 20].w[1]; 1821 res.w[0] = ten2k128[q3 + scale - 20].w[0]; 1822 } 1823 res.w[0] = res.w[0] - 1; // borrow never occurs 1824 z_exp = z_exp - EXP_P1; 1825 e3 = e3 - 1; 1826 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 1827 } 1828 if (e3 == expmin) { 1829 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { 1830 ; // result not tiny (in round-to-nearest mode) 1831 } else { 1832 *pfpsf |= UNDERFLOW_EXCEPTION; 1833 } 1834 } 1835 } // end 10^(q3+scale-1) 1836 // set the inexact flag 1837 *pfpsf |= INEXACT_EXCEPTION; 1838 } else { 1839 if (p_sign == z_sign) { 1840 // if (z_sign), set as if for absolute value 1841 is_inexact_lt_midpoint = 1; 1842 } else { // if (p_sign != z_sign) 1843 // if (z_sign), set as if for absolute value 1844 is_inexact_gt_midpoint = 1; 1845 } 1846 *pfpsf |= INEXACT_EXCEPTION; 1847 
} 1848 // the result is always inexact => set the inexact flag 1849 // Determine tininess: 1850 // if (exp > expmin) 1851 // the result is not tiny 1852 // else // if exp = emin 1853 // if (q3 + scale < p34) 1854 // the result is tiny 1855 // else // if (q3 + scale = p34) 1856 // if (C3 * 10^scale > 10^33) 1857 // the result is not tiny 1858 // else // if C3 * 10^scale = 10^33 1859 // if (xy * z > 0) 1860 // the result is not tiny 1861 // else // if (xy * z < 0) 1862 // if (z > 0) 1863 // if rnd_mode != RP 1864 // the result is tiny 1865 // else // if RP 1866 // the result is not tiny 1867 // else // if (z < 0) 1868 // if rnd_mode != RM 1869 // the result is tiny 1870 // else // if RM 1871 // the result is not tiny 1872 // endif 1873 // endif 1874 // endif 1875 // endif 1876 // endif 1877 // endif 1878 if ((e3 == expmin && (q3 + scale) < p34) || 1879 (e3 == expmin && (q3 + scale) == p34 && 1880 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high 1881 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low 1882 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || 1883 (z_sign && rnd_mode != ROUNDING_DOWN)))) { 1884 *pfpsf |= UNDERFLOW_EXCEPTION; 1885 } 1886 if (rnd_mode != ROUNDING_TO_NEAREST) { 1887 rounding_correction (rnd_mode, 1888 is_inexact_lt_midpoint, 1889 is_inexact_gt_midpoint, 1890 is_midpoint_lt_even, is_midpoint_gt_even, 1891 e3, &res, pfpsf); 1892 } 1893 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1894 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1895 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1896 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1897 BID_SWAP128 (res); 1898 BID_RETURN (res) 1899 1900 } else if (p34 == delta) { // Case (1''B) 1901 1902 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 1903 // and C3 can be scaled up to p34 digits if needed 1904 1905 // scale C3 to p34 digits if needed 1906 scale = p34 - q3; // 0 <= scale <= p34 - 1 1907 if (scale == 0) { 1908 res.w[1] = C3.w[1]; 1909 
res.w[0] = C3.w[0]; 1910 } else if (q3 <= 19) { // z fits in 64 bits 1911 if (scale <= 19) { // 10^scale fits in 64 bits 1912 // 64 x 64 C3.w[0] * ten2k64[scale] 1913 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1914 } else { // 10^scale fits in 128 bits 1915 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1916 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1917 } 1918 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1919 // 64 x 128 ten2k64[scale] * C3 1920 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1921 } 1922 // subtract scale from the exponent 1923 z_exp = z_exp - ((UINT64) scale << 49); 1924 e3 = e3 - scale; 1925 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits 1926 1927 // determine whether x * y is less than, equal to, or greater than 1928 // 1/2 ulp (z) 1929 if (q4 <= 19) { 1930 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp 1931 lt_half_ulp = 1; 1932 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp 1933 eq_half_ulp = 1; 1934 } else { // > 1/2 ulp 1935 gt_half_ulp = 1; 1936 } 1937 } else if (q4 <= 38) { 1938 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || 1939 (C4.w[1] == midpoint128[q4 - 20].w[1] && 1940 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp 1941 lt_half_ulp = 1; 1942 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && 1943 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1944 eq_half_ulp = 1; 1945 } else { // > 1/2 ulp 1946 gt_half_ulp = 1; 1947 } 1948 } else if (q4 <= 58) { 1949 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || 1950 (C4.w[2] == midpoint192[q4 - 39].w[2] && 1951 C4.w[1] < midpoint192[q4 - 39].w[1]) || 1952 (C4.w[2] == midpoint192[q4 - 39].w[2] && 1953 C4.w[1] == midpoint192[q4 - 39].w[1] && 1954 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp 1955 lt_half_ulp = 1; 1956 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && 1957 C4.w[1] == midpoint192[q4 - 39].w[1] && 1958 C4.w[0] == 
midpoint192[q4 - 39].w[0]) { // = 1/2 ulp 1959 eq_half_ulp = 1; 1960 } else { // > 1/2 ulp 1961 gt_half_ulp = 1; 1962 } 1963 } else { 1964 if (C4.w[3] < midpoint256[q4 - 59].w[3] || 1965 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1966 C4.w[2] < midpoint256[q4 - 59].w[2]) || 1967 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1968 C4.w[2] == midpoint256[q4 - 59].w[2] && 1969 C4.w[1] < midpoint256[q4 - 59].w[1]) || 1970 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1971 C4.w[2] == midpoint256[q4 - 59].w[2] && 1972 C4.w[1] == midpoint256[q4 - 59].w[1] && 1973 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp 1974 lt_half_ulp = 1; 1975 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && 1976 C4.w[2] == midpoint256[q4 - 59].w[2] && 1977 C4.w[1] == midpoint256[q4 - 59].w[1] && 1978 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp 1979 eq_half_ulp = 1; 1980 } else { // > 1/2 ulp 1981 gt_half_ulp = 1; 1982 } 1983 } 1984 1985 if (p_sign == z_sign) { 1986 if (lt_half_ulp) { 1987 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1988 // use the following to avoid double rounding errors when operating on 1989 // mixed formats in rounding to nearest 1990 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value 1991 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 1992 // add 1 ulp to the significand 1993 res.w[0]++; 1994 if (res.w[0] == 0x0ull) 1995 res.w[1]++; 1996 // check for rounding overflow, when coeff == 10^34 1997 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && 1998 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 1999 e3 = e3 + 1; 2000 // coeff = 10^33 2001 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; 2002 res.w[1] = 0x0000314dc6448d93ull; 2003 res.w[0] = 0x38c15b0a00000000ull; 2004 } 2005 // end add 1 ulp 2006 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2007 if (eq_half_ulp) { 2008 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2009 } else { 2010 is_inexact_gt_midpoint = 1; // if (z_sign), as if for 
absolute value 2011 } 2012 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2013 // leave unchanged 2014 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2015 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2016 } 2017 // the result is always inexact, and never tiny 2018 // set the inexact flag 2019 *pfpsf |= INEXACT_EXCEPTION; 2020 // check for overflow 2021 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { 2022 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2023 res.w[0] = 0x0000000000000000ull; 2024 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2025 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2026 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2027 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2028 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2029 BID_SWAP128 (res); 2030 BID_RETURN (res) 2031 } 2032 if (rnd_mode != ROUNDING_TO_NEAREST) { 2033 rounding_correction (rnd_mode, 2034 is_inexact_lt_midpoint, 2035 is_inexact_gt_midpoint, 2036 is_midpoint_lt_even, is_midpoint_gt_even, 2037 e3, &res, pfpsf); 2038 z_exp = res.w[1] & MASK_EXP; 2039 } 2040 } else { // if (p_sign != z_sign) 2041 // consider two cases, because C3 * 10^scale = 10^33 is a special case 2042 if (res.w[1] != 0x0000314dc6448d93ull || 2043 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 2044 if (lt_half_ulp) { 2045 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2046 // use the following to avoid double rounding errors when operating 2047 // on mixed formats in rounding to nearest 2048 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value 2049 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 2050 // subtract 1 ulp from the significand 2051 res.w[0]--; 2052 if (res.w[0] == 0xffffffffffffffffull) 2053 res.w[1]--; 2054 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2055 if (eq_half_ulp) { 2056 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2057 } else { 2058 
is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2059 } 2060 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2061 // leave unchanged 2062 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2063 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2064 } 2065 // the result is always inexact, and never tiny 2066 // check for overflow for RN 2067 if (e3 > expmax) { 2068 if (rnd_mode == ROUNDING_TO_NEAREST) { 2069 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2070 res.w[0] = 0x0000000000000000ull; 2071 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2072 } else { 2073 rounding_correction (rnd_mode, 2074 is_inexact_lt_midpoint, 2075 is_inexact_gt_midpoint, 2076 is_midpoint_lt_even, 2077 is_midpoint_gt_even, e3, &res, 2078 pfpsf); 2079 } 2080 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2081 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2082 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2083 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2084 BID_SWAP128 (res); 2085 BID_RETURN (res) 2086 } 2087 // set the inexact flag 2088 *pfpsf |= INEXACT_EXCEPTION; 2089 if (rnd_mode != ROUNDING_TO_NEAREST) { 2090 rounding_correction (rnd_mode, 2091 is_inexact_lt_midpoint, 2092 is_inexact_gt_midpoint, 2093 is_midpoint_lt_even, 2094 is_midpoint_gt_even, e3, &res, pfpsf); 2095 } 2096 z_exp = res.w[1] & MASK_EXP; 2097 } else { // if C3 * 10^scale = 10^33 2098 e3 = (z_exp >> 49) - 6176; 2099 if (e3 > expmin) { 2100 // the result is exact if exp > expmin and C4 = d*10^(q4-1), 2101 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact 2102 if (q4 == 1) { 2103 // if q4 = 1 the result is exact 2104 // result coefficient = 10^34 - C4 2105 res.w[1] = 0x0001ed09bead87c0ull; 2106 res.w[0] = 0x378d8e6400000000ull - C4.w[0]; 2107 z_exp = z_exp - EXP_P1; 2108 e3 = e3 - 1; 2109 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2110 } else { 2111 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 2112 // x = q4-1, 1 <= x <= 
67 and check if this operation is exact 2113 if (q4 <= 18) { // 2 <= q4 <= 18 2114 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 2115 &is_midpoint_lt_even, 2116 &is_midpoint_gt_even, 2117 &is_inexact_lt_midpoint, 2118 &is_inexact_gt_midpoint); 2119 } else if (q4 <= 38) { 2120 P128.w[1] = C4.w[1]; 2121 P128.w[0] = C4.w[0]; 2122 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 2123 &is_midpoint_lt_even, 2124 &is_midpoint_gt_even, 2125 &is_inexact_lt_midpoint, 2126 &is_inexact_gt_midpoint); 2127 R64 = R128.w[0]; // one decimal digit 2128 } else if (q4 <= 57) { 2129 P192.w[2] = C4.w[2]; 2130 P192.w[1] = C4.w[1]; 2131 P192.w[0] = C4.w[0]; 2132 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 2133 &is_midpoint_lt_even, 2134 &is_midpoint_gt_even, 2135 &is_inexact_lt_midpoint, 2136 &is_inexact_gt_midpoint); 2137 R64 = R192.w[0]; // one decimal digit 2138 } else { // if (q4 <= 68) 2139 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 2140 &is_midpoint_lt_even, 2141 &is_midpoint_gt_even, 2142 &is_inexact_lt_midpoint, 2143 &is_inexact_gt_midpoint); 2144 R64 = R256.w[0]; // one decimal digit 2145 } 2146 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2147 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2148 // the result is exact: 10^34 - R64 2149 // incr_exp = 0 with certainty 2150 z_exp = z_exp - EXP_P1; 2151 e3 = e3 - 1; 2152 res.w[1] = 2153 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; 2154 res.w[0] = 0x378d8e6400000000ull - R64; 2155 } else { 2156 // We want R64 to be the top digit of C4, but we actually 2157 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, 2158 // because the top digit is (C4 * 10^(-q4+1))RZ 2159 // however, if incr_exp = 1 then R64 = 10 with certainty 2160 if (incr_exp) { 2161 R64 = 10; 2162 } 2163 // the result is inexact as C4 has more than 1 significant digit 2164 // and C3 * 10^scale = 10^33 2165 // example of case that is treated here: 2166 // 100...0 * 10^e3 - 0.41 * 10^e3 = 2167 // 0999...9.59 * 10^e3 -> 
rounds to 99...96*10^(e3-1) 2168 // note that (e3 > expmin} 2169 // in order to round, subtract R64 from 10^34 and then compare 2170 // C4 - R64 * 10^(q4-1) with 1/2 ulp 2171 // calculate 10^34 - R64 2172 res.w[1] = 0x0001ed09bead87c0ull; 2173 res.w[0] = 0x378d8e6400000000ull - R64; 2174 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand 2175 // calculate C4 - R64 * 10^(q4-1); this is a rare case and 2176 // R64 is small, 1 <= R64 <= 9 2177 e3 = e3 - 1; 2178 if (is_inexact_lt_midpoint) { 2179 is_inexact_lt_midpoint = 0; 2180 is_inexact_gt_midpoint = 1; 2181 } else if (is_inexact_gt_midpoint) { 2182 is_inexact_gt_midpoint = 0; 2183 is_inexact_lt_midpoint = 1; 2184 } else if (is_midpoint_lt_even) { 2185 is_midpoint_lt_even = 0; 2186 is_midpoint_gt_even = 1; 2187 } else if (is_midpoint_gt_even) { 2188 is_midpoint_gt_even = 0; 2189 is_midpoint_lt_even = 1; 2190 } else { 2191 ; 2192 } 2193 // the result is always inexact, and never tiny 2194 // check for overflow for RN 2195 if (e3 > expmax) { 2196 if (rnd_mode == ROUNDING_TO_NEAREST) { 2197 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2198 res.w[0] = 0x0000000000000000ull; 2199 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2200 } else { 2201 rounding_correction (rnd_mode, 2202 is_inexact_lt_midpoint, 2203 is_inexact_gt_midpoint, 2204 is_midpoint_lt_even, 2205 is_midpoint_gt_even, e3, &res, 2206 pfpsf); 2207 } 2208 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2209 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2210 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2211 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2212 BID_SWAP128 (res); 2213 BID_RETURN (res) 2214 } 2215 // set the inexact flag 2216 *pfpsf |= INEXACT_EXCEPTION; 2217 res.w[1] = 2218 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 2219 if (rnd_mode != ROUNDING_TO_NEAREST) { 2220 rounding_correction (rnd_mode, 2221 is_inexact_lt_midpoint, 2222 is_inexact_gt_midpoint, 2223 is_midpoint_lt_even, 2224 
is_midpoint_gt_even, e3, &res, 2225 pfpsf); 2226 } 2227 z_exp = res.w[1] & MASK_EXP; 2228 } // end result is inexact 2229 } // end q4 > 1 2230 } else { // if (e3 = emin) 2231 // if e3 = expmin the result is also tiny (the condition for 2232 // tininess is C4 > 050...0 [q4 digits] which is met because 2233 // the msd of C4 is not zero) 2234 // the result is tiny and inexact in all rounding modes; 2235 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, 2236 // gt_half_ulp to calculate) 2237 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged 2238 2239 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp 2240 if (gt_half_ulp) { // res = 10^33 - 1 2241 res.w[1] = 0x0000314dc6448d93ull; 2242 res.w[0] = 0x38c15b09ffffffffull; 2243 } else { 2244 res.w[1] = 0x0000314dc6448d93ull; 2245 res.w[0] = 0x38c15b0a00000000ull; 2246 } 2247 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2248 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later 2249 2250 if (eq_half_ulp) { 2251 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2252 } else if (lt_half_ulp) { 2253 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value 2254 } else { // if (gt_half_ulp) 2255 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2256 } 2257 2258 if (rnd_mode != ROUNDING_TO_NEAREST) { 2259 rounding_correction (rnd_mode, 2260 is_inexact_lt_midpoint, 2261 is_inexact_gt_midpoint, 2262 is_midpoint_lt_even, 2263 is_midpoint_gt_even, e3, &res, 2264 pfpsf); 2265 z_exp = res.w[1] & MASK_EXP; 2266 } 2267 } // end e3 = emin 2268 // set the inexact flag (if the result was not exact) 2269 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2270 is_midpoint_lt_even || is_midpoint_gt_even) 2271 *pfpsf |= INEXACT_EXCEPTION; 2272 } // end 10^33 2273 } // end if (p_sign != z_sign) 2274 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2275 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2276 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2277 
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2278 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2279 BID_SWAP128 (res); 2280 BID_RETURN (res) 2281 2282 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2283 (q3 <= delta && delta + q4 <= p34) || // Case (3) 2284 (delta < q3 && p34 < delta + q4) || // Case (4) 2285 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) 2286 (delta + q4 < q3)) && // Case (6) 2287 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) 2288 2289 // the result has the sign of z 2290 2291 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2292 (delta < q3 && p34 < delta + q4)) { // Case (4) 2293 // round first the sum x * y + z with unbounded exponent 2294 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, 2295 // 1 <= scale <= 33 2296 // calculate res = C3 * 10^scale 2297 scale = p34 - q3; 2298 x0 = delta + q4 - p34; 2299 } else if (delta + q4 < q3) { // Case (6) 2300 // make Case (6) look like Case (3) or Case (5) with scale = 0 2301 // by scaling up C4 by 10^(q3 - delta - q4) 2302 scale = q3 - delta - q4; // 1 <= scale <= 33 2303 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits 2304 if (scale <= 19) { // 10^scale fits in 64 bits 2305 // 64 x 64 C4.w[0] * ten2k64[scale] 2306 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); 2307 } else { // 10^scale fits in 128 bits 2308 // 64 x 128 C4.w[0] * ten2k128[scale - 20] 2309 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); 2310 } 2311 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 2312 // 64 x 128 ten2k64[scale] * C4 2313 __mul_128x64_to_128 (P128, ten2k64[scale], C4); 2314 } 2315 C4.w[0] = P128.w[0]; 2316 C4.w[1] = P128.w[1]; 2317 // e4 does not need adjustment, as it is not used from this point on 2318 scale = 0; 2319 x0 = 0; 2320 // now Case (6) looks like Case (3) or Case (5) with scale = 0 2321 } else { // if Case (3) or Case (5) 2322 // Note: Case (3) 
is similar to Case (2), but scale differs and the 2323 // result is exact, unless it is tiny (so x0 = 0 when calculating the 2324 // result with unbounded exponent) 2325 2326 // calculate first the sum x * y + z with unbounded exponent (exact) 2327 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, 2328 // 1 <= scale <= 33 2329 // calculate res = C3 * 10^scale 2330 scale = delta + q4 - q3; 2331 x0 = 0; 2332 // Note: the comments which follow refer [mainly] to Case (2)] 2333 } 2334 2335 case2_repeat: 2336 if (scale == 0) { // this could happen e.g. if we return to case2_repeat 2337 // or in Case (4) 2338 res.w[1] = C3.w[1]; 2339 res.w[0] = C3.w[0]; 2340 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits 2341 if (scale <= 19) { // 10^scale fits in 64 bits 2342 // 64 x 64 C3.w[0] * ten2k64[scale] 2343 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 2344 } else { // 10^scale fits in 128 bits 2345 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 2346 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 2347 } 2348 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 2349 // 64 x 128 ten2k64[scale] * C3 2350 __mul_128x64_to_128 (res, ten2k64[scale], C3); 2351 } 2352 // e3 is already calculated 2353 e3 = e3 - scale; 2354 // now res = C3 * 10^scale and e3 = e3 - scale 2355 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat 2356 // because the result was too small 2357 2358 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, 2359 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) 2360 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of 2361 // the rounding fits in 128 bits!) 
2362 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) 2363 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 2364 if (x0 == 0) { // this could happen only if we return to case2_repeat, or 2365 // for Case (3) or Case (6) 2366 R128.w[1] = C4.w[1]; 2367 R128.w[0] = C4.w[0]; 2368 } else if (q4 <= 18) { 2369 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 2370 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, 2371 &is_midpoint_lt_even, &is_midpoint_gt_even, 2372 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 2373 if (incr_exp) { 2374 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 2375 R64 = ten2k64[q4 - x0]; 2376 } 2377 R128.w[1] = 0; 2378 R128.w[0] = R64; 2379 } else if (q4 <= 38) { 2380 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 2381 P128.w[1] = C4.w[1]; 2382 P128.w[0] = C4.w[0]; 2383 round128_19_38 (q4, x0, P128, &R128, &incr_exp, 2384 &is_midpoint_lt_even, &is_midpoint_gt_even, 2385 &is_inexact_lt_midpoint, 2386 &is_inexact_gt_midpoint); 2387 if (incr_exp) { 2388 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 2389 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2390 R128.w[0] = ten2k64[q4 - x0]; 2391 // R128.w[1] stays 0 2392 } else { // 20 <= q4 - x0 <= 37 2393 R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2394 R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2395 } 2396 } 2397 } else if (q4 <= 57) { 2398 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 2399 P192.w[2] = C4.w[2]; 2400 P192.w[1] = C4.w[1]; 2401 P192.w[0] = C4.w[0]; 2402 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2403 &is_midpoint_lt_even, &is_midpoint_gt_even, 2404 &is_inexact_lt_midpoint, 2405 &is_inexact_gt_midpoint); 2406 // R192.w[2] is always 0 2407 if (incr_exp) { 2408 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 2409 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2410 R192.w[0] = ten2k64[q4 - x0]; 2411 // R192.w[1] stays 0 2412 // R192.w[2] stays 0 2413 } else { // 20 <= 
q4 - x0 <= 33 2414 R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2415 R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2416 // R192.w[2] stays 0 2417 } 2418 } 2419 R128.w[1] = R192.w[1]; 2420 R128.w[0] = R192.w[0]; 2421 } else { 2422 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 2423 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2424 &is_midpoint_lt_even, &is_midpoint_gt_even, 2425 &is_inexact_lt_midpoint, 2426 &is_inexact_gt_midpoint); 2427 // R256.w[3] and R256.w[2] are always 0 2428 if (incr_exp) { 2429 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 2430 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2431 R256.w[0] = ten2k64[q4 - x0]; 2432 // R256.w[1] stays 0 2433 // R256.w[2] stays 0 2434 // R256.w[3] stays 0 2435 } else { // 20 <= q4 - x0 <= 33 2436 R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2437 R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2438 // R256.w[2] stays 0 2439 // R256.w[3] stays 0 2440 } 2441 } 2442 R128.w[1] = R256.w[1]; 2443 R128.w[0] = R256.w[0]; 2444 } 2445 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, 2446 // rounded to nearest, which were copied into R128 2447 if (z_sign == p_sign) { 2448 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale 2449 // the sum can result in [up to] p34 or p34 + 1 digits 2450 res.w[0] = res.w[0] + R128.w[0]; 2451 res.w[1] = res.w[1] + R128.w[1]; 2452 if (res.w[0] < R128.w[0]) 2453 res.w[1]++; // carry 2454 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 2455 if (res.w[1] > 0x0001ed09bead87c0ull || 2456 (res.w[1] == 0x0001ed09bead87c0ull && 2457 res.w[0] > 0x378d8e63ffffffffull)) { 2458 // avoid double rounding error 2459 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2460 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2461 is_midpoint_lt_even0 = is_midpoint_lt_even; 2462 is_midpoint_gt_even0 = is_midpoint_gt_even; 2463 is_inexact_lt_midpoint = 0; 2464 is_inexact_gt_midpoint = 0; 2465 is_midpoint_lt_even = 0; 2466 is_midpoint_gt_even = 0; 2467 
P128.w[1] = res.w[1]; 2468 P128.w[0] = res.w[0]; 2469 round128_19_38 (35, 1, P128, &res, &incr_exp, 2470 &is_midpoint_lt_even, &is_midpoint_gt_even, 2471 &is_inexact_lt_midpoint, 2472 &is_inexact_gt_midpoint); 2473 // incr_exp is 0 with certainty in this case 2474 // avoid a double rounding error 2475 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2476 is_midpoint_lt_even) { // double rounding error upward 2477 // res = res - 1 2478 res.w[0]--; 2479 if (res.w[0] == 0xffffffffffffffffull) 2480 res.w[1]--; 2481 // Note: a double rounding error upward is not possible; for this 2482 // the result after the first rounding would have to be 99...95 2483 // (35 digits in all), possibly followed by a number of zeros; this 2484 // not possible in Cases (2)-(6) or (15)-(17) which may get here 2485 is_midpoint_lt_even = 0; 2486 is_inexact_lt_midpoint = 1; 2487 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2488 is_midpoint_gt_even) { // double rounding error downward 2489 // res = res + 1 2490 res.w[0]++; 2491 if (res.w[0] == 0) 2492 res.w[1]++; 2493 is_midpoint_gt_even = 0; 2494 is_inexact_gt_midpoint = 1; 2495 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2496 !is_inexact_lt_midpoint 2497 && !is_inexact_gt_midpoint) { 2498 // if this second rounding was exact the result may still be 2499 // inexact because of the first rounding 2500 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2501 is_inexact_gt_midpoint = 1; 2502 } 2503 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2504 is_inexact_lt_midpoint = 1; 2505 } 2506 } else if (is_midpoint_gt_even && 2507 (is_inexact_gt_midpoint0 2508 || is_midpoint_lt_even0)) { 2509 // pulled up to a midpoint 2510 is_inexact_lt_midpoint = 1; 2511 is_inexact_gt_midpoint = 0; 2512 is_midpoint_lt_even = 0; 2513 is_midpoint_gt_even = 0; 2514 } else if (is_midpoint_lt_even && 2515 (is_inexact_lt_midpoint0 2516 || is_midpoint_gt_even0)) { 2517 // pulled down to a midpoint 2518 
is_inexact_lt_midpoint = 0; 2519 is_inexact_gt_midpoint = 1; 2520 is_midpoint_lt_even = 0; 2521 is_midpoint_gt_even = 0; 2522 } else { 2523 ; 2524 } 2525 // adjust exponent 2526 e3 = e3 + 1; 2527 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2528 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2529 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2530 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2531 is_inexact_lt_midpoint = 1; 2532 } 2533 } 2534 } else { 2535 // this is the result rounded with unbounded exponent, unless a 2536 // correction is needed 2537 res.w[1] = res.w[1] & MASK_COEFF; 2538 if (lsb == 1) { 2539 if (is_midpoint_gt_even) { 2540 // res = res + 1 2541 is_midpoint_gt_even = 0; 2542 is_midpoint_lt_even = 1; 2543 res.w[0]++; 2544 if (res.w[0] == 0x0) 2545 res.w[1]++; 2546 // check for rounding overflow 2547 if (res.w[1] == 0x0001ed09bead87c0ull && 2548 res.w[0] == 0x378d8e6400000000ull) { 2549 // res = 10^34 => rounding overflow 2550 res.w[1] = 0x0000314dc6448d93ull; 2551 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2552 e3++; 2553 } 2554 } else if (is_midpoint_lt_even) { 2555 // res = res - 1 2556 is_midpoint_lt_even = 0; 2557 is_midpoint_gt_even = 1; 2558 res.w[0]--; 2559 if (res.w[0] == 0xffffffffffffffffull) 2560 res.w[1]--; 2561 // if the result is pure zero, the sign depends on the rounding 2562 // mode (x*y and z had opposite signs) 2563 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2564 if (rnd_mode != ROUNDING_DOWN) 2565 z_sign = 0x0000000000000000ull; 2566 else 2567 z_sign = 0x8000000000000000ull; 2568 // the exponent is max (e3, expmin) 2569 res.w[1] = 0x0; 2570 res.w[0] = 0x0; 2571 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2572 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2573 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2574 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2575 BID_SWAP128 (res); 2576 BID_RETURN (res) 2577 } 2578 } else { 2579 ; 2580 } 2581 } 2582 } 2583 } else { // if 
(z_sign != p_sign) 2584 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 2585 // used to swap rounding indicators if p_sign != z_sign 2586 // the sum can result in [up to] p34 or p34 - 1 digits 2587 tmp64 = res.w[0]; 2588 res.w[0] = res.w[0] - R128.w[0]; 2589 res.w[1] = res.w[1] - R128.w[1]; 2590 if (res.w[0] > tmp64) 2591 res.w[1]--; // borrow 2592 // if res < 10^33 and exp > expmin need to decrease x0 and 2593 // increase scale by 1 2594 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || 2595 (res.w[1] == 0x0000314dc6448d93ull && 2596 res.w[0] < 0x38c15b0a00000000ull)) || 2597 (is_inexact_lt_midpoint 2598 && res.w[1] == 0x0000314dc6448d93ull 2599 && res.w[0] == 0x38c15b0a00000000ull)) 2600 && x0 >= 1) { 2601 x0 = x0 - 1; 2602 // first restore e3, otherwise it will be too small 2603 e3 = e3 + scale; 2604 scale = scale + 1; 2605 is_inexact_lt_midpoint = 0; 2606 is_inexact_gt_midpoint = 0; 2607 is_midpoint_lt_even = 0; 2608 is_midpoint_gt_even = 0; 2609 incr_exp = 0; 2610 goto case2_repeat; 2611 } 2612 // else this is the result rounded with unbounded exponent; 2613 // because the result has opposite sign to that of C4 which was 2614 // rounded, need to change the rounding indicators 2615 if (is_inexact_lt_midpoint) { 2616 is_inexact_lt_midpoint = 0; 2617 is_inexact_gt_midpoint = 1; 2618 } else if (is_inexact_gt_midpoint) { 2619 is_inexact_gt_midpoint = 0; 2620 is_inexact_lt_midpoint = 1; 2621 } else if (lsb == 0) { 2622 if (is_midpoint_lt_even) { 2623 is_midpoint_lt_even = 0; 2624 is_midpoint_gt_even = 1; 2625 } else if (is_midpoint_gt_even) { 2626 is_midpoint_gt_even = 0; 2627 is_midpoint_lt_even = 1; 2628 } else { 2629 ; 2630 } 2631 } else if (lsb == 1) { 2632 if (is_midpoint_lt_even) { 2633 // res = res + 1 2634 res.w[0]++; 2635 if (res.w[0] == 0x0) 2636 res.w[1]++; 2637 // check for rounding overflow 2638 if (res.w[1] == 0x0001ed09bead87c0ull && 2639 res.w[0] == 0x378d8e6400000000ull) { 2640 // res = 10^34 => rounding overflow 2641 
res.w[1] = 0x0000314dc6448d93ull; 2642 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2643 e3++; 2644 } 2645 } else if (is_midpoint_gt_even) { 2646 // res = res - 1 2647 res.w[0]--; 2648 if (res.w[0] == 0xffffffffffffffffull) 2649 res.w[1]--; 2650 // if the result is pure zero, the sign depends on the rounding 2651 // mode (x*y and z had opposite signs) 2652 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2653 if (rnd_mode != ROUNDING_DOWN) 2654 z_sign = 0x0000000000000000ull; 2655 else 2656 z_sign = 0x8000000000000000ull; 2657 // the exponent is max (e3, expmin) 2658 res.w[1] = 0x0; 2659 res.w[0] = 0x0; 2660 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2661 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2662 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2663 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2664 BID_SWAP128 (res); 2665 BID_RETURN (res) 2666 } 2667 } else { 2668 ; 2669 } 2670 } else { 2671 ; 2672 } 2673 } 2674 // check for underflow 2675 if (e3 == expmin) { // and if significand < 10^33 => result is tiny 2676 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 2677 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 2678 res.w[0] < 0x38c15b0a00000000ull)) { 2679 is_tiny = 1; 2680 } 2681 } else if (e3 < expmin) { 2682 // the result is tiny, so we must truncate more of res 2683 is_tiny = 1; 2684 x0 = expmin - e3; 2685 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2686 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2687 is_midpoint_lt_even0 = is_midpoint_lt_even; 2688 is_midpoint_gt_even0 = is_midpoint_gt_even; 2689 is_inexact_lt_midpoint = 0; 2690 is_inexact_gt_midpoint = 0; 2691 is_midpoint_lt_even = 0; 2692 is_midpoint_gt_even = 0; 2693 // determine the number of decimal digits in res 2694 if (res.w[1] == 0x0) { 2695 // between 1 and 19 digits 2696 for (ind = 1; ind <= 19; ind++) { 2697 if (res.w[0] < ten2k64[ind]) { 2698 break; 2699 } 2700 } 2701 // ind digits 2702 } else if (res.w[1] < ten2k128[0].w[1] || 2703 (res.w[1] == 
ten2k128[0].w[1] 2704 && res.w[0] < ten2k128[0].w[0])) { 2705 // 20 digits 2706 ind = 20; 2707 } else { // between 21 and 38 digits 2708 for (ind = 1; ind <= 18; ind++) { 2709 if (res.w[1] < ten2k128[ind].w[1] || 2710 (res.w[1] == ten2k128[ind].w[1] && 2711 res.w[0] < ten2k128[ind].w[0])) { 2712 break; 2713 } 2714 } 2715 // ind + 20 digits 2716 ind = ind + 20; 2717 } 2718 2719 // at this point ind >= x0; because delta >= 2 on this path, the case 2720 // ind = x0 can occur only in Case (2) or case (3), when C3 has one 2721 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 2722 // the signs of x * y and z are opposite, and through cancellation 2723 // the most significant decimal digit in res has the weight 2724 // 10^(emin-1); however, it is clear that in this case the most 2725 // significant digit is 9, so the result before rounding is 2726 // 0.9... * 10^emin 2727 // Otherwise, ind > x0 because there are non-zero decimal digits in the 2728 // result with weight of at least 10^emin, and correction for underflow 2729 // can be carried out using the round*_*_2_* () routines 2730 if (x0 == ind) { // the result before rounding is 0.9... 
* 10^emin 2731 res.w[1] = 0x0; 2732 res.w[0] = 0x1; 2733 is_inexact_gt_midpoint = 1; 2734 } else if (ind <= 18) { // check that 2 <= ind 2735 // 2 <= ind <= 18, 1 <= x0 <= 17 2736 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 2737 &is_midpoint_lt_even, &is_midpoint_gt_even, 2738 &is_inexact_lt_midpoint, 2739 &is_inexact_gt_midpoint); 2740 if (incr_exp) { 2741 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 2742 R64 = ten2k64[ind - x0]; 2743 } 2744 res.w[1] = 0; 2745 res.w[0] = R64; 2746 } else if (ind <= 38) { 2747 // 19 <= ind <= 38 2748 P128.w[1] = res.w[1]; 2749 P128.w[0] = res.w[0]; 2750 round128_19_38 (ind, x0, P128, &res, &incr_exp, 2751 &is_midpoint_lt_even, &is_midpoint_gt_even, 2752 &is_inexact_lt_midpoint, 2753 &is_inexact_gt_midpoint); 2754 if (incr_exp) { 2755 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 2756 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 2757 res.w[0] = ten2k64[ind - x0]; 2758 // res.w[1] stays 0 2759 } else { // 20 <= ind - x0 <= 37 2760 res.w[0] = ten2k128[ind - x0 - 20].w[0]; 2761 res.w[1] = ten2k128[ind - x0 - 20].w[1]; 2762 } 2763 } 2764 } 2765 // avoid a double rounding error 2766 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2767 is_midpoint_lt_even) { // double rounding error upward 2768 // res = res - 1 2769 res.w[0]--; 2770 if (res.w[0] == 0xffffffffffffffffull) 2771 res.w[1]--; 2772 // Note: a double rounding error upward is not possible; for this 2773 // the result after the first rounding would have to be 99...95 2774 // (35 digits in all), possibly followed by a number of zeros; this 2775 // not possible in Cases (2)-(6) which may get here 2776 is_midpoint_lt_even = 0; 2777 is_inexact_lt_midpoint = 1; 2778 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2779 is_midpoint_gt_even) { // double rounding error downward 2780 // res = res + 1 2781 res.w[0]++; 2782 if (res.w[0] == 0) 2783 res.w[1]++; 2784 is_midpoint_gt_even = 0; 2785 is_inexact_gt_midpoint 
= 1; 2786 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2787 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2788 // if this second rounding was exact the result may still be 2789 // inexact because of the first rounding 2790 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2791 is_inexact_gt_midpoint = 1; 2792 } 2793 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2794 is_inexact_lt_midpoint = 1; 2795 } 2796 } else if (is_midpoint_gt_even && 2797 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 2798 // pulled up to a midpoint 2799 is_inexact_lt_midpoint = 1; 2800 is_inexact_gt_midpoint = 0; 2801 is_midpoint_lt_even = 0; 2802 is_midpoint_gt_even = 0; 2803 } else if (is_midpoint_lt_even && 2804 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 2805 // pulled down to a midpoint 2806 is_inexact_lt_midpoint = 0; 2807 is_inexact_gt_midpoint = 1; 2808 is_midpoint_lt_even = 0; 2809 is_midpoint_gt_even = 0; 2810 } else { 2811 ; 2812 } 2813 // adjust exponent 2814 e3 = e3 + x0; 2815 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2816 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2817 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2818 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2819 is_inexact_lt_midpoint = 1; 2820 } 2821 } 2822 } else { 2823 ; // not underflow 2824 } 2825 // check for inexact result 2826 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2827 is_midpoint_lt_even || is_midpoint_gt_even) { 2828 // set the inexact flag 2829 *pfpsf |= INEXACT_EXCEPTION; 2830 if (is_tiny) 2831 *pfpsf |= UNDERFLOW_EXCEPTION; 2832 } 2833 // now check for significand = 10^34 (may have resulted from going 2834 // back to case2_repeat) 2835 if (res.w[1] == 0x0001ed09bead87c0ull && 2836 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 2837 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 2838 res.w[0] = 0x38c15b0a00000000ull; 2839 e3 = e3 + 1; 2840 } 2841 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | 
res.w[1]; 2842 // check for overflow 2843 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { 2844 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2845 res.w[0] = 0x0000000000000000ull; 2846 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2847 } 2848 if (rnd_mode != ROUNDING_TO_NEAREST) { 2849 rounding_correction (rnd_mode, 2850 is_inexact_lt_midpoint, 2851 is_inexact_gt_midpoint, 2852 is_midpoint_lt_even, is_midpoint_gt_even, 2853 e3, &res, pfpsf); 2854 } 2855 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2856 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2857 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2858 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2859 BID_SWAP128 (res); 2860 BID_RETURN (res) 2861 2862 } else { 2863 2864 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and 2865 // the signs of x*y and z are opposite; in these cases massive 2866 // cancellation can occur, so it is better to scale either C3 or C4 and 2867 // to perform the subtraction before rounding; rounding is performed 2868 // next, depending on the number of decimal digits in the result and on 2869 // the exponent value 2870 // Note: overlow is not possible in this case 2871 // this is similar to Cases (15), (16), and (17) 2872 2873 if (delta + q4 < q3) { // from Case (6) 2874 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and 2875 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) 2876 // and call add_and_round; delta stays positive 2877 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 2878 P128.w[1] = C3.w[1]; 2879 P128.w[0] = C3.w[0]; 2880 C3.w[1] = C4.w[1]; 2881 C3.w[0] = C4.w[0]; 2882 C4.w[1] = P128.w[1]; 2883 C4.w[0] = P128.w[0]; 2884 ind = q3; 2885 q3 = q4; 2886 q4 = ind; 2887 ind = e3; 2888 e3 = e4; 2889 e4 = ind; 2890 tmp_sign = z_sign; 2891 z_sign = p_sign; 2892 p_sign = tmp_sign; 2893 } else { // from Cases (2), (3), (4), (5) 2894 // In Cases (2), (3), (4), (5) with 0 <= 
delta <= 1 C3 has to be 2895 // scaled up by q4 + delta - q3; this is the same as in Cases (15), 2896 // (16), and (17) if we just change the sign of delta 2897 delta = -delta; 2898 } 2899 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 2900 rnd_mode, &is_midpoint_lt_even, 2901 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 2902 &is_inexact_gt_midpoint, pfpsf, &res); 2903 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2904 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2905 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2906 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2907 BID_SWAP128 (res); 2908 BID_RETURN (res) 2909 2910 } 2911 2912 } else { // if delta < 0 2913 2914 delta = -delta; 2915 2916 if (p34 < q4 && q4 <= delta) { // Case (7) 2917 2918 // truncate C4 to p34 digits into res 2919 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 2920 x0 = q4 - p34; 2921 if (q4 <= 38) { 2922 P128.w[1] = C4.w[1]; 2923 P128.w[0] = C4.w[0]; 2924 round128_19_38 (q4, x0, P128, &res, &incr_exp, 2925 &is_midpoint_lt_even, &is_midpoint_gt_even, 2926 &is_inexact_lt_midpoint, 2927 &is_inexact_gt_midpoint); 2928 } else if (q4 <= 57) { // 35 <= q4 <= 57 2929 P192.w[2] = C4.w[2]; 2930 P192.w[1] = C4.w[1]; 2931 P192.w[0] = C4.w[0]; 2932 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2933 &is_midpoint_lt_even, &is_midpoint_gt_even, 2934 &is_inexact_lt_midpoint, 2935 &is_inexact_gt_midpoint); 2936 res.w[0] = R192.w[0]; 2937 res.w[1] = R192.w[1]; 2938 } else { // if (q4 <= 68) 2939 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2940 &is_midpoint_lt_even, &is_midpoint_gt_even, 2941 &is_inexact_lt_midpoint, 2942 &is_inexact_gt_midpoint); 2943 res.w[0] = R256.w[0]; 2944 res.w[1] = R256.w[1]; 2945 } 2946 e4 = e4 + x0; 2947 if (incr_exp) { 2948 e4 = e4 + 1; 2949 } 2950 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2951 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2952 // if C4 rounded to p34 digits is exact then the result is inexact, 2953 // in 
a way that depends on the signs of x * y and z 2954 if (p_sign == z_sign) { 2955 is_inexact_lt_midpoint = 1; 2956 } else { // if (p_sign != z_sign) 2957 if (res.w[1] != 0x0000314dc6448d93ull || 2958 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 2959 is_inexact_gt_midpoint = 1; 2960 } else { // res = 10^33 and exact is a special case 2961 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 2962 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 2963 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 2964 // Note: ulp is really ulp/10 (after borrow which propagates to msd) 2965 if (delta > p34 + 1) { // C3 < 1/2 2966 // res = 10^33, unchanged 2967 is_inexact_gt_midpoint = 1; 2968 } else { // if (delta == p34 + 1) 2969 if (q3 <= 19) { 2970 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp 2971 // res = 10^33, unchanged 2972 is_inexact_gt_midpoint = 1; 2973 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp 2974 // res = 10^33, unchanged 2975 is_midpoint_lt_even = 1; 2976 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp 2977 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 2978 res.w[0] = 0x378d8e63ffffffffull; 2979 e4 = e4 - 1; 2980 is_inexact_lt_midpoint = 1; 2981 } 2982 } else { // if (20 <= q3 <=34) 2983 if (C3.w[1] < midpoint128[q3 - 20].w[1] || 2984 (C3.w[1] == midpoint128[q3 - 20].w[1] && 2985 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp 2986 // res = 10^33, unchanged 2987 is_inexact_gt_midpoint = 1; 2988 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && 2989 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp 2990 // res = 10^33, unchanged 2991 is_midpoint_lt_even = 1; 2992 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp 2993 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 2994 res.w[0] = 0x378d8e63ffffffffull; 2995 e4 = e4 - 1; 2996 is_inexact_lt_midpoint = 1; 2997 } 2998 } 2999 } 3000 } 3001 } 3002 } else if (is_midpoint_lt_even) { 3003 if (z_sign != p_sign) { 3004 // 
needs correction: res = res - 1 3005 res.w[0] = res.w[0] - 1; 3006 if (res.w[0] == 0xffffffffffffffffull) 3007 res.w[1]--; 3008 // if it is (10^33-1)*10^e4 then the corect result is 3009 // (10^34-1)*10(e4-1) 3010 if (res.w[1] == 0x0000314dc6448d93ull && 3011 res.w[0] == 0x38c15b09ffffffffull) { 3012 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3013 res.w[0] = 0x378d8e63ffffffffull; 3014 e4 = e4 - 1; 3015 } 3016 is_midpoint_lt_even = 0; 3017 is_inexact_lt_midpoint = 1; 3018 } else { // if (z_sign == p_sign) 3019 is_midpoint_lt_even = 0; 3020 is_inexact_gt_midpoint = 1; 3021 } 3022 } else if (is_midpoint_gt_even) { 3023 if (z_sign == p_sign) { 3024 // needs correction: res = res + 1 (cannot cross in the next binade) 3025 res.w[0] = res.w[0] + 1; 3026 if (res.w[0] == 0x0000000000000000ull) 3027 res.w[1]++; 3028 is_midpoint_gt_even = 0; 3029 is_inexact_gt_midpoint = 1; 3030 } else { // if (z_sign != p_sign) 3031 is_midpoint_gt_even = 0; 3032 is_inexact_lt_midpoint = 1; 3033 } 3034 } else { 3035 ; // the rounded result is already correct 3036 } 3037 // check for overflow 3038 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { 3039 res.w[1] = p_sign | 0x7800000000000000ull; 3040 res.w[0] = 0x0000000000000000ull; 3041 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); 3042 } else { // no overflow or not RN 3043 p_exp = ((UINT64) (e4 + 6176) << 49); 3044 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 3045 } 3046 if (rnd_mode != ROUNDING_TO_NEAREST) { 3047 rounding_correction (rnd_mode, 3048 is_inexact_lt_midpoint, 3049 is_inexact_gt_midpoint, 3050 is_midpoint_lt_even, is_midpoint_gt_even, 3051 e4, &res, pfpsf); 3052 } 3053 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 3054 is_midpoint_lt_even || is_midpoint_gt_even) { 3055 // set the inexact flag 3056 *pfpsf |= INEXACT_EXCEPTION; 3057 } 3058 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3059 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3060 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 
3061 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3062 BID_SWAP128 (res); 3063 BID_RETURN (res) 3064 3065 } else if ((q4 <= p34 && p34 <= delta) || // Case (8) 3066 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) 3067 (q4 <= delta && delta + q3 <= p34) || // Case (10) 3068 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) 3069 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) 3070 (delta + q3 < q4 && q4 <= p34)) { // Case (18) 3071 3072 // Case (8) is similar to Case (1), with C3 and C4 swapped 3073 // Case (9) is similar to Case (2), with C3 and C4 swapped 3074 // Case (10) is similar to Case (3), with C3 and C4 swapped 3075 // Case (13) is similar to Case (4), with C3 and C4 swapped 3076 // Case (14) is similar to Case (5), with C3 and C4 swapped 3077 // Case (18) is similar to Case (6), with C3 and C4 swapped 3078 3079 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) 3080 // and go back to delta_ge_zero 3081 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 3082 P128.w[1] = C3.w[1]; 3083 P128.w[0] = C3.w[0]; 3084 C3.w[1] = C4.w[1]; 3085 C3.w[0] = C4.w[0]; 3086 C4.w[1] = P128.w[1]; 3087 C4.w[0] = P128.w[0]; 3088 ind = q3; 3089 q3 = q4; 3090 q4 = ind; 3091 ind = e3; 3092 e3 = e4; 3093 e4 = ind; 3094 tmp_sign = z_sign; 3095 z_sign = p_sign; 3096 p_sign = tmp_sign; 3097 tmp.ui64 = z_exp; 3098 z_exp = p_exp; 3099 p_exp = tmp.ui64; 3100 goto delta_ge_zero; 3101 3102 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) 3103 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) 3104 3105 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, 3106 // 1 <= x0 <= q3 - 1 <= p34 - 1 3107 x0 = e4 - e3; // or x0 = delta + q3 - q4 3108 if (q3 <= 18) { // 2 <= q3 <= 18 3109 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, 3110 &is_midpoint_lt_even, &is_midpoint_gt_even, 3111 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 3112 // C3.w[1] = 0; 
3113 C3.w[0] = R64; 3114 } else if (q3 <= 38) { 3115 round128_19_38 (q3, x0, C3, &R128, &incr_exp, 3116 &is_midpoint_lt_even, &is_midpoint_gt_even, 3117 &is_inexact_lt_midpoint, 3118 &is_inexact_gt_midpoint); 3119 C3.w[1] = R128.w[1]; 3120 C3.w[0] = R128.w[0]; 3121 } 3122 // the rounded result has q3 - x0 digits 3123 // we want the exponent to be e4, so if incr_exp = 1 then 3124 // multiply the rounded result by 10 - it will still fit in 113 bits 3125 if (incr_exp) { 3126 // 64 x 128 -> 128 3127 P128.w[1] = C3.w[1]; 3128 P128.w[0] = C3.w[0]; 3129 __mul_64x128_to_128 (C3, ten2k64[1], P128); 3130 } 3131 e3 = e3 + x0; // this is e4 3132 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 3133 // the result will have the sign of x * y; the exponent is e4 3134 R256.w[3] = 0; 3135 R256.w[2] = 0; 3136 R256.w[1] = C3.w[1]; 3137 R256.w[0] = C3.w[0]; 3138 if (p_sign == z_sign) { // R256 = C4 + R256 3139 add256 (C4, R256, &R256); 3140 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 3141 sub256 (C4, R256, &R256); // the result cannot be pure zero 3142 // because the result has opposite sign to that of R256 which was 3143 // rounded, need to change the rounding indicators 3144 lsb = C4.w[0] & 0x01; 3145 if (is_inexact_lt_midpoint) { 3146 is_inexact_lt_midpoint = 0; 3147 is_inexact_gt_midpoint = 1; 3148 } else if (is_inexact_gt_midpoint) { 3149 is_inexact_gt_midpoint = 0; 3150 is_inexact_lt_midpoint = 1; 3151 } else if (lsb == 0) { 3152 if (is_midpoint_lt_even) { 3153 is_midpoint_lt_even = 0; 3154 is_midpoint_gt_even = 1; 3155 } else if (is_midpoint_gt_even) { 3156 is_midpoint_gt_even = 0; 3157 is_midpoint_lt_even = 1; 3158 } else { 3159 ; 3160 } 3161 } else if (lsb == 1) { 3162 if (is_midpoint_lt_even) { 3163 // res = res + 1 3164 R256.w[0]++; 3165 if (R256.w[0] == 0x0) { 3166 R256.w[1]++; 3167 if (R256.w[1] == 0x0) { 3168 R256.w[2]++; 3169 if (R256.w[2] == 0x0) { 3170 R256.w[3]++; 3171 } 3172 } 3173 } 3174 // no check for rounding overflow - R256 
was a difference 3175 } else if (is_midpoint_gt_even) { 3176 // res = res - 1 3177 R256.w[0]--; 3178 if (R256.w[0] == 0xffffffffffffffffull) { 3179 R256.w[1]--; 3180 if (R256.w[1] == 0xffffffffffffffffull) { 3181 R256.w[2]--; 3182 if (R256.w[2] == 0xffffffffffffffffull) { 3183 R256.w[3]--; 3184 } 3185 } 3186 } 3187 } else { 3188 ; 3189 } 3190 } else { 3191 ; 3192 } 3193 } 3194 // determine the number of decimal digits in R256 3195 ind = nr_digits256 (R256); // ind >= p34 3196 // if R256 is sum, then ind > p34; if R256 is a difference, then 3197 // ind >= p34; this means that we can calculate the result rounded to 3198 // the destination precision, with unbounded exponent, starting from R256 3199 // and using the indicators from the rounding of C3 to avoid a double 3200 // rounding error 3201 3202 if (ind < p34) { 3203 ; 3204 } else if (ind == p34) { 3205 // the result rounded to the destination precision with 3206 // unbounded exponent 3207 // is (-1)^p_sign * R256 * 10^e4 3208 res.w[1] = R256.w[1]; 3209 res.w[0] = R256.w[0]; 3210 } else { // if (ind > p34) 3211 // if more than P digits, round to nearest to P digits 3212 // round R256 to p34 digits 3213 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 3214 // save C3 rounding indicators to help avoid double rounding error 3215 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3216 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3217 is_midpoint_lt_even0 = is_midpoint_lt_even; 3218 is_midpoint_gt_even0 = is_midpoint_gt_even; 3219 // initialize rounding indicators 3220 is_inexact_lt_midpoint = 0; 3221 is_inexact_gt_midpoint = 0; 3222 is_midpoint_lt_even = 0; 3223 is_midpoint_gt_even = 0; 3224 // round to p34 digits; the result fits in 113 bits 3225 if (ind <= 38) { 3226 P128.w[1] = R256.w[1]; 3227 P128.w[0] = R256.w[0]; 3228 round128_19_38 (ind, x0, P128, &R128, &incr_exp, 3229 &is_midpoint_lt_even, &is_midpoint_gt_even, 3230 &is_inexact_lt_midpoint, 3231 &is_inexact_gt_midpoint); 3232 } else if (ind <= 57) { 
3233 P192.w[2] = R256.w[2]; 3234 P192.w[1] = R256.w[1]; 3235 P192.w[0] = R256.w[0]; 3236 round192_39_57 (ind, x0, P192, &R192, &incr_exp, 3237 &is_midpoint_lt_even, &is_midpoint_gt_even, 3238 &is_inexact_lt_midpoint, 3239 &is_inexact_gt_midpoint); 3240 R128.w[1] = R192.w[1]; 3241 R128.w[0] = R192.w[0]; 3242 } else { // if (ind <= 68) 3243 round256_58_76 (ind, x0, R256, &R256, &incr_exp, 3244 &is_midpoint_lt_even, &is_midpoint_gt_even, 3245 &is_inexact_lt_midpoint, 3246 &is_inexact_gt_midpoint); 3247 R128.w[1] = R256.w[1]; 3248 R128.w[0] = R256.w[0]; 3249 } 3250 // the rounded result has p34 = 34 digits 3251 e4 = e4 + x0 + incr_exp; 3252 3253 res.w[1] = R128.w[1]; 3254 res.w[0] = R128.w[0]; 3255 3256 // avoid a double rounding error 3257 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3258 is_midpoint_lt_even) { // double rounding error upward 3259 // res = res - 1 3260 res.w[0]--; 3261 if (res.w[0] == 0xffffffffffffffffull) 3262 res.w[1]--; 3263 is_midpoint_lt_even = 0; 3264 is_inexact_lt_midpoint = 1; 3265 // Note: a double rounding error upward is not possible; for this 3266 // the result after the first rounding would have to be 99...95 3267 // (35 digits in all), possibly followed by a number of zeros; this 3268 // not possible in Cases (2)-(6) or (15)-(17) which may get here 3269 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent 3270 if (res.w[1] == 0x0000314dc6448d93ull && 3271 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 3272 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3273 res.w[0] = 0x378d8e63ffffffffull; 3274 e4--; 3275 } 3276 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3277 is_midpoint_gt_even) { // double rounding error downward 3278 // res = res + 1 3279 res.w[0]++; 3280 if (res.w[0] == 0) 3281 res.w[1]++; 3282 is_midpoint_gt_even = 0; 3283 is_inexact_gt_midpoint = 1; 3284 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3285 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 3286 // if 
this second rounding was exact the result may still be 3287 // inexact because of the first rounding 3288 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3289 is_inexact_gt_midpoint = 1; 3290 } 3291 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3292 is_inexact_lt_midpoint = 1; 3293 } 3294 } else if (is_midpoint_gt_even && 3295 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 3296 // pulled up to a midpoint 3297 is_inexact_lt_midpoint = 1; 3298 is_inexact_gt_midpoint = 0; 3299 is_midpoint_lt_even = 0; 3300 is_midpoint_gt_even = 0; 3301 } else if (is_midpoint_lt_even && 3302 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 3303 // pulled down to a midpoint 3304 is_inexact_lt_midpoint = 0; 3305 is_inexact_gt_midpoint = 1; 3306 is_midpoint_lt_even = 0; 3307 is_midpoint_gt_even = 0; 3308 } else { 3309 ; 3310 } 3311 } 3312 3313 // determine tininess 3314 if (rnd_mode == ROUNDING_TO_NEAREST) { 3315 if (e4 < expmin) { 3316 is_tiny = 1; // for other rounding modes apply correction 3317 } 3318 } else { 3319 // for RM, RP, RZ, RA apply correction in order to determine tininess 3320 // but do not save the result; apply the correction to 3321 // (-1)^p_sign * res * 10^0 3322 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; 3323 P128.w[0] = res.w[0]; 3324 rounding_correction (rnd_mode, 3325 is_inexact_lt_midpoint, 3326 is_inexact_gt_midpoint, 3327 is_midpoint_lt_even, is_midpoint_gt_even, 3328 0, &P128, pfpsf); 3329 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 3330 // the number of digits in the significand is p34 = 34 3331 if (e4 + scale < expmin) { 3332 is_tiny = 1; 3333 } 3334 } 3335 3336 // the result rounded to the destination precision with unbounded exponent 3337 // is (-1)^p_sign * res * 10^e4 3338 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN 3339 // res.w[0] unchanged; 3340 // Note: res is correct only if expmin <= e4 <= expmax 3341 ind = p34; // the number of decimal digits in the signifcand of res 
3342 3343 // at this point we have the result rounded with unbounded exponent in 3344 // res and we know its tininess: 3345 // res = (-1)^p_sign * significand * 10^e4, 3346 // where q (significand) = ind = p34 3347 // Note: res is correct only if expmin <= e4 <= expmax 3348 3349 // check for overflow if RN 3350 if (rnd_mode == ROUNDING_TO_NEAREST 3351 && (ind + e4) > (p34 + expmax)) { 3352 res.w[1] = p_sign | 0x7800000000000000ull; 3353 res.w[0] = 0x0000000000000000ull; 3354 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 3355 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3356 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3357 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3358 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3359 BID_SWAP128 (res); 3360 BID_RETURN (res) 3361 } // else not overflow or not RN, so continue 3362 3363 // from this point on this is similar to the last part of the computation 3364 // for Cases (15), (16), (17) 3365 3366 // if (e4 >= expmin) we have the result rounded with bounded exponent 3367 if (e4 < expmin) { 3368 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res 3369 // where the result rounded [at most] once is 3370 // (-1)^p_sign * significand_res * 10^e4 3371 3372 // avoid double rounding error 3373 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3374 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3375 is_midpoint_lt_even0 = is_midpoint_lt_even; 3376 is_midpoint_gt_even0 = is_midpoint_gt_even; 3377 is_inexact_lt_midpoint = 0; 3378 is_inexact_gt_midpoint = 0; 3379 is_midpoint_lt_even = 0; 3380 is_midpoint_gt_even = 0; 3381 3382 if (x0 > ind) { 3383 // nothing is left of res when moving the decimal point left x0 digits 3384 is_inexact_lt_midpoint = 1; 3385 res.w[1] = p_sign | 0x0000000000000000ull; 3386 res.w[0] = 0x0000000000000000ull; 3387 e4 = expmin; 3388 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 3389 // this is <, =, or > 1/2 ulp 3390 // compare the ind-digit value in the 
significand of res with 3391 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 3392 // less than, equal to, or greater than 1/2 ulp (significand of res) 3393 R128.w[1] = res.w[1] & MASK_COEFF; 3394 R128.w[0] = res.w[0]; 3395 if (ind <= 19) { 3396 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp 3397 lt_half_ulp = 1; 3398 is_inexact_lt_midpoint = 1; 3399 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp 3400 eq_half_ulp = 1; 3401 is_midpoint_gt_even = 1; 3402 } else { // > 1/2 ulp 3403 gt_half_ulp = 1; 3404 is_inexact_gt_midpoint = 1; 3405 } 3406 } else { // if (ind <= 38) 3407 if (R128.w[1] < midpoint128[ind - 20].w[1] || 3408 (R128.w[1] == midpoint128[ind - 20].w[1] && 3409 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp 3410 lt_half_ulp = 1; 3411 is_inexact_lt_midpoint = 1; 3412 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 3413 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp 3414 eq_half_ulp = 1; 3415 is_midpoint_gt_even = 1; 3416 } else { // > 1/2 ulp 3417 gt_half_ulp = 1; 3418 is_inexact_gt_midpoint = 1; 3419 } 3420 } 3421 if (lt_half_ulp || eq_half_ulp) { 3422 // res = +0.0 * 10^expmin 3423 res.w[1] = 0x0000000000000000ull; 3424 res.w[0] = 0x0000000000000000ull; 3425 } else { // if (gt_half_ulp) 3426 // res = +1 * 10^expmin 3427 res.w[1] = 0x0000000000000000ull; 3428 res.w[0] = 0x0000000000000001ull; 3429 } 3430 res.w[1] = p_sign | res.w[1]; 3431 e4 = expmin; 3432 } else { // if (1 <= x0 <= ind - 1 <= 33) 3433 // round the ind-digit result to ind - x0 digits 3434 3435 if (ind <= 18) { // 2 <= ind <= 18 3436 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 3437 &is_midpoint_lt_even, &is_midpoint_gt_even, 3438 &is_inexact_lt_midpoint, 3439 &is_inexact_gt_midpoint); 3440 res.w[1] = 0x0; 3441 res.w[0] = R64; 3442 } else if (ind <= 38) { 3443 P128.w[1] = res.w[1] & MASK_COEFF; 3444 P128.w[0] = res.w[0]; 3445 round128_19_38 (ind, x0, P128, &res, &incr_exp, 3446 &is_midpoint_lt_even, &is_midpoint_gt_even, 3447 
&is_inexact_lt_midpoint, 3448 &is_inexact_gt_midpoint); 3449 } 3450 e4 = e4 + x0; // expmin 3451 // we want the exponent to be expmin, so if incr_exp = 1 then 3452 // multiply the rounded result by 10 - it will still fit in 113 bits 3453 if (incr_exp) { 3454 // 64 x 128 -> 128 3455 P128.w[1] = res.w[1] & MASK_COEFF; 3456 P128.w[0] = res.w[0]; 3457 __mul_64x128_to_128 (res, ten2k64[1], P128); 3458 } 3459 res.w[1] = 3460 p_sign | ((UINT64) (e4 + 6176) << 49) | (res. 3461 w[1] & MASK_COEFF); 3462 // avoid a double rounding error 3463 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3464 is_midpoint_lt_even) { // double rounding error upward 3465 // res = res - 1 3466 res.w[0]--; 3467 if (res.w[0] == 0xffffffffffffffffull) 3468 res.w[1]--; 3469 // Note: a double rounding error upward is not possible; for this 3470 // the result after the first rounding would have to be 99...95 3471 // (35 digits in all), possibly followed by a number of zeros; this 3472 // not possible in this underflow case 3473 is_midpoint_lt_even = 0; 3474 is_inexact_lt_midpoint = 1; 3475 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3476 is_midpoint_gt_even) { // double rounding error downward 3477 // res = res + 1 3478 res.w[0]++; 3479 if (res.w[0] == 0) 3480 res.w[1]++; 3481 is_midpoint_gt_even = 0; 3482 is_inexact_gt_midpoint = 1; 3483 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3484 !is_inexact_lt_midpoint 3485 && !is_inexact_gt_midpoint) { 3486 // if this second rounding was exact the result may still be 3487 // inexact because of the first rounding 3488 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3489 is_inexact_gt_midpoint = 1; 3490 } 3491 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3492 is_inexact_lt_midpoint = 1; 3493 } 3494 } else if (is_midpoint_gt_even && 3495 (is_inexact_gt_midpoint0 3496 || is_midpoint_lt_even0)) { 3497 // pulled up to a midpoint 3498 is_inexact_lt_midpoint = 1; 3499 is_inexact_gt_midpoint = 0; 3500 
is_midpoint_lt_even = 0; 3501 is_midpoint_gt_even = 0; 3502 } else if (is_midpoint_lt_even && 3503 (is_inexact_lt_midpoint0 3504 || is_midpoint_gt_even0)) { 3505 // pulled down to a midpoint 3506 is_inexact_lt_midpoint = 0; 3507 is_inexact_gt_midpoint = 1; 3508 is_midpoint_lt_even = 0; 3509 is_midpoint_gt_even = 0; 3510 } else { 3511 ; 3512 } 3513 } 3514 } 3515 // res contains the correct result 3516 // apply correction if not rounding to nearest 3517 if (rnd_mode != ROUNDING_TO_NEAREST) { 3518 rounding_correction (rnd_mode, 3519 is_inexact_lt_midpoint, 3520 is_inexact_gt_midpoint, 3521 is_midpoint_lt_even, is_midpoint_gt_even, 3522 e4, &res, pfpsf); 3523 } 3524 if (is_midpoint_lt_even || is_midpoint_gt_even || 3525 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { 3526 // set the inexact flag 3527 *pfpsf |= INEXACT_EXCEPTION; 3528 if (is_tiny) 3529 *pfpsf |= UNDERFLOW_EXCEPTION; 3530 } 3531 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3532 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3533 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3534 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3535 BID_SWAP128 (res); 3536 BID_RETURN (res) 3537 3538 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) 3539 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) 3540 (delta + q3 <= p34 && p34 < q4)) { // Case (17) 3541 3542 // calculate first the result rounded to the destination precision, with 3543 // unbounded exponent 3544 3545 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 3546 rnd_mode, &is_midpoint_lt_even, 3547 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 3548 &is_inexact_gt_midpoint, pfpsf, &res); 3549 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3550 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3551 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3552 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3553 BID_SWAP128 (res); 3554 BID_RETURN (res) 3555 3556 } else { 3557 ; 3558 } 3559 3560 
} // end if delta < 0 3561 3562 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3563 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3564 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3565 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3566 BID_SWAP128 (res); 3567 BID_RETURN (res) 3568 3569 } 3570 3571 3572 #if DECIMAL_CALL_BY_REFERENCE 3573 void 3574 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz 3575 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3576 _EXC_INFO_PARAM) { 3577 UINT128 x = *px, y = *py, z = *pz; 3578 #if !DECIMAL_GLOBAL_ROUNDING 3579 unsigned int rnd_mode = *prnd_mode; 3580 #endif 3581 #else 3582 UINT128 3583 bid128_fma (UINT128 x, UINT128 y, UINT128 z 3584 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3585 _EXC_INFO_PARAM) { 3586 #endif 3587 int is_midpoint_lt_even, is_midpoint_gt_even, 3588 is_inexact_lt_midpoint, is_inexact_gt_midpoint; 3589 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3590 3591 #if DECIMAL_CALL_BY_REFERENCE 3592 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3593 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3594 &res, &x, &y, &z 3595 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3596 _EXC_INFO_ARG); 3597 #else 3598 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3599 &is_inexact_lt_midpoint, 3600 &is_inexact_gt_midpoint, x, y, 3601 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3602 _EXC_INFO_ARG); 3603 #endif 3604 BID_RETURN (res); 3605 } 3606 3607 3608 #if DECIMAL_CALL_BY_REFERENCE 3609 void 3610 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz 3611 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3612 _EXC_INFO_PARAM) { 3613 UINT64 x = *px, y = *py, z = *pz; 3614 #if !DECIMAL_GLOBAL_ROUNDING 3615 unsigned int rnd_mode = *prnd_mode; 3616 #endif 3617 #else 3618 UINT128 3619 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z 3620 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3621 _EXC_INFO_PARAM) { 3622 #endif 3623 
  // (body of bid128ddd_fma, continued)
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1, y1, z1;

#if DECIMAL_CALL_BY_REFERENCE
  // widen the three BID64 operands to BID128, then delegate
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, &y1, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y1,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128ddq_fma: fma with BID64 x, BID64 y, BID128 z and a BID128 result.
// x and y are widened to BID128, then bid128_ext_fma does the work.
#if DECIMAL_CALL_BY_REFERENCE
void
bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
  UINT128 z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1, y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG
                   _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, &y1, &z
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y1,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128dqd_fma: fma with BID64 x, BID128 y, BID64 z and a BID128 result.
// x and z are widened to BID128, then bid128_ext_fma does the work.
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1, z1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  // y is already BID128, so its pointer py is passed through unchanged
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, py, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128dqq_fma: fma with BID64 x, BID128 y, BID128 z and a BID128 result.
// Only x needs widening to BID128 before delegating to bid128_ext_fma.
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, py, pz
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128qdd_fma: fma with BID128 x, BID64 y, BID64 z and a BID128 result.
// y and z are widened to BID128 before delegating to bid128_ext_fma.
// (Definition continues below.)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qdd_fma
(UINT128 x, UINT64 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 y1, z1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, px, &y1, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y1,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128qdq_fma: fma with BID128 x, BID64 y, BID128 z and a BID128 result.
// Only y needs widening to BID128 before delegating to bid128_ext_fma.
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma
(&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, px, &y1, pz
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y1,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128qqd_fma: fma with BID128 x, BID128 y, BID64 z and a BID128 result.
// Only z needs widening to BID128 before delegating to bid128_ext_fma.
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 z1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, px, py, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}

// Note: bid128qqq_fma is represented by bid128_fma

// Note: bid64ddd_fma is represented
// by bid64_fma (continuation of the preceding note)

// bid64ddq_fma: fma with BID64 x, BID64 y, BID128 z and a BID64 result.
// x and y are widened to BID128, then bid64qqq_fma performs the operation
// and the final rounding to 64-bit decimal.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  UINT128 x1, y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, &x1, &y1, pz
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x1, y1, z
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64dqd_fma: fma with BID64 x, BID128 y, BID64 z and a BID64 result.
// x and z are widened to BID128, then bid64qqq_fma does the work.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  UINT128 x1, z1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, &x1, py, &z1
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x1, y, z1
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64dqq_fma: fma with BID64 x, BID128 y, BID128 z and a BID64 result.
// Only x needs widening to BID128 before delegating to bid64qqq_fma.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  UINT128 x1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, &x1, py, pz
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x1, y, z
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qdd_fma: fma with BID128 x, BID64 y, BID64 z and a BID64 result.
// y and z are widened to BID128 before delegating to bid64qqq_fma.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  UINT128 y1, z1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, px, &y1, &z1
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x, y1, z1
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qdq_fma: fma with BID128 x, BID64 y, BID128 z and a BID64 result.
// Only y needs widening to BID128 before delegating to bid64qqq_fma.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  UINT128 y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, px, &y1, pz
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x, y1, z
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qqd_fma: fma with BID128 x, BID128 y, BID64 z and a BID64 result.
// Only z needs widening to BID128 before delegating to bid64qqq_fma.
// (Definition continues below.)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 =
0xbaddbaddbaddbaddull;
  UINT128 z1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, px, py, &z1
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x, y, z1
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qqq_fma: fma with three BID128 operands and a BID64 result.
// Computes the fma to BID128 first (bid128_ext_fma) and then rounds the
// 34-digit intermediate to 16 digits, using the midpoint/inexact
// indicators from the first rounding to correct for double rounding.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  // *0 variables: rounding indicators from the first (128-bit) rounding;
  // unsuffixed ones: indicators from the second (to 16 digits) rounding
  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
    is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  int incr_exp;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
  UINT64 sign;
  UINT64 exp;
  int unbexp; // unbiased exponent of the intermediate result
  UINT128 C;  // coefficient of the intermediate result
  BID_UI64DOUBLE tmp;
  int nr_bits;
  int q, x0; // q = digit count; x0 = nr. of digits to remove
  int scale;
  int lt_half_ulp = 0, eq_half_ulp = 0;

  // Note: for rounding modes other than RN or RA, the result can be obtained
  // by rounding first to BID128 and then to BID64

  save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
  *pfpsf = 0;

#if DECIMAL_CALL_BY_REFERENCE
  // first rounding: full-precision fma rounded to 34 decimal digits
  bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
                  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
                  &res, &x, &y, &z
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
                        &is_inexact_lt_midpoint0,
                        &is_inexact_gt_midpoint0, x, y,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif

  if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
      (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
      ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
      ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
#if DECIMAL_CALL_BY_REFERENCE
    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
#else
    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
#endif
    // determine the unbiased exponent of the result
    unbexp = ((res1 >> 53) & 0x3ff) - 398;

    // if subnormal, res1 must have exp = -398
    // if tiny and inexact set underflow and inexact status flags
    if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
        (unbexp == -398)
        && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
        && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
            || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
      // set the inexact flag and the underflow flag
      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
    } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
               is_midpoint_lt_even0 || is_midpoint_gt_even0) {
      // not tiny: set only the inexact flag
      *pfpsf |= INEXACT_EXCEPTION;
    }

    *pfpsf |= save_fpsf;
    BID_RETURN (res1);
  } // else continue, and use rounding to nearest to round to 16 digits

  // at this point the result is rounded to nearest (even or away) to 34 digits
  // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
  sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
  exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
  unbexp = (exp >> 49) - 6176;
  C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
  C.w[0] = res.w[LOW_128W];

  if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
      (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
    // clear under/overflow
#if DECIMAL_CALL_BY_REFERENCE
    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
#else
    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
#endif
    *pfpsf |= save_fpsf;
    BID_RETURN (res1);
  } // else continue

  // -398 - 34 <= unbexp <= 369 + 15
  if (rnd_mode == ROUNDING_TIES_AWAY) {
    // apply correction, if needed, to make the result rounded to nearest-even
    // NOTE(review): is_midpoint_gt_even is still 0 here (it is set only by
    // the second rounding below) and res1 has not been assigned yet, so this
    // branch appears to be unreachable/dead code — confirm against upstream.
    if (is_midpoint_gt_even) {
      // res = res - 1
      res1--; // res1 is now even
    } // else the result is already correctly rounded to nearest-even
  }
  // at this point the result is finite, non-zero canonical normal or subnormal,
  // and in most cases overflow or underflow will not occur

  // determine the number of digits q in the result
  // q = nr. of decimal digits in x
  // determine first the nr. of bits in x
  if (C.w[1] == 0) {
    if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
      // split the 64-bit value in two 32-bit halves to avoid rounding errors
      if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
        tmp.d = (double) (C.w[0] >> 32); // exact conversion
        nr_bits =
          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      } else { // x < 2^32
        tmp.d = (double) (C.w[0]); // exact conversion
        nr_bits =
          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // if x < 2^53
      tmp.d = (double) C.w[0]; // exact conversion
      nr_bits =
        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }
  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
    tmp.d = (double) C.w[1]; // exact conversion
    nr_bits =
      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
  }
  // map bit count to decimal digit count via the nr_digits table
  q = nr_digits[nr_bits - 1].digits;
  if (q == 0) {
    q = nr_digits[nr_bits - 1].digits1;
    if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
        (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
         C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
      q++;
  }
  // if q > 16, round to nearest even to 16 digits (but for underflow it may
  // have to be truncated even more)
  if (q > 16) {
    x0 = q - 16;
    if (q <= 18) {
      round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
                    &is_midpoint_lt_even, &is_midpoint_gt_even,
                    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    } else { // 19 <= q <= 34
      round128_19_38 (q, x0, C, &res128, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      res1 = res128.w[0]; // the result fits in 64 bits
    }
    unbexp = unbexp + x0;
    if (incr_exp)
      unbexp++;
    q = 16; // need to set in case denormalization is necessary
  } else {
    // the result does not require a second rounding (and it must have
    // been exact in the first rounding, since q <= 16)
    res1 = C.w[0];
  }

  // avoid a double rounding error
  if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
      is_midpoint_lt_even) { // double rounding error upward
    // res = res - 1
    res1--; // res1 becomes odd
    is_midpoint_lt_even = 0;
    is_inexact_lt_midpoint = 1;
    if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
      res1 = 0x002386f26fc0ffffull; // 10^16 - 1
      unbexp--;
    }
  } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
    // res = res + 1
    res1++; // res1 becomes odd (so it cannot be 10^16)
    is_midpoint_gt_even = 0;
    is_inexact_gt_midpoint = 1;
  } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
    // if this second rounding was exact the result may still be
    // inexact because of the first rounding
    if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
      is_inexact_gt_midpoint = 1;
    }
    if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
      is_inexact_lt_midpoint = 1;
    }
  } else if (is_midpoint_gt_even &&
             (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
    // pulled up to a midpoint
    is_inexact_lt_midpoint = 1;
    is_inexact_gt_midpoint = 0;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;
  } else if (is_midpoint_lt_even &&
             (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
    // pulled down to a midpoint
    is_inexact_lt_midpoint = 0;
    is_inexact_gt_midpoint = 1;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;
  } else {
    ;
  }
  // this is the result rounded correctly to nearest even, with unbounded exp.

  // check for overflow
  if (q + unbexp > P16 + expmax16) {
    res1 = sign | 0x7800000000000000ull;
    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    *pfpsf |= save_fpsf;
    BID_RETURN (res1)
  } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
    // not overflow; the result must be exact, and we can multiply res1 by
    // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
    scale = unbexp - expmax16;
    res1 = res1 * ten2k64[scale]; // res1 * 10^scale
    unbexp = expmax16; // unbexp - scale
  } else {
    ; // continue
  }

  // check for underflow
  if (q + unbexp < P16 + expmin16) {
    if (unbexp < expmin16) {
      // we must truncate more of res
      x0 = expmin16 - unbexp; // x0 >= 1
      // save the second-rounding indicators and clear them for the third
      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
      is_midpoint_lt_even0 = is_midpoint_lt_even;
      is_midpoint_gt_even0 = is_midpoint_gt_even;
      is_inexact_lt_midpoint = 0;
      is_inexact_gt_midpoint = 0;
      is_midpoint_lt_even = 0;
      is_midpoint_gt_even = 0;
      // the number of decimal digits in res1 is q
      if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
        // 2 <= q <= 16, 1 <= x0 <= 15
        round64_2_18 (q, x0, res1, &res1, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        if (incr_exp) {
          // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
          res1 = ten2k64[q - x0];
        }
        unbexp = unbexp + x0; // expmin16
      } else if (x0 == q) {
        // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
        // determine relationship with 1/2 ulp
        // q <= 16
        if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
          lt_half_ulp = 1;
          is_inexact_lt_midpoint = 1;
        } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
          eq_half_ulp = 1;
          is_midpoint_gt_even = 1;
        } else { // > 1/2 ulp
          // gt_half_ulp = 1;
          is_inexact_gt_midpoint = 1;
        }
        if (lt_half_ulp || eq_half_ulp) {
          // res = +0.0 * 10^expmin16
          res1 = 0x0000000000000000ull;
        } else { // if (gt_half_ulp)
          // res = +1 * 10^expmin16
          res1 = 0x0000000000000001ull;
        }
        unbexp = expmin16;
      } else { // if (x0 > q)
        // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
        res1 = 0x0000000000000000ull;
        unbexp = expmin16;
        is_inexact_lt_midpoint = 1;
      }
      // avoid a double rounding error
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
          is_midpoint_lt_even) { // double rounding error upward
        // res = res - 1
        res1--; // res1 becomes odd
        is_midpoint_lt_even = 0;
        is_inexact_lt_midpoint = 1;
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
                 is_midpoint_gt_even) { // double rounding error downward
        // res = res + 1
        res1++; // res1 becomes odd
        is_midpoint_gt_even = 0;
        is_inexact_gt_midpoint = 1;
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
        // if this rounding was exact the result may still be
        // inexact because of the previous roundings
        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
          is_inexact_gt_midpoint = 1;
        }
        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
          is_inexact_lt_midpoint = 1;
        }
      } else if (is_midpoint_gt_even &&
                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
        // pulled up to a midpoint
        is_inexact_lt_midpoint = 1;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else if (is_midpoint_lt_even &&
                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
        // pulled down to a midpoint
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 1;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else {
        ;
      }
    }
    // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
    // and the result is tiny and exact

    // check for inexact result
    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
        is_midpoint_lt_even || is_midpoint_gt_even ||
        is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
        is_midpoint_lt_even0 || is_midpoint_gt_even0) {
      // set the inexact flag and the underflow flag
      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
    }
  } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
             is_midpoint_lt_even || is_midpoint_gt_even) {
    *pfpsf |= INEXACT_EXCEPTION;
  }
  // this is the result rounded correctly to nearest, with bounded exponent

  if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
    // res = res + 1
    res1++; // res1 is now odd
  } // else the result is already correct

  // assemble the result
  if (res1 < 0x0020000000000000ull) { // res < 2^53
    res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
  } else { // res1 >= 2^53
    res1 = sign | MASK_STEERING_BITS |
      ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
  }
  *pfpsf |= save_fpsf;
  BID_RETURN (res1);
}