1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free 7 Software Foundation; either version 3, or (at your option) any later 8 version. 9 10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. 
*/ 23 24 /***************************************************************************** 25 * 26 * BID128 fma x * y + z 27 * 28 ****************************************************************************/ 29 30 #include "bid_internal.h" 31 32 static void 33 rounding_correction (unsigned int rnd_mode, 34 unsigned int is_inexact_lt_midpoint, 35 unsigned int is_inexact_gt_midpoint, 36 unsigned int is_midpoint_lt_even, 37 unsigned int is_midpoint_gt_even, 38 int unbexp, 39 UINT128 * ptrres, _IDEC_flags * ptrfpsf) { 40 // unbiased true exponent unbexp may be larger than emax 41 42 UINT128 res = *ptrres; // expected to have the correct sign and coefficient 43 // (the exponent field is ignored, as unbexp is used instead) 44 UINT64 sign, exp; 45 UINT64 C_hi, C_lo; 46 47 // general correction from RN to RA, RM, RP, RZ 48 // Note: if the result is negative, then is_inexact_lt_midpoint, 49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even 50 // have to be considered as if determined for the absolute value of the 51 // result (so they seem to be reversed) 52 53 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 54 is_midpoint_lt_even || is_midpoint_gt_even) { 55 *ptrfpsf |= INEXACT_EXCEPTION; 56 } 57 // apply correction to result calculated with unbounded exponent 58 sign = res.w[1] & MASK_SIGN; 59 exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax 60 C_hi = res.w[1] & MASK_COEFF; 61 C_lo = res.w[0]; 62 if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || 63 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && 64 is_midpoint_gt_even))) || 65 (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || 66 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && 67 is_midpoint_gt_even)))) { 68 // C = C + 1 69 C_lo = C_lo + 1; 70 if (C_lo == 0) 71 C_hi = C_hi + 1; 72 if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) { 73 // C = 10^34 => rounding overflow 74 C_hi = 
0x0000314dc6448d93ull; 75 C_lo = 0x38c15b0a00000000ull; // 10^33 76 // exp = exp + EXP_P1; 77 unbexp = unbexp + 1; 78 exp = (UINT64) (unbexp + 6176) << 49; 79 } 80 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && 81 ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || 82 (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { 83 // C = C - 1 84 C_lo = C_lo - 1; 85 if (C_lo == 0xffffffffffffffffull) 86 C_hi--; 87 // check if we crossed into the lower decade 88 if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { 89 // C = 10^33 - 1 90 if (exp > 0) { 91 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 92 C_lo = 0x378d8e63ffffffffull; 93 // exp = exp - EXP_P1; 94 unbexp = unbexp - 1; 95 exp = (UINT64) (unbexp + 6176) << 49; 96 } else { // if exp = 0 97 if (is_midpoint_lt_even || is_midpoint_lt_even || 98 is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact 99 *ptrfpsf |= UNDERFLOW_EXCEPTION; 100 } 101 } 102 } else { 103 ; // the result is already correct 104 } 105 if (unbexp > expmax) { // 6111 106 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 107 exp = 0; 108 if (!sign) { // result is positive 109 if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf 110 C_hi = 0x7800000000000000ull; 111 C_lo = 0x0000000000000000ull; 112 } else { // res = +MAXFP = (10^34-1) * 10^emax 113 C_hi = 0x5fffed09bead87c0ull; 114 C_lo = 0x378d8e63ffffffffull; 115 } 116 } else { // result is negative 117 if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf 118 C_hi = 0xf800000000000000ull; 119 C_lo = 0x0000000000000000ull; 120 } else { // res = -MAXFP = -(10^34-1) * 10^emax 121 C_hi = 0xdfffed09bead87c0ull; 122 C_lo = 0x378d8e63ffffffffull; 123 } 124 } 125 } 126 // assemble the result 127 res.w[1] = sign | exp | C_hi; 128 res.w[0] = C_lo; 129 *ptrres = res; 130 } 131 132 static void 133 add256 (UINT256 x, UINT256 y, UINT256 * pz) { 134 // *z = x + yl assume the sum fits 
in 256 bits 135 UINT256 z; 136 z.w[0] = x.w[0] + y.w[0]; 137 if (z.w[0] < x.w[0]) { 138 x.w[1]++; 139 if (x.w[1] == 0x0000000000000000ull) { 140 x.w[2]++; 141 if (x.w[2] == 0x0000000000000000ull) { 142 x.w[3]++; 143 } 144 } 145 } 146 z.w[1] = x.w[1] + y.w[1]; 147 if (z.w[1] < x.w[1]) { 148 x.w[2]++; 149 if (x.w[2] == 0x0000000000000000ull) { 150 x.w[3]++; 151 } 152 } 153 z.w[2] = x.w[2] + y.w[2]; 154 if (z.w[2] < x.w[2]) { 155 x.w[3]++; 156 } 157 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible 158 *pz = z; 159 } 160 161 static void 162 sub256 (UINT256 x, UINT256 y, UINT256 * pz) { 163 // *z = x - y; assume x >= y 164 UINT256 z; 165 z.w[0] = x.w[0] - y.w[0]; 166 if (z.w[0] > x.w[0]) { 167 x.w[1]--; 168 if (x.w[1] == 0xffffffffffffffffull) { 169 x.w[2]--; 170 if (x.w[2] == 0xffffffffffffffffull) { 171 x.w[3]--; 172 } 173 } 174 } 175 z.w[1] = x.w[1] - y.w[1]; 176 if (z.w[1] > x.w[1]) { 177 x.w[2]--; 178 if (x.w[2] == 0xffffffffffffffffull) { 179 x.w[3]--; 180 } 181 } 182 z.w[2] = x.w[2] - y.w[2]; 183 if (z.w[2] > x.w[2]) { 184 x.w[3]--; 185 } 186 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y 187 *pz = z; 188 } 189 190 191 static int 192 nr_digits256 (UINT256 R256) { 193 int ind; 194 // determine the number of decimal digits in R256 195 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { 196 // between 1 and 19 digits 197 for (ind = 1; ind <= 19; ind++) { 198 if (R256.w[0] < ten2k64[ind]) { 199 break; 200 } 201 } 202 // ind digits 203 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && 204 (R256.w[1] < ten2k128[0].w[1] || 205 (R256.w[1] == ten2k128[0].w[1] 206 && R256.w[0] < ten2k128[0].w[0]))) { 207 // 20 digits 208 ind = 20; 209 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { 210 // between 21 and 38 digits 211 for (ind = 1; ind <= 18; ind++) { 212 if (R256.w[1] < ten2k128[ind].w[1] || 213 (R256.w[1] == ten2k128[ind].w[1] && 214 R256.w[0] < ten2k128[ind].w[0])) { 215 break; 216 } 217 } 218 // ind + 20 digits 219 
ind = ind + 20; 220 } else if (R256.w[3] == 0x0 && 221 (R256.w[2] < ten2k256[0].w[2] || 222 (R256.w[2] == ten2k256[0].w[2] && 223 R256.w[1] < ten2k256[0].w[1]) || 224 (R256.w[2] == ten2k256[0].w[2] && 225 R256.w[1] == ten2k256[0].w[1] && 226 R256.w[0] < ten2k256[0].w[0]))) { 227 // 39 digits 228 ind = 39; 229 } else { 230 // between 40 and 68 digits 231 for (ind = 1; ind <= 29; ind++) { 232 if (R256.w[3] < ten2k256[ind].w[3] || 233 (R256.w[3] == ten2k256[ind].w[3] && 234 R256.w[2] < ten2k256[ind].w[2]) || 235 (R256.w[3] == ten2k256[ind].w[3] && 236 R256.w[2] == ten2k256[ind].w[2] && 237 R256.w[1] < ten2k256[ind].w[1]) || 238 (R256.w[3] == ten2k256[ind].w[3] && 239 R256.w[2] == ten2k256[ind].w[2] && 240 R256.w[1] == ten2k256[ind].w[1] && 241 R256.w[0] < ten2k256[ind].w[0])) { 242 break; 243 } 244 } 245 // ind + 39 digits 246 ind = ind + 39; 247 } 248 return (ind); 249 } 250 251 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so 252 // use the rounding information from ptr_is_* to avoid a double rounding error 253 static void 254 add_and_round (int q3, 255 int q4, 256 int e4, 257 int delta, 258 int p34, 259 UINT64 z_sign, 260 UINT64 p_sign, 261 UINT128 C3, 262 UINT256 C4, 263 int rnd_mode, 264 int *ptr_is_midpoint_lt_even, 265 int *ptr_is_midpoint_gt_even, 266 int *ptr_is_inexact_lt_midpoint, 267 int *ptr_is_inexact_gt_midpoint, 268 _IDEC_flags * ptrfpsf, UINT128 * ptrres) { 269 270 int scale; 271 int x0; 272 int ind; 273 UINT64 R64; 274 UINT128 P128, R128; 275 UINT192 P192, R192; 276 UINT256 R256; 277 int is_midpoint_lt_even = 0; 278 int is_midpoint_gt_even = 0; 279 int is_inexact_lt_midpoint = 0; 280 int is_inexact_gt_midpoint = 0; 281 int is_midpoint_lt_even0 = 0; 282 int is_midpoint_gt_even0 = 0; 283 int is_inexact_lt_midpoint0 = 0; 284 int is_inexact_gt_midpoint0 = 0; 285 int incr_exp = 0; 286 int is_tiny = 0; 287 int lt_half_ulp = 0; 288 int eq_half_ulp = 0; 289 // int gt_half_ulp = 0; 290 UINT128 res = *ptrres; 291 292 // scale C3 up 
by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66 293 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this 294 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1 295 296 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for 297 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6)) 298 if (scale == 0) { 299 R256.w[3] = 0x0ull; 300 R256.w[2] = 0x0ull; 301 R256.w[1] = C3.w[1]; 302 R256.w[0] = C3.w[0]; 303 } else if (scale <= 19) { // 10^scale fits in 64 bits 304 P128.w[1] = 0; 305 P128.w[0] = ten2k64[scale]; 306 __mul_128x128_to_256 (R256, P128, C3); 307 } else if (scale <= 38) { // 10^scale fits in 128 bits 308 __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3); 309 } else if (scale <= 57) { // 39 <= scale <= 57 310 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits 311 // (10^67 has 223 bits; 10^69 has 230 bits); 312 // must split the computation: 313 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 314 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty 315 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits 316 __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3); 317 // now multiply R128 by 10^38 318 __mul_128x128_to_256 (R256, R128, ten2k128[18]); 319 } else { // 58 <= scale <= 66 320 // 10^scale takes between 193 and 220 bits, 321 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) 322 // must split the computation: 323 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 324 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty 325 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits 326 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because 327 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 328 __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]); 329 // now calculate 10*38 * 10^(scale-38) * C3 330 
__mul_128x128_to_256 (R256, R128, ten2k128[18]); 331 } 332 // C3 * 10^scale is now in R256 333 334 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least 335 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is 336 // possible 337 // add/subtract C4 and C3 * 10^scale; the exponent is e4 338 if (p_sign == z_sign) { // R256 = C4 + R256 339 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact, 340 // but may require rounding 341 add256 (C4, R256, &R256); 342 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 343 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or 344 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact, 345 // but may require rounding 346 347 // compare first R256 = C3 * 10^scale and C4 348 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || 349 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || 350 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && 351 R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 352 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact, 353 // but may require rounding 354 sub256 (R256, C4, &R256); 355 // flip p_sign too, because the result has the sign of z 356 p_sign = z_sign; 357 } else { // if C4 > C3 * 10^scale 358 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact, 359 // but may require rounding 360 sub256 (C4, R256, &R256); 361 } 362 // if the result is pure zero, the sign depends on the rounding mode 363 // (x*y and z had opposite signs) 364 if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull && 365 R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) { 366 if (rnd_mode != ROUNDING_DOWN) 367 p_sign = 0x0000000000000000ull; 368 else 369 p_sign = 0x8000000000000000ull; 370 // the exponent is max (e4, expmin) 371 if (e4 < -6176) 372 e4 = expmin; 373 // assemble result 374 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49); 375 res.w[0] = 0x0; 376 *ptrres = res; 377 return; 378 } 379 } 380 
381 // determine the number of decimal digits in R256 382 ind = nr_digits256 (R256); 383 384 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; 385 // round to the destination precision, with unbounded exponent 386 387 if (ind <= p34) { 388 // result rounded to the destination precision with unbounded exponent 389 // is exact 390 if (ind + e4 < p34 + expmin) { 391 is_tiny = 1; // applies to all rounding modes 392 } 393 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1]; 394 res.w[0] = R256.w[0]; 395 // Note: res is correct only if expmin <= e4 <= expmax 396 } else { // if (ind > p34) 397 // if more than P digits, round to nearest to P digits 398 // round R256 to p34 digits 399 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 400 if (ind <= 38) { 401 P128.w[1] = R256.w[1]; 402 P128.w[0] = R256.w[0]; 403 round128_19_38 (ind, x0, P128, &R128, &incr_exp, 404 &is_midpoint_lt_even, &is_midpoint_gt_even, 405 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 406 } else if (ind <= 57) { 407 P192.w[2] = R256.w[2]; 408 P192.w[1] = R256.w[1]; 409 P192.w[0] = R256.w[0]; 410 round192_39_57 (ind, x0, P192, &R192, &incr_exp, 411 &is_midpoint_lt_even, &is_midpoint_gt_even, 412 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 413 R128.w[1] = R192.w[1]; 414 R128.w[0] = R192.w[0]; 415 } else { // if (ind <= 68) 416 round256_58_76 (ind, x0, R256, &R256, &incr_exp, 417 &is_midpoint_lt_even, &is_midpoint_gt_even, 418 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 419 R128.w[1] = R256.w[1]; 420 R128.w[0] = R256.w[0]; 421 } 422 // the rounded result has p34 = 34 digits 423 e4 = e4 + x0 + incr_exp; 424 if (rnd_mode == ROUNDING_TO_NEAREST) { 425 if (e4 < expmin) { 426 is_tiny = 1; // for other rounding modes apply correction 427 } 428 } else { 429 // for RM, RP, RZ, RA apply correction in order to determine tininess 430 // but do not save the result; apply the correction to 431 // (-1)^p_sign * significand * 10^0 432 P128.w[1] = p_sign | 
0x3040000000000000ull | R128.w[1]; 433 P128.w[0] = R128.w[0]; 434 rounding_correction (rnd_mode, 435 is_inexact_lt_midpoint, 436 is_inexact_gt_midpoint, is_midpoint_lt_even, 437 is_midpoint_gt_even, 0, &P128, ptrfpsf); 438 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 439 // the number of digits in the significand is p34 = 34 440 if (e4 + scale < expmin) { 441 is_tiny = 1; 442 } 443 } 444 ind = p34; // the number of decimal digits in the signifcand of res 445 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN 446 res.w[0] = R128.w[0]; 447 // Note: res is correct only if expmin <= e4 <= expmax 448 // set the inexact flag after rounding with bounded exponent, if any 449 } 450 // at this point we have the result rounded with unbounded exponent in 451 // res and we know its tininess: 452 // res = (-1)^p_sign * significand * 10^e4, 453 // where q (significand) = ind <= p34 454 // Note: res is correct only if expmin <= e4 <= expmax 455 456 // check for overflow if RN 457 if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { 458 res.w[1] = p_sign | 0x7800000000000000ull; 459 res.w[0] = 0x0000000000000000ull; 460 *ptrres = res; 461 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 462 return; // BID_RETURN (res) 463 } // else not overflow or not RN, so continue 464 465 // if (e4 >= expmin) we have the result rounded with bounded exponent 466 if (e4 < expmin) { 467 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res 468 // where the result rounded [at most] once is 469 // (-1)^p_sign * significand_res * 10^e4 470 471 // avoid double rounding error 472 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 473 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 474 is_midpoint_lt_even0 = is_midpoint_lt_even; 475 is_midpoint_gt_even0 = is_midpoint_gt_even; 476 is_inexact_lt_midpoint = 0; 477 is_inexact_gt_midpoint = 0; 478 is_midpoint_lt_even = 0; 479 is_midpoint_gt_even = 0; 480 481 if (x0 > ind) { 482 // 
nothing is left of res when moving the decimal point left x0 digits 483 is_inexact_lt_midpoint = 1; 484 res.w[1] = p_sign | 0x0000000000000000ull; 485 res.w[0] = 0x0000000000000000ull; 486 e4 = expmin; 487 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 488 // this is <, =, or > 1/2 ulp 489 // compare the ind-digit value in the significand of res with 490 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 491 // less than, equal to, or greater than 1/2 ulp (significand of res) 492 R128.w[1] = res.w[1] & MASK_COEFF; 493 R128.w[0] = res.w[0]; 494 if (ind <= 19) { 495 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp 496 lt_half_ulp = 1; 497 is_inexact_lt_midpoint = 1; 498 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp 499 eq_half_ulp = 1; 500 is_midpoint_gt_even = 1; 501 } else { // > 1/2 ulp 502 // gt_half_ulp = 1; 503 is_inexact_gt_midpoint = 1; 504 } 505 } else { // if (ind <= 38) { 506 if (R128.w[1] < midpoint128[ind - 20].w[1] || 507 (R128.w[1] == midpoint128[ind - 20].w[1] && 508 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp 509 lt_half_ulp = 1; 510 is_inexact_lt_midpoint = 1; 511 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 512 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp 513 eq_half_ulp = 1; 514 is_midpoint_gt_even = 1; 515 } else { // > 1/2 ulp 516 // gt_half_ulp = 1; 517 is_inexact_gt_midpoint = 1; 518 } 519 } 520 if (lt_half_ulp || eq_half_ulp) { 521 // res = +0.0 * 10^expmin 522 res.w[1] = 0x0000000000000000ull; 523 res.w[0] = 0x0000000000000000ull; 524 } else { // if (gt_half_ulp) 525 // res = +1 * 10^expmin 526 res.w[1] = 0x0000000000000000ull; 527 res.w[0] = 0x0000000000000001ull; 528 } 529 res.w[1] = p_sign | res.w[1]; 530 e4 = expmin; 531 } else { // if (1 <= x0 <= ind - 1 <= 33) 532 // round the ind-digit result to ind - x0 digits 533 534 if (ind <= 18) { // 2 <= ind <= 18 535 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 536 &is_midpoint_lt_even, &is_midpoint_gt_even, 537 
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 538 res.w[1] = 0x0; 539 res.w[0] = R64; 540 } else if (ind <= 38) { 541 P128.w[1] = res.w[1] & MASK_COEFF; 542 P128.w[0] = res.w[0]; 543 round128_19_38 (ind, x0, P128, &res, &incr_exp, 544 &is_midpoint_lt_even, &is_midpoint_gt_even, 545 &is_inexact_lt_midpoint, 546 &is_inexact_gt_midpoint); 547 } 548 e4 = e4 + x0; // expmin 549 // we want the exponent to be expmin, so if incr_exp = 1 then 550 // multiply the rounded result by 10 - it will still fit in 113 bits 551 if (incr_exp) { 552 // 64 x 128 -> 128 553 P128.w[1] = res.w[1] & MASK_COEFF; 554 P128.w[0] = res.w[0]; 555 __mul_64x128_to_128 (res, ten2k64[1], P128); 556 } 557 res.w[1] = 558 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); 559 // avoid a double rounding error 560 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 561 is_midpoint_lt_even) { // double rounding error upward 562 // res = res - 1 563 res.w[0]--; 564 if (res.w[0] == 0xffffffffffffffffull) 565 res.w[1]--; 566 // Note: a double rounding error upward is not possible; for this 567 // the result after the first rounding would have to be 99...95 568 // (35 digits in all), possibly followed by a number of zeros; this 569 // is not possible in Cases (2)-(6) or (15)-(17) which may get here 570 is_midpoint_lt_even = 0; 571 is_inexact_lt_midpoint = 1; 572 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 573 is_midpoint_gt_even) { // double rounding error downward 574 // res = res + 1 575 res.w[0]++; 576 if (res.w[0] == 0) 577 res.w[1]++; 578 is_midpoint_gt_even = 0; 579 is_inexact_gt_midpoint = 1; 580 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 581 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 582 // if this second rounding was exact the result may still be 583 // inexact because of the first rounding 584 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 585 is_inexact_gt_midpoint = 1; 586 } 587 if (is_inexact_lt_midpoint0 || 
is_midpoint_gt_even0) { 588 is_inexact_lt_midpoint = 1; 589 } 590 } else if (is_midpoint_gt_even && 591 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 592 // pulled up to a midpoint 593 is_inexact_lt_midpoint = 1; 594 is_inexact_gt_midpoint = 0; 595 is_midpoint_lt_even = 0; 596 is_midpoint_gt_even = 0; 597 } else if (is_midpoint_lt_even && 598 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 599 // pulled down to a midpoint 600 is_inexact_lt_midpoint = 0; 601 is_inexact_gt_midpoint = 1; 602 is_midpoint_lt_even = 0; 603 is_midpoint_gt_even = 0; 604 } else { 605 ; 606 } 607 } 608 } 609 // res contains the correct result 610 // apply correction if not rounding to nearest 611 if (rnd_mode != ROUNDING_TO_NEAREST) { 612 rounding_correction (rnd_mode, 613 is_inexact_lt_midpoint, is_inexact_gt_midpoint, 614 is_midpoint_lt_even, is_midpoint_gt_even, 615 e4, &res, ptrfpsf); 616 } 617 if (is_midpoint_lt_even || is_midpoint_gt_even || 618 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { 619 // set the inexact flag 620 *ptrfpsf |= INEXACT_EXCEPTION; 621 if (is_tiny) 622 *ptrfpsf |= UNDERFLOW_EXCEPTION; 623 } 624 625 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 626 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 627 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 628 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 629 *ptrres = res; 630 return; 631 } 632 633 634 #if DECIMAL_CALL_BY_REFERENCE 635 static void 636 bid128_ext_fma (int *ptr_is_midpoint_lt_even, 637 int *ptr_is_midpoint_gt_even, 638 int *ptr_is_inexact_lt_midpoint, 639 int *ptr_is_inexact_gt_midpoint, UINT128 * pres, 640 UINT128 * px, UINT128 * py, 641 UINT128 * 642 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 643 _EXC_INFO_PARAM) { 644 UINT128 x = *px, y = *py, z = *pz; 645 #if !DECIMAL_GLOBAL_ROUNDING 646 unsigned int rnd_mode = *prnd_mode; 647 #endif 648 #else 649 static UINT128 650 bid128_ext_fma (int *ptr_is_midpoint_lt_even, 651 int *ptr_is_midpoint_gt_even, 652 int 
*ptr_is_inexact_lt_midpoint, 653 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, 654 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM 655 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { 656 #endif 657 658 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 659 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; 660 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; 661 int true_p_exp; 662 UINT128 C1, C2, C3; 663 UINT256 C4; 664 int q1 = 0, q2 = 0, q3 = 0, q4; 665 int e1, e2, e3, e4; 666 int scale, ind, delta, x0; 667 int p34 = P34; // used to modify the limit on the number of digits 668 BID_UI64DOUBLE tmp; 669 int x_nr_bits, y_nr_bits, z_nr_bits; 670 unsigned int save_fpsf; 671 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; 672 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 673 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; 674 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; 675 int incr_exp = 0; 676 int lsb; 677 int lt_half_ulp = 0; 678 int eq_half_ulp = 0; 679 int gt_half_ulp = 0; 680 int is_tiny = 0; 681 UINT64 R64, tmp64; 682 UINT128 P128, R128; 683 UINT192 P192, R192; 684 UINT256 R256; 685 686 // the following are based on the table of special cases for fma; the NaN 687 // behavior is similar to that of the IA-64 Architecture fma 688 689 // identify cases where at least one operand is NaN 690 691 BID_SWAP128 (x); 692 BID_SWAP128 (y); 693 BID_SWAP128 (z); 694 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN 695 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) 696 // check first for non-canonical NaN payload 697 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 698 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 699 (y.w[0] > 0x38c15b09ffffffffull))) { 700 y.w[1] = y.w[1] & 0xffffc00000000000ull; 701 y.w[0] = 0x0ull; 702 } 703 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN 704 // set invalid flag 705 *pfpsf |= INVALID_EXCEPTION; 706 // return quiet (y) 707 
res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 708 res.w[0] = y.w[0]; 709 } else { // y is QNaN 710 // return y 711 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 712 res.w[0] = y.w[0]; 713 // if z = SNaN or x = SNaN signal invalid exception 714 if ((z.w[1] & MASK_SNAN) == MASK_SNAN || 715 (x.w[1] & MASK_SNAN) == MASK_SNAN) { 716 // set invalid flag 717 *pfpsf |= INVALID_EXCEPTION; 718 } 719 } 720 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 721 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 722 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 723 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 724 BID_SWAP128 (res); 725 BID_RETURN (res) 726 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN 727 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) 728 // check first for non-canonical NaN payload 729 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 730 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 731 (z.w[0] > 0x38c15b09ffffffffull))) { 732 z.w[1] = z.w[1] & 0xffffc00000000000ull; 733 z.w[0] = 0x0ull; 734 } 735 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN 736 // set invalid flag 737 *pfpsf |= INVALID_EXCEPTION; 738 // return quiet (z) 739 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 740 res.w[0] = z.w[0]; 741 } else { // z is QNaN 742 // return z 743 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 744 res.w[0] = z.w[0]; 745 // if x = SNaN signal invalid exception 746 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { 747 // set invalid flag 748 *pfpsf |= INVALID_EXCEPTION; 749 } 750 } 751 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 752 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 753 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 754 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 755 BID_SWAP128 (res); 756 BID_RETURN (res) 757 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN 
758 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) 759 // check first for non-canonical NaN payload 760 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 761 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 762 (x.w[0] > 0x38c15b09ffffffffull))) { 763 x.w[1] = x.w[1] & 0xffffc00000000000ull; 764 x.w[0] = 0x0ull; 765 } 766 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN 767 // set invalid flag 768 *pfpsf |= INVALID_EXCEPTION; 769 // return quiet (x) 770 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 771 res.w[0] = x.w[0]; 772 } else { // x is QNaN 773 // return x 774 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 775 res.w[0] = x.w[0]; 776 } 777 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 778 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 779 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 780 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 781 BID_SWAP128 (res); 782 BID_RETURN (res) 783 } 784 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check 785 // for non-canonical values 786 787 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 788 C1.w[1] = x.w[1] & MASK_COEFF; 789 C1.w[0] = x.w[0]; 790 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf 791 // if x is not infinity check for non-canonical values - treated as zero 792 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 793 // non-canonical 794 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 795 C1.w[1] = 0; // significand high 796 C1.w[0] = 0; // significand low 797 } else { // G0_G1 != 11 798 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits 799 if (C1.w[1] > 0x0001ed09bead87c0ull || 800 (C1.w[1] == 0x0001ed09bead87c0ull && 801 C1.w[0] > 0x378d8e63ffffffffull)) { 802 // x is non-canonical if coefficient is larger than 10^34 -1 803 C1.w[1] = 0; 804 C1.w[0] = 0; 805 } else { // canonical 806 ; 
807 } 808 } 809 } 810 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 811 C2.w[1] = y.w[1] & MASK_COEFF; 812 C2.w[0] = y.w[0]; 813 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf 814 // if y is not infinity check for non-canonical values - treated as zero 815 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 816 // non-canonical 817 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 818 C2.w[1] = 0; // significand high 819 C2.w[0] = 0; // significand low 820 } else { // G0_G1 != 11 821 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits 822 if (C2.w[1] > 0x0001ed09bead87c0ull || 823 (C2.w[1] == 0x0001ed09bead87c0ull && 824 C2.w[0] > 0x378d8e63ffffffffull)) { 825 // y is non-canonical if coefficient is larger than 10^34 -1 826 C2.w[1] = 0; 827 C2.w[0] = 0; 828 } else { // canonical 829 ; 830 } 831 } 832 } 833 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 834 C3.w[1] = z.w[1] & MASK_COEFF; 835 C3.w[0] = z.w[0]; 836 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf 837 // if z is not infinity check for non-canonical values - treated as zero 838 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 839 // non-canonical 840 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 841 C3.w[1] = 0; // significand high 842 C3.w[0] = 0; // significand low 843 } else { // G0_G1 != 11 844 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits 845 if (C3.w[1] > 0x0001ed09bead87c0ull || 846 (C3.w[1] == 0x0001ed09bead87c0ull && 847 C3.w[0] > 0x378d8e63ffffffffull)) { 848 // z is non-canonical if coefficient is larger than 10^34 -1 849 C3.w[1] = 0; 850 C3.w[0] = 0; 851 } else { // canonical 852 ; 853 } 854 } 855 } 856 857 p_sign = x_sign ^ y_sign; // sign of the product 858 859 // identify cases where at least one operand is infinity 860 861 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf 862 if ((y.w[1] & MASK_ANY_INF) 
== MASK_INF) { // y = inf 863 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 864 if (p_sign == z_sign) { 865 res.w[1] = z_sign | MASK_INF; 866 res.w[0] = 0x0; 867 } else { 868 // return QNaN Indefinite 869 res.w[1] = 0x7c00000000000000ull; 870 res.w[0] = 0x0000000000000000ull; 871 // set invalid flag 872 *pfpsf |= INVALID_EXCEPTION; 873 } 874 } else { // z = 0 or z = f 875 res.w[1] = p_sign | MASK_INF; 876 res.w[0] = 0x0; 877 } 878 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f 879 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 880 if (p_sign == z_sign) { 881 res.w[1] = z_sign | MASK_INF; 882 res.w[0] = 0x0; 883 } else { 884 // return QNaN Indefinite 885 res.w[1] = 0x7c00000000000000ull; 886 res.w[0] = 0x0000000000000000ull; 887 // set invalid flag 888 *pfpsf |= INVALID_EXCEPTION; 889 } 890 } else { // z = 0 or z = f 891 res.w[1] = p_sign | MASK_INF; 892 res.w[0] = 0x0; 893 } 894 } else { // y = 0 895 // return QNaN Indefinite 896 res.w[1] = 0x7c00000000000000ull; 897 res.w[0] = 0x0000000000000000ull; 898 // set invalid flag 899 *pfpsf |= INVALID_EXCEPTION; 900 } 901 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 902 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 903 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 904 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 905 BID_SWAP128 (res); 906 BID_RETURN (res) 907 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 908 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 909 // x = f, necessarily 910 if ((p_sign != z_sign) 911 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { 912 // return QNaN Indefinite 913 res.w[1] = 0x7c00000000000000ull; 914 res.w[0] = 0x0000000000000000ull; 915 // set invalid flag 916 *pfpsf |= INVALID_EXCEPTION; 917 } else { 918 res.w[1] = z_sign | MASK_INF; 919 res.w[0] = 0x0; 920 } 921 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 922 // z = 0, f, inf 923 // return QNaN Indefinite 924 res.w[1] = 0x7c00000000000000ull; 925 res.w[0] = 
0x0000000000000000ull; 926 // set invalid flag 927 *pfpsf |= INVALID_EXCEPTION; 928 } else { 929 // x = f and z = 0, f, necessarily 930 res.w[1] = p_sign | MASK_INF; 931 res.w[0] = 0x0; 932 } 933 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 934 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 935 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 936 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 937 BID_SWAP128 (res); 938 BID_RETURN (res) 939 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 940 // x = 0, f and y = 0, f, necessarily 941 res.w[1] = z_sign | MASK_INF; 942 res.w[0] = 0x0; 943 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 944 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 945 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 946 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 947 BID_SWAP128 (res); 948 BID_RETURN (res) 949 } 950 951 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; 952 if (true_p_exp < -6176) 953 p_exp = 0; // cannot be less than EXP_MIN 954 else 955 p_exp = (UINT64) (true_p_exp + 6176) << 49; 956 957 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 958 // the result is 0 959 if (p_exp < z_exp) 960 res.w[1] = p_exp; // preferred exponent 961 else 962 res.w[1] = z_exp; // preferred exponent 963 if (p_sign == z_sign) { 964 res.w[1] |= z_sign; 965 res.w[0] = 0x0; 966 } else { // x * y and z have opposite signs 967 if (rnd_mode == ROUNDING_DOWN) { 968 // res = -0.0 969 res.w[1] |= MASK_SIGN; 970 res.w[0] = 0x0; 971 } else { 972 // res = +0.0 973 // res.w[1] |= 0x0; 974 res.w[0] = 0x0; 975 } 976 } 977 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 978 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 979 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 980 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 981 BID_SWAP128 (res); 982 BID_RETURN (res) 983 } 984 // from this point on, we may 
need to know the number of decimal digits 985 // in the significands of x, y, z when x, y, z != 0 986 987 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) 988 // q1 = nr. of decimal digits in x 989 // determine first the nr. of bits in x 990 if (C1.w[1] == 0) { 991 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 992 // split the 64-bit value in two 32-bit halves to avoid rounding errors 993 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 994 tmp.d = (double) (C1.w[0] >> 32); // exact conversion 995 x_nr_bits = 996 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 997 } else { // x < 2^32 998 tmp.d = (double) (C1.w[0]); // exact conversion 999 x_nr_bits = 1000 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1001 } 1002 } else { // if x < 2^53 1003 tmp.d = (double) C1.w[0]; // exact conversion 1004 x_nr_bits = 1005 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1006 } 1007 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) 1008 tmp.d = (double) C1.w[1]; // exact conversion 1009 x_nr_bits = 1010 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1011 } 1012 q1 = nr_digits[x_nr_bits - 1].digits; 1013 if (q1 == 0) { 1014 q1 = nr_digits[x_nr_bits - 1].digits1; 1015 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || 1016 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && 1017 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) 1018 q1++; 1019 } 1020 } 1021 1022 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) 1023 if (C2.w[1] == 0) { 1024 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 1025 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1026 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 1027 tmp.d = (double) (C2.w[0] >> 32); // exact conversion 1028 y_nr_bits = 1029 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1030 } else { // y < 2^32 1031 tmp.d = (double) C2.w[0]; // exact conversion 1032 y_nr_bits = 1033 ((((unsigned int) 
(tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1034 } 1035 } else { // if y < 2^53 1036 tmp.d = (double) C2.w[0]; // exact conversion 1037 y_nr_bits = 1038 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1039 } 1040 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) 1041 tmp.d = (double) C2.w[1]; // exact conversion 1042 y_nr_bits = 1043 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1044 } 1045 1046 q2 = nr_digits[y_nr_bits].digits; 1047 if (q2 == 0) { 1048 q2 = nr_digits[y_nr_bits].digits1; 1049 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || 1050 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && 1051 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) 1052 q2++; 1053 } 1054 } 1055 1056 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) 1057 if (C3.w[1] == 0) { 1058 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 1059 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1060 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 1061 tmp.d = (double) (C3.w[0] >> 32); // exact conversion 1062 z_nr_bits = 1063 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1064 } else { // z < 2^32 1065 tmp.d = (double) C3.w[0]; // exact conversion 1066 z_nr_bits = 1067 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1068 } 1069 } else { // if z < 2^53 1070 tmp.d = (double) C3.w[0]; // exact conversion 1071 z_nr_bits = 1072 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1073 } 1074 } else { // C3.w[1] != 0 => nr. 
bits = 64 + nr_bits (C3.w[1]) 1075 tmp.d = (double) C3.w[1]; // exact conversion 1076 z_nr_bits = 1077 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1078 } 1079 1080 q3 = nr_digits[z_nr_bits].digits; 1081 if (q3 == 0) { 1082 q3 = nr_digits[z_nr_bits].digits1; 1083 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || 1084 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && 1085 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) 1086 q3++; 1087 } 1088 } 1089 1090 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 1091 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { 1092 // x = 0 or y = 0 1093 // z = f, necessarily; for 0 + z return z, with the preferred exponent 1094 // the result is z, but need to get the preferred exponent 1095 if (z_exp <= p_exp) { // the preferred exponent is z_exp 1096 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; 1097 res.w[0] = C3.w[0]; 1098 } else { // if (p_exp < z_exp) the preferred exponent is p_exp 1099 // return (C3 * 10^scale) * 10^(z_exp - scale) 1100 // where scale = min (p34-q3, (z_exp-p_exp) >> 49) 1101 scale = p34 - q3; 1102 ind = (z_exp - p_exp) >> 49; 1103 if (ind < scale) 1104 scale = ind; 1105 if (scale == 0) { 1106 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant 1107 res.w[0] = z.w[0]; 1108 } else if (q3 <= 19) { // z fits in 64 bits 1109 if (scale <= 19) { // 10^scale fits in 64 bits 1110 // 64 x 64 C3.w[0] * ten2k64[scale] 1111 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1112 } else { // 10^scale fits in 128 bits 1113 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1114 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1115 } 1116 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1117 // 64 x 128 ten2k64[scale] * C3 1118 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1119 } 1120 // subtract scale from the exponent 1121 z_exp = z_exp - ((UINT64) scale << 49); 1122 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1123 } 1124 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1125 
*ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1126 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1127 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1128 BID_SWAP128 (res); 1129 BID_RETURN (res) 1130 } else { 1131 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f 1132 } 1133 1134 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x 1135 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y 1136 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z 1137 e4 = e1 + e2; // unbiased exponent of the exact x * y 1138 1139 // calculate C1 * C2 and its number of decimal digits, q4 1140 1141 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits 1142 // where 2 <= q1 + q2 <= 68 1143 // calculate C4 = C1 * C2 and determine q 1144 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; 1145 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits 1146 C4.w[0] = C1.w[0] * C2.w[0]; 1147 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1148 if (C4.w[0] < ten2k64[q1 + q2 - 1]) 1149 q4 = q1 + q2 - 1; // q4 in [1, 18] 1150 else 1151 q4 = q1 + q2; // q4 in [2, 19] 1152 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1153 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits 1154 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits 1155 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); 1156 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 1157 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 1158 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1159 q4 = 19; // 19 = q1 + q2 - 1 1160 } else { 1161 // if (C4.w[1] == 0) 1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1163 // else 1164 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1165 q4 = 20; // 20 = q1 + q2 1166 } 1167 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 1168 // C4 = C1 * C2 fits in 64 or 128 bits 1169 // (64 
bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) 1170 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits 1171 if (q1 <= 19) { 1172 __mul_128x64_to_128 (C4, C1.w[0], C2); 1173 } else { // q2 <= 19 1174 __mul_128x64_to_128 (C4, C2.w[0], C1); 1175 } 1176 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1177 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || 1178 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && 1179 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { 1180 // if (C4.w[1] == 0) // q4 = 20, necessarily 1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1182 // else 1183 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1184 q4 = q1 + q2 - 1; // q4 in [20, 37] 1185 } else { 1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1187 q4 = q1 + q2; // q4 in [21, 38] 1188 } 1189 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits 1190 // both C1 and C2 fit in 128 bits (actually in 113 bits) 1191 // may replace this by 128x128_to192 1192 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 1193 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 1194 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || 1195 (C4.w[1] == ten2k128[18].w[1] 1196 && C4.w[0] < ten2k128[18].w[0]))) { 1197 // 18 = 38 - 20 = q1+q2-1 - 20 1198 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1199 q4 = 38; // 38 = q1 + q2 - 1 1200 } else { 1201 // if (C4.w[2] == 0) 1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1203 // else 1204 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1205 q4 = 39; // 39 = q1 + q2 1206 } 1207 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 1208 // C4 = C1 * C2 fits in 128 or 192 bits 1209 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) 1210 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one 1211 // may fit in 64 
bits 1212 if (C1.w[1] == 0) { // C1 fits in 64 bits 1213 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1214 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); 1215 } else if (C2.w[1] == 0) { // C2 fits in 64 bits 1216 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1217 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); 1218 } else { // both C1 and C2 require 128 bits 1219 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1220 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1221 } 1222 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1223 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || 1224 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && 1225 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || 1226 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && 1227 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { 1228 // if (C4.w[2] == 0) // q4 = 39, necessarily 1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1230 // else 1231 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1232 q4 = q1 + q2 - 1; // q4 in [39, 56] 1233 } else { 1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1235 q4 = q1 + q2; // q4 in [40, 57] 1236 } 1237 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits 1238 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one 1239 // may fit in 64 bits 1240 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits 1241 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 1242 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits 1243 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 1244 } else { // C1 * C2 will fit in 192 bits or in 256 bits 1245 __mul_128x128_to_256 (C4, C1, C2); 1246 } 1247 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 1248 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || 1249 (C4.w[2] == ten2k256[18].w[2] 1250 && (C4.w[1] < ten2k256[18].w[1] 1251 || (C4.w[1] == 
ten2k256[18].w[1] 1252 && C4.w[0] < ten2k256[18].w[0]))))) { 1253 // 18 = 57 - 39 = q1+q2-1 - 39 1254 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1255 q4 = 57; // 57 = q1 + q2 - 1 1256 } else { 1257 // if (C4.w[3] == 0) 1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1259 // else 1260 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1261 q4 = 58; // 58 = q1 + q2 1262 } 1263 } else { // if 59 <= q1 + q2 <= 68 1264 // C4 = C1 * C2 fits in 192 or 256 bits 1265 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) 1266 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in 1267 // 64 bits 1268 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1269 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1270 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1271 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || 1272 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && 1273 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || 1274 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && 1275 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || 1276 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && 1277 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { 1278 // if (C4.w[3] == 0) // q4 = 58, necessarily 1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1280 // else 1281 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1282 q4 = q1 + q2 - 1; // q4 in [58, 67] 1283 } else { 1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1285 q4 = q1 + q2; // q4 in [59, 68] 1286 } 1287 } 1288 1289 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 1290 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved 1291 *pfpsf = 0; 1292 1293 if (q4 > p34) { 1294 1295 // truncate C4 to p34 digits into res 1296 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 1297 x0 = q4 - p34; 1298 if (q4 <= 38) { 1299 P128.w[1] = C4.w[1]; 1300 
P128.w[0] = C4.w[0]; 1301 round128_19_38 (q4, x0, P128, &res, &incr_exp, 1302 &is_midpoint_lt_even, &is_midpoint_gt_even, 1303 &is_inexact_lt_midpoint, 1304 &is_inexact_gt_midpoint); 1305 } else if (q4 <= 57) { // 35 <= q4 <= 57 1306 P192.w[2] = C4.w[2]; 1307 P192.w[1] = C4.w[1]; 1308 P192.w[0] = C4.w[0]; 1309 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 1310 &is_midpoint_lt_even, &is_midpoint_gt_even, 1311 &is_inexact_lt_midpoint, 1312 &is_inexact_gt_midpoint); 1313 res.w[0] = R192.w[0]; 1314 res.w[1] = R192.w[1]; 1315 } else { // if (q4 <= 68) 1316 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 1317 &is_midpoint_lt_even, &is_midpoint_gt_even, 1318 &is_inexact_lt_midpoint, 1319 &is_inexact_gt_midpoint); 1320 res.w[0] = R256.w[0]; 1321 res.w[1] = R256.w[1]; 1322 } 1323 e4 = e4 + x0; 1324 if (incr_exp) { 1325 e4 = e4 + 1; 1326 } 1327 q4 = p34; 1328 // res is now the coefficient of the result rounded to the destination 1329 // precision, with unbounded exponent; the exponent is e4; q4=digits(res) 1330 } else { // if (q4 <= p34) 1331 // C4 * 10^e4 is the result rounded to the destination precision, with 1332 // unbounded exponent (which is exact) 1333 1334 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { 1335 // e4 is too large, but can be brought within range by scaling up C4 1336 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 1337 // res = (C4 * 10^scale) * 10^expmax 1338 if (q4 <= 19) { // C4 fits in 64 bits 1339 if (scale <= 19) { // 10^scale fits in 64 bits 1340 // 64 x 64 C4.w[0] * ten2k64[scale] 1341 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); 1342 } else { // 10^scale fits in 128 bits 1343 // 64 x 128 C4.w[0] * ten2k128[scale - 20] 1344 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); 1345 } 1346 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 1347 // 64 x 128 ten2k64[scale] * CC43 1348 __mul_128x64_to_128 (res, ten2k64[scale], C4); 1349 } 1350 e4 = e4 - scale; // expmax 1351 q4 = q4 + scale; 1352 
} else { 1353 res.w[1] = C4.w[1]; 1354 res.w[0] = C4.w[0]; 1355 } 1356 // res is the coefficient of the result rounded to the destination 1357 // precision, with unbounded exponent (it has q4 digits); the exponent 1358 // is e4 (exact result) 1359 } 1360 1361 // check for overflow 1362 if (q4 + e4 > p34 + expmax) { 1363 if (rnd_mode == ROUNDING_TO_NEAREST) { 1364 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf 1365 res.w[0] = 0x0000000000000000ull; 1366 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 1367 } else { 1368 res.w[1] = p_sign | res.w[1]; 1369 rounding_correction (rnd_mode, 1370 is_inexact_lt_midpoint, 1371 is_inexact_gt_midpoint, 1372 is_midpoint_lt_even, is_midpoint_gt_even, 1373 e4, &res, pfpsf); 1374 } 1375 *pfpsf |= save_fpsf; 1376 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1377 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1378 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1379 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1380 BID_SWAP128 (res); 1381 BID_RETURN (res) 1382 } 1383 // check for underflow 1384 if (q4 + e4 < expmin + P34) { 1385 is_tiny = 1; // the result is tiny 1386 if (e4 < expmin) { 1387 // if e4 < expmin, we must truncate more of res 1388 x0 = expmin - e4; // x0 >= 1 1389 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 1390 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 1391 is_midpoint_lt_even0 = is_midpoint_lt_even; 1392 is_midpoint_gt_even0 = is_midpoint_gt_even; 1393 is_inexact_lt_midpoint = 0; 1394 is_inexact_gt_midpoint = 0; 1395 is_midpoint_lt_even = 0; 1396 is_midpoint_gt_even = 0; 1397 // the number of decimal digits in res is q4 1398 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits 1399 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 1400 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, 1401 &is_midpoint_lt_even, &is_midpoint_gt_even, 1402 &is_inexact_lt_midpoint, 1403 &is_inexact_gt_midpoint); 1404 if (incr_exp) { 1405 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= 
q4 - x0 <= 17 1406 R64 = ten2k64[q4 - x0]; 1407 } 1408 // res.w[1] = 0; (from above) 1409 res.w[0] = R64; 1410 } else { // if (q4 <= 34) 1411 // 19 <= q4 <= 38 1412 P128.w[1] = res.w[1]; 1413 P128.w[0] = res.w[0]; 1414 round128_19_38 (q4, x0, P128, &res, &incr_exp, 1415 &is_midpoint_lt_even, &is_midpoint_gt_even, 1416 &is_inexact_lt_midpoint, 1417 &is_inexact_gt_midpoint); 1418 if (incr_exp) { 1419 // increase coefficient by a factor of 10; this will be <= 10^33 1420 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 1421 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 1422 // res.w[1] = 0; 1423 res.w[0] = ten2k64[q4 - x0]; 1424 } else { // 20 <= q4 - x0 <= 37 1425 res.w[0] = ten2k128[q4 - x0 - 20].w[0]; 1426 res.w[1] = ten2k128[q4 - x0 - 20].w[1]; 1427 } 1428 } 1429 } 1430 e4 = e4 + x0; // expmin 1431 } else if (x0 == q4) { 1432 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin 1433 // determine relationship with 1/2 ulp 1434 if (q4 <= 19) { 1435 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp 1436 lt_half_ulp = 1; 1437 is_inexact_lt_midpoint = 1; 1438 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp 1439 eq_half_ulp = 1; 1440 is_midpoint_gt_even = 1; 1441 } else { // > 1/2 ulp 1442 // gt_half_ulp = 1; 1443 is_inexact_gt_midpoint = 1; 1444 } 1445 } else { // if (q4 <= 34) 1446 if (res.w[1] < midpoint128[q4 - 20].w[1] || 1447 (res.w[1] == midpoint128[q4 - 20].w[1] && 1448 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp 1449 lt_half_ulp = 1; 1450 is_inexact_lt_midpoint = 1; 1451 } else if (res.w[1] == midpoint128[q4 - 20].w[1] && 1452 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1453 eq_half_ulp = 1; 1454 is_midpoint_gt_even = 1; 1455 } else { // > 1/2 ulp 1456 // gt_half_ulp = 1; 1457 is_inexact_gt_midpoint = 1; 1458 } 1459 } 1460 if (lt_half_ulp || eq_half_ulp) { 1461 // res = +0.0 * 10^expmin 1462 res.w[1] = 0x0000000000000000ull; 1463 res.w[0] = 0x0000000000000000ull; 1464 } else { // if (gt_half_ulp) 1465 // res = 
+1 * 10^expmin 1466 res.w[1] = 0x0000000000000000ull; 1467 res.w[0] = 0x0000000000000001ull; 1468 } 1469 e4 = expmin; 1470 } else { // if (x0 > q4) 1471 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin 1472 res.w[1] = 0; 1473 res.w[0] = 0; 1474 e4 = expmin; 1475 is_inexact_lt_midpoint = 1; 1476 } 1477 // avoid a double rounding error 1478 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 1479 is_midpoint_lt_even) { // double rounding error upward 1480 // res = res - 1 1481 res.w[0]--; 1482 if (res.w[0] == 0xffffffffffffffffull) 1483 res.w[1]--; 1484 // Note: a double rounding error upward is not possible; for this 1485 // the result after the first rounding would have to be 99...95 1486 // (35 digits in all), possibly followed by a number of zeros; this 1487 // not possible for f * f + 0 1488 is_midpoint_lt_even = 0; 1489 is_inexact_lt_midpoint = 1; 1490 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 1491 is_midpoint_gt_even) { // double rounding error downward 1492 // res = res + 1 1493 res.w[0]++; 1494 if (res.w[0] == 0) 1495 res.w[1]++; 1496 is_midpoint_gt_even = 0; 1497 is_inexact_gt_midpoint = 1; 1498 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 1499 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 1500 // if this second rounding was exact the result may still be 1501 // inexact because of the first rounding 1502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 1503 is_inexact_gt_midpoint = 1; 1504 } 1505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 1506 is_inexact_lt_midpoint = 1; 1507 } 1508 } else if (is_midpoint_gt_even && 1509 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 1510 // pulled up to a midpoint 1511 is_inexact_lt_midpoint = 1; 1512 is_inexact_gt_midpoint = 0; 1513 is_midpoint_lt_even = 0; 1514 is_midpoint_gt_even = 0; 1515 } else if (is_midpoint_lt_even && 1516 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 1517 // pulled down to a midpoint 1518 
is_inexact_lt_midpoint = 0; 1519 is_inexact_gt_midpoint = 1; 1520 is_midpoint_lt_even = 0; 1521 is_midpoint_gt_even = 0; 1522 } else { 1523 ; 1524 } 1525 } else { // if e4 >= emin then q4 < P and the result is tiny and exact 1526 if (e3 < e4) { 1527 // if (e3 < e4) the preferred exponent is e3 1528 // return (C4 * 10^scale) * 10^(e4 - scale) 1529 // where scale = min (p34-q4, (e4 - e3)) 1530 scale = p34 - q4; 1531 ind = e4 - e3; 1532 if (ind < scale) 1533 scale = ind; 1534 if (scale == 0) { 1535 ; // res and e4 are unchanged 1536 } else if (q4 <= 19) { // C4 fits in 64 bits 1537 if (scale <= 19) { // 10^scale fits in 64 bits 1538 // 64 x 64 res.w[0] * ten2k64[scale] 1539 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); 1540 } else { // 10^scale fits in 128 bits 1541 // 64 x 128 res.w[0] * ten2k128[scale - 20] 1542 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); 1543 } 1544 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits 1545 // 64 x 128 ten2k64[scale] * C3 1546 __mul_128x64_to_128 (res, ten2k64[scale], res); 1547 } 1548 // subtract scale from the exponent 1549 e4 = e4 - scale; 1550 } 1551 } 1552 1553 // check for inexact result 1554 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1555 is_midpoint_lt_even || is_midpoint_gt_even) { 1556 // set the inexact flag and the underflow flag 1557 *pfpsf |= INEXACT_EXCEPTION; 1558 *pfpsf |= UNDERFLOW_EXCEPTION; 1559 } 1560 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; 1561 if (rnd_mode != ROUNDING_TO_NEAREST) { 1562 rounding_correction (rnd_mode, 1563 is_inexact_lt_midpoint, 1564 is_inexact_gt_midpoint, 1565 is_midpoint_lt_even, is_midpoint_gt_even, 1566 e4, &res, pfpsf); 1567 } 1568 *pfpsf |= save_fpsf; 1569 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1570 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1571 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1572 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1573 BID_SWAP128 (res); 1574 BID_RETURN 
(res) 1575 } 1576 // no overflow, and no underflow for rounding to nearest 1577 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; 1578 1579 if (rnd_mode != ROUNDING_TO_NEAREST) { 1580 rounding_correction (rnd_mode, 1581 is_inexact_lt_midpoint, 1582 is_inexact_gt_midpoint, 1583 is_midpoint_lt_even, is_midpoint_gt_even, 1584 e4, &res, pfpsf); 1585 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) 1586 if (e4 == expmin) { 1587 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 1588 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 1589 res.w[0] < 0x38c15b0a00000000ull)) { 1590 is_tiny = 1; 1591 } 1592 } 1593 } 1594 1595 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1596 is_midpoint_lt_even || is_midpoint_gt_even) { 1597 // set the inexact flag 1598 *pfpsf |= INEXACT_EXCEPTION; 1599 if (is_tiny) 1600 *pfpsf |= UNDERFLOW_EXCEPTION; 1601 } 1602 1603 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact 1604 // need to ensure that the result has the preferred exponent 1605 p_exp = res.w[1] & MASK_EXP; 1606 if (z_exp < p_exp) { // the preferred exponent is z_exp 1607 // signficand of res in C3 1608 C3.w[1] = res.w[1] & MASK_COEFF; 1609 C3.w[0] = res.w[0]; 1610 // the number of decimal digits of x * y is q4 <= 34 1611 // Note: the coefficient fits in 128 bits 1612 1613 // return (C3 * 10^scale) * 10^(p_exp - scale) 1614 // where scale = min (p34-q4, (p_exp-z_exp) >> 49) 1615 scale = p34 - q4; 1616 ind = (p_exp - z_exp) >> 49; 1617 if (ind < scale) 1618 scale = ind; 1619 // subtract scale from the exponent 1620 p_exp = p_exp - ((UINT64) scale << 49); 1621 if (scale == 0) { 1622 ; // leave res unchanged 1623 } else if (q4 <= 19) { // x * y fits in 64 bits 1624 if (scale <= 19) { // 10^scale fits in 64 bits 1625 // 64 x 64 C3.w[0] * ten2k64[scale] 1626 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1627 } else { // 10^scale fits in 128 bits 1628 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1629 __mul_128x64_to_128 
(res, C3.w[0], ten2k128[scale - 20]); 1630 } 1631 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1632 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits 1633 // 64 x 128 ten2k64[scale] * C3 1634 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1635 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1636 } 1637 } // else leave the result as it is, because p_exp <= z_exp 1638 } 1639 *pfpsf |= save_fpsf; 1640 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1641 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1642 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1643 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1644 BID_SWAP128 (res); 1645 BID_RETURN (res) 1646 } // else we have f * f + f 1647 1648 // continue with x = f, y = f, z = f 1649 1650 delta = q3 + e3 - q4 - e4; 1651 delta_ge_zero: 1652 if (delta >= 0) { 1653 1654 if (p34 <= delta - 1 || // Case (1') 1655 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) 1656 // check for overflow, which can occur only in Case (1') 1657 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { 1658 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary 1659 // condition for (q3 + e3) > (p34 + expmax) 1660 if (rnd_mode == ROUNDING_TO_NEAREST) { 1661 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 1662 res.w[0] = 0x0000000000000000ull; 1663 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 1664 } else { 1665 if (p_sign == z_sign) { 1666 is_inexact_lt_midpoint = 1; 1667 } else { 1668 is_inexact_gt_midpoint = 1; 1669 } 1670 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) 1671 scale = p34 - q3; 1672 if (scale == 0) { 1673 res.w[1] = z_sign | C3.w[1]; 1674 res.w[0] = C3.w[0]; 1675 } else { 1676 if (q3 <= 19) { // C3 fits in 64 bits 1677 if (scale <= 19) { // 10^scale fits in 64 bits 1678 // 64 x 64 C3.w[0] * ten2k64[scale] 1679 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1680 } else { // 10^scale fits in 128 bits 1681 // 64 x 128 C3.w[0] * ten2k128[scale - 
20] 1682 __mul_128x64_to_128 (res, C3.w[0], 1683 ten2k128[scale - 20]); 1684 } 1685 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits 1686 // 64 x 128 ten2k64[scale] * C3 1687 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1688 } 1689 // the coefficient in res has q3 + scale = p34 digits 1690 } 1691 e3 = e3 - scale; 1692 res.w[1] = z_sign | res.w[1]; 1693 rounding_correction (rnd_mode, 1694 is_inexact_lt_midpoint, 1695 is_inexact_gt_midpoint, 1696 is_midpoint_lt_even, is_midpoint_gt_even, 1697 e3, &res, pfpsf); 1698 } 1699 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1700 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1701 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1702 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1703 BID_SWAP128 (res); 1704 BID_RETURN (res) 1705 } 1706 // res = z 1707 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) 1708 // return (C3 * 10^scale) * 10^(z_exp - scale) 1709 // where scale = min (p34-q3, z_exp-EMIN) 1710 scale = p34 - q3; 1711 ind = e3 + 6176; 1712 if (ind < scale) 1713 scale = ind; 1714 if (scale == 0) { 1715 res.w[1] = C3.w[1]; 1716 res.w[0] = C3.w[0]; 1717 } else if (q3 <= 19) { // z fits in 64 bits 1718 if (scale <= 19) { // 10^scale fits in 64 bits 1719 // 64 x 64 C3.w[0] * ten2k64[scale] 1720 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1721 } else { // 10^scale fits in 128 bits 1722 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1723 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1724 } 1725 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1726 // 64 x 128 ten2k64[scale] * C3 1727 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1728 } 1729 // the coefficient in res has q3 + scale digits 1730 // subtract scale from the exponent 1731 z_exp = z_exp - ((UINT64) scale << 49); 1732 e3 = e3 - scale; 1733 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1734 if (scale + q3 < p34) 1735 *pfpsf |= UNDERFLOW_EXCEPTION; 1736 } else { 1737 scale = 0; 
1738 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; 1739 res.w[0] = C3.w[0]; 1740 } 1741 1742 // use the following to avoid double rounding errors when operating on 1743 // mixed formats in rounding to nearest, and for correcting the result 1744 // if not rounding to nearest 1745 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { 1746 // there is a gap of exactly one digit between the scaled C3 and C4 1747 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case 1748 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || 1749 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || 1750 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || 1751 C3.w[0] != ten2k128[q3 - 21].w[0]))) { 1752 // C3 * 10^ scale != 10^(q3-1) 1753 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || 1754 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 1755 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value 1756 } else { // if C3 * 10^scale = 10^(q3+scale-1) 1757 // ok from above e3 = (z_exp >> 49) - 6176; 1758 // the result is always inexact 1759 if (q4 == 1) { 1760 R64 = C4.w[0]; 1761 } else { 1762 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 1763 // x = q4-1, 1 <= x <= 67 and check if this operation is exact 1764 if (q4 <= 18) { // 2 <= q4 <= 18 1765 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 1766 &is_midpoint_lt_even, &is_midpoint_gt_even, 1767 &is_inexact_lt_midpoint, 1768 &is_inexact_gt_midpoint); 1769 } else if (q4 <= 38) { 1770 P128.w[1] = C4.w[1]; 1771 P128.w[0] = C4.w[0]; 1772 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 1773 &is_midpoint_lt_even, 1774 &is_midpoint_gt_even, 1775 &is_inexact_lt_midpoint, 1776 &is_inexact_gt_midpoint); 1777 R64 = R128.w[0]; // one decimal digit 1778 } else if (q4 <= 57) { 1779 P192.w[2] = C4.w[2]; 1780 P192.w[1] = C4.w[1]; 1781 P192.w[0] = C4.w[0]; 1782 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 1783 &is_midpoint_lt_even, 1784 &is_midpoint_gt_even, 
1785 &is_inexact_lt_midpoint, 1786 &is_inexact_gt_midpoint); 1787 R64 = R192.w[0]; // one decimal digit 1788 } else { // if (q4 <= 68) 1789 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 1790 &is_midpoint_lt_even, 1791 &is_midpoint_gt_even, 1792 &is_inexact_lt_midpoint, 1793 &is_inexact_gt_midpoint); 1794 R64 = R256.w[0]; // one decimal digit 1795 } 1796 if (incr_exp) { 1797 R64 = 10; 1798 } 1799 } 1800 if (q4 == 1 && C4.w[0] == 5) { 1801 is_inexact_lt_midpoint = 0; 1802 is_inexact_gt_midpoint = 0; 1803 is_midpoint_lt_even = 1; 1804 is_midpoint_gt_even = 0; 1805 } else if ((e3 == expmin) || 1806 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { 1807 // result does not change 1808 is_inexact_lt_midpoint = 0; 1809 is_inexact_gt_midpoint = 1; 1810 is_midpoint_lt_even = 0; 1811 is_midpoint_gt_even = 0; 1812 } else { 1813 is_inexact_lt_midpoint = 1; 1814 is_inexact_gt_midpoint = 0; 1815 is_midpoint_lt_even = 0; 1816 is_midpoint_gt_even = 0; 1817 // result decremented is 10^(q3+scale) - 1 1818 if ((q3 + scale) <= 19) { 1819 res.w[1] = 0; 1820 res.w[0] = ten2k64[q3 + scale]; 1821 } else { // if ((q3 + scale + 1) <= 35) 1822 res.w[1] = ten2k128[q3 + scale - 20].w[1]; 1823 res.w[0] = ten2k128[q3 + scale - 20].w[0]; 1824 } 1825 res.w[0] = res.w[0] - 1; // borrow never occurs 1826 z_exp = z_exp - EXP_P1; 1827 e3 = e3 - 1; 1828 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 1829 } 1830 if (e3 == expmin) { 1831 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { 1832 ; // result not tiny (in round-to-nearest mode) 1833 } else { 1834 *pfpsf |= UNDERFLOW_EXCEPTION; 1835 } 1836 } 1837 } // end 10^(q3+scale-1) 1838 // set the inexact flag 1839 *pfpsf |= INEXACT_EXCEPTION; 1840 } else { 1841 if (p_sign == z_sign) { 1842 // if (z_sign), set as if for absolute value 1843 is_inexact_lt_midpoint = 1; 1844 } else { // if (p_sign != z_sign) 1845 // if (z_sign), set as if for absolute value 1846 is_inexact_gt_midpoint = 1; 1847 } 1848 *pfpsf |= INEXACT_EXCEPTION; 1849 
} 1850 // the result is always inexact => set the inexact flag 1851 // Determine tininess: 1852 // if (exp > expmin) 1853 // the result is not tiny 1854 // else // if exp = emin 1855 // if (q3 + scale < p34) 1856 // the result is tiny 1857 // else // if (q3 + scale = p34) 1858 // if (C3 * 10^scale > 10^33) 1859 // the result is not tiny 1860 // else // if C3 * 10^scale = 10^33 1861 // if (xy * z > 0) 1862 // the result is not tiny 1863 // else // if (xy * z < 0) 1864 // if (z > 0) 1865 // if rnd_mode != RP 1866 // the result is tiny 1867 // else // if RP 1868 // the result is not tiny 1869 // else // if (z < 0) 1870 // if rnd_mode != RM 1871 // the result is tiny 1872 // else // if RM 1873 // the result is not tiny 1874 // endif 1875 // endif 1876 // endif 1877 // endif 1878 // endif 1879 // endif 1880 if ((e3 == expmin && (q3 + scale) < p34) || 1881 (e3 == expmin && (q3 + scale) == p34 && 1882 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high 1883 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low 1884 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || 1885 (z_sign && rnd_mode != ROUNDING_DOWN)))) { 1886 *pfpsf |= UNDERFLOW_EXCEPTION; 1887 } 1888 if (rnd_mode != ROUNDING_TO_NEAREST) { 1889 rounding_correction (rnd_mode, 1890 is_inexact_lt_midpoint, 1891 is_inexact_gt_midpoint, 1892 is_midpoint_lt_even, is_midpoint_gt_even, 1893 e3, &res, pfpsf); 1894 } 1895 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1896 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1897 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1898 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1899 BID_SWAP128 (res); 1900 BID_RETURN (res) 1901 1902 } else if (p34 == delta) { // Case (1''B) 1903 1904 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 1905 // and C3 can be scaled up to p34 digits if needed 1906 1907 // scale C3 to p34 digits if needed 1908 scale = p34 - q3; // 0 <= scale <= p34 - 1 1909 if (scale == 0) { 1910 res.w[1] = C3.w[1]; 1911 
res.w[0] = C3.w[0]; 1912 } else if (q3 <= 19) { // z fits in 64 bits 1913 if (scale <= 19) { // 10^scale fits in 64 bits 1914 // 64 x 64 C3.w[0] * ten2k64[scale] 1915 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 1916 } else { // 10^scale fits in 128 bits 1917 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 1918 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 1919 } 1920 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1921 // 64 x 128 ten2k64[scale] * C3 1922 __mul_128x64_to_128 (res, ten2k64[scale], C3); 1923 } 1924 // subtract scale from the exponent 1925 z_exp = z_exp - ((UINT64) scale << 49); 1926 e3 = e3 - scale; 1927 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits 1928 1929 // determine whether x * y is less than, equal to, or greater than 1930 // 1/2 ulp (z) 1931 if (q4 <= 19) { 1932 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp 1933 lt_half_ulp = 1; 1934 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp 1935 eq_half_ulp = 1; 1936 } else { // > 1/2 ulp 1937 gt_half_ulp = 1; 1938 } 1939 } else if (q4 <= 38) { 1940 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || 1941 (C4.w[1] == midpoint128[q4 - 20].w[1] && 1942 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp 1943 lt_half_ulp = 1; 1944 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && 1945 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1946 eq_half_ulp = 1; 1947 } else { // > 1/2 ulp 1948 gt_half_ulp = 1; 1949 } 1950 } else if (q4 <= 58) { 1951 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || 1952 (C4.w[2] == midpoint192[q4 - 39].w[2] && 1953 C4.w[1] < midpoint192[q4 - 39].w[1]) || 1954 (C4.w[2] == midpoint192[q4 - 39].w[2] && 1955 C4.w[1] == midpoint192[q4 - 39].w[1] && 1956 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp 1957 lt_half_ulp = 1; 1958 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && 1959 C4.w[1] == midpoint192[q4 - 39].w[1] && 1960 C4.w[0] == 
midpoint192[q4 - 39].w[0]) { // = 1/2 ulp 1961 eq_half_ulp = 1; 1962 } else { // > 1/2 ulp 1963 gt_half_ulp = 1; 1964 } 1965 } else { 1966 if (C4.w[3] < midpoint256[q4 - 59].w[3] || 1967 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1968 C4.w[2] < midpoint256[q4 - 59].w[2]) || 1969 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1970 C4.w[2] == midpoint256[q4 - 59].w[2] && 1971 C4.w[1] < midpoint256[q4 - 59].w[1]) || 1972 (C4.w[3] == midpoint256[q4 - 59].w[3] && 1973 C4.w[2] == midpoint256[q4 - 59].w[2] && 1974 C4.w[1] == midpoint256[q4 - 59].w[1] && 1975 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp 1976 lt_half_ulp = 1; 1977 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && 1978 C4.w[2] == midpoint256[q4 - 59].w[2] && 1979 C4.w[1] == midpoint256[q4 - 59].w[1] && 1980 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp 1981 eq_half_ulp = 1; 1982 } else { // > 1/2 ulp 1983 gt_half_ulp = 1; 1984 } 1985 } 1986 1987 if (p_sign == z_sign) { 1988 if (lt_half_ulp) { 1989 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1990 // use the following to avoid double rounding errors when operating on 1991 // mixed formats in rounding to nearest 1992 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value 1993 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 1994 // add 1 ulp to the significand 1995 res.w[0]++; 1996 if (res.w[0] == 0x0ull) 1997 res.w[1]++; 1998 // check for rounding overflow, when coeff == 10^34 1999 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && 2000 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 2001 e3 = e3 + 1; 2002 // coeff = 10^33 2003 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; 2004 res.w[1] = 0x0000314dc6448d93ull; 2005 res.w[0] = 0x38c15b0a00000000ull; 2006 } 2007 // end add 1 ulp 2008 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2009 if (eq_half_ulp) { 2010 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2011 } else { 2012 is_inexact_gt_midpoint = 1; // if (z_sign), as if for 
absolute value 2013 } 2014 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2015 // leave unchanged 2016 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2017 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2018 } 2019 // the result is always inexact, and never tiny 2020 // set the inexact flag 2021 *pfpsf |= INEXACT_EXCEPTION; 2022 // check for overflow 2023 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { 2024 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2025 res.w[0] = 0x0000000000000000ull; 2026 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2027 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2028 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2029 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2030 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2031 BID_SWAP128 (res); 2032 BID_RETURN (res) 2033 } 2034 if (rnd_mode != ROUNDING_TO_NEAREST) { 2035 rounding_correction (rnd_mode, 2036 is_inexact_lt_midpoint, 2037 is_inexact_gt_midpoint, 2038 is_midpoint_lt_even, is_midpoint_gt_even, 2039 e3, &res, pfpsf); 2040 z_exp = res.w[1] & MASK_EXP; 2041 } 2042 } else { // if (p_sign != z_sign) 2043 // consider two cases, because C3 * 10^scale = 10^33 is a special case 2044 if (res.w[1] != 0x0000314dc6448d93ull || 2045 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 2046 if (lt_half_ulp) { 2047 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2048 // use the following to avoid double rounding errors when operating 2049 // on mixed formats in rounding to nearest 2050 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value 2051 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 2052 // subtract 1 ulp from the significand 2053 res.w[0]--; 2054 if (res.w[0] == 0xffffffffffffffffull) 2055 res.w[1]--; 2056 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2057 if (eq_half_ulp) { 2058 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2059 } else { 2060 
is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2061 } 2062 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2063 // leave unchanged 2064 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2065 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2066 } 2067 // the result is always inexact, and never tiny 2068 // check for overflow for RN 2069 if (e3 > expmax) { 2070 if (rnd_mode == ROUNDING_TO_NEAREST) { 2071 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2072 res.w[0] = 0x0000000000000000ull; 2073 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2074 } else { 2075 rounding_correction (rnd_mode, 2076 is_inexact_lt_midpoint, 2077 is_inexact_gt_midpoint, 2078 is_midpoint_lt_even, 2079 is_midpoint_gt_even, e3, &res, 2080 pfpsf); 2081 } 2082 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2083 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2084 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2085 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2086 BID_SWAP128 (res); 2087 BID_RETURN (res) 2088 } 2089 // set the inexact flag 2090 *pfpsf |= INEXACT_EXCEPTION; 2091 if (rnd_mode != ROUNDING_TO_NEAREST) { 2092 rounding_correction (rnd_mode, 2093 is_inexact_lt_midpoint, 2094 is_inexact_gt_midpoint, 2095 is_midpoint_lt_even, 2096 is_midpoint_gt_even, e3, &res, pfpsf); 2097 } 2098 z_exp = res.w[1] & MASK_EXP; 2099 } else { // if C3 * 10^scale = 10^33 2100 e3 = (z_exp >> 49) - 6176; 2101 if (e3 > expmin) { 2102 // the result is exact if exp > expmin and C4 = d*10^(q4-1), 2103 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact 2104 if (q4 == 1) { 2105 // if q4 = 1 the result is exact 2106 // result coefficient = 10^34 - C4 2107 res.w[1] = 0x0001ed09bead87c0ull; 2108 res.w[0] = 0x378d8e6400000000ull - C4.w[0]; 2109 z_exp = z_exp - EXP_P1; 2110 e3 = e3 - 1; 2111 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2112 } else { 2113 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 2114 // x = q4-1, 1 <= x <= 
67 and check if this operation is exact 2115 if (q4 <= 18) { // 2 <= q4 <= 18 2116 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 2117 &is_midpoint_lt_even, 2118 &is_midpoint_gt_even, 2119 &is_inexact_lt_midpoint, 2120 &is_inexact_gt_midpoint); 2121 } else if (q4 <= 38) { 2122 P128.w[1] = C4.w[1]; 2123 P128.w[0] = C4.w[0]; 2124 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 2125 &is_midpoint_lt_even, 2126 &is_midpoint_gt_even, 2127 &is_inexact_lt_midpoint, 2128 &is_inexact_gt_midpoint); 2129 R64 = R128.w[0]; // one decimal digit 2130 } else if (q4 <= 57) { 2131 P192.w[2] = C4.w[2]; 2132 P192.w[1] = C4.w[1]; 2133 P192.w[0] = C4.w[0]; 2134 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 2135 &is_midpoint_lt_even, 2136 &is_midpoint_gt_even, 2137 &is_inexact_lt_midpoint, 2138 &is_inexact_gt_midpoint); 2139 R64 = R192.w[0]; // one decimal digit 2140 } else { // if (q4 <= 68) 2141 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 2142 &is_midpoint_lt_even, 2143 &is_midpoint_gt_even, 2144 &is_inexact_lt_midpoint, 2145 &is_inexact_gt_midpoint); 2146 R64 = R256.w[0]; // one decimal digit 2147 } 2148 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2149 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2150 // the result is exact: 10^34 - R64 2151 // incr_exp = 0 with certainty 2152 z_exp = z_exp - EXP_P1; 2153 e3 = e3 - 1; 2154 res.w[1] = 2155 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; 2156 res.w[0] = 0x378d8e6400000000ull - R64; 2157 } else { 2158 // We want R64 to be the top digit of C4, but we actually 2159 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, 2160 // because the top digit is (C4 * 10^(-q4+1))RZ 2161 // however, if incr_exp = 1 then R64 = 10 with certainty 2162 if (incr_exp) { 2163 R64 = 10; 2164 } 2165 // the result is inexact as C4 has more than 1 significant digit 2166 // and C3 * 10^scale = 10^33 2167 // example of case that is treated here: 2168 // 100...0 * 10^e3 - 0.41 * 10^e3 = 2169 // 0999...9.59 * 10^e3 -> 
rounds to 99...96*10^(e3-1) 2170 // note that (e3 > expmin} 2171 // in order to round, subtract R64 from 10^34 and then compare 2172 // C4 - R64 * 10^(q4-1) with 1/2 ulp 2173 // calculate 10^34 - R64 2174 res.w[1] = 0x0001ed09bead87c0ull; 2175 res.w[0] = 0x378d8e6400000000ull - R64; 2176 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand 2177 // calculate C4 - R64 * 10^(q4-1); this is a rare case and 2178 // R64 is small, 1 <= R64 <= 9 2179 e3 = e3 - 1; 2180 if (is_inexact_lt_midpoint) { 2181 is_inexact_lt_midpoint = 0; 2182 is_inexact_gt_midpoint = 1; 2183 } else if (is_inexact_gt_midpoint) { 2184 is_inexact_gt_midpoint = 0; 2185 is_inexact_lt_midpoint = 1; 2186 } else if (is_midpoint_lt_even) { 2187 is_midpoint_lt_even = 0; 2188 is_midpoint_gt_even = 1; 2189 } else if (is_midpoint_gt_even) { 2190 is_midpoint_gt_even = 0; 2191 is_midpoint_lt_even = 1; 2192 } else { 2193 ; 2194 } 2195 // the result is always inexact, and never tiny 2196 // check for overflow for RN 2197 if (e3 > expmax) { 2198 if (rnd_mode == ROUNDING_TO_NEAREST) { 2199 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2200 res.w[0] = 0x0000000000000000ull; 2201 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2202 } else { 2203 rounding_correction (rnd_mode, 2204 is_inexact_lt_midpoint, 2205 is_inexact_gt_midpoint, 2206 is_midpoint_lt_even, 2207 is_midpoint_gt_even, e3, &res, 2208 pfpsf); 2209 } 2210 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2211 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2212 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2213 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2214 BID_SWAP128 (res); 2215 BID_RETURN (res) 2216 } 2217 // set the inexact flag 2218 *pfpsf |= INEXACT_EXCEPTION; 2219 res.w[1] = 2220 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; 2221 if (rnd_mode != ROUNDING_TO_NEAREST) { 2222 rounding_correction (rnd_mode, 2223 is_inexact_lt_midpoint, 2224 is_inexact_gt_midpoint, 2225 is_midpoint_lt_even, 2226 
is_midpoint_gt_even, e3, &res, 2227 pfpsf); 2228 } 2229 z_exp = res.w[1] & MASK_EXP; 2230 } // end result is inexact 2231 } // end q4 > 1 2232 } else { // if (e3 = emin) 2233 // if e3 = expmin the result is also tiny (the condition for 2234 // tininess is C4 > 050...0 [q4 digits] which is met because 2235 // the msd of C4 is not zero) 2236 // the result is tiny and inexact in all rounding modes; 2237 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, 2238 // gt_half_ulp to calculate) 2239 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged 2240 2241 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp 2242 if (gt_half_ulp) { // res = 10^33 - 1 2243 res.w[1] = 0x0000314dc6448d93ull; 2244 res.w[0] = 0x38c15b09ffffffffull; 2245 } else { 2246 res.w[1] = 0x0000314dc6448d93ull; 2247 res.w[0] = 0x38c15b0a00000000ull; 2248 } 2249 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2250 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later 2251 2252 if (eq_half_ulp) { 2253 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2254 } else if (lt_half_ulp) { 2255 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value 2256 } else { // if (gt_half_ulp) 2257 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2258 } 2259 2260 if (rnd_mode != ROUNDING_TO_NEAREST) { 2261 rounding_correction (rnd_mode, 2262 is_inexact_lt_midpoint, 2263 is_inexact_gt_midpoint, 2264 is_midpoint_lt_even, 2265 is_midpoint_gt_even, e3, &res, 2266 pfpsf); 2267 z_exp = res.w[1] & MASK_EXP; 2268 } 2269 } // end e3 = emin 2270 // set the inexact flag (if the result was not exact) 2271 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2272 is_midpoint_lt_even || is_midpoint_gt_even) 2273 *pfpsf |= INEXACT_EXCEPTION; 2274 } // end 10^33 2275 } // end if (p_sign != z_sign) 2276 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2277 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2278 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2279 
*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2280 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2281 BID_SWAP128 (res); 2282 BID_RETURN (res) 2283 2284 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2285 (q3 <= delta && delta + q4 <= p34) || // Case (3) 2286 (delta < q3 && p34 < delta + q4) || // Case (4) 2287 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) 2288 (delta + q4 < q3)) && // Case (6) 2289 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) 2290 2291 // the result has the sign of z 2292 2293 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2294 (delta < q3 && p34 < delta + q4)) { // Case (4) 2295 // round first the sum x * y + z with unbounded exponent 2296 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, 2297 // 1 <= scale <= 33 2298 // calculate res = C3 * 10^scale 2299 scale = p34 - q3; 2300 x0 = delta + q4 - p34; 2301 } else if (delta + q4 < q3) { // Case (6) 2302 // make Case (6) look like Case (3) or Case (5) with scale = 0 2303 // by scaling up C4 by 10^(q3 - delta - q4) 2304 scale = q3 - delta - q4; // 1 <= scale <= 33 2305 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits 2306 if (scale <= 19) { // 10^scale fits in 64 bits 2307 // 64 x 64 C4.w[0] * ten2k64[scale] 2308 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); 2309 } else { // 10^scale fits in 128 bits 2310 // 64 x 128 C4.w[0] * ten2k128[scale - 20] 2311 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); 2312 } 2313 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 2314 // 64 x 128 ten2k64[scale] * C4 2315 __mul_128x64_to_128 (P128, ten2k64[scale], C4); 2316 } 2317 C4.w[0] = P128.w[0]; 2318 C4.w[1] = P128.w[1]; 2319 // e4 does not need adjustment, as it is not used from this point on 2320 scale = 0; 2321 x0 = 0; 2322 // now Case (6) looks like Case (3) or Case (5) with scale = 0 2323 } else { // if Case (3) or Case (5) 2324 // Note: Case (3) 
is similar to Case (2), but scale differs and the 2325 // result is exact, unless it is tiny (so x0 = 0 when calculating the 2326 // result with unbounded exponent) 2327 2328 // calculate first the sum x * y + z with unbounded exponent (exact) 2329 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, 2330 // 1 <= scale <= 33 2331 // calculate res = C3 * 10^scale 2332 scale = delta + q4 - q3; 2333 x0 = 0; 2334 // Note: the comments which follow refer [mainly] to Case (2)] 2335 } 2336 2337 case2_repeat: 2338 if (scale == 0) { // this could happen e.g. if we return to case2_repeat 2339 // or in Case (4) 2340 res.w[1] = C3.w[1]; 2341 res.w[0] = C3.w[0]; 2342 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits 2343 if (scale <= 19) { // 10^scale fits in 64 bits 2344 // 64 x 64 C3.w[0] * ten2k64[scale] 2345 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); 2346 } else { // 10^scale fits in 128 bits 2347 // 64 x 128 C3.w[0] * ten2k128[scale - 20] 2348 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); 2349 } 2350 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 2351 // 64 x 128 ten2k64[scale] * C3 2352 __mul_128x64_to_128 (res, ten2k64[scale], C3); 2353 } 2354 // e3 is already calculated 2355 e3 = e3 - scale; 2356 // now res = C3 * 10^scale and e3 = e3 - scale 2357 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat 2358 // because the result was too small 2359 2360 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, 2361 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) 2362 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of 2363 // the rounding fits in 128 bits!) 
2364 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) 2365 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 2366 if (x0 == 0) { // this could happen only if we return to case2_repeat, or 2367 // for Case (3) or Case (6) 2368 R128.w[1] = C4.w[1]; 2369 R128.w[0] = C4.w[0]; 2370 } else if (q4 <= 18) { 2371 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 2372 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, 2373 &is_midpoint_lt_even, &is_midpoint_gt_even, 2374 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 2375 if (incr_exp) { 2376 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 2377 R64 = ten2k64[q4 - x0]; 2378 } 2379 R128.w[1] = 0; 2380 R128.w[0] = R64; 2381 } else if (q4 <= 38) { 2382 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 2383 P128.w[1] = C4.w[1]; 2384 P128.w[0] = C4.w[0]; 2385 round128_19_38 (q4, x0, P128, &R128, &incr_exp, 2386 &is_midpoint_lt_even, &is_midpoint_gt_even, 2387 &is_inexact_lt_midpoint, 2388 &is_inexact_gt_midpoint); 2389 if (incr_exp) { 2390 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 2391 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2392 R128.w[0] = ten2k64[q4 - x0]; 2393 // R128.w[1] stays 0 2394 } else { // 20 <= q4 - x0 <= 37 2395 R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2396 R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2397 } 2398 } 2399 } else if (q4 <= 57) { 2400 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 2401 P192.w[2] = C4.w[2]; 2402 P192.w[1] = C4.w[1]; 2403 P192.w[0] = C4.w[0]; 2404 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2405 &is_midpoint_lt_even, &is_midpoint_gt_even, 2406 &is_inexact_lt_midpoint, 2407 &is_inexact_gt_midpoint); 2408 // R192.w[2] is always 0 2409 if (incr_exp) { 2410 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 2411 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2412 R192.w[0] = ten2k64[q4 - x0]; 2413 // R192.w[1] stays 0 2414 // R192.w[2] stays 0 2415 } else { // 20 <= 
q4 - x0 <= 33 2416 R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2417 R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2418 // R192.w[2] stays 0 2419 } 2420 } 2421 R128.w[1] = R192.w[1]; 2422 R128.w[0] = R192.w[0]; 2423 } else { 2424 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 2425 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2426 &is_midpoint_lt_even, &is_midpoint_gt_even, 2427 &is_inexact_lt_midpoint, 2428 &is_inexact_gt_midpoint); 2429 // R256.w[3] and R256.w[2] are always 0 2430 if (incr_exp) { 2431 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 2432 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2433 R256.w[0] = ten2k64[q4 - x0]; 2434 // R256.w[1] stays 0 2435 // R256.w[2] stays 0 2436 // R256.w[3] stays 0 2437 } else { // 20 <= q4 - x0 <= 33 2438 R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; 2439 R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; 2440 // R256.w[2] stays 0 2441 // R256.w[3] stays 0 2442 } 2443 } 2444 R128.w[1] = R256.w[1]; 2445 R128.w[0] = R256.w[0]; 2446 } 2447 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, 2448 // rounded to nearest, which were copied into R128 2449 if (z_sign == p_sign) { 2450 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale 2451 // the sum can result in [up to] p34 or p34 + 1 digits 2452 res.w[0] = res.w[0] + R128.w[0]; 2453 res.w[1] = res.w[1] + R128.w[1]; 2454 if (res.w[0] < R128.w[0]) 2455 res.w[1]++; // carry 2456 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 2457 if (res.w[1] > 0x0001ed09bead87c0ull || 2458 (res.w[1] == 0x0001ed09bead87c0ull && 2459 res.w[0] > 0x378d8e63ffffffffull)) { 2460 // avoid double rounding error 2461 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2462 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2463 is_midpoint_lt_even0 = is_midpoint_lt_even; 2464 is_midpoint_gt_even0 = is_midpoint_gt_even; 2465 is_inexact_lt_midpoint = 0; 2466 is_inexact_gt_midpoint = 0; 2467 is_midpoint_lt_even = 0; 2468 is_midpoint_gt_even = 0; 2469 
P128.w[1] = res.w[1]; 2470 P128.w[0] = res.w[0]; 2471 round128_19_38 (35, 1, P128, &res, &incr_exp, 2472 &is_midpoint_lt_even, &is_midpoint_gt_even, 2473 &is_inexact_lt_midpoint, 2474 &is_inexact_gt_midpoint); 2475 // incr_exp is 0 with certainty in this case 2476 // avoid a double rounding error 2477 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2478 is_midpoint_lt_even) { // double rounding error upward 2479 // res = res - 1 2480 res.w[0]--; 2481 if (res.w[0] == 0xffffffffffffffffull) 2482 res.w[1]--; 2483 // Note: a double rounding error upward is not possible; for this 2484 // the result after the first rounding would have to be 99...95 2485 // (35 digits in all), possibly followed by a number of zeros; this 2486 // not possible in Cases (2)-(6) or (15)-(17) which may get here 2487 is_midpoint_lt_even = 0; 2488 is_inexact_lt_midpoint = 1; 2489 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2490 is_midpoint_gt_even) { // double rounding error downward 2491 // res = res + 1 2492 res.w[0]++; 2493 if (res.w[0] == 0) 2494 res.w[1]++; 2495 is_midpoint_gt_even = 0; 2496 is_inexact_gt_midpoint = 1; 2497 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2498 !is_inexact_lt_midpoint 2499 && !is_inexact_gt_midpoint) { 2500 // if this second rounding was exact the result may still be 2501 // inexact because of the first rounding 2502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2503 is_inexact_gt_midpoint = 1; 2504 } 2505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2506 is_inexact_lt_midpoint = 1; 2507 } 2508 } else if (is_midpoint_gt_even && 2509 (is_inexact_gt_midpoint0 2510 || is_midpoint_lt_even0)) { 2511 // pulled up to a midpoint 2512 is_inexact_lt_midpoint = 1; 2513 is_inexact_gt_midpoint = 0; 2514 is_midpoint_lt_even = 0; 2515 is_midpoint_gt_even = 0; 2516 } else if (is_midpoint_lt_even && 2517 (is_inexact_lt_midpoint0 2518 || is_midpoint_gt_even0)) { 2519 // pulled down to a midpoint 2520 
is_inexact_lt_midpoint = 0; 2521 is_inexact_gt_midpoint = 1; 2522 is_midpoint_lt_even = 0; 2523 is_midpoint_gt_even = 0; 2524 } else { 2525 ; 2526 } 2527 // adjust exponent 2528 e3 = e3 + 1; 2529 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2530 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2531 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2532 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2533 is_inexact_lt_midpoint = 1; 2534 } 2535 } 2536 } else { 2537 // this is the result rounded with unbounded exponent, unless a 2538 // correction is needed 2539 res.w[1] = res.w[1] & MASK_COEFF; 2540 if (lsb == 1) { 2541 if (is_midpoint_gt_even) { 2542 // res = res + 1 2543 is_midpoint_gt_even = 0; 2544 is_midpoint_lt_even = 1; 2545 res.w[0]++; 2546 if (res.w[0] == 0x0) 2547 res.w[1]++; 2548 // check for rounding overflow 2549 if (res.w[1] == 0x0001ed09bead87c0ull && 2550 res.w[0] == 0x378d8e6400000000ull) { 2551 // res = 10^34 => rounding overflow 2552 res.w[1] = 0x0000314dc6448d93ull; 2553 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2554 e3++; 2555 } 2556 } else if (is_midpoint_lt_even) { 2557 // res = res - 1 2558 is_midpoint_lt_even = 0; 2559 is_midpoint_gt_even = 1; 2560 res.w[0]--; 2561 if (res.w[0] == 0xffffffffffffffffull) 2562 res.w[1]--; 2563 // if the result is pure zero, the sign depends on the rounding 2564 // mode (x*y and z had opposite signs) 2565 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2566 if (rnd_mode != ROUNDING_DOWN) 2567 z_sign = 0x0000000000000000ull; 2568 else 2569 z_sign = 0x8000000000000000ull; 2570 // the exponent is max (e3, expmin) 2571 res.w[1] = 0x0; 2572 res.w[0] = 0x0; 2573 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2574 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2575 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2576 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2577 BID_SWAP128 (res); 2578 BID_RETURN (res) 2579 } 2580 } else { 2581 ; 2582 } 2583 } 2584 } 2585 } else { // if 
(z_sign != p_sign) 2586 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 2587 // used to swap rounding indicators if p_sign != z_sign 2588 // the sum can result in [up to] p34 or p34 - 1 digits 2589 tmp64 = res.w[0]; 2590 res.w[0] = res.w[0] - R128.w[0]; 2591 res.w[1] = res.w[1] - R128.w[1]; 2592 if (res.w[0] > tmp64) 2593 res.w[1]--; // borrow 2594 // if res < 10^33 and exp > expmin need to decrease x0 and 2595 // increase scale by 1 2596 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || 2597 (res.w[1] == 0x0000314dc6448d93ull && 2598 res.w[0] < 0x38c15b0a00000000ull)) || 2599 (is_inexact_lt_midpoint 2600 && res.w[1] == 0x0000314dc6448d93ull 2601 && res.w[0] == 0x38c15b0a00000000ull)) 2602 && x0 >= 1) { 2603 x0 = x0 - 1; 2604 // first restore e3, otherwise it will be too small 2605 e3 = e3 + scale; 2606 scale = scale + 1; 2607 is_inexact_lt_midpoint = 0; 2608 is_inexact_gt_midpoint = 0; 2609 is_midpoint_lt_even = 0; 2610 is_midpoint_gt_even = 0; 2611 incr_exp = 0; 2612 goto case2_repeat; 2613 } 2614 // else this is the result rounded with unbounded exponent; 2615 // because the result has opposite sign to that of C4 which was 2616 // rounded, need to change the rounding indicators 2617 if (is_inexact_lt_midpoint) { 2618 is_inexact_lt_midpoint = 0; 2619 is_inexact_gt_midpoint = 1; 2620 } else if (is_inexact_gt_midpoint) { 2621 is_inexact_gt_midpoint = 0; 2622 is_inexact_lt_midpoint = 1; 2623 } else if (lsb == 0) { 2624 if (is_midpoint_lt_even) { 2625 is_midpoint_lt_even = 0; 2626 is_midpoint_gt_even = 1; 2627 } else if (is_midpoint_gt_even) { 2628 is_midpoint_gt_even = 0; 2629 is_midpoint_lt_even = 1; 2630 } else { 2631 ; 2632 } 2633 } else if (lsb == 1) { 2634 if (is_midpoint_lt_even) { 2635 // res = res + 1 2636 res.w[0]++; 2637 if (res.w[0] == 0x0) 2638 res.w[1]++; 2639 // check for rounding overflow 2640 if (res.w[1] == 0x0001ed09bead87c0ull && 2641 res.w[0] == 0x378d8e6400000000ull) { 2642 // res = 10^34 => rounding overflow 2643 
res.w[1] = 0x0000314dc6448d93ull; 2644 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2645 e3++; 2646 } 2647 } else if (is_midpoint_gt_even) { 2648 // res = res - 1 2649 res.w[0]--; 2650 if (res.w[0] == 0xffffffffffffffffull) 2651 res.w[1]--; 2652 // if the result is pure zero, the sign depends on the rounding 2653 // mode (x*y and z had opposite signs) 2654 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2655 if (rnd_mode != ROUNDING_DOWN) 2656 z_sign = 0x0000000000000000ull; 2657 else 2658 z_sign = 0x8000000000000000ull; 2659 // the exponent is max (e3, expmin) 2660 res.w[1] = 0x0; 2661 res.w[0] = 0x0; 2662 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2663 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2664 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2665 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2666 BID_SWAP128 (res); 2667 BID_RETURN (res) 2668 } 2669 } else { 2670 ; 2671 } 2672 } else { 2673 ; 2674 } 2675 } 2676 // check for underflow 2677 if (e3 == expmin) { // and if significand < 10^33 => result is tiny 2678 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 2679 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 2680 res.w[0] < 0x38c15b0a00000000ull)) { 2681 is_tiny = 1; 2682 } 2683 } else if (e3 < expmin) { 2684 // the result is tiny, so we must truncate more of res 2685 is_tiny = 1; 2686 x0 = expmin - e3; 2687 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2688 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2689 is_midpoint_lt_even0 = is_midpoint_lt_even; 2690 is_midpoint_gt_even0 = is_midpoint_gt_even; 2691 is_inexact_lt_midpoint = 0; 2692 is_inexact_gt_midpoint = 0; 2693 is_midpoint_lt_even = 0; 2694 is_midpoint_gt_even = 0; 2695 // determine the number of decimal digits in res 2696 if (res.w[1] == 0x0) { 2697 // between 1 and 19 digits 2698 for (ind = 1; ind <= 19; ind++) { 2699 if (res.w[0] < ten2k64[ind]) { 2700 break; 2701 } 2702 } 2703 // ind digits 2704 } else if (res.w[1] < ten2k128[0].w[1] || 2705 (res.w[1] == 
ten2k128[0].w[1] 2706 && res.w[0] < ten2k128[0].w[0])) { 2707 // 20 digits 2708 ind = 20; 2709 } else { // between 21 and 38 digits 2710 for (ind = 1; ind <= 18; ind++) { 2711 if (res.w[1] < ten2k128[ind].w[1] || 2712 (res.w[1] == ten2k128[ind].w[1] && 2713 res.w[0] < ten2k128[ind].w[0])) { 2714 break; 2715 } 2716 } 2717 // ind + 20 digits 2718 ind = ind + 20; 2719 } 2720 2721 // at this point ind >= x0; because delta >= 2 on this path, the case 2722 // ind = x0 can occur only in Case (2) or case (3), when C3 has one 2723 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 2724 // the signs of x * y and z are opposite, and through cancellation 2725 // the most significant decimal digit in res has the weight 2726 // 10^(emin-1); however, it is clear that in this case the most 2727 // significant digit is 9, so the result before rounding is 2728 // 0.9... * 10^emin 2729 // Otherwise, ind > x0 because there are non-zero decimal digits in the 2730 // result with weight of at least 10^emin, and correction for underflow 2731 // can be carried out using the round*_*_2_* () routines 2732 if (x0 == ind) { // the result before rounding is 0.9... 
* 10^emin 2733 res.w[1] = 0x0; 2734 res.w[0] = 0x1; 2735 is_inexact_gt_midpoint = 1; 2736 } else if (ind <= 18) { // check that 2 <= ind 2737 // 2 <= ind <= 18, 1 <= x0 <= 17 2738 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 2739 &is_midpoint_lt_even, &is_midpoint_gt_even, 2740 &is_inexact_lt_midpoint, 2741 &is_inexact_gt_midpoint); 2742 if (incr_exp) { 2743 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 2744 R64 = ten2k64[ind - x0]; 2745 } 2746 res.w[1] = 0; 2747 res.w[0] = R64; 2748 } else if (ind <= 38) { 2749 // 19 <= ind <= 38 2750 P128.w[1] = res.w[1]; 2751 P128.w[0] = res.w[0]; 2752 round128_19_38 (ind, x0, P128, &res, &incr_exp, 2753 &is_midpoint_lt_even, &is_midpoint_gt_even, 2754 &is_inexact_lt_midpoint, 2755 &is_inexact_gt_midpoint); 2756 if (incr_exp) { 2757 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 2758 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 2759 res.w[0] = ten2k64[ind - x0]; 2760 // res.w[1] stays 0 2761 } else { // 20 <= ind - x0 <= 37 2762 res.w[0] = ten2k128[ind - x0 - 20].w[0]; 2763 res.w[1] = ten2k128[ind - x0 - 20].w[1]; 2764 } 2765 } 2766 } 2767 // avoid a double rounding error 2768 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2769 is_midpoint_lt_even) { // double rounding error upward 2770 // res = res - 1 2771 res.w[0]--; 2772 if (res.w[0] == 0xffffffffffffffffull) 2773 res.w[1]--; 2774 // Note: a double rounding error upward is not possible; for this 2775 // the result after the first rounding would have to be 99...95 2776 // (35 digits in all), possibly followed by a number of zeros; this 2777 // not possible in Cases (2)-(6) which may get here 2778 is_midpoint_lt_even = 0; 2779 is_inexact_lt_midpoint = 1; 2780 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2781 is_midpoint_gt_even) { // double rounding error downward 2782 // res = res + 1 2783 res.w[0]++; 2784 if (res.w[0] == 0) 2785 res.w[1]++; 2786 is_midpoint_gt_even = 0; 2787 is_inexact_gt_midpoint 
= 1; 2788 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2789 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2790 // if this second rounding was exact the result may still be 2791 // inexact because of the first rounding 2792 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2793 is_inexact_gt_midpoint = 1; 2794 } 2795 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2796 is_inexact_lt_midpoint = 1; 2797 } 2798 } else if (is_midpoint_gt_even && 2799 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 2800 // pulled up to a midpoint 2801 is_inexact_lt_midpoint = 1; 2802 is_inexact_gt_midpoint = 0; 2803 is_midpoint_lt_even = 0; 2804 is_midpoint_gt_even = 0; 2805 } else if (is_midpoint_lt_even && 2806 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 2807 // pulled down to a midpoint 2808 is_inexact_lt_midpoint = 0; 2809 is_inexact_gt_midpoint = 1; 2810 is_midpoint_lt_even = 0; 2811 is_midpoint_gt_even = 0; 2812 } else { 2813 ; 2814 } 2815 // adjust exponent 2816 e3 = e3 + x0; 2817 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2818 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2819 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2820 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2821 is_inexact_lt_midpoint = 1; 2822 } 2823 } 2824 } else { 2825 ; // not underflow 2826 } 2827 // check for inexact result 2828 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2829 is_midpoint_lt_even || is_midpoint_gt_even) { 2830 // set the inexact flag 2831 *pfpsf |= INEXACT_EXCEPTION; 2832 if (is_tiny) 2833 *pfpsf |= UNDERFLOW_EXCEPTION; 2834 } 2835 // now check for significand = 10^34 (may have resulted from going 2836 // back to case2_repeat) 2837 if (res.w[1] == 0x0001ed09bead87c0ull && 2838 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 2839 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 2840 res.w[0] = 0x38c15b0a00000000ull; 2841 e3 = e3 + 1; 2842 } 2843 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | 
res.w[1]; 2844 // check for overflow 2845 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { 2846 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2847 res.w[0] = 0x0000000000000000ull; 2848 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 2849 } 2850 if (rnd_mode != ROUNDING_TO_NEAREST) { 2851 rounding_correction (rnd_mode, 2852 is_inexact_lt_midpoint, 2853 is_inexact_gt_midpoint, 2854 is_midpoint_lt_even, is_midpoint_gt_even, 2855 e3, &res, pfpsf); 2856 } 2857 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2858 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2859 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2860 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2861 BID_SWAP128 (res); 2862 BID_RETURN (res) 2863 2864 } else { 2865 2866 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and 2867 // the signs of x*y and z are opposite; in these cases massive 2868 // cancellation can occur, so it is better to scale either C3 or C4 and 2869 // to perform the subtraction before rounding; rounding is performed 2870 // next, depending on the number of decimal digits in the result and on 2871 // the exponent value 2872 // Note: overlow is not possible in this case 2873 // this is similar to Cases (15), (16), and (17) 2874 2875 if (delta + q4 < q3) { // from Case (6) 2876 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and 2877 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) 2878 // and call add_and_round; delta stays positive 2879 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 2880 P128.w[1] = C3.w[1]; 2881 P128.w[0] = C3.w[0]; 2882 C3.w[1] = C4.w[1]; 2883 C3.w[0] = C4.w[0]; 2884 C4.w[1] = P128.w[1]; 2885 C4.w[0] = P128.w[0]; 2886 ind = q3; 2887 q3 = q4; 2888 q4 = ind; 2889 ind = e3; 2890 e3 = e4; 2891 e4 = ind; 2892 tmp_sign = z_sign; 2893 z_sign = p_sign; 2894 p_sign = tmp_sign; 2895 } else { // from Cases (2), (3), (4), (5) 2896 // In Cases (2), (3), (4), (5) with 0 <= 
delta <= 1 C3 has to be 2897 // scaled up by q4 + delta - q3; this is the same as in Cases (15), 2898 // (16), and (17) if we just change the sign of delta 2899 delta = -delta; 2900 } 2901 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 2902 rnd_mode, &is_midpoint_lt_even, 2903 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 2904 &is_inexact_gt_midpoint, pfpsf, &res); 2905 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2906 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2907 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2908 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2909 BID_SWAP128 (res); 2910 BID_RETURN (res) 2911 2912 } 2913 2914 } else { // if delta < 0 2915 2916 delta = -delta; 2917 2918 if (p34 < q4 && q4 <= delta) { // Case (7) 2919 2920 // truncate C4 to p34 digits into res 2921 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 2922 x0 = q4 - p34; 2923 if (q4 <= 38) { 2924 P128.w[1] = C4.w[1]; 2925 P128.w[0] = C4.w[0]; 2926 round128_19_38 (q4, x0, P128, &res, &incr_exp, 2927 &is_midpoint_lt_even, &is_midpoint_gt_even, 2928 &is_inexact_lt_midpoint, 2929 &is_inexact_gt_midpoint); 2930 } else if (q4 <= 57) { // 35 <= q4 <= 57 2931 P192.w[2] = C4.w[2]; 2932 P192.w[1] = C4.w[1]; 2933 P192.w[0] = C4.w[0]; 2934 round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2935 &is_midpoint_lt_even, &is_midpoint_gt_even, 2936 &is_inexact_lt_midpoint, 2937 &is_inexact_gt_midpoint); 2938 res.w[0] = R192.w[0]; 2939 res.w[1] = R192.w[1]; 2940 } else { // if (q4 <= 68) 2941 round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2942 &is_midpoint_lt_even, &is_midpoint_gt_even, 2943 &is_inexact_lt_midpoint, 2944 &is_inexact_gt_midpoint); 2945 res.w[0] = R256.w[0]; 2946 res.w[1] = R256.w[1]; 2947 } 2948 e4 = e4 + x0; 2949 if (incr_exp) { 2950 e4 = e4 + 1; 2951 } 2952 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2953 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2954 // if C4 rounded to p34 digits is exact then the result is inexact, 2955 // in 
a way that depends on the signs of x * y and z 2956 if (p_sign == z_sign) { 2957 is_inexact_lt_midpoint = 1; 2958 } else { // if (p_sign != z_sign) 2959 if (res.w[1] != 0x0000314dc6448d93ull || 2960 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 2961 is_inexact_gt_midpoint = 1; 2962 } else { // res = 10^33 and exact is a special case 2963 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 2964 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 2965 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 2966 // Note: ulp is really ulp/10 (after borrow which propagates to msd) 2967 if (delta > p34 + 1) { // C3 < 1/2 2968 // res = 10^33, unchanged 2969 is_inexact_gt_midpoint = 1; 2970 } else { // if (delta == p34 + 1) 2971 if (q3 <= 19) { 2972 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp 2973 // res = 10^33, unchanged 2974 is_inexact_gt_midpoint = 1; 2975 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp 2976 // res = 10^33, unchanged 2977 is_midpoint_lt_even = 1; 2978 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp 2979 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 2980 res.w[0] = 0x378d8e63ffffffffull; 2981 e4 = e4 - 1; 2982 is_inexact_lt_midpoint = 1; 2983 } 2984 } else { // if (20 <= q3 <=34) 2985 if (C3.w[1] < midpoint128[q3 - 20].w[1] || 2986 (C3.w[1] == midpoint128[q3 - 20].w[1] && 2987 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp 2988 // res = 10^33, unchanged 2989 is_inexact_gt_midpoint = 1; 2990 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && 2991 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp 2992 // res = 10^33, unchanged 2993 is_midpoint_lt_even = 1; 2994 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp 2995 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 2996 res.w[0] = 0x378d8e63ffffffffull; 2997 e4 = e4 - 1; 2998 is_inexact_lt_midpoint = 1; 2999 } 3000 } 3001 } 3002 } 3003 } 3004 } else if (is_midpoint_lt_even) { 3005 if (z_sign != p_sign) { 3006 // 
needs correction: res = res - 1 3007 res.w[0] = res.w[0] - 1; 3008 if (res.w[0] == 0xffffffffffffffffull) 3009 res.w[1]--; 3010 // if it is (10^33-1)*10^e4 then the corect result is 3011 // (10^34-1)*10(e4-1) 3012 if (res.w[1] == 0x0000314dc6448d93ull && 3013 res.w[0] == 0x38c15b09ffffffffull) { 3014 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3015 res.w[0] = 0x378d8e63ffffffffull; 3016 e4 = e4 - 1; 3017 } 3018 is_midpoint_lt_even = 0; 3019 is_inexact_lt_midpoint = 1; 3020 } else { // if (z_sign == p_sign) 3021 is_midpoint_lt_even = 0; 3022 is_inexact_gt_midpoint = 1; 3023 } 3024 } else if (is_midpoint_gt_even) { 3025 if (z_sign == p_sign) { 3026 // needs correction: res = res + 1 (cannot cross in the next binade) 3027 res.w[0] = res.w[0] + 1; 3028 if (res.w[0] == 0x0000000000000000ull) 3029 res.w[1]++; 3030 is_midpoint_gt_even = 0; 3031 is_inexact_gt_midpoint = 1; 3032 } else { // if (z_sign != p_sign) 3033 is_midpoint_gt_even = 0; 3034 is_inexact_lt_midpoint = 1; 3035 } 3036 } else { 3037 ; // the rounded result is already correct 3038 } 3039 // check for overflow 3040 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { 3041 res.w[1] = p_sign | 0x7800000000000000ull; 3042 res.w[0] = 0x0000000000000000ull; 3043 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); 3044 } else { // no overflow or not RN 3045 p_exp = ((UINT64) (e4 + 6176) << 49); 3046 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 3047 } 3048 if (rnd_mode != ROUNDING_TO_NEAREST) { 3049 rounding_correction (rnd_mode, 3050 is_inexact_lt_midpoint, 3051 is_inexact_gt_midpoint, 3052 is_midpoint_lt_even, is_midpoint_gt_even, 3053 e4, &res, pfpsf); 3054 } 3055 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 3056 is_midpoint_lt_even || is_midpoint_gt_even) { 3057 // set the inexact flag 3058 *pfpsf |= INEXACT_EXCEPTION; 3059 } 3060 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3061 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3062 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 
3063 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3064 BID_SWAP128 (res); 3065 BID_RETURN (res) 3066 3067 } else if ((q4 <= p34 && p34 <= delta) || // Case (8) 3068 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) 3069 (q4 <= delta && delta + q3 <= p34) || // Case (10) 3070 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) 3071 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) 3072 (delta + q3 < q4 && q4 <= p34)) { // Case (18) 3073 3074 // Case (8) is similar to Case (1), with C3 and C4 swapped 3075 // Case (9) is similar to Case (2), with C3 and C4 swapped 3076 // Case (10) is similar to Case (3), with C3 and C4 swapped 3077 // Case (13) is similar to Case (4), with C3 and C4 swapped 3078 // Case (14) is similar to Case (5), with C3 and C4 swapped 3079 // Case (18) is similar to Case (6), with C3 and C4 swapped 3080 3081 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) 3082 // and go back to delta_ge_zero 3083 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 3084 P128.w[1] = C3.w[1]; 3085 P128.w[0] = C3.w[0]; 3086 C3.w[1] = C4.w[1]; 3087 C3.w[0] = C4.w[0]; 3088 C4.w[1] = P128.w[1]; 3089 C4.w[0] = P128.w[0]; 3090 ind = q3; 3091 q3 = q4; 3092 q4 = ind; 3093 ind = e3; 3094 e3 = e4; 3095 e4 = ind; 3096 tmp_sign = z_sign; 3097 z_sign = p_sign; 3098 p_sign = tmp_sign; 3099 tmp.ui64 = z_exp; 3100 z_exp = p_exp; 3101 p_exp = tmp.ui64; 3102 goto delta_ge_zero; 3103 3104 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) 3105 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) 3106 3107 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, 3108 // 1 <= x0 <= q3 - 1 <= p34 - 1 3109 x0 = e4 - e3; // or x0 = delta + q3 - q4 3110 if (q3 <= 18) { // 2 <= q3 <= 18 3111 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, 3112 &is_midpoint_lt_even, &is_midpoint_gt_even, 3113 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 3114 // C3.w[1] = 0; 
3115 C3.w[0] = R64; 3116 } else if (q3 <= 38) { 3117 round128_19_38 (q3, x0, C3, &R128, &incr_exp, 3118 &is_midpoint_lt_even, &is_midpoint_gt_even, 3119 &is_inexact_lt_midpoint, 3120 &is_inexact_gt_midpoint); 3121 C3.w[1] = R128.w[1]; 3122 C3.w[0] = R128.w[0]; 3123 } 3124 // the rounded result has q3 - x0 digits 3125 // we want the exponent to be e4, so if incr_exp = 1 then 3126 // multiply the rounded result by 10 - it will still fit in 113 bits 3127 if (incr_exp) { 3128 // 64 x 128 -> 128 3129 P128.w[1] = C3.w[1]; 3130 P128.w[0] = C3.w[0]; 3131 __mul_64x128_to_128 (C3, ten2k64[1], P128); 3132 } 3133 e3 = e3 + x0; // this is e4 3134 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 3135 // the result will have the sign of x * y; the exponent is e4 3136 R256.w[3] = 0; 3137 R256.w[2] = 0; 3138 R256.w[1] = C3.w[1]; 3139 R256.w[0] = C3.w[0]; 3140 if (p_sign == z_sign) { // R256 = C4 + R256 3141 add256 (C4, R256, &R256); 3142 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 3143 sub256 (C4, R256, &R256); // the result cannot be pure zero 3144 // because the result has opposite sign to that of R256 which was 3145 // rounded, need to change the rounding indicators 3146 lsb = C4.w[0] & 0x01; 3147 if (is_inexact_lt_midpoint) { 3148 is_inexact_lt_midpoint = 0; 3149 is_inexact_gt_midpoint = 1; 3150 } else if (is_inexact_gt_midpoint) { 3151 is_inexact_gt_midpoint = 0; 3152 is_inexact_lt_midpoint = 1; 3153 } else if (lsb == 0) { 3154 if (is_midpoint_lt_even) { 3155 is_midpoint_lt_even = 0; 3156 is_midpoint_gt_even = 1; 3157 } else if (is_midpoint_gt_even) { 3158 is_midpoint_gt_even = 0; 3159 is_midpoint_lt_even = 1; 3160 } else { 3161 ; 3162 } 3163 } else if (lsb == 1) { 3164 if (is_midpoint_lt_even) { 3165 // res = res + 1 3166 R256.w[0]++; 3167 if (R256.w[0] == 0x0) { 3168 R256.w[1]++; 3169 if (R256.w[1] == 0x0) { 3170 R256.w[2]++; 3171 if (R256.w[2] == 0x0) { 3172 R256.w[3]++; 3173 } 3174 } 3175 } 3176 // no check for rounding overflow - R256 
was a difference 3177 } else if (is_midpoint_gt_even) { 3178 // res = res - 1 3179 R256.w[0]--; 3180 if (R256.w[0] == 0xffffffffffffffffull) { 3181 R256.w[1]--; 3182 if (R256.w[1] == 0xffffffffffffffffull) { 3183 R256.w[2]--; 3184 if (R256.w[2] == 0xffffffffffffffffull) { 3185 R256.w[3]--; 3186 } 3187 } 3188 } 3189 } else { 3190 ; 3191 } 3192 } else { 3193 ; 3194 } 3195 } 3196 // determine the number of decimal digits in R256 3197 ind = nr_digits256 (R256); // ind >= p34 3198 // if R256 is sum, then ind > p34; if R256 is a difference, then 3199 // ind >= p34; this means that we can calculate the result rounded to 3200 // the destination precision, with unbounded exponent, starting from R256 3201 // and using the indicators from the rounding of C3 to avoid a double 3202 // rounding error 3203 3204 if (ind < p34) { 3205 ; 3206 } else if (ind == p34) { 3207 // the result rounded to the destination precision with 3208 // unbounded exponent 3209 // is (-1)^p_sign * R256 * 10^e4 3210 res.w[1] = R256.w[1]; 3211 res.w[0] = R256.w[0]; 3212 } else { // if (ind > p34) 3213 // if more than P digits, round to nearest to P digits 3214 // round R256 to p34 digits 3215 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 3216 // save C3 rounding indicators to help avoid double rounding error 3217 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3218 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3219 is_midpoint_lt_even0 = is_midpoint_lt_even; 3220 is_midpoint_gt_even0 = is_midpoint_gt_even; 3221 // initialize rounding indicators 3222 is_inexact_lt_midpoint = 0; 3223 is_inexact_gt_midpoint = 0; 3224 is_midpoint_lt_even = 0; 3225 is_midpoint_gt_even = 0; 3226 // round to p34 digits; the result fits in 113 bits 3227 if (ind <= 38) { 3228 P128.w[1] = R256.w[1]; 3229 P128.w[0] = R256.w[0]; 3230 round128_19_38 (ind, x0, P128, &R128, &incr_exp, 3231 &is_midpoint_lt_even, &is_midpoint_gt_even, 3232 &is_inexact_lt_midpoint, 3233 &is_inexact_gt_midpoint); 3234 } else if (ind <= 57) { 
3235 P192.w[2] = R256.w[2]; 3236 P192.w[1] = R256.w[1]; 3237 P192.w[0] = R256.w[0]; 3238 round192_39_57 (ind, x0, P192, &R192, &incr_exp, 3239 &is_midpoint_lt_even, &is_midpoint_gt_even, 3240 &is_inexact_lt_midpoint, 3241 &is_inexact_gt_midpoint); 3242 R128.w[1] = R192.w[1]; 3243 R128.w[0] = R192.w[0]; 3244 } else { // if (ind <= 68) 3245 round256_58_76 (ind, x0, R256, &R256, &incr_exp, 3246 &is_midpoint_lt_even, &is_midpoint_gt_even, 3247 &is_inexact_lt_midpoint, 3248 &is_inexact_gt_midpoint); 3249 R128.w[1] = R256.w[1]; 3250 R128.w[0] = R256.w[0]; 3251 } 3252 // the rounded result has p34 = 34 digits 3253 e4 = e4 + x0 + incr_exp; 3254 3255 res.w[1] = R128.w[1]; 3256 res.w[0] = R128.w[0]; 3257 3258 // avoid a double rounding error 3259 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3260 is_midpoint_lt_even) { // double rounding error upward 3261 // res = res - 1 3262 res.w[0]--; 3263 if (res.w[0] == 0xffffffffffffffffull) 3264 res.w[1]--; 3265 is_midpoint_lt_even = 0; 3266 is_inexact_lt_midpoint = 1; 3267 // Note: a double rounding error upward is not possible; for this 3268 // the result after the first rounding would have to be 99...95 3269 // (35 digits in all), possibly followed by a number of zeros; this 3270 // not possible in Cases (2)-(6) or (15)-(17) which may get here 3271 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent 3272 if (res.w[1] == 0x0000314dc6448d93ull && 3273 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 3274 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3275 res.w[0] = 0x378d8e63ffffffffull; 3276 e4--; 3277 } 3278 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3279 is_midpoint_gt_even) { // double rounding error downward 3280 // res = res + 1 3281 res.w[0]++; 3282 if (res.w[0] == 0) 3283 res.w[1]++; 3284 is_midpoint_gt_even = 0; 3285 is_inexact_gt_midpoint = 1; 3286 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3287 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 3288 // if 
this second rounding was exact the result may still be 3289 // inexact because of the first rounding 3290 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3291 is_inexact_gt_midpoint = 1; 3292 } 3293 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3294 is_inexact_lt_midpoint = 1; 3295 } 3296 } else if (is_midpoint_gt_even && 3297 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 3298 // pulled up to a midpoint 3299 is_inexact_lt_midpoint = 1; 3300 is_inexact_gt_midpoint = 0; 3301 is_midpoint_lt_even = 0; 3302 is_midpoint_gt_even = 0; 3303 } else if (is_midpoint_lt_even && 3304 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 3305 // pulled down to a midpoint 3306 is_inexact_lt_midpoint = 0; 3307 is_inexact_gt_midpoint = 1; 3308 is_midpoint_lt_even = 0; 3309 is_midpoint_gt_even = 0; 3310 } else { 3311 ; 3312 } 3313 } 3314 3315 // determine tininess 3316 if (rnd_mode == ROUNDING_TO_NEAREST) { 3317 if (e4 < expmin) { 3318 is_tiny = 1; // for other rounding modes apply correction 3319 } 3320 } else { 3321 // for RM, RP, RZ, RA apply correction in order to determine tininess 3322 // but do not save the result; apply the correction to 3323 // (-1)^p_sign * res * 10^0 3324 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; 3325 P128.w[0] = res.w[0]; 3326 rounding_correction (rnd_mode, 3327 is_inexact_lt_midpoint, 3328 is_inexact_gt_midpoint, 3329 is_midpoint_lt_even, is_midpoint_gt_even, 3330 0, &P128, pfpsf); 3331 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 3332 // the number of digits in the significand is p34 = 34 3333 if (e4 + scale < expmin) { 3334 is_tiny = 1; 3335 } 3336 } 3337 3338 // the result rounded to the destination precision with unbounded exponent 3339 // is (-1)^p_sign * res * 10^e4 3340 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN 3341 // res.w[0] unchanged; 3342 // Note: res is correct only if expmin <= e4 <= expmax 3343 ind = p34; // the number of decimal digits in the signifcand of res 
3344 3345 // at this point we have the result rounded with unbounded exponent in 3346 // res and we know its tininess: 3347 // res = (-1)^p_sign * significand * 10^e4, 3348 // where q (significand) = ind = p34 3349 // Note: res is correct only if expmin <= e4 <= expmax 3350 3351 // check for overflow if RN 3352 if (rnd_mode == ROUNDING_TO_NEAREST 3353 && (ind + e4) > (p34 + expmax)) { 3354 res.w[1] = p_sign | 0x7800000000000000ull; 3355 res.w[0] = 0x0000000000000000ull; 3356 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); 3357 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3358 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3359 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3360 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3361 BID_SWAP128 (res); 3362 BID_RETURN (res) 3363 } // else not overflow or not RN, so continue 3364 3365 // from this point on this is similar to the last part of the computation 3366 // for Cases (15), (16), (17) 3367 3368 // if (e4 >= expmin) we have the result rounded with bounded exponent 3369 if (e4 < expmin) { 3370 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res 3371 // where the result rounded [at most] once is 3372 // (-1)^p_sign * significand_res * 10^e4 3373 3374 // avoid double rounding error 3375 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3376 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3377 is_midpoint_lt_even0 = is_midpoint_lt_even; 3378 is_midpoint_gt_even0 = is_midpoint_gt_even; 3379 is_inexact_lt_midpoint = 0; 3380 is_inexact_gt_midpoint = 0; 3381 is_midpoint_lt_even = 0; 3382 is_midpoint_gt_even = 0; 3383 3384 if (x0 > ind) { 3385 // nothing is left of res when moving the decimal point left x0 digits 3386 is_inexact_lt_midpoint = 1; 3387 res.w[1] = p_sign | 0x0000000000000000ull; 3388 res.w[0] = 0x0000000000000000ull; 3389 e4 = expmin; 3390 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 3391 // this is <, =, or > 1/2 ulp 3392 // compare the ind-digit value in the 
significand of res with 3393 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 3394 // less than, equal to, or greater than 1/2 ulp (significand of res) 3395 R128.w[1] = res.w[1] & MASK_COEFF; 3396 R128.w[0] = res.w[0]; 3397 if (ind <= 19) { 3398 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp 3399 lt_half_ulp = 1; 3400 is_inexact_lt_midpoint = 1; 3401 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp 3402 eq_half_ulp = 1; 3403 is_midpoint_gt_even = 1; 3404 } else { // > 1/2 ulp 3405 gt_half_ulp = 1; 3406 is_inexact_gt_midpoint = 1; 3407 } 3408 } else { // if (ind <= 38) 3409 if (R128.w[1] < midpoint128[ind - 20].w[1] || 3410 (R128.w[1] == midpoint128[ind - 20].w[1] && 3411 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp 3412 lt_half_ulp = 1; 3413 is_inexact_lt_midpoint = 1; 3414 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 3415 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp 3416 eq_half_ulp = 1; 3417 is_midpoint_gt_even = 1; 3418 } else { // > 1/2 ulp 3419 gt_half_ulp = 1; 3420 is_inexact_gt_midpoint = 1; 3421 } 3422 } 3423 if (lt_half_ulp || eq_half_ulp) { 3424 // res = +0.0 * 10^expmin 3425 res.w[1] = 0x0000000000000000ull; 3426 res.w[0] = 0x0000000000000000ull; 3427 } else { // if (gt_half_ulp) 3428 // res = +1 * 10^expmin 3429 res.w[1] = 0x0000000000000000ull; 3430 res.w[0] = 0x0000000000000001ull; 3431 } 3432 res.w[1] = p_sign | res.w[1]; 3433 e4 = expmin; 3434 } else { // if (1 <= x0 <= ind - 1 <= 33) 3435 // round the ind-digit result to ind - x0 digits 3436 3437 if (ind <= 18) { // 2 <= ind <= 18 3438 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 3439 &is_midpoint_lt_even, &is_midpoint_gt_even, 3440 &is_inexact_lt_midpoint, 3441 &is_inexact_gt_midpoint); 3442 res.w[1] = 0x0; 3443 res.w[0] = R64; 3444 } else if (ind <= 38) { 3445 P128.w[1] = res.w[1] & MASK_COEFF; 3446 P128.w[0] = res.w[0]; 3447 round128_19_38 (ind, x0, P128, &res, &incr_exp, 3448 &is_midpoint_lt_even, &is_midpoint_gt_even, 3449 
&is_inexact_lt_midpoint, 3450 &is_inexact_gt_midpoint); 3451 } 3452 e4 = e4 + x0; // expmin 3453 // we want the exponent to be expmin, so if incr_exp = 1 then 3454 // multiply the rounded result by 10 - it will still fit in 113 bits 3455 if (incr_exp) { 3456 // 64 x 128 -> 128 3457 P128.w[1] = res.w[1] & MASK_COEFF; 3458 P128.w[0] = res.w[0]; 3459 __mul_64x128_to_128 (res, ten2k64[1], P128); 3460 } 3461 res.w[1] = 3462 p_sign | ((UINT64) (e4 + 6176) << 49) | (res. 3463 w[1] & MASK_COEFF); 3464 // avoid a double rounding error 3465 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3466 is_midpoint_lt_even) { // double rounding error upward 3467 // res = res - 1 3468 res.w[0]--; 3469 if (res.w[0] == 0xffffffffffffffffull) 3470 res.w[1]--; 3471 // Note: a double rounding error upward is not possible; for this 3472 // the result after the first rounding would have to be 99...95 3473 // (35 digits in all), possibly followed by a number of zeros; this 3474 // not possible in this underflow case 3475 is_midpoint_lt_even = 0; 3476 is_inexact_lt_midpoint = 1; 3477 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3478 is_midpoint_gt_even) { // double rounding error downward 3479 // res = res + 1 3480 res.w[0]++; 3481 if (res.w[0] == 0) 3482 res.w[1]++; 3483 is_midpoint_gt_even = 0; 3484 is_inexact_gt_midpoint = 1; 3485 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3486 !is_inexact_lt_midpoint 3487 && !is_inexact_gt_midpoint) { 3488 // if this second rounding was exact the result may still be 3489 // inexact because of the first rounding 3490 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3491 is_inexact_gt_midpoint = 1; 3492 } 3493 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3494 is_inexact_lt_midpoint = 1; 3495 } 3496 } else if (is_midpoint_gt_even && 3497 (is_inexact_gt_midpoint0 3498 || is_midpoint_lt_even0)) { 3499 // pulled up to a midpoint 3500 is_inexact_lt_midpoint = 1; 3501 is_inexact_gt_midpoint = 0; 3502 
is_midpoint_lt_even = 0; 3503 is_midpoint_gt_even = 0; 3504 } else if (is_midpoint_lt_even && 3505 (is_inexact_lt_midpoint0 3506 || is_midpoint_gt_even0)) { 3507 // pulled down to a midpoint 3508 is_inexact_lt_midpoint = 0; 3509 is_inexact_gt_midpoint = 1; 3510 is_midpoint_lt_even = 0; 3511 is_midpoint_gt_even = 0; 3512 } else { 3513 ; 3514 } 3515 } 3516 } 3517 // res contains the correct result 3518 // apply correction if not rounding to nearest 3519 if (rnd_mode != ROUNDING_TO_NEAREST) { 3520 rounding_correction (rnd_mode, 3521 is_inexact_lt_midpoint, 3522 is_inexact_gt_midpoint, 3523 is_midpoint_lt_even, is_midpoint_gt_even, 3524 e4, &res, pfpsf); 3525 } 3526 if (is_midpoint_lt_even || is_midpoint_gt_even || 3527 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { 3528 // set the inexact flag 3529 *pfpsf |= INEXACT_EXCEPTION; 3530 if (is_tiny) 3531 *pfpsf |= UNDERFLOW_EXCEPTION; 3532 } 3533 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3534 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3535 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3536 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3537 BID_SWAP128 (res); 3538 BID_RETURN (res) 3539 3540 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) 3541 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) 3542 (delta + q3 <= p34 && p34 < q4)) { // Case (17) 3543 3544 // calculate first the result rounded to the destination precision, with 3545 // unbounded exponent 3546 3547 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 3548 rnd_mode, &is_midpoint_lt_even, 3549 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 3550 &is_inexact_gt_midpoint, pfpsf, &res); 3551 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3552 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3553 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3554 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3555 BID_SWAP128 (res); 3556 BID_RETURN (res) 3557 3558 } else { 3559 ; 3560 } 3561 3562 
  } // end if delta < 0

  // Tail of bid128_ext_fma: publish the four rounding indicators computed
  // above to the caller (needed by the mixed-precision wrappers below to
  // detect and correct double-rounding errors), then return the result.
  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
  BID_SWAP128 (res);
  BID_RETURN (res)

}


// bid128_fma: public BID128 fused multiply-add, res = x * y + z.
// Thin wrapper over bid128_ext_fma; the four midpoint/inexact indicators
// are computed but discarded (callers of the plain API do not need them).
// Both calling conventions (by reference / by value) are provided via
// DECIMAL_CALL_BY_REFERENCE; the two branches are otherwise identical.
#if DECIMAL_CALL_BY_REFERENCE
void
bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128_fma (UINT128 x, UINT128 y, UINT128 z
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even, is_midpoint_gt_even,
    is_inexact_lt_midpoint, is_inexact_gt_midpoint;
  // 0xbadd... sentinel pattern: makes an accidental use of an unassigned
  // result visible in a debugger
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };

#if DECIMAL_CALL_BY_REFERENCE
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x, &y, &z
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// Mixed-precision wrappers, naming convention bid128<xyz>_fma where each of
// the three letters gives the width of the corresponding operand:
// 'q' = quad (UINT128 / decimal128), 'd' = double (UINT64 / decimal64).
// Each wrapper widens its 64-bit operand(s) with bid64_to_bid128 (exact
// conversion) and defers to bid128_ext_fma for the actual computation.

// bid128ddd_fma: res(128) = x(64) * y(64) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1, y1, z1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, &y1, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y1,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128ddq_fma: res(128) = x(64) * y(64) + z(128)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
  UINT128 z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1, y1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, &y1, &z
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y1,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128dqd_fma: res(128) = x(64) * y(128) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1, z1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  // y is already 128-bit: pass the caller's pointer through unchanged
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, py, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128dqq_fma: res(128) = x(64) * y(128) + z(128)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x1; // 128-bit widened copy of the 64-bit operand

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, &x1, py, pz
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x1, y,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128qdd_fma: res(128) = x(128) * y(64) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 y1, z1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, px, &y1, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y1,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128qdq_fma: res(128) = x(128) * y(64) + z(128)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 y1; // 128-bit widened copy of the 64-bit operand

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, px, &y1, pz
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y1,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}


// bid128qqd_fma: res(128) = x(128) * y(128) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
  UINT64 z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
               _EXC_INFO_PARAM) {
#endif
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 z1; // 128-bit widened copy of the 64-bit operand

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
                  &res, px, py, &z1
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint, x, y,
                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}

// Note: bid128qqq_fma is represented by bid128_fma

// Note: bid64ddd_fma is represented
// by bid64_fma

// Mixed-precision wrappers producing a 64-bit result, naming convention
// bid64<xyz>_fma: 'q' = UINT128 operand, 'd' = UINT64 operand. Each wrapper
// widens its 64-bit operand(s) with bid64_to_bid128 (exact conversion) and
// defers to bid64qqq_fma, which performs the FMA in 128-bit and rounds the
// result to 64-bit while avoiding double-rounding errors.

// bid64ddq_fma: res(64) = x(64) * y(64) + z(128)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull; // sentinel: catches unassigned result
  UINT128 x1, y1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, &x1, &y1, pz
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x1, y1, z
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64dqd_fma: res(64) = x(64) * y(128) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull; // sentinel: catches unassigned result
  UINT128 x1, z1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, &x1, py, &z1
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x1, y, z1
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64dqq_fma: res(64) = x(64) * y(128) + z(128)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull; // sentinel: catches unassigned result
  UINT128 x1; // 128-bit widened copy of the 64-bit operand

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, &x1, py, pz
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x1, y, z
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qdd_fma: res(64) = x(128) * y(64) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull; // sentinel: catches unassigned result
  UINT128 y1, z1; // 128-bit widened copies of the 64-bit operands

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, px, &y1, &z1
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x, y1, z1
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qdq_fma: res(64) = x(128) * y(64) + z(128)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull; // sentinel: catches unassigned result
  UINT128 y1; // 128-bit widened copy of the 64-bit operand

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, px, &y1, pz
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x, y1, z
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qqd_fma: res(64) = x(128) * y(128) + z(64)
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT64 res1 = 0xbaddbaddbaddbaddull; // sentinel: catches unassigned result
  UINT128 z1; // 128-bit widened copy of the 64-bit operand

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qqq_fma (&res1, px, py, &z1
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res1 = bid64qqq_fma (x, y, z1
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
#endif
  BID_RETURN (res1);
}


// bid64qqq_fma: res(64) = x(128) * y(128) + z(128).
// Computes the FMA exactly-rounded-to-nearest in 128-bit via bid128_ext_fma,
// then rounds that 34-digit result to 16 digits, using the midpoint/inexact
// indicators from the first rounding to detect and undo double-rounding
// errors. For directed rounding modes (RD, RU, RZ) no double rounding error
// is possible, so the result is obtained by simply converting the BID128
// result to BID64.
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  // *0 indicators describe the FIRST rounding (inside bid128_ext_fma, to 34
  // digits); the un-suffixed ones describe the SECOND rounding (to 16 digits)
  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
    is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  int incr_exp;
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT64 res1 = 0xbaddbaddbaddbaddull;
  unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
  UINT64 sign;
  UINT64 exp;
  int unbexp; // unbiased decimal exponent of the result
  UINT128 C; // 34-digit coefficient of the BID128 result
  BID_UI64DOUBLE tmp; // for binary-double tricks to count bits exactly
  int nr_bits;
  int q, x0; // q = nr. of decimal digits in C; x0 = nr. of digits to remove
  int scale;
  int lt_half_ulp = 0, eq_half_ulp = 0;

  // Note: for rounding modes other than RN or RA, the result can be obtained
  // by rounding first to BID128 and then to BID64

  save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
  *pfpsf = 0;

#if DECIMAL_CALL_BY_REFERENCE
  bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
                  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
                  &res, &x, &y, &z
                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                  _EXC_INFO_ARG);
#else
  res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
                        &is_inexact_lt_midpoint0,
                        &is_inexact_gt_midpoint0, x, y,
                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                        _EXC_INFO_ARG);
#endif

  // Fast path: directed rounding (no double rounding possible) or a special
  // result (NaN/infinity) - just convert BID128 -> BID64 and fix up flags.
  if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
      (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
      ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
      ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
#if DECIMAL_CALL_BY_REFERENCE
    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
#else
    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
#endif
    // determine the unbiased exponent of the result
    unbexp = ((res1 >> 53) & 0x3ff) - 398;

    // if subnormal, res1 must have exp = -398
    // if tiny and inexact set underflow and inexact status flags
    if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
        (unbexp == -398)
        && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
        && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
            || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
      // set the inexact flag and the underflow flag
      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
    } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
               is_midpoint_lt_even0 || is_midpoint_gt_even0) {
      // set the inexact flag
      *pfpsf |= INEXACT_EXCEPTION;
    }

    *pfpsf |= save_fpsf;
    BID_RETURN (res1);
  } // else continue, and use rounding to nearest to round to 16 digits

  // at this point the result is rounded to nearest (even or away) to 34 digits
  // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
  sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
  exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
  unbexp = (exp >> 49) - 6176;
  C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
  C.w[0] = res.w[LOW_128W];

  // Zero, or an exponent so far outside the BID64 range that bid128_to_bid64
  // fully determines the (possibly under/overflowed) result.
  if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
      (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
    // clear under/overflow
#if DECIMAL_CALL_BY_REFERENCE
    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
#else
    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
#endif
    *pfpsf |= save_fpsf;
    BID_RETURN (res1);
  } // else continue

  // -398 - 34 <= unbexp <= 369 + 15
  if (rnd_mode == ROUNDING_TIES_AWAY) {
    // apply correction, if needed, to make the result rounded to nearest-even
    // NOTE(review): is_midpoint_gt_even is still its initial 0 here (only the
    // *0 indicators have been set so far), and res1 has not been assigned
    // from the coefficient yet - presumably is_midpoint_gt_even0 was meant;
    // verify against the upstream Intel BID library before changing.
    if (is_midpoint_gt_even) {
      // res = res - 1
      res1--; // res1 is now even
    } // else the result is already correctly rounded to nearest-even
  }
  // at this point the result is finite, non-zero canonical normal or subnormal,
  // and in most cases overflow or underflow will not occur

  // determine the number of digits q in the result
  // q = nr. of decimal digits in x
  // determine first the nr. of bits in x
  if (C.w[1] == 0) {
    if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
      // split the 64-bit value in two 32-bit halves to avoid rounding errors
      if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
        tmp.d = (double) (C.w[0] >> 32); // exact conversion
        nr_bits =
          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      } else { // x < 2^32
        tmp.d = (double) (C.w[0]); // exact conversion
        nr_bits =
          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // if x < 2^53
      tmp.d = (double) C.w[0]; // exact conversion
      nr_bits =
        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }
  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
    tmp.d = (double) C.w[1]; // exact conversion
    nr_bits =
      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
  }
  // map bit count to decimal digit count via the nr_digits table;
  // a 0 entry means the count is ambiguous and the threshold decides
  q = nr_digits[nr_bits - 1].digits;
  if (q == 0) {
    q = nr_digits[nr_bits - 1].digits1;
    if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
        (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
         C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
      q++;
  }
  // if q > 16, round to nearest even to 16 digits (but for underflow it may
  // have to be truncated even more)
  if (q > 16) {
    x0 = q - 16;
    if (q <= 18) {
      round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
                    &is_midpoint_lt_even, &is_midpoint_gt_even,
                    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    } else { // 19 <= q <= 34
      round128_19_38 (q, x0, C, &res128, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      res1 = res128.w[0]; // the result fits in 64 bits
    }
    unbexp = unbexp + x0;
    if (incr_exp)
      unbexp++;
    q = 16; // need to set in case denormalization is necessary
  } else {
    // the result does not require a second rounding (and it must have
    // been exact in the first rounding, since q <= 16)
    res1 = C.w[0];
  }

  // avoid a double rounding error: combine the indicators of the first
  // (to 34 digits) and second (to 16 digits) roundings into a single set
  // describing the overall rounding of the exact result
  if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
      is_midpoint_lt_even) { // double rounding error upward
    // res = res - 1
    res1--; // res1 becomes odd
    is_midpoint_lt_even = 0;
    is_inexact_lt_midpoint = 1;
    if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
      res1 = 0x002386f26fc0ffffull; // 10^16 - 1
      unbexp--;
    }
  } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
             is_midpoint_gt_even) { // double rounding error downward
    // res = res + 1
    res1++; // res1 becomes odd (so it cannot be 10^16)
    is_midpoint_gt_even = 0;
    is_inexact_gt_midpoint = 1;
  } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
    // if this second rounding was exact the result may still be
    // inexact because of the first rounding
    if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
      is_inexact_gt_midpoint = 1;
    }
    if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
      is_inexact_lt_midpoint = 1;
    }
  } else if (is_midpoint_gt_even &&
             (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
    // pulled up to a midpoint
    is_inexact_lt_midpoint = 1;
    is_inexact_gt_midpoint = 0;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;
  } else if (is_midpoint_lt_even &&
             (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
    // pulled down to a midpoint
    is_inexact_lt_midpoint = 0;
    is_inexact_gt_midpoint = 1;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;
  } else {
    ; // no adjustment needed
  }
  // this is the result rounded correctly to nearest even, with unbounded exp.

  // check for overflow
  if (q + unbexp > P16 + expmax16) {
    res1 = sign | 0x7800000000000000ull; // +/-inf encoding
    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    *pfpsf |= save_fpsf;
    BID_RETURN (res1)
  } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
    // not overflow; the result must be exact, and we can multiply res1 by
    // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
    scale = unbexp - expmax16;
    res1 = res1 * ten2k64[scale]; // res1 * 10^scale
    unbexp = expmax16; // unbexp - scale
  } else {
    ; // continue
  }

  // check for underflow
  if (q + unbexp < P16 + expmin16) {
    if (unbexp < expmin16) {
      // we must truncate more of res
      x0 = expmin16 - unbexp; // x0 >= 1
      // roll the current indicators into the *0 slots; the next rounding
      // computes a fresh set which is then combined with these again
      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
      is_midpoint_lt_even0 = is_midpoint_lt_even;
      is_midpoint_gt_even0 = is_midpoint_gt_even;
      is_inexact_lt_midpoint = 0;
      is_inexact_gt_midpoint = 0;
      is_midpoint_lt_even = 0;
      is_midpoint_gt_even = 0;
      // the number of decimal digits in res1 is q
      if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
        // 2 <= q <= 16, 1 <= x0 <= 15
        round64_2_18 (q, x0, res1, &res1, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        if (incr_exp) {
          // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
          res1 = ten2k64[q - x0];
        }
        unbexp = unbexp + x0; // expmin16
      } else if (x0 == q) {
        // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
        // determine relationship with 1/2 ulp
        // q <= 16
        if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
          lt_half_ulp = 1;
          is_inexact_lt_midpoint = 1;
        } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
          eq_half_ulp = 1;
          is_midpoint_gt_even = 1;
        } else { // > 1/2 ulp
          // gt_half_ulp = 1;
          is_inexact_gt_midpoint = 1;
        }
        if (lt_half_ulp || eq_half_ulp) {
          // res = +0.0 * 10^expmin16
          res1 = 0x0000000000000000ull;
        } else { // if (gt_half_ulp)
          // res = +1 * 10^expmin16
          res1 = 0x0000000000000001ull;
        }
        unbexp = expmin16;
      } else { // if (x0 > q)
        // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
        res1 = 0x0000000000000000ull;
        unbexp = expmin16;
        is_inexact_lt_midpoint = 1;
      }
      // avoid a double rounding error (same combination logic as above,
      // now between the saved indicators and the denormalization rounding)
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
          is_midpoint_lt_even) { // double rounding error upward
        // res = res - 1
        res1--; // res1 becomes odd
        is_midpoint_lt_even = 0;
        is_inexact_lt_midpoint = 1;
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
                 is_midpoint_gt_even) { // double rounding error downward
        // res = res + 1
        res1++; // res1 becomes odd
        is_midpoint_gt_even = 0;
        is_inexact_gt_midpoint = 1;
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
        // if this rounding was exact the result may still be
        // inexact because of the previous roundings
        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
          is_inexact_gt_midpoint = 1;
        }
        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
          is_inexact_lt_midpoint = 1;
        }
      } else if (is_midpoint_gt_even &&
                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
        // pulled up to a midpoint
        is_inexact_lt_midpoint = 1;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else if (is_midpoint_lt_even &&
                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
        // pulled down to a midpoint
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 1;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else {
        ; // no adjustment needed
      }
    }
    // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
    // and the result is tiny and exact

    // check for inexact result
    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
        is_midpoint_lt_even || is_midpoint_gt_even ||
        is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
        is_midpoint_lt_even0 || is_midpoint_gt_even0) {
      // set the inexact flag and the underflow flag
      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
    }
  } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
             is_midpoint_lt_even || is_midpoint_gt_even) {
    *pfpsf |= INEXACT_EXCEPTION;
  }
  // this is the result rounded correctly to nearest, with bounded exponent

  if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
    // res = res + 1
    res1++; // res1 is now odd
  } // else the result is already correct

  // assemble the result: small coefficients use the non-steering encoding,
  // coefficients >= 2^53 need the steering-bits form with the implied '100'
  if (res1 < 0x0020000000000000ull) { // res < 2^53
    res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
  } else { // res1 >= 2^53
    res1 = sign | MASK_STEERING_BITS |
      ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
  }
  *pfpsf |= save_fpsf;
  BID_RETURN (res1);
}