/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef _DIV_MACROS_H_
#define _DIV_MACROS_H_

#include "bid_internal.h"

#define FENCE __fence
//#define FENCE

//#define DOUBLE_EXTENDED_ON

#if DOUBLE_EXTENDED_ON


__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
  UINT128 CB, CB2, CB4, CB8, CQB, CA;
  int_double d64, dm64, ds;
  int_float t64;
  double dx, dq, dqh;
  BINARY80 lq, lx, ly;
  UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;

  if (!CY.w[1]) {
    pCR->w[1] = 0;

    if (!CX.w[1]) {
      pCQ->w[0] = CX.w[0] / CY.w[0];
      pCQ->w[1] = 0;
      pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
    } else {

      // This path works for CX < 2^116 only

      // 2^64
      d64.i = 0x43f0000000000000;
      // 2^(-64)
      dm64.i = 0x3bf0000000000000;
      // 1.5*2^(-52)
      ds.i = 0x3cb8000000000000;
      dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0];
      dq = dx / (BINARY80) CY.w[0];
      dq -= dq * (ds.d);
      dqh = dq * dm64.d;
      Qh = (UINT64) dqh;
      Ql = (UINT64) (dq - ((double) Qh) * d64.d);

      Rh = CX.w[0] - Ql * CY.w[0];
      Ql2 = Rh / CY.w[0];
      pCR->w[0] = Rh - Ql2 * CY.w[0];
      __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
      pCQ->w[1] = Qh + carry;

    }
    return;
  }
  // now CY.w[1] > 0

  // 2^64
  t64.i = 0x5f800000;
  lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0];
  ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0];
  lq = lx / ly;
  pCQ->w[0] = (UINT64) lq;

  pCQ->w[1] = 0;

  if (!pCQ->w[0]) {
    /*if(__unsigned_compare_ge_128(CX,CY))
       {
       pCQ->w[0] = 1;
       __sub_128_128((*pCR), CX, CY);
       }
       else */
    {
      pCR->w[1] = CX.w[1];
      pCR->w[0] = CX.w[0];
    }
    return;
  }

  if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
    pCQ->w[0] = (UINT64) lq - 1;
    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);
    if (__unsigned_compare_ge_128 (CA, CY)) {
      __sub_128_128 (CA, CA, CY);
      pCQ->w[0]++;
      if (__unsigned_compare_ge_128 (CA, CY)) {
        __sub_128_128 (CA, CA, CY);
        pCQ->w[0]++;
      }
    }
    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  } else {
    pCQ->w[0] = (UINT64) lq - 6;

    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);

    CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
    CB8.w[0] = CY.w[0] << 3;
    CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
    CB4.w[0] = CY.w[0] << 2;
    CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
    CB2.w[0] = CY.w[0] << 1;

    if (__unsigned_compare_ge_128 (CA, CB8)) {
      pCQ->w[0] += 8;
      __sub_128_128 (CA, CA, CB8);
    }
    if (__unsigned_compare_ge_128 (CA, CB4)) {
      pCQ->w[0] += 4;
      __sub_128_128 (CA, CA, CB4);
    }
    if (__unsigned_compare_ge_128 (CA, CB2)) {
      pCQ->w[0] += 2;
      __sub_128_128 (CA, CA, CB2);
    }
    if (__unsigned_compare_ge_128 (CA, CY)) {
      pCQ->w[0] += 1;
      __sub_128_128 (CA, CA, CY);
    }

    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  }
}
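
#if 0
/* Usage sketch (illustrative only, not part of the library): the routine
   returns both quotient and remainder through the output operands.  For
   example, (2^64 + 5) / 3 = 0x5555555555555557 with remainder 0. */
static void
__div_128_by_128_example (void) {
  UINT128 CX, CY, CQ, CR;
  CX.w[1] = 1;
  CX.w[0] = 5;			// CX = 2^64 + 5
  CY.w[1] = 0;
  CY.w[0] = 3;			// CY = 3
  __div_128_by_128 (&CQ, &CR, CX, CY);
  // now CQ = { 0, 0x5555555555555557ull } and CR = { 0, 0 }
}
#endif
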
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CQ2Y;
  UINT128 CQ2, CQ3Y;
  UINT64 Q3, carry64;
  int_double d64;
  BINARY80 lx, ly, lq, l64, l128;

  // 2^64
  d64.i = 0x43f0000000000000ull;
  l64 = (BINARY80) d64.d;
  // 2^128
  l128 = l64 * l64;

  lx =
    ((BINARY80) (*pCA4).w[3] * l64 +
     (BINARY80) (*pCA4).w[2]) * l128 +
    (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0];
  ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64;

  lq = lx / ly;
  CQ2.w[1] = (UINT64) lq;
  lq = (lq - CQ2.w[1]) * l64;
  CQ2.w[0] = (UINT64) lq;

  // CQ2*CY
  __mul_128x128_to_256 (CQ2Y, CY, CQ2);

  // CQ2Y <= (*pCA4) ?
  if (CQ2Y.w[3] < (*pCA4).w[3]
      || (CQ2Y.w[3] == (*pCA4).w[3]
          && (CQ2Y.w[2] < (*pCA4).w[2]
              || (CQ2Y.w[2] == (*pCA4).w[2]
                  && (CQ2Y.w[1] < (*pCA4).w[1]
                      || (CQ2Y.w[1] == (*pCA4).w[1]
                          && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {

    // (*pCA4) - CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
                         carry64);
    (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;

    lx = ((BINARY80) (*pCA4).w[2] * l128 +
          ((BINARY80) (*pCA4).w[1] * l64 +
           (BINARY80) (*pCA4).w[0])) * l64;
    lq = lx / ly;
    Q3 = (UINT64) lq;

    if (Q3) {
      Q3--;
      __mul_64x128_short (CQ3Y, Q3, CY);
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;

      if ((*pCA4).w[1] > CY.w[1]
          || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
        Q3++;
        __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
        (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
        if ((*pCA4).w[1] > CY.w[1]
            || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
          Q3++;
          __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
                            CY.w[0]);
          (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
        }
      }
      // add Q3 to Q2
      __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
      CQ2.w[1] += carry64;
    }
  } else {
    // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
                         carry64);
    (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;

    lx =
      ((BINARY80) (*pCA4).w[2] * l128 +
       (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64;
    lq = lx / ly;
    Q3 = 1 + (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
    (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;

    if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
        || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3--;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
    } else if ((SINT64) (*pCA4).w[1] < 0) {
      Q3++;
      __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
    }
    // subtract Q3 from Q2
    __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3);
    CQ2.w[1] -= carry64;
  }

  // (*pCQ) + CQ2 + carry
  __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
  (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;

}
#else
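
/* The portable implementations below use the same scheme as the BINARY80
   code above, but with binary64 estimates: compute an approximate quotient
   in floating point, bias it low, multiply back exactly in integer
   arithmetic, and repair the quotient with a bounded number of
   compare/subtract steps.  A minimal sketch of that pattern on 64-bit
   operands (illustrative only; assumes x < 2^52 so both conversions to
   double are exact and the rounded estimate is within 1 of the true
   quotient):

       UINT64 q = (UINT64) ((double) x / (double) y); // off by at most 1
       UINT64 r;
       if (q) q--;                  // bias low so r cannot underflow
       r = x - q * y;
       while (r >= y) {             // at most two iterations
         q++;
         r -= y;
       }
*/
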
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
  UINT128 CY36, CY51, CQ, A2, CX, CQT;
  UINT64 Q;
  int_double t64, d49, d60;
  double lx, ly, lq;

  if (!CX0.w[1] && !CY.w[1]) {
    pCQ->w[0] = CX0.w[0] / CY.w[0];
    pCQ->w[1] = 0;
    pCR->w[1] = 0;
    pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }

  CX.w[1] = CX0.w[1];
  CX.w[0] = CX0.w[0];

  // 2^64
  t64.i = 0x43f0000000000000ull;
  lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

  CY36.w[1] = CY.w[0] >> (64 - 36);
  CY36.w[0] = CY.w[0] << 36;

  CQ.w[1] = CQ.w[0] = 0;

  // Q >= 2^100 ?
  if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
    // then Q >= 2^100

    // 2^(-60)*CX/CY
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = (UINT64) lq - 4ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);

    // A2 <<= 60
    A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
    A2.w[0] <<= 60;

    __sub_128_128 (CX, CX, A2);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;

    CQ.w[1] = Q >> (64 - 60);
    CQ.w[0] = Q << 60;
  }

  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CY.w[1] < (UINT64) (1 << (64 - 51))
      && (__unsigned_compare_gt_128 (CX, CY51))) {
    // Q > 2^51

    // 2^(-49)*CX/CY
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    A2.w[1] += Q * CY.w[1];

    // A2 <<= 49
    A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    A2.w[0] <<= 49;

    __sub_128_128 (CX, CX, A2);

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;
  }

  Q = (UINT64) lq;

  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CX, CX, A2);
  if ((SINT64) CX.w[1] < 0) {
    Q--;
    CX.w[0] += CY.w[0];
    if (CX.w[0] < CY.w[0])
      CX.w[1]++;
    CX.w[1] += CY.w[1];
    if ((SINT64) CX.w[1] < 0) {
      Q--;
      CX.w[0] += CY.w[0];
      if (CX.w[0] < CY.w[0])
        CX.w[1]++;
      CX.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CX, CY)) {
    Q++;
    __sub_128_128 (CX, CX, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCR->w[1] = CX.w[1];
  pCR->w[0] = CX.w[0];
  return;
}
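
/* Illustrative only: the binary64 approximation of a multi-word integer
   used throughout this file.  The sum below is good to roughly 53 bits,
   which is why each stage above extracts at most ~49-60 quotient bits
   before switching back to exact integer arithmetic for the correction. */
#if 0
static double
__approx_128_in_binary64 (UINT128 a) {	// hypothetical helper, not in bid_internal.h
  int_double t64;
  t64.i = 0x43f0000000000000ull;	// 2^64 as a binary64
  return (double) a.w[1] * t64.d + (double) a.w[0];
}
#endif
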
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CA4, CA2, CY51, CY36;
  UINT128 CQ, A2, A2h, CQT;
  UINT64 Q, carry64;
  int_double t64, d49, d60;
  double lx, ly, lq, d128, d192;

  // the quotient is assumed to be at most 113 bits,
  // as needed by BID128 divide routines

  // initial dividend
  CA4.w[3] = (*pCA4).w[3];
  CA4.w[2] = (*pCA4).w[2];
  CA4.w[1] = (*pCA4).w[1];
  CA4.w[0] = (*pCA4).w[0];
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];

  // 2^64
  t64.i = 0x43f0000000000000ull;
  d128 = t64.d * t64.d;
  d192 = d128 * t64.d;
  lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
                                   ((double) CA4.w[1] * t64.d +
                                    (double) CA4.w[0]));
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

  CY36.w[2] = CY.w[1] >> (64 - 36);
  CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
  CY36.w[0] = CY.w[0] << 36;

  // Q >= 2^100 ?
  if (CA4.w[3] > CY36.w[2]
      || (CA4.w[3] == CY36.w[2]
          && (CA4.w[2] > CY36.w[1]
              || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
    // 2^(-60)*CA4/CY
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = (UINT64) lq - 4ull;

    // Q*CY
    __mul_64x128_to_192 (CA2, Q, CY);

    // CA2 <<= 60
    // CA2.w[3] = CA2.w[2] >> (64-60);
    CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
    CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
    CA2.w[0] <<= 60;

    // CA4 -= CA2
    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1], carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;

    CQT.w[1] = Q >> (64 - 60);
    CQT.w[0] = Q << 60;
    __add_128_128 (CQ, CQ, CQT);
  }

  CY51.w[2] = CY.w[1] >> (64 - 51);
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2])
                               && (__unsigned_compare_gt_128 (CA4, CY51)))) {
    // Q > 2^51

    // 2^(-49)*CA4/CY
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    __mul_64x64_to_128 (A2h, Q, CY.w[1]);
    A2.w[1] += A2h.w[0];
    if (A2.w[1] < A2h.w[0])
      A2h.w[1]++;

    // A2 <<= 49
    CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
    CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    CA2.w[0] = A2.w[0] << 49;

    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1], carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;
  }

  Q = (UINT64) lq;
  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CA4, CA4, A2);
  if ((SINT64) CA4.w[1] < 0) {
    Q--;
    CA4.w[0] += CY.w[0];
    if (CA4.w[0] < CY.w[0])
      CA4.w[1]++;
    CA4.w[1] += CY.w[1];
    if ((SINT64) CA4.w[1] < 0) {
      Q--;
      CA4.w[0] += CY.w[0];
      if (CA4.w[0] < CY.w[0])
        CA4.w[1]++;
      CA4.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CA4, CY)) {
    Q++;
    __sub_128_128 (CA4, CA4, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCA4->w[1] = CA4.w[1];
  pCA4->w[0] = CA4.w[0];
  return;
}

#endif
#endif
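
/* Usage note (illustrative only): __div_256_by_128 treats both pointer
   operands as in/out.  *pCA4 carries the 256-bit dividend in and the
   remainder (at most 128 bits, left in w[1] and w[0]) out; the new
   quotient bits are ADDED to the value already in *pCQ, so a caller
   that wants a plain quotient zeroes *pCQ first:

       pQ->w[1] = pQ->w[0] = 0;
       __div_256_by_128 (pQ, pA4, y);
*/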