lib/builtins/udivmodti4.c

//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements __udivmodti4 for the compiler_rt library.
//
//===----------------------------------------------------------------------===//
3cab2bb3Spatrick
3cab2bb3Spatrick#include "int_lib.h"
3cab2bb3Spatrick
3cab2bb3Spatrick#ifdef CRT_HAS_128BIT
3cab2bb3Spatrick
*1f9cb04fSpatrick// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
*1f9cb04fSpatrick// Remainder stored in r.
*1f9cb04fSpatrick// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
*1f9cb04fSpatrick// fallback. For a correctness proof see the reference for this algorithm
*1f9cb04fSpatrick// in Knuth, Volume 2, section 4.3.1, Algorithm D.
*1f9cb04fSpatrickUNUSED
*1f9cb04fSpatrickstatic inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
*1f9cb04fSpatrick                                            du_int *r) {
*1f9cb04fSpatrick  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
*1f9cb04fSpatrick  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
*1f9cb04fSpatrick  du_int un1, un0;                                // Norm. dividend LSD's
*1f9cb04fSpatrick  du_int vn1, vn0;                                // Norm. divisor digits
*1f9cb04fSpatrick  du_int q1, q0;                                  // Quotient digits
*1f9cb04fSpatrick  du_int un64, un21, un10;                        // Dividend digit pairs
*1f9cb04fSpatrick  du_int rhat;                                    // A remainder
*1f9cb04fSpatrick  si_int s;                                       // Shift amount for normalization
*1f9cb04fSpatrick
*1f9cb04fSpatrick  s = __builtin_clzll(v);
*1f9cb04fSpatrick  if (s > 0) {
*1f9cb04fSpatrick    // Normalize the divisor.
*1f9cb04fSpatrick    v = v << s;
*1f9cb04fSpatrick    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
*1f9cb04fSpatrick    un10 = u0 << s; // Shift dividend left
*1f9cb04fSpatrick  } else {
*1f9cb04fSpatrick    // Avoid undefined behavior of (u0 >> 64).
*1f9cb04fSpatrick    un64 = u1;
*1f9cb04fSpatrick    un10 = u0;
*1f9cb04fSpatrick  }
*1f9cb04fSpatrick
*1f9cb04fSpatrick  // Break divisor up into two 32-bit digits.
*1f9cb04fSpatrick  vn1 = v >> (n_udword_bits / 2);
*1f9cb04fSpatrick  vn0 = v & 0xFFFFFFFF;
*1f9cb04fSpatrick
*1f9cb04fSpatrick  // Break right half of dividend into two digits.
*1f9cb04fSpatrick  un1 = un10 >> (n_udword_bits / 2);
*1f9cb04fSpatrick  un0 = un10 & 0xFFFFFFFF;
*1f9cb04fSpatrick
*1f9cb04fSpatrick  // Compute the first quotient digit, q1.
*1f9cb04fSpatrick  q1 = un64 / vn1;
*1f9cb04fSpatrick  rhat = un64 - q1 * vn1;
*1f9cb04fSpatrick
*1f9cb04fSpatrick  // q1 has at most error 2. No more than 2 iterations.
*1f9cb04fSpatrick  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
*1f9cb04fSpatrick    q1 = q1 - 1;
*1f9cb04fSpatrick    rhat = rhat + vn1;
*1f9cb04fSpatrick    if (rhat >= b)
*1f9cb04fSpatrick      break;
*1f9cb04fSpatrick  }
*1f9cb04fSpatrick
*1f9cb04fSpatrick  un21 = un64 * b + un1 - q1 * v;
*1f9cb04fSpatrick
*1f9cb04fSpatrick  // Compute the second quotient digit.
*1f9cb04fSpatrick  q0 = un21 / vn1;
*1f9cb04fSpatrick  rhat = un21 - q0 * vn1;
*1f9cb04fSpatrick
*1f9cb04fSpatrick  // q0 has at most error 2. No more than 2 iterations.
*1f9cb04fSpatrick  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
*1f9cb04fSpatrick    q0 = q0 - 1;
*1f9cb04fSpatrick    rhat = rhat + vn1;
*1f9cb04fSpatrick    if (rhat >= b)
*1f9cb04fSpatrick      break;
*1f9cb04fSpatrick  }
*1f9cb04fSpatrick
*1f9cb04fSpatrick  *r = (un21 * b + un0 - q0 * v) >> s;
*1f9cb04fSpatrick  return q1 * b + q0;
*1f9cb04fSpatrick}
*1f9cb04fSpatrick
*1f9cb04fSpatrickstatic inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
*1f9cb04fSpatrick                                     du_int *r) {
*1f9cb04fSpatrick#if defined(__x86_64__)
*1f9cb04fSpatrick  du_int result;
*1f9cb04fSpatrick  __asm__("divq %[v]"
*1f9cb04fSpatrick          : "=a"(result), "=d"(*r)
*1f9cb04fSpatrick          : [ v ] "r"(v), "a"(u0), "d"(u1));
*1f9cb04fSpatrick  return result;
*1f9cb04fSpatrick#else
*1f9cb04fSpatrick  return udiv128by64to64default(u1, u0, v, r);
*1f9cb04fSpatrick#endif
*1f9cb04fSpatrick}
*1f9cb04fSpatrick
3cab2bb3Spatrick// Effects: if rem != 0, *rem = a % b
3cab2bb3Spatrick// Returns: a / b
3cab2bb3Spatrick
3cab2bb3SpatrickCOMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
3cab2bb3Spatrick  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
*1f9cb04fSpatrick  utwords dividend;
*1f9cb04fSpatrick  dividend.all = a;
*1f9cb04fSpatrick  utwords divisor;
*1f9cb04fSpatrick  divisor.all = b;
*1f9cb04fSpatrick  utwords quotient;
*1f9cb04fSpatrick  utwords remainder;
*1f9cb04fSpatrick  if (divisor.all > dividend.all) {
3cab2bb3Spatrick    if (rem)
*1f9cb04fSpatrick      *rem = dividend.all;
3cab2bb3Spatrick    return 0;
3cab2bb3Spatrick  }
*1f9cb04fSpatrick  // When the divisor fits in 64 bits, we can use an optimized path.
*1f9cb04fSpatrick  if (divisor.s.high == 0) {
*1f9cb04fSpatrick    remainder.s.high = 0;
*1f9cb04fSpatrick    if (dividend.s.high < divisor.s.low) {
*1f9cb04fSpatrick      // The result fits in 64 bits.
*1f9cb04fSpatrick      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
*1f9cb04fSpatrick                                       divisor.s.low, &remainder.s.low);
*1f9cb04fSpatrick      quotient.s.high = 0;
3cab2bb3Spatrick    } else {
*1f9cb04fSpatrick      // First, divide with the high part to get the remainder in dividend.s.high.
*1f9cb04fSpatrick      // After that dividend.s.high < divisor.s.low.
*1f9cb04fSpatrick      quotient.s.high = dividend.s.high / divisor.s.low;
*1f9cb04fSpatrick      dividend.s.high = dividend.s.high % divisor.s.low;
*1f9cb04fSpatrick      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
*1f9cb04fSpatrick                                       divisor.s.low, &remainder.s.low);
*1f9cb04fSpatrick    }
3cab2bb3Spatrick    if (rem)
*1f9cb04fSpatrick      *rem = remainder.all;
*1f9cb04fSpatrick    return quotient.all;
3cab2bb3Spatrick  }
*1f9cb04fSpatrick  // 0 <= shift <= 63.
*1f9cb04fSpatrick  si_int shift =
*1f9cb04fSpatrick      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
*1f9cb04fSpatrick  divisor.all <<= shift;
*1f9cb04fSpatrick  quotient.s.high = 0;
*1f9cb04fSpatrick  quotient.s.low = 0;
*1f9cb04fSpatrick  for (; shift >= 0; --shift) {
*1f9cb04fSpatrick    quotient.s.low <<= 1;
*1f9cb04fSpatrick    // Branch free version of.
*1f9cb04fSpatrick    // if (dividend.all >= divisor.all)
3cab2bb3Spatrick    // {
*1f9cb04fSpatrick    //    dividend.all -= divisor.all;
3cab2bb3Spatrick    //    carry = 1;
3cab2bb3Spatrick    // }
*1f9cb04fSpatrick    const ti_int s =
*1f9cb04fSpatrick        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
*1f9cb04fSpatrick    quotient.s.low |= s & 1;
*1f9cb04fSpatrick    dividend.all -= divisor.all & s;
*1f9cb04fSpatrick    divisor.all >>= 1;
3cab2bb3Spatrick  }
3cab2bb3Spatrick  if (rem)
*1f9cb04fSpatrick    *rem = dividend.all;
*1f9cb04fSpatrick  return quotient.all;
3cab2bb3Spatrick}
3cab2bb3Spatrick
3cab2bb3Spatrick#endif // CRT_HAS_128BIT