xref: /openbsd-src/gnu/llvm/compiler-rt/lib/builtins/udivmodti4.c (revision 1f9cb04fc6f537ca6cf5a53c28927340cba218a2)
13cab2bb3Spatrick //===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
23cab2bb3Spatrick //
33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick //
73cab2bb3Spatrick //===----------------------------------------------------------------------===//
83cab2bb3Spatrick //
93cab2bb3Spatrick // This file implements __udivmodti4 for the compiler_rt library.
103cab2bb3Spatrick //
113cab2bb3Spatrick //===----------------------------------------------------------------------===//
123cab2bb3Spatrick 
133cab2bb3Spatrick #include "int_lib.h"
143cab2bb3Spatrick 
153cab2bb3Spatrick #ifdef CRT_HAS_128BIT
163cab2bb3Spatrick 
17*1f9cb04fSpatrick // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
18*1f9cb04fSpatrick // Remainder stored in r.
19*1f9cb04fSpatrick // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
20*1f9cb04fSpatrick // fallback. For a correctness proof see the reference for this algorithm
21*1f9cb04fSpatrick // in Knuth, Volume 2, section 4.3.1, Algorithm D.
22*1f9cb04fSpatrick UNUSED
udiv128by64to64default(du_int u1,du_int u0,du_int v,du_int * r)23*1f9cb04fSpatrick static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
24*1f9cb04fSpatrick                                             du_int *r) {
25*1f9cb04fSpatrick   const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
26*1f9cb04fSpatrick   const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
27*1f9cb04fSpatrick   du_int un1, un0;                                // Norm. dividend LSD's
28*1f9cb04fSpatrick   du_int vn1, vn0;                                // Norm. divisor digits
29*1f9cb04fSpatrick   du_int q1, q0;                                  // Quotient digits
30*1f9cb04fSpatrick   du_int un64, un21, un10;                        // Dividend digit pairs
31*1f9cb04fSpatrick   du_int rhat;                                    // A remainder
32*1f9cb04fSpatrick   si_int s;                                       // Shift amount for normalization
33*1f9cb04fSpatrick 
34*1f9cb04fSpatrick   s = __builtin_clzll(v);
35*1f9cb04fSpatrick   if (s > 0) {
36*1f9cb04fSpatrick     // Normalize the divisor.
37*1f9cb04fSpatrick     v = v << s;
38*1f9cb04fSpatrick     un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
39*1f9cb04fSpatrick     un10 = u0 << s; // Shift dividend left
40*1f9cb04fSpatrick   } else {
41*1f9cb04fSpatrick     // Avoid undefined behavior of (u0 >> 64).
42*1f9cb04fSpatrick     un64 = u1;
43*1f9cb04fSpatrick     un10 = u0;
44*1f9cb04fSpatrick   }
45*1f9cb04fSpatrick 
46*1f9cb04fSpatrick   // Break divisor up into two 32-bit digits.
47*1f9cb04fSpatrick   vn1 = v >> (n_udword_bits / 2);
48*1f9cb04fSpatrick   vn0 = v & 0xFFFFFFFF;
49*1f9cb04fSpatrick 
50*1f9cb04fSpatrick   // Break right half of dividend into two digits.
51*1f9cb04fSpatrick   un1 = un10 >> (n_udword_bits / 2);
52*1f9cb04fSpatrick   un0 = un10 & 0xFFFFFFFF;
53*1f9cb04fSpatrick 
54*1f9cb04fSpatrick   // Compute the first quotient digit, q1.
55*1f9cb04fSpatrick   q1 = un64 / vn1;
56*1f9cb04fSpatrick   rhat = un64 - q1 * vn1;
57*1f9cb04fSpatrick 
58*1f9cb04fSpatrick   // q1 has at most error 2. No more than 2 iterations.
59*1f9cb04fSpatrick   while (q1 >= b || q1 * vn0 > b * rhat + un1) {
60*1f9cb04fSpatrick     q1 = q1 - 1;
61*1f9cb04fSpatrick     rhat = rhat + vn1;
62*1f9cb04fSpatrick     if (rhat >= b)
63*1f9cb04fSpatrick       break;
64*1f9cb04fSpatrick   }
65*1f9cb04fSpatrick 
66*1f9cb04fSpatrick   un21 = un64 * b + un1 - q1 * v;
67*1f9cb04fSpatrick 
68*1f9cb04fSpatrick   // Compute the second quotient digit.
69*1f9cb04fSpatrick   q0 = un21 / vn1;
70*1f9cb04fSpatrick   rhat = un21 - q0 * vn1;
71*1f9cb04fSpatrick 
72*1f9cb04fSpatrick   // q0 has at most error 2. No more than 2 iterations.
73*1f9cb04fSpatrick   while (q0 >= b || q0 * vn0 > b * rhat + un0) {
74*1f9cb04fSpatrick     q0 = q0 - 1;
75*1f9cb04fSpatrick     rhat = rhat + vn1;
76*1f9cb04fSpatrick     if (rhat >= b)
77*1f9cb04fSpatrick       break;
78*1f9cb04fSpatrick   }
79*1f9cb04fSpatrick 
80*1f9cb04fSpatrick   *r = (un21 * b + un0 - q0 * v) >> s;
81*1f9cb04fSpatrick   return q1 * b + q0;
82*1f9cb04fSpatrick }
83*1f9cb04fSpatrick 
udiv128by64to64(du_int u1,du_int u0,du_int v,du_int * r)84*1f9cb04fSpatrick static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
85*1f9cb04fSpatrick                                      du_int *r) {
86*1f9cb04fSpatrick #if defined(__x86_64__)
87*1f9cb04fSpatrick   du_int result;
88*1f9cb04fSpatrick   __asm__("divq %[v]"
89*1f9cb04fSpatrick           : "=a"(result), "=d"(*r)
90*1f9cb04fSpatrick           : [ v ] "r"(v), "a"(u0), "d"(u1));
91*1f9cb04fSpatrick   return result;
92*1f9cb04fSpatrick #else
93*1f9cb04fSpatrick   return udiv128by64to64default(u1, u0, v, r);
94*1f9cb04fSpatrick #endif
95*1f9cb04fSpatrick }
96*1f9cb04fSpatrick 
973cab2bb3Spatrick // Effects: if rem != 0, *rem = a % b
983cab2bb3Spatrick // Returns: a / b
993cab2bb3Spatrick 
__udivmodti4(tu_int a,tu_int b,tu_int * rem)1003cab2bb3Spatrick COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
1013cab2bb3Spatrick   const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
102*1f9cb04fSpatrick   utwords dividend;
103*1f9cb04fSpatrick   dividend.all = a;
104*1f9cb04fSpatrick   utwords divisor;
105*1f9cb04fSpatrick   divisor.all = b;
106*1f9cb04fSpatrick   utwords quotient;
107*1f9cb04fSpatrick   utwords remainder;
108*1f9cb04fSpatrick   if (divisor.all > dividend.all) {
1093cab2bb3Spatrick     if (rem)
110*1f9cb04fSpatrick       *rem = dividend.all;
1113cab2bb3Spatrick     return 0;
1123cab2bb3Spatrick   }
113*1f9cb04fSpatrick   // When the divisor fits in 64 bits, we can use an optimized path.
114*1f9cb04fSpatrick   if (divisor.s.high == 0) {
115*1f9cb04fSpatrick     remainder.s.high = 0;
116*1f9cb04fSpatrick     if (dividend.s.high < divisor.s.low) {
117*1f9cb04fSpatrick       // The result fits in 64 bits.
118*1f9cb04fSpatrick       quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
119*1f9cb04fSpatrick                                        divisor.s.low, &remainder.s.low);
120*1f9cb04fSpatrick       quotient.s.high = 0;
1213cab2bb3Spatrick     } else {
122*1f9cb04fSpatrick       // First, divide with the high part to get the remainder in dividend.s.high.
123*1f9cb04fSpatrick       // After that dividend.s.high < divisor.s.low.
124*1f9cb04fSpatrick       quotient.s.high = dividend.s.high / divisor.s.low;
125*1f9cb04fSpatrick       dividend.s.high = dividend.s.high % divisor.s.low;
126*1f9cb04fSpatrick       quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
127*1f9cb04fSpatrick                                        divisor.s.low, &remainder.s.low);
128*1f9cb04fSpatrick     }
1293cab2bb3Spatrick     if (rem)
130*1f9cb04fSpatrick       *rem = remainder.all;
131*1f9cb04fSpatrick     return quotient.all;
1323cab2bb3Spatrick   }
133*1f9cb04fSpatrick   // 0 <= shift <= 63.
134*1f9cb04fSpatrick   si_int shift =
135*1f9cb04fSpatrick       __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
136*1f9cb04fSpatrick   divisor.all <<= shift;
137*1f9cb04fSpatrick   quotient.s.high = 0;
138*1f9cb04fSpatrick   quotient.s.low = 0;
139*1f9cb04fSpatrick   for (; shift >= 0; --shift) {
140*1f9cb04fSpatrick     quotient.s.low <<= 1;
141*1f9cb04fSpatrick     // Branch free version of.
142*1f9cb04fSpatrick     // if (dividend.all >= divisor.all)
1433cab2bb3Spatrick     // {
144*1f9cb04fSpatrick     //    dividend.all -= divisor.all;
1453cab2bb3Spatrick     //    carry = 1;
1463cab2bb3Spatrick     // }
147*1f9cb04fSpatrick     const ti_int s =
148*1f9cb04fSpatrick         (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
149*1f9cb04fSpatrick     quotient.s.low |= s & 1;
150*1f9cb04fSpatrick     dividend.all -= divisor.all & s;
151*1f9cb04fSpatrick     divisor.all >>= 1;
1523cab2bb3Spatrick   }
1533cab2bb3Spatrick   if (rem)
154*1f9cb04fSpatrick     *rem = dividend.all;
155*1f9cb04fSpatrick   return quotient.all;
1563cab2bb3Spatrick }
1573cab2bb3Spatrick 
1583cab2bb3Spatrick #endif // CRT_HAS_128BIT
159