1df930be7Sderaadt /*-
2df930be7Sderaadt * Copyright (c) 1992, 1993
3df930be7Sderaadt * The Regents of the University of California. All rights reserved.
4df930be7Sderaadt *
5df930be7Sderaadt * This software was developed by the Computer Systems Engineering group
6df930be7Sderaadt * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7df930be7Sderaadt * contributed to Berkeley.
8df930be7Sderaadt *
9df930be7Sderaadt * Redistribution and use in source and binary forms, with or without
10df930be7Sderaadt * modification, are permitted provided that the following conditions
11df930be7Sderaadt * are met:
12df930be7Sderaadt * 1. Redistributions of source code must retain the above copyright
13df930be7Sderaadt * notice, this list of conditions and the following disclaimer.
14df930be7Sderaadt * 2. Redistributions in binary form must reproduce the above copyright
15df930be7Sderaadt * notice, this list of conditions and the following disclaimer in the
16df930be7Sderaadt * documentation and/or other materials provided with the distribution.
1729295d1cSmillert * 3. Neither the name of the University nor the names of its contributors
18df930be7Sderaadt * may be used to endorse or promote products derived from this software
19df930be7Sderaadt * without specific prior written permission.
20df930be7Sderaadt *
21df930be7Sderaadt * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22df930be7Sderaadt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23df930be7Sderaadt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24df930be7Sderaadt * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25df930be7Sderaadt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26df930be7Sderaadt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27df930be7Sderaadt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28df930be7Sderaadt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29df930be7Sderaadt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30df930be7Sderaadt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31df930be7Sderaadt * SUCH DAMAGE.
32df930be7Sderaadt */
33df930be7Sderaadt
34df930be7Sderaadt /*
35df930be7Sderaadt * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
36df930be7Sderaadt * section 4.3.1, pp. 257--259.
37df930be7Sderaadt */
38df930be7Sderaadt
39df930be7Sderaadt #include "quad.h"
40df930be7Sderaadt
41f6a3c9b1Smickey #define B ((int)1 << HALF_BITS) /* digit base */
42df930be7Sderaadt
43df930be7Sderaadt /* Combine two `digits' to make a single two-digit number. */
44f6a3c9b1Smickey #define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b))
45df930be7Sderaadt
46df930be7Sderaadt /* select a type for digits in base B: use unsigned short if they fit */
47f6a3c9b1Smickey #if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff
48df930be7Sderaadt typedef unsigned short digit;
49df930be7Sderaadt #else
50f6a3c9b1Smickey typedef u_int digit;
51df930be7Sderaadt #endif
52df930be7Sderaadt
53*85e3b77dSjsg static void shl(digit *p, int len, int sh);
54df930be7Sderaadt
55df930be7Sderaadt /*
56df930be7Sderaadt * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
57df930be7Sderaadt *
58df930be7Sderaadt * We do this in base 2-sup-HALF_BITS, so that all intermediate products
59f6a3c9b1Smickey * fit within u_int. As a consequence, the maximum length dividend and
60df930be7Sderaadt * divisor are 4 `digits' in this base (they are shorter if they have
61df930be7Sderaadt * leading zeros).
62df930be7Sderaadt */
63df930be7Sderaadt u_quad_t
__qdivrem(u_quad_t uq,u_quad_t vq,u_quad_t * arq)64a26aa419Sderaadt __qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq)
65df930be7Sderaadt {
66df930be7Sderaadt union uu tmp;
67df930be7Sderaadt digit *u, *v, *q;
68a26aa419Sderaadt digit v1, v2;
69f6a3c9b1Smickey u_int qhat, rhat, t;
70df930be7Sderaadt int m, n, d, j, i;
71df930be7Sderaadt digit uspace[5], vspace[5], qspace[5];
72df930be7Sderaadt
73df930be7Sderaadt /*
74df930be7Sderaadt * Take care of special cases: divide by zero, and u < v.
75df930be7Sderaadt */
76df930be7Sderaadt if (vq == 0) {
77df930be7Sderaadt /* divide by zero. */
78df930be7Sderaadt static volatile const unsigned int zero = 0;
79df930be7Sderaadt
80df930be7Sderaadt tmp.ul[H] = tmp.ul[L] = 1 / zero;
81df930be7Sderaadt if (arq)
82df930be7Sderaadt *arq = uq;
83df930be7Sderaadt return (tmp.q);
84df930be7Sderaadt }
85df930be7Sderaadt if (uq < vq) {
86df930be7Sderaadt if (arq)
87df930be7Sderaadt *arq = uq;
88df930be7Sderaadt return (0);
89df930be7Sderaadt }
90df930be7Sderaadt u = &uspace[0];
91df930be7Sderaadt v = &vspace[0];
92df930be7Sderaadt q = &qspace[0];
93df930be7Sderaadt
94df930be7Sderaadt /*
95df930be7Sderaadt * Break dividend and divisor into digits in base B, then
96df930be7Sderaadt * count leading zeros to determine m and n. When done, we
97df930be7Sderaadt * will have:
98df930be7Sderaadt * u = (u[1]u[2]...u[m+n]) sub B
99df930be7Sderaadt * v = (v[1]v[2]...v[n]) sub B
100df930be7Sderaadt * v[1] != 0
101df930be7Sderaadt * 1 < n <= 4 (if n = 1, we use a different division algorithm)
102df930be7Sderaadt * m >= 0 (otherwise u < v, which we already checked)
103df930be7Sderaadt * m + n = 4
104df930be7Sderaadt * and thus
105df930be7Sderaadt * m = 4 - n <= 2
106df930be7Sderaadt */
107df930be7Sderaadt tmp.uq = uq;
108df930be7Sderaadt u[0] = 0;
109f6a3c9b1Smickey u[1] = (digit)HHALF(tmp.ul[H]);
110f6a3c9b1Smickey u[2] = (digit)LHALF(tmp.ul[H]);
111f6a3c9b1Smickey u[3] = (digit)HHALF(tmp.ul[L]);
112f6a3c9b1Smickey u[4] = (digit)LHALF(tmp.ul[L]);
113df930be7Sderaadt tmp.uq = vq;
114f6a3c9b1Smickey v[1] = (digit)HHALF(tmp.ul[H]);
115f6a3c9b1Smickey v[2] = (digit)LHALF(tmp.ul[H]);
116f6a3c9b1Smickey v[3] = (digit)HHALF(tmp.ul[L]);
117f6a3c9b1Smickey v[4] = (digit)LHALF(tmp.ul[L]);
118df930be7Sderaadt for (n = 4; v[1] == 0; v++) {
119df930be7Sderaadt if (--n == 1) {
120f6a3c9b1Smickey u_int rbj; /* r*B+u[j] (not root boy jim) */
121df930be7Sderaadt digit q1, q2, q3, q4;
122df930be7Sderaadt
123df930be7Sderaadt /*
124df930be7Sderaadt * Change of plan, per exercise 16.
125df930be7Sderaadt * r = 0;
126df930be7Sderaadt * for j = 1..4:
127df930be7Sderaadt * q[j] = floor((r*B + u[j]) / v),
128df930be7Sderaadt * r = (r*B + u[j]) % v;
129df930be7Sderaadt * We unroll this completely here.
130df930be7Sderaadt */
131df930be7Sderaadt t = v[2]; /* nonzero, by definition */
132f6a3c9b1Smickey q1 = (digit)(u[1] / t);
133df930be7Sderaadt rbj = COMBINE(u[1] % t, u[2]);
134f6a3c9b1Smickey q2 = (digit)(rbj / t);
135df930be7Sderaadt rbj = COMBINE(rbj % t, u[3]);
136f6a3c9b1Smickey q3 = (digit)(rbj / t);
137df930be7Sderaadt rbj = COMBINE(rbj % t, u[4]);
138f6a3c9b1Smickey q4 = (digit)(rbj / t);
139df930be7Sderaadt if (arq)
140df930be7Sderaadt *arq = rbj % t;
141df930be7Sderaadt tmp.ul[H] = COMBINE(q1, q2);
142df930be7Sderaadt tmp.ul[L] = COMBINE(q3, q4);
143df930be7Sderaadt return (tmp.q);
144df930be7Sderaadt }
145df930be7Sderaadt }
146df930be7Sderaadt
147df930be7Sderaadt /*
148df930be7Sderaadt * By adjusting q once we determine m, we can guarantee that
149df930be7Sderaadt * there is a complete four-digit quotient at &qspace[1] when
150df930be7Sderaadt * we finally stop.
151df930be7Sderaadt */
152df930be7Sderaadt for (m = 4 - n; u[1] == 0; u++)
153df930be7Sderaadt m--;
154df930be7Sderaadt for (i = 4 - m; --i >= 0;)
155df930be7Sderaadt q[i] = 0;
156df930be7Sderaadt q += 4 - m;
157df930be7Sderaadt
158df930be7Sderaadt /*
159df930be7Sderaadt * Here we run Program D, translated from MIX to C and acquiring
160df930be7Sderaadt * a few minor changes.
161df930be7Sderaadt *
162df930be7Sderaadt * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
163df930be7Sderaadt */
164df930be7Sderaadt d = 0;
165df930be7Sderaadt for (t = v[1]; t < B / 2; t <<= 1)
166df930be7Sderaadt d++;
167df930be7Sderaadt if (d > 0) {
168df930be7Sderaadt shl(&u[0], m + n, d); /* u <<= d */
169df930be7Sderaadt shl(&v[1], n - 1, d); /* v <<= d */
170df930be7Sderaadt }
171df930be7Sderaadt /*
172df930be7Sderaadt * D2: j = 0.
173df930be7Sderaadt */
174df930be7Sderaadt j = 0;
175df930be7Sderaadt v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
176df930be7Sderaadt v2 = v[2]; /* for D3 */
177df930be7Sderaadt do {
178a26aa419Sderaadt digit uj0, uj1, uj2;
179df930be7Sderaadt
180df930be7Sderaadt /*
181df930be7Sderaadt * D3: Calculate qhat (\^q, in TeX notation).
182df930be7Sderaadt * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
183df930be7Sderaadt * let rhat = (u[j]*B + u[j+1]) mod v[1].
184df930be7Sderaadt * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
185df930be7Sderaadt * decrement qhat and increase rhat correspondingly.
186df930be7Sderaadt * Note that if rhat >= B, v[2]*qhat < rhat*B.
187df930be7Sderaadt */
188df930be7Sderaadt uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
189df930be7Sderaadt uj1 = u[j + 1]; /* for D3 only */
190df930be7Sderaadt uj2 = u[j + 2]; /* for D3 only */
191df930be7Sderaadt if (uj0 == v1) {
192df930be7Sderaadt qhat = B;
193df930be7Sderaadt rhat = uj1;
194df930be7Sderaadt goto qhat_too_big;
195df930be7Sderaadt } else {
196f6a3c9b1Smickey u_int nn = COMBINE(uj0, uj1);
197f6a3c9b1Smickey qhat = nn / v1;
198f6a3c9b1Smickey rhat = nn % v1;
199df930be7Sderaadt }
200df930be7Sderaadt while (v2 * qhat > COMBINE(rhat, uj2)) {
201df930be7Sderaadt qhat_too_big:
202df930be7Sderaadt qhat--;
203df930be7Sderaadt if ((rhat += v1) >= B)
204df930be7Sderaadt break;
205df930be7Sderaadt }
206df930be7Sderaadt /*
207df930be7Sderaadt * D4: Multiply and subtract.
208df930be7Sderaadt * The variable `t' holds any borrows across the loop.
209df930be7Sderaadt * We split this up so that we do not require v[0] = 0,
210df930be7Sderaadt * and to eliminate a final special case.
211df930be7Sderaadt */
212df930be7Sderaadt for (t = 0, i = n; i > 0; i--) {
213df930be7Sderaadt t = u[i + j] - v[i] * qhat - t;
214f6a3c9b1Smickey u[i + j] = (digit)LHALF(t);
215df930be7Sderaadt t = (B - HHALF(t)) & (B - 1);
216df930be7Sderaadt }
217df930be7Sderaadt t = u[j] - t;
218f6a3c9b1Smickey u[j] = (digit)LHALF(t);
219df930be7Sderaadt /*
220df930be7Sderaadt * D5: test remainder.
221df930be7Sderaadt * There is a borrow if and only if HHALF(t) is nonzero;
222df930be7Sderaadt * in that (rare) case, qhat was too large (by exactly 1).
223df930be7Sderaadt * Fix it by adding v[1..n] to u[j..j+n].
224df930be7Sderaadt */
225df930be7Sderaadt if (HHALF(t)) {
226df930be7Sderaadt qhat--;
227df930be7Sderaadt for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
228df930be7Sderaadt t += u[i + j] + v[i];
229f6a3c9b1Smickey u[i + j] = (digit)LHALF(t);
230df930be7Sderaadt t = HHALF(t);
231df930be7Sderaadt }
232f6a3c9b1Smickey u[j] = (digit)LHALF(u[j] + t);
233df930be7Sderaadt }
234f6a3c9b1Smickey q[j] = (digit)qhat;
235df930be7Sderaadt } while (++j <= m); /* D7: loop on j. */
236df930be7Sderaadt
237df930be7Sderaadt /*
238df930be7Sderaadt * If caller wants the remainder, we have to calculate it as
239df930be7Sderaadt * u[m..m+n] >> d (this is at most n digits and thus fits in
240df930be7Sderaadt * u[m+1..m+n], but we may need more source digits).
241df930be7Sderaadt */
242df930be7Sderaadt if (arq) {
243df930be7Sderaadt if (d) {
244df930be7Sderaadt for (i = m + n; i > m; --i)
245f6a3c9b1Smickey u[i] = (digit)(((u_int)u[i] >> d) |
246f6a3c9b1Smickey LHALF((u_int)u[i - 1] << (HALF_BITS - d)));
247df930be7Sderaadt u[i] = 0;
248df930be7Sderaadt }
249df930be7Sderaadt tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
250df930be7Sderaadt tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
251df930be7Sderaadt *arq = tmp.q;
252df930be7Sderaadt }
253df930be7Sderaadt
254df930be7Sderaadt tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
255df930be7Sderaadt tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
256df930be7Sderaadt return (tmp.q);
257df930be7Sderaadt }
258df930be7Sderaadt
259df930be7Sderaadt /*
260df930be7Sderaadt * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
261df930be7Sderaadt * `fall out' the left (there never will be any such anyway).
262df930be7Sderaadt * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
263df930be7Sderaadt */
264df930be7Sderaadt static void
shl(digit * p,int len,int sh)265a26aa419Sderaadt shl(digit *p, int len, int sh)
266df930be7Sderaadt {
267a26aa419Sderaadt int i;
268df930be7Sderaadt
269df930be7Sderaadt for (i = 0; i < len; i++)
270f6a3c9b1Smickey p[i] = (digit)(LHALF((u_int)p[i] << sh) |
271f6a3c9b1Smickey ((u_int)p[i + 1] >> (HALF_BITS - sh)));
272f6a3c9b1Smickey p[i] = (digit)(LHALF((u_int)p[i] << sh));
273df930be7Sderaadt }
274