1 /* $OpenBSD: qdivrem.c,v 1.8 2014/06/07 15:28:21 deraadt Exp $ */
2 /*-
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This software was developed by the Computer Systems Engineering group
7 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
8 * contributed to Berkeley.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*
36 * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
37 * section 4.3.1, pp. 257--259.
38 */
39
40 #include "quad.h"
41
42 #define B ((int)1 << HALF_BITS) /* digit base */
43
44 /* Combine two `digits' to make a single two-digit number. */
45 #define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b))
46
47 /* select a type for digits in base B: use unsigned short if they fit */
48 #if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff
49 typedef unsigned short digit;
50 #else
51 typedef u_int digit;
52 #endif
53
54 static void shl(digit *p, int len, int sh);
55
56 /*
57 * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
58 *
59 * We do this in base 2-sup-HALF_BITS, so that all intermediate products
60 * fit within u_int. As a consequence, the maximum length dividend and
61 * divisor are 4 `digits' in this base (they are shorter if they have
62 * leading zeros).
63 */
64 u_quad_t
__qdivrem(u_quad_t uq,u_quad_t vq,u_quad_t * arq)65 __qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq)
66 {
67 union uu tmp;
68 digit *u, *v, *q;
69 digit v1, v2;
70 u_int qhat, rhat, t;
71 int m, n, d, j, i;
72 digit uspace[5], vspace[5], qspace[5];
73
74 /*
75 * Take care of special cases: divide by zero, and u < v.
76 */
77 if (vq == 0) {
78 /* divide by zero. */
79 static volatile const unsigned int zero = 0;
80
81 tmp.ul[H] = tmp.ul[L] = 1 / zero;
82 if (arq)
83 *arq = uq;
84 return (tmp.q);
85 }
86 if (uq < vq) {
87 if (arq)
88 *arq = uq;
89 return (0);
90 }
91 u = &uspace[0];
92 v = &vspace[0];
93 q = &qspace[0];
94
95 /*
96 * Break dividend and divisor into digits in base B, then
97 * count leading zeros to determine m and n. When done, we
98 * will have:
99 * u = (u[1]u[2]...u[m+n]) sub B
100 * v = (v[1]v[2]...v[n]) sub B
101 * v[1] != 0
102 * 1 < n <= 4 (if n = 1, we use a different division algorithm)
103 * m >= 0 (otherwise u < v, which we already checked)
104 * m + n = 4
105 * and thus
106 * m = 4 - n <= 2
107 */
108 tmp.uq = uq;
109 u[0] = 0;
110 u[1] = (digit)HHALF(tmp.ul[H]);
111 u[2] = (digit)LHALF(tmp.ul[H]);
112 u[3] = (digit)HHALF(tmp.ul[L]);
113 u[4] = (digit)LHALF(tmp.ul[L]);
114 tmp.uq = vq;
115 v[1] = (digit)HHALF(tmp.ul[H]);
116 v[2] = (digit)LHALF(tmp.ul[H]);
117 v[3] = (digit)HHALF(tmp.ul[L]);
118 v[4] = (digit)LHALF(tmp.ul[L]);
119 for (n = 4; v[1] == 0; v++) {
120 if (--n == 1) {
121 u_int rbj; /* r*B+u[j] (not root boy jim) */
122 digit q1, q2, q3, q4;
123
124 /*
125 * Change of plan, per exercise 16.
126 * r = 0;
127 * for j = 1..4:
128 * q[j] = floor((r*B + u[j]) / v),
129 * r = (r*B + u[j]) % v;
130 * We unroll this completely here.
131 */
132 t = v[2]; /* nonzero, by definition */
133 q1 = (digit)(u[1] / t);
134 rbj = COMBINE(u[1] % t, u[2]);
135 q2 = (digit)(rbj / t);
136 rbj = COMBINE(rbj % t, u[3]);
137 q3 = (digit)(rbj / t);
138 rbj = COMBINE(rbj % t, u[4]);
139 q4 = (digit)(rbj / t);
140 if (arq)
141 *arq = rbj % t;
142 tmp.ul[H] = COMBINE(q1, q2);
143 tmp.ul[L] = COMBINE(q3, q4);
144 return (tmp.q);
145 }
146 }
147
148 /*
149 * By adjusting q once we determine m, we can guarantee that
150 * there is a complete four-digit quotient at &qspace[1] when
151 * we finally stop.
152 */
153 for (m = 4 - n; u[1] == 0; u++)
154 m--;
155 for (i = 4 - m; --i >= 0;)
156 q[i] = 0;
157 q += 4 - m;
158
159 /*
160 * Here we run Program D, translated from MIX to C and acquiring
161 * a few minor changes.
162 *
163 * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
164 */
165 d = 0;
166 for (t = v[1]; t < B / 2; t <<= 1)
167 d++;
168 if (d > 0) {
169 shl(&u[0], m + n, d); /* u <<= d */
170 shl(&v[1], n - 1, d); /* v <<= d */
171 }
172 /*
173 * D2: j = 0.
174 */
175 j = 0;
176 v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
177 v2 = v[2]; /* for D3 */
178 do {
179 digit uj0, uj1, uj2;
180
181 /*
182 * D3: Calculate qhat (\^q, in TeX notation).
183 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
184 * let rhat = (u[j]*B + u[j+1]) mod v[1].
185 * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
186 * decrement qhat and increase rhat correspondingly.
187 * Note that if rhat >= B, v[2]*qhat < rhat*B.
188 */
189 uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
190 uj1 = u[j + 1]; /* for D3 only */
191 uj2 = u[j + 2]; /* for D3 only */
192 if (uj0 == v1) {
193 qhat = B;
194 rhat = uj1;
195 goto qhat_too_big;
196 } else {
197 u_int nn = COMBINE(uj0, uj1);
198 qhat = nn / v1;
199 rhat = nn % v1;
200 }
201 while (v2 * qhat > COMBINE(rhat, uj2)) {
202 qhat_too_big:
203 qhat--;
204 if ((rhat += v1) >= B)
205 break;
206 }
207 /*
208 * D4: Multiply and subtract.
209 * The variable `t' holds any borrows across the loop.
210 * We split this up so that we do not require v[0] = 0,
211 * and to eliminate a final special case.
212 */
213 for (t = 0, i = n; i > 0; i--) {
214 t = u[i + j] - v[i] * qhat - t;
215 u[i + j] = (digit)LHALF(t);
216 t = (B - HHALF(t)) & (B - 1);
217 }
218 t = u[j] - t;
219 u[j] = (digit)LHALF(t);
220 /*
221 * D5: test remainder.
222 * There is a borrow if and only if HHALF(t) is nonzero;
223 * in that (rare) case, qhat was too large (by exactly 1).
224 * Fix it by adding v[1..n] to u[j..j+n].
225 */
226 if (HHALF(t)) {
227 qhat--;
228 for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
229 t += u[i + j] + v[i];
230 u[i + j] = (digit)LHALF(t);
231 t = HHALF(t);
232 }
233 u[j] = (digit)LHALF(u[j] + t);
234 }
235 q[j] = (digit)qhat;
236 } while (++j <= m); /* D7: loop on j. */
237
238 /*
239 * If caller wants the remainder, we have to calculate it as
240 * u[m..m+n] >> d (this is at most n digits and thus fits in
241 * u[m+1..m+n], but we may need more source digits).
242 */
243 if (arq) {
244 if (d) {
245 for (i = m + n; i > m; --i)
246 u[i] = (digit)(((u_int)u[i] >> d) |
247 LHALF((u_int)u[i - 1] << (HALF_BITS - d)));
248 u[i] = 0;
249 }
250 tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
251 tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
252 *arq = tmp.q;
253 }
254
255 tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
256 tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
257 return (tmp.q);
258 }
259
260 /*
261 * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
262 * `fall out' the left (there never will be any such anyway).
263 * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
264 */
265 static void
shl(digit * p,int len,int sh)266 shl(digit *p, int len, int sh)
267 {
268 int i;
269
270 for (i = 0; i < len; i++)
271 p[i] = (digit)(LHALF((u_int)p[i] << sh) |
272 ((u_int)p[i + 1] >> (HALF_BITS - sh)));
273 p[i] = (digit)(LHALF((u_int)p[i] << sh));
274 }
275