1 /*- 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #if defined(LIBC_SCCS) && !defined(lint) 39 static char rcsid[] = "$OpenBSD: qdivrem.c,v 1.2 1996/08/19 08:30:42 tholo Exp $"; 40 #endif /* LIBC_SCCS and not lint */ 41 42 /* 43 * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), 44 * section 4.3.1, pp. 257--259. 45 */ 46 47 #include "quad.h" 48 49 #define B ((long)1 << HALF_BITS) /* digit base */ 50 51 /* Combine two `digits' to make a single two-digit number. */ 52 #define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) 53 54 /* select a type for digits in base B: use unsigned short if they fit */ 55 #if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff 56 typedef unsigned short digit; 57 #else 58 typedef u_long digit; 59 #endif 60 61 static void shl __P((digit *p, int len, int sh)); 62 63 /* 64 * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 65 * 66 * We do this in base 2-sup-HALF_BITS, so that all intermediate products 67 * fit within u_long. As a consequence, the maximum length dividend and 68 * divisor are 4 `digits' in this base (they are shorter if they have 69 * leading zeros). 70 */ 71 u_quad_t 72 __qdivrem(uq, vq, arq) 73 u_quad_t uq, vq, *arq; 74 { 75 union uu tmp; 76 digit *u, *v, *q; 77 register digit v1, v2; 78 u_long qhat, rhat, t; 79 int m, n, d, j, i; 80 digit uspace[5], vspace[5], qspace[5]; 81 82 /* 83 * Take care of special cases: divide by zero, and u < v. 84 */ 85 if (vq == 0) { 86 /* divide by zero. */ 87 static volatile const unsigned int zero = 0; 88 89 tmp.ul[H] = tmp.ul[L] = 1 / zero; 90 if (arq) 91 *arq = uq; 92 return (tmp.q); 93 } 94 if (uq < vq) { 95 if (arq) 96 *arq = uq; 97 return (0); 98 } 99 u = &uspace[0]; 100 v = &vspace[0]; 101 q = &qspace[0]; 102 103 /* 104 * Break dividend and divisor into digits in base B, then 105 * count leading zeros to determine m and n. When done, we 106 * will have: 107 * u = (u[1]u[2]...u[m+n]) sub B 108 * v = (v[1]v[2]...v[n]) sub B 109 * v[1] != 0 110 * 1 < n <= 4 (if n = 1, we use a different division algorithm) 111 * m >= 0 (otherwise u < v, which we already checked) 112 * m + n = 4 113 * and thus 114 * m = 4 - n <= 2 115 */ 116 tmp.uq = uq; 117 u[0] = 0; 118 u[1] = HHALF(tmp.ul[H]); 119 u[2] = LHALF(tmp.ul[H]); 120 u[3] = HHALF(tmp.ul[L]); 121 u[4] = LHALF(tmp.ul[L]); 122 tmp.uq = vq; 123 v[1] = HHALF(tmp.ul[H]); 124 v[2] = LHALF(tmp.ul[H]); 125 v[3] = HHALF(tmp.ul[L]); 126 v[4] = LHALF(tmp.ul[L]); 127 for (n = 4; v[1] == 0; v++) { 128 if (--n == 1) { 129 u_long rbj; /* r*B+u[j] (not root boy jim) */ 130 digit q1, q2, q3, q4; 131 132 /* 133 * Change of plan, per exercise 16. 134 * r = 0; 135 * for j = 1..4: 136 * q[j] = floor((r*B + u[j]) / v), 137 * r = (r*B + u[j]) % v; 138 * We unroll this completely here. 139 */ 140 t = v[2]; /* nonzero, by definition */ 141 q1 = u[1] / t; 142 rbj = COMBINE(u[1] % t, u[2]); 143 q2 = rbj / t; 144 rbj = COMBINE(rbj % t, u[3]); 145 q3 = rbj / t; 146 rbj = COMBINE(rbj % t, u[4]); 147 q4 = rbj / t; 148 if (arq) 149 *arq = rbj % t; 150 tmp.ul[H] = COMBINE(q1, q2); 151 tmp.ul[L] = COMBINE(q3, q4); 152 return (tmp.q); 153 } 154 } 155 156 /* 157 * By adjusting q once we determine m, we can guarantee that 158 * there is a complete four-digit quotient at &qspace[1] when 159 * we finally stop. 160 */ 161 for (m = 4 - n; u[1] == 0; u++) 162 m--; 163 for (i = 4 - m; --i >= 0;) 164 q[i] = 0; 165 q += 4 - m; 166 167 /* 168 * Here we run Program D, translated from MIX to C and acquiring 169 * a few minor changes. 170 * 171 * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 172 */ 173 d = 0; 174 for (t = v[1]; t < B / 2; t <<= 1) 175 d++; 176 if (d > 0) { 177 shl(&u[0], m + n, d); /* u <<= d */ 178 shl(&v[1], n - 1, d); /* v <<= d */ 179 } 180 /* 181 * D2: j = 0. 182 */ 183 j = 0; 184 v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ 185 v2 = v[2]; /* for D3 */ 186 do { 187 register digit uj0, uj1, uj2; 188 189 /* 190 * D3: Calculate qhat (\^q, in TeX notation). 191 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and 192 * let rhat = (u[j]*B + u[j+1]) mod v[1]. 193 * While rhat < B and v[2]*qhat > rhat*B+u[j+2], 194 * decrement qhat and increase rhat correspondingly. 195 * Note that if rhat >= B, v[2]*qhat < rhat*B. 196 */ 197 uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ 198 uj1 = u[j + 1]; /* for D3 only */ 199 uj2 = u[j + 2]; /* for D3 only */ 200 if (uj0 == v1) { 201 qhat = B; 202 rhat = uj1; 203 goto qhat_too_big; 204 } else { 205 u_long n = COMBINE(uj0, uj1); 206 qhat = n / v1; 207 rhat = n % v1; 208 } 209 while (v2 * qhat > COMBINE(rhat, uj2)) { 210 qhat_too_big: 211 qhat--; 212 if ((rhat += v1) >= B) 213 break; 214 } 215 /* 216 * D4: Multiply and subtract. 217 * The variable `t' holds any borrows across the loop. 218 * We split this up so that we do not require v[0] = 0, 219 * and to eliminate a final special case. 220 */ 221 for (t = 0, i = n; i > 0; i--) { 222 t = u[i + j] - v[i] * qhat - t; 223 u[i + j] = LHALF(t); 224 t = (B - HHALF(t)) & (B - 1); 225 } 226 t = u[j] - t; 227 u[j] = LHALF(t); 228 /* 229 * D5: test remainder. 230 * There is a borrow if and only if HHALF(t) is nonzero; 231 * in that (rare) case, qhat was too large (by exactly 1). 232 * Fix it by adding v[1..n] to u[j..j+n]. 233 */ 234 if (HHALF(t)) { 235 qhat--; 236 for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ 237 t += u[i + j] + v[i]; 238 u[i + j] = LHALF(t); 239 t = HHALF(t); 240 } 241 u[j] = LHALF(u[j] + t); 242 } 243 q[j] = qhat; 244 } while (++j <= m); /* D7: loop on j. */ 245 246 /* 247 * If caller wants the remainder, we have to calculate it as 248 * u[m..m+n] >> d (this is at most n digits and thus fits in 249 * u[m+1..m+n], but we may need more source digits). 250 */ 251 if (arq) { 252 if (d) { 253 for (i = m + n; i > m; --i) 254 u[i] = (u[i] >> d) | 255 LHALF(u[i - 1] << (HALF_BITS - d)); 256 u[i] = 0; 257 } 258 tmp.ul[H] = COMBINE(uspace[1], uspace[2]); 259 tmp.ul[L] = COMBINE(uspace[3], uspace[4]); 260 *arq = tmp.q; 261 } 262 263 tmp.ul[H] = COMBINE(qspace[1], qspace[2]); 264 tmp.ul[L] = COMBINE(qspace[3], qspace[4]); 265 return (tmp.q); 266 } 267 268 /* 269 * Shift p[0]..p[len] left `sh' bits, ignoring any bits that 270 * `fall out' the left (there never will be any such anyway). 271 * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 272 */ 273 static void 274 shl(register digit *p, register int len, register int sh) 275 { 276 register int i; 277 278 for (i = 0; i < len; i++) 279 p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); 280 p[i] = LHALF(p[i] << sh); 281 } 282