/* mpn_divrem -- Divide natural numbers, producing both remainder and
   quotient.

Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.  */

#include <config.h>
#include "gmp-impl.h"

26*181254a7Smrg /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
27*181254a7Smrg the NSIZE-DSIZE least significant quotient limbs at QP
28*181254a7Smrg and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
29*181254a7Smrg non-zero, generate that many fraction bits and append them after the
30*181254a7Smrg other quotient limbs.
31*181254a7Smrg Return the most significant limb of the quotient, this is always 0 or 1.
32*181254a7Smrg
33*181254a7Smrg Preconditions:
34*181254a7Smrg 0. NSIZE >= DSIZE.
35*181254a7Smrg 1. The most significant bit of the divisor must be set.
36*181254a7Smrg 2. QP must either not overlap with the input operands at all, or
37*181254a7Smrg QP + DSIZE >= NP must hold true. (This means that it's
38*181254a7Smrg possible to put the quotient in the high part of NUM, right after the
39*181254a7Smrg remainder in NUM.
40*181254a7Smrg 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. */
41*181254a7Smrg
42*181254a7Smrg mp_limb_t
43*181254a7Smrg #if __STDC__
mpn_divrem(mp_ptr qp,mp_size_t qextra_limbs,mp_ptr np,mp_size_t nsize,mp_srcptr dp,mp_size_t dsize)44*181254a7Smrg mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs,
45*181254a7Smrg mp_ptr np, mp_size_t nsize,
46*181254a7Smrg mp_srcptr dp, mp_size_t dsize)
47*181254a7Smrg #else
48*181254a7Smrg mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize)
49*181254a7Smrg mp_ptr qp;
50*181254a7Smrg mp_size_t qextra_limbs;
51*181254a7Smrg mp_ptr np;
52*181254a7Smrg mp_size_t nsize;
53*181254a7Smrg mp_srcptr dp;
54*181254a7Smrg mp_size_t dsize;
55*181254a7Smrg #endif
56*181254a7Smrg {
57*181254a7Smrg mp_limb_t most_significant_q_limb = 0;
58*181254a7Smrg
59*181254a7Smrg switch (dsize)
60*181254a7Smrg {
61*181254a7Smrg case 0:
62*181254a7Smrg /* We are asked to divide by zero, so go ahead and do it! (To make
63*181254a7Smrg the compiler not remove this statement, return the value.) */
64*181254a7Smrg return 1 / dsize;
65*181254a7Smrg
66*181254a7Smrg case 1:
67*181254a7Smrg {
68*181254a7Smrg mp_size_t i;
69*181254a7Smrg mp_limb_t n1;
70*181254a7Smrg mp_limb_t d;
71*181254a7Smrg
72*181254a7Smrg d = dp[0];
73*181254a7Smrg n1 = np[nsize - 1];
74*181254a7Smrg
75*181254a7Smrg if (n1 >= d)
76*181254a7Smrg {
77*181254a7Smrg n1 -= d;
78*181254a7Smrg most_significant_q_limb = 1;
79*181254a7Smrg }
80*181254a7Smrg
81*181254a7Smrg qp += qextra_limbs;
82*181254a7Smrg for (i = nsize - 2; i >= 0; i--)
83*181254a7Smrg udiv_qrnnd (qp[i], n1, n1, np[i], d);
84*181254a7Smrg qp -= qextra_limbs;
85*181254a7Smrg
86*181254a7Smrg for (i = qextra_limbs - 1; i >= 0; i--)
87*181254a7Smrg udiv_qrnnd (qp[i], n1, n1, 0, d);
88*181254a7Smrg
89*181254a7Smrg np[0] = n1;
90*181254a7Smrg }
91*181254a7Smrg break;
92*181254a7Smrg
93*181254a7Smrg case 2:
94*181254a7Smrg {
95*181254a7Smrg mp_size_t i;
96*181254a7Smrg mp_limb_t n1, n0, n2;
97*181254a7Smrg mp_limb_t d1, d0;
98*181254a7Smrg
99*181254a7Smrg np += nsize - 2;
100*181254a7Smrg d1 = dp[1];
101*181254a7Smrg d0 = dp[0];
102*181254a7Smrg n1 = np[1];
103*181254a7Smrg n0 = np[0];
104*181254a7Smrg
105*181254a7Smrg if (n1 >= d1 && (n1 > d1 || n0 >= d0))
106*181254a7Smrg {
107*181254a7Smrg sub_ddmmss (n1, n0, n1, n0, d1, d0);
108*181254a7Smrg most_significant_q_limb = 1;
109*181254a7Smrg }
110*181254a7Smrg
111*181254a7Smrg for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
112*181254a7Smrg {
113*181254a7Smrg mp_limb_t q;
114*181254a7Smrg mp_limb_t r;
115*181254a7Smrg
116*181254a7Smrg if (i >= qextra_limbs)
117*181254a7Smrg np--;
118*181254a7Smrg else
119*181254a7Smrg np[0] = 0;
120*181254a7Smrg
121*181254a7Smrg if (n1 == d1)
122*181254a7Smrg {
123*181254a7Smrg /* Q should be either 111..111 or 111..110. Need special
124*181254a7Smrg treatment of this rare case as normal division would
125*181254a7Smrg give overflow. */
126*181254a7Smrg q = ~(mp_limb_t) 0;
127*181254a7Smrg
128*181254a7Smrg r = n0 + d1;
129*181254a7Smrg if (r < d1) /* Carry in the addition? */
130*181254a7Smrg {
131*181254a7Smrg add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
132*181254a7Smrg qp[i] = q;
133*181254a7Smrg continue;
134*181254a7Smrg }
135*181254a7Smrg n1 = d0 - (d0 != 0);
136*181254a7Smrg n0 = -d0;
137*181254a7Smrg }
138*181254a7Smrg else
139*181254a7Smrg {
140*181254a7Smrg udiv_qrnnd (q, r, n1, n0, d1);
141*181254a7Smrg umul_ppmm (n1, n0, d0, q);
142*181254a7Smrg }
143*181254a7Smrg
144*181254a7Smrg n2 = np[0];
145*181254a7Smrg q_test:
146*181254a7Smrg if (n1 > r || (n1 == r && n0 > n2))
147*181254a7Smrg {
148*181254a7Smrg /* The estimated Q was too large. */
149*181254a7Smrg q--;
150*181254a7Smrg
151*181254a7Smrg sub_ddmmss (n1, n0, n1, n0, 0, d0);
152*181254a7Smrg r += d1;
153*181254a7Smrg if (r >= d1) /* If not carry, test Q again. */
154*181254a7Smrg goto q_test;
155*181254a7Smrg }
156*181254a7Smrg
157*181254a7Smrg qp[i] = q;
158*181254a7Smrg sub_ddmmss (n1, n0, r, n2, n1, n0);
159*181254a7Smrg }
160*181254a7Smrg np[1] = n1;
161*181254a7Smrg np[0] = n0;
162*181254a7Smrg }
163*181254a7Smrg break;
164*181254a7Smrg
165*181254a7Smrg default:
166*181254a7Smrg {
167*181254a7Smrg mp_size_t i;
168*181254a7Smrg mp_limb_t dX, d1, n0;
169*181254a7Smrg
170*181254a7Smrg np += nsize - dsize;
171*181254a7Smrg dX = dp[dsize - 1];
172*181254a7Smrg d1 = dp[dsize - 2];
173*181254a7Smrg n0 = np[dsize - 1];
174*181254a7Smrg
175*181254a7Smrg if (n0 >= dX)
176*181254a7Smrg {
177*181254a7Smrg if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0)
178*181254a7Smrg {
179*181254a7Smrg mpn_sub_n (np, np, dp, dsize);
180*181254a7Smrg n0 = np[dsize - 1];
181*181254a7Smrg most_significant_q_limb = 1;
182*181254a7Smrg }
183*181254a7Smrg }
184*181254a7Smrg
185*181254a7Smrg for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
186*181254a7Smrg {
187*181254a7Smrg mp_limb_t q;
188*181254a7Smrg mp_limb_t n1, n2;
189*181254a7Smrg mp_limb_t cy_limb;
190*181254a7Smrg
191*181254a7Smrg if (i >= qextra_limbs)
192*181254a7Smrg {
193*181254a7Smrg np--;
194*181254a7Smrg n2 = np[dsize];
195*181254a7Smrg }
196*181254a7Smrg else
197*181254a7Smrg {
198*181254a7Smrg n2 = np[dsize - 1];
199*181254a7Smrg MPN_COPY_DECR (np + 1, np, dsize);
200*181254a7Smrg np[0] = 0;
201*181254a7Smrg }
202*181254a7Smrg
203*181254a7Smrg if (n0 == dX)
204*181254a7Smrg /* This might over-estimate q, but it's probably not worth
205*181254a7Smrg the extra code here to find out. */
206*181254a7Smrg q = ~(mp_limb_t) 0;
207*181254a7Smrg else
208*181254a7Smrg {
209*181254a7Smrg mp_limb_t r;
210*181254a7Smrg
211*181254a7Smrg udiv_qrnnd (q, r, n0, np[dsize - 1], dX);
212*181254a7Smrg umul_ppmm (n1, n0, d1, q);
213*181254a7Smrg
214*181254a7Smrg while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
215*181254a7Smrg {
216*181254a7Smrg q--;
217*181254a7Smrg r += dX;
218*181254a7Smrg if (r < dX) /* I.e. "carry in previous addition?" */
219*181254a7Smrg break;
220*181254a7Smrg n1 -= n0 < d1;
221*181254a7Smrg n0 -= d1;
222*181254a7Smrg }
223*181254a7Smrg }
224*181254a7Smrg
225*181254a7Smrg /* Possible optimization: We already have (q * n0) and (1 * n1)
226*181254a7Smrg after the calculation of q. Taking advantage of that, we
227*181254a7Smrg could make this loop make two iterations less. */
228*181254a7Smrg
229*181254a7Smrg cy_limb = mpn_submul_1 (np, dp, dsize, q);
230*181254a7Smrg
231*181254a7Smrg if (n2 != cy_limb)
232*181254a7Smrg {
233*181254a7Smrg mpn_add_n (np, np, dp, dsize);
234*181254a7Smrg q--;
235*181254a7Smrg }
236*181254a7Smrg
237*181254a7Smrg qp[i] = q;
238*181254a7Smrg n0 = np[dsize - 1];
239*181254a7Smrg }
240*181254a7Smrg }
241*181254a7Smrg }
242*181254a7Smrg
243*181254a7Smrg return most_significant_q_limb;
244*181254a7Smrg }
245