1 /* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
2 quotient. The quotient returned is either correct, or one too large.
3
4 Contributed to the GNU project by Torbjorn Granlund.
5
6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10 Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
20
21 or
22
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
26
27 or both in parallel, as here.
28
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
33
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
37
38 #include "gmp-impl.h"
39 #include "longlong.h"
40
41
42 static mp_limb_t
mpn_dcpi1_divappr_q_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,gmp_pi1_t * dinv,mp_ptr tp)43 mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
44 gmp_pi1_t *dinv, mp_ptr tp)
45 {
46 mp_size_t lo, hi;
47 mp_limb_t cy, qh, ql;
48
49 lo = n >> 1; /* floor(n/2) */
50 hi = n - lo; /* ceil(n/2) */
51
52 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
53 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
54 else
55 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
56
57 mpn_mul (tp, qp + lo, hi, dp, lo);
58
59 cy = mpn_sub_n (np + lo, np + lo, tp, n);
60 if (qh != 0)
61 cy += mpn_sub_n (np + n, np + n, dp, lo);
62
63 while (cy != 0)
64 {
65 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
66 cy -= mpn_add_n (np + lo, np + lo, dp, n);
67 }
68
69 if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
70 ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
71 else
72 ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
73
74 if (UNLIKELY (ql != 0))
75 {
76 mp_size_t i;
77 for (i = 0; i < lo; i++)
78 qp[i] = GMP_NUMB_MASK;
79 }
80
81 return qh;
82 }
83
84 mp_limb_t
mpn_dcpi1_divappr_q(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,gmp_pi1_t * dinv)85 mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
86 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
87 {
88 mp_size_t qn;
89 mp_limb_t qh, cy, qsave;
90 mp_ptr tp;
91 TMP_DECL;
92
93 TMP_MARK;
94
95 ASSERT (dn >= 6);
96 ASSERT (nn > dn);
97 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
98
99 qn = nn - dn;
100 qp += qn;
101 np += nn;
102 dp += dn;
103
104 if (qn >= dn)
105 {
106 qn++; /* pretend we'll need an extra limb */
107 /* Reduce qn mod dn without division, optimizing small operations. */
108 do
109 qn -= dn;
110 while (qn > dn);
111
112 qp -= qn; /* point at low limb of next quotient block */
113 np -= qn; /* point in the middle of partial remainder */
114
115 tp = TMP_SALLOC_LIMBS (dn);
116
117 /* Perform the typically smaller block first. */
118 if (qn == 1)
119 {
120 mp_limb_t q, n2, n1, n0, d1, d0;
121
122 /* Handle qh up front, for simplicity. */
123 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
124 if (qh)
125 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
126
127 /* A single iteration of schoolbook: One 3/2 division,
128 followed by the bignum update and adjustment. */
129 n2 = np[0];
130 n1 = np[-1];
131 n0 = np[-2];
132 d1 = dp[-1];
133 d0 = dp[-2];
134
135 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
136
137 if (UNLIKELY (n2 == d1) && n1 == d0)
138 {
139 q = GMP_NUMB_MASK;
140 cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
141 ASSERT (cy == n2);
142 }
143 else
144 {
145 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
146
147 if (dn > 2)
148 {
149 mp_limb_t cy, cy1;
150 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
151
152 cy1 = n0 < cy;
153 n0 = (n0 - cy) & GMP_NUMB_MASK;
154 cy = n1 < cy1;
155 n1 = (n1 - cy1) & GMP_NUMB_MASK;
156 np[-2] = n0;
157
158 if (UNLIKELY (cy != 0))
159 {
160 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
161 qh -= (q == 0);
162 q = (q - 1) & GMP_NUMB_MASK;
163 }
164 }
165 else
166 np[-2] = n0;
167
168 np[-1] = n1;
169 }
170 qp[0] = q;
171 }
172 else
173 {
174 if (qn == 2)
175 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
176 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
177 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
178 else
179 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
180
181 if (qn != dn)
182 {
183 if (qn > dn - qn)
184 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
185 else
186 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
187
188 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
189 if (qh != 0)
190 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
191
192 while (cy != 0)
193 {
194 qh -= mpn_sub_1 (qp, qp, qn, 1);
195 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
196 }
197 }
198 }
199 qn = nn - dn - qn + 1;
200 while (qn > dn)
201 {
202 qp -= dn;
203 np -= dn;
204 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
205 qn -= dn;
206 }
207
208 /* Since we pretended we'd need an extra quotient limb before, we now
209 have made sure the code above left just dn-1=qn quotient limbs to
210 develop. Develop that plus a guard limb. */
211 qn--;
212 qp -= qn;
213 np -= dn;
214 qsave = qp[qn];
215 mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
216 MPN_COPY_INCR (qp, qp + 1, qn);
217 qp[qn] = qsave;
218 }
219 else /* (qn < dn) */
220 {
221 mp_ptr q2p;
222 #if 0 /* not possible since we demand nn > dn */
223 if (qn == 0)
224 {
225 qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
226 if (qh)
227 mpn_sub_n (np - dn, np - dn, dp - dn, dn);
228 TMP_FREE;
229 return qh;
230 }
231 #endif
232
233 qp -= qn; /* point at low limb of next quotient block */
234 np -= qn; /* point in the middle of partial remainder */
235
236 q2p = TMP_SALLOC_LIMBS (qn + 1);
237 /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
238 callers not to be silly? */
239 if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
240 {
241 qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
242 dp - (qn + 1), qn + 1, dinv->inv32);
243 }
244 else
245 {
246 /* It is tempting to use qp for recursive scratch and put quotient in
247 tp, but the recursive scratch needs one limb too many. */
248 tp = TMP_SALLOC_LIMBS (qn + 1);
249 qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
250 }
251 MPN_COPY (qp, q2p + 1, qn);
252 }
253
254 TMP_FREE;
255 return qh;
256 }
257