1 /* mpn_div_q -- division for arbitrary size operands.
2
3 Contributed to the GNU project by Torbjorn Granlund.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9 Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 #include "gmp-impl.h"
38 #include "longlong.h"
39
40
41 /* Compute Q = N/D with truncation.
42 N = {np,nn}
43 D = {dp,dn}
44 Q = {qp,nn-dn+1}
45 T = {scratch,nn+1} is scratch space
46 N and D are both untouched by the computation.
47 N and T may overlap; pass the same space if N is irrelevant after the call,
48 but note that tp needs an extra limb.
49
50 Operand requirements:
51 N >= D > 0
52 dp[dn-1] != 0
53 No overlap between the N, D, and Q areas.
54
55 This division function does not clobber its input operands, since it is
56 intended to support average-O(qn) division, and for that to be effective, it
57 cannot put requirements on callers to copy a O(nn) operand.
58
59 If a caller does not care about the value of {np,nn+1} after calling this
60 function, it should pass np also for the scratch argument. This function
61 will then save some time and space by avoiding allocation and copying.
62 (FIXME: Is this a good design? We only really save any copying for
63 already-normalised divisors, which should be rare. It also prevents us from
64 reasonably asking for all scratch space we need.)
65
66 We write nn-dn+1 limbs for the quotient, but return void. Why not return
67 the most significant quotient limb? Look at the 4 main code blocks below
68 (consisting of an outer if-else where each arm contains an if-else). It is
69 tricky for the first code block, since the mpn_*_div_q calls will typically
70 generate all nn-dn+1 and return 0 or 1. I don't see how to fix that unless
71 we generate the most significant quotient limb here, before calling
72 mpn_*_div_q, or put the quotient in a temporary area. Since this is a
73 critical division case (the SB sub-case in particular) copying is not a good
74 idea.
75
76 It might make sense to split the if-else parts of the (qn + FUDGE
77 >= dn) blocks into separate functions, since we could promise quite
78 different things to callers in these two cases. The 'then' case
79 benefits from np=scratch, and it could perhaps even tolerate qp=np,
80 saving some headache for many callers.
81
82 FIXME: Scratch allocation leaves a lot to be desired. E.g., for the MU size
83 operands, we do not reuse the huge scratch for adjustments. This can be a
84 serious waste of memory for the largest operands.
85 */
86
87 /* FUDGE determines when to try getting an approximate quotient from the upper
88 parts of the dividend and divisor, then adjust. N.B. FUDGE must be >= 2
89 for the code to be correct. */
90 #define FUDGE 5 /* FIXME: tune this */
91
92 #define DC_DIV_Q_THRESHOLD DC_DIVAPPR_Q_THRESHOLD
93 #define MU_DIV_Q_THRESHOLD MU_DIVAPPR_Q_THRESHOLD
94 #define MUPI_DIV_Q_THRESHOLD MUPI_DIVAPPR_Q_THRESHOLD
95 #ifndef MUPI_DIVAPPR_Q_THRESHOLD
96 #define MUPI_DIVAPPR_Q_THRESHOLD MUPI_DIV_QR_THRESHOLD
97 #endif
98
99 void
mpn_div_q(mp_ptr qp,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_ptr scratch)100 mpn_div_q (mp_ptr qp,
101 mp_srcptr np, mp_size_t nn,
102 mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
103 {
104 mp_ptr new_dp, new_np, tp, rp;
105 mp_limb_t cy, dh, qh;
106 mp_size_t new_nn, qn;
107 gmp_pi1_t dinv;
108 int cnt;
109 TMP_DECL;
110 TMP_MARK;
111
112 ASSERT (nn >= dn);
113 ASSERT (dn > 0);
114 ASSERT (dp[dn - 1] != 0);
115 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
116 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
117 ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
118
119 ASSERT_ALWAYS (FUDGE >= 2);
120
121 dh = dp[dn - 1];
122 if (dn == 1)
123 {
124 mpn_divrem_1 (qp, 0L, np, nn, dh);
125 return;
126 }
127
128 qn = nn - dn + 1; /* Quotient size, high limb might be zero */
129
130 if (qn + FUDGE >= dn)
131 {
132 /* |________________________|
133 |_______| */
134 new_np = scratch;
135
136 if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
137 {
138 count_leading_zeros (cnt, dh);
139
140 cy = mpn_lshift (new_np, np, nn, cnt);
141 new_np[nn] = cy;
142 new_nn = nn + (cy != 0);
143
144 new_dp = TMP_ALLOC_LIMBS (dn);
145 mpn_lshift (new_dp, dp, dn, cnt);
146
147 if (dn == 2)
148 {
149 qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
150 }
151 else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
152 BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
153 {
154 invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
155 qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
156 }
157 else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
158 BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
159 (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
160 + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
161 {
162 invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
163 qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
164 }
165 else
166 {
167 mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
168 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
169 qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
170 }
171 if (cy == 0)
172 qp[qn - 1] = qh;
173 else
174 ASSERT (qh == 0);
175 }
176 else /* divisor is already normalised */
177 {
178 if (new_np != np)
179 MPN_COPY (new_np, np, nn);
180
181 if (dn == 2)
182 {
183 qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
184 }
185 else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
186 BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
187 {
188 invert_pi1 (dinv, dh, dp[dn - 2]);
189 qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
190 }
191 else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
192 BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
193 (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
194 + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
195 {
196 invert_pi1 (dinv, dh, dp[dn - 2]);
197 qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
198 }
199 else
200 {
201 mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
202 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
203 qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
204 }
205 qp[nn - dn] = qh;
206 }
207 }
208 else
209 {
210 /* |________________________|
211 |_________________| */
212 tp = TMP_ALLOC_LIMBS (qn + 1);
213
214 new_np = scratch;
215 new_nn = 2 * qn + 1;
216 if (new_np == np)
217 /* We need {np,nn} to remain untouched until the final adjustment, so
218 we need to allocate separate space for new_np. */
219 new_np = TMP_ALLOC_LIMBS (new_nn + 1);
220
221
222 if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
223 {
224 count_leading_zeros (cnt, dh);
225
226 cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
227 new_np[new_nn] = cy;
228
229 new_nn += (cy != 0);
230
231 new_dp = TMP_ALLOC_LIMBS (qn + 1);
232 mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
233 new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
234
235 if (qn + 1 == 2)
236 {
237 qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
238 }
239 else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
240 {
241 invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
242 qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
243 }
244 else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
245 {
246 invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
247 qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
248 }
249 else
250 {
251 mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
252 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
253 qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
254 }
255 if (cy == 0)
256 tp[qn] = qh;
257 else if (UNLIKELY (qh != 0))
258 {
259 /* This happens only when the quotient is close to B^n and
260 mpn_*_divappr_q returned B^n. */
261 mp_size_t i, n;
262 n = new_nn - (qn + 1);
263 for (i = 0; i < n; i++)
264 tp[i] = GMP_NUMB_MAX;
265 qh = 0; /* currently ignored */
266 }
267 }
268 else /* divisor is already normalised */
269 {
270 MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
271
272 new_dp = (mp_ptr) dp + dn - (qn + 1);
273
274 if (qn == 2 - 1)
275 {
276 qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
277 }
278 else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
279 {
280 invert_pi1 (dinv, dh, new_dp[qn - 1]);
281 qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
282 }
283 else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
284 {
285 invert_pi1 (dinv, dh, new_dp[qn - 1]);
286 qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
287 }
288 else
289 {
290 mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
291 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
292 qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
293 }
294 tp[qn] = qh;
295 }
296
297 MPN_COPY (qp, tp + 1, qn);
298 if (tp[0] <= 4)
299 {
300 mp_size_t rn;
301
302 rp = TMP_ALLOC_LIMBS (dn + qn);
303 mpn_mul (rp, dp, dn, tp + 1, qn);
304 rn = dn + qn;
305 rn -= rp[rn - 1] == 0;
306
307 if (rn > nn || mpn_cmp (np, rp, nn) < 0)
308 MPN_DECR_U (qp, qn, 1);
309 }
310 }
311
312 TMP_FREE;
313 }
314