1 /* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
2 division algorithm.
3
4 Contributed to the GNU project by Torbjorn Granlund.
5
6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10 Copyright 2007, 2009 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
20
21 or
22
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
26
27 or both in parallel, as here.
28
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
33
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
37
38
39 #include "gmp-impl.h"
40 #include "longlong.h"
41
42 mp_limb_t
mpn_sbpi1_div_q(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_limb_t dinv)43 mpn_sbpi1_div_q (mp_ptr qp,
44 mp_ptr np, mp_size_t nn,
45 mp_srcptr dp, mp_size_t dn,
46 mp_limb_t dinv)
47 {
48 mp_limb_t qh;
49 mp_size_t qn, i;
50 mp_limb_t n1, n0;
51 mp_limb_t d1, d0;
52 mp_limb_t cy, cy1;
53 mp_limb_t q;
54 mp_limb_t flag;
55
56 mp_size_t dn_orig = dn;
57 mp_srcptr dp_orig = dp;
58 mp_ptr np_orig = np;
59
60 ASSERT (dn > 2);
61 ASSERT (nn >= dn);
62 ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
63
64 np += nn;
65
66 qn = nn - dn;
67 if (qn + 1 < dn)
68 {
69 dp += dn - (qn + 1);
70 dn = qn + 1;
71 }
72
73 qh = mpn_cmp (np - dn, dp, dn) >= 0;
74 if (qh != 0)
75 mpn_sub_n (np - dn, np - dn, dp, dn);
76
77 qp += qn;
78
79 dn -= 2; /* offset dn by 2 for main division loops,
80 saving two iterations in mpn_submul_1. */
81 d1 = dp[dn + 1];
82 d0 = dp[dn + 0];
83
84 np -= 2;
85
86 n1 = np[1];
87
88 for (i = qn - (dn + 2); i >= 0; i--)
89 {
90 np--;
91 if (UNLIKELY (n1 == d1) && np[1] == d0)
92 {
93 q = GMP_NUMB_MASK;
94 mpn_submul_1 (np - dn, dp, dn + 2, q);
95 n1 = np[1]; /* update n1, last loop's value will now be invalid */
96 }
97 else
98 {
99 udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
100
101 cy = mpn_submul_1 (np - dn, dp, dn, q);
102
103 cy1 = n0 < cy;
104 n0 = (n0 - cy) & GMP_NUMB_MASK;
105 cy = n1 < cy1;
106 n1 -= cy1;
107 np[0] = n0;
108
109 if (UNLIKELY (cy != 0))
110 {
111 n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
112 q--;
113 }
114 }
115
116 *--qp = q;
117 }
118
119 flag = ~CNST_LIMB(0);
120
121 if (dn >= 0)
122 {
123 for (i = dn; i > 0; i--)
124 {
125 np--;
126 if (UNLIKELY (n1 >= (d1 & flag)))
127 {
128 q = GMP_NUMB_MASK;
129 cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
130
131 if (UNLIKELY (n1 != cy))
132 {
133 if (n1 < (cy & flag))
134 {
135 q--;
136 mpn_add_n (np - dn, np - dn, dp, dn + 2);
137 }
138 else
139 flag = 0;
140 }
141 n1 = np[1];
142 }
143 else
144 {
145 udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
146
147 cy = mpn_submul_1 (np - dn, dp, dn, q);
148
149 cy1 = n0 < cy;
150 n0 = (n0 - cy) & GMP_NUMB_MASK;
151 cy = n1 < cy1;
152 n1 -= cy1;
153 np[0] = n0;
154
155 if (UNLIKELY (cy != 0))
156 {
157 n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
158 q--;
159 }
160 }
161
162 *--qp = q;
163
164 /* Truncate operands. */
165 dn--;
166 dp++;
167 }
168
169 np--;
170 if (UNLIKELY (n1 >= (d1 & flag)))
171 {
172 q = GMP_NUMB_MASK;
173 cy = mpn_submul_1 (np, dp, 2, q);
174
175 if (UNLIKELY (n1 != cy))
176 {
177 if (n1 < (cy & flag))
178 {
179 q--;
180 add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
181 }
182 else
183 flag = 0;
184 }
185 n1 = np[1];
186 }
187 else
188 {
189 udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
190
191 np[0] = n0;
192 np[1] = n1;
193 }
194
195 *--qp = q;
196 }
197 ASSERT_ALWAYS (np[1] == n1);
198 np += 2;
199
200
201 dn = dn_orig;
202 if (UNLIKELY (n1 < (dn & flag)))
203 {
204 mp_limb_t q, x;
205
206 /* The quotient may be too large if the remainder is small. Recompute
207 for above ignored operand parts, until the remainder spills.
208
209 FIXME: The quality of this code isn't the same as the code above.
210 1. We don't compute things in an optimal order, high-to-low, in order
211 to terminate as quickly as possible.
212 2. We mess with pointers and sizes, adding and subtracting and
213 adjusting to get things right. It surely could be streamlined.
214 3. The only termination criteria are that we determine that the
215 quotient needs to be adjusted, or that we have recomputed
216 everything. We should stop when the remainder is so large
217 that no additional subtracting could make it spill.
218 4. If nothing else, we should not do two loops of submul_1 over the
219 data, instead handle both the triangularization and chopping at
220 once. */
221
222 x = n1;
223
224 if (dn > 2)
225 {
226 /* Compensate for triangularization. */
227 mp_limb_t y;
228
229 dp = dp_orig;
230 if (qn + 1 < dn)
231 {
232 dp += dn - (qn + 1);
233 dn = qn + 1;
234 }
235
236 y = np[-2];
237
238 for (i = dn - 3; i >= 0; i--)
239 {
240 q = qp[i];
241 cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
242
243 if (y < cy)
244 {
245 if (x == 0)
246 {
247 cy = mpn_sub_1 (qp, qp, qn, 1);
248 ASSERT_ALWAYS (cy == 0);
249 return qh - cy;
250 }
251 x--;
252 }
253 y -= cy;
254 }
255 np[-2] = y;
256 }
257
258 dn = dn_orig;
259 if (qn + 1 < dn)
260 {
261 /* Compensate for ignored dividend and divisor tails. */
262
263 dp = dp_orig;
264 np = np_orig;
265
266 if (qh != 0)
267 {
268 cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
269 if (cy != 0)
270 {
271 if (x == 0)
272 {
273 if (qn != 0)
274 cy = mpn_sub_1 (qp, qp, qn, 1);
275 return qh - cy;
276 }
277 x--;
278 }
279 }
280
281 if (qn == 0)
282 return qh;
283
284 for (i = dn - qn - 2; i >= 0; i--)
285 {
286 cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
287 cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
288 if (cy != 0)
289 {
290 if (x == 0)
291 {
292 cy = mpn_sub_1 (qp, qp, qn, 1);
293 return qh;
294 }
295 x--;
296 }
297 }
298 }
299 }
300
301 return qh;
302 }
303