xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/sbpi1_div_q.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
2    division algorithm.
3 
4    Contributed to the GNU project by Torbjorn Granlund.
5 
6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 
10 Copyright 2007, 2009 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 
39 #include "gmp-impl.h"
40 #include "longlong.h"
41 
42 mp_limb_t
mpn_sbpi1_div_q(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_limb_t dinv)43 mpn_sbpi1_div_q (mp_ptr qp,
44 		 mp_ptr np, mp_size_t nn,
45 		 mp_srcptr dp, mp_size_t dn,
46 		 mp_limb_t dinv)
47 {
48   mp_limb_t qh;
49   mp_size_t qn, i;
50   mp_limb_t n1, n0;
51   mp_limb_t d1, d0;
52   mp_limb_t cy, cy1;
53   mp_limb_t q;
54   mp_limb_t flag;
55 
56   mp_size_t dn_orig = dn;
57   mp_srcptr dp_orig = dp;
58   mp_ptr np_orig = np;
59 
60   ASSERT (dn > 2);
61   ASSERT (nn >= dn);
62   ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
63 
64   np += nn;
65 
66   qn = nn - dn;
67   if (qn + 1 < dn)
68     {
69       dp += dn - (qn + 1);
70       dn = qn + 1;
71     }
72 
73   qh = mpn_cmp (np - dn, dp, dn) >= 0;
74   if (qh != 0)
75     mpn_sub_n (np - dn, np - dn, dp, dn);
76 
77   qp += qn;
78 
79   dn -= 2;			/* offset dn by 2 for main division loops,
80 				   saving two iterations in mpn_submul_1.  */
81   d1 = dp[dn + 1];
82   d0 = dp[dn + 0];
83 
84   np -= 2;
85 
86   n1 = np[1];
87 
88   for (i = qn - (dn + 2); i >= 0; i--)
89     {
90       np--;
91       if (UNLIKELY (n1 == d1) && np[1] == d0)
92 	{
93 	  q = GMP_NUMB_MASK;
94 	  mpn_submul_1 (np - dn, dp, dn + 2, q);
95 	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
96 	}
97       else
98 	{
99 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
100 
101 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
102 
103 	  cy1 = n0 < cy;
104 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
105 	  cy = n1 < cy1;
106 	  n1 -= cy1;
107 	  np[0] = n0;
108 
109 	  if (UNLIKELY (cy != 0))
110 	    {
111 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
112 	      q--;
113 	    }
114 	}
115 
116       *--qp = q;
117     }
118 
119   flag = ~CNST_LIMB(0);
120 
121   if (dn >= 0)
122     {
123       for (i = dn; i > 0; i--)
124 	{
125 	  np--;
126 	  if (UNLIKELY (n1 >= (d1 & flag)))
127 	    {
128 	      q = GMP_NUMB_MASK;
129 	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
130 
131 	      if (UNLIKELY (n1 != cy))
132 		{
133 		  if (n1 < (cy & flag))
134 		    {
135 		      q--;
136 		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
137 		    }
138 		  else
139 		    flag = 0;
140 		}
141 	      n1 = np[1];
142 	    }
143 	  else
144 	    {
145 	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
146 
147 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
148 
149 	      cy1 = n0 < cy;
150 	      n0 = (n0 - cy) & GMP_NUMB_MASK;
151 	      cy = n1 < cy1;
152 	      n1 -= cy1;
153 	      np[0] = n0;
154 
155 	      if (UNLIKELY (cy != 0))
156 		{
157 		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
158 		  q--;
159 		}
160 	    }
161 
162 	  *--qp = q;
163 
164 	  /* Truncate operands.  */
165 	  dn--;
166 	  dp++;
167 	}
168 
169       np--;
170       if (UNLIKELY (n1 >= (d1 & flag)))
171 	{
172 	  q = GMP_NUMB_MASK;
173 	  cy = mpn_submul_1 (np, dp, 2, q);
174 
175 	  if (UNLIKELY (n1 != cy))
176 	    {
177 	      if (n1 < (cy & flag))
178 		{
179 		  q--;
180 		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
181 		}
182 	      else
183 		flag = 0;
184 	    }
185 	  n1 = np[1];
186 	}
187       else
188 	{
189 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
190 
191 	  np[0] = n0;
192 	  np[1] = n1;
193 	}
194 
195       *--qp = q;
196     }
197   ASSERT_ALWAYS (np[1] == n1);
198   np += 2;
199 
200 
201   dn = dn_orig;
202   if (UNLIKELY (n1 < (dn & flag)))
203     {
204       mp_limb_t q, x;
205 
206       /* The quotient may be too large if the remainder is small.  Recompute
207 	 for above ignored operand parts, until the remainder spills.
208 
209 	 FIXME: The quality of this code isn't the same as the code above.
210 	 1. We don't compute things in an optimal order, high-to-low, in order
211 	    to terminate as quickly as possible.
212 	 2. We mess with pointers and sizes, adding and subtracting and
213 	    adjusting to get things right.  It surely could be streamlined.
214 	 3. The only termination criteria are that we determine that the
215 	    quotient needs to be adjusted, or that we have recomputed
216 	    everything.  We should stop when the remainder is so large
217 	    that no additional subtracting could make it spill.
218 	 4. If nothing else, we should not do two loops of submul_1 over the
219 	    data, instead handle both the triangularization and chopping at
220 	    once.  */
221 
222       x = n1;
223 
224       if (dn > 2)
225 	{
226 	  /* Compensate for triangularization.  */
227 	  mp_limb_t y;
228 
229 	  dp = dp_orig;
230 	  if (qn + 1 < dn)
231 	    {
232 	      dp += dn - (qn + 1);
233 	      dn = qn + 1;
234 	    }
235 
236 	  y = np[-2];
237 
238 	  for (i = dn - 3; i >= 0; i--)
239 	    {
240 	      q = qp[i];
241 	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
242 
243 	      if (y < cy)
244 		{
245 		  if (x == 0)
246 		    {
247 		      cy = mpn_sub_1 (qp, qp, qn, 1);
248 		      ASSERT_ALWAYS (cy == 0);
249 		      return qh - cy;
250 		    }
251 		  x--;
252 		}
253 	      y -= cy;
254 	    }
255 	  np[-2] = y;
256 	}
257 
258       dn = dn_orig;
259       if (qn + 1 < dn)
260 	{
261 	  /* Compensate for ignored dividend and divisor tails.  */
262 
263 	  dp = dp_orig;
264 	  np = np_orig;
265 
266 	  if (qh != 0)
267 	    {
268 	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
269 	      if (cy != 0)
270 		{
271 		  if (x == 0)
272 		    {
273 		      if (qn != 0)
274 			cy = mpn_sub_1 (qp, qp, qn, 1);
275 		      return qh - cy;
276 		    }
277 		  x--;
278 		}
279 	    }
280 
281 	  if (qn == 0)
282 	    return qh;
283 
284 	  for (i = dn - qn - 2; i >= 0; i--)
285 	    {
286 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
287 	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
288 	      if (cy != 0)
289 		{
290 		  if (x == 0)
291 		    {
292 		      cy = mpn_sub_1 (qp, qp, qn, 1);
293 		      return qh;
294 		    }
295 		  x--;
296 		}
297 	    }
298 	}
299     }
300 
301   return qh;
302 }
303