xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/dcpi1_divappr_q.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
2    quotient.  The quotient returned is either correct, or one too large.
3 
4    Contributed to the GNU project by Torbjorn Granlund.
5 
6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 
10 Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 #include "gmp-impl.h"
39 #include "longlong.h"
40 
41 
42 static mp_limb_t
mpn_dcpi1_divappr_q_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,gmp_pi1_t * dinv,mp_ptr tp)43 mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
44 		       gmp_pi1_t *dinv, mp_ptr tp)
45 {
46   mp_size_t lo, hi;
47   mp_limb_t cy, qh, ql;
48 
49   lo = n >> 1;			/* floor(n/2) */
50   hi = n - lo;			/* ceil(n/2) */
51 
52   if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
53     qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
54   else
55     qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
56 
57   mpn_mul (tp, qp + lo, hi, dp, lo);
58 
59   cy = mpn_sub_n (np + lo, np + lo, tp, n);
60   if (qh != 0)
61     cy += mpn_sub_n (np + n, np + n, dp, lo);
62 
63   while (cy != 0)
64     {
65       qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
66       cy -= mpn_add_n (np + lo, np + lo, dp, n);
67     }
68 
69   if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
70     ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
71   else
72     ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
73 
74   if (UNLIKELY (ql != 0))
75     {
76       mp_size_t i;
77       for (i = 0; i < lo; i++)
78 	qp[i] = GMP_NUMB_MASK;
79     }
80 
81   return qh;
82 }
83 
84 mp_limb_t
mpn_dcpi1_divappr_q(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,gmp_pi1_t * dinv)85 mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
86 		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
87 {
88   mp_size_t qn;
89   mp_limb_t qh, cy, qsave;
90   mp_ptr tp;
91   TMP_DECL;
92 
93   TMP_MARK;
94 
95   ASSERT (dn >= 6);
96   ASSERT (nn > dn);
97   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
98 
99   qn = nn - dn;
100   qp += qn;
101   np += nn;
102   dp += dn;
103 
104   if (qn >= dn)
105     {
106       qn++;			/* pretend we'll need an extra limb */
107       /* Reduce qn mod dn without division, optimizing small operations.  */
108       do
109 	qn -= dn;
110       while (qn > dn);
111 
112       qp -= qn;			/* point at low limb of next quotient block */
113       np -= qn;			/* point in the middle of partial remainder */
114 
115       tp = TMP_SALLOC_LIMBS (dn);
116 
117       /* Perform the typically smaller block first.  */
118       if (qn == 1)
119 	{
120 	  mp_limb_t q, n2, n1, n0, d1, d0;
121 
122 	  /* Handle qh up front, for simplicity. */
123 	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
124 	  if (qh)
125 	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
126 
127 	  /* A single iteration of schoolbook: One 3/2 division,
128 	     followed by the bignum update and adjustment. */
129 	  n2 = np[0];
130 	  n1 = np[-1];
131 	  n0 = np[-2];
132 	  d1 = dp[-1];
133 	  d0 = dp[-2];
134 
135 	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
136 
137 	  if (UNLIKELY (n2 == d1) && n1 == d0)
138 	    {
139 	      q = GMP_NUMB_MASK;
140 	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
141 	      ASSERT (cy == n2);
142 	    }
143 	  else
144 	    {
145 	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
146 
147 	      if (dn > 2)
148 		{
149 		  mp_limb_t cy, cy1;
150 		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
151 
152 		  cy1 = n0 < cy;
153 		  n0 = (n0 - cy) & GMP_NUMB_MASK;
154 		  cy = n1 < cy1;
155 		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
156 		  np[-2] = n0;
157 
158 		  if (UNLIKELY (cy != 0))
159 		    {
160 		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
161 		      qh -= (q == 0);
162 		      q = (q - 1) & GMP_NUMB_MASK;
163 		    }
164 		}
165 	      else
166 		np[-2] = n0;
167 
168 	      np[-1] = n1;
169 	    }
170 	  qp[0] = q;
171 	}
172       else
173 	{
174 	  if (qn == 2)
175 	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
176 	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
177 	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
178 	  else
179 	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
180 
181 	  if (qn != dn)
182 	    {
183 	      if (qn > dn - qn)
184 		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
185 	      else
186 		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
187 
188 	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
189 	      if (qh != 0)
190 		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
191 
192 	      while (cy != 0)
193 		{
194 		  qh -= mpn_sub_1 (qp, qp, qn, 1);
195 		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
196 		}
197 	    }
198 	}
199       qn = nn - dn - qn + 1;
200       while (qn > dn)
201 	{
202 	  qp -= dn;
203 	  np -= dn;
204 	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
205 	  qn -= dn;
206 	}
207 
208       /* Since we pretended we'd need an extra quotient limb before, we now
209 	 have made sure the code above left just dn-1=qn quotient limbs to
210 	 develop.  Develop that plus a guard limb. */
211       qn--;
212       qp -= qn;
213       np -= dn;
214       qsave = qp[qn];
215       mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
216       MPN_COPY_INCR (qp, qp + 1, qn);
217       qp[qn] = qsave;
218     }
219   else    /* (qn < dn) */
220     {
221       mp_ptr q2p;
222 #if 0				/* not possible since we demand nn > dn */
223       if (qn == 0)
224 	{
225 	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
226 	  if (qh)
227 	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
228 	  TMP_FREE;
229 	  return qh;
230 	}
231 #endif
232 
233       qp -= qn;			/* point at low limb of next quotient block */
234       np -= qn;			/* point in the middle of partial remainder */
235 
236       q2p = TMP_SALLOC_LIMBS (qn + 1);
237       /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
238 	 callers not to be silly?  */
239       if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
240 	{
241 	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
242 				    dp - (qn + 1), qn + 1, dinv->inv32);
243 	}
244       else
245 	{
246 	  /* It is tempting to use qp for recursive scratch and put quotient in
247 	     tp, but the recursive scratch needs one limb too many.  */
248 	  tp = TMP_SALLOC_LIMBS (qn + 1);
249 	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
250 	}
251       MPN_COPY (qp, q2p + 1, qn);
252     }
253 
254   TMP_FREE;
255   return qh;
256 }
257