/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
   Return the single-limb remainder.
   There are no constraints on the value of the divisor.

Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009, 2012 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* The size at which udiv_qrnnd_preinv should be used rather than
   udiv_qrnnd: the size in question is the quotient size, i.e. how many
   udiv divisions will be done (see the illustrative note after the
   defaults below).

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  0
#endif

#ifndef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  0
#endif

#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
#endif

#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
#endif

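/* Illustrative note (editor's sketch, not upstream text): BELOW_THRESHOLD
   (size, thresh) from gmp-impl.h behaves as "size < thresh", with a
   threshold of 0 meaning never below and MP_SIZE_T_MAX meaning always
   below.  So with MOD_1U_TO_MOD_1_1_THRESHOLD left at MP_SIZE_T_MAX,
   every unnormalized operand takes the mpn_mod_1_unnorm path in mpn_mod_1
   below, whereas a tuned value of, say, 9 would send operands of 9 or
   more limbs to mpn_mod_1_1p instead.  */
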
#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
/* Duplicates declarations in tune/speed.h */
mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);

void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);

#undef mpn_mod_1_1p
#define mpn_mod_1_1p(ap, n, b, pre)			     \
  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
      : __gmpn_mod_1_1p (ap, n, b, pre)))

#undef mpn_mod_1_1p_cps
#define mpn_mod_1_1p_cps(pre, b)				\
  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)		\
   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)	\
      : __gmpn_mod_1_1p_cps (pre, b)))
#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */


/* The comments in mpn/generic/divrem_1.c apply here too.

   As noted in the algorithms section of the manual, the shifts in the loop
   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
   by a final (r*2^n)%(d*2^n).  If it happens that a%(d*2^n) can skip a
   division where (a*2^n)%(d*2^n) can't, the two approaches even use the
   same number of divide steps, though how often that happens depends on
   the assumed distributions of dividend and divisor.  In any case this
   idea is left to CPU specific implementations to consider.  */
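
/* A worked instance of that identity (editor's example): with d = 3 and
   n = 2 leading zero bits, d*2^n = 12.  For a = 23, a % 12 = 11; then
   (11*2^2) % 12 = 44 % 12 = 8, and 8 >> 2 = 2, which is indeed 23 % 3.
   The per-limb shifts are avoided at the cost of one extra reduction at
   the end.  */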

static mp_limb_t
mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n1, n0, r;
  mp_limb_t  dummy;
  int cnt;

  ASSERT (un > 0);
  ASSERT (d != 0);

  d <<= GMP_NAIL_BITS;

  /* Skip a division if high < divisor.  Having the test here before
     normalizing will still skip as often as possible.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r < d)
    {
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
	return r;
    }
  else
    r = 0;

  /* If udiv_qrnnd doesn't need a normalized divisor, the plain loop below
     can be used directly.  */
  if (! UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r, n0, d);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }

  count_leading_zeros (cnt, d);
  d <<= cnt;

  n1 = up[un - 1] << GMP_NAIL_BITS;
  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));
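  /* Editor's note: r now holds the most significant limb of the dividend
     as it appears after shifting everything left by cnt bits; the bits of
     n1 not yet consumed are folded in by the loops below.  */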

  if (UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      mp_limb_t nshift;
      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
	  udiv_qrnnd (dummy, r, r, nshift, d);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;
    }
  else
    {
      mp_limb_t inv, nshift;
      invert_limb (inv, d);

      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
	  udiv_rnnd_preinv (r, r, nshift, d, inv);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;
    }
}

static mp_limb_t
mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n0, r;
  mp_limb_t  dummy;

  ASSERT (un > 0);

  d <<= GMP_NAIL_BITS;

  ASSERT (d & GMP_LIMB_HIGHBIT);

  /* High limb is the initial remainder, possibly with one subtract of d
     to get r<d.  One subtract always suffices, since d is normalized and
     hence the high limb is less than 2*d.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r >= d)
    r -= d;
  r >>= GMP_NAIL_BITS;
  un--;
  if (un == 0)
    return r;

  if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r, n0, d);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }
  else
    {
      mp_limb_t  inv;
      invert_limb (inv, d);
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_rnnd_preinv (r, r, n0, d, inv);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }
}
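
/* Editor's note on the preinv variants used above: for a normalized d,
   invert_limb computes inv = floor ((B^2 - 1) / d) - B, where B =
   2^GMP_LIMB_BITS, and udiv_rnnd_preinv then derives each remainder from
   multiplications by inv rather than a hardware divide.  This is why the
   thresholds at the top of the file default to preferring preinv.  */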

mp_limb_t
mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  ASSERT (n >= 0);
  ASSERT (b != 0);

  /* Should this be handled at all?  Rely on callers?  Note n==0 is
     currently required by mpz/fdiv_r_ui.c and possibly other places.  */
  if (n == 0)
    return 0;

  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
    {
      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
	{
	  return mpn_mod_1_norm (ap, n, b);
	}
      else
	{
	  mp_limb_t pre[4];
	  mpn_mod_1_1p_cps (pre, b);
	  return mpn_mod_1_1p (ap, n, b, pre);
	}
    }
  else
    {
      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
	{
	  return mpn_mod_1_unnorm (ap, n, b);
	}
      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
	{
	  /* pre[1] is the shift count stored by the _cps function, so b
	     is passed to the worker in normalized form.  */
	  mp_limb_t pre[4];
	  mpn_mod_1_1p_cps (pre, b);
	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
	}
      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
	{
	  mp_limb_t pre[5];
	  mpn_mod_1s_2p_cps (pre, b);
	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
	}
      else
	{
	  mp_limb_t pre[7];
	  mpn_mod_1s_4p_cps (pre, b);
	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
	}
    }
}
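/* Usage sketch (editor's example, not part of the library): mpn_mod_1 is
   a public mpn-layer function, callable as, e.g.,

     #include <stdio.h>
     #include <gmp.h>

     int
     main (void)
     {
       mp_limb_t  a[2] = { 1234567890UL, 42UL };   // little-endian limbs
       mp_limb_t  r = mpn_mod_1 (a, 2, 97);        // a mod 97, one limb
       printf ("%lu\n", (unsigned long) r);
       return 0;
     }

   The divisor may be any nonzero limb; normalization and the choice of
   sub-algorithm are handled internally as above.  */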