/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
   Return the single-limb remainder.
   There are no constraints on the value of the divisor.

Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009, 2012 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  0
#endif

#ifndef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  0
#endif

#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
#endif

#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
#endif

#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
/* Duplicates declarations in tune/speed.h */
mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);

void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);

#undef mpn_mod_1_1p
#define mpn_mod_1_1p(ap, n, b, pre)                                     \
  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)                \
   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)             \
      : __gmpn_mod_1_1p (ap, n, b, pre)))

#undef mpn_mod_1_1p_cps
#define mpn_mod_1_1p_cps(pre, b)                                        \
  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)                   \
   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)                \
      : __gmpn_mod_1_1p_cps (pre, b)))
#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */


/* The comments in mpn/generic/divrem_1.c apply here too.

   As noted in the algorithms section of the manual, the shifts in the loop
   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
   of divide steps, though how often that happens depends on the assumed
   distributions of dividend and divisor.  In any case this idea is left to
   CPU specific implementations to consider.  */
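/* A minimal self-contained sketch of that identity, assuming 64-bit limbs,
   no nails, and a compiler providing __builtin_clzll and __uint128_t.  The
   name mod_unnorm_via_norm is illustrative only, not part of GMP.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* a % d for unnormalized d != 0: reduce modulo the normalized divisor
   D = d*2^n, then shift only the final partial remainder, using
   a % d = (((a % D) * 2^n) % D) / 2^n.  */
static uint64_t
mod_unnorm_via_norm (uint64_t a, uint64_t d)
{
  int n = __builtin_clzll (d);
  uint64_t D = d << n;                  /* normalized: high bit set */
  uint64_t r = a % D;                   /* no shifting of a here */
  return (uint64_t) (((__uint128_t) r << n) % D) >> n;
}

int
main (void)
{
  assert (mod_unnorm_via_norm (1000000007, 10) == 1000000007 % 10);
  assert (mod_unnorm_via_norm (~(uint64_t) 0, 3) == ~(uint64_t) 0 % 3);
  return 0;
}
#endif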
static mp_limb_t
mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t i;
  mp_limb_t n1, n0, r;
  mp_limb_t dummy;
  int cnt;

  ASSERT (un > 0);
  ASSERT (d != 0);

  d <<= GMP_NAIL_BITS;

  /* Skip a division if high < divisor.  Having the test here before
     normalizing will still skip as often as possible.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r < d)
    {
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
        return r;
    }
  else
    r = 0;

  /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
     code above.  */
  if (! UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          udiv_qrnnd (dummy, r, r, n0, d);
          r >>= GMP_NAIL_BITS;
        }
      return r;
    }

  count_leading_zeros (cnt, d);
  d <<= cnt;

  n1 = up[un - 1] << GMP_NAIL_BITS;
  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));

  if (UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      mp_limb_t nshift;
      for (i = un - 2; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
          udiv_qrnnd (dummy, r, r, nshift, d);
          r >>= GMP_NAIL_BITS;
          n1 = n0;
        }
      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;
    }
  else
    {
      mp_limb_t inv, nshift;
      invert_limb (inv, d);

      for (i = un - 2; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
          udiv_rnnd_preinv (r, r, nshift, d, inv);
          r >>= GMP_NAIL_BITS;
          n1 = n0;
        }
      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;
    }
}

static mp_limb_t
mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t i;
  mp_limb_t n0, r;
  mp_limb_t dummy;

  ASSERT (un > 0);

  d <<= GMP_NAIL_BITS;

  ASSERT (d & GMP_LIMB_HIGHBIT);

  /* High limb is initial remainder, possibly with one subtract of
     d to get r<d.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r >= d)
    r -= d;
  r >>= GMP_NAIL_BITS;
  un--;
  if (un == 0)
    return r;

  if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          udiv_qrnnd (dummy, r, r, n0, d);
          r >>= GMP_NAIL_BITS;
        }
      return r;
    }
  else
    {
      mp_limb_t inv;
      invert_limb (inv, d);
      for (i = un - 1; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          udiv_rnnd_preinv (r, r, n0, d, inv);
          r >>= GMP_NAIL_BITS;
        }
      return r;
    }
}
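/* The preinv loops above rely on invert_limb and udiv_rnnd_preinv from
   GMP's internal headers.  Below is a minimal sketch of the underlying
   scheme (division by a precomputed reciprocal, in the style of
   Moller/Granlund, "Improved division by invariant integers"), assuming
   64-bit limbs, no nails, and __uint128_t.  The *_ref names are
   illustrative only, not the library's API.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* v = floor((B^2 - 1)/d) - B for normalized d (high bit set), B = 2^64.
   The one hardware division here is amortized over the whole limb loop.  */
static uint64_t
invert_limb_ref (uint64_t d)
{
  return (uint64_t) ((((__uint128_t) ~d << 64) | ~(uint64_t) 0) / d);
}

/* Remainder of <u1,u0> divided by normalized d, given v; requires u1 < d.
   Two multiplies plus small corrections replace a hardware divide.  */
static uint64_t
udiv_rnnd_preinv_ref (uint64_t u1, uint64_t u0, uint64_t d, uint64_t v)
{
  __uint128_t p = (__uint128_t) u1 * v;
  uint64_t q1 = (uint64_t) (p >> 64), q0 = (uint64_t) p;
  __uint128_t s = (__uint128_t) q0 + u0;        /* <q1,q0> += <u1+1,u0> */
  q0 = (uint64_t) s;
  q1 += u1 + 1 + (uint64_t) (s >> 64);
  uint64_t r = u0 - q1 * d;                     /* mod 2^64 */
  if (r > q0)           /* candidate quotient one too large */
    r += d;
  if (r >= d)           /* rare: one too small */
    r -= d;
  return r;
}

int
main (void)
{
  uint64_t d = ((uint64_t) 1 << 63) + 7;        /* any d with top bit set */
  uint64_t v = invert_limb_ref (d);
  uint64_t want = (uint64_t) ((((__uint128_t) 1 << 64) + 12345) % d);
  assert (udiv_rnnd_preinv_ref (1, 12345, d, v) == want);
  return 0;
}
#endif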
mp_limb_t
mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  ASSERT (n >= 0);
  ASSERT (b != 0);

  /* Should this be handled at all?  Rely on callers?  Note n==0 is
     currently required by mpz/fdiv_r_ui.c and possibly other places.  */
  if (n == 0)
    return 0;

  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
    {
      /* Normalized divisor: high bit set.  */
      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
        {
          return mpn_mod_1_norm (ap, n, b);
        }
      else
        {
          mp_limb_t pre[4];
          mpn_mod_1_1p_cps (pre, b);
          return mpn_mod_1_1p (ap, n, b, pre);
        }
    }
  else
    {
      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
        {
          return mpn_mod_1_unnorm (ap, n, b);
        }
      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
        {
          mp_limb_t pre[4];
          mpn_mod_1_1p_cps (pre, b);
          return mpn_mod_1_1p (ap, n, b << pre[1], pre);
        }
      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD)
               || UNLIKELY (b > GMP_NUMB_MASK / 4))
        {
          mp_limb_t pre[5];
          mpn_mod_1s_2p_cps (pre, b);
          return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
        }
      else
        {
          mp_limb_t pre[7];
          mpn_mod_1s_4p_cps (pre, b);
          return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
        }
    }
}
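/* A minimal usage sketch (not part of this file's build): mpn_mod_1 is the
   documented low-level entry point; {a, 2} below holds the two-limb value
   a[1]*B + a[0] with B = 2^GMP_NUMB_BITS.  The divisor value is arbitrary.  */
#if 0
#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  mp_limb_t a[2] = { 123456789, 42 };   /* the value 42*B + 123456789 */
  mp_limb_t r = mpn_mod_1 (a, 2, 97);   /* remainder modulo 97 */
  printf ("remainder = %lu\n", (unsigned long) r);
  return 0;
}
#endif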