1 /* mpn_compute_powtab. 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 1991-2017 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 /* 38 CAVEATS: 39 * The exptab and powtab vectors are in opposite orders. Probably OK. 40 * Consider getting rid of exptab, doing bit ops on the un argument instead. 41 * Consider rounding greatest power slightly upwards to save adjustments. 42 * In powtab_decide, consider computing cost from just the 2-3 largest 43 operands, since smaller operand contribute little. This makes most sense 44 if exptab is suppressed. 45 */ 46 47 #include "gmp-impl.h" 48 49 #ifndef DIV_1_VS_MUL_1_PERCENT 50 #define DIV_1_VS_MUL_1_PERCENT 150 51 #endif 52 53 #define SET_powers_t(dest, ptr, size, dib, b, sh) \ 54 do { \ 55 dest.p = ptr; \ 56 dest.n = size; \ 57 dest.digits_in_base = dib; \ 58 dest.base = b; \ 59 dest.shift = sh; \ 60 } while (0) 61 62 #if DIV_1_VS_MUL_1_PERCENT > 120 63 #define HAVE_mpn_compute_powtab_mul 1 64 static void 65 mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, 66 int base, const size_t *exptab, size_t n_pows) 67 { 68 mp_size_t n; 69 mp_ptr p, t; 70 mp_limb_t cy; 71 long start_idx; 72 int c; 73 74 mp_limb_t big_base = mp_bases[base].big_base; 75 int chars_per_limb = mp_bases[base].chars_per_limb; 76 77 mp_ptr powtab_mem_ptr = powtab_mem; 78 79 size_t digits_in_base = chars_per_limb; 80 81 powers_t *pt = powtab; 82 83 p = powtab_mem_ptr; 84 powtab_mem_ptr += 1; 85 p[0] = big_base; 86 87 SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); 88 pt++; 89 90 t = powtab_mem_ptr; 91 powtab_mem_ptr += 2; 92 t[1] = mpn_mul_1 (t, p, 1, big_base); 93 n = 2; 94 95 digits_in_base *= 2; 96 97 c = t[0] == 0; 98 t += c; 99 n -= c; 100 mp_size_t shift = c; 101 102 SET_powers_t (pt[0], t, n, digits_in_base, base, shift); 103 p = t; 104 pt++; 105 106 if (exptab[0] == ((size_t) chars_per_limb << n_pows)) 107 { 108 start_idx = n_pows - 2; 109 } 110 else 111 { 112 if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0]) 113 { 114 /* 3, sometimes adjusted to 4. */ 115 t = powtab_mem_ptr; 116 powtab_mem_ptr += 4; 117 t[n] = cy = mpn_mul_1 (t, p, n, big_base); 118 n += cy != 0;; 119 120 digits_in_base += chars_per_limb; 121 122 c = t[0] == 0; 123 t += c; 124 n -= c; 125 shift += c; 126 } 127 else 128 { 129 /* 2 copy, will always become 3 with back-multiplication. */ 130 t = powtab_mem_ptr; 131 powtab_mem_ptr += 3; 132 t[0] = p[0]; 133 t[1] = p[1]; 134 } 135 136 SET_powers_t (pt[0], t, n, digits_in_base, base, shift); 137 p = t; 138 pt++; 139 start_idx = n_pows - 3; 140 } 141 142 for (long pi = start_idx; pi >= 0; pi--) 143 { 144 t = powtab_mem_ptr; 145 powtab_mem_ptr += 2 * n + 2; 146 147 ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); 148 149 mpn_sqr (t, p, n); 150 151 digits_in_base *= 2; 152 n *= 2; 153 n -= t[n - 1] == 0; 154 shift *= 2; 155 156 c = t[0] == 0; 157 t += c; 158 n -= c; 159 shift += c; 160 161 /* Adjust new value if it is too small as input to the next squaring. */ 162 if (((digits_in_base + chars_per_limb) << pi) <= exptab[0]) 163 { 164 t[n] = cy = mpn_mul_1 (t, t, n, big_base); 165 n += cy != 0; 166 167 digits_in_base += chars_per_limb; 168 169 c = t[0] == 0; 170 t += c; 171 n -= c; 172 shift += c; 173 } 174 175 SET_powers_t (pt[0], t, n, digits_in_base, base, shift); 176 177 /* Adjust previous value if it is not at its target power. */ 178 if (pt[-1].digits_in_base < exptab[pi + 1]) 179 { 180 mp_size_t n = pt[-1].n; 181 mp_ptr p = pt[-1].p; 182 p[n] = cy = mpn_mul_1 (p, p, n, big_base); 183 n += cy != 0; 184 185 ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]); 186 pt[-1].digits_in_base = exptab[pi + 1]; 187 188 c = p[0] == 0; 189 pt[-1].p = p + c; 190 pt[-1].n = n - c; 191 pt[-1].shift += c; 192 } 193 194 p = t; 195 pt++; 196 } 197 } 198 #endif 199 200 #if DIV_1_VS_MUL_1_PERCENT < 275 201 #define HAVE_mpn_compute_powtab_div 1 202 static void 203 mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, 204 int base, const size_t *exptab, size_t n_pows) 205 { 206 mp_ptr p, t; 207 208 mp_limb_t big_base = mp_bases[base].big_base; 209 int chars_per_limb = mp_bases[base].chars_per_limb; 210 211 mp_ptr powtab_mem_ptr = powtab_mem; 212 213 size_t digits_in_base = chars_per_limb; 214 215 powers_t *pt = powtab; 216 217 p = powtab_mem_ptr; 218 powtab_mem_ptr += 1; 219 p[0] = big_base; 220 221 SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); 222 pt++; 223 224 mp_size_t n = 1; 225 mp_size_t shift = 0; 226 for (long pi = n_pows - 1; pi >= 0; pi--) 227 { 228 t = powtab_mem_ptr; 229 powtab_mem_ptr += 2 * n; 230 231 ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); 232 233 mpn_sqr (t, p, n); 234 n = 2 * n - 1; n += t[n] != 0; 235 digits_in_base *= 2; 236 237 if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */ 238 { 239 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1 240 if (__GMP_LIKELY (base == 10)) 241 mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10, 242 MP_BASES_BIG_BASE_BINVERTED_10, 243 MP_BASES_BIG_BASE_CTZ_10); 244 else 245 #endif 246 /* FIXME: We could use _pi1 here if we add big_base_binverted and 247 big_base_ctz fields to struct bases. That would add about 2 KiB 248 to mp_bases.c. 249 FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to 250 mpn_bdiv_q_1 for more machines. */ 251 mpn_divexact_1 (t, t, n, big_base); 252 253 n -= t[n - 1] == 0; 254 digits_in_base -= chars_per_limb; 255 } 256 257 shift *= 2; 258 /* Strip low zero limbs, but be careful to keep the result divisible by 259 big_base. */ 260 while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) 261 { 262 t++; 263 n--; 264 shift++; 265 } 266 p = t; 267 268 SET_powers_t (pt[0], p, n, digits_in_base, base, shift); 269 pt++; 270 } 271 272 /* Strip any remaining low zero limbs. */ 273 pt -= n_pows + 1; 274 for (long pi = n_pows; pi >= 0; pi--) 275 { 276 mp_ptr t = pt[pi].p; 277 mp_size_t shift = pt[pi].shift; 278 mp_size_t n = pt[pi].n; 279 int c; 280 c = t[0] == 0; 281 t += c; 282 n -= c; 283 shift += c; 284 pt[pi].p = t; 285 pt[pi].shift = shift; 286 pt[pi].n = n; 287 } 288 } 289 #endif 290 291 static long 292 powtab_decide (size_t *exptab, size_t un, int base) 293 { 294 int chars_per_limb = mp_bases[base].chars_per_limb; 295 long n_pows = 0; 296 for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1) 297 { 298 exptab[n_pows] = pn * chars_per_limb; 299 n_pows++; 300 } 301 exptab[n_pows] = chars_per_limb; 302 303 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div 304 size_t pn = un - 1; 305 size_t xn = (un + 1) >> 1; 306 unsigned mcost = 1; 307 unsigned dcost = 1; 308 for (long i = n_pows - 2; i >= 0; i--) 309 { 310 size_t pow = (pn >> (i + 1)) + 1; 311 312 if (pow & 1) 313 dcost += pow; 314 315 if (xn != (pow << i)) 316 { 317 if (pow > 2 && (pow & 1) == 0) 318 mcost += 2 * pow; 319 else 320 mcost += pow; 321 } 322 else 323 { 324 if (pow & 1) 325 mcost += pow; 326 } 327 } 328 329 dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100; 330 331 if (mcost <= dcost) 332 return n_pows; 333 else 334 return -n_pows; 335 #elif HAVE_mpn_compute_powtab_mul 336 return n_pows; 337 #elif HAVE_mpn_compute_powtab_div 338 return -n_pows; 339 #else 340 #error "no powtab function available" 341 #endif 342 } 343 344 size_t 345 mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) 346 { 347 size_t exptab[GMP_LIMB_BITS]; 348 349 long n_pows = powtab_decide (exptab, un, base); 350 351 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div 352 if (n_pows >= 0) 353 { 354 mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); 355 return n_pows; 356 } 357 else 358 { 359 mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); 360 return -n_pows; 361 } 362 #elif HAVE_mpn_compute_powtab_mul 363 ASSERT (n_pows > 0); 364 mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); 365 return n_pows; 366 #elif HAVE_mpn_compute_powtab_div 367 ASSERT (n_pows < 0); 368 mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); 369 return -n_pows; 370 #else 371 #error "no powtab function available" 372 #endif 373 } 374