1 /* mpn_compute_powtab.
2
3 Contributed to the GNU project by Torbjorn Granlund.
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 1991-2017 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 /*
38 CAVEATS:
39 * The exptab and powtab vectors are in opposite orders. Probably OK.
40 * Consider getting rid of exptab, doing bit ops on the un argument instead.
41 * Consider rounding greatest power slightly upwards to save adjustments.
42 * In powtab_decide, consider computing cost from just the 2-3 largest
43 operands, since smaller operand contribute little. This makes most sense
44 if exptab is suppressed.
45 */
46
47 #include "gmp-impl.h"
48
49 #ifndef DIV_1_VS_MUL_1_PERCENT
50 #define DIV_1_VS_MUL_1_PERCENT 150
51 #endif
52
53 #define SET_powers_t(dest, ptr, size, dib, b, sh) \
54 do { \
55 dest.p = ptr; \
56 dest.n = size; \
57 dest.digits_in_base = dib; \
58 dest.base = b; \
59 dest.shift = sh; \
60 } while (0)
61
62 #if DIV_1_VS_MUL_1_PERCENT > 120
63 #define HAVE_mpn_compute_powtab_mul 1
64 static void
mpn_compute_powtab_mul(powers_t * powtab,mp_ptr powtab_mem,mp_size_t un,int base,const size_t * exptab,size_t n_pows)65 mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66 int base, const size_t *exptab, size_t n_pows)
67 {
68 mp_size_t n;
69 mp_ptr p, t;
70 mp_limb_t cy;
71 long start_idx;
72 int c;
73 mp_size_t shift;
74 long pi;
75
76 mp_limb_t big_base = mp_bases[base].big_base;
77 int chars_per_limb = mp_bases[base].chars_per_limb;
78
79 mp_ptr powtab_mem_ptr = powtab_mem;
80
81 size_t digits_in_base = chars_per_limb;
82
83 powers_t *pt = powtab;
84
85 p = powtab_mem_ptr;
86 powtab_mem_ptr += 1;
87 p[0] = big_base;
88
89 SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
90 pt++;
91
92 t = powtab_mem_ptr;
93 powtab_mem_ptr += 2;
94 t[1] = mpn_mul_1 (t, p, 1, big_base);
95 n = 2;
96
97 digits_in_base *= 2;
98
99 c = t[0] == 0;
100 t += c;
101 n -= c;
102 shift = c;
103
104 SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
105 p = t;
106 pt++;
107
108 if (exptab[0] == ((size_t) chars_per_limb << n_pows))
109 {
110 start_idx = n_pows - 2;
111 }
112 else
113 {
114 if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
115 {
116 /* 3, sometimes adjusted to 4. */
117 t = powtab_mem_ptr;
118 powtab_mem_ptr += 4;
119 t[n] = cy = mpn_mul_1 (t, p, n, big_base);
120 n += cy != 0;;
121
122 digits_in_base += chars_per_limb;
123
124 c = t[0] == 0;
125 t += c;
126 n -= c;
127 shift += c;
128 }
129 else
130 {
131 /* 2 copy, will always become 3 with back-multiplication. */
132 t = powtab_mem_ptr;
133 powtab_mem_ptr += 3;
134 t[0] = p[0];
135 t[1] = p[1];
136 }
137
138 SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
139 p = t;
140 pt++;
141 start_idx = n_pows - 3;
142 }
143
144 for (pi = start_idx; pi >= 0; pi--)
145 {
146 t = powtab_mem_ptr;
147 powtab_mem_ptr += 2 * n + 2;
148
149 ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
150
151 mpn_sqr (t, p, n);
152
153 digits_in_base *= 2;
154 n *= 2;
155 n -= t[n - 1] == 0;
156 shift *= 2;
157
158 c = t[0] == 0;
159 t += c;
160 n -= c;
161 shift += c;
162
163 /* Adjust new value if it is too small as input to the next squaring. */
164 if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
165 {
166 t[n] = cy = mpn_mul_1 (t, t, n, big_base);
167 n += cy != 0;
168
169 digits_in_base += chars_per_limb;
170
171 c = t[0] == 0;
172 t += c;
173 n -= c;
174 shift += c;
175 }
176
177 SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
178
179 /* Adjust previous value if it is not at its target power. */
180 if (pt[-1].digits_in_base < exptab[pi + 1])
181 {
182 mp_size_t n = pt[-1].n;
183 mp_ptr p = pt[-1].p;
184 p[n] = cy = mpn_mul_1 (p, p, n, big_base);
185 n += cy != 0;
186
187 ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
188 pt[-1].digits_in_base = exptab[pi + 1];
189
190 c = p[0] == 0;
191 pt[-1].p = p + c;
192 pt[-1].n = n - c;
193 pt[-1].shift += c;
194 }
195
196 p = t;
197 pt++;
198 }
199 }
200 #endif
201
202 #if DIV_1_VS_MUL_1_PERCENT < 275
203 #define HAVE_mpn_compute_powtab_div 1
204 static void
mpn_compute_powtab_div(powers_t * powtab,mp_ptr powtab_mem,mp_size_t un,int base,const size_t * exptab,size_t n_pows)205 mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
206 int base, const size_t *exptab, size_t n_pows)
207 {
208 mp_ptr p, t;
209
210 mp_limb_t big_base = mp_bases[base].big_base;
211 int chars_per_limb = mp_bases[base].chars_per_limb;
212
213 mp_ptr powtab_mem_ptr = powtab_mem;
214
215 size_t digits_in_base = chars_per_limb;
216
217 powers_t *pt = powtab;
218
219 mp_size_t n = 1;
220 mp_size_t shift = 0;
221 long pi;
222
223 p = powtab_mem_ptr;
224 powtab_mem_ptr += 1;
225 p[0] = big_base;
226
227 SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
228 pt++;
229
230 for (pi = n_pows - 1; pi >= 0; pi--)
231 {
232 t = powtab_mem_ptr;
233 powtab_mem_ptr += 2 * n;
234
235 ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
236
237 mpn_sqr (t, p, n);
238 n = 2 * n - 1; n += t[n] != 0;
239 digits_in_base *= 2;
240
241 if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */
242 {
243 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
244 if (__GMP_LIKELY (base == 10))
245 mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
246 MP_BASES_BIG_BASE_BINVERTED_10,
247 MP_BASES_BIG_BASE_CTZ_10);
248 else
249 #endif
250 /* FIXME: We could use _pi1 here if we add big_base_binverted and
251 big_base_ctz fields to struct bases. That would add about 2 KiB
252 to mp_bases.c.
253 FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
254 mpn_bdiv_q_1 for more machines. */
255 mpn_divexact_1 (t, t, n, big_base);
256
257 n -= t[n - 1] == 0;
258 digits_in_base -= chars_per_limb;
259 }
260
261 shift *= 2;
262 /* Strip low zero limbs, but be careful to keep the result divisible by
263 big_base. */
264 while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
265 {
266 t++;
267 n--;
268 shift++;
269 }
270 p = t;
271
272 SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
273 pt++;
274 }
275
276 /* Strip any remaining low zero limbs. */
277 pt -= n_pows + 1;
278 for (pi = n_pows; pi >= 0; pi--)
279 {
280 mp_ptr t = pt[pi].p;
281 mp_size_t shift = pt[pi].shift;
282 mp_size_t n = pt[pi].n;
283 int c;
284 c = t[0] == 0;
285 t += c;
286 n -= c;
287 shift += c;
288 pt[pi].p = t;
289 pt[pi].shift = shift;
290 pt[pi].n = n;
291 }
292 }
293 #endif
294
295 static long
powtab_decide(size_t * exptab,size_t un,int base)296 powtab_decide (size_t *exptab, size_t un, int base)
297 {
298 int chars_per_limb = mp_bases[base].chars_per_limb;
299 long n_pows = 0;
300 size_t pn;
301 for (pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
302 {
303 exptab[n_pows] = pn * chars_per_limb;
304 n_pows++;
305 }
306 exptab[n_pows] = chars_per_limb;
307
308 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
309 {
310 size_t pn = un - 1;
311 size_t xn = (un + 1) >> 1;
312 unsigned mcost = 1;
313 unsigned dcost = 1;
314 long i;
315 for (i = n_pows - 2; i >= 0; i--)
316 {
317 size_t pow = (pn >> (i + 1)) + 1;
318
319 if (pow & 1)
320 dcost += pow;
321
322 if (xn != (pow << i))
323 {
324 if (pow > 2 && (pow & 1) == 0)
325 mcost += 2 * pow;
326 else
327 mcost += pow;
328 }
329 else
330 {
331 if (pow & 1)
332 mcost += pow;
333 }
334 }
335
336 dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
337
338 if (mcost <= dcost)
339 return n_pows;
340 else
341 return -n_pows;
342 }
343 #elif HAVE_mpn_compute_powtab_mul
344 return n_pows;
345 #elif HAVE_mpn_compute_powtab_div
346 return -n_pows;
347 #else
348 #error "no powtab function available"
349 #endif
350 }
351
352 size_t
mpn_compute_powtab(powers_t * powtab,mp_ptr powtab_mem,mp_size_t un,int base)353 mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
354 {
355 size_t exptab[GMP_LIMB_BITS];
356
357 long n_pows = powtab_decide (exptab, un, base);
358
359 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
360 if (n_pows >= 0)
361 {
362 mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
363 return n_pows;
364 }
365 else
366 {
367 mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
368 return -n_pows;
369 }
370 #elif HAVE_mpn_compute_powtab_mul
371 ASSERT (n_pows > 0);
372 mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
373 return n_pows;
374 #elif HAVE_mpn_compute_powtab_div
375 ASSERT (n_pows < 0);
376 mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
377 return -n_pows;
378 #else
379 #error "no powtab function available"
380 #endif
381 }
382