xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/compute_powtab.c (revision b5c47949a45ac972130c38cf13dfd8afb1f09285)
1 /* mpn_compute_powtab.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 1991-2017 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 /*
38   CAVEATS:
39   * The exptab and powtab vectors are in opposite orders.  Probably OK.
40   * Consider getting rid of exptab, doing bit ops on the un argument instead.
41   * Consider rounding greatest power slightly upwards to save adjustments.
42   * In powtab_decide, consider computing cost from just the 2-3 largest
43     operands, since smaller operand contribute little.  This makes most sense
44     if exptab is suppressed.
45 */
46 
47 #include "gmp-impl.h"
48 
49 #ifndef DIV_1_VS_MUL_1_PERCENT
50 #define DIV_1_VS_MUL_1_PERCENT 150
51 #endif
52 
53 #define SET_powers_t(dest, ptr, size, dib, b, sh)	\
54   do {							\
55     dest.p = ptr;					\
56     dest.n = size;					\
57     dest.digits_in_base = dib;				\
58     dest.base = b;					\
59     dest.shift = sh;					\
60   } while (0)
61 
62 #if DIV_1_VS_MUL_1_PERCENT > 120
63 #define HAVE_mpn_compute_powtab_mul 1
64 static void
65 mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66 			int base, const size_t *exptab, size_t n_pows)
67 {
68   mp_size_t n;
69   mp_ptr p, t;
70   mp_limb_t cy;
71   long start_idx;
72   int c;
73 
74   mp_limb_t big_base = mp_bases[base].big_base;
75   int chars_per_limb = mp_bases[base].chars_per_limb;
76 
77   mp_ptr powtab_mem_ptr = powtab_mem;
78 
79   size_t digits_in_base = chars_per_limb;
80 
81   powers_t *pt = powtab;
82 
83   p = powtab_mem_ptr;
84   powtab_mem_ptr += 1;
85   p[0] = big_base;
86 
87   SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
88   pt++;
89 
90   t = powtab_mem_ptr;
91   powtab_mem_ptr += 2;
92   t[1] = mpn_mul_1 (t, p, 1, big_base);
93   n = 2;
94 
95   digits_in_base *= 2;
96 
97   c = t[0] == 0;
98   t += c;
99   n -= c;
100   mp_size_t shift = c;
101 
102   SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
103   p = t;
104   pt++;
105 
106   if (exptab[0] == ((size_t) chars_per_limb << n_pows))
107     {
108       start_idx = n_pows - 2;
109     }
110   else
111     {
112       if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
113 	{
114 	  /* 3, sometimes adjusted to 4.  */
115 	  t = powtab_mem_ptr;
116 	  powtab_mem_ptr += 4;
117 	  t[n] = cy = mpn_mul_1 (t, p, n, big_base);
118 	  n += cy != 0;;
119 
120 	  digits_in_base += chars_per_limb;
121 
122 	  c  = t[0] == 0;
123 	  t += c;
124 	  n -= c;
125 	  shift += c;
126 	}
127       else
128 	{
129 	  /* 2 copy, will always become 3 with back-multiplication.  */
130 	  t = powtab_mem_ptr;
131 	  powtab_mem_ptr += 3;
132 	  t[0] = p[0];
133 	  t[1] = p[1];
134 	}
135 
136       SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
137       p = t;
138       pt++;
139       start_idx = n_pows - 3;
140     }
141 
142   for (long pi = start_idx; pi >= 0; pi--)
143     {
144       t = powtab_mem_ptr;
145       powtab_mem_ptr += 2 * n + 2;
146 
147       ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
148 
149       mpn_sqr (t, p, n);
150 
151       digits_in_base *= 2;
152       n *= 2;
153       n -= t[n - 1] == 0;
154       shift *= 2;
155 
156       c = t[0] == 0;
157       t += c;
158       n -= c;
159       shift += c;
160 
161       /* Adjust new value if it is too small as input to the next squaring.  */
162       if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
163 	{
164 	  t[n] = cy = mpn_mul_1 (t, t, n, big_base);
165 	  n += cy != 0;
166 
167 	  digits_in_base += chars_per_limb;
168 
169 	  c  = t[0] == 0;
170 	  t += c;
171 	  n -= c;
172 	  shift += c;
173 	}
174 
175       SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
176 
177       /* Adjust previous value if it is not at its target power.  */
178       if (pt[-1].digits_in_base < exptab[pi + 1])
179 	{
180 	  mp_size_t n = pt[-1].n;
181 	  mp_ptr p = pt[-1].p;
182 	  p[n] = cy = mpn_mul_1 (p, p, n, big_base);
183 	  n += cy != 0;
184 
185 	  ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
186 	  pt[-1].digits_in_base = exptab[pi + 1];
187 
188 	  c = p[0] == 0;
189 	  pt[-1].p = p + c;
190 	  pt[-1].n = n - c;
191 	  pt[-1].shift += c;
192 	}
193 
194       p = t;
195       pt++;
196     }
197 }
198 #endif
199 
200 #if DIV_1_VS_MUL_1_PERCENT < 275
201 #define HAVE_mpn_compute_powtab_div 1
202 static void
203 mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
204 			int base, const size_t *exptab, size_t n_pows)
205 {
206   mp_ptr p, t;
207 
208   mp_limb_t big_base = mp_bases[base].big_base;
209   int chars_per_limb = mp_bases[base].chars_per_limb;
210 
211   mp_ptr powtab_mem_ptr = powtab_mem;
212 
213   size_t digits_in_base = chars_per_limb;
214 
215   powers_t *pt = powtab;
216 
217   p = powtab_mem_ptr;
218   powtab_mem_ptr += 1;
219   p[0] = big_base;
220 
221   SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
222   pt++;
223 
224   mp_size_t n = 1;
225   mp_size_t shift = 0;
226   for (long pi = n_pows - 1; pi >= 0; pi--)
227     {
228       t = powtab_mem_ptr;
229       powtab_mem_ptr += 2 * n;
230 
231       ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
232 
233       mpn_sqr (t, p, n);
234       n = 2 * n - 1; n += t[n] != 0;
235       digits_in_base *= 2;
236 
237       if (digits_in_base != exptab[pi])	/* if ((((un - 1) >> pi) & 2) == 0) */
238 	{
239 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
240 	  if (__GMP_LIKELY (base == 10))
241 	    mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
242 			      MP_BASES_BIG_BASE_BINVERTED_10,
243 			      MP_BASES_BIG_BASE_CTZ_10);
244 	  else
245 #endif
246 	    /* FIXME: We could use _pi1 here if we add big_base_binverted and
247 	       big_base_ctz fields to struct bases.  That would add about 2 KiB
248 	       to mp_bases.c.
249 	       FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
250 	       mpn_bdiv_q_1 for more machines. */
251 	    mpn_divexact_1 (t, t, n, big_base);
252 
253 	  n -= t[n - 1] == 0;
254 	  digits_in_base -= chars_per_limb;
255 	}
256 
257       shift *= 2;
258       /* Strip low zero limbs, but be careful to keep the result divisible by
259 	 big_base.  */
260       while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
261 	{
262 	  t++;
263 	  n--;
264 	  shift++;
265 	}
266       p = t;
267 
268       SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
269       pt++;
270     }
271 
272   /* Strip any remaining low zero limbs.  */
273   pt -= n_pows + 1;
274   for (long pi = n_pows; pi >= 0; pi--)
275     {
276       mp_ptr t = pt[pi].p;
277       mp_size_t shift = pt[pi].shift;
278       mp_size_t n = pt[pi].n;
279       int c;
280       c = t[0] == 0;
281       t += c;
282       n -= c;
283       shift += c;
284       pt[pi].p = t;
285       pt[pi].shift = shift;
286       pt[pi].n = n;
287     }
288 }
289 #endif
290 
291 static long
292 powtab_decide (size_t *exptab, size_t un, int base)
293 {
294   int chars_per_limb = mp_bases[base].chars_per_limb;
295   long n_pows = 0;
296   for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
297     {
298       exptab[n_pows] = pn * chars_per_limb;
299       n_pows++;
300     }
301   exptab[n_pows] = chars_per_limb;
302 
303 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
304   size_t pn = un - 1;
305   size_t xn = (un + 1) >> 1;
306   unsigned mcost = 1;
307   unsigned dcost = 1;
308   for (long i = n_pows - 2; i >= 0; i--)
309     {
310       size_t pow = (pn >> (i + 1)) + 1;
311 
312       if (pow & 1)
313 	dcost += pow;
314 
315       if (xn != (pow << i))
316 	{
317 	  if (pow > 2 && (pow & 1) == 0)
318 	    mcost += 2 * pow;
319 	  else
320 	    mcost += pow;
321 	}
322       else
323 	{
324 	  if (pow & 1)
325 	    mcost += pow;
326 	}
327     }
328 
329   dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
330 
331   if (mcost <= dcost)
332     return n_pows;
333   else
334     return -n_pows;
335 #elif HAVE_mpn_compute_powtab_mul
336   return n_pows;
337 #elif HAVE_mpn_compute_powtab_div
338   return -n_pows;
339 #else
340 #error "no powtab function available"
341 #endif
342 }
343 
344 size_t
345 mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
346 {
347   size_t exptab[GMP_LIMB_BITS];
348 
349   long n_pows = powtab_decide (exptab, un, base);
350 
351 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
352   if (n_pows >= 0)
353     {
354       mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
355       return n_pows;
356     }
357   else
358     {
359       mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
360       return -n_pows;
361     }
362 #elif HAVE_mpn_compute_powtab_mul
363   ASSERT (n_pows > 0);
364   mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
365   return n_pows;
366 #elif HAVE_mpn_compute_powtab_div
367   ASSERT (n_pows < 0);
368   mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
369   return -n_pows;
370 #else
371 #error "no powtab function available"
372 #endif
373 }
374