xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/compute_powtab.c (revision 1daf83e636cd998f45e5597a8f995a540e2d5b4a)
1 /* mpn_compute_powtab.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 1991-2017 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 /*
38   CAVEATS:
39   * The exptab and powtab vectors are in opposite orders.  Probably OK.
40   * Consider getting rid of exptab, doing bit ops on the un argument instead.
41   * Consider rounding greatest power slightly upwards to save adjustments.
42   * In powtab_decide, consider computing cost from just the 2-3 largest
43     operands, since smaller operand contribute little.  This makes most sense
44     if exptab is suppressed.
45 */
46 
47 #include "gmp-impl.h"
48 
49 #ifndef DIV_1_VS_MUL_1_PERCENT
50 #define DIV_1_VS_MUL_1_PERCENT 150
51 #endif
52 
53 #define SET_powers_t(dest, ptr, size, dib, b, sh)	\
54   do {							\
55     dest.p = ptr;					\
56     dest.n = size;					\
57     dest.digits_in_base = dib;				\
58     dest.base = b;					\
59     dest.shift = sh;					\
60   } while (0)
61 
62 #if DIV_1_VS_MUL_1_PERCENT > 120
63 #define HAVE_mpn_compute_powtab_mul 1
64 static void
mpn_compute_powtab_mul(powers_t * powtab,mp_ptr powtab_mem,mp_size_t un,int base,const size_t * exptab,size_t n_pows)65 mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66 			int base, const size_t *exptab, size_t n_pows)
67 {
68   mp_size_t n;
69   mp_ptr p, t;
70   mp_limb_t cy;
71   long start_idx;
72   int c;
73   mp_size_t shift;
74   long pi;
75 
76   mp_limb_t big_base = mp_bases[base].big_base;
77   int chars_per_limb = mp_bases[base].chars_per_limb;
78 
79   mp_ptr powtab_mem_ptr = powtab_mem;
80 
81   size_t digits_in_base = chars_per_limb;
82 
83   powers_t *pt = powtab;
84 
85   p = powtab_mem_ptr;
86   powtab_mem_ptr += 1;
87   p[0] = big_base;
88 
89   SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
90   pt++;
91 
92   t = powtab_mem_ptr;
93   powtab_mem_ptr += 2;
94   t[1] = mpn_mul_1 (t, p, 1, big_base);
95   n = 2;
96 
97   digits_in_base *= 2;
98 
99   c = t[0] == 0;
100   t += c;
101   n -= c;
102   shift = c;
103 
104   SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
105   p = t;
106   pt++;
107 
108   if (exptab[0] == ((size_t) chars_per_limb << n_pows))
109     {
110       start_idx = n_pows - 2;
111     }
112   else
113     {
114       if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
115 	{
116 	  /* 3, sometimes adjusted to 4.  */
117 	  t = powtab_mem_ptr;
118 	  powtab_mem_ptr += 4;
119 	  t[n] = cy = mpn_mul_1 (t, p, n, big_base);
120 	  n += cy != 0;;
121 
122 	  digits_in_base += chars_per_limb;
123 
124 	  c  = t[0] == 0;
125 	  t += c;
126 	  n -= c;
127 	  shift += c;
128 	}
129       else
130 	{
131 	  /* 2 copy, will always become 3 with back-multiplication.  */
132 	  t = powtab_mem_ptr;
133 	  powtab_mem_ptr += 3;
134 	  t[0] = p[0];
135 	  t[1] = p[1];
136 	}
137 
138       SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
139       p = t;
140       pt++;
141       start_idx = n_pows - 3;
142     }
143 
144   for (pi = start_idx; pi >= 0; pi--)
145     {
146       t = powtab_mem_ptr;
147       powtab_mem_ptr += 2 * n + 2;
148 
149       ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
150 
151       mpn_sqr (t, p, n);
152 
153       digits_in_base *= 2;
154       n *= 2;
155       n -= t[n - 1] == 0;
156       shift *= 2;
157 
158       c = t[0] == 0;
159       t += c;
160       n -= c;
161       shift += c;
162 
163       /* Adjust new value if it is too small as input to the next squaring.  */
164       if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
165 	{
166 	  t[n] = cy = mpn_mul_1 (t, t, n, big_base);
167 	  n += cy != 0;
168 
169 	  digits_in_base += chars_per_limb;
170 
171 	  c  = t[0] == 0;
172 	  t += c;
173 	  n -= c;
174 	  shift += c;
175 	}
176 
177       SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
178 
179       /* Adjust previous value if it is not at its target power.  */
180       if (pt[-1].digits_in_base < exptab[pi + 1])
181 	{
182 	  mp_size_t n = pt[-1].n;
183 	  mp_ptr p = pt[-1].p;
184 	  p[n] = cy = mpn_mul_1 (p, p, n, big_base);
185 	  n += cy != 0;
186 
187 	  ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
188 	  pt[-1].digits_in_base = exptab[pi + 1];
189 
190 	  c = p[0] == 0;
191 	  pt[-1].p = p + c;
192 	  pt[-1].n = n - c;
193 	  pt[-1].shift += c;
194 	}
195 
196       p = t;
197       pt++;
198     }
199 }
200 #endif
201 
202 #if DIV_1_VS_MUL_1_PERCENT < 275
203 #define HAVE_mpn_compute_powtab_div 1
204 static void
mpn_compute_powtab_div(powers_t * powtab,mp_ptr powtab_mem,mp_size_t un,int base,const size_t * exptab,size_t n_pows)205 mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
206 			int base, const size_t *exptab, size_t n_pows)
207 {
208   mp_ptr p, t;
209 
210   mp_limb_t big_base = mp_bases[base].big_base;
211   int chars_per_limb = mp_bases[base].chars_per_limb;
212 
213   mp_ptr powtab_mem_ptr = powtab_mem;
214 
215   size_t digits_in_base = chars_per_limb;
216 
217   powers_t *pt = powtab;
218 
219   mp_size_t n = 1;
220   mp_size_t shift = 0;
221   long pi;
222 
223   p = powtab_mem_ptr;
224   powtab_mem_ptr += 1;
225   p[0] = big_base;
226 
227   SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
228   pt++;
229 
230   for (pi = n_pows - 1; pi >= 0; pi--)
231     {
232       t = powtab_mem_ptr;
233       powtab_mem_ptr += 2 * n;
234 
235       ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
236 
237       mpn_sqr (t, p, n);
238       n = 2 * n - 1; n += t[n] != 0;
239       digits_in_base *= 2;
240 
241       if (digits_in_base != exptab[pi])	/* if ((((un - 1) >> pi) & 2) == 0) */
242 	{
243 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
244 	  if (__GMP_LIKELY (base == 10))
245 	    mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
246 			      MP_BASES_BIG_BASE_BINVERTED_10,
247 			      MP_BASES_BIG_BASE_CTZ_10);
248 	  else
249 #endif
250 	    /* FIXME: We could use _pi1 here if we add big_base_binverted and
251 	       big_base_ctz fields to struct bases.  That would add about 2 KiB
252 	       to mp_bases.c.
253 	       FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
254 	       mpn_bdiv_q_1 for more machines. */
255 	    mpn_divexact_1 (t, t, n, big_base);
256 
257 	  n -= t[n - 1] == 0;
258 	  digits_in_base -= chars_per_limb;
259 	}
260 
261       shift *= 2;
262       /* Strip low zero limbs, but be careful to keep the result divisible by
263 	 big_base.  */
264       while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
265 	{
266 	  t++;
267 	  n--;
268 	  shift++;
269 	}
270       p = t;
271 
272       SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
273       pt++;
274     }
275 
276   /* Strip any remaining low zero limbs.  */
277   pt -= n_pows + 1;
278   for (pi = n_pows; pi >= 0; pi--)
279     {
280       mp_ptr t = pt[pi].p;
281       mp_size_t shift = pt[pi].shift;
282       mp_size_t n = pt[pi].n;
283       int c;
284       c = t[0] == 0;
285       t += c;
286       n -= c;
287       shift += c;
288       pt[pi].p = t;
289       pt[pi].shift = shift;
290       pt[pi].n = n;
291     }
292 }
293 #endif
294 
295 static long
powtab_decide(size_t * exptab,size_t un,int base)296 powtab_decide (size_t *exptab, size_t un, int base)
297 {
298   int chars_per_limb = mp_bases[base].chars_per_limb;
299   long n_pows = 0;
300   size_t pn;
301   for (pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
302     {
303       exptab[n_pows] = pn * chars_per_limb;
304       n_pows++;
305     }
306   exptab[n_pows] = chars_per_limb;
307 
308 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
309   {
310   size_t pn = un - 1;
311   size_t xn = (un + 1) >> 1;
312   unsigned mcost = 1;
313   unsigned dcost = 1;
314   long i;
315   for (i = n_pows - 2; i >= 0; i--)
316     {
317       size_t pow = (pn >> (i + 1)) + 1;
318 
319       if (pow & 1)
320 	dcost += pow;
321 
322       if (xn != (pow << i))
323 	{
324 	  if (pow > 2 && (pow & 1) == 0)
325 	    mcost += 2 * pow;
326 	  else
327 	    mcost += pow;
328 	}
329       else
330 	{
331 	  if (pow & 1)
332 	    mcost += pow;
333 	}
334     }
335 
336   dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
337 
338   if (mcost <= dcost)
339     return n_pows;
340   else
341     return -n_pows;
342   }
343 #elif HAVE_mpn_compute_powtab_mul
344   return n_pows;
345 #elif HAVE_mpn_compute_powtab_div
346   return -n_pows;
347 #else
348 #error "no powtab function available"
349 #endif
350 }
351 
352 size_t
mpn_compute_powtab(powers_t * powtab,mp_ptr powtab_mem,mp_size_t un,int base)353 mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
354 {
355   size_t exptab[GMP_LIMB_BITS];
356 
357   long n_pows = powtab_decide (exptab, un, base);
358 
359 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
360   if (n_pows >= 0)
361     {
362       mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
363       return n_pows;
364     }
365   else
366     {
367       mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
368       return -n_pows;
369     }
370 #elif HAVE_mpn_compute_powtab_mul
371   ASSERT (n_pows > 0);
372   mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
373   return n_pows;
374 #elif HAVE_mpn_compute_powtab_div
375   ASSERT (n_pows < 0);
376   mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
377   return -n_pows;
378 #else
379 #error "no powtab function available"
380 #endif
381 }
382