xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/powlo.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
2 
3 Copyright 2007-2009, 2012, 2015, 2016, 2018 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 
32 #include "gmp-impl.h"
33 #include "longlong.h"
34 
35 
36 #define getbit(p,bi) \
37   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
38 
39 static inline mp_limb_t
getbits(const mp_limb_t * p,mp_bitcnt_t bi,unsigned nbits)40 getbits (const mp_limb_t *p, mp_bitcnt_t bi, unsigned nbits)
41 {
42   unsigned nbits_in_r;
43   mp_limb_t r;
44   mp_size_t i;
45 
46   if (bi < nbits)
47     {
48       return p[0] & (((mp_limb_t) 1 << bi) - 1);
49     }
50   else
51     {
52       bi -= nbits;			/* bit index of low bit to extract */
53       i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
54       bi %= GMP_NUMB_BITS;		/* bit index in low word */
55       r = p[i] >> bi;			/* extract (low) bits */
56       nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
57       if (nbits_in_r < nbits)		/* did we get enough bits? */
58 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
59       return r & (((mp_limb_t ) 1 << nbits) - 1);
60     }
61 }
62 
63 static inline unsigned
win_size(mp_bitcnt_t eb)64 win_size (mp_bitcnt_t eb)
65 {
66   unsigned k;
67   static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
68   ASSERT (eb > 1);
69   for (k = 0; eb > x[k++];)
70     ;
71   return k;
72 }
73 
74 /* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
75    Requires that ep[en-1] is non-zero.
76    Uses scratch space tp[3n-1..0], i.e., 3n words.  */
77 /* We only use n words in the scratch space, we should pass tp + n to
78    mullo/sqrlo as a temporary area, it is needed. */
79 void
mpn_powlo(mp_ptr rp,mp_srcptr bp,mp_srcptr ep,mp_size_t en,mp_size_t n,mp_ptr tp)80 mpn_powlo (mp_ptr rp, mp_srcptr bp,
81 	   mp_srcptr ep, mp_size_t en,
82 	   mp_size_t n, mp_ptr tp)
83 {
84   unsigned cnt;
85   mp_bitcnt_t ebi;
86   unsigned windowsize, this_windowsize;
87   mp_limb_t expbits;
88   mp_limb_t *pp;
89   long i;
90   int flipflop;
91   TMP_DECL;
92 
93   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
94 
95   TMP_MARK;
96 
97   MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
98 
99   windowsize = win_size (ebi);
100   if (windowsize > 1)
101     {
102       mp_limb_t *this_pp, *last_pp;
103       ASSERT (windowsize < ebi);
104 
105       pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)));
106 
107       this_pp = pp;
108 
109       MPN_COPY (this_pp, bp, n);
110 
111       /* Store b^2 in tp.  */
112       mpn_sqrlo (tp, bp, n);
113 
114       /* Precompute odd powers of b and put them in the temporary area at pp.  */
115       i = (1 << (windowsize - 1)) - 1;
116       do
117 	{
118 	  last_pp = this_pp;
119 	  this_pp += n;
120 	  mpn_mullo_n (this_pp, last_pp, tp, n);
121 	} while (--i != 0);
122 
123       expbits = getbits (ep, ebi, windowsize);
124 
125       /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */
126       count_trailing_zeros (cnt, expbits);
127       ebi -= windowsize;
128       ebi += cnt;
129       expbits >>= cnt;
130 
131       MPN_COPY (rp, pp + n * (expbits >> 1), n);
132     }
133   else
134     {
135       pp = tp + n;
136       MPN_COPY (pp, bp, n);
137       MPN_COPY (rp, bp, n);
138       --ebi;
139     }
140 
141   flipflop = 0;
142 
143   do
144     {
145       while (getbit (ep, ebi) == 0)
146 	{
147 	  mpn_sqrlo (tp, rp, n);
148 	  MP_PTR_SWAP (rp, tp);
149 	  flipflop = ! flipflop;
150 	  if (--ebi == 0)
151 	    goto done;
152 	}
153 
154       /* The next bit of the exponent is 1.  Now extract the largest block of
155 	 bits <= windowsize, and such that the least significant bit is 1.  */
156 
157       expbits = getbits (ep, ebi, windowsize);
158       this_windowsize = MIN (windowsize, ebi);
159       ebi -= this_windowsize;
160 
161       count_trailing_zeros (cnt, expbits);
162       this_windowsize -= cnt;
163       ebi += cnt;
164       expbits >>= cnt;
165 
166       while (this_windowsize > 1)
167 	{
168 	  mpn_sqrlo (tp, rp, n);
169 	  mpn_sqrlo (rp, tp, n);
170 	  this_windowsize -= 2;
171 	}
172 
173       if (this_windowsize != 0)
174 	mpn_sqrlo (tp, rp, n);
175       else
176 	{
177 	  MP_PTR_SWAP (rp, tp);
178 	  flipflop = ! flipflop;
179 	}
180 
181       mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n);
182     } while (ebi != 0);
183 
184  done:
185   if (flipflop)
186     MPN_COPY (tp, rp, n);
187   TMP_FREE;
188 }
189