xref: /netbsd-src/external/lgpl3/gmp/dist/mpz/powm_ui.c (revision 4d342c046e3288fb5a1edcd33cfec48c41c80664)
1 /* mpz_powm_ui(res,base,exp,mod) -- Set R to (B^E) mod M.
2 
3    Contributed to the GNU project by Torbjörn Granlund.
4 
5 Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009, 2011-2013
6 Free Software Foundation, Inc.
7 
8 This file is part of the GNU MP Library.
9 
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of either:
12 
13   * the GNU Lesser General Public License as published by the Free
14     Software Foundation; either version 3 of the License, or (at your
15     option) any later version.
16 
17 or
18 
19   * the GNU General Public License as published by the Free Software
20     Foundation; either version 2 of the License, or (at your option) any
21     later version.
22 
23 or both in parallel, as here.
24 
25 The GNU MP Library is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28 for more details.
29 
30 You should have received copies of the GNU General Public License and the
31 GNU Lesser General Public License along with the GNU MP Library.  If not,
32 see https://www.gnu.org/licenses/.  */
33 
34 
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38 
39 
40 /* This code is very old, and should be rewritten to current GMP standard.  It
41    is slower than mpz_powm for large exponents, but also for small exponents
42    when the mod argument is small.
43 
44    As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.
45 */
46 
47 /*
48   b ^ e mod m   res
49   0   0     0    ?
50   0   e     0    ?
51   0   0     m    ?
52   0   e     m    0
53   b   0     0    ?
54   b   e     0    ?
55   b   0     m    1 mod m
56   b   e     m    b^e mod m
57 */
58 
/* Reduce {np,nn} modulo {dp,dn} in place, leaving the dn-limb remainder in
   the low limbs of np.  D must be normalized (most significant bit of
   dp[dn-1] set) and DINV must hold the precomputed pi1 inverse of its high
   limb(s), as set up by invert_pi1 in the caller.  TP supplies scratch for
   the quotient, which is computed but discarded; callers provide at least
   nn - dn + 1 limbs.  The division routine is chosen by operand size:
   dedicated code for 1- and 2-limb divisors, then schoolbook,
   divide-and-conquer, or mpn_mu_div_qr for the largest operands.  */
static void
mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_ptr qp;
  TMP_DECL;
  TMP_MARK;

  qp = tp;			/* quotient goes into caller scratch */

  if (dn == 1)
    {
      /* Single-limb divisor.  */
      np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
    }
  else if (dn == 2)
    {
      /* Two-limb normalized divisor, using the precomputed inverse.  */
      mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
    }
  else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
	   BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
    {
      /* Schoolbook division when either the divisor or the quotient is
	 below the divide-and-conquer threshold.  */
      mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
    }
  else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
	   BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
	   (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
	   + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
    {
      /* Divide-and-conquer; the floating-point expression above estimates
	 whether it still beats mpn_mu_div_qr for these sizes.  */
      mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
    }
  else
    {
      /* We need to allocate separate remainder area, since mpn_mu_div_qr does
	 not handle overlap between the numerator and remainder areas.
	 FIXME: Make it handle such overlap.  */
      mp_ptr rp = TMP_BALLOC_LIMBS (dn);
      mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
      mp_ptr scratch = TMP_BALLOC_LIMBS (itch);
      mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
      MPN_COPY (np, rp, dn);
    }

  TMP_FREE;
}
102 
103 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
104    t is defined by (tp,mn).  */
105 static void
106 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
107 {
108   mp_ptr rp, scratch;
109   TMP_DECL;
110   TMP_MARK;
111 
112   rp = TMP_ALLOC_LIMBS (an);
113   scratch = TMP_ALLOC_LIMBS (an - mn + 1);
114   MPN_COPY (rp, ap, an);
115   mod (rp, an, mp, mn, dinv, scratch);
116   MPN_COPY (tp, rp, mn);
117 
118   TMP_FREE;
119 }
120 
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  /* Small exponents use the simple square-and-multiply code below; larger
     ones deflect to the more sophisticated mpz_powm (see file comment).  */
  if (el < 20)
    {
      mp_ptr xp, tp, mp, bp, scratch;
      mp_size_t xn, tn, mn, bn;
      int m_zero_cnt;
      int c;
      mp_limb_t e, m2;
      gmp_pi1_t dinv;
      TMP_DECL;

      mp = PTR(m);
      mn = ABSIZ(m);
      if (UNLIKELY (mn == 0))
	DIVIDE_BY_ZERO;

      if (el == 0)
	{
	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
	     M equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  return;
	}

      TMP_MARK;

      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}

      /* Precompute the pi1 inverse of the high limb(s) of the normalized
	 modulus, for the division code in mod().  */
      m2 = mn == 1 ? 0 : mp[mn - 2];
      invert_pi1 (dinv, mp[mn - 1], m2);

      bn = ABSIZ(b);
      bp = PTR(b);
      if (bn > mn)
	{
	  /* Reduce possibly huge base.  Use a function call to reduce, since we
	     don't want the quotient allocation to live until function return.  */
	  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
	  reduce (new_bp, bp, bn, mp, mn, &dinv);
	  bp = new_bp;
	  bn = mn;
	  /* Canonicalize the base, since we are potentially going to multiply with
	     it quite a few times.  */
	  MPN_NORMALIZE (bp, bn);
	}

      if (bn == 0)
	{
	  /* Base is congruent to 0 mod m, hence so is any positive power.  */
	  SIZ(r) = 0;
	  TMP_FREE;
	  return;
	}

      /* tp holds intermediate squares/products of up to 2*mn limbs, plus
	 one spare limb; scratch receives the (discarded) quotient inside
	 mod(), at most mn + 1 limbs here.  */
      tp = TMP_ALLOC_LIMBS (2 * mn + 1);
      xp = TMP_ALLOC_LIMBS (mn);
      scratch = TMP_ALLOC_LIMBS (mn + 1);

      /* x := b; this accounts for the exponent's most significant bit,
	 which is therefore dropped from e below.  */
      MPN_COPY (xp, bp, bn);
      xn = bn;

      e = el;
      count_leading_zeros (c, e);
      e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
      c = GMP_LIMB_BITS - 1 - c;	/* number of exponent bits left in e */

      if (c == 0)
	{
	  /* If m is already normalized (high bit of high limb set), and b is
	     the same size, but a bigger value, and e==1, then there's no
	     modular reductions done and we can end up with a result out of
	     range at the end. */
	  if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
	    mpn_sub_n (xp, xp, mp, mn);
	}
      else
	{
	  /* Main loop.  Binary square-and-multiply over the remaining c
	     exponent bits, keeping x reduced modulo the (shifted) m.  */
	  do
	    {
	      mpn_sqr (tp, xp, xn);
	      tn = 2 * xn; tn -= tp[tn - 1] == 0;	/* strip a high zero limb */
	      if (tn < mn)
		{
		  /* Still smaller than m; no reduction needed.  */
		  MPN_COPY (xp, tp, tn);
		  xn = tn;
		}
	      else
		{
		  mod (tp, tn, mp, mn, &dinv, scratch);
		  MPN_COPY (xp, tp, mn);
		  xn = mn;
		}

	      if ((mp_limb_signed_t) e < 0)	/* current exponent bit (top of e) set?  */
		{
		  mpn_mul (tp, xp, xn, bp, bn);
		  tn = xn + bn; tn -= tp[tn - 1] == 0;
		  if (tn < mn)
		    {
		      MPN_COPY (xp, tp, tn);
		      xn = tn;
		    }
		  else
		    {
		      mod (tp, tn, mp, mn, &dinv, scratch);
		      MPN_COPY (xp, tp, mn);
		      xn = mn;
		    }
		}
	      e <<= 1;		/* advance to the next exponent bit */
	      c--;
	    }
	  while (c != 0);
	}

      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
	 with the original M.  */
      if (m_zero_cnt != 0)
	{
	  mp_limb_t cy;
	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
	  tp[xn] = cy; xn += cy != 0;

	  if (xn < mn)
	    {
	      MPN_COPY (xp, tp, xn);
	    }
	  else
	    {
	      mod (tp, xn, mp, mn, &dinv, scratch);
	      MPN_COPY (xp, tp, mn);
	      xn = mn;
	    }
	  mpn_rshift (xp, xp, xn, m_zero_cnt);
	}
      MPN_NORMALIZE (xp, xn);

      /* A negative base raised to an odd exponent yields a negative power;
	 map the nonzero result into [0, |m|) as |m| - x, using the
	 original (unshifted) modulus.  */
      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
	{
	  mp = PTR(m);			/* want original, unnormalized m */
	  mpn_sub (xp, mp, mn, xp, xn);
	  xn = mn;
	  MPN_NORMALIZE (xp, xn);
	}
      MPZ_REALLOC (r, xn);
      SIZ (r) = xn;
      MPN_COPY (PTR(r), xp, xn);

      TMP_FREE;
    }
  else
    {
      /* For large exponents, fake an mpz_t exponent and deflect to the more
	 sophisticated mpz_powm.  */
      mpz_t e;
      mp_limb_t ep[LIMBS_PER_ULONG];
      MPZ_FAKE_UI (e, ep, el);
      mpz_powm (r, b, e, m);
    }
}
293