xref: /netbsd-src/external/lgpl3/gmp/dist/mpz/powm_ui.c (revision fdd524d4ccd2bb0c6f67401e938dabf773eb0372)
1 /* mpz_powm_ui(res,base,exp,mod) -- Set R to (U^E) mod M.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5 Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009,
6 2011, 2012, 2013 Free Software Foundation, Inc.
7 
8 This file is part of the GNU MP Library.
9 
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14 
15 The GNU MP Library is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18 License for more details.
19 
20 You should have received a copy of the GNU Lesser General Public License
21 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
22 
23 
24 #include "gmp.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27 
28 
29 /* This code is very old, and should be rewritten to current GMP standard.  It
30    is slower than mpz_powm for large exponents, but also for small exponents
31    when the mod argument is small.
32 
33    As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.
34 */
35 
36 /*
37   b ^ e mod m   res
38   0   0     0    ?
39   0   e     0    ?
40   0   0     m    ?
41   0   e     m    0
42   b   0     0    ?
43   b   e     0    ?
44   b   0     m    1 mod m
45   b   e     m    b^e mod m
46 */
47 
/* Reduce (np,nn) modulo the normalized divisor (dp,dn) in place: on return
   the low dn limbs of np hold the remainder.  dinv must be the precomputed
   pi1 inverse for dp's high limb(s) (see invert_pi1), and tp must supply
   scratch for the quotient (nn - dn + 1 limbs), which is computed but
   discarded.  The divisor must have its most significant bit set, as
   required by the pi1-style division routines called here.  */
48 static void
49 mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
50 {
51   mp_ptr qp;
52   TMP_DECL;
53   TMP_MARK;
54 
      /* The quotient goes into the caller's scratch; only the remainder
	 left in np is of interest.  */
55   qp = tp;
56 
      /* Select a division algorithm by operand size: single-limb and
	 two-limb divisors have dedicated routines; otherwise choose
	 schoolbook, divide-and-conquer, or Mulders/Newton (mu) division
	 based on the tuned thresholds.  */
57   if (dn == 1)
58     np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
59   else if (dn == 2)
60     mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
61   else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
62 	   BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
63     mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
64   else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
65 	   BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
66 	   (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
67 	   + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
68     {
69       mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
70     }
71   else
72     {
73       /* We need to allocate separate remainder area, since mpn_mu_div_qr does
74 	 not handle overlap between the numerator and remainder areas.
75 	 FIXME: Make it handle such overlap.  */
76       mp_ptr rp = TMP_ALLOC_LIMBS (dn);
77       mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
78       mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
79       mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
          /* Copy the remainder back so the in-place contract holds.  */
80       MPN_COPY (np, rp, dn);
81     }
82 
83   TMP_FREE;
84 }
85 
86 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
87    t is defined by (tp,mn).  */
88 static void
89 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
90 {
91   mp_ptr rp, scratch;
92   TMP_DECL;
93   TMP_MARK;
94 
95   rp = TMP_ALLOC_LIMBS (an);
96   scratch = TMP_ALLOC_LIMBS (an - mn + 1);
97   MPN_COPY (rp, ap, an);
98   mod (rp, an, mp, mn, dinv, scratch);
99   MPN_COPY (tp, rp, mn);
100 
101   TMP_FREE;
102 }
103 
/* Set r = b^el mod m.  For small exponents (el < 20) compute directly with
   binary exponentiation, using mod() for the reductions; for larger
   exponents wrap el in a stack-faked mpz_t and defer to the more
   sophisticated mpz_powm.  Raises DIVIDE_BY_ZERO when m == 0.  The result
   stored in r is nonnegative; a negative base with odd exponent is fixed
   up at the end by subtracting from m.  */
104 void
105 mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
106 {
107   if (el < 20)
108     {
109       mp_ptr xp, tp, mp, bp, scratch;
110       mp_size_t xn, tn, mn, bn;
111       int m_zero_cnt;
112       int c;
113       mp_limb_t e, m2;
114       gmp_pi1_t dinv;
115       TMP_DECL;
116 
117       mp = PTR(m);
118       mn = ABSIZ(m);
119       if (UNLIKELY (mn == 0))
120 	DIVIDE_BY_ZERO;
121 
122       if (el == 0)
123 	{
124 	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
125 	     M equals 1.  */
126 	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
127 	  PTR(r)[0] = 1;
128 	  return;
129 	}
130 
131       TMP_MARK;
132 
133       /* Normalize m (i.e. make its most significant bit set) as required by
134 	 division functions below.  */
135       count_leading_zeros (m_zero_cnt, mp[mn - 1]);
136       m_zero_cnt -= GMP_NAIL_BITS;
137       if (m_zero_cnt != 0)
138 	{
139 	  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
140 	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
141 	  mp = new_mp;
142 	}
143 
      /* Precompute the pi1 inverse of m's high limb(s) once, for all the
	 divisions performed by mod() below.  */
144       m2 = mn == 1 ? 0 : mp[mn - 2];
145       invert_pi1 (dinv, mp[mn - 1], m2);
146 
147       bn = ABSIZ(b);
148       bp = PTR(b);
149       if (bn > mn)
150 	{
151 	  /* Reduce possibly huge base.  Use a function call to reduce, since we
152 	     don't want the quotient allocation to live until function return.  */
153 	  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
154 	  reduce (new_bp, bp, bn, mp, mn, &dinv);
155 	  bp = new_bp;
156 	  bn = mn;
157 	  /* Canonicalize the base, since we are potentially going to multiply with
158 	     it quite a few times.  */
159 	  MPN_NORMALIZE (bp, bn);
160 	}
161 
      /* Base is 0 mod m (and el > 0 here), so the result is 0.  */
162       if (bn == 0)
163 	{
164 	  SIZ(r) = 0;
165 	  TMP_FREE;
166 	  return;
167 	}
168 
      /* tp holds products (up to 2*mn+1 limbs after the final lshift
	 adjustment), xp the running residue, scratch the quotient space
	 that mod() needs (mn + 1 limbs).  */
169       tp = TMP_ALLOC_LIMBS (2 * mn + 1);
170       xp = TMP_ALLOC_LIMBS (mn);
171       scratch = TMP_ALLOC_LIMBS (mn + 1);
172 
      /* Start with x = b, which accounts for the exponent's leading 1 bit.  */
173       MPN_COPY (xp, bp, bn);
174       xn = bn;
175 
176       e = el;
177       count_leading_zeros (c, e);
178       e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
179       c = GMP_LIMB_BITS - 1 - c;
180 
      /* c is now the number of exponent bits below the leading 1.  */
181       if (c == 0)
182 	{
183 	  /* If m is already normalized (high bit of high limb set), and b is
184 	     the same size, but a bigger value, and e==1, then there's no
185 	     modular reductions done and we can end up with a result out of
186 	     range at the end. */
187 	  if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
188 	    mpn_sub_n (xp, xp, mp, mn);
189 	}
190       else
191 	{
192 	  /* Main loop. */
	  /* Left-to-right binary exponentiation: for each remaining exponent
	     bit, square x, then multiply by b when the bit is 1, reducing
	     mod m whenever the intermediate reaches mn limbs.  */
193 	  do
194 	    {
195 	      mpn_sqr (tp, xp, xn);
196 	      tn = 2 * xn; tn -= tp[tn - 1] == 0;
197 	      if (tn < mn)
198 		{
199 		  MPN_COPY (xp, tp, tn);
200 		  xn = tn;
201 		}
202 	      else
203 		{
204 		  mod (tp, tn, mp, mn, &dinv, scratch);
205 		  MPN_COPY (xp, tp, mn);
206 		  xn = mn;
207 		}
208 
	      /* The next exponent bit sits in the limb's msb; test it via
		 the sign of the limb reinterpreted as signed.  */
209 	      if ((mp_limb_signed_t) e < 0)
210 		{
211 		  mpn_mul (tp, xp, xn, bp, bn);
212 		  tn = xn + bn; tn -= tp[tn - 1] == 0;
213 		  if (tn < mn)
214 		    {
215 		      MPN_COPY (xp, tp, tn);
216 		      xn = tn;
217 		    }
218 		  else
219 		    {
220 		      mod (tp, tn, mp, mn, &dinv, scratch);
221 		      MPN_COPY (xp, tp, mn);
222 		      xn = mn;
223 		    }
224 		}
225 	      e <<= 1;
226 	      c--;
227 	    }
228 	  while (c != 0);
229 	}
230 
231       /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
232 	 with the original M.  */
233       if (m_zero_cnt != 0)
234 	{
235 	  mp_limb_t cy;
	  /* Shift x up by the same amount, reduce against the shifted m,
	     then shift back down to get x mod (original m).  */
236 	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
237 	  tp[xn] = cy; xn += cy != 0;
238 
239 	  if (xn < mn)
240 	    {
241 	      MPN_COPY (xp, tp, xn);
242 	    }
243 	  else
244 	    {
245 	      mod (tp, xn, mp, mn, &dinv, scratch);
246 	      MPN_COPY (xp, tp, mn);
247 	      xn = mn;
248 	    }
249 	  mpn_rshift (xp, xp, xn, m_zero_cnt);
250 	}
251       MPN_NORMALIZE (xp, xn);
252 
      /* Negative base and odd exponent: |b|^el mod m must be negated
	 mod m, i.e. replaced by m - x (using the original, unshifted m).
	 A zero residue stays zero.  */
253       if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
254 	{
255 	  mp = PTR(m);			/* want original, unnormalized m */
256 	  mpn_sub (xp, mp, mn, xp, xn);
257 	  xn = mn;
258 	  MPN_NORMALIZE (xp, xn);
259 	}
260       MPZ_REALLOC (r, xn);
261       SIZ (r) = xn;
262       MPN_COPY (PTR(r), xp, xn);
263 
264       TMP_FREE;
265     }
266   else
267     {
268       /* For large exponents, fake a mpz_t exponent and deflect to the more
269 	 sophisticated mpz_powm.  */
270       mpz_t e;
271       mp_limb_t ep[LIMBS_PER_ULONG];
272       MPZ_FAKE_UI (e, ep, el);
273       mpz_powm (r, b, e, m);
274     }
275 }
276