xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/powm.c (revision 7863ba460b0a05b553c754e5dbc29247dddec322)
1 /* mpn_powm -- Compute R = U^E mod M.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2007-2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 /*
39   BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40 
41   1. W <- U
42 
43   2. T <- (B^n * U) mod M                Convert to REDC form
44 
45   3. Compute table U^1, U^3, U^5... of E-dependent size
46 
47   4. While there are more bits in E
48        W <- power left-to-right base-k
49 
50 
51   TODO:
52 
53    * Make getbits a macro, thereby allowing it to update the index operand.
54      That will simplify the code using getbits.  (Perhaps make getbits' sibling
55      getbit then have similar form, for symmetry.)
56 
57    * Write an itch function.  Or perhaps get rid of tp parameter since the huge
58      pp area is allocated locally anyway?
59 
60    * Choose window size without looping.  (Superoptimize or think(tm).)
61 
62    * Handle small bases with initial, reduction-free exponentiation.
63 
64    * Call new division functions, not mpn_tdiv_qr.
65 
66    * Consider special code for one-limb M.
67 
68    * How should we handle the redc1/redc2/redc_n choice?
69      - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
70      - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71      - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72      This disregards the addmul_N constant term, but we could think of
73      that as part of the respective mullo.
74 
75    * When U (the base) is small, we should start the exponentiation with plain
76      operations, then convert that partial result to REDC form.
77 
78    * When U is just one limb, should it be handled without the k-ary tricks?
79      We could keep a factor of B^n in W, but use U' = BU as base.  After
80      multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81      mod M.
82 */
83 
84 #include "gmp.h"
85 #include "gmp-impl.h"
86 #include "longlong.h"
87 
88 #undef MPN_REDC_1
89 #define MPN_REDC_1(rp, up, mp, n, invm)					\
90   do {									\
91     mp_limb_t cy;							\
92     cy = mpn_redc_1 (rp, up, mp, n, invm);				\
93     if (cy != 0)							\
94       mpn_sub_n (rp, rp, mp, n);					\
95   } while (0)
96 
97 #undef MPN_REDC_2
98 #define MPN_REDC_2(rp, up, mp, n, mip)					\
99   do {									\
100     mp_limb_t cy;							\
101     cy = mpn_redc_2 (rp, up, mp, n, mip);				\
102     if (cy != 0)							\
103       mpn_sub_n (rp, rp, mp, n);					\
104   } while (0)
105 
106 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
107 #define WANT_REDC_2 1
108 #endif
109 
110 #define getbit(p,bi) \
111   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
112 
113 static inline mp_limb_t
114 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
115 {
116   int nbits_in_r;
117   mp_limb_t r;
118   mp_size_t i;
119 
120   if (bi < nbits)
121     {
122       return p[0] & (((mp_limb_t) 1 << bi) - 1);
123     }
124   else
125     {
126       bi -= nbits;			/* bit index of low bit to extract */
127       i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
128       bi %= GMP_NUMB_BITS;		/* bit index in low word */
129       r = p[i] >> bi;			/* extract (low) bits */
130       nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
131       if (nbits_in_r < nbits)		/* did we get enough bits? */
132 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
133       return r & (((mp_limb_t ) 1 << nbits) - 1);
134     }
135 }
136 
137 static inline int
138 win_size (mp_bitcnt_t eb)
139 {
140   int k;
141   static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
142   for (k = 1; eb > x[k]; k++)
143     ;
144   return k;
145 }
146 
147 /* Convert U to REDC form, U_r = B^n * U mod M */
148 static void
149 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
150 {
151   mp_ptr tp, qp;
152   TMP_DECL;
153   TMP_MARK;
154 
155   TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
156 
157   MPN_ZERO (tp, n);
158   MPN_COPY (tp + n, up, un);
159   mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
160   TMP_FREE;
161 }
162 
163 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
164    Requires that mp[n-1..0] is odd.
165    Requires that ep[en-1..0] is > 1.
166    Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
167 void
168 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
169 	  mp_srcptr ep, mp_size_t en,
170 	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
171 {
172   mp_limb_t ip[2], *mip;
173   int cnt;
174   mp_bitcnt_t ebi;
175   int windowsize, this_windowsize;
176   mp_limb_t expbits;
177   mp_ptr pp, this_pp;
178   long i;
179   TMP_DECL;
180 
181   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
182   ASSERT (n >= 1 && ((mp[0] & 1) != 0));
183 
184   TMP_MARK;
185 
186   MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
187 
188 #if 0
189   if (bn < n)
190     {
191       /* Do the first few exponent bits without mod reductions,
192 	 until the result is greater than the mod argument.  */
193       for (;;)
194 	{
195 	  mpn_sqr (tp, this_pp, tn);
196 	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
197 	  if (getbit (ep, ebi) != 0)
198 	    mpn_mul (..., tp, tn, bp, bn);
199 	  ebi--;
200 	}
201     }
202 #endif
203 
204   windowsize = win_size (ebi);
205 
206 #if WANT_REDC_2
207   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
208     {
209       mip = ip;
210       binvert_limb (mip[0], mp[0]);
211       mip[0] = -mip[0];
212     }
213   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
214     {
215       mip = ip;
216       mpn_binvert (mip, mp, 2, tp);
217       mip[0] = -mip[0]; mip[1] = ~mip[1];
218     }
219 #else
220   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
221     {
222       mip = ip;
223       binvert_limb (mip[0], mp[0]);
224       mip[0] = -mip[0];
225     }
226 #endif
227   else
228     {
229       mip = TMP_ALLOC_LIMBS (n);
230       mpn_binvert (mip, mp, n, tp);
231     }
232 
233   pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
234 
235   this_pp = pp;
236   redcify (this_pp, bp, bn, mp, n);
237 
238   /* Store b^2 at rp.  */
239   mpn_sqr (tp, this_pp, n);
240 #if WANT_REDC_2
241   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
242     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
243   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
244     MPN_REDC_2 (rp, tp, mp, n, mip);
245 #else
246   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
247     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
248 #endif
249   else
250     mpn_redc_n (rp, tp, mp, n, mip);
251 
252   /* Precompute odd powers of b and put them in the temporary area at pp.  */
253   for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
254     {
255       mpn_mul_n (tp, this_pp, rp, n);
256       this_pp += n;
257 #if WANT_REDC_2
258       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
259 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
260       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
261 	MPN_REDC_2 (this_pp, tp, mp, n, mip);
262 #else
263       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
264 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
265 #endif
266       else
267 	mpn_redc_n (this_pp, tp, mp, n, mip);
268     }
269 
270   expbits = getbits (ep, ebi, windowsize);
271   if (ebi < windowsize)
272     ebi = 0;
273   else
274     ebi -= windowsize;
275 
276   count_trailing_zeros (cnt, expbits);
277   ebi += cnt;
278   expbits >>= cnt;
279 
280   MPN_COPY (rp, pp + n * (expbits >> 1), n);
281 
282 #define INNERLOOP							\
283   while (ebi != 0)							\
284     {									\
285       while (getbit (ep, ebi) == 0)					\
286 	{								\
287 	  MPN_SQR (tp, rp, n);						\
288 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
289 	  ebi--;							\
290 	  if (ebi == 0)							\
291 	    goto done;							\
292 	}								\
293 									\
294       /* The next bit of the exponent is 1.  Now extract the largest	\
295 	 block of bits <= windowsize, and such that the least		\
296 	 significant bit is 1.  */					\
297 									\
298       expbits = getbits (ep, ebi, windowsize);				\
299       this_windowsize = windowsize;					\
300       if (ebi < windowsize)						\
301 	{								\
302 	  this_windowsize -= windowsize - ebi;				\
303 	  ebi = 0;							\
304 	}								\
305       else								\
306         ebi -= windowsize;						\
307 									\
308       count_trailing_zeros (cnt, expbits);				\
309       this_windowsize -= cnt;						\
310       ebi += cnt;							\
311       expbits >>= cnt;							\
312 									\
313       do								\
314 	{								\
315 	  MPN_SQR (tp, rp, n);						\
316 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
317 	  this_windowsize--;						\
318 	}								\
319       while (this_windowsize != 0);					\
320 									\
321       MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
322       MPN_REDUCE (rp, tp, mp, n, mip);					\
323     }
324 
325 
326 #if WANT_REDC_2
327   if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
328     {
329       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
330 	{
331 	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
332 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
333 	    {
334 #undef MPN_MUL_N
335 #undef MPN_SQR
336 #undef MPN_REDUCE
337 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
338 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
339 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
340 	      INNERLOOP;
341 	    }
342 	  else
343 	    {
344 #undef MPN_MUL_N
345 #undef MPN_SQR
346 #undef MPN_REDUCE
347 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
348 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
349 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
350 	      INNERLOOP;
351 	    }
352 	}
353       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
354 	{
355 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
356 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
357 	    {
358 #undef MPN_MUL_N
359 #undef MPN_SQR
360 #undef MPN_REDUCE
361 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
362 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
363 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
364 	      INNERLOOP;
365 	    }
366 	  else
367 	    {
368 #undef MPN_MUL_N
369 #undef MPN_SQR
370 #undef MPN_REDUCE
371 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
372 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
373 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
374 	      INNERLOOP;
375 	    }
376 	}
377       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
378 	{
379 #undef MPN_MUL_N
380 #undef MPN_SQR
381 #undef MPN_REDUCE
382 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
383 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
384 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
385 	  INNERLOOP;
386 	}
387       else
388 	{
389 #undef MPN_MUL_N
390 #undef MPN_SQR
391 #undef MPN_REDUCE
392 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
393 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
394 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
395 	  INNERLOOP;
396 	}
397     }
398   else
399     {
400       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
401 	{
402 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
403 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
404 	    {
405 #undef MPN_MUL_N
406 #undef MPN_SQR
407 #undef MPN_REDUCE
408 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
409 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
410 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
411 	      INNERLOOP;
412 	    }
413 	  else
414 	    {
415 #undef MPN_MUL_N
416 #undef MPN_SQR
417 #undef MPN_REDUCE
418 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
419 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
420 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
421 	      INNERLOOP;
422 	    }
423 	}
424       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
425 	{
426 #undef MPN_MUL_N
427 #undef MPN_SQR
428 #undef MPN_REDUCE
429 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
430 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
431 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
432 	  INNERLOOP;
433 	}
434       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
435 	{
436 #undef MPN_MUL_N
437 #undef MPN_SQR
438 #undef MPN_REDUCE
439 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
440 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
441 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
442 	  INNERLOOP;
443 	}
444       else
445 	{
446 #undef MPN_MUL_N
447 #undef MPN_SQR
448 #undef MPN_REDUCE
449 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
450 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
451 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
452 	  INNERLOOP;
453 	}
454     }
455 
456 #else  /* WANT_REDC_2 */
457 
458   if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
459     {
460       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
461 	{
462 	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
463 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
464 	    {
465 #undef MPN_MUL_N
466 #undef MPN_SQR
467 #undef MPN_REDUCE
468 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
469 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
470 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
471 	      INNERLOOP;
472 	    }
473 	  else
474 	    {
475 #undef MPN_MUL_N
476 #undef MPN_SQR
477 #undef MPN_REDUCE
478 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
479 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
480 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
481 	      INNERLOOP;
482 	    }
483 	}
484       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
485 	{
486 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
487 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
488 	    {
489 #undef MPN_MUL_N
490 #undef MPN_SQR
491 #undef MPN_REDUCE
492 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
493 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
494 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
495 	      INNERLOOP;
496 	    }
497 	  else
498 	    {
499 #undef MPN_MUL_N
500 #undef MPN_SQR
501 #undef MPN_REDUCE
502 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
503 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
504 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
505 	      INNERLOOP;
506 	    }
507 	}
508       else
509 	{
510 #undef MPN_MUL_N
511 #undef MPN_SQR
512 #undef MPN_REDUCE
513 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
514 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
515 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
516 	  INNERLOOP;
517 	}
518     }
519   else
520     {
521       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
522 	{
523 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
524 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
525 	    {
526 #undef MPN_MUL_N
527 #undef MPN_SQR
528 #undef MPN_REDUCE
529 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
530 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
531 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
532 	      INNERLOOP;
533 	    }
534 	  else
535 	    {
536 #undef MPN_MUL_N
537 #undef MPN_SQR
538 #undef MPN_REDUCE
539 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
540 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
541 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
542 	      INNERLOOP;
543 	    }
544 	}
545       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
546 	{
547 #undef MPN_MUL_N
548 #undef MPN_SQR
549 #undef MPN_REDUCE
550 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
551 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
552 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
553 	  INNERLOOP;
554 	}
555       else
556 	{
557 #undef MPN_MUL_N
558 #undef MPN_SQR
559 #undef MPN_REDUCE
560 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
561 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
562 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
563 	  INNERLOOP;
564 	}
565     }
566 #endif  /* WANT_REDC_2 */
567 
568  done:
569 
570   MPN_COPY (tp, rp, n);
571   MPN_ZERO (tp + n, n);
572 
573 #if WANT_REDC_2
574   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
575     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
576   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
577     MPN_REDC_2 (rp, tp, mp, n, mip);
578 #else
579   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
580     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
581 #endif
582   else
583     mpn_redc_n (rp, tp, mp, n, mip);
584 
585   if (mpn_cmp (rp, mp, n) >= 0)
586     mpn_sub_n (rp, rp, mp, n);
587 
588   TMP_FREE;
589 }
590