/* mulmod_bnm1.c -- multiplication mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"
#include "longlong.h"

/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
   mod B^rn - 1, and values are semi-normalised; zero is represented
   as either 0 or B^rn - 1.  Needs a scratch of 2rn limbs at tp.
   tp==rp is allowed. */
void
mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
		    mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_mul_n (tp, ap, bp, rn);
  cy = mpn_add_n (rp, tp, tp + rn, rn);
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
   * be no overflow when adding in the carry. */
  MPN_INCR_U (rp, rn, cy);
}
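
/* A worked analogue of the wraparound reduction above, with B = 10 and
   rn = 2, i.e. mod 10^2 - 1 = 99: since 10^2 == 1 (mod 99), a 4-digit
   product t = t1*100 + t0 reduces to t0 + t1.  For example
   37 * 58 = 2146 = 21*100 + 46, and 21 + 46 = 67 = 2146 mod 99.  */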


/* Inputs are {ap,rn+1} and {bp,rn+1}, in semi-normalised
   representation; output is {rp,rn+1}, normalised.  Computation is
   mod B^rn + 1.  Needs a scratch area of 2rn + 2 limbs at tp;
   tp == rp is allowed. */
static void
mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
		    mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_mul_n (tp, ap, bp, rn + 1);
  ASSERT (tp[2*rn+1] == 0);
  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
  rp[rn] = 0;
  MPN_INCR_U (rp, rn+1, cy);
}
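
/* The same analogue for this function, mod 10^2 + 1 = 101: since
   10^2 == -1 (mod 101), t = t1*100 + t0 reduces to t0 - t1.  For
   example 37 * 58 = 2146 = 21*100 + 46, and 46 - 21 = 25 = 2146 mod 101.
   A negative difference is fixed up by adding the modulus, which is
   what the MPN_INCR_U of the borrow accomplishes above. */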


/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if one of the operands
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
 * combine results and obtain a natural number when one knows in
 * advance that the final value is less than (B^rn-1).
 * Moreover it should not be a problem if mulmod_bnm1 is used to
 * compute the full product with an+bn <= rn, because this condition
 * implies (B^an-1)(B^bn-1) < (B^rn-1).
 *
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
 *
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
 */
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
{
  ASSERT (0 < bn);
  ASSERT (bn <= an);
  ASSERT (an <= rn);

  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (bn < rn))
	{
	  if (UNLIKELY (an + bn <= rn))
	    {
	      mpn_mul (rp, ap, an, bp, bn);
	    }
	  else
	    {
	      mp_limb_t cy;
	      mpn_mul (tp, ap, an, bp, bn);
	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
	      MPN_INCR_U (rp, rn, cy);
	    }
	}
      else
	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      /* We need at least an + bn >= n, to be able to fit one of the
	 recursive products at rp. Requiring strict inequality makes
	 the code slightly simpler. If desired, we could avoid this
	 restriction by initially halving rn as long as rn is even and
	 an + bn <= rn/2. */

      ASSERT (an + bn > n);
      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
	 and crt together as

	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */
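      /* A quick sanity check of that formula: reducing it mod B^n - 1
	 (where B^n == 1) gives -xp + 2 * (xp + xm)/2 = xm, and reducing
	 it mod B^n + 1 (where B^n == -1) kills the (B^n + 1) term and
	 leaves xp, so it matches both residues.  Equivalently, with
	 s = (xp + xm)/2 mod (B^n - 1), it says x = s + B^n * (s - xp):
	 s is the low half of the product and s - xp the high half,
	 which is how the recomposition below proceeds.  */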

#define a0 ap
#define a1 (ap + n)
#define b0 bp
#define b1 (bp + n)

#define xp  tp	/* 2n + 2 */
      /* am1  maybe in {xp, n} */
      /* bm1  maybe in {xp + n, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */
      /* bp1  maybe in {sp1 + n + 1, n + 1} */

      {
	mp_srcptr am1, bm1;
	mp_size_t anm, bnm;
	mp_ptr so;

	bm1 = b0;
	bnm = bn;
	if (LIKELY (an > n))
	  {
	    am1 = xp;
	    cy = mpn_add (xp, a0, n, a1, an - n);
	    MPN_INCR_U (xp, n, cy);
	    anm = n;
	    so = xp + n;
	    if (LIKELY (bn > n))
	      {
		bm1 = so;
		cy = mpn_add (so, b0, n, b1, bn - n);
		MPN_INCR_U (so, n, cy);
		bnm = n;
		so += n;
	      }
	  }
	else
	  {
	    so = xp;
	    am1 = a0;
	    anm = an;
	  }

	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
      }

      {
	int       k;
	mp_srcptr ap1, bp1;
	mp_size_t anp, bnp;

	bp1 = b0;
	bnp = bn;
	if (LIKELY (an > n)) {
	  ap1 = sp1;
	  cy = mpn_sub (sp1, a0, n, a1, an - n);
	  sp1[n] = 0;
	  MPN_INCR_U (sp1, n + 1, cy);
	  anp = n + ap1[n];
	  if (LIKELY (bn > n)) {
	    bp1 = sp1 + n + 1;
	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
	    sp1[2*n+1] = 0;
	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
	    bnp = n + bp1[n];
	  }
	} else {
	  ap1 = a0;
	  anp = an;
	}

	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
	  k = 0;
	else
	  {
	    int mask;
	    k = mpn_fft_best_k (n, 0);
	    mask = (1<<k) - 1;
	    while (n & mask) {k--; mask >>= 1;};
	  }
	if (k >= FFT_FIRST_K)
	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
	else if (UNLIKELY (bp1 == b0))
	  {
	    ASSERT (anp + bnp <= 2*n+1);
	    ASSERT (anp + bnp > n);
	    ASSERT (anp >= bnp);
	    mpn_mul (xp, ap1, anp, bp1, bnp);
	    anp = anp + bnp - n;
	    ASSERT (anp <= n || xp[2*n] == 0);
	    anp -= anp > n;
	    cy = mpn_sub (xp, xp, n, xp + n, anp);
	    xp[n] = 0;
	    MPN_INCR_U (xp, n+1, cy);
	  }
	else
	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
      }

      /* Here the CRT recomposition begins.

	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
	 Division by 2 is a bitwise rotation.

	 Assumes xp normalised mod (B^n+1).

	 The residue class [0] is represented by [B^n-1]; except when
	 both inputs are ZERO.
      */
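      /* Halving mod B^n - 1 is exact because the modulus is odd: an
	 even sum is simply shifted right, while for an odd sum the
	 modulus is implicitly added first, which is exactly what
	 rotating the low bit into the top position achieves.  For
	 example, mod 2^8 - 1 = 255 the half of 45 is (45 + 255)/2 = 150,
	 and indeed 2 * 150 = 300 == 45 (mod 255).  */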

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc (rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* The next update of rp[n-1] will set cy = 1 only if rp[n-1] += hi
	 overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n (rp, rp, xp, n); /* B^n = 1 */
      hi = (cy << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1, i.e. {xp,n} = ZERO; this implies that
	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa (cy, rp[n-1], cy, rp[n-1], 0, hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc (rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n (rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0] & 1);
      mpn_rshift (rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... so rp[n-1] + cy cannot overflow, and the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* The next increment cannot overflow; see the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U (rp, n, cy);

      /* Compute the highest half:
	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
       */
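      /* This is the (s - xp) term from the recomposition
	 x = s + B^n * (s - xp) noted above: {rp,n} currently holds
	 s = (xp + xm)/2 mod (B^n-1), so subtracting xp from it yields
	 the limbs above B^n. */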
      if (UNLIKELY (an + bn < rn))
	{
	  /* Note that in this case, the only way the result can equal
	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
	     then the output of both the recursive calls and this CRT
	     reconstruction is zero, not B^{rn} - 1. Which is good,
	     since the latter representation doesn't fit in the output
	     area. */
	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);

	  /* FIXME: This subtraction of the high parts is not really
	     necessary, we do it to get the carry out, and for sanity
	     checking. */
	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
				   xp + an + bn - n, rn - (an + bn), cy);
	  ASSERT (an + bn == rn - 1 ||
		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
	  ASSERT (cy == (xp + an + bn - n)[0]);
	}
      else
	{
	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
	     DECR will affect _at most_ the lowest n limbs. */
	  MPN_DECR_U (rp, 2*n, cy);
	}
#undef a0
#undef a1
#undef b0
#undef b1
#undef xp
#undef sp1
    }
}
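
/* A minimal usage sketch (an illustration, not part of the library's
   documented interface): a caller wanting the plain product of {ap,an}
   and {bp,bn}, bn <= an, rounds the size up with
   mpn_mulmod_bnm1_next_size and supplies scratch space, for which
   gmp-impl.h provides the mpn_mulmod_bnm1_itch macro.  TMP_DECL /
   TMP_MARK / TMP_FREE bookkeeping is elided here:

     mp_size_t rn = mpn_mulmod_bnm1_next_size (an + bn);
     mp_ptr qp = TMP_ALLOC_LIMBS (rn);
     mp_ptr scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
     mpn_mulmod_bnm1 (qp, rn, ap, an, bp, bn, scratch);

   Since an + bn <= rn here, {qp, an + bn} then holds the full product,
   as noted in the comment before mpn_mulmod_bnm1 above.  */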

mp_size_t
mpn_mulmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t nh;

  if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
    return n;
  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (2-1)) & (-2);
  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (4-1)) & (-4);

  nh = (n + 1) >> 1;

  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
    return (n + (8-1)) & (-8);

  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
}
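
/* An illustration of the rounding, assuming MULMOD_BNM1_THRESHOLD == 16
   (the real value is tuned per CPU): n = 13 is below the threshold and
   comes back unchanged; n = 49 falls in the first band
   (16 <= 49 < 4*15 + 1 = 61) and is rounded up to a multiple of 2,
   giving 50; n = 70 falls in the second band (61 <= 70 < 8*15 + 1 = 121)
   and is rounded up to a multiple of 4, giving 72.  Rounding to ever
   higher powers of 2 keeps rn even through more levels of the recursive
   halving in mpn_mulmod_bnm1.  */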