/* mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <config.h>
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.  Return the most significant limb.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
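
/* Illustrative usage sketch (not part of the library; the example
   function name and operand values are invented for this comment).  It
   exercises the contract above: SIZE-limb inputs, 2 * SIZE output limbs
   always written, and PRODP distinct from both sources.  Guarded with
   #if 0 since the multiply routines are defined further down in this
   file.  */
#if 0
static void
example_mul_4_limbs (void)
{
  mp_limb_t u[4] = { 1, 2, 3, 4 };      /* least significant limb first */
  mp_limb_t v[4] = { 5, 6, 7, 8 };
  mp_limb_t prod[8];                    /* 2 * SIZE limbs, always stored */

  impn_mul_n_basecase (prod, u, v, 4);  /* prod now holds u * v */
}
#endif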

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
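
/* The recursion helper used below dispatches to the base case under the
   threshold and recurses otherwise.  In this tree it is expected to come
   from gmp-impl.h; the fallback here follows the historical definition
   and is an assumption, guarded so that an existing definition wins.  */
#ifndef MPN_MUL_N_RECURSE
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)  \
  do {                                                  \
    if ((size) < KARATSUBA_THRESHOLD)                   \
      impn_mul_n_basecase (prodp, up, vp, size);        \
    else                                                \
      impn_mul_n (prodp, up, vp, size, tspace);         \
  } while (0)
#endif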

/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */
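
/* Cost note: when no limb is special-cased this performs SIZE**2 limb
   products, one mpn_mul_1 pass plus SIZE-1 mpn_addmul_1 passes.  The
   Karatsuba code below instead obeys T(n) = 3*T(n/2) + O(n), about
   n**1.585 limb products, which is why recursing pays off only above
   KARATSUBA_THRESHOLD.  */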

void
#if __STDC__
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
impn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}

void
#if __STDC__
impn_mul_n (mp_ptr prodp,
            mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Multiply the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of U and V
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
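      /* Concretely, writing e = SIZE - 1, U = U' + u_e*(B**e) and
         V = V' + v_e*(B**e), the three steps below compute
           U*V = U'*V' + (B**e)*(v_e*U' + u_e*V),
         which is exact: the u_e*v_e*(B**2e) term is picked up by letting
         the second addmul run over all SIZE limbs of V.  */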

      mp_size_t esize = size - 1;       /* even size */
      mp_limb_t cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

         Split U in two pieces, U1 and U0, such that
           U = U0 + U1*(B**n),
         and V in V1 and V0, such that
           V = V0 + V1*(B**n).

         UV is then computed recursively using the identity

           UV = (B**2n + B**n)*U1*V1 + (B**n)*(U1-U0)*(V0-V1)
                + (B**n + 1)*U0*V0,

         where B = 2**BITS_PER_MP_LIMB.  */
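
      /* In the code below, H = U1*V1, M = |(U1-U0)*(V0-V1)| together
         with its sign in NEGFLG, and L = U0*V0, combined as
           UV = (B**2n + B**n)*H +/- (B**n)*M + (B**n + 1)*L.  */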

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H.    ________________  ________________
                        |_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(V0-V1)_|  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          mpn_sub_n (prodp, up + hsize, up, hsize);
          negflg = 0;
        }
      else
        {
          mpn_sub_n (prodp, up, up + hsize, hsize);
          negflg = 1;
        }
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
        {
          mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
          negflg ^= 1;
        }
      else
        {
          mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
          /* No change of NEGFLG.  */
        }
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (a negative number if NEGFLG is set).  */
      if (negflg)
        cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
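
      /* Every carry or borrow accumulated in CY above, and the carry
         from the first addition of product L below, belong at limb
         position HSIZE + SIZE; CY is applied there once product L has
         been added in.  */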

      /*** Product L.    ________________  ________________
                        |________________||____U0 x V0_____|  */
      /* Read operands from the low halves of U and V.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
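
/* A minimal driver sketch (an assumption for illustration, not part of
   this file): a top-level entry point would pick the base case below
   the threshold and otherwise supply 2 * SIZE limbs of scratch, which
   covers every level of the recursion above.  Plain malloc stands in
   for whatever temporary allocator the library uses; error handling is
   omitted.  */
#if 0
#include <stdlib.h>

static mp_limb_t
example_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  if (size < KARATSUBA_THRESHOLD)
    impn_mul_n_basecase (prodp, up, vp, size);
  else
    {
      mp_ptr tspace = malloc (2 * size * sizeof (mp_limb_t));
      impn_mul_n (prodp, up, vp, size, tspace);
      free (tspace);
    }
  return prodp[2 * size - 1];   /* the most significant limb */
}
#endif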