/* mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.  */

#include <config.h>
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.  Return the most significant limb.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
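
/* A minimal usage sketch (hypothetical caller; it assumes the public
   entry point mpn_mul_n, which wraps the impn_* routines below).
   Operands are stored least significant limb first, and PROD holds
   2 * SIZE limbs and must be distinct from both sources:

     mp_limb_t u[4] = {5, 0, 0, 0};
     mp_limb_t v[4] = {7, 0, 0, 0};
     mp_limb_t prod[8];
     mpn_mul_n (prod, u, v, 4);

   Afterwards prod[0] == 35 and prod[1] through prod[7] are 0.  */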

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
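
/* Why a threshold exists (rough, illustrative arithmetic): the basecase
   below costs about SIZE**2 limb multiplies, while one Karatsuba step
   replaces a size-SIZE multiply with three size-SIZE/2 multiplies plus
   a few linear add/sub passes.  At SIZE = 32 that is 1024 basecase
   multiplies against 3 * 16**2 = 768 plus the linear overhead, so the
   break-even point lands near this order of magnitude on most machines.  */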

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif

/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */
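
/* Cost note: the code below makes one pass of mpn_mul_1 or mpn_addmul_1
   of length SIZE per limb of V, i.e. about SIZE**2 limb multiplies in
   total (the quadratic "schoolbook" method).  */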

void
#if __STDC__
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
impn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}

void
#if __STDC__
impn_mul_n (mp_ptr prodp,
            mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Multiply the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of S1 and S2
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */
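      /* What the fixup below computes: with e = size - 1, write
         U = U' + up[e]*B**e and V = V' + vp[e]*B**e.  Then
           UV = U'V' + B**e * (vp[e]*U' + up[e]*V);
         the recursive call produces U'V', the first mpn_addmul_1 adds
         vp[e]*U' at limb offset e, and the second adds up[e]*V (all
         SIZE limbs of V, so the up[e]*vp[e] term is included) at the
         same offset.  */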

      mp_size_t esize = size - 1;   /* even size */
      mp_limb_t cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

         Split U in two pieces, U1 and U0, such that
         U = U0 + U1*(B**n),
         and V in V1 and V0, such that
         V = V0 + V1*(B**n).

         UV is then computed recursively using the identity

           UV = (B**(2n) + B**n)*U1*V1 + B**n*(U1 - U0)*(V0 - V1)
                + (B**n + 1)*U0*V0,

         where B = 2**BITS_PER_MP_LIMB.  */
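      /* Expanding the middle term checks that each partial product is
         counted exactly once:
           B**n * (U1*V1 + (U1 - U0)*(V0 - V1) + U0*V0)
             = B**n * (U1*V0 + U0*V1),
         so the identity is just
           UV = B**(2n)*U1*V1 + B**n*(U1*V0 + U0*V1) + U0*V0.  */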

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H.    ________________  ________________
                        |_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(V0-V1)_|  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          mpn_sub_n (prodp, up + hsize, up, hsize);
          negflg = 0;
        }
      else
        {
          mpn_sub_n (prodp, up, up + hsize, hsize);
          negflg = 1;
        }
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
        {
          mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
          negflg ^= 1;
        }
      else
        {
          mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
          /* No change of NEGFLG.  */
        }
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

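      /* Reassembly, in limb offsets into PROD: H = U1*V1 enters at
         offsets SIZE and HSIZE, M = |U1-U0|*|V0-V1| is added (or
         subtracted, when NEGFLG is set) at offset HSIZE, and L = U0*V0
         enters at offsets HSIZE and 0, matching the identity above.  */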
      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG M is a negative number).  */
      if (negflg)
        cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.    ________________  ________________
                        |________________||____U0 x V0_____|  */
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}