/* mpn_mul -- Multiply two natural numbers.

Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <config.h>
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
   and v (pointed to by VP, with VSIZE limbs), and store the result at
   PRODP.  USIZE + VSIZE limbs are always stored, but the most
   significant of them may be zero even when the input operands are
   normalized.  Return the most significant limb of the result.

   NOTE: The space pointed to by PRODP is overwritten before U and V
   have been fully read, so overlapping the result with either operand
   is an error.

   Argument constraints:
   1. USIZE >= VSIZE.
   2. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
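/* Usage sketch (illustrative only, not part of the library): multiply
   a 4-limb number U by a 2-limb number V.  The destination must
   provide USIZE + VSIZE = 6 limbs and may not overlap either input;
   the returned limb equals the stored top limb.

     mp_limb_t u[4], v[2], prod[6];
     mp_limb_t msl;
     ... fill u and v, least significant limb first ...
     msl = mpn_mul (prod, u, 4, v, 2);
     ... now msl == prod[5], which may be zero ...
*/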

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
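/* Rationale: below the threshold, the schoolbook loop in mpn_mul wins
   on its lower per-limb overhead; above it, Karatsuba's
   O(n^log2(3)) ~ O(n^1.585) recursion is expected to be faster.  The
   best crossover point is machine-dependent, hence the override hook
   above.  */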

mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
         mp_srcptr up, mp_size_t usize,
         mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul (prodp, up, usize, vp, vsize)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t usize;
     mp_srcptr vp;
     mp_size_t vsize;
#endif
{
  mp_ptr prod_endp = prodp + usize + vsize - 1;
  mp_limb_t cy;
  mp_ptr tspace;

  if (vsize < KARATSUBA_THRESHOLD)
    {
      /* Handle simple cases with traditional multiplication.

         This is the most critical code of the entire function.  All
         multiplies rely on this, both small and huge.  Small ones arrive
         here immediately.  Huge ones arrive here as this is the base case
         for Karatsuba's recursive algorithm below.  */
      mp_size_t i;
      mp_limb_t cy_limb;
      mp_limb_t v_limb;

      if (vsize == 0)
        return 0;

      /* Multiply by the first limb in V separately, as the result can be
         stored (not added) to PROD.  We also avoid a loop for zeroing.  */
      v_limb = vp[0];
      if (v_limb <= 1)
        {
          if (v_limb == 1)
            MPN_COPY (prodp, up, usize);
          else
            MPN_ZERO (prodp, usize);
          cy_limb = 0;
        }
      else
        cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);

      prodp[usize] = cy_limb;
      prodp++;

      /* For each iteration in the outer loop, multiply one limb from
         U with one limb from V, and add it to PROD.  */
      for (i = 1; i < vsize; i++)
        {
          v_limb = vp[i];
          if (v_limb <= 1)
            {
              cy_limb = 0;
              if (v_limb == 1)
                cy_limb = mpn_add_n (prodp, prodp, up, usize);
            }
          else
            cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);

          prodp[usize] = cy_limb;
          prodp++;
        }
      return cy_limb;
    }

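  /* V is shorter than U.  Split U into vsize-limb chunks and multiply
     each chunk by V with the balanced routine, accumulating the
     partial products at increasing limb offsets.  MPN_MUL_N_RECURSE
     multiplies two equally sized operands; above the threshold it uses
     a Karatsuba-style recursion based on the identity (for
     x = x1*B + x0, y = y1*B + y0, with B a power of the limb base)

       x*y = x1*y1*B^2 + ((x1 + x0)*(y1 + y0) - x1*y1 - x0*y0)*B + x0*y0

     (or an equivalent subtractive variant), trading one of the four
     half-size multiplications for a few additions.  TSPACE supplies
     the scratch limbs the recursion needs.  */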
  tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
  MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);

  prodp += vsize;
  up += vsize;
  usize -= vsize;
  if (usize >= vsize)
    {
      mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
      do
        {
          MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
          cy = mpn_add_n (prodp, prodp, tp, vsize);
          mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
          prodp += vsize;
          up += vsize;
          usize -= vsize;
        }
      while (usize >= vsize);
    }

  /* True: usize < vsize.  */

  /* Make life simple: Recurse.  */

  if (usize != 0)
    {
      mpn_mul (tspace, vp, vsize, up, usize);
      cy = mpn_add_n (prodp, prodp, tspace, vsize);
      mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
    }

  return *prod_endp;
}