/* mpn_mul -- Multiply two natural numbers.

Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <config.h>
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
   and v (pointed to by VP, with VSIZE limbs), and store the result at
   PRODP.  USIZE + VSIZE limbs are always stored, even when the most
   significant limb of the result comes out zero.  Return the most
   significant limb of the result.

   NOTE: The space pointed to by PRODP is overwritten before U and V
   have been fully read, so overlap with either operand is an error.

   Argument constraints:
   1. USIZE >= VSIZE.
   2. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
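
/* Example usage (an illustrative sketch, not part of the library):
   multiply a 2-limb number by a 1-limb number.  Operands are stored
   least significant limb first, and the destination must provide
   usize + vsize limbs even when the top limb comes out zero.

     mp_limb_t u[2] = {7, 3};   // u = 3*B + 7, where B is the limb radix
     mp_limb_t v[1] = {5};
     mp_limb_t p[3];            // room for usize + vsize = 3 limbs
     mp_limb_t msl = mpn_mul (p, u, 2, v, 1);
     // now p[0] == 35, p[1] == 15, p[2] == 0, and msl == 0
*/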

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
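
/* The crossover point is machine dependent and is meant to be tuned
   per target; a port can override it at build time, e.g. with a
   compiler flag such as -DKARATSUBA_THRESHOLD=16 (16 here is an
   illustrative value, not a tuned recommendation).  */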

mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
	 mp_srcptr up, mp_size_t usize,
	 mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul (prodp, up, usize, vp, vsize)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t usize;
     mp_srcptr vp;
     mp_size_t vsize;
#endif
{
  mp_ptr prod_endp = prodp + usize + vsize - 1;
  mp_limb_t cy;
  mp_ptr tspace;

  if (vsize < KARATSUBA_THRESHOLD)
    {
      /* Handle simple cases with traditional multiplication.

	 This is the most critical code of the entire function.  All
	 multiplies rely on this, both small and huge.  Small ones arrive
	 here immediately.  Huge ones arrive here as this is the base case
	 for Karatsuba's recursive algorithm below.  */
      mp_size_t i;
      mp_limb_t cy_limb;
      mp_limb_t v_limb;

      if (vsize == 0)
	return 0;

      /* Multiply by the first limb in V separately, since that result
	 can be stored (rather than added) into PROD.  This also avoids
	 a loop for zeroing.  */
      v_limb = vp[0];
      if (v_limb <= 1)
	{
	  if (v_limb == 1)
	    MPN_COPY (prodp, up, usize);
	  else
	    MPN_ZERO (prodp, usize);
	  cy_limb = 0;
	}
      else
	cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);

      prodp[usize] = cy_limb;
      prodp++;

      /* For each iteration in the outer loop, multiply one limb from
	 U with one limb from V, and add it to PROD.  */
      for (i = 1; i < vsize; i++)
	{
	  v_limb = vp[i];
	  if (v_limb <= 1)
	    {
	      cy_limb = 0;
	      if (v_limb == 1)
		cy_limb = mpn_add_n (prodp, prodp, up, usize);
	    }
	  else
	    cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);

	  prodp[usize] = cy_limb;
	  prodp++;
	}
      return cy_limb;
    }
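
  /* What follows is the Karatsuba range (vsize >= KARATSUBA_THRESHOLD).
     A sketch of the blocking scheme, writing B = 2^(vsize * BITS_PER_MP_LIMB):

       U = U_{k-1} * B^(k-1) + ... + U_1 * B + U_0   (vsize-limb blocks)
       U * V = sum over i of (U_i * V) * B^i

     Each 2*vsize-limb partial product U_i * V is computed with the
     recursive n-by-n routine and added into the result at an offset of
     i*vsize limbs; a final block of fewer than vsize limbs, if any, is
     handled by recursing with the operands swapped.  */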

  tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
  MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);

  prodp += vsize;
  up += vsize;
  usize -= vsize;
  if (usize >= vsize)
    {
      mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
      do
	{
	  MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
	  cy = mpn_add_n (prodp, prodp, tp, vsize);
	  mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
	  prodp += vsize;
	  up += vsize;
	  usize -= vsize;
	}
      while (usize >= vsize);
    }

  /* Now usize < vsize.  Make life simple: recurse on the remaining
     usize limbs of U, with the operands swapped so that the constraint
     USIZE >= VSIZE still holds.  */

  if (usize != 0)
    {
      mpn_mul (tspace, vp, vsize, up, usize);
      cy = mpn_add_n (prodp, prodp, tspace, vsize);
      mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
    }

  return *prod_endp;
}