/* mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <config.h>
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored.  Return the most significant limb.

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
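
/* A minimal usage sketch of the contract above (illustrative only, never
   compiled): multiply two 4-limb operands into a distinct 8-limb
   destination.  The operand values are arbitrary.  */
#if 0
static void
example_mul_n_usage (void)
{
  mp_limb_t u[4] = { 1, 2, 3, 4 };	/* least significant limb first */
  mp_limb_t v[4] = { 5, 6, 7, 8 };
  mp_limb_t prod[8];			/* 2 * SIZE limbs, distinct from u, v */

  impn_mul_n_basecase (prod, u, v, 4);	/* prod[0..7] now holds u * v */
}
#endif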

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
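
/* MPN_MUL_N_RECURSE is not defined in this file; it is expected to come
   from gmp-impl.h.  As a sketch (an assumption about that header, not a
   quotation of it), the dispatch it performs is presumably: use the
   schoolbook base case below the threshold, otherwise recurse into the
   Karatsuba routine.  */
#if 0
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)	\
  do							\
    {							\
      if ((size) < KARATSUBA_THRESHOLD)			\
	impn_mul_n_basecase (prodp, up, vp, size);	\
      else						\
	impn_mul_n (prodp, up, vp, size, tspace);	\
    }							\
  while (0)
#endif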

/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */

void
#if __STDC__
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
impn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
	MPN_COPY (prodp, up, size);
      else
	MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
	{
	  cy_limb = 0;
	  if (v_limb == 1)
	    cy_limb = mpn_add_n (prodp, prodp, up, size);
	}
      else
	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}

void
#if __STDC__
impn_mul_n (mp_ptr prodp,
	    mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
	 Multiply the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb of S1 and S2
	 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

	 Split U in two pieces, U1 and U0, such that
	 U = U0 + U1*(B**n),
	 and V in V1 and V0, such that
	 V = V0 + V1*(B**n).

	 UV is then computed recursively using the identity

	 UV = (B**(2n) + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1)
	      + (B**n + 1)*U0*V0

	 where B = 2**BITS_PER_MP_LIMB.  */
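      /* A worked toy instance of the identity (illustrative, with B = 10
	 and n = 1 rather than a real limb base): U = 47, V = 38, so
	 U1 = 4, U0 = 7, V1 = 3, V0 = 8.  Then
	   (B**2 + B)*U1*V1  = 110 * 12       = 1320
	   B*(U1-U0)*(V0-V1) = 10 * (-3) * 5  = -150
	   (B + 1)*U0*V0     = 11 * 56        = 616
	 and 1320 - 150 + 616 = 1786 = 47 * 38, as expected.  */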

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H.	 ________________  ________________
			|_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.	 ________________
			|_(U1-U0)(V0-V1)_|  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	  negflg = 0;
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	  negflg = 1;
	}
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
	{
	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
	  negflg ^= 1;
	}
      else
	{
	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
	  /* No change of NEGFLG.  */
	}
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG is set, M is negative).  */
      if (negflg)
	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.	 ________________  ________________
			|________________||____U0 x V0_____|  */
      /* Read operands from the low halves of U and V.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
221