/* mpn_mul -- Multiply two natural numbers.

Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <config.h>
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
   and v (pointed to by VP, with VSIZE limbs), and store the result at
   PRODP.  USIZE + VSIZE limbs are always stored, even when the most
   significant limb of the result is zero.  Return the most significant
   limb of the result.

   NOTE: The space pointed to by PRODP is overwritten before the
   multiplication has finished reading U and V, so overlap between the
   destination and either input is an error.

   Argument constraints:
   1. USIZE >= VSIZE.
   2. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
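
/* Usage sketch (illustrative only; the sizes and variable names below are
   hypothetical, not part of this file).  Limbs are stored least
   significant first, USIZE >= VSIZE, and the destination must not overlap
   either input:

     mp_limb_t u[4], v[2], prod[4 + 2];
     mp_limb_t msl;
     ... fill u[0..3] and v[0..1] ...
     msl = mpn_mul (prod, u, 4, v, 2);
     ... all 6 product limbs are stored; msl equals prod[5] ...
*/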

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
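/* For scale (illustrative): with 64-bit limbs, 32 limbs is roughly a
   2048-bit operand; below the threshold the schoolbook loop in the base
   case below is used instead of the Karatsuba recursion.  */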

mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
	 mp_srcptr up, mp_size_t usize,
	 mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul (prodp, up, usize, vp, vsize)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t usize;
     mp_srcptr vp;
     mp_size_t vsize;
#endif
{
  mp_ptr prod_endp = prodp + usize + vsize - 1;
  mp_limb_t cy;
  mp_ptr tspace;

  if (vsize < KARATSUBA_THRESHOLD)
    {
      /* Handle simple cases with traditional multiplication.

	 This is the most critical code of the entire function.  All
	 multiplies rely on this, both small and huge.  Small ones arrive
	 here immediately.  Huge ones arrive here as this is the base case
	 for Karatsuba's recursive algorithm below.  */
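      /* Schoolbook sketch (illustrative): with B the limb base, the loop
	 below evaluates u * v = sum_i (u * v[i]) * B^i, i.e. each pass adds
	 the single-limb product u * v[i] into PROD starting i limbs up; for
	 vsize == 2 this is prod = u*v[0] + B * (u*v[1]).  */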
      mp_size_t i;
      mp_limb_t cy_limb;
      mp_limb_t v_limb;

      if (vsize == 0)
	return 0;

      /* Multiply by the first limb in V separately, as the result can be
	 stored (not added) to PROD.  We also avoid a loop for zeroing.  */
      v_limb = vp[0];
      if (v_limb <= 1)
	{
	  if (v_limb == 1)
	    MPN_COPY (prodp, up, usize);
	  else
	    MPN_ZERO (prodp, usize);
	  cy_limb = 0;
	}
      else
	cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);

      prodp[usize] = cy_limb;
      prodp++;

      /* For each iteration in the outer loop, multiply one limb from
	 U with one limb from V, and add it to PROD.  */
      for (i = 1; i < vsize; i++)
	{
	  v_limb = vp[i];
	  if (v_limb <= 1)
	    {
	      cy_limb = 0;
	      if (v_limb == 1)
		cy_limb = mpn_add_n (prodp, prodp, up, usize);
	    }
	  else
	    cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);

	  prodp[usize] = cy_limb;
	  prodp++;
	}
      return cy_limb;
    }

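  /* Karatsuba case: with B the limb base, U is consumed in vsize-limb
     blocks, u = u0 + u1*B^vsize + u2*B^(2*vsize) + ..., so
     u * v = u0*v + (u1*v)*B^vsize + ...  The first 2*vsize-limb block
     product is stored directly at PRODP; later ones are computed into
     temporary space and added in at the matching limb offset.  */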
  tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
  MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);

  prodp += vsize;
  up += vsize;
  usize -= vsize;
  if (usize >= vsize)
    {
      mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
      do
	{
	  MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
	  cy = mpn_add_n (prodp, prodp, tp, vsize);
	  mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
	  prodp += vsize;
	  up += vsize;
	  usize -= vsize;
	}
      while (usize >= vsize);
    }

  /* True: usize < vsize.  */

  /* Make life simple: Recurse.  */

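  /* Fewer than vsize limbs of U remain, so the recursive call swaps the
     operands to keep the USIZE >= VSIZE constraint; the partial product is
     then added into PROD at the current offset.  */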
  if (usize != 0)
    {
      mpn_mul (tspace, vp, vsize, up, usize);
      cy = mpn_add_n (prodp, prodp, tspace, vsize);
      mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
    }

  return *prod_endp;
}