xref: /dflybsd-src/contrib/gmp/mpn/generic/mul_basecase.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
   of length m and n.

   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.


Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2586d7f5d3SJohn Marino 
2686d7f5d3SJohn Marino #include "gmp.h"
2786d7f5d3SJohn Marino #include "gmp-impl.h"
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino 
3086d7f5d3SJohn Marino /* Multiply {up,usize} by {vp,vsize} and write the result to
3186d7f5d3SJohn Marino    {prodp,usize+vsize}.  Must have usize>=vsize.
3286d7f5d3SJohn Marino 
3386d7f5d3SJohn Marino    Note that prodp gets usize+vsize limbs stored, even if the actual result
3486d7f5d3SJohn Marino    only needs usize+vsize-1.
3586d7f5d3SJohn Marino 
3686d7f5d3SJohn Marino    There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
3786d7f5d3SJohn Marino    Currently this is allowed, but it might not be in the future.
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino    This is the most critical code for multiplication.  All multiplies rely
4086d7f5d3SJohn Marino    on this, both small and huge.  Small ones arrive here immediately, huge
4186d7f5d3SJohn Marino    ones arrive here as this is the base case for Karatsuba's recursive
4286d7f5d3SJohn Marino    algorithm.  */
4386d7f5d3SJohn Marino 
4486d7f5d3SJohn Marino void
mpn_mul_basecase(mp_ptr rp,mp_srcptr up,mp_size_t un,mp_srcptr vp,mp_size_t vn)4586d7f5d3SJohn Marino mpn_mul_basecase (mp_ptr rp,
4686d7f5d3SJohn Marino 		  mp_srcptr up, mp_size_t un,
4786d7f5d3SJohn Marino 		  mp_srcptr vp, mp_size_t vn)
4886d7f5d3SJohn Marino {
4986d7f5d3SJohn Marino   ASSERT (un >= vn);
5086d7f5d3SJohn Marino   ASSERT (vn >= 1);
5186d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
5286d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));
5386d7f5d3SJohn Marino 
5486d7f5d3SJohn Marino   /* We first multiply by the low order limb (or depending on optional function
5586d7f5d3SJohn Marino      availability, limbs).  This result can be stored, not added, to rp.  We
5686d7f5d3SJohn Marino      also avoid a loop for zeroing this way.  */
5786d7f5d3SJohn Marino 
5886d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_2
5986d7f5d3SJohn Marino   if (vn >= 2)
6086d7f5d3SJohn Marino     {
6186d7f5d3SJohn Marino       rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
6286d7f5d3SJohn Marino       rp += 2, vp += 2, vn -= 2;
6386d7f5d3SJohn Marino     }
6486d7f5d3SJohn Marino   else
6586d7f5d3SJohn Marino     {
6686d7f5d3SJohn Marino       rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
6786d7f5d3SJohn Marino       return;
6886d7f5d3SJohn Marino     }
6986d7f5d3SJohn Marino #else
7086d7f5d3SJohn Marino   rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
7186d7f5d3SJohn Marino   rp += 1, vp += 1, vn -= 1;
7286d7f5d3SJohn Marino #endif
7386d7f5d3SJohn Marino 
7486d7f5d3SJohn Marino   /* Now accumulate the product of up[] and the next higher limb (or depending
7586d7f5d3SJohn Marino      on optional function availability, limbs) from vp[].  */
7686d7f5d3SJohn Marino 
7786d7f5d3SJohn Marino #define MAX_LEFT MP_SIZE_T_MAX	/* Used to simplify loops into if statements */
7886d7f5d3SJohn Marino 
7986d7f5d3SJohn Marino 
8086d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addmul_6
8186d7f5d3SJohn Marino   while (vn >= 6)
8286d7f5d3SJohn Marino     {
8386d7f5d3SJohn Marino       rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);
8486d7f5d3SJohn Marino       if (MAX_LEFT == 6)
8586d7f5d3SJohn Marino 	return;
8686d7f5d3SJohn Marino       rp += 6, vp += 6, vn -= 6;
8786d7f5d3SJohn Marino       if (MAX_LEFT < 2 * 6)
8886d7f5d3SJohn Marino 	break;
8986d7f5d3SJohn Marino     }
9086d7f5d3SJohn Marino #undef MAX_LEFT
9186d7f5d3SJohn Marino #define MAX_LEFT (6 - 1)
9286d7f5d3SJohn Marino #endif
9386d7f5d3SJohn Marino 
9486d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addmul_5
9586d7f5d3SJohn Marino   while (vn >= 5)
9686d7f5d3SJohn Marino     {
9786d7f5d3SJohn Marino       rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);
9886d7f5d3SJohn Marino       if (MAX_LEFT == 5)
9986d7f5d3SJohn Marino 	return;
10086d7f5d3SJohn Marino       rp += 5, vp += 5, vn -= 5;
10186d7f5d3SJohn Marino       if (MAX_LEFT < 2 * 5)
10286d7f5d3SJohn Marino 	break;
10386d7f5d3SJohn Marino     }
10486d7f5d3SJohn Marino #undef MAX_LEFT
10586d7f5d3SJohn Marino #define MAX_LEFT (5 - 1)
10686d7f5d3SJohn Marino #endif
10786d7f5d3SJohn Marino 
10886d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addmul_4
10986d7f5d3SJohn Marino   while (vn >= 4)
11086d7f5d3SJohn Marino     {
11186d7f5d3SJohn Marino       rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
11286d7f5d3SJohn Marino       if (MAX_LEFT == 4)
11386d7f5d3SJohn Marino 	return;
11486d7f5d3SJohn Marino       rp += 4, vp += 4, vn -= 4;
11586d7f5d3SJohn Marino       if (MAX_LEFT < 2 * 4)
11686d7f5d3SJohn Marino 	break;
11786d7f5d3SJohn Marino     }
11886d7f5d3SJohn Marino #undef MAX_LEFT
11986d7f5d3SJohn Marino #define MAX_LEFT (4 - 1)
12086d7f5d3SJohn Marino #endif
12186d7f5d3SJohn Marino 
12286d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addmul_3
12386d7f5d3SJohn Marino   while (vn >= 3)
12486d7f5d3SJohn Marino     {
12586d7f5d3SJohn Marino       rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
12686d7f5d3SJohn Marino       if (MAX_LEFT == 3)
12786d7f5d3SJohn Marino 	return;
12886d7f5d3SJohn Marino       rp += 3, vp += 3, vn -= 3;
12986d7f5d3SJohn Marino       if (MAX_LEFT < 2 * 3)
13086d7f5d3SJohn Marino 	break;
13186d7f5d3SJohn Marino     }
13286d7f5d3SJohn Marino #undef MAX_LEFT
13386d7f5d3SJohn Marino #define MAX_LEFT (3 - 1)
13486d7f5d3SJohn Marino #endif
13586d7f5d3SJohn Marino 
13686d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addmul_2
13786d7f5d3SJohn Marino   while (vn >= 2)
13886d7f5d3SJohn Marino     {
13986d7f5d3SJohn Marino       rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
14086d7f5d3SJohn Marino       if (MAX_LEFT == 2)
14186d7f5d3SJohn Marino 	return;
14286d7f5d3SJohn Marino       rp += 2, vp += 2, vn -= 2;
14386d7f5d3SJohn Marino       if (MAX_LEFT < 2 * 2)
14486d7f5d3SJohn Marino 	break;
14586d7f5d3SJohn Marino     }
14686d7f5d3SJohn Marino #undef MAX_LEFT
14786d7f5d3SJohn Marino #define MAX_LEFT (2 - 1)
14886d7f5d3SJohn Marino #endif
14986d7f5d3SJohn Marino 
15086d7f5d3SJohn Marino   while (vn >= 1)
15186d7f5d3SJohn Marino     {
15286d7f5d3SJohn Marino       rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
15386d7f5d3SJohn Marino       if (MAX_LEFT == 1)
15486d7f5d3SJohn Marino 	return;
15586d7f5d3SJohn Marino       rp += 1, vp += 1, vn -= 1;
15686d7f5d3SJohn Marino     }
15786d7f5d3SJohn Marino }
158