xref: /dflybsd-src/contrib/gmp/mpn/generic/mul.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_mul -- Multiply two natural numbers.
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino    Contributed to the GNU project by Torbjorn Granlund.
486d7f5d3SJohn Marino 
586d7f5d3SJohn Marino Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
686d7f5d3SJohn Marino 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
786d7f5d3SJohn Marino 
886d7f5d3SJohn Marino This file is part of the GNU MP Library.
986d7f5d3SJohn Marino 
1086d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1186d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1286d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1386d7f5d3SJohn Marino option) any later version.
1486d7f5d3SJohn Marino 
1586d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1686d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1786d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1886d7f5d3SJohn Marino License for more details.
1986d7f5d3SJohn Marino 
2086d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2186d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2286d7f5d3SJohn Marino 
2386d7f5d3SJohn Marino #include "gmp.h"
2486d7f5d3SJohn Marino #include "gmp-impl.h"
2586d7f5d3SJohn Marino 
2686d7f5d3SJohn Marino 
2786d7f5d3SJohn Marino #ifndef MUL_BASECASE_MAX_UN
2886d7f5d3SJohn Marino #define MUL_BASECASE_MAX_UN 500
2986d7f5d3SJohn Marino #endif
3086d7f5d3SJohn Marino 
3186d7f5d3SJohn Marino #define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
3286d7f5d3SJohn Marino #define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
3386d7f5d3SJohn Marino 
3486d7f5d3SJohn Marino /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
3586d7f5d3SJohn Marino    (pointed to by VP, with VN limbs), and store the result at PRODP.  The
3686d7f5d3SJohn Marino    result is UN + VN limbs.  Return the most significant limb of the result.
3786d7f5d3SJohn Marino 
3886d7f5d3SJohn Marino    NOTE: The space pointed to by PRODP is overwritten before finished with U
3986d7f5d3SJohn Marino    and V, so overlap is an error.
4086d7f5d3SJohn Marino 
4186d7f5d3SJohn Marino    Argument constraints:
4286d7f5d3SJohn Marino    1. UN >= VN.
4386d7f5d3SJohn Marino    2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
4486d7f5d3SJohn Marino       the multiplier and the multiplicand.  */
4586d7f5d3SJohn Marino 
4686d7f5d3SJohn Marino /*
4786d7f5d3SJohn Marino   * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
4886d7f5d3SJohn Marino     ideal lines of the surrounding algorithms.  Is that optimal?
4986d7f5d3SJohn Marino 
5086d7f5d3SJohn Marino   * The toomX3 code now uses a structure similar to the one of toomX2, except
5186d7f5d3SJohn Marino     that it loops longer in the unbalanced case.  The result is that the
5286d7f5d3SJohn Marino     remaining area might have un < vn.  Should we fix the toomX2 code in a
5386d7f5d3SJohn Marino     similar way?
5486d7f5d3SJohn Marino 
5586d7f5d3SJohn Marino   * The toomX3 code is used for the largest non-FFT unbalanced operands.  It
5686d7f5d3SJohn Marino     therefore calls mpn_mul recursively for certain cases.
5786d7f5d3SJohn Marino 
5886d7f5d3SJohn Marino   * Allocate static temp space using THRESHOLD variables (except for toom44
5986d7f5d3SJohn Marino     when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.
6086d7f5d3SJohn Marino 
6186d7f5d3SJohn Marino   * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
6286d7f5d3SJohn Marino     have the same vn threshold.  This is not true, we should actually use
6386d7f5d3SJohn Marino     mul_basecase for slightly larger operands for toom32 than for toom22, and
6486d7f5d3SJohn Marino     even larger for toom42.
6586d7f5d3SJohn Marino 
6686d7f5d3SJohn Marino   * That problem is even more prevalent for toomX3.  We therefore use special
6786d7f5d3SJohn Marino     THRESHOLD variables there.
6886d7f5d3SJohn Marino 
6986d7f5d3SJohn Marino   * Is our ITCH allocation correct?
7086d7f5d3SJohn Marino */
7186d7f5d3SJohn Marino 
7286d7f5d3SJohn Marino #define ITCH (16*vn + 100)
7386d7f5d3SJohn Marino 
7486d7f5d3SJohn Marino mp_limb_t
mpn_mul(mp_ptr prodp,mp_srcptr up,mp_size_t un,mp_srcptr vp,mp_size_t vn)7586d7f5d3SJohn Marino mpn_mul (mp_ptr prodp,
7686d7f5d3SJohn Marino 	 mp_srcptr up, mp_size_t un,
7786d7f5d3SJohn Marino 	 mp_srcptr vp, mp_size_t vn)
7886d7f5d3SJohn Marino {
7986d7f5d3SJohn Marino   ASSERT (un >= vn);
8086d7f5d3SJohn Marino   ASSERT (vn >= 1);
8186d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
8286d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
8386d7f5d3SJohn Marino 
8486d7f5d3SJohn Marino   if (un == vn)
8586d7f5d3SJohn Marino     {
8686d7f5d3SJohn Marino       if (up == vp)
8786d7f5d3SJohn Marino 	mpn_sqr (prodp, up, un);
8886d7f5d3SJohn Marino       else
8986d7f5d3SJohn Marino 	mpn_mul_n (prodp, up, vp, un);
9086d7f5d3SJohn Marino     }
9186d7f5d3SJohn Marino   else if (vn < MUL_TOOM22_THRESHOLD)
9286d7f5d3SJohn Marino     { /* plain schoolbook multiplication */
9386d7f5d3SJohn Marino 
9486d7f5d3SJohn Marino       /* Unless un is very large, or else if have an applicable mpn_mul_N,
9586d7f5d3SJohn Marino 	 perform basecase multiply directly.  */
9686d7f5d3SJohn Marino       if (un <= MUL_BASECASE_MAX_UN
9786d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_2
9886d7f5d3SJohn Marino 	  || vn <= 2
9986d7f5d3SJohn Marino #else
10086d7f5d3SJohn Marino 	  || vn == 1
10186d7f5d3SJohn Marino #endif
10286d7f5d3SJohn Marino 	  )
10386d7f5d3SJohn Marino 	mpn_mul_basecase (prodp, up, un, vp, vn);
10486d7f5d3SJohn Marino       else
10586d7f5d3SJohn Marino 	{
10686d7f5d3SJohn Marino 	  /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory
10786d7f5d3SJohn Marino 	     locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
10886d7f5d3SJohn Marino 	     these pieces with the vp[] operand.  After each such partial
10986d7f5d3SJohn Marino 	     multiplication (but the last) we copy the most significant vn
11086d7f5d3SJohn Marino 	     limbs into a temporary buffer since that part would otherwise be
11186d7f5d3SJohn Marino 	     overwritten by the next multiplication.  After the next
11286d7f5d3SJohn Marino 	     multiplication, we add it back.  This illustrates the situation:
11386d7f5d3SJohn Marino 
11486d7f5d3SJohn Marino                                                     -->vn<--
11586d7f5d3SJohn Marino                                                       |  |<------- un ------->|
11686d7f5d3SJohn Marino                                                          _____________________|
11786d7f5d3SJohn Marino                                                         X                    /|
11886d7f5d3SJohn Marino                                                       /XX__________________/  |
11986d7f5d3SJohn Marino                                     _____________________                     |
12086d7f5d3SJohn Marino                                    X                    /                     |
12186d7f5d3SJohn Marino                                  /XX__________________/                       |
12286d7f5d3SJohn Marino                _____________________                                          |
12386d7f5d3SJohn Marino               /                    /                                          |
12486d7f5d3SJohn Marino             /____________________/                                            |
12586d7f5d3SJohn Marino 	    ==================================================================
12686d7f5d3SJohn Marino 
12786d7f5d3SJohn Marino 	    The parts marked with X are the parts whose sums are copied into
12886d7f5d3SJohn Marino 	    the temporary buffer.  */
12986d7f5d3SJohn Marino 
13086d7f5d3SJohn Marino 	  mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
13186d7f5d3SJohn Marino 	  mp_limb_t cy;
13286d7f5d3SJohn Marino 	  ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
13386d7f5d3SJohn Marino 
13486d7f5d3SJohn Marino 	  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
13586d7f5d3SJohn Marino 	  prodp += MUL_BASECASE_MAX_UN;
13686d7f5d3SJohn Marino 	  MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
13786d7f5d3SJohn Marino 	  up += MUL_BASECASE_MAX_UN;
13886d7f5d3SJohn Marino 	  un -= MUL_BASECASE_MAX_UN;
13986d7f5d3SJohn Marino 	  while (un > MUL_BASECASE_MAX_UN)
14086d7f5d3SJohn Marino 	    {
14186d7f5d3SJohn Marino 	      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
14286d7f5d3SJohn Marino 	      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
14386d7f5d3SJohn Marino 	      mpn_incr_u (prodp + vn, cy);
14486d7f5d3SJohn Marino 	      prodp += MUL_BASECASE_MAX_UN;
14586d7f5d3SJohn Marino 	      MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
14686d7f5d3SJohn Marino 	      up += MUL_BASECASE_MAX_UN;
14786d7f5d3SJohn Marino 	      un -= MUL_BASECASE_MAX_UN;
14886d7f5d3SJohn Marino 	    }
14986d7f5d3SJohn Marino 	  if (un > vn)
15086d7f5d3SJohn Marino 	    {
15186d7f5d3SJohn Marino 	      mpn_mul_basecase (prodp, up, un, vp, vn);
15286d7f5d3SJohn Marino 	    }
15386d7f5d3SJohn Marino 	  else
15486d7f5d3SJohn Marino 	    {
15586d7f5d3SJohn Marino 	      ASSERT (un > 0);
15686d7f5d3SJohn Marino 	      mpn_mul_basecase (prodp, vp, vn, up, un);
15786d7f5d3SJohn Marino 	    }
15886d7f5d3SJohn Marino 	  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
15986d7f5d3SJohn Marino 	  mpn_incr_u (prodp + vn, cy);
16086d7f5d3SJohn Marino 	}
16186d7f5d3SJohn Marino     }
16286d7f5d3SJohn Marino   else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
16386d7f5d3SJohn Marino     {
16486d7f5d3SJohn Marino       /* Use ToomX2 variants */
16586d7f5d3SJohn Marino       mp_ptr scratch;
16686d7f5d3SJohn Marino       TMP_SDECL; TMP_SMARK;
16786d7f5d3SJohn Marino 
16886d7f5d3SJohn Marino       scratch = TMP_SALLOC_LIMBS (ITCH);
16986d7f5d3SJohn Marino 
17086d7f5d3SJohn Marino       /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
17186d7f5d3SJohn Marino 	 square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
17286d7f5d3SJohn Marino 	 wise; we would get better balance by slightly moving the bound.  We
17386d7f5d3SJohn Marino 	 will sometimes end up with un < vn, like the the X3 arm below.  */
17486d7f5d3SJohn Marino       if (un >= 3 * vn)
17586d7f5d3SJohn Marino 	{
17686d7f5d3SJohn Marino 	  mp_limb_t cy;
17786d7f5d3SJohn Marino 	  mp_ptr ws;
17886d7f5d3SJohn Marino 
17986d7f5d3SJohn Marino 	  /* The maximum ws usage is for the mpn_mul result.  */
18086d7f5d3SJohn Marino 	  ws = TMP_SALLOC_LIMBS (4 * vn);
18186d7f5d3SJohn Marino 
18286d7f5d3SJohn Marino 	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
18386d7f5d3SJohn Marino 	  un -= 2 * vn;
18486d7f5d3SJohn Marino 	  up += 2 * vn;
18586d7f5d3SJohn Marino 	  prodp += 2 * vn;
18686d7f5d3SJohn Marino 
18786d7f5d3SJohn Marino 	  while (un >= 3 * vn)
18886d7f5d3SJohn Marino 	    {
18986d7f5d3SJohn Marino 	      mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
19086d7f5d3SJohn Marino 	      un -= 2 * vn;
19186d7f5d3SJohn Marino 	      up += 2 * vn;
19286d7f5d3SJohn Marino 	      cy = mpn_add_n (prodp, prodp, ws, vn);
19386d7f5d3SJohn Marino 	      MPN_COPY (prodp + vn, ws + vn, 2 * vn);
19486d7f5d3SJohn Marino 	      mpn_incr_u (prodp + vn, cy);
19586d7f5d3SJohn Marino 	      prodp += 2 * vn;
19686d7f5d3SJohn Marino 	    }
19786d7f5d3SJohn Marino 
19886d7f5d3SJohn Marino 	  /* vn <= un < 3vn */
19986d7f5d3SJohn Marino 
20086d7f5d3SJohn Marino 	  if (4 * un < 5 * vn)
20186d7f5d3SJohn Marino 	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
20286d7f5d3SJohn Marino 	  else if (4 * un < 7 * vn)
20386d7f5d3SJohn Marino 	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
20486d7f5d3SJohn Marino 	  else
20586d7f5d3SJohn Marino 	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
20686d7f5d3SJohn Marino 
20786d7f5d3SJohn Marino 	  cy = mpn_add_n (prodp, prodp, ws, vn);
20886d7f5d3SJohn Marino 	  MPN_COPY (prodp + vn, ws + vn, un);
20986d7f5d3SJohn Marino 	  mpn_incr_u (prodp + vn, cy);
21086d7f5d3SJohn Marino 	}
21186d7f5d3SJohn Marino       else
21286d7f5d3SJohn Marino 	{
21386d7f5d3SJohn Marino 	  if (4 * un < 5 * vn)
21486d7f5d3SJohn Marino 	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
21586d7f5d3SJohn Marino 	  else if (4 * un < 7 * vn)
21686d7f5d3SJohn Marino 	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
21786d7f5d3SJohn Marino 	  else
21886d7f5d3SJohn Marino 	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
21986d7f5d3SJohn Marino 	}
22086d7f5d3SJohn Marino       TMP_SFREE;
22186d7f5d3SJohn Marino     }
22286d7f5d3SJohn Marino   else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
22386d7f5d3SJohn Marino 	   BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
22486d7f5d3SJohn Marino     {
22586d7f5d3SJohn Marino       /* Handle the largest operands that are not in the FFT range.  The 2nd
22686d7f5d3SJohn Marino 	 condition makes very unbalanced operands avoid the FFT code (except
22786d7f5d3SJohn Marino 	 perhaps as coefficient products of the Toom code.  */
22886d7f5d3SJohn Marino 
22986d7f5d3SJohn Marino       if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
23086d7f5d3SJohn Marino 	{
23186d7f5d3SJohn Marino 	  /* Use ToomX3 variants */
23286d7f5d3SJohn Marino 	  mp_ptr scratch;
23386d7f5d3SJohn Marino 	  TMP_SDECL; TMP_SMARK;
23486d7f5d3SJohn Marino 
23586d7f5d3SJohn Marino 	  scratch = TMP_SALLOC_LIMBS (ITCH);
23686d7f5d3SJohn Marino 
23786d7f5d3SJohn Marino 	  if (2 * un >= 5 * vn)
23886d7f5d3SJohn Marino 	    {
23986d7f5d3SJohn Marino 	      mp_limb_t cy;
24086d7f5d3SJohn Marino 	      mp_ptr ws;
24186d7f5d3SJohn Marino 
24286d7f5d3SJohn Marino 	      /* The maximum ws usage is for the mpn_mul result.  */
24386d7f5d3SJohn Marino 	      ws = TMP_SALLOC_LIMBS (7 * vn >> 1);
24486d7f5d3SJohn Marino 
24586d7f5d3SJohn Marino 	      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
24686d7f5d3SJohn Marino 		mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
24786d7f5d3SJohn Marino 	      else
24886d7f5d3SJohn Marino 		mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
24986d7f5d3SJohn Marino 	      un -= 2 * vn;
25086d7f5d3SJohn Marino 	      up += 2 * vn;
25186d7f5d3SJohn Marino 	      prodp += 2 * vn;
25286d7f5d3SJohn Marino 
25386d7f5d3SJohn Marino 	      while (2 * un >= 5 * vn)	/* un >= 2.5vn */
25486d7f5d3SJohn Marino 		{
25586d7f5d3SJohn Marino 		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
25686d7f5d3SJohn Marino 		    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
25786d7f5d3SJohn Marino 		  else
25886d7f5d3SJohn Marino 		    mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
25986d7f5d3SJohn Marino 		  un -= 2 * vn;
26086d7f5d3SJohn Marino 		  up += 2 * vn;
26186d7f5d3SJohn Marino 		  cy = mpn_add_n (prodp, prodp, ws, vn);
26286d7f5d3SJohn Marino 		  MPN_COPY (prodp + vn, ws + vn, 2 * vn);
26386d7f5d3SJohn Marino 		  mpn_incr_u (prodp + vn, cy);
26486d7f5d3SJohn Marino 		  prodp += 2 * vn;
26586d7f5d3SJohn Marino 		}
26686d7f5d3SJohn Marino 
26786d7f5d3SJohn Marino 	      /* vn / 2 <= un < 2.5vn */
26886d7f5d3SJohn Marino 
26986d7f5d3SJohn Marino 	      if (un < vn)
27086d7f5d3SJohn Marino 		mpn_mul (ws, vp, vn, up, un);
27186d7f5d3SJohn Marino 	      else
27286d7f5d3SJohn Marino 		mpn_mul (ws, up, un, vp, vn);
27386d7f5d3SJohn Marino 
27486d7f5d3SJohn Marino 	      cy = mpn_add_n (prodp, prodp, ws, vn);
27586d7f5d3SJohn Marino 	      MPN_COPY (prodp + vn, ws + vn, un);
27686d7f5d3SJohn Marino 	      mpn_incr_u (prodp + vn, cy);
27786d7f5d3SJohn Marino 	    }
27886d7f5d3SJohn Marino 	  else
27986d7f5d3SJohn Marino 	    {
28086d7f5d3SJohn Marino 	      if (6 * un < 7 * vn)
28186d7f5d3SJohn Marino 		mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
28286d7f5d3SJohn Marino 	      else if (2 * un < 3 * vn)
28386d7f5d3SJohn Marino 		{
28486d7f5d3SJohn Marino 		  if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
28586d7f5d3SJohn Marino 		    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
28686d7f5d3SJohn Marino 		  else
28786d7f5d3SJohn Marino 		    mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
28886d7f5d3SJohn Marino 		}
28986d7f5d3SJohn Marino 	      else if (6 * un < 11 * vn)
29086d7f5d3SJohn Marino 		{
29186d7f5d3SJohn Marino 		  if (4 * un < 7 * vn)
29286d7f5d3SJohn Marino 		    {
29386d7f5d3SJohn Marino 		      if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
29486d7f5d3SJohn Marino 			mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
29586d7f5d3SJohn Marino 		      else
29686d7f5d3SJohn Marino 			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
29786d7f5d3SJohn Marino 		    }
29886d7f5d3SJohn Marino 		  else
29986d7f5d3SJohn Marino 		    {
30086d7f5d3SJohn Marino 		      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
30186d7f5d3SJohn Marino 			mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
30286d7f5d3SJohn Marino 		      else
30386d7f5d3SJohn Marino 			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
30486d7f5d3SJohn Marino 		    }
30586d7f5d3SJohn Marino 		}
30686d7f5d3SJohn Marino 	      else
30786d7f5d3SJohn Marino 		{
30886d7f5d3SJohn Marino 		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
30986d7f5d3SJohn Marino 		    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
31086d7f5d3SJohn Marino 		  else
31186d7f5d3SJohn Marino 		    mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
31286d7f5d3SJohn Marino 		}
31386d7f5d3SJohn Marino 	    }
31486d7f5d3SJohn Marino 	  TMP_SFREE;
31586d7f5d3SJohn Marino 	}
31686d7f5d3SJohn Marino       else
31786d7f5d3SJohn Marino 	{
31886d7f5d3SJohn Marino 	  mp_ptr scratch;
31986d7f5d3SJohn Marino 	  TMP_DECL; TMP_MARK;
32086d7f5d3SJohn Marino 
32186d7f5d3SJohn Marino 	  if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
32286d7f5d3SJohn Marino 	    {
32386d7f5d3SJohn Marino 	      scratch = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
32486d7f5d3SJohn Marino 	      mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
32586d7f5d3SJohn Marino 	    }
32686d7f5d3SJohn Marino 	  else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
32786d7f5d3SJohn Marino 	    {
32886d7f5d3SJohn Marino 	      scratch = TMP_ALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
32986d7f5d3SJohn Marino 	      mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
33086d7f5d3SJohn Marino 	    }
33186d7f5d3SJohn Marino 	  else
33286d7f5d3SJohn Marino 	    {
33386d7f5d3SJohn Marino 	      scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
33486d7f5d3SJohn Marino 	      mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
33586d7f5d3SJohn Marino 	    }
33686d7f5d3SJohn Marino 	  TMP_FREE;
33786d7f5d3SJohn Marino 	}
33886d7f5d3SJohn Marino     }
33986d7f5d3SJohn Marino   else
34086d7f5d3SJohn Marino     {
34186d7f5d3SJohn Marino       if (un >= 8 * vn)
34286d7f5d3SJohn Marino 	{
34386d7f5d3SJohn Marino 	  mp_limb_t cy;
34486d7f5d3SJohn Marino 	  mp_ptr ws;
34586d7f5d3SJohn Marino 	  TMP_DECL; TMP_MARK;
34686d7f5d3SJohn Marino 
34786d7f5d3SJohn Marino 	  /* The maximum ws usage is for the mpn_mul result.  */
34886d7f5d3SJohn Marino 	  ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
34986d7f5d3SJohn Marino 
35086d7f5d3SJohn Marino 	  mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
35186d7f5d3SJohn Marino 	  un -= 3 * vn;
35286d7f5d3SJohn Marino 	  up += 3 * vn;
35386d7f5d3SJohn Marino 	  prodp += 3 * vn;
35486d7f5d3SJohn Marino 
35586d7f5d3SJohn Marino 	  while (2 * un >= 7 * vn)	/* un >= 3.5vn  */
35686d7f5d3SJohn Marino 	    {
35786d7f5d3SJohn Marino 	      mpn_fft_mul (ws, up, 3 * vn, vp, vn);
35886d7f5d3SJohn Marino 	      un -= 3 * vn;
35986d7f5d3SJohn Marino 	      up += 3 * vn;
36086d7f5d3SJohn Marino 	      cy = mpn_add_n (prodp, prodp, ws, vn);
36186d7f5d3SJohn Marino 	      MPN_COPY (prodp + vn, ws + vn, 3 * vn);
36286d7f5d3SJohn Marino 	      mpn_incr_u (prodp + vn, cy);
36386d7f5d3SJohn Marino 	      prodp += 3 * vn;
36486d7f5d3SJohn Marino 	    }
36586d7f5d3SJohn Marino 
36686d7f5d3SJohn Marino 	  /* vn / 2 <= un < 3.5vn */
36786d7f5d3SJohn Marino 
36886d7f5d3SJohn Marino 	  if (un < vn)
36986d7f5d3SJohn Marino 	    mpn_mul (ws, vp, vn, up, un);
37086d7f5d3SJohn Marino 	  else
37186d7f5d3SJohn Marino 	    mpn_mul (ws, up, un, vp, vn);
37286d7f5d3SJohn Marino 
37386d7f5d3SJohn Marino 	  cy = mpn_add_n (prodp, prodp, ws, vn);
37486d7f5d3SJohn Marino 	  MPN_COPY (prodp + vn, ws + vn, un);
37586d7f5d3SJohn Marino 	  mpn_incr_u (prodp + vn, cy);
37686d7f5d3SJohn Marino 
37786d7f5d3SJohn Marino 	  TMP_FREE;
37886d7f5d3SJohn Marino 	}
37986d7f5d3SJohn Marino       else
38086d7f5d3SJohn Marino 	mpn_fft_mul (prodp, up, un, vp, vn);
38186d7f5d3SJohn Marino     }
38286d7f5d3SJohn Marino 
38386d7f5d3SJohn Marino   return prodp[un + vn - 1];	/* historic */
38486d7f5d3SJohn Marino }
385