xref: /dflybsd-src/contrib/gmp/mpn/generic/toom22_mul.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
286d7f5d3SJohn Marino    accurately, bn <= an < 2bn.
386d7f5d3SJohn Marino 
486d7f5d3SJohn Marino    Contributed to the GNU project by Torbjorn Granlund.
586d7f5d3SJohn Marino 
686d7f5d3SJohn Marino    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
786d7f5d3SJohn Marino    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
886d7f5d3SJohn Marino    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
986d7f5d3SJohn Marino 
1086d7f5d3SJohn Marino Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
1186d7f5d3SJohn Marino 
1286d7f5d3SJohn Marino This file is part of the GNU MP Library.
1386d7f5d3SJohn Marino 
1486d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1586d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1686d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1786d7f5d3SJohn Marino option) any later version.
1886d7f5d3SJohn Marino 
1986d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
2086d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
2186d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
2286d7f5d3SJohn Marino License for more details.
2386d7f5d3SJohn Marino 
2486d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2586d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2686d7f5d3SJohn Marino 
2786d7f5d3SJohn Marino 
2886d7f5d3SJohn Marino #include "gmp.h"
2986d7f5d3SJohn Marino #include "gmp-impl.h"
3086d7f5d3SJohn Marino 
3186d7f5d3SJohn Marino /* Evaluate in: -1, 0, +inf
3286d7f5d3SJohn Marino 
3386d7f5d3SJohn Marino   <-s--><--n-->
3486d7f5d3SJohn Marino    ____ ______
3586d7f5d3SJohn Marino   |_a1_|___a0_|
3686d7f5d3SJohn Marino    |b1_|___b0_|
3786d7f5d3SJohn Marino    <-t-><--n-->
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino   v0  =  a0     * b0       #   A(0)*B(0)
4086d7f5d3SJohn Marino   vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
4186d7f5d3SJohn Marino   vinf=      a1 *     b1   # A(inf)*B(inf)
4286d7f5d3SJohn Marino */
4386d7f5d3SJohn Marino 
4486d7f5d3SJohn Marino #if TUNE_PROGRAM_BUILD
4586d7f5d3SJohn Marino #define MAYBE_mul_toom22   1
4686d7f5d3SJohn Marino #else
4786d7f5d3SJohn Marino #define MAYBE_mul_toom22						\
4886d7f5d3SJohn Marino   (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
4986d7f5d3SJohn Marino #endif
5086d7f5d3SJohn Marino 
5186d7f5d3SJohn Marino #define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
5286d7f5d3SJohn Marino   do {									\
5386d7f5d3SJohn Marino     if (! MAYBE_mul_toom22						\
5486d7f5d3SJohn Marino 	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
5586d7f5d3SJohn Marino       mpn_mul_basecase (p, a, n, b, n);					\
5686d7f5d3SJohn Marino     else								\
5786d7f5d3SJohn Marino       mpn_toom22_mul (p, a, n, b, n, ws);				\
5886d7f5d3SJohn Marino   } while (0)
5986d7f5d3SJohn Marino 
6086d7f5d3SJohn Marino /* Normally, this calls mul_basecase or toom22_mul.  But when when the fraction
6186d7f5d3SJohn Marino    MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
6286d7f5d3SJohn Marino    relative unbalance will become a larger and larger relative unbalance with
6386d7f5d3SJohn Marino    each recursion (the difference s-t will be invariant over recursive calls).
6486d7f5d3SJohn Marino    Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
6586d7f5d3SJohn Marino    MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
6686d7f5d3SJohn Marino #define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
6786d7f5d3SJohn Marino   do {									\
6886d7f5d3SJohn Marino     if (! MAYBE_mul_toom22						\
6986d7f5d3SJohn Marino 	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
7086d7f5d3SJohn Marino       mpn_mul_basecase (p, a, an, b, bn);				\
7186d7f5d3SJohn Marino     else if (4 * an < 5 * bn)						\
7286d7f5d3SJohn Marino       mpn_toom22_mul (p, a, an, b, bn, ws);				\
7386d7f5d3SJohn Marino     else								\
7486d7f5d3SJohn Marino       mpn_toom32_mul (p, a, an, b, bn, ws);				\
7586d7f5d3SJohn Marino   } while (0)
7686d7f5d3SJohn Marino 
7786d7f5d3SJohn Marino void
mpn_toom22_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)7886d7f5d3SJohn Marino mpn_toom22_mul (mp_ptr pp,
7986d7f5d3SJohn Marino 		mp_srcptr ap, mp_size_t an,
8086d7f5d3SJohn Marino 		mp_srcptr bp, mp_size_t bn,
8186d7f5d3SJohn Marino 		mp_ptr scratch)
8286d7f5d3SJohn Marino {
8386d7f5d3SJohn Marino   mp_size_t n, s, t;
8486d7f5d3SJohn Marino   int vm1_neg;
8586d7f5d3SJohn Marino   mp_limb_t cy, cy2;
8686d7f5d3SJohn Marino   mp_ptr asm1;
8786d7f5d3SJohn Marino   mp_ptr bsm1;
8886d7f5d3SJohn Marino 
8986d7f5d3SJohn Marino #define a0  ap
9086d7f5d3SJohn Marino #define a1  (ap + n)
9186d7f5d3SJohn Marino #define b0  bp
9286d7f5d3SJohn Marino #define b1  (bp + n)
9386d7f5d3SJohn Marino 
9486d7f5d3SJohn Marino   s = an >> 1;
9586d7f5d3SJohn Marino   n = an - s;
9686d7f5d3SJohn Marino   t = bn - n;
9786d7f5d3SJohn Marino 
9886d7f5d3SJohn Marino   ASSERT (an >= bn);
9986d7f5d3SJohn Marino 
10086d7f5d3SJohn Marino   ASSERT (0 < s && s <= n);
10186d7f5d3SJohn Marino   ASSERT (0 < t && t <= s);
10286d7f5d3SJohn Marino 
10386d7f5d3SJohn Marino   asm1 = pp;
10486d7f5d3SJohn Marino   bsm1 = pp + n;
10586d7f5d3SJohn Marino 
10686d7f5d3SJohn Marino   vm1_neg = 0;
10786d7f5d3SJohn Marino 
10886d7f5d3SJohn Marino   /* Compute asm1.  */
10986d7f5d3SJohn Marino   if (s == n)
11086d7f5d3SJohn Marino     {
11186d7f5d3SJohn Marino       if (mpn_cmp (a0, a1, n) < 0)
11286d7f5d3SJohn Marino 	{
11386d7f5d3SJohn Marino 	  mpn_sub_n (asm1, a1, a0, n);
11486d7f5d3SJohn Marino 	  vm1_neg = 1;
11586d7f5d3SJohn Marino 	}
11686d7f5d3SJohn Marino       else
11786d7f5d3SJohn Marino 	{
11886d7f5d3SJohn Marino 	  mpn_sub_n (asm1, a0, a1, n);
11986d7f5d3SJohn Marino 	}
12086d7f5d3SJohn Marino     }
12186d7f5d3SJohn Marino   else
12286d7f5d3SJohn Marino     {
12386d7f5d3SJohn Marino       if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
12486d7f5d3SJohn Marino 	{
12586d7f5d3SJohn Marino 	  mpn_sub_n (asm1, a1, a0, s);
12686d7f5d3SJohn Marino 	  MPN_ZERO (asm1 + s, n - s);
12786d7f5d3SJohn Marino 	  vm1_neg = 1;
12886d7f5d3SJohn Marino 	}
12986d7f5d3SJohn Marino       else
13086d7f5d3SJohn Marino 	{
13186d7f5d3SJohn Marino 	  mpn_sub (asm1, a0, n, a1, s);
13286d7f5d3SJohn Marino 	}
13386d7f5d3SJohn Marino     }
13486d7f5d3SJohn Marino 
13586d7f5d3SJohn Marino   /* Compute bsm1.  */
13686d7f5d3SJohn Marino   if (t == n)
13786d7f5d3SJohn Marino     {
13886d7f5d3SJohn Marino       if (mpn_cmp (b0, b1, n) < 0)
13986d7f5d3SJohn Marino 	{
14086d7f5d3SJohn Marino 	  mpn_sub_n (bsm1, b1, b0, n);
14186d7f5d3SJohn Marino 	  vm1_neg ^= 1;
14286d7f5d3SJohn Marino 	}
14386d7f5d3SJohn Marino       else
14486d7f5d3SJohn Marino 	{
14586d7f5d3SJohn Marino 	  mpn_sub_n (bsm1, b0, b1, n);
14686d7f5d3SJohn Marino 	}
14786d7f5d3SJohn Marino     }
14886d7f5d3SJohn Marino   else
14986d7f5d3SJohn Marino     {
15086d7f5d3SJohn Marino       if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
15186d7f5d3SJohn Marino 	{
15286d7f5d3SJohn Marino 	  mpn_sub_n (bsm1, b1, b0, t);
15386d7f5d3SJohn Marino 	  MPN_ZERO (bsm1 + t, n - t);
15486d7f5d3SJohn Marino 	  vm1_neg ^= 1;
15586d7f5d3SJohn Marino 	}
15686d7f5d3SJohn Marino       else
15786d7f5d3SJohn Marino 	{
15886d7f5d3SJohn Marino 	  mpn_sub (bsm1, b0, n, b1, t);
15986d7f5d3SJohn Marino 	}
16086d7f5d3SJohn Marino     }
16186d7f5d3SJohn Marino 
16286d7f5d3SJohn Marino #define v0	pp				/* 2n */
16386d7f5d3SJohn Marino #define vinf	(pp + 2 * n)			/* s+t */
16486d7f5d3SJohn Marino #define vm1	scratch				/* 2n */
16586d7f5d3SJohn Marino #define scratch_out	scratch + 2 * n
16686d7f5d3SJohn Marino 
16786d7f5d3SJohn Marino   /* vm1, 2n limbs */
16886d7f5d3SJohn Marino   TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
16986d7f5d3SJohn Marino 
17086d7f5d3SJohn Marino   if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
17186d7f5d3SJohn Marino   else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
17286d7f5d3SJohn Marino 
17386d7f5d3SJohn Marino   /* v0, 2n limbs */
17486d7f5d3SJohn Marino   TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
17586d7f5d3SJohn Marino 
17686d7f5d3SJohn Marino   /* H(v0) + L(vinf) */
17786d7f5d3SJohn Marino   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
17886d7f5d3SJohn Marino 
17986d7f5d3SJohn Marino   /* L(v0) + H(v0) */
18086d7f5d3SJohn Marino   cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
18186d7f5d3SJohn Marino 
18286d7f5d3SJohn Marino   /* L(vinf) + H(vinf) */
18386d7f5d3SJohn Marino   cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
18486d7f5d3SJohn Marino 
18586d7f5d3SJohn Marino   if (vm1_neg)
18686d7f5d3SJohn Marino     cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
18786d7f5d3SJohn Marino   else
18886d7f5d3SJohn Marino     cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
18986d7f5d3SJohn Marino 
19086d7f5d3SJohn Marino   ASSERT (cy + 1  <= 3);
19186d7f5d3SJohn Marino   ASSERT (cy2 <= 2);
19286d7f5d3SJohn Marino 
19386d7f5d3SJohn Marino   mpn_incr_u (pp + 2 * n, cy2);
19486d7f5d3SJohn Marino   if (LIKELY (cy <= 2))
19586d7f5d3SJohn Marino     mpn_incr_u (pp + 3 * n, cy);
19686d7f5d3SJohn Marino   else
19786d7f5d3SJohn Marino     mpn_decr_u (pp + 3 * n, 1);
19886d7f5d3SJohn Marino }
199