/* Source: /dflybsd-src/contrib/gmp/mpn/generic/toom6h_mul.c
   (revision 86d7f5d305c6adaa56ff4582ece9859d73106103) */

/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


2786d7f5d3SJohn Marino #include "gmp.h"
2886d7f5d3SJohn Marino #include "gmp-impl.h"
2986d7f5d3SJohn Marino 
3086d7f5d3SJohn Marino 
3186d7f5d3SJohn Marino #if GMP_NUMB_BITS < 21
3286d7f5d3SJohn Marino #error Not implemented.
3386d7f5d3SJohn Marino #endif
3486d7f5d3SJohn Marino 
/* Selectors used by TOOM6H_MUL_N_REC to decide which multiplication
   routines are candidates for the recursive products.  In a tuning build
   all of them are enabled; otherwise each one is a constant expression
   built from the tuned thresholds (presumably so that branches which can
   never be taken are dropped at compile time).  */
#if TUNE_PROGRAM_BUILD
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom22   1
#define MAYBE_mul_toom33   1
#define MAYBE_mul_toom6h   1
#else
#define MAYBE_mul_basecase                                              \
  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
#define MAYBE_mul_toom22                                                \
  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
#define MAYBE_mul_toom33                                                \
  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
#define MAYBE_mul_toom6h                                                \
  (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
#endif

/* Balanced recursive product: p <- {a,n} * {b,n}.
   The routine is picked by comparing n against the tuned thresholds, from
   the smallest algorithm upwards: basecase, toom22, toom33, toom44 and
   finally toom6h itself.  Each candidate is additionally gated by its
   MAYBE_mul_* selector.  ws is handed to the Toom routines as scratch
   space; the basecase call does not use it.  */
#define TOOM6H_MUL_N_REC(p, a, b, n, ws)                                \
  do {                                                                  \
    if (MAYBE_mul_basecase                                              \
        && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
      mpn_mul_basecase (p, a, n, b, n);                                 \
    else if (MAYBE_mul_toom22                                           \
             && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
      mpn_toom22_mul (p, a, n, b, n, ws);                               \
    else if (MAYBE_mul_toom33                                           \
             && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))              \
      mpn_toom33_mul (p, a, n, b, n, ws);                               \
    else if (! MAYBE_mul_toom6h                                         \
             || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))              \
      mpn_toom44_mul (p, a, n, b, n, ws);                               \
    else                                                                \
      mpn_toom6h_mul (p, a, n, b, n, ws);                               \
  } while (0)

/* Unbalanced recursive product: p <- {a,na} * {b,nb}, delegated to
   mpn_mul.  The ws argument is accepted for symmetry with
   TOOM6H_MUL_N_REC but is not used.  */
#define TOOM6H_MUL_REC(p, a, na, b, nb, ws)             \
  do {  mpn_mul (p, a, na, b, nb);                      \
  } while (0)

7386d7f5d3SJohn Marino /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
7486d7f5d3SJohn Marino    With: an >= bn >= 46, an*6 <  bn * 17.
7586d7f5d3SJohn Marino    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
7686d7f5d3SJohn Marino 
7786d7f5d3SJohn Marino    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
7886d7f5d3SJohn Marino */
7986d7f5d3SJohn Marino /* Estimate on needed scratch:
8086d7f5d3SJohn Marino    S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
8186d7f5d3SJohn Marino    since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
8286d7f5d3SJohn Marino  */
8386d7f5d3SJohn Marino 
8486d7f5d3SJohn Marino void
mpn_toom6h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)8586d7f5d3SJohn Marino mpn_toom6h_mul   (mp_ptr pp,
8686d7f5d3SJohn Marino 		  mp_srcptr ap, mp_size_t an,
8786d7f5d3SJohn Marino 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
8886d7f5d3SJohn Marino {
8986d7f5d3SJohn Marino   mp_size_t n, s, t;
9086d7f5d3SJohn Marino   int p, q, half;
9186d7f5d3SJohn Marino   int sign;
9286d7f5d3SJohn Marino 
9386d7f5d3SJohn Marino   /***************************** decomposition *******************************/
9486d7f5d3SJohn Marino 
9586d7f5d3SJohn Marino   ASSERT( an >= bn);
9686d7f5d3SJohn Marino   /* Can not handle too much unbalancement */
9786d7f5d3SJohn Marino   ASSERT( bn >= 42 );
9886d7f5d3SJohn Marino   /* Can not handle too much unbalancement */
9986d7f5d3SJohn Marino   ASSERT((an*3 <  bn * 8) || ( bn >= 46 && an*6 <  bn * 17 ));
10086d7f5d3SJohn Marino 
10186d7f5d3SJohn Marino   /* Limit num/den is a rational number between
10286d7f5d3SJohn Marino      (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
10386d7f5d3SJohn Marino #define LIMIT_numerator (18)
10486d7f5d3SJohn Marino #define LIMIT_denominat (17)
10586d7f5d3SJohn Marino 
10686d7f5d3SJohn Marino   if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */
10786d7f5d3SJohn Marino     { p = q = 6; }
10886d7f5d3SJohn Marino   else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn )
10986d7f5d3SJohn Marino     { p = 7; q = 6; }
11086d7f5d3SJohn Marino   else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn )
11186d7f5d3SJohn Marino     { p = 7; q = 5; }
11286d7f5d3SJohn Marino   else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn )  /* is 4*... < 8*... */
11386d7f5d3SJohn Marino     { p = 8; q = 5; }
11486d7f5d3SJohn Marino   else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn )  /* is 4*... < 8*... */
11586d7f5d3SJohn Marino     { p = 8; q = 4; }
11686d7f5d3SJohn Marino   else
11786d7f5d3SJohn Marino     { p = 9; q = 4; }
11886d7f5d3SJohn Marino 
11986d7f5d3SJohn Marino   half = (p ^ q) & 1;
12086d7f5d3SJohn Marino   n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
12186d7f5d3SJohn Marino   p--; q--;
12286d7f5d3SJohn Marino 
12386d7f5d3SJohn Marino   s = an - p * n;
12486d7f5d3SJohn Marino   t = bn - q * n;
12586d7f5d3SJohn Marino 
12686d7f5d3SJohn Marino   /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
12786d7f5d3SJohn Marino   if (half) { /* Recover from badly chosen splitting */
12886d7f5d3SJohn Marino     if (s<1) {p--; s+=n; half=0;}
12986d7f5d3SJohn Marino     else if (t<1) {q--; t+=n; half=0;}
13086d7f5d3SJohn Marino   }
13186d7f5d3SJohn Marino #undef LIMIT_numerator
13286d7f5d3SJohn Marino #undef LIMIT_denominat
13386d7f5d3SJohn Marino 
13486d7f5d3SJohn Marino   ASSERT (0 < s && s <= n);
13586d7f5d3SJohn Marino   ASSERT (0 < t && t <= n);
13686d7f5d3SJohn Marino   ASSERT (half || s + t > 3);
13786d7f5d3SJohn Marino   ASSERT (n > 2);
13886d7f5d3SJohn Marino 
13986d7f5d3SJohn Marino #define   r4    (pp + 3 * n)			/* 3n+1 */
14086d7f5d3SJohn Marino #define   r2    (pp + 7 * n)			/* 3n+1 */
14186d7f5d3SJohn Marino #define   r0    (pp +11 * n)			/* s+t <= 2*n */
14286d7f5d3SJohn Marino #define   r5    (scratch)			/* 3n+1 */
14386d7f5d3SJohn Marino #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
14486d7f5d3SJohn Marino #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
14586d7f5d3SJohn Marino #define   v0    (pp + 7 * n)			/* n+1 */
14686d7f5d3SJohn Marino #define   v1    (pp + 8 * n+1)			/* n+1 */
14786d7f5d3SJohn Marino #define   v2    (pp + 9 * n+2)			/* n+1 */
14886d7f5d3SJohn Marino #define   v3    (scratch + 9 * n + 3)		/* n+1 */
14986d7f5d3SJohn Marino #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
15086d7f5d3SJohn Marino #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
15186d7f5d3SJohn Marino 
15286d7f5d3SJohn Marino   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
15386d7f5d3SJohn Marino      need all of them  */
15486d7f5d3SJohn Marino /*   if (scratch == NULL) */
15586d7f5d3SJohn Marino /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
15686d7f5d3SJohn Marino   ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
15786d7f5d3SJohn Marino   ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
15886d7f5d3SJohn Marino 
15986d7f5d3SJohn Marino   /********************** evaluation and recursive calls *********************/
16086d7f5d3SJohn Marino   /* $\pm1/2$ */
16186d7f5d3SJohn Marino   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
16286d7f5d3SJohn Marino 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
16386d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
16486d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
16586d7f5d3SJohn Marino   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
16686d7f5d3SJohn Marino 
16786d7f5d3SJohn Marino   /* $\pm1$ */
16886d7f5d3SJohn Marino   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
16986d7f5d3SJohn Marino   if (q == 3)
17086d7f5d3SJohn Marino     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
17186d7f5d3SJohn Marino   else
17286d7f5d3SJohn Marino     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
17386d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
17486d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */
17586d7f5d3SJohn Marino   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
17686d7f5d3SJohn Marino 
17786d7f5d3SJohn Marino   /* $\pm4$ */
17886d7f5d3SJohn Marino   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
17986d7f5d3SJohn Marino 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
18086d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
18186d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
18286d7f5d3SJohn Marino   mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
18386d7f5d3SJohn Marino 
18486d7f5d3SJohn Marino   /* $\pm1/4$ */
18586d7f5d3SJohn Marino   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
18686d7f5d3SJohn Marino 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
18786d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
18886d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
18986d7f5d3SJohn Marino   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
19086d7f5d3SJohn Marino 
19186d7f5d3SJohn Marino   /* $\pm2$ */
19286d7f5d3SJohn Marino   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
19386d7f5d3SJohn Marino 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
19486d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
19586d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
19686d7f5d3SJohn Marino   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
19786d7f5d3SJohn Marino 
19886d7f5d3SJohn Marino #undef v0
19986d7f5d3SJohn Marino #undef v1
20086d7f5d3SJohn Marino #undef v2
20186d7f5d3SJohn Marino #undef v3
20286d7f5d3SJohn Marino #undef wse
20386d7f5d3SJohn Marino 
20486d7f5d3SJohn Marino   /* A(0)*B(0) */
20586d7f5d3SJohn Marino   TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi);
20686d7f5d3SJohn Marino 
20786d7f5d3SJohn Marino   /* Infinity */
20886d7f5d3SJohn Marino   if( half != 0) {
20986d7f5d3SJohn Marino     if(s>t) {
21086d7f5d3SJohn Marino       TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
21186d7f5d3SJohn Marino     } else {
21286d7f5d3SJohn Marino       TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
21386d7f5d3SJohn Marino     };
21486d7f5d3SJohn Marino   };
21586d7f5d3SJohn Marino 
21686d7f5d3SJohn Marino   mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
21786d7f5d3SJohn Marino 
21886d7f5d3SJohn Marino #undef r0
21986d7f5d3SJohn Marino #undef r1
22086d7f5d3SJohn Marino #undef r2
22186d7f5d3SJohn Marino #undef r3
22286d7f5d3SJohn Marino #undef r4
22386d7f5d3SJohn Marino #undef r5
22486d7f5d3SJohn Marino #undef wsi
22586d7f5d3SJohn Marino }
22686d7f5d3SJohn Marino 
/* Remove the helper macros defined earlier in this file.  */
#undef TOOM6H_MUL_N_REC
#undef TOOM6H_MUL_REC
#undef MAYBE_mul_basecase
#undef MAYBE_mul_toom22
#undef MAYBE_mul_toom33
#undef MAYBE_mul_toom6h