xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/toom6h_mul.c (revision b1bb3099bf4d47bbe8c7be5b78240a535263771f)
1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 
31 #if GMP_NUMB_BITS < 21
32 #error Not implemented.
33 #endif
34 
35 #if TUNE_PROGRAM_BUILD
36 #define MAYBE_mul_basecase 1
37 #define MAYBE_mul_toom22   1
38 #define MAYBE_mul_toom33   1
39 #define MAYBE_mul_toom6h   1
40 #else
41 #define MAYBE_mul_basecase						\
42   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
43 #define MAYBE_mul_toom22						\
44   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
45 #define MAYBE_mul_toom33						\
46   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
47 #define MAYBE_mul_toom6h						\
48   (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
49 #endif
50 
51 #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
52   do {									\
53     if (MAYBE_mul_basecase						\
54 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
55       mpn_mul_basecase (p, a, n, b, n);					\
56       if (f)								\
57 	mpn_mul_basecase (p2, a2, n, b2, n);				\
58     } else if (MAYBE_mul_toom22						\
59 	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
60       mpn_toom22_mul (p, a, n, b, n, ws);				\
61       if (f)								\
62 	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
63     } else if (MAYBE_mul_toom33						\
64 	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
65       mpn_toom33_mul (p, a, n, b, n, ws);				\
66       if (f)								\
67 	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
68     } else if (! MAYBE_mul_toom6h					\
69 	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
70       mpn_toom44_mul (p, a, n, b, n, ws);				\
71       if (f)								\
72 	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
73     } else {								\
74       mpn_toom6h_mul (p, a, n, b, n, ws);				\
75       if (f)								\
76 	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
77     }									\
78   } while (0)
79 
80 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
81   do { mpn_mul (p, a, na, b, nb);			\
82   } while (0)
83 
84 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
85    With: an >= bn >= 46, an*6 <  bn * 17.
86    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
87 
88    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
89 */
90 /* Estimate on needed scratch:
91    S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
92    since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
93  */
94 
95 void
96 mpn_toom6h_mul   (mp_ptr pp,
97 		  mp_srcptr ap, mp_size_t an,
98 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
99 {
100   mp_size_t n, s, t;
101   int p, q, half;
102   int sign;
103 
104   /***************************** decomposition *******************************/
105 
106   ASSERT (an >= bn);
107   /* Can not handle too much unbalancement */
108   ASSERT (bn >= 42);
109   /* Can not handle too much unbalancement */
110   ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
111 
112   /* Limit num/den is a rational number between
113      (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
114 #define LIMIT_numerator (18)
115 #define LIMIT_denominat (17)
116 
117   if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
118     {
119       n = 1 + (an - 1) / (size_t) 6;
120       p = q = 5;
121       half = 0;
122 
123       s = an - 5 * n;
124       t = bn - 5 * n;
125     }
126   else {
127     if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
128       { p = 7; q = 6; }
129     else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
130       { p = 7; q = 5; }
131     else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
132       { p = 8; q = 5; }
133     else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
134       { p = 8; q = 4; }
135     else
136       { p = 9; q = 4; }
137 
138     half = (p ^ q) & 1;
139     n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
140     p--; q--;
141 
142     s = an - p * n;
143     t = bn - q * n;
144 
145     /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
146     if (half) { /* Recover from badly chosen splitting */
147       if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
148       else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
149     }
150   }
151 #undef LIMIT_numerator
152 #undef LIMIT_denominat
153 
154   ASSERT (0 < s && s <= n);
155   ASSERT (0 < t && t <= n);
156   ASSERT (half || s + t > 3);
157   ASSERT (n > 2);
158 
159 #define   r4    (pp + 3 * n)			/* 3n+1 */
160 #define   r2    (pp + 7 * n)			/* 3n+1 */
161 #define   r0    (pp +11 * n)			/* s+t <= 2*n */
162 #define   r5    (scratch)			/* 3n+1 */
163 #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
164 #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
165 #define   v0    (pp + 7 * n)			/* n+1 */
166 #define   v1    (pp + 8 * n+1)			/* n+1 */
167 #define   v2    (pp + 9 * n+2)			/* n+1 */
168 #define   v3    (scratch + 9 * n + 3)		/* n+1 */
169 #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
170 #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
171 
172   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
173      need all of them  */
174 /*   if (scratch == NULL) */
175 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
176   ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
177   ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
178 
179   /********************** evaluation and recursive calls *********************/
180   /* $\pm1/2$ */
181   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
182 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
183   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
184   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
185   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
186 
187   /* $\pm1$ */
188   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
189   if (UNLIKELY (q == 3))
190     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
191   else
192     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
193   /* A(-1)*B(-1) */ /* A(1)*B(1) */
194   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
195   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
196 
197   /* $\pm4$ */
198   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
199 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
200   /* A(-4)*B(-4) */
201   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
202   mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
203 
204   /* $\pm1/4$ */
205   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
206 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
207   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
208   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
209   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
210 
211   /* $\pm2$ */
212   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
213 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
214   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
215   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
216   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
217 
218 #undef v0
219 #undef v1
220 #undef v2
221 #undef v3
222 #undef wse
223 
224   /* A(0)*B(0) */
225   TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
226 
227   /* Infinity */
228   if (UNLIKELY (half != 0)) {
229     if (s > t) {
230       TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
231     } else {
232       TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
233     };
234   };
235 
236   mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
237 
238 #undef r0
239 #undef r1
240 #undef r2
241 #undef r3
242 #undef r4
243 #undef r5
244 #undef wsi
245 }
246 
247 #undef TOOM6H_MUL_N_REC
248 #undef TOOM6H_MUL_REC
249 #undef MAYBE_mul_basecase
250 #undef MAYBE_mul_toom22
251 #undef MAYBE_mul_toom33
252 #undef MAYBE_mul_toom6h
253