xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/toom8h_mul.c (revision f89f6560d453f5e37386cc7938c072d2f528b9fa)
1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 
31 #if GMP_NUMB_BITS < 29
32 #error Not implemented.
33 #endif
34 
35 #if GMP_NUMB_BITS < 43
36 #define BIT_CORRECTION 1
37 #define CORRECTION_BITS GMP_NUMB_BITS
38 #else
39 #define BIT_CORRECTION 0
40 #define CORRECTION_BITS 0
41 #endif
42 
43 
44 #if TUNE_PROGRAM_BUILD
45 #define MAYBE_mul_basecase 1
46 #define MAYBE_mul_toom22   1
47 #define MAYBE_mul_toom33   1
48 #define MAYBE_mul_toom44   1
49 #define MAYBE_mul_toom8h   1
50 #else
51 #define MAYBE_mul_basecase						\
52   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
53 #define MAYBE_mul_toom22						\
54   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
55 #define MAYBE_mul_toom33						\
56   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
57 #define MAYBE_mul_toom44						\
58   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
59 #define MAYBE_mul_toom8h						\
60   (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
61 #endif
62 
63 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
64   do {									\
65     if (MAYBE_mul_basecase						\
66 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
67       mpn_mul_basecase (p, a, n, b, n);					\
68       if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
69     } else if (MAYBE_mul_toom22						\
70 	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
71       mpn_toom22_mul (p, a, n, b, n, ws);				\
72       if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
73     } else if (MAYBE_mul_toom33						\
74 	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
75       mpn_toom33_mul (p, a, n, b, n, ws);				\
76       if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
77     } else if (MAYBE_mul_toom44						\
78 	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
79       mpn_toom44_mul (p, a, n, b, n, ws);				\
80       if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
81     } else if (! MAYBE_mul_toom8h					\
82 	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
83       mpn_toom6h_mul (p, a, n, b, n, ws);				\
84       if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
85     } else {								\
86       mpn_toom8h_mul (p, a, n, b, n, ws);				\
87       if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
88     }									\
89   } while (0)
90 
91 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
92   do { mpn_mul (p, a, na, b, nb); } while (0)
93 
94 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
95    With: an >= bn >= 86, an*5 <  bn * 11.
96    It _may_ work with bn<=?? and bn*?? < an*? < bn*??
97 
98    Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
99 */
100 /* Estimate on needed scratch:
101    S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
102    since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
103  */
104 
105 void
106 mpn_toom8h_mul   (mp_ptr pp,
107 		  mp_srcptr ap, mp_size_t an,
108 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
109 {
110   mp_size_t n, s, t;
111   int p, q, half;
112   int sign;
113 
114   /***************************** decomposition *******************************/
115 
116   ASSERT (an >= bn);
117   /* Can not handle too small operands */
118   ASSERT (bn >= 86);
119   /* Can not handle too much unbalancement */
120   ASSERT (an <= bn*4);
121   ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
122   ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
123   ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
124 
125   /* Limit num/den is a rational number between
126      (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
127 #define LIMIT_numerator (21)
128 #define LIMIT_denominat (20)
129 
130   if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
131     {
132       half = 0;
133       n = 1 + ((an - 1)>>3);
134       p = q = 7;
135       s = an - 7 * n;
136       t = bn - 7 * n;
137     }
138   else
139     {
140       if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
141 	{ p = 9; q = 8; }
142       else if (GMP_NUMB_BITS <= 9*3 ||
143 	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
144 	{ p = 9; q = 7; }
145       else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
146 	{ p =10; q = 7; }
147       else if (GMP_NUMB_BITS <= 10*3 ||
148 	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
149 	{ p =10; q = 6; }
150       else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
151 	{ p =11; q = 6; }
152       else if (GMP_NUMB_BITS <= 11*3 ||
153 	       an * 4 < 9 * bn)
154 	{ p =11; q = 5; }
155       else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
156 	{ p =12; q = 5; }
157       else if (GMP_NUMB_BITS <= 12*3 ||
158 	       an * 9 < 28 * bn )  /* is 4*... <12*... */
159 	{ p =12; q = 4; }
160       else
161 	{ p =13; q = 4; }
162 
163       half = (p+q)&1;
164       n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
165       p--; q--;
166 
167       s = an - p * n;
168       t = bn - q * n;
169 
170       if(half) { /* Recover from badly chosen splitting */
171 	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
172 	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
173       }
174     }
175 #undef LIMIT_numerator
176 #undef LIMIT_denominat
177 
178   ASSERT (0 < s && s <= n);
179   ASSERT (0 < t && t <= n);
180   ASSERT (half || s + t > 3);
181   ASSERT (n > 2);
182 
183 #define   r6    (pp + 3 * n)			/* 3n+1 */
184 #define   r4    (pp + 7 * n)			/* 3n+1 */
185 #define   r2    (pp +11 * n)			/* 3n+1 */
186 #define   r0    (pp +15 * n)			/* s+t <= 2*n */
187 #define   r7    (scratch)			/* 3n+1 */
188 #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
189 #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
190 #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
191 #define   v0    (pp +11 * n)			/* n+1 */
192 #define   v1    (pp +12 * n+1)			/* n+1 */
193 #define   v2    (pp +13 * n+2)			/* n+1 */
194 #define   v3    (scratch +12 * n + 4)		/* n+1 */
195 #define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
196 #define   wse   (scratch +13 * n + 5)		/* 2n+1 */
197 
198   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
199      need all of them  */
200 /*   if (scratch == NULL) */
201 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
202   ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
203   ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
204 
205   /********************** evaluation and recursive calls *********************/
206 
207   /* $\pm1/8$ */
208   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
209 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
210   /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
211   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
212   mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
213 
214   /* $\pm1/4$ */
215   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
216 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
217   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
218   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
219   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
220 
221   /* $\pm2$ */
222   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
223 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
224   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
225   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
226   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
227 
228   /* $\pm8$ */
229   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
230 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
231   /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
232   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
233   mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
234 
235   /* $\pm1/2$ */
236   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
237 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
238   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
239   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
240   mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
241 
242   /* $\pm1$ */
243   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
244   if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
245     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
246   else
247     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
248   /* A(-1)*B(-1) */ /* A(1)*B(1) */
249   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
250   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
251 
252   /* $\pm4$ */
253   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
254 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
255   /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
256   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
257   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
258 
259 #undef v0
260 #undef v1
261 #undef v2
262 #undef v3
263 #undef wse
264 
265   /* A(0)*B(0) */
266   TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
267 
268   /* Infinity */
269   if (UNLIKELY (half != 0)) {
270     if (s > t) {
271       TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
272     } else {
273       TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
274     };
275   };
276 
277   mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
278 
279 #undef r0
280 #undef r1
281 #undef r2
282 #undef r3
283 #undef r4
284 #undef r5
285 #undef r6
286 #undef wsi
287 }
288 
289 #undef TOOM8H_MUL_N_REC
290 #undef TOOM8H_MUL_REC
291 #undef MAYBE_mul_basecase
292 #undef MAYBE_mul_toom22
293 #undef MAYBE_mul_toom33
294 #undef MAYBE_mul_toom44
295 #undef MAYBE_mul_toom8h
296