1 /* Implementation of the algorithm for Toom-Cook 4.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 #include "gmp-impl.h"
39
40 /* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
41 static int
abs_sub_n(mp_ptr rp,mp_srcptr ap,mp_srcptr bp,mp_size_t n)42 abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
43 {
44 mp_limb_t x, y;
45 while (--n >= 0)
46 {
47 x = ap[n];
48 y = bp[n];
49 if (x != y)
50 {
51 n++;
52 if (x > y)
53 {
54 mpn_sub_n (rp, ap, bp, n);
55 return 0;
56 }
57 else
58 {
59 mpn_sub_n (rp, bp, ap, n);
60 return ~0;
61 }
62 }
63 rp[n] = 0;
64 }
65 return 0;
66 }
67
68 static int
abs_sub_add_n(mp_ptr rm,mp_ptr rp,mp_srcptr rs,mp_size_t n)69 abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
70 int result;
71 result = abs_sub_n (rm, rp, rs, n);
72 ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
73 return result;
74 }
75
76
77 /* Toom-4.5, the splitting 6x3 unbalanced version.
78 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
79
80 <--s-><--n--><--n--><--n--><--n--><--n-->
81 ____ ______ ______ ______ ______ ______
82 |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
83 |b2_|__b1__|__b0__|
84 <-t-><--n--><--n-->
85
86 */
87 #define TOOM_63_MUL_N_REC(p, a, b, n, ws) \
88 do { mpn_mul_n (p, a, b, n); \
89 } while (0)
90
91 #define TOOM_63_MUL_REC(p, a, na, b, nb, ws) \
92 do { mpn_mul (p, a, na, b, nb); \
93 } while (0)
94
95 void
mpn_toom63_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)96 mpn_toom63_mul (mp_ptr pp,
97 mp_srcptr ap, mp_size_t an,
98 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
99 {
100 mp_size_t n, s, t;
101 mp_limb_t cy;
102 int sign;
103
104 /***************************** decomposition *******************************/
105 #define a5 (ap + 5 * n)
106 #define b0 (bp + 0 * n)
107 #define b1 (bp + 1 * n)
108 #define b2 (bp + 2 * n)
109
110 ASSERT (an >= bn);
111 n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
112
113 s = an - 5 * n;
114 t = bn - 2 * n;
115
116 ASSERT (0 < s && s <= n);
117 ASSERT (0 < t && t <= n);
118 /* WARNING! it assumes s+t>=n */
119 ASSERT ( s + t >= n );
120 ASSERT ( s + t > 4);
121 /* WARNING! it assumes n>1 */
122 ASSERT ( n > 2);
123
124 #define r8 pp /* 2n */
125 #define r7 scratch /* 3n+1 */
126 #define r5 (pp + 3*n) /* 3n+1 */
127 #define v0 (pp + 3*n) /* n+1 */
128 #define v1 (pp + 4*n+1) /* n+1 */
129 #define v2 (pp + 5*n+2) /* n+1 */
130 #define v3 (pp + 6*n+3) /* n+1 */
131 #define r3 (scratch + 3 * n + 1) /* 3n+1 */
132 #define r1 (pp + 7*n) /* s+t <= 2*n */
133 #define ws (scratch + 6 * n + 2) /* ??? */
134
135 /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
136 need all of them, when DO_mpn_sublsh_n usea a scratch */
137 /* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
138
139 /********************** evaluation and recursive calls *********************/
140 /* $\pm4$ */
141 sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
142 pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
143 /* FIXME: use addlsh */
144 v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
145 if ( n == t )
146 v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
147 else
148 v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
149 sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
150 TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
151 TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
152 mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
153
154 /* $\pm1$ */
155 sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp);
156 /* Compute bs1 and bsm1. Code taken from toom33 */
157 cy = mpn_add (ws, b0, n, b2, t);
158 #if HAVE_NATIVE_mpn_add_n_sub_n
159 if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
160 {
161 cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
162 v3[n] = cy >> 1;
163 v1[n] = 0;
164 sign = ~sign;
165 }
166 else
167 {
168 mp_limb_t cy2;
169 cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
170 v3[n] = cy + (cy2 >> 1);
171 v1[n] = cy - (cy2 & 1);
172 }
173 #else
174 v3[n] = cy + mpn_add_n (v3, ws, b1, n);
175 if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
176 {
177 mpn_sub_n (v1, b1, ws, n);
178 v1[n] = 0;
179 sign = ~sign;
180 }
181 else
182 {
183 cy -= mpn_sub_n (v1, ws, b1, n);
184 v1[n] = cy;
185 }
186 #endif
187 TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
188 TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
189 mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
190
191 /* $\pm2$ */
192 sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
193 pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
194 /* FIXME: use addlsh or addlsh2 */
195 v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
196 if ( n == t )
197 v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
198 else
199 v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
200 sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
201 TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
202 TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
203 mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
204
205 /* A(0)*B(0) */
206 TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
207
208 /* Infinity */
209 if (s > t) {
210 TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
211 } else {
212 TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
213 };
214
215 mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
216
217 #undef a5
218 #undef b0
219 #undef b1
220 #undef b2
221 #undef r1
222 #undef r3
223 #undef r5
224 #undef v0
225 #undef v1
226 #undef v2
227 #undef v3
228 #undef r7
229 #undef r8
230 #undef ws
231 }
232