1 /* Implementation of the algorithm for Toom-Cook 4.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 38 #include "gmp-impl.h" 39 40 /* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */ 41 static int 42 abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) 43 { 44 mp_limb_t x, y; 45 while (--n >= 0) 46 { 47 x = ap[n]; 48 y = bp[n]; 49 if (x != y) 50 { 51 n++; 52 if (x > y) 53 { 54 mpn_sub_n (rp, ap, bp, n); 55 return 0; 56 } 57 else 58 { 59 mpn_sub_n (rp, bp, ap, n); 60 return ~0; 61 } 62 } 63 rp[n] = 0; 64 } 65 return 0; 66 } 67 68 static int 69 abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) { 70 int result; 71 result = abs_sub_n (rm, rp, rs, n); 72 ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n)); 73 return result; 74 } 75 76 77 /* Toom-4.5, the splitting 6x3 unbalanced version. 78 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0. 79 80 <--s-><--n--><--n--><--n--><--n--><--n--> 81 ____ ______ ______ ______ ______ ______ 82 |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__| 83 |b2_|__b1__|__b0__| 84 <-t-><--n--><--n--> 85 86 */ 87 #define TOOM_63_MUL_N_REC(p, a, b, n, ws) \ 88 do { mpn_mul_n (p, a, b, n); \ 89 } while (0) 90 91 #define TOOM_63_MUL_REC(p, a, na, b, nb, ws) \ 92 do { mpn_mul (p, a, na, b, nb); \ 93 } while (0) 94 95 void 96 mpn_toom63_mul (mp_ptr pp, 97 mp_srcptr ap, mp_size_t an, 98 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 99 { 100 mp_size_t n, s, t; 101 mp_limb_t cy; 102 int sign; 103 104 /***************************** decomposition *******************************/ 105 #define a5 (ap + 5 * n) 106 #define b0 (bp + 0 * n) 107 #define b1 (bp + 1 * n) 108 #define b2 (bp + 2 * n) 109 110 ASSERT (an >= bn); 111 n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3); 112 113 s = an - 5 * n; 114 t = bn - 2 * n; 115 116 ASSERT (0 < s && s <= n); 117 ASSERT (0 < t && t <= n); 118 /* WARNING! it assumes s+t>=n */ 119 ASSERT ( s + t >= n ); 120 ASSERT ( s + t > 4); 121 /* WARNING! it assumes n>1 */ 122 ASSERT ( n > 2); 123 124 #define r8 pp /* 2n */ 125 #define r7 scratch /* 3n+1 */ 126 #define r5 (pp + 3*n) /* 3n+1 */ 127 #define v0 (pp + 3*n) /* n+1 */ 128 #define v1 (pp + 4*n+1) /* n+1 */ 129 #define v2 (pp + 5*n+2) /* n+1 */ 130 #define v3 (pp + 6*n+3) /* n+1 */ 131 #define r3 (scratch + 3 * n + 1) /* 3n+1 */ 132 #define r1 (pp + 7*n) /* s+t <= 2*n */ 133 #define ws (scratch + 6 * n + 2) /* ??? */ 134 135 /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may 136 need all of them, when DO_mpn_sublsh_n usea a scratch */ 137 /* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */ 138 139 /********************** evaluation and recursive calls *********************/ 140 /* $\pm4$ */ 141 sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp); 142 pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */ 143 /* FIXME: use addlsh */ 144 v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */ 145 if ( n == t ) 146 v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */ 147 else 148 v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */ 149 sign ^= abs_sub_add_n (v1, v3, pp, n + 1); 150 TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */ 151 TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */ 152 mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4); 153 154 /* $\pm1$ */ 155 sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp); 156 /* Compute bs1 and bsm1. Code taken from toom33 */ 157 cy = mpn_add (ws, b0, n, b2, t); 158 #if HAVE_NATIVE_mpn_add_n_sub_n 159 if (cy == 0 && mpn_cmp (ws, b1, n) < 0) 160 { 161 cy = mpn_add_n_sub_n (v3, v1, b1, ws, n); 162 v3[n] = cy >> 1; 163 v1[n] = 0; 164 sign = ~sign; 165 } 166 else 167 { 168 mp_limb_t cy2; 169 cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n); 170 v3[n] = cy + (cy2 >> 1); 171 v1[n] = cy - (cy2 & 1); 172 } 173 #else 174 v3[n] = cy + mpn_add_n (v3, ws, b1, n); 175 if (cy == 0 && mpn_cmp (ws, b1, n) < 0) 176 { 177 mpn_sub_n (v1, b1, ws, n); 178 v1[n] = 0; 179 sign = ~sign; 180 } 181 else 182 { 183 cy -= mpn_sub_n (v1, ws, b1, n); 184 v1[n] = cy; 185 } 186 #endif 187 TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */ 188 TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */ 189 mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0); 190 191 /* $\pm2$ */ 192 sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp); 193 pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */ 194 /* FIXME: use addlsh or addlsh2 */ 195 v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */ 196 if ( n == t ) 197 v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */ 198 else 199 v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */ 200 sign ^= abs_sub_add_n (v1, v3, pp, n + 1); 201 TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */ 202 TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */ 203 mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2); 204 205 /* A(0)*B(0) */ 206 TOOM_63_MUL_N_REC(pp, ap, bp, n, ws); 207 208 /* Infinity */ 209 if (s > t) { 210 TOOM_63_MUL_REC(r1, a5, s, b2, t, ws); 211 } else { 212 TOOM_63_MUL_REC(r1, b2, t, a5, s, ws); 213 }; 214 215 mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws); 216 217 #undef a5 218 #undef b0 219 #undef b1 220 #undef b2 221 #undef r1 222 #undef r3 223 #undef r5 224 #undef v0 225 #undef v1 226 #undef v2 227 #undef v3 228 #undef r7 229 #undef r8 230 #undef ws 231 } 232