1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 #if GMP_NUMB_BITS < 21 32 #error Not implemented. 33 #endif 34 35 #if TUNE_PROGRAM_BUILD 36 #define MAYBE_mul_basecase 1 37 #define MAYBE_mul_toom22 1 38 #define MAYBE_mul_toom33 1 39 #define MAYBE_mul_toom6h 1 40 #else 41 #define MAYBE_mul_basecase \ 42 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD) 43 #define MAYBE_mul_toom22 \ 44 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD) 45 #define MAYBE_mul_toom33 \ 46 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD) 47 #define MAYBE_mul_toom6h \ 48 (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD) 49 #endif 50 51 #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \ 52 do { \ 53 if (MAYBE_mul_basecase \ 54 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \ 55 mpn_mul_basecase (p, a, n, b, n); \ 56 if (f) \ 57 mpn_mul_basecase (p2, a2, n, b2, n); \ 58 } else if (MAYBE_mul_toom22 \ 59 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \ 60 mpn_toom22_mul (p, a, n, b, n, ws); \ 61 if (f) \ 62 mpn_toom22_mul (p2, a2, n, b2, n, ws); \ 63 } else if (MAYBE_mul_toom33 \ 64 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \ 65 mpn_toom33_mul (p, a, n, b, n, ws); \ 66 if (f) \ 67 mpn_toom33_mul (p2, a2, n, b2, n, ws); \ 68 } else if (! MAYBE_mul_toom6h \ 69 || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \ 70 mpn_toom44_mul (p, a, n, b, n, ws); \ 71 if (f) \ 72 mpn_toom44_mul (p2, a2, n, b2, n, ws); \ 73 } else { \ 74 mpn_toom6h_mul (p, a, n, b, n, ws); \ 75 if (f) \ 76 mpn_toom6h_mul (p2, a2, n, b2, n, ws); \ 77 } \ 78 } while (0) 79 80 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \ 81 do { mpn_mul (p, a, na, b, nb); \ 82 } while (0) 83 84 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 85 With: an >= bn >= 46, an*6 < bn * 17. 86 It _may_ work with bn<=46 and bn*17 < an*6 < bn*18 87 88 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0. 89 */ 90 /* Estimate on needed scratch: 91 S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6), 92 since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6 93 */ 94 95 void 96 mpn_toom6h_mul (mp_ptr pp, 97 mp_srcptr ap, mp_size_t an, 98 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 99 { 100 mp_size_t n, s, t; 101 int p, q, half; 102 int sign; 103 104 /***************************** decomposition *******************************/ 105 106 ASSERT (an >= bn); 107 /* Can not handle too much unbalancement */ 108 ASSERT (bn >= 42); 109 /* Can not handle too much unbalancement */ 110 ASSERT ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17)); 111 112 /* Limit num/den is a rational number between 113 (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */ 114 #define LIMIT_numerator (18) 115 #define LIMIT_denominat (17) 116 117 if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */ 118 { 119 n = 1 + (an - 1) / (size_t) 6; 120 p = q = 5; 121 half = 0; 122 123 s = an - 5 * n; 124 t = bn - 5 * n; 125 } 126 else { 127 if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn) 128 { p = 7; q = 6; } 129 else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn) 130 { p = 7; q = 5; } 131 else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn) /* is 4*... < 8*... */ 132 { p = 8; q = 5; } 133 else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn) /* is 4*... < 8*... */ 134 { p = 8; q = 4; } 135 else 136 { p = 9; q = 4; } 137 138 half = (p ^ q) & 1; 139 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 140 p--; q--; 141 142 s = an - p * n; 143 t = bn - q * n; 144 145 /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/ 146 if (half) { /* Recover from badly chosen splitting */ 147 if (UNLIKELY (s<1)) {p--; s+=n; half=0;} 148 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;} 149 } 150 } 151 #undef LIMIT_numerator 152 #undef LIMIT_denominat 153 154 ASSERT (0 < s && s <= n); 155 ASSERT (0 < t && t <= n); 156 ASSERT (half || s + t > 3); 157 ASSERT (n > 2); 158 159 #define r4 (pp + 3 * n) /* 3n+1 */ 160 #define r2 (pp + 7 * n) /* 3n+1 */ 161 #define r0 (pp +11 * n) /* s+t <= 2*n */ 162 #define r5 (scratch) /* 3n+1 */ 163 #define r3 (scratch + 3 * n + 1) /* 3n+1 */ 164 #define r1 (scratch + 6 * n + 2) /* 3n+1 */ 165 #define v0 (pp + 7 * n) /* n+1 */ 166 #define v1 (pp + 8 * n+1) /* n+1 */ 167 #define v2 (pp + 9 * n+2) /* n+1 */ 168 #define v3 (scratch + 9 * n + 3) /* n+1 */ 169 #define wsi (scratch + 9 * n + 3) /* 3n+1 */ 170 #define wse (scratch +10 * n + 4) /* 2n+1 */ 171 172 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may 173 need all of them */ 174 /* if (scratch == NULL) */ 175 /* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */ 176 ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn)); 177 ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6)); 178 179 /********************** evaluation and recursive calls *********************/ 180 /* $\pm1/2$ */ 181 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 182 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 183 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */ 184 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse); 185 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half); 186 187 /* $\pm1$ */ 188 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 189 if (UNLIKELY (q == 3)) 190 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 191 else 192 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 193 /* A(-1)*B(-1) */ /* A(1)*B(1) */ 194 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse); 195 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0); 196 197 /* $\pm4$ */ 198 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 199 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 200 /* A(-4)*B(-4) */ 201 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */ 202 mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4); 203 204 /* $\pm1/4$ */ 205 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 206 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 207 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */ 208 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse); 209 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 210 211 /* $\pm2$ */ 212 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 213 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 214 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */ 215 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse); 216 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2); 217 218 #undef v0 219 #undef v1 220 #undef v2 221 #undef v3 222 #undef wse 223 224 /* A(0)*B(0) */ 225 TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi); 226 227 /* Infinity */ 228 if (UNLIKELY (half != 0)) { 229 if (s > t) { 230 TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 231 } else { 232 TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 233 }; 234 }; 235 236 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi); 237 238 #undef r0 239 #undef r1 240 #undef r2 241 #undef r3 242 #undef r4 243 #undef r5 244 #undef wsi 245 } 246 247 #undef TOOM6H_MUL_N_REC 248 #undef TOOM6H_MUL_REC 249 #undef MAYBE_mul_basecase 250 #undef MAYBE_mul_toom22 251 #undef MAYBE_mul_toom33 252 #undef MAYBE_mul_toom6h 253