1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 #if GMP_NUMB_BITS < 29 32 #error Not implemented. 33 #endif 34 35 #if GMP_NUMB_BITS < 43 36 #define BIT_CORRECTION 1 37 #define CORRECTION_BITS GMP_NUMB_BITS 38 #else 39 #define BIT_CORRECTION 0 40 #define CORRECTION_BITS 0 41 #endif 42 43 44 #if TUNE_PROGRAM_BUILD 45 #define MAYBE_mul_basecase 1 46 #define MAYBE_mul_toom22 1 47 #define MAYBE_mul_toom33 1 48 #define MAYBE_mul_toom44 1 49 #define MAYBE_mul_toom8h 1 50 #else 51 #define MAYBE_mul_basecase \ 52 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD) 53 #define MAYBE_mul_toom22 \ 54 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD) 55 #define MAYBE_mul_toom33 \ 56 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD) 57 #define MAYBE_mul_toom44 \ 58 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD) 59 #define MAYBE_mul_toom8h \ 60 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD) 61 #endif 62 63 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \ 64 do { \ 65 if (MAYBE_mul_basecase \ 66 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \ 67 mpn_mul_basecase (p, a, n, b, n); \ 68 if (f) mpn_mul_basecase (p2, a2, n, b2, n); \ 69 } else if (MAYBE_mul_toom22 \ 70 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \ 71 mpn_toom22_mul (p, a, n, b, n, ws); \ 72 if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \ 73 } else if (MAYBE_mul_toom33 \ 74 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \ 75 mpn_toom33_mul (p, a, n, b, n, ws); \ 76 if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \ 77 } else if (MAYBE_mul_toom44 \ 78 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \ 79 mpn_toom44_mul (p, a, n, b, n, ws); \ 80 if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \ 81 } else if (! MAYBE_mul_toom8h \ 82 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \ 83 mpn_toom6h_mul (p, a, n, b, n, ws); \ 84 if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \ 85 } else { \ 86 mpn_toom8h_mul (p, a, n, b, n, ws); \ 87 if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \ 88 } \ 89 } while (0) 90 91 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \ 92 do { mpn_mul (p, a, na, b, nb); } while (0) 93 94 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 95 With: an >= bn >= 86, an*5 < bn * 11. 96 It _may_ work with bn<=?? and bn*?? < an*? < bn*?? 97 98 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0. 99 */ 100 /* Estimate on needed scratch: 101 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8), 102 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6 103 */ 104 105 void 106 mpn_toom8h_mul (mp_ptr pp, 107 mp_srcptr ap, mp_size_t an, 108 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 109 { 110 mp_size_t n, s, t; 111 int p, q, half; 112 int sign; 113 114 /***************************** decomposition *******************************/ 115 116 ASSERT (an >= bn); 117 /* Can not handle too small operands */ 118 ASSERT (bn >= 86); 119 /* Can not handle too much unbalancement */ 120 ASSERT (an <= bn*4); 121 ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11); 122 ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2); 123 ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3); 124 125 /* Limit num/den is a rational number between 126 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */ 127 #define LIMIT_numerator (21) 128 #define LIMIT_denominat (20) 129 130 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */ 131 { 132 half = 0; 133 n = 1 + ((an - 1)>>3); 134 p = q = 7; 135 s = an - 7 * n; 136 t = bn - 7 * n; 137 } 138 else 139 { 140 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */ 141 { p = 9; q = 8; } 142 else if (GMP_NUMB_BITS <= 9*3 || 143 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1)) 144 { p = 9; q = 7; } 145 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */ 146 { p =10; q = 7; } 147 else if (GMP_NUMB_BITS <= 10*3 || 148 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn) 149 { p =10; q = 6; } 150 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/ 151 { p =11; q = 6; } 152 else if (GMP_NUMB_BITS <= 11*3 || 153 an * 4 < 9 * bn) 154 { p =11; q = 5; } 155 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */ 156 { p =12; q = 5; } 157 else if (GMP_NUMB_BITS <= 12*3 || 158 an * 9 < 28 * bn ) /* is 4*... <12*... */ 159 { p =12; q = 4; } 160 else 161 { p =13; q = 4; } 162 163 half = (p+q)&1; 164 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 165 p--; q--; 166 167 s = an - p * n; 168 t = bn - q * n; 169 170 if(half) { /* Recover from badly chosen splitting */ 171 if (UNLIKELY (s<1)) {p--; s+=n; half=0;} 172 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;} 173 } 174 } 175 #undef LIMIT_numerator 176 #undef LIMIT_denominat 177 178 ASSERT (0 < s && s <= n); 179 ASSERT (0 < t && t <= n); 180 ASSERT (half || s + t > 3); 181 ASSERT (n > 2); 182 183 #define r6 (pp + 3 * n) /* 3n+1 */ 184 #define r4 (pp + 7 * n) /* 3n+1 */ 185 #define r2 (pp +11 * n) /* 3n+1 */ 186 #define r0 (pp +15 * n) /* s+t <= 2*n */ 187 #define r7 (scratch) /* 3n+1 */ 188 #define r5 (scratch + 3 * n + 1) /* 3n+1 */ 189 #define r3 (scratch + 6 * n + 2) /* 3n+1 */ 190 #define r1 (scratch + 9 * n + 3) /* 3n+1 */ 191 #define v0 (pp +11 * n) /* n+1 */ 192 #define v1 (pp +12 * n+1) /* n+1 */ 193 #define v2 (pp +13 * n+2) /* n+1 */ 194 #define v3 (scratch +12 * n + 4) /* n+1 */ 195 #define wsi (scratch +12 * n + 4) /* 3n+1 */ 196 #define wse (scratch +13 * n + 5) /* 2n+1 */ 197 198 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may 199 need all of them */ 200 /* if (scratch == NULL) */ 201 /* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */ 202 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn)); 203 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8)); 204 205 /********************** evaluation and recursive calls *********************/ 206 207 /* $\pm1/8$ */ 208 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^ 209 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp); 210 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */ 211 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse); 212 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half)); 213 214 /* $\pm1/4$ */ 215 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 216 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 217 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */ 218 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse); 219 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 220 221 /* $\pm2$ */ 222 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 223 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 224 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */ 225 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse); 226 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2); 227 228 /* $\pm8$ */ 229 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^ 230 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp); 231 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */ 232 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); 233 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6); 234 235 /* $\pm1/2$ */ 236 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 237 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 238 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */ 239 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse); 240 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half); 241 242 /* $\pm1$ */ 243 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 244 if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3)) 245 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 246 else 247 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 248 /* A(-1)*B(-1) */ /* A(1)*B(1) */ 249 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse); 250 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0); 251 252 /* $\pm4$ */ 253 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 254 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 255 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */ 256 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse); 257 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4); 258 259 #undef v0 260 #undef v1 261 #undef v2 262 #undef v3 263 #undef wse 264 265 /* A(0)*B(0) */ 266 TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi); 267 268 /* Infinity */ 269 if (UNLIKELY (half != 0)) { 270 if (s > t) { 271 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 272 } else { 273 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 274 }; 275 }; 276 277 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi); 278 279 #undef r0 280 #undef r1 281 #undef r2 282 #undef r3 283 #undef r4 284 #undef r5 285 #undef r6 286 #undef wsi 287 } 288 289 #undef TOOM8H_MUL_N_REC 290 #undef TOOM8H_MUL_REC 291 #undef MAYBE_mul_basecase 292 #undef MAYBE_mul_toom22 293 #undef MAYBE_mul_toom33 294 #undef MAYBE_mul_toom44 295 #undef MAYBE_mul_toom8h 296