1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2011, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 #include "gmp-impl.h" 38 39 #define BINVERT_3 MODLIMB_INVERSE_3 40 41 #define BINVERT_15 \ 42 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15) 43 44 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK) 45 46 #ifndef mpn_divexact_by3 47 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 48 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0) 49 #else 50 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 51 #endif 52 #endif 53 54 #ifndef mpn_divexact_by45 55 #if GMP_NUMB_BITS % 12 == 0 56 #define mpn_divexact_by45(dst,src,size) \ 57 (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45))) 58 #else 59 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 60 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0) 61 #else 62 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45) 63 #endif 64 #endif 65 #endif 66 67 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 68 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n) 69 #else 70 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws) 71 #endif 72 73 #if HAVE_NATIVE_mpn_sublsh_n 74 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s) 75 #else 76 static mp_limb_t 77 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 78 { 79 #if USE_MUL_1 && 0 80 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s)); 81 #else 82 mp_limb_t __cy; 83 __cy = mpn_lshift (ws,src,n,s); 84 return __cy + mpn_sub_n (dst,dst,ws,n); 85 #endif 86 } 87 #endif 88 89 90 #if HAVE_NATIVE_mpn_subrsh 91 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s) 92 #else 93 /* This is not a correct definition, it assumes no carry */ 94 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \ 95 do { \ 96 mp_limb_t __cy; \ 97 MPN_DECR_U (dst, nd, src[0] >> s); \ 98 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \ 99 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \ 100 } while (0) 101 #endif 102 103 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation 104 points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely, 105 we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of 106 degree 7 (or 6), given the 8 (rsp. 7) values: 107 108 r1 = limit at infinity of f(x) / x^7, 109 r2 = f(4), 110 r3 = f(-4), 111 r4 = f(2), 112 r5 = f(-2), 113 r6 = f(1), 114 r7 = f(-1), 115 r8 = f(0). 116 117 All couples of the form f(n),f(-n) must be already mixed with 118 toom_couple_handling(f(n),...,f(-n),...) 119 120 The result is stored in {pp, spt + 7*n (or 6*n)}. 121 At entry, r8 is stored at {pp, 2n}, 122 r5 is stored at {pp + 3n, 3n + 1}. 123 124 The other values are 2n+... limbs each (with most significant limbs small). 125 126 All intermediate results are positive. 127 Inputs are destroyed. 128 */ 129 130 void 131 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n, 132 mp_ptr r3, mp_ptr r7, 133 mp_size_t spt, mp_ptr ws) 134 { 135 mp_limb_signed_t cy; 136 mp_ptr r5, r1; 137 r5 = (pp + 3 * n); /* 3n+1 */ 138 r1 = (pp + 7 * n); /* spt */ 139 140 /******************************* interpolation *****************************/ 141 142 DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws); 143 cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws); 144 MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy); 145 146 DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws); 147 cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws); 148 MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy); 149 150 r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n); 151 cy = mpn_sub_n (r7, r7, r1, spt); 152 MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy); 153 154 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1)); 155 ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2)); 156 157 ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1)); 158 159 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1)); 160 161 mpn_divexact_by45 (r3, r3, 3 * n + 1); 162 163 ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1)); 164 165 ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws)); 166 167 /* last interpolation steps... */ 168 /* ... are mixed with recomposition */ 169 170 /***************************** recomposition *******************************/ 171 /* 172 pp[] prior to operations: 173 |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp 174 175 summation scheme for remaining operations: 176 |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp 177 |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp 178 ||_H r3|_M r3|_L*r3| 179 ||_H_r7|_M_r7|_L_r7| 180 ||-H r3|-M r3|-L*r3| 181 ||-H*r5|-M_r5|-L_r5| 182 */ 183 184 cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */ 185 cy-= mpn_sub_n (pp + n, pp + n, r5, n); 186 if (cy > 0) { 187 MPN_INCR_U (r7 + n, 2*n + 1, 1); 188 cy = 0; 189 } 190 191 cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */ 192 MPN_DECR_U (r7 + 2*n, n + 1, cy); 193 194 cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */ 195 r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */ 196 cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */ 197 if (UNLIKELY(0 > cy)) 198 MPN_DECR_U (r5 + n + 1, 2*n, 1); 199 else 200 MPN_INCR_U (r5 + n + 1, 2*n, cy); 201 202 ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */ 203 204 cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]); 205 MPN_INCR_U (r3 + 2*n, n + 1, cy); 206 cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n); 207 if (LIKELY(spt != n)) 208 MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]); 209 else 210 ASSERT (r3[3*n] + cy == 0); 211 } 212