/* Interpolation for the algorithm Toom-Cook 8.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012, 2015, 2020 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"


#if GMP_NUMB_BITS < 29
#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
#endif

#if GMP_NUMB_BITS < 28
#error Not implemented: divexact_by188513325 and _by182712915 will not work.
#endif


/* FIXME: tuneup should decide the best variant */
#ifndef AORSMUL_FASTER_AORS_AORSLSH
#define AORSMUL_FASTER_AORS_AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_AORS_2AORSLSH
#define AORSMUL_FASTER_AORS_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_2AORSLSH
#define AORSMUL_FASTER_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_3AORSLSH
#define AORSMUL_FASTER_3AORSLSH 1
#endif


#if HAVE_NATIVE_mpn_sublsh_n
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_submul_1 (dst, src, n, CNST_LIMB(1) << (s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift (ws, src, n, s);
  return __cy + mpn_sub_n (dst, dst, ws, n);
#endif
}
#endif

#if HAVE_NATIVE_mpn_addlsh_n
#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
#else
#if !defined (AORSMUL_FASTER_2AORSLSH) && !defined (AORSMUL_FASTER_AORS_2AORSLSH)
static mp_limb_t
DO_mpn_addlsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1 (dst, src, n, CNST_LIMB(1) << (s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift (ws, src, n, s);
  return __cy + mpn_add_n (dst, dst, ws, n);
#endif
}
#endif
#endif

#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
#else
/* FIXME: This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)                               \
do {                                                                    \
  mp_limb_t __cy;                                                       \
  MPN_DECR_U (dst, nd, src[0] >> s);                                    \
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);                         \
} while (0)
#endif
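
/* Illustration only: a minimal sanity check of what the generic
   DO_mpn_sublsh_n above computes, namely {dst,n} -= {src,n} << s, with the
   borrow (plus any bits shifted out of the top limb) as return value;
   DO_mpn_addlsh_n and DO_mpn_subrsh are the analogous += and right-shift
   variants.  The check assumes no nails and a shift that loses no bits;
   it is not compiled into the library. */
#if 0
static void
check_DO_mpn_sublsh_n (void)
{
  mp_limb_t dst[2] = { 100, 7 };
  mp_limb_t src[2] = { 3, 0 };
  mp_limb_t ws[2], cy;
  cy = DO_mpn_sublsh_n (dst, src, 2, 4, ws);  /* {dst,2} -= {src,2} << 4 */
  ASSERT_ALWAYS (dst[0] == 100 - (3 << 4));   /* 100 - 48 = 52 */
  ASSERT_ALWAYS (dst[1] == 7 && cy == 0);     /* no borrow, no lost bits */
}
#endif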

#if GMP_NUMB_BITS < 43
#define BIT_CORRECTION 1
#define CORRECTION_BITS GMP_NUMB_BITS
#else
#define BIT_CORRECTION 0
#define CORRECTION_BITS 0
#endif

#define BINVERT_9 \
  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)

#define BINVERT_255 \
  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))

/* FIXME: find some more general expressions for inverses */
#if GMP_LIMB_BITS == 32
#define BINVERT_2835           (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B))
#define BINVERT_42525          (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35))
#define BINVERT_182712915      (GMP_NUMB_MASK & CNST_LIMB(0x550659DB))
#define BINVERT_188513325      (GMP_NUMB_MASK & CNST_LIMB(0xFBC333A5))
#define BINVERT_255x182712915L (GMP_NUMB_MASK & CNST_LIMB(0x6FC4CB25))
#define BINVERT_255x188513325L (GMP_NUMB_MASK & CNST_LIMB(0x6864275B))
#if GMP_NAIL_BITS == 0
#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
#else /* GMP_NAIL_BITS != 0 */
#define BINVERT_255x182712915H \
  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
#define BINVERT_255x188513325H \
  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
#endif
#else
#if GMP_LIMB_BITS == 64
#define BINVERT_2835          (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
#define BINVERT_42525         (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
#define BINVERT_255x182712915 (GMP_NUMB_MASK & CNST_LIMB(0x1B649A076FC4CB25))
#define BINVERT_255x188513325 (GMP_NUMB_MASK & CNST_LIMB(0x06DB993A6864275B))
#endif
#endif

#ifndef mpn_divexact_by255
#if GMP_NUMB_BITS % 8 == 0
#define mpn_divexact_by255(dst,src,size) \
  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
#else
#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
#endif
#endif
#endif

#ifndef mpn_divexact_by255x4
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
#else
#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
#endif
#endif

#ifndef mpn_divexact_by9x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
#else
#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
#endif
#endif

#ifndef mpn_divexact_by42525x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
#else
#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
#endif
#endif

#ifndef mpn_divexact_by2835x64
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
#else
#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
#endif
#endif
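
/* Illustration only: the BINVERT_* constants above are inverses of the odd
   divisors modulo 2^GMP_NUMB_BITS, so an exact division by an odd d reduces
   to one multiplication, and a power-of-two factor (the x4, x16, x64 in the
   macro names) to a right shift -- the trailing argument of
   mpn_pi1_bdiv_q_1.  A single-limb sketch, assuming no nails and that 9
   divides a exactly; not compiled into the library. */
#if 0
static mp_limb_t
example_divexact_by9 (mp_limb_t a)
{
  /* 9 * BINVERT_9 == 1 (mod 2^GMP_NUMB_BITS), so this product equals a/9. */
  return (a * BINVERT_9) & GMP_NUMB_MASK;
}
#endif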

#ifndef mpn_divexact_by255x182712915
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
/* FIXME: use mpn_bdiv_q_2_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
#define mpn_divexact_by255x182712915(dst,src,size)                            \
  do {                                                                        \
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);  \
    mpn_divexact_by255(dst,dst,size);                                         \
  } while(0)
#else
#define mpn_divexact_by255x182712915(dst,src,size)      \
  do {                                                  \
    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));  \
    mpn_divexact_by255(dst,dst,size);                   \
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
#define mpn_divexact_by255x182712915(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
#else
#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

#ifndef mpn_divexact_by255x188513325
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
/* FIXME: use mpn_bdiv_q_1_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
#define mpn_divexact_by255x188513325(dst,src,size)                            \
  do {                                                                        \
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);  \
    mpn_divexact_by255(dst,dst,size);                                         \
  } while(0)
#else
#define mpn_divexact_by255x188513325(dst,src,size)      \
  do {                                                  \
    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));  \
    mpn_divexact_by255(dst,dst,size);                   \
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
#define mpn_divexact_by255x188513325(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
#else
#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif
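
/* Illustration only: the interpolation below is a fixed sequence of
   additions, subtractions, shifts and exact divisions that inverts the
   16x16 matrix of the evaluation points.  The same idea on the 3 points
   {0, 1, -1} for a degree-2 f(x) = c0 + c1*x + c2*x^2, assuming
   single-limb nonnegative values with no overflow (both halvings are
   exact); not compiled into the library. */
#if 0
static void
example_interpolate_3pts (mp_limb_t v0, mp_limb_t v1, mp_limb_t vm1,
                          mp_limb_t *c0, mp_limb_t *c1, mp_limb_t *c2)
{
  *c0 = v0;                      /* f(0) */
  *c1 = (v1 - vm1) >> 1;         /* (f(1) - f(-1)) / 2 */
  *c2 = ((v1 + vm1) >> 1) - v0;  /* (f(1) + f(-1)) / 2 - f(0) */
}
#endif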

/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
   points: infinity (8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
   +-1/8, 0.  More precisely, we want to compute
   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
   14), given the 16 (resp. 15) values:

     r0 = limit at infinity of f(x) / x^15,
     r1 = f(8), f(-8),
     r2 = f(4), f(-4),
     r3 = f(2), f(-2),
     r4 = f(1), f(-1),
     r5 = f(1/4), f(-1/4),
     r6 = f(1/2), f(-1/2),
     r7 = f(1/8), f(-1/8),
     r8 = f(0).

   All pairs of the form f(n), f(-n) must already have been mixed with
   toom_couple_handling(f(n),...,f(-n),...).

   The result is stored in {pp, spt + 7*n (or 8*n)}.
   At entry, r8 is stored at {pp, 2n},
   r6 is stored at {pp + 3n, 3n + 1},
   r4 is stored at {pp + 7n, 3n + 1},
   r2 is stored at {pp + 11n, 3n + 1},
   r0 is stored at {pp + 15n, spt}.

   The other values are 3n+1 limbs each (with most significant limbs small).

   Negative intermediate results are stored in two's complement.
   Inputs are destroyed.
*/

void
mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
                            mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

#define   r6    (pp + n3)         /* 3n+1 */
#define   r4    (pp + 7 * n)      /* 3n+1 */
#define   r2    (pp + 11 * n)     /* 3n+1 */
#define   r0    (pp + 15 * n)     /* s+t <= 2*n */

  ASSERT (spt <= 2 * n);
  /******************************* interpolation *****************************/
  if (half != 0)
    {
      cy = mpn_sub_n (r4, r4, r0, spt);
      MPN_DECR_U (r4 + spt, n3p1 - spt, cy);

      cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
      MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
      DO_mpn_subrsh (r6, n3p1, r0, spt, 2, wsi);

      cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
      MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
      DO_mpn_subrsh (r5, n3p1, r0, spt, 4, wsi);

      cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
#if BIT_CORRECTION
      cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
                      n3p1 - spt - BIT_CORRECTION, cy);
      ASSERT (BIT_CORRECTION > 0 || cy == 0);
      /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
      cy = r7[n3p1];
      r7[n3p1] = 0x80;
#else
      MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
#endif
      DO_mpn_subrsh (r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
#if BIT_CORRECTION
      /* FIXME: assumes r7[n3p1] is writable. */
      ASSERT (BIT_CORRECTION > 0 || r7[n3p1] == 0x80);
      r7[n3p1] = cy;
#endif
    }

  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
  DO_mpn_subrsh (r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY (mpn_add_n (r2, r2, r5, n3p1));
  MP_PTR_SWAP (r5, wsi);
#endif

  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
  DO_mpn_subrsh (r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
#else
  ASSERT_NOCARRY (mpn_add_n (wsi, r3, r6, n3p1));
  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
  MP_PTR_SWAP (r3, wsi);
#endif

  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
#if BIT_CORRECTION
  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
  cy = mpn_sub_1 (r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
  ASSERT (BIT_CORRECTION > 0 || cy != 0);
#else
  r7[n3] -= cy;
  DO_mpn_subrsh (r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
#else
  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
  mpn_add_n (r1, r1, r7, n3p1);  /* if BIT_CORRECTION != 0, can give a carry. */
  MP_PTR_SWAP (r7, wsi);
#endif
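
  /* Each of the three blocks above replaces a pair of values by its sum
     and difference: r2 <- r2+r5, r5 <- r5-r2; r3 <- r3+r6, r6 <- r6-r3;
     r1 <- r1+r7, r7 <- r7-r1.  On the generic paths one of the two
     results is built in the scratch area wsi and the pointers are then
     swapped, so r3, r5, r7 and wsi end up permuted; this is why those
     pointers are taken by value. */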

  r4[n3] -= mpn_sub_n (r4 + n, r4 + n, pp, 2 * n);

#if AORSMUL_FASTER_2AORSLSH
  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r6, n3p1,  2, wsi); /* can be negative */
  DO_mpn_sublsh_n (r5, r6, n3p1, 10, wsi); /* can be negative */
#endif

  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
#if AORSMUL_FASTER_3AORSLSH
  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
#else
  DO_mpn_sublsh_n (r7, r6, n3p1,  4, wsi); /* can be negative */
  DO_mpn_sublsh_n (r7, r6, n3p1, 12, wsi); /* can be negative */
  DO_mpn_sublsh_n (r7, r6, n3p1, 20, wsi); /* can be negative */
#endif
  mpn_divexact_by255x188513325 (r7, r7, n3p1);

  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
  /* A division by 2835x64 follows.  Warning: the operand can be negative! */
  mpn_divexact_by2835x64 (r5, r5, n3p1);
  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS - 7))) != 0)
    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS - 6));

#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
#else
  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
#endif
#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
#else
  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
#endif
  /* A division by 255x4 follows.  Warning: the operand can be negative! */
  mpn_divexact_by255x4 (r6, r6, n3p1);
  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS - 3))) != 0)
    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS - 2));

  ASSERT_NOCARRY (DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));

  ASSERT_NOCARRY (DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
  ASSERT_NOCARRY (mpn_submul_1 (r2, r3, n3p1, 400));

  /* If GMP_NUMB_BITS < 42, the next operations on r1 can give a carry! */
  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
  mpn_submul_1 (r1, r2, n3p1, 1428);
  mpn_submul_1 (r1, r3, n3p1, 112896);
  mpn_divexact_by255x182712915 (r1, r1, n3p1);

  ASSERT_NOCARRY (mpn_submul_1 (r2, r1, n3p1, 15181425));
  mpn_divexact_by42525x16 (r2, r2, n3p1);

#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY (mpn_submul_1 (r3, r1, n3p1, 3969));
#else
  ASSERT_NOCARRY (mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY (DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
  ASSERT_NOCARRY (DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
#endif
  ASSERT_NOCARRY (mpn_submul_1 (r3, r2, n3p1, 900));
  mpn_divexact_by9x16 (r3, r3, n3p1);

  ASSERT_NOCARRY (mpn_sub_n (r4, r4, r1, n3p1));
  ASSERT_NOCARRY (mpn_sub_n (r4, r4, r3, n3p1));
  ASSERT_NOCARRY (mpn_sub_n (r4, r4, r2, n3p1));

#ifdef HAVE_NATIVE_mpn_rsh1add_n
  mpn_rsh1add_n (r6, r2, r6, n3p1);
  r6[n3p1 - 1] &= GMP_NUMB_MASK >> 1;
#else
  mpn_add_n (r6, r2, r6, n3p1);
  ASSERT_NOCARRY (mpn_rshift (r6, r6, n3p1, 1));
#endif
  ASSERT_NOCARRY (mpn_sub_n (r2, r2, r6, n3p1));

#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (r5, r3, r5, n3p1);
  r5[n3p1 - 1] &= GMP_NUMB_MASK >> 1;
#else
  mpn_sub_n (r5, r3, r5, n3p1);
  ASSERT_NOCARRY (mpn_rshift (r5, r5, n3p1, 1));
#endif
  ASSERT_NOCARRY (mpn_sub_n (r3, r3, r5, n3p1));
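
  /* The blocks above and below compute exact halvings: r6 <- (r2+r6)/2,
     r5 <- (r3-r5)/2, then r7 <- (r1+r7)/2.  On the generic path the
     halving is an addition (or subtraction) followed by a one-bit right
     shift, and ASSERT_NOCARRY checks that no bit falls off, i.e. that
     the division by 2 is exact. */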

#ifdef HAVE_NATIVE_mpn_rsh1add_n
  mpn_rsh1add_n (r7, r1, r7, n3p1);
  r7[n3p1 - 1] &= GMP_NUMB_MASK >> 1;
#else
  mpn_add_n (r7, r1, r7, n3p1);
  ASSERT_NOCARRY (mpn_rshift (r7, r7, n3p1, 1));
#endif
  ASSERT_NOCARRY (mpn_sub_n (r1, r1, r7, n3p1));

  /* last interpolation steps... */
  /* ... could be mixed with recomposition
        ||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp

    summation scheme for remaining operations:
    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
        ||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
  */

  cy = mpn_add_n (pp + n, pp + n, r7, n);
  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r7[n3] + mpn_add_nc (pp + n3, pp + n3, r7 + 2 * n, n, cy);
#else
  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);

  pp[2 * n3] += mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
#else
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  pp[10 * n] += mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc (pp + 11 * n, pp + 11 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 12 * n, 2 * n + 1, cy);

  pp[14 * n] += mpn_add_n (pp + 13 * n, pp + 13 * n, r1, n);
  if (half)
    {
      cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
#if HAVE_NATIVE_mpn_add_nc
      if (LIKELY (spt > n))
        {
          cy = r1[n3] + mpn_add_nc (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
          MPN_INCR_U (pp + 16 * n, spt - n, cy);
        }
      else
        ASSERT_NOCARRY (mpn_add_nc (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
#else
      MPN_INCR_U (r1 + 2 * n, n + 1, cy);
      if (LIKELY (spt > n))
        {
          cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
          MPN_INCR_U (pp + 16 * n, spt - n, cy);
        }
      else
        ASSERT_NOCARRY (mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
#endif
    }
  else
    ASSERT_NOCARRY (mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));

#undef r0
#undef r2
#undef r4
#undef r6
}
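
/* In the rest of the library this routine is the interpolation back-end of
   mpn_toom8h_mul and mpn_toom8_sqr, which produce the sixteen point values
   and then call mpn_toom_interpolate_16pts to assemble the product in pp. */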