1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 #include "gmp-impl.h"
38
39 #define BINVERT_3 MODLIMB_INVERSE_3
40
41 #define BINVERT_15 \
42 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
43
44 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
45
46 #ifndef mpn_divexact_by3
47 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
48 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
49 #else
50 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
51 #endif
52 #endif
53
54 #ifndef mpn_divexact_by45
55 #if GMP_NUMB_BITS % 12 == 0
56 #define mpn_divexact_by45(dst,src,size) \
57 (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
58 #else
59 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
60 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
61 #else
62 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
63 #endif
64 #endif
65 #endif
66
67 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
68 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
69 #else
70 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
71 #endif
72
73 #if HAVE_NATIVE_mpn_sublsh_n
74 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
75 #else
76 static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)77 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
78 {
79 #if USE_MUL_1 && 0
80 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
81 #else
82 mp_limb_t __cy;
83 __cy = mpn_lshift (ws,src,n,s);
84 return __cy + mpn_sub_n (dst,dst,ws,n);
85 #endif
86 }
87 #endif
88
89
90 #if HAVE_NATIVE_mpn_subrsh
91 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
92 #else
93 /* This is not a correct definition, it assumes no carry */
94 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
95 do { \
96 mp_limb_t __cy; \
97 MPN_DECR_U (dst, nd, src[0] >> s); \
98 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
99 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
100 } while (0)
101 #endif
102
103 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
104 points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
105 we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
106 degree 7 (or 6), given the 8 (rsp. 7) values:
107
108 r1 = limit at infinity of f(x) / x^7,
109 r2 = f(4),
110 r3 = f(-4),
111 r4 = f(2),
112 r5 = f(-2),
113 r6 = f(1),
114 r7 = f(-1),
115 r8 = f(0).
116
117 All couples of the form f(n),f(-n) must be already mixed with
118 toom_couple_handling(f(n),...,f(-n),...)
119
120 The result is stored in {pp, spt + 7*n (or 6*n)}.
121 At entry, r8 is stored at {pp, 2n},
122 r5 is stored at {pp + 3n, 3n + 1}.
123
124 The other values are 2n+... limbs each (with most significant limbs small).
125
126 All intermediate results are positive.
127 Inputs are destroyed.
128 */
129
130 void
mpn_toom_interpolate_8pts(mp_ptr pp,mp_size_t n,mp_ptr r3,mp_ptr r7,mp_size_t spt,mp_ptr ws)131 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
132 mp_ptr r3, mp_ptr r7,
133 mp_size_t spt, mp_ptr ws)
134 {
135 mp_limb_signed_t cy;
136 mp_ptr r5, r1;
137 r5 = (pp + 3 * n); /* 3n+1 */
138 r1 = (pp + 7 * n); /* spt */
139
140 /******************************* interpolation *****************************/
141
142 DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
143 cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
144 MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
145
146 DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
147 cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
148 MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
149
150 r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
151 cy = mpn_sub_n (r7, r7, r1, spt);
152 MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
153
154 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
155 ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
156
157 ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
158
159 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
160
161 mpn_divexact_by45 (r3, r3, 3 * n + 1);
162
163 ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
164
165 ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
166
167 /* last interpolation steps... */
168 /* ... are mixed with recomposition */
169
170 /***************************** recomposition *******************************/
171 /*
172 pp[] prior to operations:
173 |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
174
175 summation scheme for remaining operations:
176 |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
177 |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
178 ||_H r3|_M r3|_L*r3|
179 ||_H_r7|_M_r7|_L_r7|
180 ||-H r3|-M r3|-L*r3|
181 ||-H*r5|-M_r5|-L_r5|
182 */
183
184 cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
185 cy-= mpn_sub_n (pp + n, pp + n, r5, n);
186 if (cy > 0) {
187 MPN_INCR_U (r7 + n, 2*n + 1, 1);
188 cy = 0;
189 }
190
191 cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
192 MPN_DECR_U (r7 + 2*n, n + 1, cy);
193
194 cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
195 r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
196 cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
197 if (UNLIKELY(0 > cy))
198 MPN_DECR_U (r5 + n + 1, 2*n, 1);
199 else
200 MPN_INCR_U (r5 + n + 1, 2*n, cy);
201
202 ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
203
204 cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
205 MPN_INCR_U (r3 + 2*n, n + 1, cy);
206 cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
207 if (LIKELY(spt != n))
208 MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
209 else
210 ASSERT (r3[3*n] + cy == 0);
211 }
212