xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/toom_interpolate_8pts.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 #include "gmp-impl.h"
38 
39 #define BINVERT_3 MODLIMB_INVERSE_3
40 
41 #define BINVERT_15 \
42   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
43 
44 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
45 
46 #ifndef mpn_divexact_by3
47 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
48 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
49 #else
50 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
51 #endif
52 #endif
53 
54 #ifndef mpn_divexact_by45
55 #if GMP_NUMB_BITS % 12 == 0
56 #define mpn_divexact_by45(dst,src,size) \
57   (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
58 #else
59 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
60 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
61 #else
62 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
63 #endif
64 #endif
65 #endif
66 
67 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
68 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
69 #else
70 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
71 #endif
72 
73 #if HAVE_NATIVE_mpn_sublsh_n
74 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
75 #else
76 static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)77 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
78 {
79 #if USE_MUL_1 && 0
80   return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
81 #else
82   mp_limb_t __cy;
83   __cy = mpn_lshift (ws,src,n,s);
84   return __cy + mpn_sub_n (dst,dst,ws,n);
85 #endif
86 }
87 #endif
88 
89 
90 #if HAVE_NATIVE_mpn_subrsh
91 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
92 #else
93 /* This is not a correct definition, it assumes no carry */
94 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
95 do {									\
96   mp_limb_t __cy;							\
97   MPN_DECR_U (dst, nd, src[0] >> s);					\
98   __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
99   MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
100 } while (0)
101 #endif
102 
103 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
104    points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
105    we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
106    degree 7 (or 6), given the 8 (rsp. 7) values:
107 
108      r1 = limit at infinity of f(x) / x^7,
109      r2 = f(4),
110      r3 = f(-4),
111      r4 = f(2),
112      r5 = f(-2),
113      r6 = f(1),
114      r7 = f(-1),
115      r8 = f(0).
116 
117    All couples of the form f(n),f(-n) must be already mixed with
118    toom_couple_handling(f(n),...,f(-n),...)
119 
120    The result is stored in {pp, spt + 7*n (or 6*n)}.
121    At entry, r8 is stored at {pp, 2n},
122    r5 is stored at {pp + 3n, 3n + 1}.
123 
124    The other values are 2n+... limbs each (with most significant limbs small).
125 
126    All intermediate results are positive.
127    Inputs are destroyed.
128 */
129 
130 void
mpn_toom_interpolate_8pts(mp_ptr pp,mp_size_t n,mp_ptr r3,mp_ptr r7,mp_size_t spt,mp_ptr ws)131 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
132 			   mp_ptr r3, mp_ptr r7,
133 			   mp_size_t spt, mp_ptr ws)
134 {
135   mp_limb_signed_t cy;
136   mp_ptr r5, r1;
137   r5 = (pp + 3 * n);			/* 3n+1 */
138   r1 = (pp + 7 * n);			/* spt */
139 
140   /******************************* interpolation *****************************/
141 
142   DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
143   cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
144   MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
145 
146   DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
147   cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
148   MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
149 
150   r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
151   cy = mpn_sub_n (r7, r7, r1, spt);
152   MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
153 
154   ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
155   ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
156 
157   ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
158 
159   ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
160 
161   mpn_divexact_by45 (r3, r3, 3 * n + 1);
162 
163   ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
164 
165   ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
166 
167   /* last interpolation steps... */
168   /* ... are mixed with recomposition */
169 
170   /***************************** recomposition *******************************/
171   /*
172     pp[] prior to operations:
173      |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
174 
175     summation scheme for remaining operations:
176      |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
177      |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
178 	  ||_H r3|_M r3|_L*r3|
179 				  ||_H_r7|_M_r7|_L_r7|
180 		      ||-H r3|-M r3|-L*r3|
181 				  ||-H*r5|-M_r5|-L_r5|
182   */
183 
184   cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
185   cy-= mpn_sub_n (pp + n, pp + n, r5, n);
186   if (cy > 0) {
187     MPN_INCR_U (r7 + n, 2*n + 1, 1);
188     cy = 0;
189   }
190 
191   cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
192   MPN_DECR_U (r7 + 2*n, n + 1, cy);
193 
194   cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
195   r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
196   cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
197   if (UNLIKELY(0 > cy))
198     MPN_DECR_U (r5 + n + 1, 2*n, 1);
199   else
200     MPN_INCR_U (r5 + n + 1, 2*n, cy);
201 
202   ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
203 
204   cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
205   MPN_INCR_U (r3 + 2*n, n + 1, cy);
206   cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
207   if (LIKELY(spt != n))
208     MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
209   else
210     ASSERT (r3[3*n] + cy == 0);
211 }
212