/* mpn_toom3_sqr -- Square {ap,an}.

   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

/* Evaluate in: -1, 0, +1, +2, +inf

  <-s--><--n--><--n-->
   ____ ______ ______
  |_a2_|___a1_|___a0_|

  v0  =  a0         ^2  #   A(0)^2
  v1  = (a0+ a1+ a2)^2  #   A(1)^2   ah  <= 2
  vm1 = (a0- a1+ a2)^2  #  A(-1)^2  |ah| <= 1
  v2  = (a0+2a1+4a2)^2  #   A(2)^2   ah  <= 6
  vinf=          a2 ^2  # A(inf)^2
*/
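
/* From these five values the coefficients of A(x)^2 = c4*x^4 + c3*x^3 +
   c2*x^2 + c1*x + c0 follow by linear algebra.  One equivalent derivation
   (the operation sequence actually used lives in mpn_toom_interpolate_5pts)
   is

     c0 = v0
     c4 = vinf
     c2 = (v1 + vm1)/2 - c0 - c4
     c3 = ((v2 - c0 - 16*c4 - 4*c2)/2 - (v1 - vm1)/2) / 3
     c1 = (v1 - vm1)/2 - c3
*/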

#if TUNE_PROGRAM_BUILD
#define MAYBE_sqr_basecase 1
#define MAYBE_sqr_toom3 1
#else
#define MAYBE_sqr_basecase \
  (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
#define MAYBE_sqr_toom3 \
  (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
#endif
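
/* The recursive calls below act on about an/3 limbs.  Since toom3 is used
   for an >= SQR_TOOM3_THRESHOLD, the basecase branch is reachable only if
   SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD, and a recursive toom3
   call only if SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD; otherwise
   the corresponding branch of TOOM3_SQR_REC is dead and compiles away.  */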

#define TOOM3_SQR_REC(p, a, n, ws) \
  do { \
    if (MAYBE_sqr_basecase \
	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
      mpn_sqr_basecase (p, a, n); \
    else if (! MAYBE_sqr_toom3 \
	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
      mpn_toom2_sqr (p, a, n, ws); \
    else \
      mpn_toom3_sqr (p, a, n, ws); \
  } while (0)

void
mpn_toom3_sqr (mp_ptr pp,
	       mp_srcptr ap, mp_size_t an,
	       mp_ptr scratch)
{
  mp_size_t n, s;
  mp_limb_t cy, vinf0;
  mp_ptr gp;
  mp_ptr as1, asm1, as2;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)

  n = (an + 2) / (size_t) 3;

  s = an - 2 * n;

  ASSERT (0 < s && s <= n);
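  /* Example (illustrative only): an = 10 gives n = (10 + 2) / 3 = 4 and
     s = 10 - 8 = 2, i.e. a0 and a1 are 4 limbs each, a2 the top 2.  */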

  as1 = scratch + 4 * n + 4;
  asm1 = scratch + 2 * n + 2;
  as2 = pp + n + 1;

  gp = scratch;
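  /* Layout (sizes in limbs): gp = scratch holds the n-limb sum a0 + a2
     (carry kept in cy); asm1 and as1 are n+1 limbs each, at scratch + 2n+2
     and scratch + 4n+4; as2 (n+1 limbs) is built in the product area at
     pp + n + 1 and is consumed by the v2 recursion before pp is
     overwritten.  */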

  /* Compute as1 and asm1.  */
  cy = mpn_add (gp, a0, n, a2, s);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
      as1[n] = cy >> 1;
      asm1[n] = 0;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
      as1[n] = cy + (cy2 >> 1);
      asm1[n] = cy - (cy2 & 1);
    }
#else
  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      mpn_sub_n (asm1, a1, gp, n);
      asm1[n] = 0;
    }
  else
    {
      cy -= mpn_sub_n (asm1, gp, a1, n);
      asm1[n] = cy;
    }
#endif
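  /* asm1 now holds |a0 - a1 + a2|; the sign can be dropped because only
     the square vm1 = A(-1)^2 is needed, and that is the same either way.  */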

  /* Compute as2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (as2, a1, a2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  cy -= mpn_sub_n (as2, as2, a0, n);
#endif
#endif
  as2[n] = cy;
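  /* All three variants compute A(2) = a0 + 2*a1 + 4*a2: the rsblsh1 and
     generic paths reuse as1 = A(1) via 2*(as1 + a2) - a0, while the
     addlsh1 path computes a0 + 2*(a1 + 2*a2) directly.  */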

  ASSERT (as1[n] <= 2);
  ASSERT (asm1[n] <= 1);
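  /* These bounds hold since as1 = a0 + a1 + a2 < 3*B^n and
     asm1 = |a0 - a1 + a2| < 2*B^n, with B the limb base; similarly
     as2 = A(2) < 7*B^n, so as2[n] <= 6 (cf. the table above).  */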

#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 4 * n)			/* s+s */
#define vm1   scratch				/* 2n+1 */
#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
#define scratch_out  (scratch + 5 * n + 5)
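
  /* Note the overlaps: as2 (at pp + n + 1) intersects both v0 and v1, and
     v1 runs into vinf[0], so the recursive squarings below are ordered to
     consume each evaluated value before its storage is overwritten.  */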

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
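  /* Writing asm1 = h*B^n + L with h = asm1[n] <= 1, asm1^2 =
     L^2 + 2*h*L*B^n + h^2*B^(2n).  The recursion above produced L^2; the
     two conditional additions below add h*L twice at offset n, and cy
     picks up h (= h^2, as h <= 1) for the limb at offset 2n.  */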
  cy = 0;
  if (asm1[n] != 0)
    cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  if (asm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else
  TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
#endif

  TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */

  TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */

  vinf0 = vinf[0];				/* v1 overlaps with this */

#ifdef SMALLER_RECURSION
  /* v1, 2n+1 limbs */
  TOOM3_SQR_REC (v1, as1, n, scratch_out);
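  /* Analogous fix-up for h = as1[n] <= 2: as1^2 = L^2 + 2*h*L*B^n +
     h^2*B^(2n).  Each of the two blocks below adds h*L at offset n
     (a plain mpn_add_n when h == 1, an add of 2*as1 when h == 2) and
     accumulates h^2 into cy.  */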
  if (as1[n] == 1)
    {
      cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * as1[n] + mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (as1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else
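  /* Without SMALLER_RECURSION, v1 is computed as a full (n+1)-limb square
     of 2n+2 limbs, which also clobbers vinf[0] and vinf[1]; vinf[0] was
     already saved in vinf0, and vinf[1] is saved across the call here.  */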
  cy = vinf[1];
  TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
  vinf[1] = cy;
#endif

  TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */

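  /* Recombine: mpn_toom_interpolate_5pts solves for the coefficients (see
     the evaluation comment at the top) and assembles the result in pp.
     v0, v1 and vinf are already in place in pp; vm1 and v2 are passed in
     scratch, and vinf0 restores the limb of vinf that v1 overwrote.  */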
  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
}