1 /* mpn_sqr_basecase -- Internal routine to square a natural number
2 of length n.
3
4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7
8 Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
9 2005, 2008 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26 #include "gmp.h"
27 #include "gmp-impl.h"
28 #include "longlong.h"
29
30
31 #if HAVE_NATIVE_mpn_sqr_diagonal
32 #define MPN_SQR_DIAGONAL(rp, up, n) \
33 mpn_sqr_diagonal (rp, up, n)
34 #else
35 #define MPN_SQR_DIAGONAL(rp, up, n) \
36 do { \
37 mp_size_t _i; \
38 for (_i = 0; _i < (n); _i++) \
39 { \
40 mp_limb_t ul, lpl; \
41 ul = (up)[_i]; \
42 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
43 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
44 } \
45 } while (0)
46 #endif
47
48
49 #undef READY_WITH_mpn_sqr_basecase
50
51
52 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
53 void
mpn_sqr_basecase(mp_ptr rp,mp_srcptr up,mp_size_t n)54 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
55 {
56 mp_size_t i;
57 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
58 mp_ptr tp = tarr;
59 mp_limb_t cy;
60
61 /* must fit 2*n limbs in tarr */
62 ASSERT (n <= SQR_TOOM2_THRESHOLD);
63
64 if ((n & 1) != 0)
65 {
66 if (n == 1)
67 {
68 mp_limb_t ul, lpl;
69 ul = up[0];
70 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
71 rp[0] = lpl >> GMP_NAIL_BITS;
72 return;
73 }
74
75 MPN_ZERO (tp, n);
76
77 for (i = 0; i <= n - 2; i += 2)
78 {
79 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
80 tp[n + i] = cy;
81 }
82 }
83 else
84 {
85 if (n == 2)
86 {
87 rp[0] = 0;
88 rp[1] = 0;
89 rp[3] = mpn_addmul_2 (rp, up, 2, up);
90 return;
91 }
92
93 MPN_ZERO (tp, n);
94
95 for (i = 0; i <= n - 4; i += 2)
96 {
97 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
98 tp[n + i] = cy;
99 }
100 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
101 tp[2 * n - 3] = cy;
102 }
103
104 MPN_SQR_DIAGONAL (rp, up, n);
105
106 #if HAVE_NATIVE_mpn_addlsh1_n
107 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
108 #else
109 cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
110 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
111 #endif
112 rp[2 * n - 1] += cy;
113 }
114 #define READY_WITH_mpn_sqr_basecase
115 #endif
116
117
118 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
119
120 /* mpn_sqr_basecase using plain mpn_addmul_2.
121
122 This is tricky, since we have to let mpn_addmul_2 make some undesirable
123 multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
124 This forces us to conditionally add or subtract the mpn_sqr_diagonal
125 results. Examples of the product we form:
126
127 n = 4 n = 5 n = 6
128 u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
129 u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
130 u4 * u5
131 add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
132 sub: u1 sub: u1 u3 sub: u1 u3
133 */
134
135 void
mpn_sqr_basecase(mp_ptr rp,mp_srcptr up,mp_size_t n)136 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
137 {
138 mp_size_t i;
139 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
140 mp_ptr tp = tarr;
141 mp_limb_t cy;
142
143 /* must fit 2*n limbs in tarr */
144 ASSERT (n <= SQR_TOOM2_THRESHOLD);
145
146 if ((n & 1) != 0)
147 {
148 mp_limb_t x0, x1;
149
150 if (n == 1)
151 {
152 mp_limb_t ul, lpl;
153 ul = up[0];
154 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
155 rp[0] = lpl >> GMP_NAIL_BITS;
156 return;
157 }
158
159 /* The code below doesn't like unnormalized operands. Since such
160 operands are unusual, handle them with a dumb recursion. */
161 if (up[n - 1] == 0)
162 {
163 rp[2 * n - 2] = 0;
164 rp[2 * n - 1] = 0;
165 mpn_sqr_basecase (rp, up, n - 1);
166 return;
167 }
168
169 MPN_ZERO (tp, n);
170
171 for (i = 0; i <= n - 2; i += 2)
172 {
173 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
174 tp[n + i] = cy;
175 }
176
177 MPN_SQR_DIAGONAL (rp, up, n);
178
179 for (i = 2;; i += 4)
180 {
181 x0 = rp[i + 0];
182 rp[i + 0] = (-x0) & GMP_NUMB_MASK;
183 x1 = rp[i + 1];
184 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
185 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
186 if (i + 4 >= 2 * n)
187 break;
188 mpn_incr_u (rp + i + 4, cy);
189 }
190 }
191 else
192 {
193 mp_limb_t x0, x1;
194
195 if (n == 2)
196 {
197 rp[0] = 0;
198 rp[1] = 0;
199 rp[3] = mpn_addmul_2 (rp, up, 2, up);
200 return;
201 }
202
203 /* The code below doesn't like unnormalized operands. Since such
204 operands are unusual, handle them with a dumb recursion. */
205 if (up[n - 1] == 0)
206 {
207 rp[2 * n - 2] = 0;
208 rp[2 * n - 1] = 0;
209 mpn_sqr_basecase (rp, up, n - 1);
210 return;
211 }
212
213 MPN_ZERO (tp, n);
214
215 for (i = 0; i <= n - 4; i += 2)
216 {
217 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
218 tp[n + i] = cy;
219 }
220 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
221 tp[2 * n - 3] = cy;
222
223 MPN_SQR_DIAGONAL (rp, up, n);
224
225 for (i = 2;; i += 4)
226 {
227 x0 = rp[i + 0];
228 rp[i + 0] = (-x0) & GMP_NUMB_MASK;
229 x1 = rp[i + 1];
230 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
231 if (i + 6 >= 2 * n)
232 break;
233 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
234 mpn_incr_u (rp + i + 4, cy);
235 }
236 mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
237 }
238
239 #if HAVE_NATIVE_mpn_addlsh1_n
240 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
241 #else
242 cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
243 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
244 #endif
245 rp[2 * n - 1] += cy;
246 }
247 #define READY_WITH_mpn_sqr_basecase
248 #endif
249
250
251 #if ! defined (READY_WITH_mpn_sqr_basecase)
252
253 /* Default mpn_sqr_basecase using mpn_addmul_1. */
254
255 void
mpn_sqr_basecase(mp_ptr rp,mp_srcptr up,mp_size_t n)256 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
257 {
258 mp_size_t i;
259
260 ASSERT (n >= 1);
261 ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
262
263 {
264 mp_limb_t ul, lpl;
265 ul = up[0];
266 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
267 rp[0] = lpl >> GMP_NAIL_BITS;
268 }
269 if (n > 1)
270 {
271 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
272 mp_ptr tp = tarr;
273 mp_limb_t cy;
274
275 /* must fit 2*n limbs in tarr */
276 ASSERT (n <= SQR_TOOM2_THRESHOLD);
277
278 cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
279 tp[n - 1] = cy;
280 for (i = 2; i < n; i++)
281 {
282 mp_limb_t cy;
283 cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
284 tp[n + i - 2] = cy;
285 }
286 MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
287
288 {
289 mp_limb_t cy;
290 #if HAVE_NATIVE_mpn_addlsh1_n
291 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
292 #else
293 cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
294 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
295 #endif
296 rp[2 * n - 1] += cy;
297 }
298 }
299 }
300 #endif
301