/* mpn_sqr_basecase -- Internal routine to square a natural number
   of length n.

   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.


Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* MPN_SQR_DIAGONAL(rp, up, n) -- write the n diagonal squares u[i]^2 into
   rp, each as a two-limb pair at rp[2*i] (low) and rp[2*i+1] (high).
   Uses the native assembly routine when available, otherwise a generic
   umul_ppmm loop.  The shift by GMP_NAIL_BITS pre-positions the operand
   so the nail adjustment of the low product limb comes out right; on
   non-nail builds GMP_NAIL_BITS is 0 and the shifts are no-ops.  */
#if HAVE_NATIVE_mpn_sqr_diagonal
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  mpn_sqr_diagonal (rp, up, n)
#else
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  do {									\
    mp_size_t _i;							\
    for (_i = 0; _i < (n); _i++)					\
      {									\
	mp_limb_t ul, lpl;						\
	ul = (up)[_i];							\
	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
      }									\
  } while (0)
#endif

/* MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) -- combine the diagonal squares with
   the accumulated off-diagonal products: first fill rp with the diagonal
   squares of up[0..n-1], then add 2 * tp[0..2n-3] into rp[1..2n-2], folding
   the final carry into the top limb rp[2n-1].

   Three implementations, best first:
     1. native mpn_sqr_diag_addlsh1 does it all in one pass;
     2. native mpn_addlsh1_n adds tp shifted left by one bit in one call;
     3. generic fallback shifts tp in place with mpn_lshift, then adds.  */
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
#else
#if HAVE_NATIVE_mpn_addlsh1_n
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  do {									\
    mp_limb_t cy;							\
    MPN_SQR_DIAGONAL (rp, up, n);					\
    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    rp[2 * n - 1] += cy;						\
  } while (0)
#else
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  do {									\
    mp_limb_t cy;							\
    MPN_SQR_DIAGONAL (rp, up, n);					\
    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    rp[2 * n - 1] += cy;						\
  } while (0)
#endif
#endif


/* The sections below each define mpn_sqr_basecase when their native
   prerequisites are met; the first one that compiles defines this guard
   so the later fallbacks are skipped.  */
#undef READY_WITH_mpn_sqr_basecase

#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s

/* mpn_sqr_basecase using mpn_addmul_2s, an addmul_2 variant that skips
   the unwanted u[k]*u[k] diagonal products.  The off-diagonal products
   u[i]*u[j] (i < j) are accumulated into the scratch area tp, two rows
   at a time, then MPN_SQR_DIAG_ADDLSH1 forms rp = diagonal squares +
   2*tp.  Odd and even n need slightly different loop bounds; the even
   case finishes with one plain mpn_addmul_1 for the last single
   cross product u[n-1]*u[n-2].  */
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  /* Single limb: square directly with umul_ppmm.  */
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      MPN_ZERO (tp, n);

      /* Each pass multiplies the two limbs up[i], up[i+1] by the rest of
	 the higher part of the operand, accumulating at tp + 2*i.  */
      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
    }
  else
    {
      if (n == 2)
	{
#if HAVE_NATIVE_mpn_mul_2
	  rp[3] = mpn_mul_2 (rp, up, 2, up);
#else
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
#endif
	  return;
	}

      MPN_ZERO (tp, n);

      /* Stop two rows early; the final cross product u[n-1]*u[n-2] is a
	 single addmul_1 below.  */
      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;
    }

  /* rp = diagonal squares + 2 * tp.  */
  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
#define READY_WITH_mpn_sqr_basecase
#endif


#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2

/* mpn_sqr_basecase using plain mpn_addmul_2.

   This is tricky, since we have to let mpn_addmul_2 make some undesirable
   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
   This forces us to conditionally add or subtract the mpn_sqr_diagonal
   results.  Examples of the product we form:

   n = 4              n = 5              n = 6
   u1u0 * u3u2u1      u1u0 * u4u3u2u1   u1u0 * u5u4u3u2u1
   u2 * u3            u3u2 * u4u3       u3u2 * u5u4u3
                                        u4 * u5
   add: u0 u2 u3      add: u0 u2 u4     add: u0 u2 u4 u5
   sub: u1            sub: u1 u3        sub: u1 u3
*/

void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      mp_limb_t x0, x1;

      if (n == 1)
	{
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Negate the diagonal squares at the "sub" positions (see the table
	 in the comment above) so that the final addlsh1 of 2*tp yields the
	 correct result; borrows are propagated with __GMPN_SUB_1 and the
	 deferred mpn_incr_u.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  if (i + 4 >= 2 * n)
	    break;
	  mpn_incr_u (rp + i + 4, cy);
	}
    }
  else
    {
      mp_limb_t x0, x1;

      if (n == 2)
	{
#if HAVE_NATIVE_mpn_mul_2
	  rp[3] = mpn_mul_2 (rp, up, 2, up);
#else
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
#endif
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Same sign fixup as the odd case, but the last pair's borrow is
	 applied after the loop since there is no limb above it to carry
	 into.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  if (i + 6 >= 2 * n)
	    break;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  mpn_incr_u (rp + i + 4, cy);
	}
      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    }

  /* rp += 2 * tp, i.e. fold in the doubled off-diagonal products.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif


#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1

/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
   allocation.  The off-diagonal products are accumulated directly in the
   upper part of rp (offset by one limb), walking rp and up forward one row
   at a time; the native mpn_sqr_diag_addlsh1 then combines them with the
   diagonal squares in a single pass.  */
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  if (n == 1)
    {
      /* Single limb: square directly with umul_ppmm.  */
      mp_limb_t ul, lpl;
      ul = up[0];
      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
      rp[0] = lpl >> GMP_NAIL_BITS;
    }
  else
    {
      mp_size_t i;
      mp_ptr xp;

      rp += 1;
      /* First row: up[0] times the n-1 higher limbs.  */
      rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
      /* Remaining rows: each accumulates up[0] (of the advanced up) times
	 the i limbs above it, two limbs further into rp each time.  */
      for (i = n - 2; i != 0; i--)
	{
	  up += 1;
	  rp += 2;
	  rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
	}

      /* Undo the pointer advances: xp is the original rp, up - n + 2 the
	 original up.  */
      xp = rp - 2 * n + 3;
      mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
    }
}
#define READY_WITH_mpn_sqr_basecase
#endif


321 #if ! defined (READY_WITH_mpn_sqr_basecase)
322
323 /* Default mpn_sqr_basecase using mpn_addmul_1. */
324 void
mpn_sqr_basecase(mp_ptr rp,mp_srcptr up,mp_size_t n)325 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
326 {
327 mp_size_t i;
328
329 ASSERT (n >= 1);
330 ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
331
332 if (n == 1)
333 {
334 mp_limb_t ul, lpl;
335 ul = up[0];
336 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
337 rp[0] = lpl >> GMP_NAIL_BITS;
338 }
339 else
340 {
341 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
342 mp_ptr tp = tarr;
343 mp_limb_t cy;
344
345 /* must fit 2*n limbs in tarr */
346 ASSERT (n <= SQR_TOOM2_THRESHOLD);
347
348 cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
349 tp[n - 1] = cy;
350 for (i = 2; i < n; i++)
351 {
352 mp_limb_t cy;
353 cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
354 tp[n + i - 2] = cy;
355 }
356
357 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
358 }
359 }
360 #define READY_WITH_mpn_sqr_basecase
361 #endif
362