xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/sqr_basecase.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpn_sqr_basecase -- Internal routine to square a natural number
2    of length n.
3 
4    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
5    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6 
7 
8 Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
9 Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 #include "gmp-impl.h"
38 #include "longlong.h"
39 
40 
/* Compute the n diagonal products: for each source limb u[i], store the
   double-limb square u[i]^2 at rp[2*i] (low) and rp[2*i+1] (high).
   Uses the native assembler routine when available, else a generic
   umul_ppmm loop.  */
#if HAVE_NATIVE_mpn_sqr_diagonal
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  mpn_sqr_diagonal (rp, up, n)
#else
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  do {									\
    mp_size_t _i;							\
    for (_i = 0; _i < (n); _i++)					\
      {									\
	mp_limb_t ul, lpl;						\
	ul = (up)[_i];							\
	/* Pre-shift one factor by GMP_NAIL_BITS and post-shift the	\
	   low half so both result limbs are nail-normalized; both	\
	   shifts are no-ops when GMP_NAIL_BITS == 0.  */		\
	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
      }									\
  } while (0)
#endif
57 
/* Finish a square: rp[0..2n-1] <- diagonal squares of {up,n} plus twice
   the off-diagonal half-product {tp,2n-2} shifted up one limb.  Three
   implementations, best first: a native fused routine, the diagonal
   plus a native add-left-shift-1, or the fully generic lshift + add.  */
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
#else
#if HAVE_NATIVE_mpn_addlsh1_n
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  do {									\
    mp_limb_t cy;							\
    MPN_SQR_DIAGONAL (rp, up, n);					\
    /* rp[1..2n-2] += 2 * tp[0..2n-3]; carry into the top limb.  */	\
    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    rp[2 * n - 1] += cy;						\
  } while (0)
#else
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
  do {									\
    mp_limb_t cy;							\
    MPN_SQR_DIAGONAL (rp, up, n);					\
    /* Double tp in place, then add; sum both carries into the top.  */	\
    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    rp[2 * n - 1] += cy;						\
  } while (0)
#endif
#endif


/* Exactly one of the mpn_sqr_basecase variants below is compiled; the
   first applicable one defines READY_WITH_mpn_sqr_basecase to suppress
   the rest.  */
#undef READY_WITH_mpn_sqr_basecase
85 
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
/* Square {up,n} into {rp,2*n} using the native mpn_addmul_2s, which
   forms two-limb-multiplier products while skipping the unwanted
   diagonal u[k]*u[k] terms, so no sign correction is needed afterwards.
   The off-diagonal half-product is accumulated in tp, then combined
   with the diagonal squares by MPN_SQR_DIAG_ADDLSH1.  */
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  /* Single limb: square directly, nail-aware.  */
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      MPN_ZERO (tp, n);

      /* n odd: two-limb multipliers (up[i],up[i+1]) for each even i
	 cover all rows; cy is the high limb spilling past each row.  */
      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
    }
  else
    {
      if (n == 2)
	{
	  /* 2x2 square computed directly into rp.  */
#if HAVE_NATIVE_mpn_mul_2
	  rp[3] = mpn_mul_2 (rp, up, 2, up);
#else
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
#endif
	  return;
	}

      MPN_ZERO (tp, n);

      /* n even: same row loop, but stop two rows early...  */
      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      /* ...and finish with the single remaining 1x1 product
	 u[n-2]*u[n-1].  */
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;
    }

  /* rp = diagonal squares + 2 * tp (shifted up one limb).  */
  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
#define READY_WITH_mpn_sqr_basecase
#endif
146 
147 
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2

/* mpn_sqr_basecase using plain mpn_addmul_2.

   This is tricky, since we have to let mpn_addmul_2 make some undesirable
   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
   This forces us to conditionally add or subtract the mpn_sqr_diagonal
   results.  Examples of the product we form:

   n = 4              n = 5		n = 6
   u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
   u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
					u4 * u5
   add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
   sub: u1	      sub: u1 u3	sub: u1 u3
*/

void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      mp_limb_t x0, x1;

      if (n == 1)
	{
	  /* Single limb: square directly, nail-aware.  */
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      /* Build in tp the mixed product shown in the table above; each
	 row includes some u[k]*u[k] terms, corrected for below.  */
      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Negate the diagonal squares that must enter with minus sign
	 (the "sub" limbs in the table above): two-limb negate at rp[i],
	 rp[i+1] for i = 2, 6, 10, ..., then propagate the borrow
	 through the next two limbs and beyond via mpn_incr_u.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  if (i + 4 >= 2 * n)
	    break;
	  mpn_incr_u (rp + i + 4, cy);
	}
    }
  else
    {
      mp_limb_t x0, x1;

      if (n == 2)
	{
	  /* 2x2 square computed directly into rp.  */
#if HAVE_NATIVE_mpn_mul_2
	  rp[3] = mpn_mul_2 (rp, up, 2, up);
#else
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
#endif
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      /* Build the mixed product of the table above, stopping two rows
	 early, then add the final 1x1 product u[n-2]*u[n-1].  */
      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Same sign correction as the odd case, but with an exit one
	 iteration earlier and a final explicit borrow at the top.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  if (i + 6 >= 2 * n)
	    break;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  mpn_incr_u (rp + i + 4, cy);
	}
      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    }

  /* Fold in twice the off-diagonal half-product: rp[1..] += 2*tp.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif
283 
284 
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1

/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
   allocation.  */
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  if (n == 1)
    {
      /* Single limb: square directly, nail-aware.  */
      mp_limb_t ul, lpl;
      ul = up[0];
      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
      rp[0] = lpl >> GMP_NAIL_BITS;
    }
  else
    {
      mp_size_t i;
      mp_ptr xp;

      /* Accumulate the off-diagonal half-product in the upper part of
	 rp itself (no temporary): walk up and rp forward so each
	 addmul_1 adds u[j] * {u[j+1..n-1]} at the proper offset, with
	 the spilled high limb stored just past each row.  */
      rp += 1;
      rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
      for (i = n - 2; i != 0; i--)
	{
	  up += 1;
	  rp += 2;
	  rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
	}

      /* Undo the pointer walking: xp is the original rp, up - n + 2 the
	 original up.  The native routine then adds the diagonal squares
	 plus twice the accumulated half-product in one pass.  */
      xp = rp - 2 * n + 3;
      mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
    }
}
#define READY_WITH_mpn_sqr_basecase
#endif
319 
320 
321 #if ! defined (READY_WITH_mpn_sqr_basecase)
322 
323 /* Default mpn_sqr_basecase using mpn_addmul_1.  */
324 void
mpn_sqr_basecase(mp_ptr rp,mp_srcptr up,mp_size_t n)325 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
326 {
327   mp_size_t i;
328 
329   ASSERT (n >= 1);
330   ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
331 
332   if (n == 1)
333     {
334       mp_limb_t ul, lpl;
335       ul = up[0];
336       umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
337       rp[0] = lpl >> GMP_NAIL_BITS;
338     }
339   else
340     {
341       mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
342       mp_ptr tp = tarr;
343       mp_limb_t cy;
344 
345       /* must fit 2*n limbs in tarr */
346       ASSERT (n <= SQR_TOOM2_THRESHOLD);
347 
348       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
349       tp[n - 1] = cy;
350       for (i = 2; i < n; i++)
351 	{
352 	  mp_limb_t cy;
353 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
354 	  tp[n + i - 2] = cy;
355 	}
356 
357       MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
358     }
359 }
360 #define READY_WITH_mpn_sqr_basecase
361 #endif
362