/* mpn_sqr -- square natural numbers.

Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2005, 2008, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

2586d7f5d3SJohn Marino void
mpn_sqr(mp_ptr p,mp_srcptr a,mp_size_t n)2686d7f5d3SJohn Marino mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
2786d7f5d3SJohn Marino {
2886d7f5d3SJohn Marino ASSERT (n >= 1);
2986d7f5d3SJohn Marino ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
3086d7f5d3SJohn Marino
3186d7f5d3SJohn Marino if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
3286d7f5d3SJohn Marino { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
3386d7f5d3SJohn Marino mpn_mul_basecase (p, a, n, a, n);
3486d7f5d3SJohn Marino }
3586d7f5d3SJohn Marino else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
3686d7f5d3SJohn Marino {
3786d7f5d3SJohn Marino mpn_sqr_basecase (p, a, n);
3886d7f5d3SJohn Marino }
3986d7f5d3SJohn Marino else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
4086d7f5d3SJohn Marino {
4186d7f5d3SJohn Marino /* Allocate workspace of fixed size on stack: fast! */
4286d7f5d3SJohn Marino mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
4386d7f5d3SJohn Marino ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
4486d7f5d3SJohn Marino mpn_toom2_sqr (p, a, n, ws);
4586d7f5d3SJohn Marino }
4686d7f5d3SJohn Marino else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
4786d7f5d3SJohn Marino {
4886d7f5d3SJohn Marino mp_ptr ws;
4986d7f5d3SJohn Marino TMP_SDECL;
5086d7f5d3SJohn Marino TMP_SMARK;
5186d7f5d3SJohn Marino ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
5286d7f5d3SJohn Marino mpn_toom3_sqr (p, a, n, ws);
5386d7f5d3SJohn Marino TMP_SFREE;
5486d7f5d3SJohn Marino }
5586d7f5d3SJohn Marino else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
5686d7f5d3SJohn Marino {
5786d7f5d3SJohn Marino mp_ptr ws;
5886d7f5d3SJohn Marino TMP_SDECL;
5986d7f5d3SJohn Marino TMP_SMARK;
6086d7f5d3SJohn Marino ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
6186d7f5d3SJohn Marino mpn_toom4_sqr (p, a, n, ws);
6286d7f5d3SJohn Marino TMP_SFREE;
6386d7f5d3SJohn Marino }
6486d7f5d3SJohn Marino else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
6586d7f5d3SJohn Marino {
6686d7f5d3SJohn Marino mp_ptr ws;
6786d7f5d3SJohn Marino TMP_SDECL;
6886d7f5d3SJohn Marino TMP_SMARK;
6986d7f5d3SJohn Marino ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
7086d7f5d3SJohn Marino mpn_toom6_sqr (p, a, n, ws);
7186d7f5d3SJohn Marino TMP_SFREE;
7286d7f5d3SJohn Marino }
7386d7f5d3SJohn Marino else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
7486d7f5d3SJohn Marino {
7586d7f5d3SJohn Marino mp_ptr ws;
7686d7f5d3SJohn Marino TMP_DECL;
7786d7f5d3SJohn Marino TMP_MARK;
7886d7f5d3SJohn Marino ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
7986d7f5d3SJohn Marino mpn_toom8_sqr (p, a, n, ws);
8086d7f5d3SJohn Marino TMP_FREE;
8186d7f5d3SJohn Marino }
8286d7f5d3SJohn Marino else
8386d7f5d3SJohn Marino {
8486d7f5d3SJohn Marino /* The current FFT code allocates its own space. That should probably
8586d7f5d3SJohn Marino change. */
8686d7f5d3SJohn Marino mpn_fft_mul (p, a, n, a, n);
8786d7f5d3SJohn Marino }
8886d7f5d3SJohn Marino }
89