1 /* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29
30
31 #if GMP_NUMB_BITS < 21
32 #error Not implemented.
33 #endif
34
35
36 #if TUNE_PROGRAM_BUILD
37 #define MAYBE_sqr_basecase 1
38 #define MAYBE_sqr_above_basecase 1
39 #define MAYBE_sqr_toom2 1
40 #define MAYBE_sqr_above_toom2 1
41 #define MAYBE_sqr_toom3 1
42 #define MAYBE_sqr_above_toom3 1
43 #define MAYBE_sqr_above_toom4 1
44 #else
45 #ifdef SQR_TOOM8_THRESHOLD
46 #define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
47 #else
48 #define SQR_TOOM6_MAX \
49 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ? \
50 ((SQR_FFT_THRESHOLD+6*2-1+5)/6) \
51 : MP_SIZE_T_MAX )
52 #endif
53 #define MAYBE_sqr_basecase \
54 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
55 #define MAYBE_sqr_above_basecase \
56 (SQR_TOOM6_MAX >= SQR_TOOM2_THRESHOLD)
57 #define MAYBE_sqr_toom2 \
58 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
59 #define MAYBE_sqr_above_toom2 \
60 (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
61 #define MAYBE_sqr_toom3 \
62 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
63 #define MAYBE_sqr_above_toom3 \
64 (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
65 #define MAYBE_sqr_above_toom4 \
66 (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
67 #endif
68
69 #define TOOM6_SQR_REC(p, a, n, ws) \
70 do { \
71 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
72 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \
73 mpn_sqr_basecase (p, a, n); \
74 else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
75 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \
76 mpn_toom2_sqr (p, a, n, ws); \
77 else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
78 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \
79 mpn_toom3_sqr (p, a, n, ws); \
80 else if (! MAYBE_sqr_above_toom4 \
81 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) \
82 mpn_toom4_sqr (p, a, n, ws); \
83 else \
84 mpn_toom6_sqr (p, a, n, ws); \
85 } while (0)
86
87 void
mpn_toom6_sqr(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_ptr scratch)88 mpn_toom6_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
89 {
90 mp_size_t n, s;
91
92 /***************************** decomposition *******************************/
93
94 ASSERT( an >= 18 );
95
96 n = 1 + (an - 1) / (size_t) 6;
97
98 s = an - 5 * n;
99
100 ASSERT (0 < s && s <= n);
101
102 #define r4 (pp + 3 * n) /* 3n+1 */
103 #define r2 (pp + 7 * n) /* 3n+1 */
104 #define r0 (pp +11 * n) /* s+t <= 2*n */
105 #define r5 (scratch) /* 3n+1 */
106 #define r3 (scratch + 3 * n + 1) /* 3n+1 */
107 #define r1 (scratch + 6 * n + 2) /* 3n+1 */
108 #define v0 (pp + 7 * n) /* n+1 */
109 #define v2 (pp + 9 * n+2) /* n+1 */
110 #define wse (scratch + 9 * n + 3) /* 3n+1 */
111
112 /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
113 need all of them, when DO_mpn_sublsh_n usea a scratch */
114 /* if (scratch== NULL) */
115 /* scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
116
117 /********************** evaluation and recursive calls *********************/
118 /* $\pm1/2$ */
119 mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
120 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
121 TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
122 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
123
124 /* $\pm1$ */
125 mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp);
126 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
127 TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
128 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
129
130 /* $\pm4$ */
131 mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
132 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
133 TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
134 mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
135
136 /* $\pm1/4$ */
137 mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
138 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
139 TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
140 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
141
142 /* $\pm2$ */
143 mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
144 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
145 TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
146 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
147
148 #undef v0
149 #undef v2
150
151 /* A(0)*B(0) */
152 TOOM6_SQR_REC(pp, ap, n, wse);
153
154 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
155
156 #undef r0
157 #undef r1
158 #undef r2
159 #undef r3
160 #undef r4
161 #undef r5
162
163 }
164 #undef TOOM6_SQR_REC
165 #undef MAYBE_sqr_basecase
166 #undef MAYBE_sqr_above_basecase
167 #undef MAYBE_sqr_toom2
168 #undef MAYBE_sqr_above_toom2
169 #undef MAYBE_sqr_toom3
170 #undef MAYBE_sqr_above_toom3
171 #undef MAYBE_sqr_above_toom4
172