1 /* $OpenBSD: bn_arch.h,v 1.13 2023/07/24 10:21:29 jsing Exp $ */
2 /*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <openssl/bn.h>
19
20 #ifndef HEADER_BN_ARCH_H
21 #define HEADER_BN_ARCH_H
22
23 #ifndef OPENSSL_NO_ASM
24
25 #if defined(__GNUC__)
26
#define HAVE_BN_CLZW

/*
 * bn_clzw() returns the number of leading zero bits in the word w,
 * using the AArch64 clz instruction.
 */
static inline int
bn_clzw(BN_ULONG w)
{
	BN_ULONG n;

	__asm__ ("clz %[n], %[w]"
	    : [n]"=r"(n)
	    : [w]"r"(w));

	return n;
}
40
#define HAVE_BN_ADDW

/*
 * bn_addw() computes the double word sum (r1:r0) = a + b, where r1 is
 * the carry out (0 or 1) of the single word addition.
 */
static inline void
bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG carry, r0;

	/* adds sets the carry flag; cset then captures it as 0 or 1. */
	__asm__ (
	    "adds %[r0], %[a], %[b] \n"
	    "cset %[carry], cs \n"
	    : [carry]"=r"(carry), [r0]"=r"(r0)
	    : [a]"r"(a), [b]"r"(b)
	    : "cc");

	*out_r1 = carry;
	*out_r0 = r0;
}
58
#define HAVE_BN_ADDW_ADDW

/*
 * bn_addw_addw() computes (r1:r0) = a + b + c, where r1 is the total
 * number of carries (0, 1 or 2) out of the two single word additions.
 */
static inline void
bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG carry, r0;

	/*
	 * Outputs are earlyclobber ("=&r") since both are written before
	 * the last input (c) has been consumed.
	 */
	__asm__ (
	    "adds %[r0], %[a], %[b] \n"
	    "cset %[carry], cs \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "cinc %[carry], %[carry], cs \n"
	    : [carry]"=&r"(carry), [r0]"=&r"(r0)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
	    : "cc");

	*out_r1 = carry;
	*out_r0 = r0;
}
79
#define HAVE_BN_QWADDQW

/*
 * bn_qwaddqw() computes the quad word sum
 * (r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where the
 * incoming carry is expected to be 0 or 1; out_carry is the carry out
 * (0 or 1) of the most significant word addition.
 */
static inline void
bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
    BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
    BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r3, r2, r1, r0;

	/*
	 * The first adds discards its result (xzr) and exists only to
	 * seed the carry flag: carry + (2^64 - 1) produces a carry out
	 * iff carry != 0.  The adcs chain then propagates it across the
	 * four word additions.
	 */
	__asm__ (
	    "adds xzr, %[carry], #-1 \n"
	    "adcs %[r0], %[a0], %[b0] \n"
	    "adcs %[r1], %[a1], %[b1] \n"
	    "adcs %[r2], %[a2], %[b2] \n"
	    "adcs %[r3], %[a3], %[b3] \n"
	    "cset %[carry], cs \n"
	    : [carry]"+r"(carry), [r3]"=&r"(r3), [r2]"=&r"(r2),
	      [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
	      [b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
	    : "cc");

	*out_carry = carry;
	*out_r3 = r3;
	*out_r2 = r2;
	*out_r1 = r1;
	*out_r0 = r0;
}
108
#define HAVE_BN_MULW

/*
 * bn_mulw() computes the double word product (r1:r0) = a * b.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r1, r0;

	/* Unsigned multiplication using a umulh/mul pair. */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    : [r1]"=&r"(r1), [r0]"=r"(r0)
	    : [a]"r"(a), [b]"r"(b));

	*out_r1 = r1;
	*out_r0 = r0;
}
126
#define HAVE_BN_MULW_ADDW

/*
 * bn_mulw_addw() computes (r1:r0) = a * b + c.  The double word result
 * cannot overflow, since a * b <= 2^128 - 2^65 + 1, leaving room to add
 * a full word without carrying out of r1.
 */
static inline void
bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG r1, r0;

	/* The adc folds the carry from the low word addition into r1. */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "adc %[r1], %[r1], xzr \n"
	    : [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
	    : "cc");

	*out_r1 = r1;
	*out_r0 = r0;
}
147
#define HAVE_BN_MULW_ADDW_ADDW

/*
 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d.  The double
 * word result cannot overflow: a * b + c + d <= 2^128 - 1.
 */
static inline void
bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
    BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r1, r0;

	/* Each adds/adc pair folds one addend's carry into r1. */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "adc %[r1], %[r1], xzr \n"
	    "adds %[r0], %[r0], %[d] \n"
	    "adc %[r1], %[r1], xzr \n"
	    : [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d)
	    : "cc");

	*out_r1 = r1;
	*out_r0 = r0;
}
170
#define HAVE_BN_MULW_ADDTW

/*
 * bn_mulw_addtw() computes the triple word (r2:r1:r0) = a * b + (c2:c1:c0).
 * Note that the top word uses adc with no further carry capture, so any
 * carry out of the c2 addition is discarded.
 */
static inline void
bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
    BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r2, r1, r0;

	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    "adds %[r0], %[r0], %[c0] \n"
	    "adcs %[r1], %[r1], %[c1] \n"
	    "adc %[r2], xzr, %[c2] \n"
	    : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
	    : "cc");

	*out_r2 = r2;
	*out_r1 = r1;
	*out_r0 = r0;
}
193
#define HAVE_BN_MUL2_MULW_ADDTW

/*
 * bn_mul2_mulw_addtw() computes the triple word
 * (r2:r1:r0) = 2 * a * b + (c2:c1:c0).  The doubling is performed by
 * adding the double word product (x1:x0) twice, which propagates the
 * carries correctly without needing a 129-bit shift.
 */
static inline void
bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
    BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r2, r1, r0, x1, x0;

	__asm__ (
	    "umulh %[x1], %[a], %[b] \n"
	    "mul %[x0], %[a], %[b] \n"
	    "adds %[r0], %[c0], %[x0] \n"
	    "adcs %[r1], %[c1], %[x1] \n"
	    "adc %[r2], xzr, %[c2] \n"
	    "adds %[r0], %[r0], %[x0] \n"
	    "adcs %[r1], %[r1], %[x1] \n"
	    "adc %[r2], xzr, %[r2] \n"
	    : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1),
	      [x0]"=&r"(x0)
	    : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
	    : "cc");

	*out_r2 = r2;
	*out_r1 = r1;
	*out_r0 = r0;
}
220
#define HAVE_BN_QWMULW_ADDW

/*
 * bn_qwmulw_addw() computes the five word result
 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c.
 */
static inline void
bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
    BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
    BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r4, r3, r2, r1, r0;

	/*
	 * After c has been added into r0, its register is reused as
	 * scratch for the low halves of the remaining products - hence
	 * the read/write earlyclobber ("+&r") constraint on c.
	 */
	__asm__ (
	    "umulh %[r1], %[a0], %[b] \n"
	    "mul %[r0], %[a0], %[b] \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "umulh %[r2], %[a1], %[b] \n"
	    "mul %[c], %[a1], %[b] \n"
	    "adcs %[r1], %[r1], %[c] \n"
	    "umulh %[r3], %[a2], %[b] \n"
	    "mul %[c], %[a2], %[b] \n"
	    "adcs %[r2], %[r2], %[c] \n"
	    "umulh %[r4], %[a3], %[b] \n"
	    "mul %[c], %[a3], %[b] \n"
	    "adcs %[r3], %[r3], %[c] \n"
	    "adc %[r4], %[r4], xzr \n"
	    : [c]"+&r"(c), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
	      [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b)
	    : "cc");

	*out_r4 = r4;
	*out_r3 = r3;
	*out_r2 = r2;
	*out_r1 = r1;
	*out_r0 = r0;
}
255
#define HAVE_BN_QWMULW_ADDQW_ADDW

/*
 * bn_qwmulw_addqw_addw() computes the five word result
 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d.
 */
static inline void
bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
    BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
    BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG r4, r3, r2, r1, r0;

	/*
	 * First accumulate (a3:a2:a1:a0) * b + d, reusing d's register
	 * as scratch for product low halves (hence "+&r"), then add in
	 * the quad word (c3:c2:c1:c0) with a second carry chain.
	 */
	__asm__ (
	    "umulh %[r1], %[a0], %[b] \n"
	    "mul %[r0], %[a0], %[b] \n"
	    "adds %[r0], %[r0], %[d] \n"
	    "umulh %[r2], %[a1], %[b] \n"
	    "mul %[d], %[a1], %[b] \n"
	    "adcs %[r1], %[r1], %[d] \n"
	    "umulh %[r3], %[a2], %[b] \n"
	    "mul %[d], %[a2], %[b] \n"
	    "adcs %[r2], %[r2], %[d] \n"
	    "umulh %[r4], %[a3], %[b] \n"
	    "mul %[d], %[a3], %[b] \n"
	    "adcs %[r3], %[r3], %[d] \n"
	    "adc %[r4], %[r4], xzr \n"
	    "adds %[r0], %[r0], %[c0] \n"
	    "adcs %[r1], %[r1], %[c1] \n"
	    "adcs %[r2], %[r2], %[c2] \n"
	    "adcs %[r3], %[r3], %[c3] \n"
	    "adc %[r4], %[r4], xzr \n"
	    : [d]"+&r"(d), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
	      [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b),
	      [c3]"r"(c3), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
	    : "cc");

	*out_r4 = r4;
	*out_r3 = r3;
	*out_r2 = r2;
	*out_r1 = r1;
	*out_r0 = r0;
}
297
#define HAVE_BN_SUBW

/*
 * bn_subw() computes r0 = a - b, with out_borrow set to 1 if the
 * subtraction borrowed (a < b) and 0 otherwise.
 */
static inline void
bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
{
	BN_ULONG borrow, r0;

	/*
	 * On AArch64 a clear carry flag after subs indicates a borrow,
	 * hence cset with the cc condition.
	 */
	__asm__ (
	    "subs %[r0], %[a], %[b] \n"
	    "cset %[borrow], cc \n"
	    : [borrow]"=r"(borrow), [r0]"=r"(r0)
	    : [a]"r"(a), [b]"r"(b)
	    : "cc");

	*out_borrow = borrow;
	*out_r0 = r0;
}
315
#define HAVE_BN_SUBW_SUBW

/*
 * bn_subw_subw() computes r0 = a - b - c, with out_borrow set to the
 * total number of borrows (0, 1 or 2) from the two subtractions.
 */
static inline void
bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
    BN_ULONG *out_r0)
{
	BN_ULONG borrow, r0;

	/*
	 * Outputs are earlyclobber ("=&r") since both are written before
	 * the last input (c) has been consumed.
	 */
	__asm__ (
	    "subs %[r0], %[a], %[b] \n"
	    "cset %[borrow], cc \n"
	    "subs %[r0], %[r0], %[c] \n"
	    "cinc %[borrow], %[borrow], cc \n"
	    : [borrow]"=&r"(borrow), [r0]"=&r"(r0)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
	    : "cc");

	*out_borrow = borrow;
	*out_r0 = r0;
}
336
#define HAVE_BN_QWSUBQW

/*
 * bn_qwsubqw() computes the quad word difference
 * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where the
 * incoming borrow is expected to be 0 or 1; out_borrow is the borrow
 * out (0 or 1) of the most significant word subtraction.
 */
static inline void
bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
    BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
    BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG r3, r2, r1, r0;

	/*
	 * The first subs discards its result (xzr) and exists only to
	 * seed the carry flag: 0 - borrow clears C (signalling a borrow)
	 * iff borrow != 0.  The sbcs chain then propagates it across the
	 * four word subtractions.
	 */
	__asm__ (
	    "subs xzr, xzr, %[borrow] \n"
	    "sbcs %[r0], %[a0], %[b0] \n"
	    "sbcs %[r1], %[a1], %[b1] \n"
	    "sbcs %[r2], %[a2], %[b2] \n"
	    "sbcs %[r3], %[a3], %[b3] \n"
	    "cset %[borrow], cc \n"
	    : [borrow]"+r"(borrow), [r3]"=&r"(r3), [r2]"=&r"(r2),
	      [r1]"=&r"(r1), [r0]"=&r"(r0)
	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
	      [b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
	    : "cc");

	*out_borrow = borrow;
	*out_r3 = r3;
	*out_r2 = r2;
	*out_r1 = r1;
	*out_r0 = r0;
}
365
366 #endif /* __GNUC__ */
367
368 #endif
369 #endif
370