/* xref: /openbsd-src/lib/libcrypto/bn/arch/aarch64/bn_arch.h (revision 8c374f8eb8bb9ae2675552b2215313ebacdb69f4) */
/*	$OpenBSD: bn_arch.h,v 1.13 2023/07/24 10:21:29 jsing Exp $ */
/*
 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

18 #include <openssl/bn.h>
19 
20 #ifndef HEADER_BN_ARCH_H
21 #define HEADER_BN_ARCH_H
22 
23 #ifndef OPENSSL_NO_ASM
24 
25 #if defined(__GNUC__)
26 
27 #define HAVE_BN_CLZW
28 
29 static inline int
bn_clzw(BN_ULONG w)30 bn_clzw(BN_ULONG w)
31 {
32 	BN_ULONG n;
33 
34 	__asm__ ("clz   %[n], %[w]"
35 	    : [n]"=r"(n)
36 	    : [w]"r"(w));
37 
38 	return n;
39 }
40 
41 #define HAVE_BN_ADDW
42 
43 static inline void
bn_addw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)44 bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
45 {
46 	BN_ULONG carry, r0;
47 
48 	__asm__ (
49 	    "adds  %[r0], %[a], %[b] \n"
50 	    "cset  %[carry], cs \n"
51 	    : [carry]"=r"(carry), [r0]"=r"(r0)
52 	    : [a]"r"(a), [b]"r"(b)
53 	    : "cc");
54 
55 	*out_r1 = carry;
56 	*out_r0 = r0;
57 }
58 
59 #define HAVE_BN_ADDW_ADDW
60 
61 static inline void
bn_addw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r1,BN_ULONG * out_r0)62 bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
63     BN_ULONG *out_r0)
64 {
65 	BN_ULONG carry, r0;
66 
67 	__asm__ (
68 	    "adds  %[r0], %[a], %[b] \n"
69 	    "cset  %[carry], cs \n"
70 	    "adds  %[r0], %[r0], %[c] \n"
71 	    "cinc  %[carry], %[carry], cs \n"
72 	    : [carry]"=&r"(carry), [r0]"=&r"(r0)
73 	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
74 	    : "cc");
75 
76 	*out_r1 = carry;
77 	*out_r0 = r0;
78 }
79 
80 #define HAVE_BN_QWADDQW
81 
82 static inline void
bn_qwaddqw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b3,BN_ULONG b2,BN_ULONG b1,BN_ULONG b0,BN_ULONG carry,BN_ULONG * out_carry,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)83 bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
84     BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
85     BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
86 {
87 	BN_ULONG r3, r2, r1, r0;
88 
89 	__asm__ (
90 	    "adds  xzr, %[carry], #-1 \n"
91 	    "adcs  %[r0], %[a0], %[b0] \n"
92 	    "adcs  %[r1], %[a1], %[b1] \n"
93 	    "adcs  %[r2], %[a2], %[b2] \n"
94 	    "adcs  %[r3], %[a3], %[b3] \n"
95 	    "cset  %[carry], cs \n"
96 	    : [carry]"+r"(carry), [r3]"=&r"(r3), [r2]"=&r"(r2),
97 		[r1]"=&r"(r1), [r0]"=&r"(r0)
98 	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
99 		[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
100 	    : "cc");
101 
102 	*out_carry = carry;
103 	*out_r3 = r3;
104 	*out_r2 = r2;
105 	*out_r1 = r1;
106 	*out_r0 = r0;
107 }
108 
109 #define HAVE_BN_MULW
110 
111 static inline void
bn_mulw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)112 bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
113 {
114 	BN_ULONG r1, r0;
115 
116 	/* Unsigned multiplication using a umulh/mul pair. */
117 	__asm__ (
118 	    "umulh %[r1], %[a], %[b] \n"
119 	    "mul   %[r0], %[a], %[b] \n"
120 	    : [r1]"=&r"(r1), [r0]"=r"(r0)
121 	    : [a]"r"(a), [b]"r"(b));
122 
123 	*out_r1 = r1;
124 	*out_r0 = r0;
125 }
126 
127 #define HAVE_BN_MULW_ADDW
128 
129 static inline void
bn_mulw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r1,BN_ULONG * out_r0)130 bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
131     BN_ULONG *out_r0)
132 {
133 	BN_ULONG r1, r0;
134 
135 	__asm__ (
136 	    "umulh  %[r1], %[a], %[b] \n"
137 	    "mul    %[r0], %[a], %[b] \n"
138 	    "adds   %[r0], %[r0], %[c] \n"
139 	    "adc    %[r1], %[r1], xzr \n"
140 	    : [r1]"=&r"(r1), [r0]"=&r"(r0)
141 	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
142 	    : "cc");
143 
144 	*out_r1 = r1;
145 	*out_r0 = r0;
146 }
147 
148 #define HAVE_BN_MULW_ADDW_ADDW
149 
150 static inline void
bn_mulw_addw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG d,BN_ULONG * out_r1,BN_ULONG * out_r0)151 bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
152     BN_ULONG *out_r1, BN_ULONG *out_r0)
153 {
154 	BN_ULONG r1, r0;
155 
156 	__asm__ (
157 	    "umulh  %[r1], %[a], %[b] \n"
158 	    "mul    %[r0], %[a], %[b] \n"
159 	    "adds   %[r0], %[r0], %[c] \n"
160 	    "adc    %[r1], %[r1], xzr \n"
161 	    "adds   %[r0], %[r0], %[d] \n"
162 	    "adc    %[r1], %[r1], xzr \n"
163 	    : [r1]"=&r"(r1), [r0]"=&r"(r0)
164 	    : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d)
165 	    : "cc");
166 
167 	*out_r1 = r1;
168 	*out_r0 = r0;
169 }
170 
171 #define HAVE_BN_MULW_ADDTW
172 
173 static inline void
bn_mulw_addtw(BN_ULONG a,BN_ULONG b,BN_ULONG c2,BN_ULONG c1,BN_ULONG c0,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)174 bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
175     BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
176 {
177 	BN_ULONG r2, r1, r0;
178 
179 	__asm__ (
180 	    "umulh  %[r1], %[a], %[b] \n"
181 	    "mul    %[r0], %[a], %[b] \n"
182 	    "adds   %[r0], %[r0], %[c0] \n"
183 	    "adcs   %[r1], %[r1], %[c1] \n"
184 	    "adc    %[r2], xzr, %[c2] \n"
185 	    : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0)
186 	    : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
187 	    : "cc");
188 
189 	*out_r2 = r2;
190 	*out_r1 = r1;
191 	*out_r0 = r0;
192 }
193 
194 #define HAVE_BN_MUL2_MULW_ADDTW
195 
196 static inline void
bn_mul2_mulw_addtw(BN_ULONG a,BN_ULONG b,BN_ULONG c2,BN_ULONG c1,BN_ULONG c0,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)197 bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
198     BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
199 {
200 	BN_ULONG r2, r1, r0, x1, x0;
201 
202 	__asm__ (
203 	    "umulh  %[x1], %[a], %[b] \n"
204 	    "mul    %[x0], %[a], %[b] \n"
205 	    "adds   %[r0], %[c0], %[x0] \n"
206 	    "adcs   %[r1], %[c1], %[x1] \n"
207 	    "adc    %[r2], xzr, %[c2] \n"
208 	    "adds   %[r0], %[r0], %[x0] \n"
209 	    "adcs   %[r1], %[r1], %[x1] \n"
210 	    "adc    %[r2], xzr, %[r2] \n"
211 	    : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1),
212 		[x0]"=&r"(x0)
213 	    : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
214 	    : "cc");
215 
216 	*out_r2 = r2;
217 	*out_r1 = r1;
218 	*out_r0 = r0;
219 }
220 
221 #define HAVE_BN_QWMULW_ADDW
222 
223 static inline void
bn_qwmulw_addw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r4,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)224 bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
225     BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
226     BN_ULONG *out_r1, BN_ULONG *out_r0)
227 {
228 	BN_ULONG r4, r3, r2, r1, r0;
229 
230 	__asm__ (
231 	    "umulh  %[r1], %[a0], %[b] \n"
232 	    "mul    %[r0], %[a0], %[b] \n"
233 	    "adds   %[r0], %[r0], %[c] \n"
234 	    "umulh  %[r2], %[a1], %[b] \n"
235 	    "mul     %[c], %[a1], %[b] \n"
236 	    "adcs   %[r1], %[r1], %[c] \n"
237 	    "umulh  %[r3], %[a2], %[b] \n"
238 	    "mul     %[c], %[a2], %[b] \n"
239 	    "adcs   %[r2], %[r2], %[c] \n"
240 	    "umulh  %[r4], %[a3], %[b] \n"
241 	    "mul     %[c], %[a3], %[b] \n"
242 	    "adcs   %[r3], %[r3], %[c] \n"
243 	    "adc    %[r4], %[r4], xzr  \n"
244 	    : [c]"+&r"(c), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
245 		[r1]"=&r"(r1), [r0]"=&r"(r0)
246 	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b)
247 	    : "cc");
248 
249 	*out_r4 = r4;
250 	*out_r3 = r3;
251 	*out_r2 = r2;
252 	*out_r1 = r1;
253 	*out_r0 = r0;
254 }
255 
256 #define HAVE_BN_QWMULW_ADDQW_ADDW
257 
258 static inline void
bn_qwmulw_addqw_addw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b,BN_ULONG c3,BN_ULONG c2,BN_ULONG c1,BN_ULONG c0,BN_ULONG d,BN_ULONG * out_r4,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)259 bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
260     BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
261     BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
262     BN_ULONG *out_r0)
263 {
264 	BN_ULONG r4, r3, r2, r1, r0;
265 
266 	__asm__ (
267 	    "umulh  %[r1], %[a0], %[b]  \n"
268 	    "mul    %[r0], %[a0], %[b]  \n"
269 	    "adds   %[r0], %[r0], %[d]  \n"
270 	    "umulh  %[r2], %[a1], %[b]  \n"
271 	    "mul     %[d], %[a1], %[b]  \n"
272 	    "adcs   %[r1], %[r1], %[d]  \n"
273 	    "umulh  %[r3], %[a2], %[b]  \n"
274 	    "mul     %[d], %[a2], %[b]  \n"
275 	    "adcs   %[r2], %[r2], %[d]  \n"
276 	    "umulh  %[r4], %[a3], %[b]  \n"
277 	    "mul     %[d], %[a3], %[b]  \n"
278 	    "adcs   %[r3], %[r3], %[d]  \n"
279 	    "adc    %[r4], %[r4], xzr   \n"
280 	    "adds   %[r0], %[r0], %[c0] \n"
281 	    "adcs   %[r1], %[r1], %[c1] \n"
282 	    "adcs   %[r2], %[r2], %[c2] \n"
283 	    "adcs   %[r3], %[r3], %[c3] \n"
284 	    "adc    %[r4], %[r4], xzr   \n"
285 	    : [d]"+&r"(d), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
286 		[r1]"=&r"(r1), [r0]"=&r"(r0)
287 	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b),
288 		[c3]"r"(c3), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
289 	    : "cc");
290 
291 	*out_r4 = r4;
292 	*out_r3 = r3;
293 	*out_r2 = r2;
294 	*out_r1 = r1;
295 	*out_r0 = r0;
296 }
297 
298 #define HAVE_BN_SUBW
299 
300 static inline void
bn_subw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_borrow,BN_ULONG * out_r0)301 bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
302 {
303 	BN_ULONG borrow, r0;
304 
305 	__asm__ (
306 	    "subs  %[r0], %[a], %[b] \n"
307 	    "cset  %[borrow], cc \n"
308 	    : [borrow]"=r"(borrow), [r0]"=r"(r0)
309 	    : [a]"r"(a), [b]"r"(b)
310 	    : "cc");
311 
312 	*out_borrow = borrow;
313 	*out_r0 = r0;
314 }
315 
316 #define HAVE_BN_SUBW_SUBW
317 
318 static inline void
bn_subw_subw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_borrow,BN_ULONG * out_r0)319 bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
320     BN_ULONG *out_r0)
321 {
322 	BN_ULONG borrow, r0;
323 
324 	__asm__ (
325 	    "subs  %[r0], %[a], %[b] \n"
326 	    "cset  %[borrow], cc \n"
327 	    "subs  %[r0], %[r0], %[c] \n"
328 	    "cinc  %[borrow], %[borrow], cc \n"
329 	    : [borrow]"=&r"(borrow), [r0]"=&r"(r0)
330 	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
331 	    : "cc");
332 
333 	*out_borrow = borrow;
334 	*out_r0 = r0;
335 }
336 
337 #define HAVE_BN_QWSUBQW
338 
339 static inline void
bn_qwsubqw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b3,BN_ULONG b2,BN_ULONG b1,BN_ULONG b0,BN_ULONG borrow,BN_ULONG * out_borrow,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)340 bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
341     BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
342     BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
343 {
344 	BN_ULONG r3, r2, r1, r0;
345 
346 	__asm__ (
347 	    "subs  xzr, xzr, %[borrow] \n"
348 	    "sbcs  %[r0], %[a0], %[b0] \n"
349 	    "sbcs  %[r1], %[a1], %[b1] \n"
350 	    "sbcs  %[r2], %[a2], %[b2] \n"
351 	    "sbcs  %[r3], %[a3], %[b3] \n"
352 	    "cset  %[borrow], cc \n"
353 	    : [borrow]"+r"(borrow), [r3]"=&r"(r3), [r2]"=&r"(r2),
354 		[r1]"=&r"(r1), [r0]"=&r"(r0)
355 	    : [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
356 		[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
357 	    : "cc");
358 
359 	*out_borrow = borrow;
360 	*out_r3 = r3;
361 	*out_r2 = r2;
362 	*out_r1 = r1;
363 	*out_r0 = r0;
364 }
365 
366 #endif /* __GNUC__ */
367 
368 #endif
369 #endif
370