xref: /openbsd-src/lib/libcrypto/bn/bn_internal.h (revision 05c892ee32b66778bad594d80ab6bb86e6972cc4)
1 /*	$OpenBSD: bn_internal.h,v 1.15 2023/06/25 11:42:26 jsing Exp $ */
2 /*
3  * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <openssl/bn.h>
19 
20 #include "bn_arch.h"
21 
22 #ifndef HEADER_BN_INTERNAL_H
23 #define HEADER_BN_INTERNAL_H
24 
25 int bn_word_clz(BN_ULONG w);
26 
27 int bn_bitsize(const BIGNUM *bn);
28 
29 #ifndef HAVE_BN_CT_NE_ZERO
30 static inline int
bn_ct_ne_zero(BN_ULONG w)31 bn_ct_ne_zero(BN_ULONG w)
32 {
33 	return (w | ~(w - 1)) >> (BN_BITS2 - 1);
34 }
35 #endif
36 
37 #ifndef HAVE_BN_CT_NE_ZERO_MASK
38 static inline BN_ULONG
bn_ct_ne_zero_mask(BN_ULONG w)39 bn_ct_ne_zero_mask(BN_ULONG w)
40 {
41 	return 0 - bn_ct_ne_zero(w);
42 }
43 #endif
44 
45 #ifndef HAVE_BN_CT_EQ_ZERO
46 static inline int
bn_ct_eq_zero(BN_ULONG w)47 bn_ct_eq_zero(BN_ULONG w)
48 {
49 	return 1 - bn_ct_ne_zero(w);
50 }
51 #endif
52 
53 #ifndef HAVE_BN_CT_EQ_ZERO_MASK
54 static inline BN_ULONG
bn_ct_eq_zero_mask(BN_ULONG w)55 bn_ct_eq_zero_mask(BN_ULONG w)
56 {
57 	return 0 - bn_ct_eq_zero(w);
58 }
59 #endif
60 
61 #ifndef HAVE_BN_CLZW
62 static inline int
bn_clzw(BN_ULONG w)63 bn_clzw(BN_ULONG w)
64 {
65 	return bn_word_clz(w);
66 }
67 #endif
68 
/*
 * Big number primitives are named as the operation followed by a suffix
 * that indicates the number of words that it operates on, where 'w' means
 * single word, 'dw' means double word, 'tw' means triple word and 'qw' means
 * quadruple word. Unless otherwise noted, the size of the output is implied
 * based on its inputs, for example bn_mulw() takes two single word inputs
 * and is going to produce a double word result.
 *
 * Where a function implements multiple operations, these are listed in order.
 * For example, a function that computes (r1:r0) = a * b + c is named
 * bn_mulw_addw(), producing a double word result.
 */
81 
/*
 * Default implementations for BN_ULLONG architectures.
 *
 * On these platforms the C compiler is generally better at optimising without
 * the use of inline assembly primitives. However, it can be difficult for the
 * compiler to see through primitives in order to combine operations, due to
 * type changes/narrowing. For this reason compound primitives are usually
 * explicitly provided.
 */
91 #ifdef BN_ULLONG
92 
93 #ifndef HAVE_BN_ADDW
94 #define HAVE_BN_ADDW
95 static inline void
bn_addw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)96 bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
97 {
98 	BN_ULLONG r;
99 
100 	r = (BN_ULLONG)a + (BN_ULLONG)b;
101 
102 	*out_r1 = r >> BN_BITS2;
103 	*out_r0 = r & BN_MASK2;
104 }
105 #endif
106 
107 #ifndef HAVE_BN_ADDW_ADDW
108 #define HAVE_BN_ADDW_ADDW
109 static inline void
bn_addw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r1,BN_ULONG * out_r0)110 bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
111     BN_ULONG *out_r0)
112 {
113 	BN_ULLONG r;
114 
115 	r = (BN_ULLONG)a + (BN_ULLONG)b + (BN_ULLONG)c;
116 
117 	*out_r1 = r >> BN_BITS2;
118 	*out_r0 = r & BN_MASK2;
119 }
120 #endif
121 
122 #ifndef HAVE_BN_MULW
123 #define HAVE_BN_MULW
124 static inline void
bn_mulw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)125 bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
126 {
127 	BN_ULLONG r;
128 
129 	r = (BN_ULLONG)a * (BN_ULLONG)b;
130 
131 	*out_r1 = r >> BN_BITS2;
132 	*out_r0 = r & BN_MASK2;
133 }
134 #endif
135 
136 #ifndef HAVE_BN_MULW_ADDW
137 #define HAVE_BN_MULW_ADDW
138 static inline void
bn_mulw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r1,BN_ULONG * out_r0)139 bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
140     BN_ULONG *out_r0)
141 {
142 	BN_ULLONG r;
143 
144 	r = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c;
145 
146 	*out_r1 = r >> BN_BITS2;
147 	*out_r0 = r & BN_MASK2;
148 }
149 #endif
150 
151 #ifndef HAVE_BN_MULW_ADDW_ADDW
152 #define HAVE_BN_MULW_ADDW_ADDW
153 static inline void
bn_mulw_addw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG d,BN_ULONG * out_r1,BN_ULONG * out_r0)154 bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
155     BN_ULONG *out_r1, BN_ULONG *out_r0)
156 {
157 	BN_ULLONG r;
158 
159 	r = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c + (BN_ULLONG)d;
160 
161 	*out_r1 = r >> BN_BITS2;
162 	*out_r0 = r & BN_MASK2;
163 }
164 #endif
165 
#endif /* BN_ULLONG */
167 
168 /*
169  * bn_addw() computes (r1:r0) = a + b, where both inputs are single words,
170  * producing a double word result. The value of r1 is the carry from the
171  * addition.
172  */
173 #ifndef HAVE_BN_ADDW
174 static inline void
bn_addw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)175 bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
176 {
177 	BN_ULONG r1, r0, c1, c2;
178 
179 	c1 = a | b;
180 	c2 = a & b;
181 	r0 = a + b;
182 	r1 = ((c1 & ~r0) | c2) >> (BN_BITS2 - 1); /* carry */
183 
184 	*out_r1 = r1;
185 	*out_r0 = r0;
186 }
187 #endif
188 
189 /*
190  * bn_addw_addw() computes (r1:r0) = a + b + c, where all inputs are single
191  * words, producing a double word result.
192  */
193 #ifndef HAVE_BN_ADDW_ADDW
194 static inline void
bn_addw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r1,BN_ULONG * out_r0)195 bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
196     BN_ULONG *out_r0)
197 {
198 	BN_ULONG carry, r1, r0;
199 
200 	bn_addw(a, b, &r1, &r0);
201 	bn_addw(r0, c, &carry, &r0);
202 	r1 += carry;
203 
204 	*out_r1 = r1;
205 	*out_r0 = r0;
206 }
207 #endif
208 
209 /*
210  * bn_qwaddqw() computes
211  * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where a is a quad word,
212  * b is a quad word, and carry is a single word with value 0 or 1, producing a four
213  * word result and carry.
214  */
215 #ifndef HAVE_BN_QWADDQW
216 static inline void
bn_qwaddqw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b3,BN_ULONG b2,BN_ULONG b1,BN_ULONG b0,BN_ULONG carry,BN_ULONG * out_carry,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)217 bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
218     BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
219     BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
220 {
221 	BN_ULONG r3, r2, r1, r0;
222 
223 	bn_addw_addw(a0, b0, carry, &carry, &r0);
224 	bn_addw_addw(a1, b1, carry, &carry, &r1);
225 	bn_addw_addw(a2, b2, carry, &carry, &r2);
226 	bn_addw_addw(a3, b3, carry, &carry, &r3);
227 
228 	*out_carry = carry;
229 	*out_r3 = r3;
230 	*out_r2 = r2;
231 	*out_r1 = r1;
232 	*out_r0 = r0;
233 }
234 #endif
235 
236 /*
237  * bn_subw() computes r0 = a - b, where both inputs are single words,
238  * producing a single word result and borrow.
239  */
240 #ifndef HAVE_BN_SUBW
241 static inline void
bn_subw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_borrow,BN_ULONG * out_r0)242 bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
243 {
244 	BN_ULONG borrow, r0;
245 
246 	r0 = a - b;
247 	borrow = ((r0 | (b & ~a)) & (b | ~a)) >> (BN_BITS2 - 1);
248 
249 	*out_borrow = borrow;
250 	*out_r0 = r0;
251 }
252 #endif
253 
254 /*
255  * bn_subw_subw() computes r0 = a - b - c, where all inputs are single words,
256  * producing a single word result and borrow.
257  */
258 #ifndef HAVE_BN_SUBW_SUBW
259 static inline void
bn_subw_subw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_borrow,BN_ULONG * out_r0)260 bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
261     BN_ULONG *out_r0)
262 {
263 	BN_ULONG b1, b2, r0;
264 
265 	bn_subw(a, b, &b1, &r0);
266 	bn_subw(r0, c, &b2, &r0);
267 
268 	*out_borrow = b1 + b2;
269 	*out_r0 = r0;
270 }
271 #endif
272 
273 /*
274  * bn_qwsubqw() computes
275  * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where a is a quad word,
276  * b is a quad word, and borrow is a single word with value 0 or 1, producing a
277  * four word result and borrow.
278  */
279 #ifndef HAVE_BN_QWSUBQW
280 static inline void
bn_qwsubqw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b3,BN_ULONG b2,BN_ULONG b1,BN_ULONG b0,BN_ULONG borrow,BN_ULONG * out_borrow,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)281 bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
282     BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
283     BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
284 {
285 	BN_ULONG r3, r2, r1, r0;
286 
287 	bn_subw_subw(a0, b0, borrow, &borrow, &r0);
288 	bn_subw_subw(a1, b1, borrow, &borrow, &r1);
289 	bn_subw_subw(a2, b2, borrow, &borrow, &r2);
290 	bn_subw_subw(a3, b3, borrow, &borrow, &r3);
291 
292 	*out_borrow = borrow;
293 	*out_r3 = r3;
294 	*out_r2 = r2;
295 	*out_r1 = r1;
296 	*out_r0 = r0;
297 }
298 #endif
299 
300 /*
301  * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words,
302  * producing a double word result.
303  */
304 #ifndef HAVE_BN_MULW
305 /*
306  * Multiply two words (a * b) producing a double word result (h:l).
307  *
308  * This can be rewritten as:
309  *
310  *  a * b = (hi32(a) * 2^32 + lo32(a)) * (hi32(b) * 2^32 + lo32(b))
311  *        = hi32(a) * hi32(b) * 2^64 +
312  *          hi32(a) * lo32(b) * 2^32 +
313  *          hi32(b) * lo32(a) * 2^32 +
314  *          lo32(a) * lo32(b)
315  *
316  * The multiplication for each part of a and b can be calculated for each of
317  * these four terms without overflowing a BN_ULONG, as the maximum value of a
318  * 32 bit x 32 bit multiplication is 32 + 32 = 64 bits. Once these
319  * multiplications have been performed the result can be partitioned and summed
320  * into a double word (h:l). The same applies on a 32 bit system, substituting
321  * 16 for 32 and 32 for 64.
322  */
323 #if 1
324 static inline void
bn_mulw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)325 bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
326 {
327 	BN_ULONG a1, a0, b1, b0, r1, r0;
328 	BN_ULONG carry, x;
329 
330 	a1 = a >> BN_BITS4;
331 	a0 = a & BN_MASK2l;
332 	b1 = b >> BN_BITS4;
333 	b0 = b & BN_MASK2l;
334 
335 	r1 = a1 * b1;
336 	r0 = a0 * b0;
337 
338 	/* (a1 * b0) << BN_BITS4, partition the result across r1:r0 with carry. */
339 	x = a1 * b0;
340 	r1 += x >> BN_BITS4;
341 	bn_addw(r0, x << BN_BITS4, &carry, &r0);
342 	r1 += carry;
343 
344 	/* (b1 * a0) << BN_BITS4, partition the result across r1:r0 with carry. */
345 	x = b1 * a0;
346 	r1 += x >> BN_BITS4;
347 	bn_addw(r0, x << BN_BITS4, &carry, &r0);
348 	r1 += carry;
349 
350 	*out_r1 = r1;
351 	*out_r0 = r0;
352 }
353 #else
354 
355 /*
356  * XXX - this accumulator based version uses fewer instructions, however
357  * requires more variables/registers. It seems to be slower on at least amd64
358  * and i386, however may be faster on other architectures that have more
359  * registers available. Further testing is required and one of the two
360  * implementations should eventually be removed.
361  */
362 static inline void
bn_mulw(BN_ULONG a,BN_ULONG b,BN_ULONG * out_r1,BN_ULONG * out_r0)363 bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
364 {
365 	BN_ULONG a1, a0, b1, b0, r1, r0, x;
366 	BN_ULONG acc0, acc1, acc2, acc3;
367 
368 	a1 = a >> BN_BITS4;
369 	b1 = b >> BN_BITS4;
370 	a0 = a & BN_MASK2l;
371 	b0 = b & BN_MASK2l;
372 
373 	r1 = a1 * b1;
374 	r0 = a0 * b0;
375 
376 	acc0 = r0 & BN_MASK2l;
377 	acc1 = r0 >> BN_BITS4;
378 	acc2 = r1 & BN_MASK2l;
379 	acc3 = r1 >> BN_BITS4;
380 
381 	/* (a1 * b0) << BN_BITS4, partition the result across r1:r0. */
382 	x = a1 * b0;
383 	acc1 += x & BN_MASK2l;
384 	acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
385 	acc1 &= BN_MASK2l;
386 	acc3 += acc2 >> BN_BITS4;
387 	acc2 &= BN_MASK2l;
388 
389 	/* (b1 * a0) << BN_BITS4, partition the result across r1:r0. */
390 	x = b1 * a0;
391 	acc1 += x & BN_MASK2l;
392 	acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
393 	acc1 &= BN_MASK2l;
394 	acc3 += acc2 >> BN_BITS4;
395 	acc2 &= BN_MASK2l;
396 
397 	*out_r1 = (acc3 << BN_BITS4) | acc2;
398 	*out_r0 = (acc1 << BN_BITS4) | acc0;
399 }
400 #endif
401 #endif
402 
403 #ifndef HAVE_BN_MULW_LO
404 static inline BN_ULONG
bn_mulw_lo(BN_ULONG a,BN_ULONG b)405 bn_mulw_lo(BN_ULONG a, BN_ULONG b)
406 {
407 	return a * b;
408 }
409 #endif
410 
411 #ifndef HAVE_BN_MULW_HI
412 static inline BN_ULONG
bn_mulw_hi(BN_ULONG a,BN_ULONG b)413 bn_mulw_hi(BN_ULONG a, BN_ULONG b)
414 {
415 	BN_ULONG h, l;
416 
417 	bn_mulw(a, b, &h, &l);
418 
419 	return h;
420 }
421 #endif
422 
423 /*
424  * bn_mulw_addw() computes (r1:r0) = a * b + c with all inputs being single
425  * words, producing a double word result.
426  */
427 #ifndef HAVE_BN_MULW_ADDW
428 static inline void
bn_mulw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r1,BN_ULONG * out_r0)429 bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
430     BN_ULONG *out_r0)
431 {
432 	BN_ULONG carry, r1, r0;
433 
434 	bn_mulw(a, b, &r1, &r0);
435 	bn_addw(r0, c, &carry, &r0);
436 	r1 += carry;
437 
438 	*out_r1 = r1;
439 	*out_r0 = r0;
440 }
441 #endif
442 
443 /*
444  * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d with all inputs being
445  * single words, producing a double word result.
446  */
447 #ifndef HAVE_BN_MULW_ADDW_ADDW
448 static inline void
bn_mulw_addw_addw(BN_ULONG a,BN_ULONG b,BN_ULONG c,BN_ULONG d,BN_ULONG * out_r1,BN_ULONG * out_r0)449 bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
450     BN_ULONG *out_r1, BN_ULONG *out_r0)
451 {
452 	BN_ULONG carry, r1, r0;
453 
454 	bn_mulw_addw(a, b, c, &r1, &r0);
455 	bn_addw(r0, d, &carry, &r0);
456 	r1 += carry;
457 
458 	*out_r1 = r1;
459 	*out_r0 = r0;
460 }
461 #endif
462 
463 /*
464  * bn_mulw_addtw() computes (r2:r1:r0) = a * b + (c2:c1:c0), where a and b are
465  * single words and (c2:c1:c0) is a triple word, producing a triple word result.
466  * The caller must ensure that the inputs provided do not result in c2
467  * overflowing.
468  */
469 #ifndef HAVE_BN_MULW_ADDTW
470 static inline void
bn_mulw_addtw(BN_ULONG a,BN_ULONG b,BN_ULONG c2,BN_ULONG c1,BN_ULONG c0,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)471 bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
472     BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
473 {
474 	BN_ULONG carry, r2, r1, r0, x1;
475 
476 	bn_mulw_addw(a, b, c0, &x1, &r0);
477 	bn_addw(c1, x1, &carry, &r1);
478 	r2 = c2 + carry;
479 
480 	*out_r2 = r2;
481 	*out_r1 = r1;
482 	*out_r0 = r0;
483 }
484 #endif
485 
486 /*
487  * bn_mul2_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), where a
488  * and b are single words and (c2:c1:c0) is a triple word, producing a triple
489  * word result. The caller must ensure that the inputs provided do not result
490  * in c2 overflowing.
491  */
492 #ifndef HAVE_BN_MUL2_MULW_ADDTW
493 static inline void
bn_mul2_mulw_addtw(BN_ULONG a,BN_ULONG b,BN_ULONG c2,BN_ULONG c1,BN_ULONG c0,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)494 bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
495     BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
496 {
497 	BN_ULONG r2, r1, r0, x1, x0;
498 	BN_ULONG carry;
499 
500 	bn_mulw(a, b, &x1, &x0);
501 	bn_addw(c0, x0, &carry, &r0);
502 	bn_addw(c1, x1 + carry, &r2, &r1);
503 	bn_addw(c2, r2, &carry, &r2);
504 	bn_addw(r0, x0, &carry, &r0);
505 	bn_addw(r1, x1 + carry, &carry, &r1);
506 	r2 += carry;
507 
508 	*out_r2 = r2;
509 	*out_r1 = r1;
510 	*out_r0 = r0;
511 }
512 #endif
513 
514 /*
515  * bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, where a
516  * is a quad word, b is a single word and c is a single word, producing a five
517  * word result.
518  */
519 #ifndef HAVE_BN_QWMULW_ADDW
520 static inline void
bn_qwmulw_addw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b,BN_ULONG c,BN_ULONG * out_r4,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)521 bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
522     BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
523     BN_ULONG *out_r1, BN_ULONG *out_r0)
524 {
525 	BN_ULONG r3, r2, r1, r0;
526 
527 	bn_mulw_addw(a0, b, c, &c, &r0);
528 	bn_mulw_addw(a1, b, c, &c, &r1);
529 	bn_mulw_addw(a2, b, c, &c, &r2);
530 	bn_mulw_addw(a3, b, c, &c, &r3);
531 
532 	*out_r4 = c;
533 	*out_r3 = r3;
534 	*out_r2 = r2;
535 	*out_r1 = r1;
536 	*out_r0 = r0;
537 }
538 #endif
539 
540 /*
541  * bn_qwmulw_addqw_addw() computes
542  * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, where a
543  * is a quad word, b is a single word, c is a quad word, and d is a single word,
544  * producing a five word result.
545  */
546 #ifndef HAVE_BN_QWMULW_ADDQW_ADDW
547 static inline void
bn_qwmulw_addqw_addw(BN_ULONG a3,BN_ULONG a2,BN_ULONG a1,BN_ULONG a0,BN_ULONG b,BN_ULONG c3,BN_ULONG c2,BN_ULONG c1,BN_ULONG c0,BN_ULONG d,BN_ULONG * out_r4,BN_ULONG * out_r3,BN_ULONG * out_r2,BN_ULONG * out_r1,BN_ULONG * out_r0)548 bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
549     BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
550     BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
551     BN_ULONG *out_r0)
552 {
553 	BN_ULONG r3, r2, r1, r0;
554 
555 	bn_mulw_addw_addw(a0, b, c0, d, &d, &r0);
556 	bn_mulw_addw_addw(a1, b, c1, d, &d, &r1);
557 	bn_mulw_addw_addw(a2, b, c2, d, &d, &r2);
558 	bn_mulw_addw_addw(a3, b, c3, d, &d, &r3);
559 
560 	*out_r4 = d;
561 	*out_r3 = r3;
562 	*out_r2 = r2;
563 	*out_r1 = r1;
564 	*out_r0 = r0;
565 }
566 #endif
567 
568 #endif
569