xref: /onnv-gate/usr/src/common/openssl/crypto/bn/bn_asm.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /* crypto/bn/bn_asm.c */
2*0Sstevel@tonic-gate /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3*0Sstevel@tonic-gate  * All rights reserved.
4*0Sstevel@tonic-gate  *
5*0Sstevel@tonic-gate  * This package is an SSL implementation written
6*0Sstevel@tonic-gate  * by Eric Young (eay@cryptsoft.com).
7*0Sstevel@tonic-gate  * The implementation was written so as to conform with Netscapes SSL.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * This library is free for commercial and non-commercial use as long as
10*0Sstevel@tonic-gate  * the following conditions are aheared to.  The following conditions
11*0Sstevel@tonic-gate  * apply to all code found in this distribution, be it the RC4, RSA,
12*0Sstevel@tonic-gate  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13*0Sstevel@tonic-gate  * included with this distribution is covered by the same copyright terms
14*0Sstevel@tonic-gate  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15*0Sstevel@tonic-gate  *
16*0Sstevel@tonic-gate  * Copyright remains Eric Young's, and as such any Copyright notices in
17*0Sstevel@tonic-gate  * the code are not to be removed.
18*0Sstevel@tonic-gate  * If this package is used in a product, Eric Young should be given attribution
19*0Sstevel@tonic-gate  * as the author of the parts of the library used.
20*0Sstevel@tonic-gate  * This can be in the form of a textual message at program startup or
21*0Sstevel@tonic-gate  * in documentation (online or textual) provided with the package.
22*0Sstevel@tonic-gate  *
23*0Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
24*0Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
25*0Sstevel@tonic-gate  * are met:
26*0Sstevel@tonic-gate  * 1. Redistributions of source code must retain the copyright
27*0Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
28*0Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
29*0Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
30*0Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
31*0Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
32*0Sstevel@tonic-gate  *    must display the following acknowledgement:
33*0Sstevel@tonic-gate  *    "This product includes cryptographic software written by
34*0Sstevel@tonic-gate  *     Eric Young (eay@cryptsoft.com)"
35*0Sstevel@tonic-gate  *    The word 'cryptographic' can be left out if the rouines from the library
36*0Sstevel@tonic-gate  *    being used are not cryptographic related :-).
37*0Sstevel@tonic-gate  * 4. If you include any Windows specific code (or a derivative thereof) from
38*0Sstevel@tonic-gate  *    the apps directory (application code) you must include an acknowledgement:
39*0Sstevel@tonic-gate  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40*0Sstevel@tonic-gate  *
41*0Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42*0Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43*0Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44*0Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45*0Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46*0Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47*0Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48*0Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49*0Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50*0Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51*0Sstevel@tonic-gate  * SUCH DAMAGE.
52*0Sstevel@tonic-gate  *
53*0Sstevel@tonic-gate  * The licence and distribution terms for any publically available version or
54*0Sstevel@tonic-gate  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55*0Sstevel@tonic-gate  * copied and put under another distribution licence
56*0Sstevel@tonic-gate  * [including the GNU Public Licence.]
57*0Sstevel@tonic-gate  */
58*0Sstevel@tonic-gate 
59*0Sstevel@tonic-gate #ifndef BN_DEBUG
60*0Sstevel@tonic-gate # undef NDEBUG /* avoid conflicting definitions */
61*0Sstevel@tonic-gate # define NDEBUG
62*0Sstevel@tonic-gate #endif
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate #include <stdio.h>
65*0Sstevel@tonic-gate #include <assert.h>
66*0Sstevel@tonic-gate #include "cryptlib.h"
67*0Sstevel@tonic-gate #include "bn_lcl.h"
68*0Sstevel@tonic-gate 
69*0Sstevel@tonic-gate #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70*0Sstevel@tonic-gate 
71*0Sstevel@tonic-gate BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
72*0Sstevel@tonic-gate 	{
73*0Sstevel@tonic-gate 	BN_ULONG c1=0;
74*0Sstevel@tonic-gate 
75*0Sstevel@tonic-gate 	assert(num >= 0);
76*0Sstevel@tonic-gate 	if (num <= 0) return(c1);
77*0Sstevel@tonic-gate 
78*0Sstevel@tonic-gate 	while (num&~3)
79*0Sstevel@tonic-gate 		{
80*0Sstevel@tonic-gate 		mul_add(rp[0],ap[0],w,c1);
81*0Sstevel@tonic-gate 		mul_add(rp[1],ap[1],w,c1);
82*0Sstevel@tonic-gate 		mul_add(rp[2],ap[2],w,c1);
83*0Sstevel@tonic-gate 		mul_add(rp[3],ap[3],w,c1);
84*0Sstevel@tonic-gate 		ap+=4; rp+=4; num-=4;
85*0Sstevel@tonic-gate 		}
86*0Sstevel@tonic-gate 	if (num)
87*0Sstevel@tonic-gate 		{
88*0Sstevel@tonic-gate 		mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89*0Sstevel@tonic-gate 		mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90*0Sstevel@tonic-gate 		mul_add(rp[2],ap[2],w,c1); return c1;
91*0Sstevel@tonic-gate 		}
92*0Sstevel@tonic-gate 
93*0Sstevel@tonic-gate 	return(c1);
94*0Sstevel@tonic-gate 	}
95*0Sstevel@tonic-gate 
96*0Sstevel@tonic-gate BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
97*0Sstevel@tonic-gate 	{
98*0Sstevel@tonic-gate 	BN_ULONG c1=0;
99*0Sstevel@tonic-gate 
100*0Sstevel@tonic-gate 	assert(num >= 0);
101*0Sstevel@tonic-gate 	if (num <= 0) return(c1);
102*0Sstevel@tonic-gate 
103*0Sstevel@tonic-gate 	while (num&~3)
104*0Sstevel@tonic-gate 		{
105*0Sstevel@tonic-gate 		mul(rp[0],ap[0],w,c1);
106*0Sstevel@tonic-gate 		mul(rp[1],ap[1],w,c1);
107*0Sstevel@tonic-gate 		mul(rp[2],ap[2],w,c1);
108*0Sstevel@tonic-gate 		mul(rp[3],ap[3],w,c1);
109*0Sstevel@tonic-gate 		ap+=4; rp+=4; num-=4;
110*0Sstevel@tonic-gate 		}
111*0Sstevel@tonic-gate 	if (num)
112*0Sstevel@tonic-gate 		{
113*0Sstevel@tonic-gate 		mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114*0Sstevel@tonic-gate 		mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115*0Sstevel@tonic-gate 		mul(rp[2],ap[2],w,c1);
116*0Sstevel@tonic-gate 		}
117*0Sstevel@tonic-gate 	return(c1);
118*0Sstevel@tonic-gate 	}
119*0Sstevel@tonic-gate 
120*0Sstevel@tonic-gate void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
121*0Sstevel@tonic-gate         {
122*0Sstevel@tonic-gate 	assert(n >= 0);
123*0Sstevel@tonic-gate 	if (n <= 0) return;
124*0Sstevel@tonic-gate 	while (n&~3)
125*0Sstevel@tonic-gate 		{
126*0Sstevel@tonic-gate 		sqr(r[0],r[1],a[0]);
127*0Sstevel@tonic-gate 		sqr(r[2],r[3],a[1]);
128*0Sstevel@tonic-gate 		sqr(r[4],r[5],a[2]);
129*0Sstevel@tonic-gate 		sqr(r[6],r[7],a[3]);
130*0Sstevel@tonic-gate 		a+=4; r+=8; n-=4;
131*0Sstevel@tonic-gate 		}
132*0Sstevel@tonic-gate 	if (n)
133*0Sstevel@tonic-gate 		{
134*0Sstevel@tonic-gate 		sqr(r[0],r[1],a[0]); if (--n == 0) return;
135*0Sstevel@tonic-gate 		sqr(r[2],r[3],a[1]); if (--n == 0) return;
136*0Sstevel@tonic-gate 		sqr(r[4],r[5],a[2]);
137*0Sstevel@tonic-gate 		}
138*0Sstevel@tonic-gate 	}
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
141*0Sstevel@tonic-gate 
142*0Sstevel@tonic-gate BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
143*0Sstevel@tonic-gate 	{
144*0Sstevel@tonic-gate 	BN_ULONG c=0;
145*0Sstevel@tonic-gate 	BN_ULONG bl,bh;
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate 	assert(num >= 0);
148*0Sstevel@tonic-gate 	if (num <= 0) return((BN_ULONG)0);
149*0Sstevel@tonic-gate 
150*0Sstevel@tonic-gate 	bl=LBITS(w);
151*0Sstevel@tonic-gate 	bh=HBITS(w);
152*0Sstevel@tonic-gate 
153*0Sstevel@tonic-gate 	for (;;)
154*0Sstevel@tonic-gate 		{
155*0Sstevel@tonic-gate 		mul_add(rp[0],ap[0],bl,bh,c);
156*0Sstevel@tonic-gate 		if (--num == 0) break;
157*0Sstevel@tonic-gate 		mul_add(rp[1],ap[1],bl,bh,c);
158*0Sstevel@tonic-gate 		if (--num == 0) break;
159*0Sstevel@tonic-gate 		mul_add(rp[2],ap[2],bl,bh,c);
160*0Sstevel@tonic-gate 		if (--num == 0) break;
161*0Sstevel@tonic-gate 		mul_add(rp[3],ap[3],bl,bh,c);
162*0Sstevel@tonic-gate 		if (--num == 0) break;
163*0Sstevel@tonic-gate 		ap+=4;
164*0Sstevel@tonic-gate 		rp+=4;
165*0Sstevel@tonic-gate 		}
166*0Sstevel@tonic-gate 	return(c);
167*0Sstevel@tonic-gate 	}
168*0Sstevel@tonic-gate 
169*0Sstevel@tonic-gate BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
170*0Sstevel@tonic-gate 	{
171*0Sstevel@tonic-gate 	BN_ULONG carry=0;
172*0Sstevel@tonic-gate 	BN_ULONG bl,bh;
173*0Sstevel@tonic-gate 
174*0Sstevel@tonic-gate 	assert(num >= 0);
175*0Sstevel@tonic-gate 	if (num <= 0) return((BN_ULONG)0);
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate 	bl=LBITS(w);
178*0Sstevel@tonic-gate 	bh=HBITS(w);
179*0Sstevel@tonic-gate 
180*0Sstevel@tonic-gate 	for (;;)
181*0Sstevel@tonic-gate 		{
182*0Sstevel@tonic-gate 		mul(rp[0],ap[0],bl,bh,carry);
183*0Sstevel@tonic-gate 		if (--num == 0) break;
184*0Sstevel@tonic-gate 		mul(rp[1],ap[1],bl,bh,carry);
185*0Sstevel@tonic-gate 		if (--num == 0) break;
186*0Sstevel@tonic-gate 		mul(rp[2],ap[2],bl,bh,carry);
187*0Sstevel@tonic-gate 		if (--num == 0) break;
188*0Sstevel@tonic-gate 		mul(rp[3],ap[3],bl,bh,carry);
189*0Sstevel@tonic-gate 		if (--num == 0) break;
190*0Sstevel@tonic-gate 		ap+=4;
191*0Sstevel@tonic-gate 		rp+=4;
192*0Sstevel@tonic-gate 		}
193*0Sstevel@tonic-gate 	return(carry);
194*0Sstevel@tonic-gate 	}
195*0Sstevel@tonic-gate 
196*0Sstevel@tonic-gate void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
197*0Sstevel@tonic-gate         {
198*0Sstevel@tonic-gate 	assert(n >= 0);
199*0Sstevel@tonic-gate 	if (n <= 0) return;
200*0Sstevel@tonic-gate 	for (;;)
201*0Sstevel@tonic-gate 		{
202*0Sstevel@tonic-gate 		sqr64(r[0],r[1],a[0]);
203*0Sstevel@tonic-gate 		if (--n == 0) break;
204*0Sstevel@tonic-gate 
205*0Sstevel@tonic-gate 		sqr64(r[2],r[3],a[1]);
206*0Sstevel@tonic-gate 		if (--n == 0) break;
207*0Sstevel@tonic-gate 
208*0Sstevel@tonic-gate 		sqr64(r[4],r[5],a[2]);
209*0Sstevel@tonic-gate 		if (--n == 0) break;
210*0Sstevel@tonic-gate 
211*0Sstevel@tonic-gate 		sqr64(r[6],r[7],a[3]);
212*0Sstevel@tonic-gate 		if (--n == 0) break;
213*0Sstevel@tonic-gate 
214*0Sstevel@tonic-gate 		a+=4;
215*0Sstevel@tonic-gate 		r+=8;
216*0Sstevel@tonic-gate 		}
217*0Sstevel@tonic-gate 	}
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
220*0Sstevel@tonic-gate 
221*0Sstevel@tonic-gate #if defined(BN_LLONG) && defined(BN_DIV2W)
222*0Sstevel@tonic-gate 
223*0Sstevel@tonic-gate BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
224*0Sstevel@tonic-gate 	{
225*0Sstevel@tonic-gate 	return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
226*0Sstevel@tonic-gate 	}
227*0Sstevel@tonic-gate 
228*0Sstevel@tonic-gate #else
229*0Sstevel@tonic-gate 
230*0Sstevel@tonic-gate /* Divide h,l by d and return the result. */
231*0Sstevel@tonic-gate /* I need to test this some more :-( */
232*0Sstevel@tonic-gate BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
233*0Sstevel@tonic-gate 	{
234*0Sstevel@tonic-gate 	BN_ULONG dh,dl,q,ret=0,th,tl,t;
235*0Sstevel@tonic-gate 	int i,count=2;
236*0Sstevel@tonic-gate 
237*0Sstevel@tonic-gate 	if (d == 0) return(BN_MASK2);
238*0Sstevel@tonic-gate 
239*0Sstevel@tonic-gate 	i=BN_num_bits_word(d);
240*0Sstevel@tonic-gate 	assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i));
241*0Sstevel@tonic-gate 
242*0Sstevel@tonic-gate 	i=BN_BITS2-i;
243*0Sstevel@tonic-gate 	if (h >= d) h-=d;
244*0Sstevel@tonic-gate 
245*0Sstevel@tonic-gate 	if (i)
246*0Sstevel@tonic-gate 		{
247*0Sstevel@tonic-gate 		d<<=i;
248*0Sstevel@tonic-gate 		h=(h<<i)|(l>>(BN_BITS2-i));
249*0Sstevel@tonic-gate 		l<<=i;
250*0Sstevel@tonic-gate 		}
251*0Sstevel@tonic-gate 	dh=(d&BN_MASK2h)>>BN_BITS4;
252*0Sstevel@tonic-gate 	dl=(d&BN_MASK2l);
253*0Sstevel@tonic-gate 	for (;;)
254*0Sstevel@tonic-gate 		{
255*0Sstevel@tonic-gate 		if ((h>>BN_BITS4) == dh)
256*0Sstevel@tonic-gate 			q=BN_MASK2l;
257*0Sstevel@tonic-gate 		else
258*0Sstevel@tonic-gate 			q=h/dh;
259*0Sstevel@tonic-gate 
260*0Sstevel@tonic-gate 		th=q*dh;
261*0Sstevel@tonic-gate 		tl=dl*q;
262*0Sstevel@tonic-gate 		for (;;)
263*0Sstevel@tonic-gate 			{
264*0Sstevel@tonic-gate 			t=h-th;
265*0Sstevel@tonic-gate 			if ((t&BN_MASK2h) ||
266*0Sstevel@tonic-gate 				((tl) <= (
267*0Sstevel@tonic-gate 					(t<<BN_BITS4)|
268*0Sstevel@tonic-gate 					((l&BN_MASK2h)>>BN_BITS4))))
269*0Sstevel@tonic-gate 				break;
270*0Sstevel@tonic-gate 			q--;
271*0Sstevel@tonic-gate 			th-=dh;
272*0Sstevel@tonic-gate 			tl-=dl;
273*0Sstevel@tonic-gate 			}
274*0Sstevel@tonic-gate 		t=(tl>>BN_BITS4);
275*0Sstevel@tonic-gate 		tl=(tl<<BN_BITS4)&BN_MASK2h;
276*0Sstevel@tonic-gate 		th+=t;
277*0Sstevel@tonic-gate 
278*0Sstevel@tonic-gate 		if (l < tl) th++;
279*0Sstevel@tonic-gate 		l-=tl;
280*0Sstevel@tonic-gate 		if (h < th)
281*0Sstevel@tonic-gate 			{
282*0Sstevel@tonic-gate 			h+=d;
283*0Sstevel@tonic-gate 			q--;
284*0Sstevel@tonic-gate 			}
285*0Sstevel@tonic-gate 		h-=th;
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate 		if (--count == 0) break;
288*0Sstevel@tonic-gate 
289*0Sstevel@tonic-gate 		ret=q<<BN_BITS4;
290*0Sstevel@tonic-gate 		h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
291*0Sstevel@tonic-gate 		l=(l&BN_MASK2l)<<BN_BITS4;
292*0Sstevel@tonic-gate 		}
293*0Sstevel@tonic-gate 	ret|=q;
294*0Sstevel@tonic-gate 	return(ret);
295*0Sstevel@tonic-gate 	}
296*0Sstevel@tonic-gate #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
297*0Sstevel@tonic-gate 
298*0Sstevel@tonic-gate #ifdef BN_LLONG
299*0Sstevel@tonic-gate BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
300*0Sstevel@tonic-gate         {
301*0Sstevel@tonic-gate 	BN_ULLONG ll=0;
302*0Sstevel@tonic-gate 
303*0Sstevel@tonic-gate 	assert(n >= 0);
304*0Sstevel@tonic-gate 	if (n <= 0) return((BN_ULONG)0);
305*0Sstevel@tonic-gate 
306*0Sstevel@tonic-gate 	for (;;)
307*0Sstevel@tonic-gate 		{
308*0Sstevel@tonic-gate 		ll+=(BN_ULLONG)a[0]+b[0];
309*0Sstevel@tonic-gate 		r[0]=(BN_ULONG)ll&BN_MASK2;
310*0Sstevel@tonic-gate 		ll>>=BN_BITS2;
311*0Sstevel@tonic-gate 		if (--n <= 0) break;
312*0Sstevel@tonic-gate 
313*0Sstevel@tonic-gate 		ll+=(BN_ULLONG)a[1]+b[1];
314*0Sstevel@tonic-gate 		r[1]=(BN_ULONG)ll&BN_MASK2;
315*0Sstevel@tonic-gate 		ll>>=BN_BITS2;
316*0Sstevel@tonic-gate 		if (--n <= 0) break;
317*0Sstevel@tonic-gate 
318*0Sstevel@tonic-gate 		ll+=(BN_ULLONG)a[2]+b[2];
319*0Sstevel@tonic-gate 		r[2]=(BN_ULONG)ll&BN_MASK2;
320*0Sstevel@tonic-gate 		ll>>=BN_BITS2;
321*0Sstevel@tonic-gate 		if (--n <= 0) break;
322*0Sstevel@tonic-gate 
323*0Sstevel@tonic-gate 		ll+=(BN_ULLONG)a[3]+b[3];
324*0Sstevel@tonic-gate 		r[3]=(BN_ULONG)ll&BN_MASK2;
325*0Sstevel@tonic-gate 		ll>>=BN_BITS2;
326*0Sstevel@tonic-gate 		if (--n <= 0) break;
327*0Sstevel@tonic-gate 
328*0Sstevel@tonic-gate 		a+=4;
329*0Sstevel@tonic-gate 		b+=4;
330*0Sstevel@tonic-gate 		r+=4;
331*0Sstevel@tonic-gate 		}
332*0Sstevel@tonic-gate 	return((BN_ULONG)ll);
333*0Sstevel@tonic-gate 	}
334*0Sstevel@tonic-gate #else /* !BN_LLONG */
335*0Sstevel@tonic-gate BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
336*0Sstevel@tonic-gate         {
337*0Sstevel@tonic-gate 	BN_ULONG c,l,t;
338*0Sstevel@tonic-gate 
339*0Sstevel@tonic-gate 	assert(n >= 0);
340*0Sstevel@tonic-gate 	if (n <= 0) return((BN_ULONG)0);
341*0Sstevel@tonic-gate 
342*0Sstevel@tonic-gate 	c=0;
343*0Sstevel@tonic-gate 	for (;;)
344*0Sstevel@tonic-gate 		{
345*0Sstevel@tonic-gate 		t=a[0];
346*0Sstevel@tonic-gate 		t=(t+c)&BN_MASK2;
347*0Sstevel@tonic-gate 		c=(t < c);
348*0Sstevel@tonic-gate 		l=(t+b[0])&BN_MASK2;
349*0Sstevel@tonic-gate 		c+=(l < t);
350*0Sstevel@tonic-gate 		r[0]=l;
351*0Sstevel@tonic-gate 		if (--n <= 0) break;
352*0Sstevel@tonic-gate 
353*0Sstevel@tonic-gate 		t=a[1];
354*0Sstevel@tonic-gate 		t=(t+c)&BN_MASK2;
355*0Sstevel@tonic-gate 		c=(t < c);
356*0Sstevel@tonic-gate 		l=(t+b[1])&BN_MASK2;
357*0Sstevel@tonic-gate 		c+=(l < t);
358*0Sstevel@tonic-gate 		r[1]=l;
359*0Sstevel@tonic-gate 		if (--n <= 0) break;
360*0Sstevel@tonic-gate 
361*0Sstevel@tonic-gate 		t=a[2];
362*0Sstevel@tonic-gate 		t=(t+c)&BN_MASK2;
363*0Sstevel@tonic-gate 		c=(t < c);
364*0Sstevel@tonic-gate 		l=(t+b[2])&BN_MASK2;
365*0Sstevel@tonic-gate 		c+=(l < t);
366*0Sstevel@tonic-gate 		r[2]=l;
367*0Sstevel@tonic-gate 		if (--n <= 0) break;
368*0Sstevel@tonic-gate 
369*0Sstevel@tonic-gate 		t=a[3];
370*0Sstevel@tonic-gate 		t=(t+c)&BN_MASK2;
371*0Sstevel@tonic-gate 		c=(t < c);
372*0Sstevel@tonic-gate 		l=(t+b[3])&BN_MASK2;
373*0Sstevel@tonic-gate 		c+=(l < t);
374*0Sstevel@tonic-gate 		r[3]=l;
375*0Sstevel@tonic-gate 		if (--n <= 0) break;
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate 		a+=4;
378*0Sstevel@tonic-gate 		b+=4;
379*0Sstevel@tonic-gate 		r+=4;
380*0Sstevel@tonic-gate 		}
381*0Sstevel@tonic-gate 	return((BN_ULONG)c);
382*0Sstevel@tonic-gate 	}
383*0Sstevel@tonic-gate #endif /* !BN_LLONG */
384*0Sstevel@tonic-gate 
385*0Sstevel@tonic-gate BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
386*0Sstevel@tonic-gate         {
387*0Sstevel@tonic-gate 	BN_ULONG t1,t2;
388*0Sstevel@tonic-gate 	int c=0;
389*0Sstevel@tonic-gate 
390*0Sstevel@tonic-gate 	assert(n >= 0);
391*0Sstevel@tonic-gate 	if (n <= 0) return((BN_ULONG)0);
392*0Sstevel@tonic-gate 
393*0Sstevel@tonic-gate 	for (;;)
394*0Sstevel@tonic-gate 		{
395*0Sstevel@tonic-gate 		t1=a[0]; t2=b[0];
396*0Sstevel@tonic-gate 		r[0]=(t1-t2-c)&BN_MASK2;
397*0Sstevel@tonic-gate 		if (t1 != t2) c=(t1 < t2);
398*0Sstevel@tonic-gate 		if (--n <= 0) break;
399*0Sstevel@tonic-gate 
400*0Sstevel@tonic-gate 		t1=a[1]; t2=b[1];
401*0Sstevel@tonic-gate 		r[1]=(t1-t2-c)&BN_MASK2;
402*0Sstevel@tonic-gate 		if (t1 != t2) c=(t1 < t2);
403*0Sstevel@tonic-gate 		if (--n <= 0) break;
404*0Sstevel@tonic-gate 
405*0Sstevel@tonic-gate 		t1=a[2]; t2=b[2];
406*0Sstevel@tonic-gate 		r[2]=(t1-t2-c)&BN_MASK2;
407*0Sstevel@tonic-gate 		if (t1 != t2) c=(t1 < t2);
408*0Sstevel@tonic-gate 		if (--n <= 0) break;
409*0Sstevel@tonic-gate 
410*0Sstevel@tonic-gate 		t1=a[3]; t2=b[3];
411*0Sstevel@tonic-gate 		r[3]=(t1-t2-c)&BN_MASK2;
412*0Sstevel@tonic-gate 		if (t1 != t2) c=(t1 < t2);
413*0Sstevel@tonic-gate 		if (--n <= 0) break;
414*0Sstevel@tonic-gate 
415*0Sstevel@tonic-gate 		a+=4;
416*0Sstevel@tonic-gate 		b+=4;
417*0Sstevel@tonic-gate 		r+=4;
418*0Sstevel@tonic-gate 		}
419*0Sstevel@tonic-gate 	return(c);
420*0Sstevel@tonic-gate 	}
421*0Sstevel@tonic-gate 
422*0Sstevel@tonic-gate #ifdef BN_MUL_COMBA
423*0Sstevel@tonic-gate 
#undef bn_mul_comba8
#undef bn_mul_comba4
#undef bn_sqr_comba8
#undef bn_sqr_comba4

/* Three-word accumulator helpers for the comba routines.  c=(c2,c1,c0) is a
 * three-word number with c0 the least significant word.
 *
 * mul_add_c(a,b,c0,c1,c2)    -- c+=a*b
 * mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b
 * sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2
 * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j]
 *
 * Every macro expands to exactly one statement (do { ... } while (0)), so
 * it is safe as the body of an unbraced if/else or loop; write it with a
 * trailing semicolon.
 *
 * The expansions use scratch variables that the calling function must
 * declare:
 *   BN_LLONG:       BN_ULLONG t (plus tt for the doubling forms),
 *                   BN_ULONG t1,t2
 *   BN_UMULT_HIGH:  BN_ULONG t1,t2
 *   otherwise:      BN_ULONG t1,t2,bl,bh
 * Arguments may be evaluated more than once; do not pass expressions with
 * side effects.
 */

#ifdef BN_LLONG
#define mul_add_c(a,b,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)*(b); \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while (0)

#define mul_add_c2(a,b,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)*(b); \
	tt=(t+t)&BN_MASK; \
	if (tt < t) c2++; \
	t1=(BN_ULONG)Lw(tt); \
	t2=(BN_ULONG)Hw(tt); \
	c0=(c0+t1)&BN_MASK2;  \
	if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while (0)

#define sqr_add_c(a,i,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)[i]*(a)[i]; \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#elif defined(BN_UMULT_HIGH)

#define mul_add_c(a,b,c0,c1,c2)	do {	\
	BN_ULONG ta=(a),tb=(b);		\
	t1 = ta * tb;			\
	t2 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while (0)

#define mul_add_c2(a,b,c0,c1,c2) do {	\
	BN_ULONG ta=(a),tb=(b),t0;	\
	t1 = BN_UMULT_HIGH(ta,tb);	\
	t0 = ta * tb;			\
	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while (0)

#define sqr_add_c(a,i,c0,c1,c2)	do {	\
	BN_ULONG ta=(a)[i];		\
	t1 = ta * ta;			\
	t2 = BN_UMULT_HIGH(ta,ta);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#else /* !BN_LLONG && !BN_UMULT_HIGH */
#define mul_add_c(a,b,c0,c1,c2) do { \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while (0)

#define mul_add_c2(a,b,c0,c1,c2) do { \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	if (t2 & BN_TBIT) c2++; \
	t2=(t2+t2)&BN_MASK2; \
	if (t1 & BN_TBIT) t2++; \
	t1=(t1+t1)&BN_MASK2; \
	c0=(c0+t1)&BN_MASK2;  \
	if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while (0)

#define sqr_add_c(a,i,c0,c1,c2) do { \
	sqr64(t1,t2,(a)[i]); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while (0)

#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
#endif /* BN_LLONG / BN_UMULT_HIGH / portable */
521*0Sstevel@tonic-gate 
522*0Sstevel@tonic-gate void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
523*0Sstevel@tonic-gate 	{
524*0Sstevel@tonic-gate #ifdef BN_LLONG
525*0Sstevel@tonic-gate 	BN_ULLONG t;
526*0Sstevel@tonic-gate #else
527*0Sstevel@tonic-gate 	BN_ULONG bl,bh;
528*0Sstevel@tonic-gate #endif
529*0Sstevel@tonic-gate 	BN_ULONG t1,t2;
530*0Sstevel@tonic-gate 	BN_ULONG c1,c2,c3;
531*0Sstevel@tonic-gate 
532*0Sstevel@tonic-gate 	c1=0;
533*0Sstevel@tonic-gate 	c2=0;
534*0Sstevel@tonic-gate 	c3=0;
535*0Sstevel@tonic-gate 	mul_add_c(a[0],b[0],c1,c2,c3);
536*0Sstevel@tonic-gate 	r[0]=c1;
537*0Sstevel@tonic-gate 	c1=0;
538*0Sstevel@tonic-gate 	mul_add_c(a[0],b[1],c2,c3,c1);
539*0Sstevel@tonic-gate 	mul_add_c(a[1],b[0],c2,c3,c1);
540*0Sstevel@tonic-gate 	r[1]=c2;
541*0Sstevel@tonic-gate 	c2=0;
542*0Sstevel@tonic-gate 	mul_add_c(a[2],b[0],c3,c1,c2);
543*0Sstevel@tonic-gate 	mul_add_c(a[1],b[1],c3,c1,c2);
544*0Sstevel@tonic-gate 	mul_add_c(a[0],b[2],c3,c1,c2);
545*0Sstevel@tonic-gate 	r[2]=c3;
546*0Sstevel@tonic-gate 	c3=0;
547*0Sstevel@tonic-gate 	mul_add_c(a[0],b[3],c1,c2,c3);
548*0Sstevel@tonic-gate 	mul_add_c(a[1],b[2],c1,c2,c3);
549*0Sstevel@tonic-gate 	mul_add_c(a[2],b[1],c1,c2,c3);
550*0Sstevel@tonic-gate 	mul_add_c(a[3],b[0],c1,c2,c3);
551*0Sstevel@tonic-gate 	r[3]=c1;
552*0Sstevel@tonic-gate 	c1=0;
553*0Sstevel@tonic-gate 	mul_add_c(a[4],b[0],c2,c3,c1);
554*0Sstevel@tonic-gate 	mul_add_c(a[3],b[1],c2,c3,c1);
555*0Sstevel@tonic-gate 	mul_add_c(a[2],b[2],c2,c3,c1);
556*0Sstevel@tonic-gate 	mul_add_c(a[1],b[3],c2,c3,c1);
557*0Sstevel@tonic-gate 	mul_add_c(a[0],b[4],c2,c3,c1);
558*0Sstevel@tonic-gate 	r[4]=c2;
559*0Sstevel@tonic-gate 	c2=0;
560*0Sstevel@tonic-gate 	mul_add_c(a[0],b[5],c3,c1,c2);
561*0Sstevel@tonic-gate 	mul_add_c(a[1],b[4],c3,c1,c2);
562*0Sstevel@tonic-gate 	mul_add_c(a[2],b[3],c3,c1,c2);
563*0Sstevel@tonic-gate 	mul_add_c(a[3],b[2],c3,c1,c2);
564*0Sstevel@tonic-gate 	mul_add_c(a[4],b[1],c3,c1,c2);
565*0Sstevel@tonic-gate 	mul_add_c(a[5],b[0],c3,c1,c2);
566*0Sstevel@tonic-gate 	r[5]=c3;
567*0Sstevel@tonic-gate 	c3=0;
568*0Sstevel@tonic-gate 	mul_add_c(a[6],b[0],c1,c2,c3);
569*0Sstevel@tonic-gate 	mul_add_c(a[5],b[1],c1,c2,c3);
570*0Sstevel@tonic-gate 	mul_add_c(a[4],b[2],c1,c2,c3);
571*0Sstevel@tonic-gate 	mul_add_c(a[3],b[3],c1,c2,c3);
572*0Sstevel@tonic-gate 	mul_add_c(a[2],b[4],c1,c2,c3);
573*0Sstevel@tonic-gate 	mul_add_c(a[1],b[5],c1,c2,c3);
574*0Sstevel@tonic-gate 	mul_add_c(a[0],b[6],c1,c2,c3);
575*0Sstevel@tonic-gate 	r[6]=c1;
576*0Sstevel@tonic-gate 	c1=0;
577*0Sstevel@tonic-gate 	mul_add_c(a[0],b[7],c2,c3,c1);
578*0Sstevel@tonic-gate 	mul_add_c(a[1],b[6],c2,c3,c1);
579*0Sstevel@tonic-gate 	mul_add_c(a[2],b[5],c2,c3,c1);
580*0Sstevel@tonic-gate 	mul_add_c(a[3],b[4],c2,c3,c1);
581*0Sstevel@tonic-gate 	mul_add_c(a[4],b[3],c2,c3,c1);
582*0Sstevel@tonic-gate 	mul_add_c(a[5],b[2],c2,c3,c1);
583*0Sstevel@tonic-gate 	mul_add_c(a[6],b[1],c2,c3,c1);
584*0Sstevel@tonic-gate 	mul_add_c(a[7],b[0],c2,c3,c1);
585*0Sstevel@tonic-gate 	r[7]=c2;
586*0Sstevel@tonic-gate 	c2=0;
587*0Sstevel@tonic-gate 	mul_add_c(a[7],b[1],c3,c1,c2);
588*0Sstevel@tonic-gate 	mul_add_c(a[6],b[2],c3,c1,c2);
589*0Sstevel@tonic-gate 	mul_add_c(a[5],b[3],c3,c1,c2);
590*0Sstevel@tonic-gate 	mul_add_c(a[4],b[4],c3,c1,c2);
591*0Sstevel@tonic-gate 	mul_add_c(a[3],b[5],c3,c1,c2);
592*0Sstevel@tonic-gate 	mul_add_c(a[2],b[6],c3,c1,c2);
593*0Sstevel@tonic-gate 	mul_add_c(a[1],b[7],c3,c1,c2);
594*0Sstevel@tonic-gate 	r[8]=c3;
595*0Sstevel@tonic-gate 	c3=0;
596*0Sstevel@tonic-gate 	mul_add_c(a[2],b[7],c1,c2,c3);
597*0Sstevel@tonic-gate 	mul_add_c(a[3],b[6],c1,c2,c3);
598*0Sstevel@tonic-gate 	mul_add_c(a[4],b[5],c1,c2,c3);
599*0Sstevel@tonic-gate 	mul_add_c(a[5],b[4],c1,c2,c3);
600*0Sstevel@tonic-gate 	mul_add_c(a[6],b[3],c1,c2,c3);
601*0Sstevel@tonic-gate 	mul_add_c(a[7],b[2],c1,c2,c3);
602*0Sstevel@tonic-gate 	r[9]=c1;
603*0Sstevel@tonic-gate 	c1=0;
604*0Sstevel@tonic-gate 	mul_add_c(a[7],b[3],c2,c3,c1);
605*0Sstevel@tonic-gate 	mul_add_c(a[6],b[4],c2,c3,c1);
606*0Sstevel@tonic-gate 	mul_add_c(a[5],b[5],c2,c3,c1);
607*0Sstevel@tonic-gate 	mul_add_c(a[4],b[6],c2,c3,c1);
608*0Sstevel@tonic-gate 	mul_add_c(a[3],b[7],c2,c3,c1);
609*0Sstevel@tonic-gate 	r[10]=c2;
610*0Sstevel@tonic-gate 	c2=0;
611*0Sstevel@tonic-gate 	mul_add_c(a[4],b[7],c3,c1,c2);
612*0Sstevel@tonic-gate 	mul_add_c(a[5],b[6],c3,c1,c2);
613*0Sstevel@tonic-gate 	mul_add_c(a[6],b[5],c3,c1,c2);
614*0Sstevel@tonic-gate 	mul_add_c(a[7],b[4],c3,c1,c2);
615*0Sstevel@tonic-gate 	r[11]=c3;
616*0Sstevel@tonic-gate 	c3=0;
617*0Sstevel@tonic-gate 	mul_add_c(a[7],b[5],c1,c2,c3);
618*0Sstevel@tonic-gate 	mul_add_c(a[6],b[6],c1,c2,c3);
619*0Sstevel@tonic-gate 	mul_add_c(a[5],b[7],c1,c2,c3);
620*0Sstevel@tonic-gate 	r[12]=c1;
621*0Sstevel@tonic-gate 	c1=0;
622*0Sstevel@tonic-gate 	mul_add_c(a[6],b[7],c2,c3,c1);
623*0Sstevel@tonic-gate 	mul_add_c(a[7],b[6],c2,c3,c1);
624*0Sstevel@tonic-gate 	r[13]=c2;
625*0Sstevel@tonic-gate 	c2=0;
626*0Sstevel@tonic-gate 	mul_add_c(a[7],b[7],c3,c1,c2);
627*0Sstevel@tonic-gate 	r[14]=c3;
628*0Sstevel@tonic-gate 	r[15]=c1;
629*0Sstevel@tonic-gate 	}
630*0Sstevel@tonic-gate 
631*0Sstevel@tonic-gate void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
632*0Sstevel@tonic-gate 	{
633*0Sstevel@tonic-gate #ifdef BN_LLONG
634*0Sstevel@tonic-gate 	BN_ULLONG t;
635*0Sstevel@tonic-gate #else
636*0Sstevel@tonic-gate 	BN_ULONG bl,bh;
637*0Sstevel@tonic-gate #endif
638*0Sstevel@tonic-gate 	BN_ULONG t1,t2;
639*0Sstevel@tonic-gate 	BN_ULONG c1,c2,c3;
640*0Sstevel@tonic-gate 
641*0Sstevel@tonic-gate 	c1=0;
642*0Sstevel@tonic-gate 	c2=0;
643*0Sstevel@tonic-gate 	c3=0;
644*0Sstevel@tonic-gate 	mul_add_c(a[0],b[0],c1,c2,c3);
645*0Sstevel@tonic-gate 	r[0]=c1;
646*0Sstevel@tonic-gate 	c1=0;
647*0Sstevel@tonic-gate 	mul_add_c(a[0],b[1],c2,c3,c1);
648*0Sstevel@tonic-gate 	mul_add_c(a[1],b[0],c2,c3,c1);
649*0Sstevel@tonic-gate 	r[1]=c2;
650*0Sstevel@tonic-gate 	c2=0;
651*0Sstevel@tonic-gate 	mul_add_c(a[2],b[0],c3,c1,c2);
652*0Sstevel@tonic-gate 	mul_add_c(a[1],b[1],c3,c1,c2);
653*0Sstevel@tonic-gate 	mul_add_c(a[0],b[2],c3,c1,c2);
654*0Sstevel@tonic-gate 	r[2]=c3;
655*0Sstevel@tonic-gate 	c3=0;
656*0Sstevel@tonic-gate 	mul_add_c(a[0],b[3],c1,c2,c3);
657*0Sstevel@tonic-gate 	mul_add_c(a[1],b[2],c1,c2,c3);
658*0Sstevel@tonic-gate 	mul_add_c(a[2],b[1],c1,c2,c3);
659*0Sstevel@tonic-gate 	mul_add_c(a[3],b[0],c1,c2,c3);
660*0Sstevel@tonic-gate 	r[3]=c1;
661*0Sstevel@tonic-gate 	c1=0;
662*0Sstevel@tonic-gate 	mul_add_c(a[3],b[1],c2,c3,c1);
663*0Sstevel@tonic-gate 	mul_add_c(a[2],b[2],c2,c3,c1);
664*0Sstevel@tonic-gate 	mul_add_c(a[1],b[3],c2,c3,c1);
665*0Sstevel@tonic-gate 	r[4]=c2;
666*0Sstevel@tonic-gate 	c2=0;
667*0Sstevel@tonic-gate 	mul_add_c(a[2],b[3],c3,c1,c2);
668*0Sstevel@tonic-gate 	mul_add_c(a[3],b[2],c3,c1,c2);
669*0Sstevel@tonic-gate 	r[5]=c3;
670*0Sstevel@tonic-gate 	c3=0;
671*0Sstevel@tonic-gate 	mul_add_c(a[3],b[3],c1,c2,c3);
672*0Sstevel@tonic-gate 	r[6]=c1;
673*0Sstevel@tonic-gate 	r[7]=c2;
674*0Sstevel@tonic-gate 	}
675*0Sstevel@tonic-gate 
/*
 * bn_sqr_comba8 (BN_MUL_COMBA build): fills r[0..15] from the eight words
 * a[0..7] using the sqr_add_c / sqr_add_c2 macros, which are defined
 * outside this function.
 *
 * The body is fully unrolled, one group of macro calls per output word
 * r[k]: sqr_add_c(a,i,...) appears when i+i == k, and sqr_add_c2(a,i,j,...)
 * appears for every pair i > j with i+j == k.  After each group the first
 * accumulator argument is stored to r[k] and reset to 0, and c1/c2/c3
 * rotate roles for the next group.
 *
 * NOTE(review): presumably sqr_add_c adds a[i]*a[i] and sqr_add_c2 adds
 * 2*a[i]*a[j] into the three-word accumulator, which would make r the
 * 16-word square of a -- confirm against the macro definitions.
 */
676*0Sstevel@tonic-gate void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
677*0Sstevel@tonic-gate 	{
	/*
	 * t,tt / bl,bh and t1,t2 are never named in this body; presumably
	 * they are scratch variables picked up by name inside the
	 * sqr_add_c* macros -- confirm before removing or renaming them.
	 */
678*0Sstevel@tonic-gate #ifdef BN_LLONG
679*0Sstevel@tonic-gate 	BN_ULLONG t,tt;
680*0Sstevel@tonic-gate #else
681*0Sstevel@tonic-gate 	BN_ULONG bl,bh;
682*0Sstevel@tonic-gate #endif
683*0Sstevel@tonic-gate 	BN_ULONG t1,t2;
684*0Sstevel@tonic-gate 	BN_ULONG c1,c2,c3;
685*0Sstevel@tonic-gate 
	/* All three accumulator words start at zero. */
686*0Sstevel@tonic-gate 	c1=0;
687*0Sstevel@tonic-gate 	c2=0;
688*0Sstevel@tonic-gate 	c3=0;
	/* Groups for r[0]..r[7]: index sums 0 through 7. */
689*0Sstevel@tonic-gate 	sqr_add_c(a,0,c1,c2,c3);
690*0Sstevel@tonic-gate 	r[0]=c1;
691*0Sstevel@tonic-gate 	c1=0;
692*0Sstevel@tonic-gate 	sqr_add_c2(a,1,0,c2,c3,c1);
693*0Sstevel@tonic-gate 	r[1]=c2;
694*0Sstevel@tonic-gate 	c2=0;
695*0Sstevel@tonic-gate 	sqr_add_c(a,1,c3,c1,c2);
696*0Sstevel@tonic-gate 	sqr_add_c2(a,2,0,c3,c1,c2);
697*0Sstevel@tonic-gate 	r[2]=c3;
698*0Sstevel@tonic-gate 	c3=0;
699*0Sstevel@tonic-gate 	sqr_add_c2(a,3,0,c1,c2,c3);
700*0Sstevel@tonic-gate 	sqr_add_c2(a,2,1,c1,c2,c3);
701*0Sstevel@tonic-gate 	r[3]=c1;
702*0Sstevel@tonic-gate 	c1=0;
703*0Sstevel@tonic-gate 	sqr_add_c(a,2,c2,c3,c1);
704*0Sstevel@tonic-gate 	sqr_add_c2(a,3,1,c2,c3,c1);
705*0Sstevel@tonic-gate 	sqr_add_c2(a,4,0,c2,c3,c1);
706*0Sstevel@tonic-gate 	r[4]=c2;
707*0Sstevel@tonic-gate 	c2=0;
708*0Sstevel@tonic-gate 	sqr_add_c2(a,5,0,c3,c1,c2);
709*0Sstevel@tonic-gate 	sqr_add_c2(a,4,1,c3,c1,c2);
710*0Sstevel@tonic-gate 	sqr_add_c2(a,3,2,c3,c1,c2);
711*0Sstevel@tonic-gate 	r[5]=c3;
712*0Sstevel@tonic-gate 	c3=0;
713*0Sstevel@tonic-gate 	sqr_add_c(a,3,c1,c2,c3);
714*0Sstevel@tonic-gate 	sqr_add_c2(a,4,2,c1,c2,c3);
715*0Sstevel@tonic-gate 	sqr_add_c2(a,5,1,c1,c2,c3);
716*0Sstevel@tonic-gate 	sqr_add_c2(a,6,0,c1,c2,c3);
717*0Sstevel@tonic-gate 	r[6]=c1;
718*0Sstevel@tonic-gate 	c1=0;
719*0Sstevel@tonic-gate 	sqr_add_c2(a,7,0,c2,c3,c1);
720*0Sstevel@tonic-gate 	sqr_add_c2(a,6,1,c2,c3,c1);
721*0Sstevel@tonic-gate 	sqr_add_c2(a,5,2,c2,c3,c1);
722*0Sstevel@tonic-gate 	sqr_add_c2(a,4,3,c2,c3,c1);
723*0Sstevel@tonic-gate 	r[7]=c2;
724*0Sstevel@tonic-gate 	c2=0;
	/* Groups for r[8]..r[14]: index sums 8 through 14, low index >= 1. */
725*0Sstevel@tonic-gate 	sqr_add_c(a,4,c3,c1,c2);
726*0Sstevel@tonic-gate 	sqr_add_c2(a,5,3,c3,c1,c2);
727*0Sstevel@tonic-gate 	sqr_add_c2(a,6,2,c3,c1,c2);
728*0Sstevel@tonic-gate 	sqr_add_c2(a,7,1,c3,c1,c2);
729*0Sstevel@tonic-gate 	r[8]=c3;
730*0Sstevel@tonic-gate 	c3=0;
731*0Sstevel@tonic-gate 	sqr_add_c2(a,7,2,c1,c2,c3);
732*0Sstevel@tonic-gate 	sqr_add_c2(a,6,3,c1,c2,c3);
733*0Sstevel@tonic-gate 	sqr_add_c2(a,5,4,c1,c2,c3);
734*0Sstevel@tonic-gate 	r[9]=c1;
735*0Sstevel@tonic-gate 	c1=0;
736*0Sstevel@tonic-gate 	sqr_add_c(a,5,c2,c3,c1);
737*0Sstevel@tonic-gate 	sqr_add_c2(a,6,4,c2,c3,c1);
738*0Sstevel@tonic-gate 	sqr_add_c2(a,7,3,c2,c3,c1);
739*0Sstevel@tonic-gate 	r[10]=c2;
740*0Sstevel@tonic-gate 	c2=0;
741*0Sstevel@tonic-gate 	sqr_add_c2(a,7,4,c3,c1,c2);
742*0Sstevel@tonic-gate 	sqr_add_c2(a,6,5,c3,c1,c2);
743*0Sstevel@tonic-gate 	r[11]=c3;
744*0Sstevel@tonic-gate 	c3=0;
745*0Sstevel@tonic-gate 	sqr_add_c(a,6,c1,c2,c3);
746*0Sstevel@tonic-gate 	sqr_add_c2(a,7,5,c1,c2,c3);
747*0Sstevel@tonic-gate 	r[12]=c1;
748*0Sstevel@tonic-gate 	c1=0;
749*0Sstevel@tonic-gate 	sqr_add_c2(a,7,6,c2,c3,c1);
750*0Sstevel@tonic-gate 	r[13]=c2;
751*0Sstevel@tonic-gate 	c2=0;
752*0Sstevel@tonic-gate 	sqr_add_c(a,7,c3,c1,c2);
	/*
	 * Final group: the first two accumulator words go to r[14] and
	 * r[15]; the third one (c2 here) is not stored.
	 */
753*0Sstevel@tonic-gate 	r[14]=c3;
754*0Sstevel@tonic-gate 	r[15]=c1;
755*0Sstevel@tonic-gate 	}
756*0Sstevel@tonic-gate 
/*
 * bn_sqr_comba4 (BN_MUL_COMBA build): fills r[0..7] from the four words
 * a[0..3] using the sqr_add_c / sqr_add_c2 macros, which are defined
 * outside this function.
 *
 * The body is fully unrolled, one group of macro calls per output word
 * r[k]: sqr_add_c(a,i,...) appears when i+i == k, and sqr_add_c2(a,i,j,...)
 * appears for every pair i > j with i+j == k.  After each group the first
 * accumulator argument is stored to r[k] and reset to 0, and c1/c2/c3
 * rotate roles for the next group.
 *
 * NOTE(review): presumably sqr_add_c adds a[i]*a[i] and sqr_add_c2 adds
 * 2*a[i]*a[j] into the three-word accumulator, which would make r the
 * 8-word square of a -- confirm against the macro definitions.
 */
757*0Sstevel@tonic-gate void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
758*0Sstevel@tonic-gate 	{
	/*
	 * t,tt / bl,bh and t1,t2 are never named in this body; presumably
	 * they are scratch variables picked up by name inside the
	 * sqr_add_c* macros -- confirm before removing or renaming them.
	 */
759*0Sstevel@tonic-gate #ifdef BN_LLONG
760*0Sstevel@tonic-gate 	BN_ULLONG t,tt;
761*0Sstevel@tonic-gate #else
762*0Sstevel@tonic-gate 	BN_ULONG bl,bh;
763*0Sstevel@tonic-gate #endif
764*0Sstevel@tonic-gate 	BN_ULONG t1,t2;
765*0Sstevel@tonic-gate 	BN_ULONG c1,c2,c3;
766*0Sstevel@tonic-gate 
	/* All three accumulator words start at zero. */
767*0Sstevel@tonic-gate 	c1=0;
768*0Sstevel@tonic-gate 	c2=0;
769*0Sstevel@tonic-gate 	c3=0;
	/* Groups for r[0]..r[3]: index sums 0 through 3. */
770*0Sstevel@tonic-gate 	sqr_add_c(a,0,c1,c2,c3);
771*0Sstevel@tonic-gate 	r[0]=c1;
772*0Sstevel@tonic-gate 	c1=0;
773*0Sstevel@tonic-gate 	sqr_add_c2(a,1,0,c2,c3,c1);
774*0Sstevel@tonic-gate 	r[1]=c2;
775*0Sstevel@tonic-gate 	c2=0;
776*0Sstevel@tonic-gate 	sqr_add_c(a,1,c3,c1,c2);
777*0Sstevel@tonic-gate 	sqr_add_c2(a,2,0,c3,c1,c2);
778*0Sstevel@tonic-gate 	r[2]=c3;
779*0Sstevel@tonic-gate 	c3=0;
780*0Sstevel@tonic-gate 	sqr_add_c2(a,3,0,c1,c2,c3);
781*0Sstevel@tonic-gate 	sqr_add_c2(a,2,1,c1,c2,c3);
782*0Sstevel@tonic-gate 	r[3]=c1;
783*0Sstevel@tonic-gate 	c1=0;
	/* Groups for r[4]..r[6]: index sums 4 through 6, low index >= 1. */
784*0Sstevel@tonic-gate 	sqr_add_c(a,2,c2,c3,c1);
785*0Sstevel@tonic-gate 	sqr_add_c2(a,3,1,c2,c3,c1);
786*0Sstevel@tonic-gate 	r[4]=c2;
787*0Sstevel@tonic-gate 	c2=0;
788*0Sstevel@tonic-gate 	sqr_add_c2(a,3,2,c3,c1,c2);
789*0Sstevel@tonic-gate 	r[5]=c3;
790*0Sstevel@tonic-gate 	c3=0;
791*0Sstevel@tonic-gate 	sqr_add_c(a,3,c1,c2,c3);
	/*
	 * Final group: the first two accumulator words go to r[6] and
	 * r[7]; the third one (c3 here) is not stored.
	 */
792*0Sstevel@tonic-gate 	r[6]=c1;
793*0Sstevel@tonic-gate 	r[7]=c2;
794*0Sstevel@tonic-gate 	}
795*0Sstevel@tonic-gate #else /* !BN_MUL_COMBA */
796*0Sstevel@tonic-gate 
797*0Sstevel@tonic-gate /* hmm... is it faster just to do a multiply? */
798*0Sstevel@tonic-gate #undef bn_sqr_comba4
799*0Sstevel@tonic-gate void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
800*0Sstevel@tonic-gate 	{
801*0Sstevel@tonic-gate 	BN_ULONG t[8];
802*0Sstevel@tonic-gate 	bn_sqr_normal(r,a,4,t);
803*0Sstevel@tonic-gate 	}
804*0Sstevel@tonic-gate 
805*0Sstevel@tonic-gate #undef bn_sqr_comba8
806*0Sstevel@tonic-gate void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
807*0Sstevel@tonic-gate 	{
808*0Sstevel@tonic-gate 	BN_ULONG t[16];
809*0Sstevel@tonic-gate 	bn_sqr_normal(r,a,8,t);
810*0Sstevel@tonic-gate 	}
811*0Sstevel@tonic-gate 
812*0Sstevel@tonic-gate void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
813*0Sstevel@tonic-gate 	{
814*0Sstevel@tonic-gate 	r[4]=bn_mul_words(    &(r[0]),a,4,b[0]);
815*0Sstevel@tonic-gate 	r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
816*0Sstevel@tonic-gate 	r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
817*0Sstevel@tonic-gate 	r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
818*0Sstevel@tonic-gate 	}
819*0Sstevel@tonic-gate 
820*0Sstevel@tonic-gate void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
821*0Sstevel@tonic-gate 	{
822*0Sstevel@tonic-gate 	r[ 8]=bn_mul_words(    &(r[0]),a,8,b[0]);
823*0Sstevel@tonic-gate 	r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
824*0Sstevel@tonic-gate 	r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
825*0Sstevel@tonic-gate 	r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
826*0Sstevel@tonic-gate 	r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
827*0Sstevel@tonic-gate 	r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
828*0Sstevel@tonic-gate 	r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
829*0Sstevel@tonic-gate 	r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
830*0Sstevel@tonic-gate 	}
831*0Sstevel@tonic-gate 
832*0Sstevel@tonic-gate #endif /* !BN_MUL_COMBA */
833