xref: /onnv-gate/usr/src/common/openssl/crypto/bn/bn_asm.c (revision 2139:6243c3338933)
10Sstevel@tonic-gate /* crypto/bn/bn_asm.c */
20Sstevel@tonic-gate /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
30Sstevel@tonic-gate  * All rights reserved.
40Sstevel@tonic-gate  *
50Sstevel@tonic-gate  * This package is an SSL implementation written
60Sstevel@tonic-gate  * by Eric Young (eay@cryptsoft.com).
70Sstevel@tonic-gate  * The implementation was written so as to conform with Netscapes SSL.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * This library is free for commercial and non-commercial use as long as
100Sstevel@tonic-gate  * the following conditions are aheared to.  The following conditions
110Sstevel@tonic-gate  * apply to all code found in this distribution, be it the RC4, RSA,
120Sstevel@tonic-gate  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
130Sstevel@tonic-gate  * included with this distribution is covered by the same copyright terms
140Sstevel@tonic-gate  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
150Sstevel@tonic-gate  *
160Sstevel@tonic-gate  * Copyright remains Eric Young's, and as such any Copyright notices in
170Sstevel@tonic-gate  * the code are not to be removed.
180Sstevel@tonic-gate  * If this package is used in a product, Eric Young should be given attribution
190Sstevel@tonic-gate  * as the author of the parts of the library used.
200Sstevel@tonic-gate  * This can be in the form of a textual message at program startup or
210Sstevel@tonic-gate  * in documentation (online or textual) provided with the package.
220Sstevel@tonic-gate  *
230Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
240Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
250Sstevel@tonic-gate  * are met:
260Sstevel@tonic-gate  * 1. Redistributions of source code must retain the copyright
270Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
280Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
290Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
300Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
310Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
320Sstevel@tonic-gate  *    must display the following acknowledgement:
330Sstevel@tonic-gate  *    "This product includes cryptographic software written by
340Sstevel@tonic-gate  *     Eric Young (eay@cryptsoft.com)"
350Sstevel@tonic-gate  *    The word 'cryptographic' can be left out if the rouines from the library
360Sstevel@tonic-gate  *    being used are not cryptographic related :-).
370Sstevel@tonic-gate  * 4. If you include any Windows specific code (or a derivative thereof) from
380Sstevel@tonic-gate  *    the apps directory (application code) you must include an acknowledgement:
390Sstevel@tonic-gate  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
400Sstevel@tonic-gate  *
410Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
420Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
430Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
440Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
450Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
460Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
470Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
480Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
490Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
500Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
510Sstevel@tonic-gate  * SUCH DAMAGE.
520Sstevel@tonic-gate  *
530Sstevel@tonic-gate  * The licence and distribution terms for any publically available version or
540Sstevel@tonic-gate  * derivative of this code cannot be changed.  i.e. this code cannot simply be
550Sstevel@tonic-gate  * copied and put under another distribution licence
560Sstevel@tonic-gate  * [including the GNU Public Licence.]
570Sstevel@tonic-gate  */
580Sstevel@tonic-gate 
590Sstevel@tonic-gate #ifndef BN_DEBUG
600Sstevel@tonic-gate # undef NDEBUG /* avoid conflicting definitions */
610Sstevel@tonic-gate # define NDEBUG
620Sstevel@tonic-gate #endif
630Sstevel@tonic-gate 
640Sstevel@tonic-gate #include <stdio.h>
650Sstevel@tonic-gate #include <assert.h>
660Sstevel@tonic-gate #include "cryptlib.h"
670Sstevel@tonic-gate #include "bn_lcl.h"
680Sstevel@tonic-gate 
690Sstevel@tonic-gate #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
700Sstevel@tonic-gate 
/*
 * Applies mul_add(rp[i], ap[i], w, carry) to each of the num words in
 * ascending order and returns the carry word left by the last step.
 * mul_add() comes from bn_lcl.h; presumably it performs
 * rp[i] += ap[i]*w + carry -- confirm against that header.
 * A non-positive num touches nothing and returns 0.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG carry=0;

	assert(num >= 0);
	if (num <= 0) return(carry);

	/* Bulk of the work: four words per iteration. */
	for (; num >= 4; num-=4)
		{
		mul_add(rp[0],ap[0],w,carry);
		mul_add(rp[1],ap[1],w,carry);
		mul_add(rp[2],ap[2],w,carry);
		mul_add(rp[3],ap[3],w,carry);
		ap+=4;
		rp+=4;
		}

	/* Remaining 0..3 words, one at a time, still in ascending order. */
	for (; num > 0; num--)
		{
		mul_add(rp[0],ap[0],w,carry);
		ap++;
		rp++;
		}

	return(carry);
	}
950Sstevel@tonic-gate 
/*
 * Applies mul(rp[i], ap[i], w, carry) to each of the num words in
 * ascending order and returns the carry word left by the last step.
 * mul() comes from bn_lcl.h; presumably it stores the low word of
 * ap[i]*w + carry in rp[i] -- confirm against that header.
 * A non-positive num touches nothing and returns 0.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG carry=0;

	assert(num >= 0);
	if (num <= 0) return(carry);

	/* Four words per iteration while at least four remain. */
	for (; num >= 4; num-=4)
		{
		mul(rp[0],ap[0],w,carry);
		mul(rp[1],ap[1],w,carry);
		mul(rp[2],ap[2],w,carry);
		mul(rp[3],ap[3],w,carry);
		ap+=4;
		rp+=4;
		}

	/* Leftover 0..3 words. */
	for (; num > 0; num--)
		{
		mul(rp[0],ap[0],w,carry);
		ap++;
		rp++;
		}

	return(carry);
	}
1190Sstevel@tonic-gate 
/*
 * For each of the n input words a[i], invokes sqr(r[2*i], r[2*i+1], a[i]),
 * so r must have room for 2*n words.  sqr() comes from bn_lcl.h;
 * presumably it stores the low/high halves of a[i]^2 -- confirm there.
 * A non-positive n is a no-op.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
	{
	assert(n >= 0);
	if (n <= 0) return;

	/* Four input words (eight output words) per iteration. */
	for (; n >= 4; n-=4)
		{
		sqr(r[0],r[1],a[0]);
		sqr(r[2],r[3],a[1]);
		sqr(r[4],r[5],a[2]);
		sqr(r[6],r[7],a[3]);
		a+=4;
		r+=8;
		}

	/* Leftover 0..3 input words. */
	for (; n > 0; n--)
		{
		sqr(r[0],r[1],a[0]);
		a++;
		r+=2;
		}
	}
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
1410Sstevel@tonic-gate 
/*
 * Half-word variant used when neither BN_LLONG nor BN_UMULT_HIGH is
 * defined.  w is split once into its LBITS/HBITS halves, then
 * mul_add(rp[i], ap[i], w_lo, w_hi, carry) is applied to each of the
 * num words in ascending order.  Returns the final carry word; a
 * non-positive num touches nothing and returns 0.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG carry=0;
	BN_ULONG w_lo,w_hi;

	assert(num >= 0);
	if (num <= 0) return((BN_ULONG)0);

	w_lo=LBITS(w);
	w_hi=HBITS(w);

	/* Four words per iteration while at least four remain. */
	for (; num >= 4; num-=4)
		{
		mul_add(rp[0],ap[0],w_lo,w_hi,carry);
		mul_add(rp[1],ap[1],w_lo,w_hi,carry);
		mul_add(rp[2],ap[2],w_lo,w_hi,carry);
		mul_add(rp[3],ap[3],w_lo,w_hi,carry);
		ap+=4;
		rp+=4;
		}

	/* Leftover 0..3 words. */
	for (; num > 0; num--)
		{
		mul_add(rp[0],ap[0],w_lo,w_hi,carry);
		ap++;
		rp++;
		}

	return(carry);
	}
1680Sstevel@tonic-gate 
/*
 * Half-word variant used when neither BN_LLONG nor BN_UMULT_HIGH is
 * defined.  w is split once into its LBITS/HBITS halves, then
 * mul(rp[i], ap[i], w_lo, w_hi, carry_word) is applied to each of the
 * num words in ascending order.  Returns the final carry word; a
 * non-positive num touches nothing and returns 0.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG carry_word=0;
	BN_ULONG w_lo,w_hi;

	assert(num >= 0);
	if (num <= 0) return((BN_ULONG)0);

	w_lo=LBITS(w);
	w_hi=HBITS(w);

	/* Four words per iteration while at least four remain. */
	for (; num >= 4; num-=4)
		{
		mul(rp[0],ap[0],w_lo,w_hi,carry_word);
		mul(rp[1],ap[1],w_lo,w_hi,carry_word);
		mul(rp[2],ap[2],w_lo,w_hi,carry_word);
		mul(rp[3],ap[3],w_lo,w_hi,carry_word);
		ap+=4;
		rp+=4;
		}

	/* Leftover 0..3 words. */
	for (; num > 0; num--)
		{
		mul(rp[0],ap[0],w_lo,w_hi,carry_word);
		ap++;
		rp++;
		}

	return(carry_word);
	}
1950Sstevel@tonic-gate 
/*
 * Half-word variant used when neither BN_LLONG nor BN_UMULT_HIGH is
 * defined.  For each of the n input words a[i] it invokes
 * sqr64(r[2*i], r[2*i+1], a[i]), so r must have room for 2*n words.
 * A non-positive n is a no-op.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
	{
	assert(n >= 0);
	if (n <= 0) return;

	/* Four input words (eight output words) per iteration. */
	for (; n >= 4; n-=4)
		{
		sqr64(r[0],r[1],a[0]);
		sqr64(r[2],r[3],a[1]);
		sqr64(r[4],r[5],a[2]);
		sqr64(r[6],r[7],a[3]);
		a+=4;
		r+=8;
		}

	/* Leftover 0..3 input words. */
	for (; n > 0; n--)
		{
		sqr64(r[0],r[1],a[0]);
		a++;
		r+=2;
		}
	}
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate #if defined(BN_LLONG) && defined(BN_DIV2W)
2220Sstevel@tonic-gate 
/*
 * Divides the double-word value (h:l) by d using the native double-width
 * type and returns the quotient truncated to a BN_ULONG.
 *
 * A zero divisor returns BN_MASK2, matching the generic implementation of
 * this function in the #else branch, instead of executing an undefined
 * division by zero.
 */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
	{
	BN_ULLONG dividend;

	if (d == 0) return(BN_MASK2);

	dividend=(((BN_ULLONG)h)<<BN_BITS2)|l;
	return((BN_ULONG)(dividend/(BN_ULLONG)d));
	}
2270Sstevel@tonic-gate 
2280Sstevel@tonic-gate #else
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate /* Divide h,l by d and return the result. */
2310Sstevel@tonic-gate /* I need to test this some more :-( */
bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d)2320Sstevel@tonic-gate BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
2330Sstevel@tonic-gate 	{
2340Sstevel@tonic-gate 	BN_ULONG dh,dl,q,ret=0,th,tl,t;
2350Sstevel@tonic-gate 	int i,count=2;
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	if (d == 0) return(BN_MASK2);
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 	i=BN_num_bits_word(d);
240*2139Sjp161948 	assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i));
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	i=BN_BITS2-i;
2430Sstevel@tonic-gate 	if (h >= d) h-=d;
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	if (i)
2460Sstevel@tonic-gate 		{
2470Sstevel@tonic-gate 		d<<=i;
2480Sstevel@tonic-gate 		h=(h<<i)|(l>>(BN_BITS2-i));
2490Sstevel@tonic-gate 		l<<=i;
2500Sstevel@tonic-gate 		}
2510Sstevel@tonic-gate 	dh=(d&BN_MASK2h)>>BN_BITS4;
2520Sstevel@tonic-gate 	dl=(d&BN_MASK2l);
2530Sstevel@tonic-gate 	for (;;)
2540Sstevel@tonic-gate 		{
2550Sstevel@tonic-gate 		if ((h>>BN_BITS4) == dh)
2560Sstevel@tonic-gate 			q=BN_MASK2l;
2570Sstevel@tonic-gate 		else
2580Sstevel@tonic-gate 			q=h/dh;
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate 		th=q*dh;
2610Sstevel@tonic-gate 		tl=dl*q;
2620Sstevel@tonic-gate 		for (;;)
2630Sstevel@tonic-gate 			{
2640Sstevel@tonic-gate 			t=h-th;
2650Sstevel@tonic-gate 			if ((t&BN_MASK2h) ||
2660Sstevel@tonic-gate 				((tl) <= (
2670Sstevel@tonic-gate 					(t<<BN_BITS4)|
2680Sstevel@tonic-gate 					((l&BN_MASK2h)>>BN_BITS4))))
2690Sstevel@tonic-gate 				break;
2700Sstevel@tonic-gate 			q--;
2710Sstevel@tonic-gate 			th-=dh;
2720Sstevel@tonic-gate 			tl-=dl;
2730Sstevel@tonic-gate 			}
2740Sstevel@tonic-gate 		t=(tl>>BN_BITS4);
2750Sstevel@tonic-gate 		tl=(tl<<BN_BITS4)&BN_MASK2h;
2760Sstevel@tonic-gate 		th+=t;
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 		if (l < tl) th++;
2790Sstevel@tonic-gate 		l-=tl;
2800Sstevel@tonic-gate 		if (h < th)
2810Sstevel@tonic-gate 			{
2820Sstevel@tonic-gate 			h+=d;
2830Sstevel@tonic-gate 			q--;
2840Sstevel@tonic-gate 			}
2850Sstevel@tonic-gate 		h-=th;
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 		if (--count == 0) break;
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate 		ret=q<<BN_BITS4;
2900Sstevel@tonic-gate 		h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
2910Sstevel@tonic-gate 		l=(l&BN_MASK2l)<<BN_BITS4;
2920Sstevel@tonic-gate 		}
2930Sstevel@tonic-gate 	ret|=q;
2940Sstevel@tonic-gate 	return(ret);
2950Sstevel@tonic-gate 	}
#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
2970Sstevel@tonic-gate 
2980Sstevel@tonic-gate #ifdef BN_LLONG
/*
 * BN_LLONG variant: r[i] = a[i] + b[i] + carry for i in [0, n), using a
 * double-width accumulator whose upper part (after shifting right by
 * BN_BITS2) carries into the next word.  Returns the final carry; a
 * non-positive n touches nothing and returns 0.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
	{
	BN_ULLONG acc=0;

	assert(n >= 0);
	if (n <= 0) return((BN_ULONG)0);

	/* Four words per iteration while at least four remain. */
	for (; n >= 4; n-=4)
		{
		acc+=(BN_ULLONG)a[0]+b[0];
		r[0]=(BN_ULONG)acc&BN_MASK2;
		acc>>=BN_BITS2;

		acc+=(BN_ULLONG)a[1]+b[1];
		r[1]=(BN_ULONG)acc&BN_MASK2;
		acc>>=BN_BITS2;

		acc+=(BN_ULLONG)a[2]+b[2];
		r[2]=(BN_ULONG)acc&BN_MASK2;
		acc>>=BN_BITS2;

		acc+=(BN_ULLONG)a[3]+b[3];
		r[3]=(BN_ULONG)acc&BN_MASK2;
		acc>>=BN_BITS2;

		a+=4;
		b+=4;
		r+=4;
		}

	/* Leftover 0..3 words. */
	for (; n > 0; n--)
		{
		acc+=(BN_ULLONG)a[0]+b[0];
		r[0]=(BN_ULONG)acc&BN_MASK2;
		acc>>=BN_BITS2;
		a++;
		b++;
		r++;
		}

	return((BN_ULONG)acc);
	}
3340Sstevel@tonic-gate #else /* !BN_LLONG */
/*
 * Single-word variant (no BN_LLONG): r[i] = a[i] + b[i] + carry for i in
 * [0, n).  Each word is added in two steps (carry first, then b[i]) and
 * a wrap-around in either step is detected by the sum being smaller than
 * an addend.  Returns the final carry; a non-positive n touches nothing
 * and returns 0.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
	{
	BN_ULONG carry,sum,x;

	assert(n >= 0);
	if (n <= 0) return((BN_ULONG)0);

	carry=0;

	/* Four words per iteration while at least four remain. */
	for (; n >= 4; n-=4)
		{
		x=a[0];
		x=(x+carry)&BN_MASK2;
		carry=(x < carry);
		sum=(x+b[0])&BN_MASK2;
		carry+=(sum < x);
		r[0]=sum;

		x=a[1];
		x=(x+carry)&BN_MASK2;
		carry=(x < carry);
		sum=(x+b[1])&BN_MASK2;
		carry+=(sum < x);
		r[1]=sum;

		x=a[2];
		x=(x+carry)&BN_MASK2;
		carry=(x < carry);
		sum=(x+b[2])&BN_MASK2;
		carry+=(sum < x);
		r[2]=sum;

		x=a[3];
		x=(x+carry)&BN_MASK2;
		carry=(x < carry);
		sum=(x+b[3])&BN_MASK2;
		carry+=(sum < x);
		r[3]=sum;

		a+=4;
		b+=4;
		r+=4;
		}

	/* Leftover 0..3 words. */
	for (; n > 0; n--)
		{
		x=a[0];
		x=(x+carry)&BN_MASK2;
		carry=(x < carry);
		sum=(x+b[0])&BN_MASK2;
		carry+=(sum < x);
		r[0]=sum;
		a++;
		b++;
		r++;
		}

	return((BN_ULONG)carry);
	}
3830Sstevel@tonic-gate #endif /* !BN_LLONG */
3840Sstevel@tonic-gate 
/*
 * r[i] = a[i] - b[i] - borrow for i in [0, n).  The borrow for the next
 * word is recomputed as (a[i] < b[i]) whenever the two words differ and
 * is left unchanged when they are equal.  Returns the final borrow
 * (0 or 1); a non-positive n touches nothing and returns 0.
 */
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
	{
	BN_ULONG x,y;
	int borrow=0;

	assert(n >= 0);
	if (n <= 0) return((BN_ULONG)0);

	/* Four words per iteration while at least four remain. */
	for (; n >= 4; n-=4)
		{
		x=a[0]; y=b[0];
		r[0]=(x-y-borrow)&BN_MASK2;
		if (x != y) borrow=(x < y);

		x=a[1]; y=b[1];
		r[1]=(x-y-borrow)&BN_MASK2;
		if (x != y) borrow=(x < y);

		x=a[2]; y=b[2];
		r[2]=(x-y-borrow)&BN_MASK2;
		if (x != y) borrow=(x < y);

		x=a[3]; y=b[3];
		r[3]=(x-y-borrow)&BN_MASK2;
		if (x != y) borrow=(x < y);

		a+=4;
		b+=4;
		r+=4;
		}

	/* Leftover 0..3 words. */
	for (; n > 0; n--)
		{
		x=a[0]; y=b[0];
		r[0]=(x-y-borrow)&BN_MASK2;
		if (x != y) borrow=(x < y);
		a++;
		b++;
		r++;
		}

	return(borrow);
	}
4210Sstevel@tonic-gate 
4220Sstevel@tonic-gate #ifdef BN_MUL_COMBA
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate #undef bn_mul_comba8
4250Sstevel@tonic-gate #undef bn_mul_comba4
4260Sstevel@tonic-gate #undef bn_sqr_comba8
4270Sstevel@tonic-gate #undef bn_sqr_comba4
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate /* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
4300Sstevel@tonic-gate /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
4310Sstevel@tonic-gate /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate #ifdef BN_LLONG
/*
 * BN_LLONG versions of the comba accumulator macros.  They rely on the
 * caller declaring BN_ULLONG t (and tt for mul_add_c2) and BN_ULONG t1,t2.
 *
 * Each macro is wrapped in do { } while(0) so that it expands to a single
 * statement (safe under an unbraced if/else), and the operand arguments
 * are parenthesised so that expression arguments multiply as intended.
 */

/* c += a*b, where c is the three-word number (c2,c1,c0). */
#define mul_add_c(a,b,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)*(b); \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while(0)

/*
 * c += 2*a*b.  The product is doubled in the double-width type; a carry
 * out of the doubling, and a wrap of t2 when the low-word carry is
 * added, both go into c2.
 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)*(b); \
	tt=(t+t)&BN_MASK; \
	if (tt < t) c2++; \
	t1=(BN_ULONG)Lw(tt); \
	t2=(BN_ULONG)Hw(tt); \
	c0=(c0+t1)&BN_MASK2;  \
	if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while(0)

/* c += a[i]^2. */
#define sqr_add_c(a,i,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)[i]*(a)[i]; \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while(0)

/* c += 2*a[i]*a[j]. */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
4610Sstevel@tonic-gate 
462*2139Sjp161948 #elif defined(BN_UMULT_LOHI)
463*2139Sjp161948 
/*
 * BN_UMULT_LOHI versions of the comba accumulator macros.  They rely on
 * the caller declaring BN_ULONG t1,t2 as scratch.  Each macro is wrapped
 * in do { } while(0) so it expands to a single statement.
 */

/* c += a*b, where c is the three-word number (c2,c1,c0). */
#define mul_add_c(a,b,c0,c1,c2)	do {	\
	BN_ULONG ta=(a),tb=(b);		\
	BN_UMULT_LOHI(t1,t2,ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while(0)

/*
 * c += 2*a*b, done by adding the product (t2:t1) twice.
 *
 * Doubling the product first and then adding the carry out of c0 into
 * the doubled high word can wrap that word to zero and silently drop a
 * carry into c2.  The high word of a single product is at most
 * 2^w - 2, so adding a one-bit carry to it cannot wrap.
 */
#define mul_add_c2(a,b,c0,c1,c2) do {	\
	BN_ULONG ta=(a),tb=(b),tt;	\
	BN_UMULT_LOHI(t1,t2,ta,tb);	\
	c0 += t1; tt = t2+((c0<t1)?1:0);	\
	c1 += tt; c2 += (c1<tt)?1:0;	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while(0)

/* c += a[i]^2. */
#define sqr_add_c(a,i,c0,c1,c2)	do {	\
	BN_ULONG ta=(a)[i];		\
	BN_UMULT_LOHI(t1,t2,ta,ta);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while(0)

/* c += 2*a[i]*a[j]. */
#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
489*2139Sjp161948 
4900Sstevel@tonic-gate #elif defined(BN_UMULT_HIGH)
4910Sstevel@tonic-gate 
/*
 * BN_UMULT_HIGH versions of the comba accumulator macros.  They rely on
 * the caller declaring BN_ULONG t1,t2 as scratch.  Each macro is wrapped
 * in do { } while(0) so it expands to a single statement.
 */

/* c += a*b, where c is the three-word number (c2,c1,c0). */
#define mul_add_c(a,b,c0,c1,c2)	do {	\
	BN_ULONG ta=(a),tb=(b);		\
	t1 = ta * tb;			\
	t2 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while(0)

/*
 * c += 2*a*b, done by adding the product (t2:t1) twice.
 *
 * Doubling the product first and then adding the carry out of c0 into
 * the doubled high word can wrap that word to zero and silently drop a
 * carry into c2.  The high word of a single product is at most
 * 2^w - 2, so adding a one-bit carry to it cannot wrap.
 */
#define mul_add_c2(a,b,c0,c1,c2) do {	\
	BN_ULONG ta=(a),tb=(b),tt;	\
	t1 = ta * tb;			\
	t2 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t1; tt = t2+((c0<t1)?1:0);	\
	c1 += tt; c2 += (c1<tt)?1:0;	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while(0)

/* c += a[i]^2. */
#define sqr_add_c(a,i,c0,c1,c2)	do {	\
	BN_ULONG ta=(a)[i];		\
	t1 = ta * ta;			\
	t2 = BN_UMULT_HIGH(ta,ta);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	} while(0)

/* c += 2*a[i]*a[j]. */
#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
5200Sstevel@tonic-gate 
5210Sstevel@tonic-gate #else /* !BN_LLONG */
/*
 * Half-word versions of the comba accumulator macros, used when none of
 * BN_LLONG, BN_UMULT_LOHI or BN_UMULT_HIGH is defined.  They rely on the
 * caller declaring BN_ULONG t1,t2,bl,bh as scratch.
 *
 * Each macro is wrapped in do { } while(0) so that it expands to a single
 * statement (safe under an unbraced if/else).
 */

/* c += a*b, where c is the three-word number (c2,c1,c0). */
#define mul_add_c(a,b,c0,c1,c2) do { \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while(0)

/*
 * c += 2*a*b.  The product (t2:t1) is doubled word by word; the bit
 * shifted out of each word (tested with BN_TBIT) is carried upwards, and
 * a wrap of t2 when the low-word carry is added goes into c2.
 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	if (t2 & BN_TBIT) c2++; \
	t2=(t2+t2)&BN_MASK2; \
	if (t1 & BN_TBIT) t2++; \
	t1=(t1+t1)&BN_MASK2; \
	c0=(c0+t1)&BN_MASK2;  \
	if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while(0)

/* c += a[i]^2. */
#define sqr_add_c(a,i,c0,c1,c2) do { \
	sqr64(t1,t2,(a)[i]); \
	c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; \
	} while(0)

/* c += 2*a[i]*a[j]. */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
5480Sstevel@tonic-gate #endif /* !BN_LLONG */
5490Sstevel@tonic-gate 
/*
 * Fully unrolled 8x8-word multiplication: r[0..15] = a[0..7] * b[0..7].
 *
 * The product is built one output word (column) at a time.  Column k
 * accumulates every a[i]*b[j] with i+j == k via mul_add_c() into a
 * three-word accumulator; the lowest accumulator word is then stored in
 * r[k] and cleared, and the roles of c1,c2,c3 rotate for the next column.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	/* Scratch variables referenced by the mul_add_c() macro variants. */
#ifdef BN_LLONG
	BN_ULLONG t;
#else
	BN_ULONG bl,bh;
#endif
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0; c2=0; c3=0;

	/* column 0 */
	mul_add_c(a[0],b[0],c1,c2,c3);
	r[0]=c1; c1=0;

	/* column 1 */
	mul_add_c(a[0],b[1],c2,c3,c1);
	mul_add_c(a[1],b[0],c2,c3,c1);
	r[1]=c2; c2=0;

	/* column 2 */
	mul_add_c(a[2],b[0],c3,c1,c2);
	mul_add_c(a[1],b[1],c3,c1,c2);
	mul_add_c(a[0],b[2],c3,c1,c2);
	r[2]=c3; c3=0;

	/* column 3 */
	mul_add_c(a[0],b[3],c1,c2,c3);
	mul_add_c(a[1],b[2],c1,c2,c3);
	mul_add_c(a[2],b[1],c1,c2,c3);
	mul_add_c(a[3],b[0],c1,c2,c3);
	r[3]=c1; c1=0;

	/* column 4 */
	mul_add_c(a[4],b[0],c2,c3,c1);
	mul_add_c(a[3],b[1],c2,c3,c1);
	mul_add_c(a[2],b[2],c2,c3,c1);
	mul_add_c(a[1],b[3],c2,c3,c1);
	mul_add_c(a[0],b[4],c2,c3,c1);
	r[4]=c2; c2=0;

	/* column 5 */
	mul_add_c(a[0],b[5],c3,c1,c2);
	mul_add_c(a[1],b[4],c3,c1,c2);
	mul_add_c(a[2],b[3],c3,c1,c2);
	mul_add_c(a[3],b[2],c3,c1,c2);
	mul_add_c(a[4],b[1],c3,c1,c2);
	mul_add_c(a[5],b[0],c3,c1,c2);
	r[5]=c3; c3=0;

	/* column 6 */
	mul_add_c(a[6],b[0],c1,c2,c3);
	mul_add_c(a[5],b[1],c1,c2,c3);
	mul_add_c(a[4],b[2],c1,c2,c3);
	mul_add_c(a[3],b[3],c1,c2,c3);
	mul_add_c(a[2],b[4],c1,c2,c3);
	mul_add_c(a[1],b[5],c1,c2,c3);
	mul_add_c(a[0],b[6],c1,c2,c3);
	r[6]=c1; c1=0;

	/* column 7 */
	mul_add_c(a[0],b[7],c2,c3,c1);
	mul_add_c(a[1],b[6],c2,c3,c1);
	mul_add_c(a[2],b[5],c2,c3,c1);
	mul_add_c(a[3],b[4],c2,c3,c1);
	mul_add_c(a[4],b[3],c2,c3,c1);
	mul_add_c(a[5],b[2],c2,c3,c1);
	mul_add_c(a[6],b[1],c2,c3,c1);
	mul_add_c(a[7],b[0],c2,c3,c1);
	r[7]=c2; c2=0;

	/* column 8 */
	mul_add_c(a[7],b[1],c3,c1,c2);
	mul_add_c(a[6],b[2],c3,c1,c2);
	mul_add_c(a[5],b[3],c3,c1,c2);
	mul_add_c(a[4],b[4],c3,c1,c2);
	mul_add_c(a[3],b[5],c3,c1,c2);
	mul_add_c(a[2],b[6],c3,c1,c2);
	mul_add_c(a[1],b[7],c3,c1,c2);
	r[8]=c3; c3=0;

	/* column 9 */
	mul_add_c(a[2],b[7],c1,c2,c3);
	mul_add_c(a[3],b[6],c1,c2,c3);
	mul_add_c(a[4],b[5],c1,c2,c3);
	mul_add_c(a[5],b[4],c1,c2,c3);
	mul_add_c(a[6],b[3],c1,c2,c3);
	mul_add_c(a[7],b[2],c1,c2,c3);
	r[9]=c1; c1=0;

	/* column 10 */
	mul_add_c(a[7],b[3],c2,c3,c1);
	mul_add_c(a[6],b[4],c2,c3,c1);
	mul_add_c(a[5],b[5],c2,c3,c1);
	mul_add_c(a[4],b[6],c2,c3,c1);
	mul_add_c(a[3],b[7],c2,c3,c1);
	r[10]=c2; c2=0;

	/* column 11 */
	mul_add_c(a[4],b[7],c3,c1,c2);
	mul_add_c(a[5],b[6],c3,c1,c2);
	mul_add_c(a[6],b[5],c3,c1,c2);
	mul_add_c(a[7],b[4],c3,c1,c2);
	r[11]=c3; c3=0;

	/* column 12 */
	mul_add_c(a[7],b[5],c1,c2,c3);
	mul_add_c(a[6],b[6],c1,c2,c3);
	mul_add_c(a[5],b[7],c1,c2,c3);
	r[12]=c1; c1=0;

	/* column 13 */
	mul_add_c(a[6],b[7],c2,c3,c1);
	mul_add_c(a[7],b[6],c2,c3,c1);
	r[13]=c2; c2=0;

	/* column 14, whose carry word becomes the top word r[15] */
	mul_add_c(a[7],b[7],c3,c1,c2);
	r[14]=c3;
	r[15]=c1;
	}
6580Sstevel@tonic-gate 
/*
 * Fully unrolled 4x4-word multiplication: r[0..7] = a[0..3] * b[0..3].
 *
 * The product is built one output word (column) at a time.  Column k
 * accumulates every a[i]*b[j] with i+j == k via mul_add_c() into a
 * three-word accumulator; the lowest accumulator word is then stored in
 * r[k] and cleared, and the roles of c1,c2,c3 rotate for the next column.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	/* Scratch variables referenced by the mul_add_c() macro variants. */
#ifdef BN_LLONG
	BN_ULLONG t;
#else
	BN_ULONG bl,bh;
#endif
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0; c2=0; c3=0;

	/* column 0 */
	mul_add_c(a[0],b[0],c1,c2,c3);
	r[0]=c1; c1=0;

	/* column 1 */
	mul_add_c(a[0],b[1],c2,c3,c1);
	mul_add_c(a[1],b[0],c2,c3,c1);
	r[1]=c2; c2=0;

	/* column 2 */
	mul_add_c(a[2],b[0],c3,c1,c2);
	mul_add_c(a[1],b[1],c3,c1,c2);
	mul_add_c(a[0],b[2],c3,c1,c2);
	r[2]=c3; c3=0;

	/* column 3 */
	mul_add_c(a[0],b[3],c1,c2,c3);
	mul_add_c(a[1],b[2],c1,c2,c3);
	mul_add_c(a[2],b[1],c1,c2,c3);
	mul_add_c(a[3],b[0],c1,c2,c3);
	r[3]=c1; c1=0;

	/* column 4 */
	mul_add_c(a[3],b[1],c2,c3,c1);
	mul_add_c(a[2],b[2],c2,c3,c1);
	mul_add_c(a[1],b[3],c2,c3,c1);
	r[4]=c2; c2=0;

	/* column 5 */
	mul_add_c(a[2],b[3],c3,c1,c2);
	mul_add_c(a[3],b[2],c3,c1,c2);
	r[5]=c3; c3=0;

	/* column 6, whose carry word becomes the top word r[7] */
	mul_add_c(a[3],b[3],c1,c2,c3);
	r[6]=c1;
	r[7]=c2;
	}
7030Sstevel@tonic-gate 
/*
 * Fixed-size square: fills r[0]..r[15] from a[0]..a[7].
 *
 * Result words are produced one at a time, lowest index first.  For
 * result index k, the term with i == j (when k is even) goes through
 * sqr_add_c(a,i,...) and every pair i > j with i+j == k goes through
 * sqr_add_c2(a,i,j,...).  After each column the first accumulator named
 * is stored to r[k] and cleared, and the c1/c2/c3 roles rotate by one.
 *
 * NOTE(review): sqr_add_c()/sqr_add_c2() are defined outside this excerpt
 * and are presumably macros; t, tt, bl, bh, t1 and t2 are never named in
 * this body, so they are presumably scratch variables for those macros.
 * Do not rename or remove them without checking the definitions.
 */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
	{
#ifdef BN_LLONG
	BN_ULLONG t, tt;
#else
	BN_ULONG bl, bh;
#endif
	BN_ULONG t1, t2;
	BN_ULONG c1, c2, c3;

	c1 = c2 = c3 = 0;

	/* r[0] */
	sqr_add_c(a, 0, c1, c2, c3);
	r[0] = c1;  c1 = 0;

	/* r[1] */
	sqr_add_c2(a, 1, 0, c2, c3, c1);
	r[1] = c2;  c2 = 0;

	/* r[2] */
	sqr_add_c(a, 1, c3, c1, c2);
	sqr_add_c2(a, 2, 0, c3, c1, c2);
	r[2] = c3;  c3 = 0;

	/* r[3] */
	sqr_add_c2(a, 3, 0, c1, c2, c3);
	sqr_add_c2(a, 2, 1, c1, c2, c3);
	r[3] = c1;  c1 = 0;

	/* r[4] */
	sqr_add_c(a, 2, c2, c3, c1);
	sqr_add_c2(a, 3, 1, c2, c3, c1);
	sqr_add_c2(a, 4, 0, c2, c3, c1);
	r[4] = c2;  c2 = 0;

	/* r[5] */
	sqr_add_c2(a, 5, 0, c3, c1, c2);
	sqr_add_c2(a, 4, 1, c3, c1, c2);
	sqr_add_c2(a, 3, 2, c3, c1, c2);
	r[5] = c3;  c3 = 0;

	/* r[6] */
	sqr_add_c(a, 3, c1, c2, c3);
	sqr_add_c2(a, 4, 2, c1, c2, c3);
	sqr_add_c2(a, 5, 1, c1, c2, c3);
	sqr_add_c2(a, 6, 0, c1, c2, c3);
	r[6] = c1;  c1 = 0;

	/* r[7] */
	sqr_add_c2(a, 7, 0, c2, c3, c1);
	sqr_add_c2(a, 6, 1, c2, c3, c1);
	sqr_add_c2(a, 5, 2, c2, c3, c1);
	sqr_add_c2(a, 4, 3, c2, c3, c1);
	r[7] = c2;  c2 = 0;

	/* r[8] */
	sqr_add_c(a, 4, c3, c1, c2);
	sqr_add_c2(a, 5, 3, c3, c1, c2);
	sqr_add_c2(a, 6, 2, c3, c1, c2);
	sqr_add_c2(a, 7, 1, c3, c1, c2);
	r[8] = c3;  c3 = 0;

	/* r[9] */
	sqr_add_c2(a, 7, 2, c1, c2, c3);
	sqr_add_c2(a, 6, 3, c1, c2, c3);
	sqr_add_c2(a, 5, 4, c1, c2, c3);
	r[9] = c1;  c1 = 0;

	/* r[10] */
	sqr_add_c(a, 5, c2, c3, c1);
	sqr_add_c2(a, 6, 4, c2, c3, c1);
	sqr_add_c2(a, 7, 3, c2, c3, c1);
	r[10] = c2;  c2 = 0;

	/* r[11] */
	sqr_add_c2(a, 7, 4, c3, c1, c2);
	sqr_add_c2(a, 6, 5, c3, c1, c2);
	r[11] = c3;  c3 = 0;

	/* r[12] */
	sqr_add_c(a, 6, c1, c2, c3);
	sqr_add_c2(a, 7, 5, c1, c2, c3);
	r[12] = c1;  c1 = 0;

	/* r[13] */
	sqr_add_c2(a, 7, 6, c2, c3, c1);
	r[13] = c2;  c2 = 0;

	/* r[14]; whatever is left in c1 becomes the top word r[15] */
	sqr_add_c(a, 7, c3, c1, c2);
	r[14] = c3;
	r[15] = c1;
	}
7840Sstevel@tonic-gate 
/*
 * Fixed-size square: fills r[0]..r[7] from a[0]..a[3].
 *
 * Result words are produced one at a time, lowest index first.  For
 * result index k, the term with i == j (when k is even) goes through
 * sqr_add_c(a,i,...) and every pair i > j with i+j == k goes through
 * sqr_add_c2(a,i,j,...).  After each column the first accumulator named
 * is stored to r[k] and cleared, and the c1/c2/c3 roles rotate by one.
 *
 * NOTE(review): sqr_add_c()/sqr_add_c2() are defined outside this excerpt
 * and are presumably macros; t, tt, bl, bh, t1 and t2 are never named in
 * this body, so they are presumably scratch variables for those macros.
 * Do not rename or remove them without checking the definitions.
 */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
	{
#ifdef BN_LLONG
	BN_ULLONG t, tt;
#else
	BN_ULONG bl, bh;
#endif
	BN_ULONG t1, t2;
	BN_ULONG c1, c2, c3;

	c1 = c2 = c3 = 0;

	/* r[0] */
	sqr_add_c(a, 0, c1, c2, c3);
	r[0] = c1;  c1 = 0;

	/* r[1] */
	sqr_add_c2(a, 1, 0, c2, c3, c1);
	r[1] = c2;  c2 = 0;

	/* r[2] */
	sqr_add_c(a, 1, c3, c1, c2);
	sqr_add_c2(a, 2, 0, c3, c1, c2);
	r[2] = c3;  c3 = 0;

	/* r[3] */
	sqr_add_c2(a, 3, 0, c1, c2, c3);
	sqr_add_c2(a, 2, 1, c1, c2, c3);
	r[3] = c1;  c1 = 0;

	/* r[4] */
	sqr_add_c(a, 2, c2, c3, c1);
	sqr_add_c2(a, 3, 1, c2, c3, c1);
	r[4] = c2;  c2 = 0;

	/* r[5] */
	sqr_add_c2(a, 3, 2, c3, c1, c2);
	r[5] = c3;  c3 = 0;

	/* r[6]; whatever is left in c2 becomes the top word r[7] */
	sqr_add_c(a, 3, c1, c2, c3);
	r[6] = c1;
	r[7] = c2;
	}
8230Sstevel@tonic-gate #else /* !BN_MUL_COMBA */
8240Sstevel@tonic-gate 
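/*
 * Fallback used when BN_MUL_COMBA is not defined: the fixed-size entry
 * points below forward to the generic word-level routines.
 * Open question kept from the original author: is it faster just to do a
 * multiply (presumably instead of calling the generic squaring routine)?
 */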
8260Sstevel@tonic-gate #undef bn_sqr_comba4
bn_sqr_comba4(BN_ULONG * r,BN_ULONG * a)8270Sstevel@tonic-gate void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
8280Sstevel@tonic-gate 	{
8290Sstevel@tonic-gate 	BN_ULONG t[8];
8300Sstevel@tonic-gate 	bn_sqr_normal(r,a,4,t);
8310Sstevel@tonic-gate 	}
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate #undef bn_sqr_comba8
bn_sqr_comba8(BN_ULONG * r,BN_ULONG * a)8340Sstevel@tonic-gate void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
8350Sstevel@tonic-gate 	{
8360Sstevel@tonic-gate 	BN_ULONG t[16];
8370Sstevel@tonic-gate 	bn_sqr_normal(r,a,8,t);
8380Sstevel@tonic-gate 	}
8390Sstevel@tonic-gate 
/*
 * Non-comba fallback for the 4x4-word multiply, built row by row from the
 * generic word routines.
 *
 * Row 0 calls bn_mul_words() on r[0..] with multiplier b[0]; each later
 * row i calls bn_mul_add_words() on r[i..] with multiplier b[i].  In both
 * cases the value returned by the call is stored in r[4 + i].
 *
 * NOTE(review): both helpers are defined outside this excerpt; presumably
 * the return value is the carry word out of the 4-word row.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	int row;

	r[4] = bn_mul_words(r, a, 4, b[0]);
	for (row = 1; row < 4; row++)
		r[4 + row] = bn_mul_add_words(&r[row], a, 4, b[row]);
	}
8470Sstevel@tonic-gate 
/*
 * Non-comba fallback for the 8x8-word multiply, built row by row from the
 * generic word routines.
 *
 * Row 0 calls bn_mul_words() on r[0..] with multiplier b[0]; each later
 * row i calls bn_mul_add_words() on r[i..] with multiplier b[i].  In both
 * cases the value returned by the call is stored in r[8 + i].
 *
 * NOTE(review): both helpers are defined outside this excerpt; presumably
 * the return value is the carry word out of the 8-word row.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	int row;

	r[8] = bn_mul_words(r, a, 8, b[0]);
	for (row = 1; row < 8; row++)
		r[8 + row] = bn_mul_add_words(&r[row], a, 8, b[row]);
	}
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate #endif /* !BN_MUL_COMBA */
861