10Sstevel@tonic-gate /* crypto/bn/bn_asm.c */
20Sstevel@tonic-gate /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
30Sstevel@tonic-gate * All rights reserved.
40Sstevel@tonic-gate *
50Sstevel@tonic-gate * This package is an SSL implementation written
60Sstevel@tonic-gate * by Eric Young (eay@cryptsoft.com).
70Sstevel@tonic-gate * The implementation was written so as to conform with Netscapes SSL.
80Sstevel@tonic-gate *
90Sstevel@tonic-gate * This library is free for commercial and non-commercial use as long as
100Sstevel@tonic-gate * the following conditions are aheared to. The following conditions
110Sstevel@tonic-gate * apply to all code found in this distribution, be it the RC4, RSA,
120Sstevel@tonic-gate * lhash, DES, etc., code; not just the SSL code. The SSL documentation
130Sstevel@tonic-gate * included with this distribution is covered by the same copyright terms
140Sstevel@tonic-gate * except that the holder is Tim Hudson (tjh@cryptsoft.com).
150Sstevel@tonic-gate *
160Sstevel@tonic-gate * Copyright remains Eric Young's, and as such any Copyright notices in
170Sstevel@tonic-gate * the code are not to be removed.
180Sstevel@tonic-gate * If this package is used in a product, Eric Young should be given attribution
190Sstevel@tonic-gate * as the author of the parts of the library used.
200Sstevel@tonic-gate * This can be in the form of a textual message at program startup or
210Sstevel@tonic-gate * in documentation (online or textual) provided with the package.
220Sstevel@tonic-gate *
230Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without
240Sstevel@tonic-gate * modification, are permitted provided that the following conditions
250Sstevel@tonic-gate * are met:
260Sstevel@tonic-gate * 1. Redistributions of source code must retain the copyright
270Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer.
280Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright
290Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the
300Sstevel@tonic-gate * documentation and/or other materials provided with the distribution.
310Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software
320Sstevel@tonic-gate * must display the following acknowledgement:
330Sstevel@tonic-gate * "This product includes cryptographic software written by
340Sstevel@tonic-gate * Eric Young (eay@cryptsoft.com)"
350Sstevel@tonic-gate * The word 'cryptographic' can be left out if the rouines from the library
360Sstevel@tonic-gate * being used are not cryptographic related :-).
370Sstevel@tonic-gate * 4. If you include any Windows specific code (or a derivative thereof) from
380Sstevel@tonic-gate * the apps directory (application code) you must include an acknowledgement:
390Sstevel@tonic-gate * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
400Sstevel@tonic-gate *
410Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
420Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
430Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
440Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
450Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
460Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
470Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
480Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
490Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
500Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
510Sstevel@tonic-gate * SUCH DAMAGE.
520Sstevel@tonic-gate *
530Sstevel@tonic-gate * The licence and distribution terms for any publically available version or
540Sstevel@tonic-gate * derivative of this code cannot be changed. i.e. this code cannot simply be
550Sstevel@tonic-gate * copied and put under another distribution licence
560Sstevel@tonic-gate * [including the GNU Public Licence.]
570Sstevel@tonic-gate */
580Sstevel@tonic-gate
590Sstevel@tonic-gate #ifndef BN_DEBUG
600Sstevel@tonic-gate # undef NDEBUG /* avoid conflicting definitions */
610Sstevel@tonic-gate # define NDEBUG
620Sstevel@tonic-gate #endif
630Sstevel@tonic-gate
640Sstevel@tonic-gate #include <stdio.h>
650Sstevel@tonic-gate #include <assert.h>
660Sstevel@tonic-gate #include "cryptlib.h"
670Sstevel@tonic-gate #include "bn_lcl.h"
680Sstevel@tonic-gate
690Sstevel@tonic-gate #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
700Sstevel@tonic-gate
/*
 * Apply mul_add() to each of the num word pairs (rp[i], ap[i]) with the
 * multiplier w, lowest index first, threading one carry word through all
 * of the calls.  Returns the carry left after the last word, or 0 without
 * touching rp when num <= 0.
 *
 * NOTE(review): mul_add() comes from bn_lcl.h and is not visible here; it
 * is presumed to compute r += a*w + carry with the high word going back
 * into the carry -- confirm against bn_lcl.h.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;

    assert(num >= 0);
    if (num <= 0)
        return carry;

    /* Bulk of the work: four words per iteration. */
    for (; num >= 4; num -= 4, ap += 4, rp += 4) {
        mul_add(rp[0], ap[0], w, carry);
        mul_add(rp[1], ap[1], w, carry);
        mul_add(rp[2], ap[2], w, carry);
        mul_add(rp[3], ap[3], w, carry);
    }

    /* Tail: whatever is left (at most three words), still in order. */
    for (; num > 0; num--, ap++, rp++) {
        mul_add(rp[0], ap[0], w, carry);
    }

    return carry;
}
950Sstevel@tonic-gate
/*
 * Apply mul() to each of the num word pairs (rp[i], ap[i]) with the
 * multiplier w, lowest index first, threading one carry word through all
 * of the calls.  Returns the final carry, or 0 without touching rp when
 * num <= 0.
 *
 * NOTE(review): mul() comes from bn_lcl.h and is not visible here; it is
 * presumed to store the low word of a*w + carry in r and the high word in
 * the carry -- confirm against bn_lcl.h.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;

    assert(num >= 0);
    if (num <= 0)
        return carry;

    /* Four words per iteration while at least four remain. */
    for (; num >= 4; num -= 4, ap += 4, rp += 4) {
        mul(rp[0], ap[0], w, carry);
        mul(rp[1], ap[1], w, carry);
        mul(rp[2], ap[2], w, carry);
        mul(rp[3], ap[3], w, carry);
    }

    /* Remaining one to three words, in order. */
    for (; num > 0; num--, ap++, rp++) {
        mul(rp[0], ap[0], w, carry);
    }

    return carry;
}
1190Sstevel@tonic-gate
/*
 * For each of the n input words a[i], invoke sqr() with the output pair
 * (r[2*i], r[2*i+1]).  r therefore needs room for 2*n words.  Does nothing
 * when n <= 0.
 *
 * NOTE(review): sqr() comes from bn_lcl.h; presumably it stores the low
 * and high words of a[i]^2 in its first two arguments -- confirm there.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
    assert(n >= 0);
    if (n <= 0)
        return;

    /* Four input words (eight output words) per iteration. */
    for (; n >= 4; n -= 4, a += 4, r += 8) {
        sqr(r[0], r[1], a[0]);
        sqr(r[2], r[3], a[1]);
        sqr(r[4], r[5], a[2]);
        sqr(r[6], r[7], a[3]);
    }

    /* Remaining one to three input words. */
    for (; n > 0; n--, a++, r += 2) {
        sqr(r[0], r[1], a[0]);
    }
}
1390Sstevel@tonic-gate
1400Sstevel@tonic-gate #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
1410Sstevel@tonic-gate
/*
 * Variant built when neither BN_LLONG nor BN_UMULT_HIGH is defined.
 *
 * The multiplier w is split once with LBITS()/HBITS(); mul_add() is then
 * applied to every word pair (rp[i], ap[i]) from index 0 upwards with a
 * single carry word threaded through.  Returns the final carry, or 0
 * without touching rp when num <= 0.
 *
 * NOTE(review): mul_add(), LBITS() and HBITS() are defined in bn_lcl.h and
 * are not visible here; the "r += a*w + carry" reading is presumed.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;
    BN_ULONG w_lo, w_hi;

    assert(num >= 0);
    if (num <= 0)
        return (BN_ULONG)0;

    w_lo = LBITS(w);
    w_hi = HBITS(w);

    while (num > 0) {
        mul_add(*rp, *ap, w_lo, w_hi, carry);
        rp++;
        ap++;
        num--;
    }
    return carry;
}
1680Sstevel@tonic-gate
/*
 * Variant built when neither BN_LLONG nor BN_UMULT_HIGH is defined.
 *
 * Splits w with LBITS()/HBITS() and applies mul() to every word pair
 * (rp[i], ap[i]) from index 0 upwards, threading one carry word through.
 * Returns the final carry, or 0 without touching rp when num <= 0.
 *
 * NOTE(review): mul(), LBITS() and HBITS() live in bn_lcl.h; the
 * "r = low(a*w + carry), carry = high" reading is presumed.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c_out = 0;
    BN_ULONG w_lo, w_hi;

    assert(num >= 0);
    if (num <= 0)
        return (BN_ULONG)0;

    w_lo = LBITS(w);
    w_hi = HBITS(w);

    while (num > 0) {
        mul(*rp, *ap, w_lo, w_hi, c_out);
        rp++;
        ap++;
        num--;
    }
    return c_out;
}
1950Sstevel@tonic-gate
/*
 * Variant built when neither BN_LLONG nor BN_UMULT_HIGH is defined.
 *
 * For each of the n input words, calls sqr64() with the output pair
 * (r[2*i], r[2*i+1]) and the input a[i]; r needs room for 2*n words.
 * Does nothing when n <= 0.
 *
 * NOTE(review): sqr64() is defined in bn_lcl.h; presumably it yields the
 * low and high words of the square -- confirm there.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
    assert(n >= 0);
    if (n <= 0)
        return;

    while (n > 0) {
        sqr64(r[0], r[1], a[0]);
        a++;
        r += 2;
        n--;
    }
}
2180Sstevel@tonic-gate
2190Sstevel@tonic-gate #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate #if defined(BN_LLONG) && defined(BN_DIV2W)
2220Sstevel@tonic-gate
/*
 * Divide the two-word value (h,l) by d using the double-width type and
 * return the quotient truncated to one word.
 *
 * A zero divisor returns BN_MASK2, matching the portable implementation of
 * this function in the #else branch, instead of executing a division by
 * zero (undefined behaviour).
 */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    if (d == 0)
        return BN_MASK2;

    return (BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d);
}
2270Sstevel@tonic-gate
2280Sstevel@tonic-gate #else
2290Sstevel@tonic-gate
2300Sstevel@tonic-gate /* Divide h,l by d and return the result. */
2310Sstevel@tonic-gate /* I need to test this some more :-( */
bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d)2320Sstevel@tonic-gate BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
2330Sstevel@tonic-gate {
2340Sstevel@tonic-gate BN_ULONG dh,dl,q,ret=0,th,tl,t;
2350Sstevel@tonic-gate int i,count=2;
2360Sstevel@tonic-gate
2370Sstevel@tonic-gate if (d == 0) return(BN_MASK2);
2380Sstevel@tonic-gate
2390Sstevel@tonic-gate i=BN_num_bits_word(d);
240*2139Sjp161948 assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i));
2410Sstevel@tonic-gate
2420Sstevel@tonic-gate i=BN_BITS2-i;
2430Sstevel@tonic-gate if (h >= d) h-=d;
2440Sstevel@tonic-gate
2450Sstevel@tonic-gate if (i)
2460Sstevel@tonic-gate {
2470Sstevel@tonic-gate d<<=i;
2480Sstevel@tonic-gate h=(h<<i)|(l>>(BN_BITS2-i));
2490Sstevel@tonic-gate l<<=i;
2500Sstevel@tonic-gate }
2510Sstevel@tonic-gate dh=(d&BN_MASK2h)>>BN_BITS4;
2520Sstevel@tonic-gate dl=(d&BN_MASK2l);
2530Sstevel@tonic-gate for (;;)
2540Sstevel@tonic-gate {
2550Sstevel@tonic-gate if ((h>>BN_BITS4) == dh)
2560Sstevel@tonic-gate q=BN_MASK2l;
2570Sstevel@tonic-gate else
2580Sstevel@tonic-gate q=h/dh;
2590Sstevel@tonic-gate
2600Sstevel@tonic-gate th=q*dh;
2610Sstevel@tonic-gate tl=dl*q;
2620Sstevel@tonic-gate for (;;)
2630Sstevel@tonic-gate {
2640Sstevel@tonic-gate t=h-th;
2650Sstevel@tonic-gate if ((t&BN_MASK2h) ||
2660Sstevel@tonic-gate ((tl) <= (
2670Sstevel@tonic-gate (t<<BN_BITS4)|
2680Sstevel@tonic-gate ((l&BN_MASK2h)>>BN_BITS4))))
2690Sstevel@tonic-gate break;
2700Sstevel@tonic-gate q--;
2710Sstevel@tonic-gate th-=dh;
2720Sstevel@tonic-gate tl-=dl;
2730Sstevel@tonic-gate }
2740Sstevel@tonic-gate t=(tl>>BN_BITS4);
2750Sstevel@tonic-gate tl=(tl<<BN_BITS4)&BN_MASK2h;
2760Sstevel@tonic-gate th+=t;
2770Sstevel@tonic-gate
2780Sstevel@tonic-gate if (l < tl) th++;
2790Sstevel@tonic-gate l-=tl;
2800Sstevel@tonic-gate if (h < th)
2810Sstevel@tonic-gate {
2820Sstevel@tonic-gate h+=d;
2830Sstevel@tonic-gate q--;
2840Sstevel@tonic-gate }
2850Sstevel@tonic-gate h-=th;
2860Sstevel@tonic-gate
2870Sstevel@tonic-gate if (--count == 0) break;
2880Sstevel@tonic-gate
2890Sstevel@tonic-gate ret=q<<BN_BITS4;
2900Sstevel@tonic-gate h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
2910Sstevel@tonic-gate l=(l&BN_MASK2l)<<BN_BITS4;
2920Sstevel@tonic-gate }
2930Sstevel@tonic-gate ret|=q;
2940Sstevel@tonic-gate return(ret);
2950Sstevel@tonic-gate }
#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
2970Sstevel@tonic-gate
2980Sstevel@tonic-gate #ifdef BN_LLONG
/*
 * r[i] = a[i] + b[i] (plus the carry from the previous word) for
 * i in [0, n), computed in a double-width accumulator.  Returns the carry
 * out of the last word, or 0 without touching r when n <= 0.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
{
    BN_ULLONG acc = 0;
    int i;

    assert(n >= 0);
    if (n <= 0)
        return (BN_ULONG)0;

    for (i = 0; i < n; i++) {
        acc += (BN_ULLONG)a[i] + b[i];
        r[i] = (BN_ULONG)acc & BN_MASK2;
        /* What remains above BN_BITS2 is the carry into the next word. */
        acc >>= BN_BITS2;
    }
    return (BN_ULONG)acc;
}
3340Sstevel@tonic-gate #else /* !BN_LLONG */
/*
 * r[i] = a[i] + b[i] (plus the carry from the previous word) for
 * i in [0, n), using single-word arithmetic only.  Returns the carry out
 * of the last word, or 0 without touching r when n <= 0.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
{
    BN_ULONG carry, sum, with_carry;
    int i;

    assert(n >= 0);
    if (n <= 0)
        return (BN_ULONG)0;

    carry = 0;
    for (i = 0; i < n; i++) {
        /* Add the incoming carry first; a wrap shows up as result < carry. */
        with_carry = a[i];
        with_carry = (with_carry + carry) & BN_MASK2;
        carry = (with_carry < carry);

        /* Then add b[i]; a wrap shows up as sum < with_carry. */
        sum = (with_carry + b[i]) & BN_MASK2;
        carry += (sum < with_carry);

        r[i] = sum;
    }
    return (BN_ULONG)carry;
}
3830Sstevel@tonic-gate #endif /* !BN_LLONG */
3840Sstevel@tonic-gate
/*
 * r[i] = a[i] - b[i] - borrow for i in [0, n), lowest word first.
 * Returns the borrow out of the last word (0 or 1), or 0 without touching
 * r when n <= 0.
 */
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
{
    BN_ULONG minuend, subtrahend;
    int borrow = 0;
    int i;

    assert(n >= 0);
    if (n <= 0)
        return (BN_ULONG)0;

    for (i = 0; i < n; i++) {
        minuend = a[i];
        subtrahend = b[i];
        r[i] = (minuend - subtrahend - borrow) & BN_MASK2;
        /* When the two words are equal the incoming borrow simply carries on. */
        if (minuend != subtrahend)
            borrow = (minuend < subtrahend);
    }
    return borrow;
}
4210Sstevel@tonic-gate
4220Sstevel@tonic-gate #ifdef BN_MUL_COMBA
4230Sstevel@tonic-gate
4240Sstevel@tonic-gate #undef bn_mul_comba8
4250Sstevel@tonic-gate #undef bn_mul_comba4
4260Sstevel@tonic-gate #undef bn_sqr_comba8
4270Sstevel@tonic-gate #undef bn_sqr_comba4
4280Sstevel@tonic-gate
/* mul_add_c(a,b,c0,c1,c2)    -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
4330Sstevel@tonic-gate
4340Sstevel@tonic-gate #ifdef BN_LLONG
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b, BN_LLONG flavour.
 *
 * Uses the caller's locals t (BN_ULLONG), t1 and t2 (BN_ULONG) as scratch.
 * Arguments are parenthesized so expression arguments multiply correctly,
 * and the body is wrapped in do { } while (0) so the macro behaves as one
 * statement (e.g. as the body of an unbraced if/else).
 */
#define mul_add_c(a,b,c0,c1,c2) do { \
    t=(BN_ULLONG)(a)*(b); \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    /* carry out of the low word goes into the high half of the product */ \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    /* carry out of the middle word goes into c2 */ \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)
4410Sstevel@tonic-gate
/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2*a*b, BN_LLONG flavour.
 *
 * Uses the caller's locals t and tt (BN_ULLONG), t1 and t2 (BN_ULONG) as
 * scratch.  Arguments are parenthesized and the body is wrapped in
 * do { } while (0) so the macro is a single, expression-safe statement.
 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
    t=(BN_ULLONG)(a)*(b); \
    tt=(t+t)&BN_MASK; \
    /* doubling overflowed the double word: that bit belongs in c2 */ \
    if (tt < t) (c2)++; \
    t1=(BN_ULONG)Lw(tt); \
    t2=(BN_ULONG)Hw(tt); \
    (c0)=((c0)+t1)&BN_MASK2; \
    /* propagate the c0 carry into t2; if t2 itself wraps, bump c2 */ \
    if (((c0) < t1) && (((++t2)&BN_MASK2) == 0)) (c2)++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)
4510Sstevel@tonic-gate
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]^2, BN_LLONG flavour.
 *
 * Uses the caller's locals t (BN_ULLONG), t1 and t2 (BN_ULONG) as scratch.
 * The array argument is parenthesized so pointer expressions index
 * correctly, and the body is wrapped in do { } while (0) so the macro is a
 * single statement.
 */
#define sqr_add_c(a,i,c0,c1,c2) do { \
    t=(BN_ULLONG)(a)[i]*(a)[i]; \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)
4580Sstevel@tonic-gate
/* sqr_add_c2(a,i,j,c0,c1,c2): forwards a[i] and a[j] to mul_add_c2 (BN_LLONG flavour). */
#define sqr_add_c2(a,i,j,c0,c1,c2) mul_add_c2((a)[i],(a)[j],c0,c1,c2)
4610Sstevel@tonic-gate
462*2139Sjp161948 #elif defined(BN_UMULT_LOHI)
463*2139Sjp161948
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b, BN_UMULT_LOHI flavour.
 * BN_UMULT_LOHI leaves the low product word in the caller's t1 and the
 * high word in the caller's t2.
 */
#define mul_add_c(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a), tb=(b); \
    BN_UMULT_LOHI(t1,t2,ta,tb); \
    c0 += t1; if (c0 < t1) t2++; \
    c1 += t2; if (c1 < t2) c2++; \
    }
470*2139Sjp161948
/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2*a*b, BN_UMULT_LOHI flavour.
 *
 * The product (t1:t0) is added to the accumulator twice instead of being
 * doubled first.  Doubling first can make the high word all ones once the
 * low-word doubling carry is folded in; the carry out of c0 then wraps it
 * to zero and is silently lost.  The high word of a single-word product is
 * at most 2^w - 2, so adding one carry to it here can never wrap.
 *
 * Uses the caller's locals t1 and t2 (BN_ULONG) as scratch.
 */
#define mul_add_c2(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a),tb=(b),t0; \
    BN_UMULT_LOHI(t0,t1,ta,tb); \
    /* first addition of the product */ \
    c0 += t0; t2 = t1+((c0<t0)?1:0); \
    c1 += t2; c2 += (c1<t2)?1:0; \
    /* second addition of the product */ \
    c0 += t0; t1 += (c0<t0)?1:0; \
    c1 += t1; c2 += (c1<t1)?1:0; \
    }
479*2139Sjp161948
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]^2, BN_UMULT_LOHI flavour.
 * The low and high product words land in the caller's t1 and t2.
 */
#define sqr_add_c(a,i,c0,c1,c2) { \
    BN_ULONG ta=(a)[i]; \
    BN_UMULT_LOHI(t1,t2,ta,ta); \
    c0 += t1; if (c0 < t1) t2++; \
    c1 += t2; if (c1 < t2) c2++; \
    }
486*2139Sjp161948
/* sqr_add_c2(a,i,j,c0,c1,c2): forwards a[i] and a[j] to mul_add_c2 (BN_UMULT_LOHI flavour). */
#define sqr_add_c2(a,i,j,c0,c1,c2) mul_add_c2((a)[i],(a)[j],c0,c1,c2)
489*2139Sjp161948
4900Sstevel@tonic-gate #elif defined(BN_UMULT_HIGH)
4910Sstevel@tonic-gate
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b, BN_UMULT_HIGH flavour.
 * The low product word comes from a plain multiply, the high word from
 * BN_UMULT_HIGH; both are kept in the caller's t1 and t2.
 */
#define mul_add_c(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a), tb=(b); \
    t1 = ta * tb; \
    t2 = BN_UMULT_HIGH(ta,tb); \
    c0 += t1; if (c0 < t1) t2++; \
    c1 += t2; if (c1 < t2) c2++; \
    }
4990Sstevel@tonic-gate
/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2*a*b, BN_UMULT_HIGH flavour.
 *
 * The product (t1:t0) is added to the accumulator twice instead of being
 * doubled first.  Doubling first can make the high word all ones once the
 * low-word doubling carry is folded in; the carry out of c0 then wraps it
 * to zero and is silently lost.  The high word of a single-word product is
 * at most 2^w - 2, so adding one carry to it here can never wrap.
 *
 * Uses the caller's locals t1 and t2 (BN_ULONG) as scratch.
 */
#define mul_add_c2(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a),tb=(b),t0; \
    t1 = BN_UMULT_HIGH(ta,tb); \
    t0 = ta * tb; \
    /* first addition of the product */ \
    c0 += t0; t2 = t1+((c0<t0)?1:0); \
    c1 += t2; c2 += (c1<t2)?1:0; \
    /* second addition of the product */ \
    c0 += t0; t1 += (c0<t0)?1:0; \
    c1 += t1; c2 += (c1<t1)?1:0; \
    }
5090Sstevel@tonic-gate
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]^2, BN_UMULT_HIGH flavour.
 * The low and high product words are kept in the caller's t1 and t2.
 */
#define sqr_add_c(a,i,c0,c1,c2) { \
    BN_ULONG ta=(a)[i]; \
    t1 = ta * ta; \
    t2 = BN_UMULT_HIGH(ta,ta); \
    c0 += t1; if (c0 < t1) t2++; \
    c1 += t2; if (c1 < t2) c2++; \
    }
5170Sstevel@tonic-gate
/* sqr_add_c2(a,i,j,c0,c1,c2): forwards a[i] and a[j] to mul_add_c2 (BN_UMULT_HIGH flavour). */
#define sqr_add_c2(a,i,j,c0,c1,c2) mul_add_c2((a)[i],(a)[j],c0,c1,c2)
5200Sstevel@tonic-gate
5210Sstevel@tonic-gate #else /* !BN_LLONG */
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b, half-word (portable) flavour.
 *
 * Uses the caller's locals t1, t2, bl and bh (BN_ULONG) as scratch; after
 * mul64() t1 is treated as the low and t2 as the high word of the product.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (e.g. as the body of an unbraced if/else).
 *
 * NOTE(review): LBITS(), HBITS() and mul64() are defined in bn_lcl.h and
 * are not visible here.
 */
#define mul_add_c(a,b,c0,c1,c2) do { \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)
5280Sstevel@tonic-gate
/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2*a*b, half-word (portable)
 * flavour.
 *
 * Uses the caller's locals t1, t2, bl and bh (BN_ULONG) as scratch.  The
 * product is doubled word by word, testing BN_TBIT on each word before it
 * is doubled to catch the bit shifted out.  The body is wrapped in
 * do { } while (0) so the macro behaves as a single statement.
 *
 * NOTE(review): LBITS(), HBITS(), mul64() and BN_TBIT are defined in
 * bn_lcl.h / bn.h and are not visible here.
 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    if (t2 & BN_TBIT) (c2)++; \
    t2=(t2+t2)&BN_MASK2; \
    if (t1 & BN_TBIT) t2++; \
    t1=(t1+t1)&BN_MASK2; \
    (c0)=((c0)+t1)&BN_MASK2; \
    /* propagate the c0 carry into t2; if t2 itself wraps, bump c2 */ \
    if (((c0) < t1) && (((++t2)&BN_MASK2) == 0)) (c2)++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)
5400Sstevel@tonic-gate
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]^2, half-word (portable)
 * flavour.
 *
 * Uses the caller's locals t1 and t2 (BN_ULONG) as scratch; after sqr64()
 * t1 is treated as the low and t2 as the high word of the square.  The
 * body is wrapped in do { } while (0) so the macro behaves as a single
 * statement.
 *
 * NOTE(review): sqr64() is defined in bn_lcl.h and is not visible here.
 */
#define sqr_add_c(a,i,c0,c1,c2) do { \
    sqr64(t1,t2,(a)[i]); \
    (c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    (c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
    } while (0)
5450Sstevel@tonic-gate
/* sqr_add_c2(a,i,j,c0,c1,c2): forwards a[i] and a[j] to mul_add_c2 (portable flavour). */
#define sqr_add_c2(a,i,j,c0,c1,c2) mul_add_c2((a)[i],(a)[j],c0,c1,c2)
5480Sstevel@tonic-gate #endif /* !BN_LLONG */
5490Sstevel@tonic-gate
/*
 * Multiply the 8-word inputs a and b into the 16-word result r, one output
 * column at a time: column k accumulates every a[i]*b[j] with i + j == k
 * through mul_add_c() into a three-word accumulator, emits the lowest word
 * as r[k] and rotates the roles of c1/c2/c3 for the next column.
 *
 * t, t1, t2, bl and bh are scratch locals referenced by name from inside
 * the mul_add_c() macro; which of them are used depends on the build
 * configuration.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;

    /* column 0 */
    mul_add_c(a[0],b[0],c1,c2,c3);
    r[0] = c1; c1 = 0;

    /* column 1 */
    mul_add_c(a[0],b[1],c2,c3,c1);
    mul_add_c(a[1],b[0],c2,c3,c1);
    r[1] = c2; c2 = 0;

    /* column 2 */
    mul_add_c(a[2],b[0],c3,c1,c2);
    mul_add_c(a[1],b[1],c3,c1,c2);
    mul_add_c(a[0],b[2],c3,c1,c2);
    r[2] = c3; c3 = 0;

    /* column 3 */
    mul_add_c(a[0],b[3],c1,c2,c3);
    mul_add_c(a[1],b[2],c1,c2,c3);
    mul_add_c(a[2],b[1],c1,c2,c3);
    mul_add_c(a[3],b[0],c1,c2,c3);
    r[3] = c1; c1 = 0;

    /* column 4 */
    mul_add_c(a[4],b[0],c2,c3,c1);
    mul_add_c(a[3],b[1],c2,c3,c1);
    mul_add_c(a[2],b[2],c2,c3,c1);
    mul_add_c(a[1],b[3],c2,c3,c1);
    mul_add_c(a[0],b[4],c2,c3,c1);
    r[4] = c2; c2 = 0;

    /* column 5 */
    mul_add_c(a[0],b[5],c3,c1,c2);
    mul_add_c(a[1],b[4],c3,c1,c2);
    mul_add_c(a[2],b[3],c3,c1,c2);
    mul_add_c(a[3],b[2],c3,c1,c2);
    mul_add_c(a[4],b[1],c3,c1,c2);
    mul_add_c(a[5],b[0],c3,c1,c2);
    r[5] = c3; c3 = 0;

    /* column 6 */
    mul_add_c(a[6],b[0],c1,c2,c3);
    mul_add_c(a[5],b[1],c1,c2,c3);
    mul_add_c(a[4],b[2],c1,c2,c3);
    mul_add_c(a[3],b[3],c1,c2,c3);
    mul_add_c(a[2],b[4],c1,c2,c3);
    mul_add_c(a[1],b[5],c1,c2,c3);
    mul_add_c(a[0],b[6],c1,c2,c3);
    r[6] = c1; c1 = 0;

    /* column 7 */
    mul_add_c(a[0],b[7],c2,c3,c1);
    mul_add_c(a[1],b[6],c2,c3,c1);
    mul_add_c(a[2],b[5],c2,c3,c1);
    mul_add_c(a[3],b[4],c2,c3,c1);
    mul_add_c(a[4],b[3],c2,c3,c1);
    mul_add_c(a[5],b[2],c2,c3,c1);
    mul_add_c(a[6],b[1],c2,c3,c1);
    mul_add_c(a[7],b[0],c2,c3,c1);
    r[7] = c2; c2 = 0;

    /* column 8 */
    mul_add_c(a[7],b[1],c3,c1,c2);
    mul_add_c(a[6],b[2],c3,c1,c2);
    mul_add_c(a[5],b[3],c3,c1,c2);
    mul_add_c(a[4],b[4],c3,c1,c2);
    mul_add_c(a[3],b[5],c3,c1,c2);
    mul_add_c(a[2],b[6],c3,c1,c2);
    mul_add_c(a[1],b[7],c3,c1,c2);
    r[8] = c3; c3 = 0;

    /* column 9 */
    mul_add_c(a[2],b[7],c1,c2,c3);
    mul_add_c(a[3],b[6],c1,c2,c3);
    mul_add_c(a[4],b[5],c1,c2,c3);
    mul_add_c(a[5],b[4],c1,c2,c3);
    mul_add_c(a[6],b[3],c1,c2,c3);
    mul_add_c(a[7],b[2],c1,c2,c3);
    r[9] = c1; c1 = 0;

    /* column 10 */
    mul_add_c(a[7],b[3],c2,c3,c1);
    mul_add_c(a[6],b[4],c2,c3,c1);
    mul_add_c(a[5],b[5],c2,c3,c1);
    mul_add_c(a[4],b[6],c2,c3,c1);
    mul_add_c(a[3],b[7],c2,c3,c1);
    r[10] = c2; c2 = 0;

    /* column 11 */
    mul_add_c(a[4],b[7],c3,c1,c2);
    mul_add_c(a[5],b[6],c3,c1,c2);
    mul_add_c(a[6],b[5],c3,c1,c2);
    mul_add_c(a[7],b[4],c3,c1,c2);
    r[11] = c3; c3 = 0;

    /* column 12 */
    mul_add_c(a[7],b[5],c1,c2,c3);
    mul_add_c(a[6],b[6],c1,c2,c3);
    mul_add_c(a[5],b[7],c1,c2,c3);
    r[12] = c1; c1 = 0;

    /* column 13 */
    mul_add_c(a[6],b[7],c2,c3,c1);
    mul_add_c(a[7],b[6],c2,c3,c1);
    r[13] = c2; c2 = 0;

    /* column 14, plus the final carry word */
    mul_add_c(a[7],b[7],c3,c1,c2);
    r[14] = c3;
    r[15] = c1;
}
6580Sstevel@tonic-gate
/*
 * Multiply the 4-word inputs a and b into the 8-word result r, one output
 * column at a time: column k accumulates every a[i]*b[j] with i + j == k
 * through mul_add_c() into a three-word accumulator, emits the lowest word
 * as r[k] and rotates the roles of c1/c2/c3 for the next column.
 *
 * t, t1, t2, bl and bh are scratch locals referenced by name from inside
 * the mul_add_c() macro; which of them are used depends on the build
 * configuration.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;

    /* column 0 */
    mul_add_c(a[0],b[0],c1,c2,c3);
    r[0] = c1; c1 = 0;

    /* column 1 */
    mul_add_c(a[0],b[1],c2,c3,c1);
    mul_add_c(a[1],b[0],c2,c3,c1);
    r[1] = c2; c2 = 0;

    /* column 2 */
    mul_add_c(a[2],b[0],c3,c1,c2);
    mul_add_c(a[1],b[1],c3,c1,c2);
    mul_add_c(a[0],b[2],c3,c1,c2);
    r[2] = c3; c3 = 0;

    /* column 3 */
    mul_add_c(a[0],b[3],c1,c2,c3);
    mul_add_c(a[1],b[2],c1,c2,c3);
    mul_add_c(a[2],b[1],c1,c2,c3);
    mul_add_c(a[3],b[0],c1,c2,c3);
    r[3] = c1; c1 = 0;

    /* column 4 */
    mul_add_c(a[3],b[1],c2,c3,c1);
    mul_add_c(a[2],b[2],c2,c3,c1);
    mul_add_c(a[1],b[3],c2,c3,c1);
    r[4] = c2; c2 = 0;

    /* column 5 */
    mul_add_c(a[2],b[3],c3,c1,c2);
    mul_add_c(a[3],b[2],c3,c1,c2);
    r[5] = c3; c3 = 0;

    /* column 6, plus the final carry word */
    mul_add_c(a[3],b[3],c1,c2,c3);
    r[6] = c1;
    r[7] = c2;
}
7030Sstevel@tonic-gate
/*
 * bn_sqr_comba8 - unrolled, column-by-column squaring of an 8-word operand.
 *
 * Reads a[0..7] and stores sixteen words into r[0..15].
 *
 * For each output column k the diagonal term (when k is even) is handled
 * by sqr_add_c(a, k/2, ...) and every off-diagonal index pair i > j with
 * i + j == k by sqr_add_c2(a, i, j, ...).  After a column is finished its
 * lowest accumulator word is stored to r[k] and cleared, so the roles of
 * c1/c2/c3 rotate from column to column.
 *
 * NOTE(review): sqr_add_c and sqr_add_c2 are macros defined elsewhere in
 * this file; presumably sqr_add_c adds a[i]*a[i] and sqr_add_c2 adds
 * 2*a[i]*a[j] into the three-word accumulator given by their last three
 * arguments -- confirm against the macro definitions.  The locals t, tt,
 * t1, t2, bl and bh are never named here; presumably they are scratch
 * variables used by the macro expansions, so keep them as declared.
 */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = c2 = c3 = 0;

    /* column 0 */
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1;
    c1 = 0;

    /* column 1 */
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2;
    c2 = 0;

    /* column 2 */
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3;
    c3 = 0;

    /* column 3 */
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1;
    c1 = 0;

    /* column 4 */
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    sqr_add_c2(a, 4, 0, c2, c3, c1);
    r[4] = c2;
    c2 = 0;

    /* column 5 */
    sqr_add_c2(a, 5, 0, c3, c1, c2);
    sqr_add_c2(a, 4, 1, c3, c1, c2);
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3;
    c3 = 0;

    /* column 6 */
    sqr_add_c(a, 3, c1, c2, c3);
    sqr_add_c2(a, 4, 2, c1, c2, c3);
    sqr_add_c2(a, 5, 1, c1, c2, c3);
    sqr_add_c2(a, 6, 0, c1, c2, c3);
    r[6] = c1;
    c1 = 0;

    /* column 7 */
    sqr_add_c2(a, 7, 0, c2, c3, c1);
    sqr_add_c2(a, 6, 1, c2, c3, c1);
    sqr_add_c2(a, 5, 2, c2, c3, c1);
    sqr_add_c2(a, 4, 3, c2, c3, c1);
    r[7] = c2;
    c2 = 0;

    /* column 8 */
    sqr_add_c(a, 4, c3, c1, c2);
    sqr_add_c2(a, 5, 3, c3, c1, c2);
    sqr_add_c2(a, 6, 2, c3, c1, c2);
    sqr_add_c2(a, 7, 1, c3, c1, c2);
    r[8] = c3;
    c3 = 0;

    /* column 9 */
    sqr_add_c2(a, 7, 2, c1, c2, c3);
    sqr_add_c2(a, 6, 3, c1, c2, c3);
    sqr_add_c2(a, 5, 4, c1, c2, c3);
    r[9] = c1;
    c1 = 0;

    /* column 10 */
    sqr_add_c(a, 5, c2, c3, c1);
    sqr_add_c2(a, 6, 4, c2, c3, c1);
    sqr_add_c2(a, 7, 3, c2, c3, c1);
    r[10] = c2;
    c2 = 0;

    /* column 11 */
    sqr_add_c2(a, 7, 4, c3, c1, c2);
    sqr_add_c2(a, 6, 5, c3, c1, c2);
    r[11] = c3;
    c3 = 0;

    /* column 12 */
    sqr_add_c(a, 6, c1, c2, c3);
    sqr_add_c2(a, 7, 5, c1, c2, c3);
    r[12] = c1;
    c1 = 0;

    /* column 13 */
    sqr_add_c2(a, 7, 6, c2, c3, c1);
    r[13] = c2;
    c2 = 0;

    /* column 14; the next accumulator word becomes the top result word */
    sqr_add_c(a, 7, c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}
7840Sstevel@tonic-gate
/*
 * bn_sqr_comba4 - unrolled, column-by-column squaring of a 4-word operand.
 *
 * Reads a[0..3] and stores eight words into r[0..7].
 *
 * For each output column k the diagonal term (when k is even) is handled
 * by sqr_add_c(a, k/2, ...) and every off-diagonal index pair i > j with
 * i + j == k by sqr_add_c2(a, i, j, ...).  After a column is finished its
 * lowest accumulator word is stored to r[k] and cleared, so the roles of
 * c1/c2/c3 rotate from column to column.
 *
 * NOTE(review): sqr_add_c and sqr_add_c2 are macros defined elsewhere in
 * this file; presumably sqr_add_c adds a[i]*a[i] and sqr_add_c2 adds
 * 2*a[i]*a[j] into the three-word accumulator given by their last three
 * arguments -- confirm against the macro definitions.  The locals t, tt,
 * t1, t2, bl and bh are never named here; presumably they are scratch
 * variables used by the macro expansions, so keep them as declared.
 */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = c2 = c3 = 0;

    /* column 0 */
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1;
    c1 = 0;

    /* column 1 */
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2;
    c2 = 0;

    /* column 2 */
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3;
    c3 = 0;

    /* column 3 */
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1;
    c1 = 0;

    /* column 4 */
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    r[4] = c2;
    c2 = 0;

    /* column 5 */
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3;
    c3 = 0;

    /* column 6; the next accumulator word becomes the top result word */
    sqr_add_c(a, 3, c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}
8230Sstevel@tonic-gate #else /* !BN_MUL_COMBA */
8240Sstevel@tonic-gate
8250Sstevel@tonic-gate /* hmm... is it faster just to do a multiply? */
8260Sstevel@tonic-gate #undef bn_sqr_comba4
/*
 * bn_sqr_comba4 - variant built when BN_MUL_COMBA is not defined.
 *
 * Forwards to bn_sqr_normal(r, a, 4, t), handing it an 8-word local
 * scratch array t.  The input a is only passed through, never written
 * here.
 *
 * The parameter a is const-qualified so that this definition has the same
 * type as the bn_sqr_comba4 definition in the BN_MUL_COMBA branch of this
 * file; a single prototype can then cover both build configurations.
 * NOTE(review): this relies on bn_sqr_normal accepting a const source
 * pointer -- confirm against its prototype.
 */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG t[8];

    bn_sqr_normal(r, a, 4, t);
}
8320Sstevel@tonic-gate
8330Sstevel@tonic-gate #undef bn_sqr_comba8
/*
 * bn_sqr_comba8 - variant built when BN_MUL_COMBA is not defined.
 *
 * Forwards to bn_sqr_normal(r, a, 8, t), handing it a 16-word local
 * scratch array t.  The input a is only passed through, never written
 * here.
 *
 * The parameter a is const-qualified so that this definition has the same
 * type as the bn_sqr_comba8 definition in the BN_MUL_COMBA branch of this
 * file; a single prototype can then cover both build configurations.
 * NOTE(review): this relies on bn_sqr_normal accepting a const source
 * pointer -- confirm against its prototype.
 */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG t[16];

    bn_sqr_normal(r, a, 8, t);
}
8390Sstevel@tonic-gate
/*
 * bn_mul_comba4 - variant built when BN_MUL_COMBA is not defined.
 *
 * Makes one pass per word of b over the 4-word operand a:
 *   - the first pass calls bn_mul_words() with b[0], targeting r[0..];
 *   - pass i (1..3) calls bn_mul_add_words() with b[i], targeting r[i..].
 * The value returned by each pass is stored in r[i + 4], i.e. the word
 * just above the four words that pass was pointed at.  The passes must run
 * in ascending order because each one stores into a word the next pass is
 * pointed at.
 *
 * NOTE(review): bn_mul_words and bn_mul_add_words are defined elsewhere;
 * presumably they return the carry-out word of the row operation -- confirm.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    int row;

    r[4] = bn_mul_words(r, a, 4, b[0]);
    for (row = 1; row < 4; row++)
        r[row + 4] = bn_mul_add_words(r + row, a, 4, b[row]);
}
8470Sstevel@tonic-gate
/*
 * bn_mul_comba8 - variant built when BN_MUL_COMBA is not defined.
 *
 * Makes one pass per word of b over the 8-word operand a:
 *   - the first pass calls bn_mul_words() with b[0], targeting r[0..];
 *   - pass i (1..7) calls bn_mul_add_words() with b[i], targeting r[i..].
 * The value returned by each pass is stored in r[i + 8], i.e. the word
 * just above the eight words that pass was pointed at.  The passes must
 * run in ascending order because each one stores into a word the next pass
 * is pointed at.
 *
 * NOTE(review): bn_mul_words and bn_mul_add_words are defined elsewhere;
 * presumably they return the carry-out word of the row operation -- confirm.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    int row;

    r[8] = bn_mul_words(r, a, 8, b[0]);
    for (row = 1; row < 8; row++)
        r[row + 8] = bn_mul_add_words(r + row, a, 8, b[row]);
}
8590Sstevel@tonic-gate
8600Sstevel@tonic-gate #endif /* !BN_MUL_COMBA */
861