xref: /onnv-gate/usr/src/common/openssl/crypto/bn/asm/sparcv8.S (revision 0:68f95e015346)
1*0Sstevel@tonic-gate.ident	"sparcv8.s, Version 1.4"
2*0Sstevel@tonic-gate.ident	"SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3*0Sstevel@tonic-gate
4*0Sstevel@tonic-gate/*
5*0Sstevel@tonic-gate * ====================================================================
6*0Sstevel@tonic-gate * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7*0Sstevel@tonic-gate * project.
8*0Sstevel@tonic-gate *
9*0Sstevel@tonic-gate * Rights for redistribution and usage in source and binary forms are
10*0Sstevel@tonic-gate * granted according to the OpenSSL license. Warranty of any kind is
11*0Sstevel@tonic-gate * disclaimed.
12*0Sstevel@tonic-gate * ====================================================================
13*0Sstevel@tonic-gate */
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gate/*
16*0Sstevel@tonic-gate * This is my modest contribution to the OpenSSL project (see
17*0Sstevel@tonic-gate * http://www.openssl.org/ for more information about it) and is
18*0Sstevel@tonic-gate * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19*0Sstevel@tonic-gate * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20*0Sstevel@tonic-gate *
21*0Sstevel@tonic-gate * See bn_asm.sparc.v8plus.S for more details.
22*0Sstevel@tonic-gate */
23*0Sstevel@tonic-gate
24*0Sstevel@tonic-gate/*
25*0Sstevel@tonic-gate * Revision history.
26*0Sstevel@tonic-gate *
27*0Sstevel@tonic-gate * 1.1	- new loop unrolling model(*);
28*0Sstevel@tonic-gate * 1.2	- made gas friendly;
29*0Sstevel@tonic-gate * 1.3	- fixed problem with /usr/ccs/lib/cpp;
30*0Sstevel@tonic-gate * 1.4	- some retunes;
31*0Sstevel@tonic-gate *
32*0Sstevel@tonic-gate * (*)	see bn_asm.sparc.v8plus.S for details
33*0Sstevel@tonic-gate */
34*0Sstevel@tonic-gate
35*0Sstevel@tonic-gate.section	".text",#alloc,#execinstr
36*0Sstevel@tonic-gate.file		"bn_asm.sparc.v8.S"
37*0Sstevel@tonic-gate
38*0Sstevel@tonic-gate.align	32
39*0Sstevel@tonic-gate
.global bn_mul_add_words
/*
 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
 * BN_ULONG *rp,*ap;
 * int num;
 * BN_ULONG w;
 *
 * For each i in [0,num): rp[i] = low word of (rp[i] + ap[i]*w + carry),
 * where carry is the high word of the previous step (0 for the first).
 * The last carry word is returned.  Returns 0 when num <= 0.
 *
 * Leaf routine, no register window is allocated.
 *   In:   %o0 = rp, %o1 = ap, %o2 = num, %o3 = w
 *   Out:  %o0 = final carry word
 *   Uses: %o4 (rp word), %o5 (running carry), %g1 (high product half),
 *         %g2/%g3 (ap words, then low product halves), %y, icc
 */
bn_mul_add_words:
	subcc	%o2,0,%g0		! num > 0 ?
	bg,a	.L_bn_mul_add_words_start
	ld	[%o1],%g2		! (annulled unless taken) preload ap[0]
	jmpl	%o7+8,%g0		! num <= 0: leaf return
	or	%g0,%g0,%o0		! ... with result 0

.L_bn_mul_add_words_start:
	andcc	%o2,-4,%g0		! at least four words to do?
	bz	.L_bn_mul_add_words_rest
	or	%g0,%g0,%o5		! carry = 0 (delay slot, runs on both paths)

	! Main loop, four words per pass.  On entry %g2 already holds ap[0].
.L_bn_mul_add_words_quad:
	ld	[%o0],%o4		! rp[0]
	ld	[%o1+4],%g3		! prefetch ap[1]
	umul	%o3,%g2,%g2		! low half of w*ap[0], high half lands in %y
	rd	%y,%g1
	addcc	%o4,%o5,%o4		! rp[0] + carry in
	addx	%g1,0,%g1		! fold that carry into the high half
	addcc	%o4,%g2,%o4		! + low half
	st	%o4,[%o0]
	addx	%g1,0,%o5		! carry out = high half + carry

	ld	[%o0+4],%o4		! rp[1]
	ld	[%o1+8],%g2		! prefetch ap[2]
	umul	%o3,%g3,%g3
	sub	%o2,4,%o2		! four fewer words left
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g3,%o4
	st	%o4,[%o0+4]
	addx	%g1,0,%o5

	ld	[%o0+8],%o4		! rp[2]
	ld	[%o1+12],%g3		! prefetch ap[3]
	umul	%o3,%g2,%g2
	add	%o1,16,%o1		! ap += 4 words
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	st	%o4,[%o0+8]
	addx	%g1,0,%o5

	ld	[%o0+12],%o4		! rp[3]
	umul	%o3,%g3,%g3
	add	%o0,16,%o0		! rp += 4 words
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g3,%o4
	st	%o4,[%o0-4]		! rp was already advanced, so this is old rp[3]
	addx	%g1,0,%o5
	andcc	%o2,-4,%g0		! another full group of four?
	bnz,a	.L_bn_mul_add_words_quad
	ld	[%o1],%g2		! (annulled unless taken) preload next ap[0]

	orcc	%g0,%o2,%g0		! 1..3 words left over?
	bnz,a	.L_bn_mul_add_words_rest
	ld	[%o1],%g2		! (annulled unless taken) preload ap[0]
.L_bn_mul_add_words_done:
	jmpl	%o7+8,%g0
	or	%g0,%o5,%o0		! return the running carry
	nop

	! Remainder: one to three words, fully unrolled.
.L_bn_mul_add_words_rest:
	ld	[%o0],%o4
	umul	%o3,%g2,%g2
	addcc	%o4,%o5,%o4
	rd	%y,%g1
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	addx	%g1,0,%o5
	subcc	%o2,1,%o2		! count down, Z set when finished
	bz	.L_bn_mul_add_words_done
	st	%o4,[%o0]		! delay slot: store happens either way

	ld	[%o1+4],%g2
	ld	[%o0+4],%o4
	umul	%o3,%g2,%g2
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	addx	%g1,0,%o5
	subcc	%o2,1,%o2
	bz	.L_bn_mul_add_words_done
	st	%o4,[%o0+4]

	ld	[%o1+8],%g2
	ld	[%o0+8],%o4
	umul	%o3,%g2,%g2
	rd	%y,%g1
	addcc	%o4,%o5,%o4
	addx	%g1,0,%g1
	addcc	%o4,%g2,%o4
	st	%o4,[%o0+8]
	jmpl	%o7+8,%g0
	addx	%g1,0,%o0		! final carry goes straight to the result

.type	bn_mul_add_words,#function
.size	bn_mul_add_words,(.-bn_mul_add_words)
150*0Sstevel@tonic-gate
151*0Sstevel@tonic-gate.align	32
152*0Sstevel@tonic-gate
.global bn_mul_words
/*
 * BN_ULONG bn_mul_words(rp,ap,num,w)
 * BN_ULONG *rp,*ap;
 * int num;
 * BN_ULONG w;
 *
 * For each i in [0,num): rp[i] = low word of (ap[i]*w + carry), where
 * carry is the high word of the previous step (0 for the first).
 * The last carry word is returned.  Returns 0 when num <= 0.
 *
 * Leaf routine, no register window is allocated.
 *   In:   %o0 = rp, %o1 = ap, %o2 = num, %o3 = w
 *   Out:  %o0 = final carry word
 *   Uses: %o5 (running carry), %g1 (high product half),
 *         %g2/%g3 (ap words, then low product halves), %y, icc
 */
bn_mul_words:
	subcc	%o2,0,%g0		! num > 0 ?
	bg,a	.L_bn_mul_words_start
	ld	[%o1],%g2		! (annulled unless taken) preload ap[0]
	jmpl	%o7+8,%g0		! num <= 0: leaf return
	or	%g0,%g0,%o0		! ... with result 0

.L_bn_mul_words_start:
	andcc	%o2,-4,%g0		! at least four words to do?
	bz	.L_bn_mul_words_rest
	or	%g0,%g0,%o5		! carry = 0 (delay slot, runs on both paths)

	! Main loop, four words per pass.  On entry %g2 already holds ap[0].
.L_bn_mul_words_quad:
	ld	[%o1+4],%g3		! prefetch ap[1]
	umul	%o3,%g2,%g2		! low half of w*ap[0], high half lands in %y
	addcc	%g2,%o5,%g2		! + carry in
	rd	%y,%g1
	addx	%g1,0,%o5		! carry out = high half + carry
	st	%g2,[%o0]

	ld	[%o1+8],%g2		! prefetch ap[2]
	umul	%o3,%g3,%g3
	addcc	%g3,%o5,%g3
	rd	%y,%g1
	sub	%o2,4,%o2		! four fewer words left (icc untouched)
	addx	%g1,0,%o5
	st	%g3,[%o0+4]

	ld	[%o1+12],%g3		! prefetch ap[3]
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	add	%o1,16,%o1		! ap += 4 words (icc untouched)
	st	%g2,[%o0+8]
	addx	%g1,0,%o5

	umul	%o3,%g3,%g3
	addcc	%g3,%o5,%g3
	rd	%y,%g1
	add	%o0,16,%o0		! rp += 4 words (icc untouched)
	addx	%g1,0,%o5
	st	%g3,[%o0-4]		! rp was already advanced, so this is old rp[3]
	andcc	%o2,-4,%g0		! another full group of four?
	nop
	bnz,a	.L_bn_mul_words_quad
	ld	[%o1],%g2		! (annulled unless taken) preload next ap[0]

	orcc	%g0,%o2,%g0		! 1..3 words left over?
	bnz,a	.L_bn_mul_words_rest
	ld	[%o1],%g2		! (annulled unless taken) preload ap[0]
.L_bn_mul_words_done:
	jmpl	%o7+8,%g0
	or	%g0,%o5,%o0		! return the running carry
	nop

	! Remainder: one to three words, fully unrolled.
.L_bn_mul_words_rest:
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	addx	%g1,0,%o5
	subcc	%o2,1,%o2		! count down, Z set when finished
	bz	.L_bn_mul_words_done
	st	%g2,[%o0]		! delay slot: store happens either way
	nop

	ld	[%o1+4],%g2
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	addx	%g1,0,%o5
	subcc	%o2,1,%o2
	bz	.L_bn_mul_words_done
	st	%g2,[%o0+4]

	ld	[%o1+8],%g2
	umul	%o3,%g2,%g2
	addcc	%g2,%o5,%g2
	rd	%y,%g1
	st	%g2,[%o0+8]
	jmpl	%o7+8,%g0
	addx	%g1,0,%o0		! final carry goes straight to the result

.type	bn_mul_words,#function
.size	bn_mul_words,(.-bn_mul_words)
244*0Sstevel@tonic-gate
245*0Sstevel@tonic-gate.align  32
.global	bn_sqr_words
/*
 * void bn_sqr_words(r,a,n)
 * BN_ULONG *r,*a;
 * int n;
 *
 * For each i in [0,n): r[2*i] = low word of a[i]*a[i], r[2*i+1] = high
 * word.  Nothing is carried between elements.  Does nothing when n <= 0.
 *
 * Leaf routine, no register window is allocated.
 *   In:   %o0 = r, %o1 = a, %o2 = n
 *   Out:  none (declared void); %o0 is cleared before returning
 *   Uses: %o4 (low product half), %o5 (high product half),
 *         %g2/%g3 (a words), %y, icc
 */
bn_sqr_words:
	cmp	%o2,0			! n > 0 ?
	bg,a	.L_bn_sqr_words_proceed
	ld	[%o1],%g2		! (annulled unless taken) preload a[0]
	retl
	clr	%o0

.L_bn_sqr_words_proceed:
	andcc	%o2,-4,%g0		! at least four words to do?
	bz	.L_bn_sqr_words_tail
	nop				! no carry to initialise in this routine

	! Main loop, four squares (eight result words) per pass.
	! On entry %g2 already holds a[0].
.L_bn_sqr_words_loop:
	ld	[%o1+4],%g3		! prefetch a[1]
	umul	%g2,%g2,%o4		! low half of a[0]^2, high half lands in %y
	st	%o4,[%o0]
	rd	%y,%o5
	st	%o5,[%o0+4]

	ld	[%o1+8],%g2		! prefetch a[2]
	umul	%g3,%g3,%o4
	dec	4,%o2			! four fewer words left
	st	%o4,[%o0+8]
	rd	%y,%o5
	st	%o5,[%o0+12]
	nop

	ld	[%o1+12],%g3		! prefetch a[3]
	umul	%g2,%g2,%o4
	st	%o4,[%o0+16]
	rd	%y,%o5
	inc	16,%o1			! a += 4 words
	st	%o5,[%o0+20]

	umul	%g3,%g3,%o4
	inc	32,%o0			! r += 8 words
	st	%o4,[%o0-8]		! r was already advanced: old r[6]
	rd	%y,%o5
	st	%o5,[%o0-4]		! old r[7]
	andcc	%o2,-4,%g0		! another full group of four? (result discarded)
	bnz,a	.L_bn_sqr_words_loop
	ld	[%o1],%g2		! (annulled unless taken) preload next a[0]

	tst	%o2			! 1..3 words left over?
	nop
	bnz,a	.L_bn_sqr_words_tail
	ld	[%o1],%g2		! (annulled unless taken) preload a[0]
.L_bn_sqr_words_return:
	retl
	clr	%o0

	! Remainder: one to three words, fully unrolled.
.L_bn_sqr_words_tail:
	umul	%g2,%g2,%o4
	st	%o4,[%o0]
	deccc	%o2			! count down, Z set when finished
	rd	%y,%o5
	bz	.L_bn_sqr_words_return
	st	%o5,[%o0+4]		! delay slot: store happens either way

	ld	[%o1+4],%g2
	umul	%g2,%g2,%o4
	st	%o4,[%o0+8]
	deccc	%o2
	rd	%y,%o5
	nop
	bz	.L_bn_sqr_words_return
	st	%o5,[%o0+12]

	ld	[%o1+8],%g2
	umul	%g2,%g2,%o4
	st	%o4,[%o0+16]
	rd	%y,%o5
	st	%o5,[%o0+20]
	retl
	clr	%o0

.type	bn_sqr_words,#function
.size	bn_sqr_words,(.-bn_sqr_words)
330*0Sstevel@tonic-gate
331*0Sstevel@tonic-gate.align	32
332*0Sstevel@tonic-gate
.global bn_div_words
/*
 * BN_ULONG bn_div_words(h,l,d)
 * BN_ULONG h,l,d;
 *
 * Returns the 32-bit quotient of the 64-bit value (h:l) divided by d,
 * using the hardware udiv, which takes the high dividend word from %y.
 *
 * Leaf routine, no register window is allocated.
 *   In:   %o0 = h, %o1 = l, %o2 = d
 *   Out:  %o0 = quotient
 *   Uses: %y
 *
 * NOTE(review): behaviour for d == 0 or a quotient that does not fit in
 * 32 bits is whatever udiv does on the target CPU; neither case is
 * handled here -- confirm callers guarantee h < d and d != 0.
 */
bn_div_words:
	wr	%o0,%g0,%y		! %y = h (wr stores rs1 XOR operand2)
	nop				! V8 treats WRY as a delayed write: it may
	nop				! take up to three further instructions to
	nop				! land, so do not let udiv read %y earlier
	udiv	%o1,%o2,%o0		! %o0 = (%y:%o1) / %o2
	retl
	nop

.type	bn_div_words,#function
.size	bn_div_words,(.-bn_div_words)
346*0Sstevel@tonic-gate
347*0Sstevel@tonic-gate.align	32
348*0Sstevel@tonic-gate
.global bn_add_words
/*
 * BN_ULONG bn_add_words(rp,ap,bp,n)
 * BN_ULONG *rp,*ap,*bp;
 * int n;
 *
 * rp[i] = ap[i] + bp[i] + carry for i in [0,n), with the carry rippling
 * from word to word.  Returns the carry out of the last word (0 or 1),
 * or 0 when n <= 0.
 *
 * Leaf routine, no register window is allocated.
 *   In:   %o0 = rp, %o1 = ap, %o2 = bp, %o3 = n
 *   Out:  %o0 = carry out
 *   Uses: %o4/%o5/%g3/%g4 (operand words), %g1 (carry saved as 0/1), icc
 *
 * The carry lives in the icc C flag while words are being added.  Any
 * instruction that sets icc for loop control destroys it, so it is parked
 * in %g1 first and re-created afterwards with addcc %g1,-1,%g0
 * (1 + 0xffffffff carries out, 0 + 0xffffffff does not).
 */
bn_add_words:
	subcc	%o3,0,%g0		! n > 0 ?
	bg,a	.L_bn_add_words_start
	ld	[%o1],%o4		! (annulled unless taken) preload ap[0]
	jmpl	%o7+8,%g0		! n <= 0: leaf return
	or	%g0,%g0,%o0		! ... with result 0

.L_bn_add_words_start:
	andcc	%o3,-4,%g0		! at least four words to do?
	bz	.L_bn_add_words_rest
	or	%g0,%g0,%g1		! saved carry = 0 (delay slot, both paths)
	ba	.L_bn_add_words_quad_entry
	addcc	%g0,0,%g0		! clear carry flag

	! Main loop, four words per pass.  C holds the incoming carry.
.L_bn_add_words_quad:
	ld	[%o1],%o4		! ap[0] (first pass enters below, preloaded)
.L_bn_add_words_quad_entry:
	ld	[%o2],%o5		! bp[0]
	ld	[%o1+4],%g3		! ap[1]
	ld	[%o2+4],%g4		! bp[1]
	sub	%o3,4,%o3		! four fewer words left (icc untouched)
	addxcc	%o5,%o4,%o5
	st	%o5,[%o0]

	ld	[%o1+8],%o4		! ap[2]
	ld	[%o2+8],%o5		! bp[2]
	add	%o1,16,%o1		! ap += 4 words (icc untouched)
	addxcc	%g3,%g4,%g3
	st	%g3,[%o0+4]

	ld	[%o1-4],%g3		! ap was already advanced: old ap[3]
	ld	[%o2+12],%g4		! bp[3]
	add	%o2,16,%o2		! bp += 4 words (icc untouched)
	addxcc	%o5,%o4,%o5
	st	%o5,[%o0+8]

	add	%o0,16,%o0		! rp += 4 words (icc untouched)
	addxcc	%g3,%g4,%g3
	st	%g3,[%o0-4]		! old rp[3]
	addx	%g0,0,%g1		! park carry as 0/1 before icc is reused
	andcc	%o3,-4,%g0		! another full group of four?
	bnz,a	.L_bn_add_words_quad
	addcc	%g1,-1,%g0		! (annulled unless taken) restore carry flag

	orcc	%g0,%o3,%g0		! 1..3 words left over?
	bnz,a	.L_bn_add_words_rest
	ld	[%o1],%o4		! (annulled unless taken) preload ap[0]
.L_bn_add_words_done:
	jmpl	%o7+8,%g0
	or	%g0,%g1,%o0		! return the parked carry

	! Remainder: one to three words, fully unrolled.
.L_bn_add_words_rest:
	addcc	%g1,-1,%g0		! restore carry flag from %g1
	ld	[%o2],%o5
	addxcc	%o5,%o4,%o5
	addx	%g0,0,%g1		! park carry again, subcc below clobbers icc
	subcc	%o3,1,%o3		! count down, Z set when finished
	bz	.L_bn_add_words_done
	st	%o5,[%o0]		! delay slot: store happens either way

	ld	[%o1+4],%o4
	addcc	%g1,-1,%g0
	ld	[%o2+4],%o5
	addxcc	%o5,%o4,%o5
	addx	%g0,0,%g1
	subcc	%o3,1,%o3
	bz	.L_bn_add_words_done
	st	%o5,[%o0+4]

	ld	[%o1+8],%o4
	addcc	%g1,-1,%g0
	ld	[%o2+8],%o5
	addxcc	%o5,%o4,%o5
	st	%o5,[%o0+8]
	jmpl	%o7+8,%g0
	addx	%g0,0,%o0		! final carry goes straight to the result

.type	bn_add_words,#function
.size	bn_add_words,(.-bn_add_words)
434*0Sstevel@tonic-gate
435*0Sstevel@tonic-gate.align	32
436*0Sstevel@tonic-gate
.global bn_sub_words
/*
 * BN_ULONG bn_sub_words(rp,ap,bp,n)
 * BN_ULONG *rp,*ap,*bp;
 * int n;
 *
 * rp[i] = ap[i] - bp[i] - borrow for i in [0,n), with the borrow rippling
 * from word to word.  Returns the borrow out of the last word (0 or 1),
 * or 0 when n <= 0.
 *
 * Leaf routine, no register window is allocated.
 *   In:   %o0 = rp, %o1 = ap, %o2 = bp, %o3 = n
 *   Out:  %o0 = borrow out
 *   Uses: %o4/%o5/%g3/%g4 (operand words), %g1 (borrow saved as 0/1), icc
 *
 * The borrow lives in the icc C flag while words are being subtracted.
 * Any instruction that sets icc for loop control destroys it, so it is
 * parked in %g1 first and re-created afterwards with addcc %g1,-1,%g0
 * (1 + 0xffffffff carries out, 0 + 0xffffffff does not).
 */
bn_sub_words:
	subcc	%o3,0,%g0		! n > 0 ?
	bg,a	.L_bn_sub_words_start
	ld	[%o1],%o4		! (annulled unless taken) preload ap[0]
	jmpl	%o7+8,%g0		! n <= 0: leaf return
	or	%g0,%g0,%o0		! ... with result 0

.L_bn_sub_words_start:
	andcc	%o3,-4,%g0		! at least four words to do?
	bz	.L_bn_sub_words_rest
	or	%g0,%g0,%g1		! saved borrow = 0 (delay slot, both paths)
	ba	.L_bn_sub_words_quad_entry
	addcc	%g0,0,%g0		! clear carry flag

	! Main loop, four words per pass.  C holds the incoming borrow.
.L_bn_sub_words_quad:
	ld	[%o1],%o4		! ap[0] (first pass enters below, preloaded)
.L_bn_sub_words_quad_entry:
	ld	[%o2],%o5		! bp[0]
	ld	[%o1+4],%g3		! ap[1]
	ld	[%o2+4],%g4		! bp[1]
	sub	%o3,4,%o3		! four fewer words left (icc untouched)
	subxcc	%o4,%o5,%o5		! ap[0] - bp[0] - borrow
	st	%o5,[%o0]

	ld	[%o1+8],%o4		! ap[2]
	ld	[%o2+8],%o5		! bp[2]
	add	%o1,16,%o1		! ap += 4 words (icc untouched)
	subxcc	%g3,%g4,%g4
	st	%g4,[%o0+4]

	ld	[%o1-4],%g3		! ap was already advanced: old ap[3]
	ld	[%o2+12],%g4		! bp[3]
	add	%o2,16,%o2		! bp += 4 words (icc untouched)
	subxcc	%o4,%o5,%o5
	st	%o5,[%o0+8]

	add	%o0,16,%o0		! rp += 4 words (icc untouched)
	subxcc	%g3,%g4,%g4
	st	%g4,[%o0-4]		! old rp[3]
	addx	%g0,0,%g1		! park borrow as 0/1 before icc is reused
	andcc	%o3,-4,%g0		! another full group of four?
	bnz,a	.L_bn_sub_words_quad
	addcc	%g1,-1,%g0		! (annulled unless taken) restore borrow flag

	orcc	%g0,%o3,%g0		! 1..3 words left over?
	nop
	bnz,a	.L_bn_sub_words_rest
	ld	[%o1],%o4		! (annulled unless taken) preload ap[0]
.L_bn_sub_words_done:
	jmpl	%o7+8,%g0
	or	%g0,%g1,%o0		! return the parked borrow

	! Remainder: one to three words, fully unrolled.
.L_bn_sub_words_rest:
	addcc	%g1,-1,%g0		! restore borrow flag from %g1
	ld	[%o2],%o5
	subxcc	%o4,%o5,%o5
	addx	%g0,0,%g1		! park borrow again, subcc below clobbers icc
	subcc	%o3,1,%o3		! count down, Z set when finished
	bz	.L_bn_sub_words_done
	st	%o5,[%o0]		! delay slot: store happens either way
	nop

	ld	[%o1+4],%o4
	addcc	%g1,-1,%g0
	ld	[%o2+4],%o5
	subxcc	%o4,%o5,%o5
	addx	%g0,0,%g1
	subcc	%o3,1,%o3
	bz	.L_bn_sub_words_done
	st	%o5,[%o0+4]

	ld	[%o1+8],%o4
	addcc	%g1,-1,%g0
	ld	[%o2+8],%o5
	subxcc	%o4,%o5,%o5
	st	%o5,[%o0+8]
	jmpl	%o7+8,%g0
	addx	%g0,0,%o0		! final borrow goes straight to the result

.type	bn_sub_words,#function
.size	bn_sub_words,(.-bn_sub_words)
524*0Sstevel@tonic-gate
/*
 * Amount added to %sp by the save instruction that opens bn_mul_comba8
 * (save %sp,FRAME_SIZE,%sp); negative, so the frame is carved out below
 * the current stack pointer.
 * NOTE(review): 96 bytes looks like the minimum SPARC V8 ABI frame
 * (register-window save area plus argument slots) -- confirm.
 */
525*0Sstevel@tonic-gate#define FRAME_SIZE	-96
526*0Sstevel@tonic-gate
527*0Sstevel@tonic-gate/*
528*0Sstevel@tonic-gate * Here is register usage map for *all* routines below.
529*0Sstevel@tonic-gate */
/*
 * t_1/t_2: scratch pair; bn_mul_comba8 puts the umul result (low product
 *          half) in t_1 and the rd %y result (high half) in t_2.
 * c_1..c_3: the three accumulator words named c1,c2,c3 in the
 *          mul_add_c() comments; they rotate roles from column to column.
 */
530*0Sstevel@tonic-gate#define t_1	%o0
531*0Sstevel@tonic-gate#define	t_2	%o1
532*0Sstevel@tonic-gate#define c_1	%o2
533*0Sstevel@tonic-gate#define c_2	%o3
534*0Sstevel@tonic-gate#define c_3	%o4
535*0Sstevel@tonic-gate
/*
 * Word I of each operand, addressed off the incoming argument registers
 * as seen after save: %i0 = result pointer, %i1 = first source pointer,
 * %i2 = second source pointer.  I is a word index, scaled by 4 here.
 */
536*0Sstevel@tonic-gate#define ap(I)	[%i1+4*I]
537*0Sstevel@tonic-gate#define bp(I)	[%i2+4*I]
538*0Sstevel@tonic-gate#define rp(I)	[%i0+4*I]
539*0Sstevel@tonic-gate
/* Cached words a[0]..a[7]: all eight local registers of the window. */
540*0Sstevel@tonic-gate#define	a_0	%l0
541*0Sstevel@tonic-gate#define	a_1	%l1
542*0Sstevel@tonic-gate#define	a_2	%l2
543*0Sstevel@tonic-gate#define	a_3	%l3
544*0Sstevel@tonic-gate#define	a_4	%l4
545*0Sstevel@tonic-gate#define	a_5	%l5
546*0Sstevel@tonic-gate#define	a_6	%l6
547*0Sstevel@tonic-gate#define	a_7	%l7
548*0Sstevel@tonic-gate
/*
 * Cached words b[0]..b[7].  b_0..b_2 take the remaining in registers,
 * b_3 the last free out register, and b_4..b_7 the global registers
 * %g1..%g4, so any routine that loads those words overwrites %g1-%g4.
 */
549*0Sstevel@tonic-gate#define	b_0	%i3
550*0Sstevel@tonic-gate#define	b_1	%i4
551*0Sstevel@tonic-gate#define	b_2	%i5
552*0Sstevel@tonic-gate#define	b_3	%o5
553*0Sstevel@tonic-gate#define	b_4	%g1
554*0Sstevel@tonic-gate#define	b_5	%g2
555*0Sstevel@tonic-gate#define	b_6	%g3
556*0Sstevel@tonic-gate#define	b_7	%g4
557*0Sstevel@tonic-gate
558*0Sstevel@tonic-gate.align	32
559*0Sstevel@tonic-gate.global bn_mul_comba8
560*0Sstevel@tonic-gate/*
561*0Sstevel@tonic-gate * void bn_mul_comba8(r,a,b)
562*0Sstevel@tonic-gate * BN_ULONG *r,*a,*b;
563*0Sstevel@tonic-gate */
564*0Sstevel@tonic-gatebn_mul_comba8:
565*0Sstevel@tonic-gate	save	%sp,FRAME_SIZE,%sp
566*0Sstevel@tonic-gate	ld	ap(0),a_0
567*0Sstevel@tonic-gate	ld	bp(0),b_0
568*0Sstevel@tonic-gate	umul	a_0,b_0,c_1	!=!mul_add_c(a[0],b[0],c1,c2,c3);
569*0Sstevel@tonic-gate	ld	bp(1),b_1
570*0Sstevel@tonic-gate	rd	%y,c_2
571*0Sstevel@tonic-gate	st	c_1,rp(0)	!r[0]=c1;
572*0Sstevel@tonic-gate
573*0Sstevel@tonic-gate	umul	a_0,b_1,t_1	!=!mul_add_c(a[0],b[1],c2,c3,c1);
574*0Sstevel@tonic-gate	ld	ap(1),a_1
575*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
576*0Sstevel@tonic-gate	rd	%y,t_2
577*0Sstevel@tonic-gate	addxcc	%g0,t_2,c_3	!=
578*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
579*0Sstevel@tonic-gate	ld	ap(2),a_2
580*0Sstevel@tonic-gate	umul	a_1,b_0,t_1	!mul_add_c(a[1],b[0],c2,c3,c1);
581*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
582*0Sstevel@tonic-gate	rd	%y,t_2
583*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
584*0Sstevel@tonic-gate	st	c_2,rp(1)	!r[1]=c2;
585*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
586*0Sstevel@tonic-gate
587*0Sstevel@tonic-gate	umul	a_2,b_0,t_1	!mul_add_c(a[2],b[0],c3,c1,c2);
588*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
589*0Sstevel@tonic-gate	rd	%y,t_2
590*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
591*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
592*0Sstevel@tonic-gate	ld	bp(2),b_2
593*0Sstevel@tonic-gate	umul	a_1,b_1,t_1	!mul_add_c(a[1],b[1],c3,c1,c2);
594*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
595*0Sstevel@tonic-gate	rd	%y,t_2
596*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
597*0Sstevel@tonic-gate	ld	bp(3),b_3
598*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
599*0Sstevel@tonic-gate	umul	a_0,b_2,t_1	!mul_add_c(a[0],b[2],c3,c1,c2);
600*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
601*0Sstevel@tonic-gate	rd	%y,t_2
602*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
603*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
604*0Sstevel@tonic-gate	st	c_3,rp(2)	!r[2]=c3;
605*0Sstevel@tonic-gate
606*0Sstevel@tonic-gate	umul	a_0,b_3,t_1	!mul_add_c(a[0],b[3],c1,c2,c3);
607*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
608*0Sstevel@tonic-gate	rd	%y,t_2
609*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
610*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
611*0Sstevel@tonic-gate	umul	a_1,b_2,t_1	!=!mul_add_c(a[1],b[2],c1,c2,c3);
612*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
613*0Sstevel@tonic-gate	rd	%y,t_2
614*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
615*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
616*0Sstevel@tonic-gate	ld	ap(3),a_3
617*0Sstevel@tonic-gate	umul	a_2,b_1,t_1	!mul_add_c(a[2],b[1],c1,c2,c3);
618*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
619*0Sstevel@tonic-gate	rd	%y,t_2		!=
620*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
621*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
622*0Sstevel@tonic-gate	ld	ap(4),a_4
623*0Sstevel@tonic-gate	umul	a_3,b_0,t_1	!mul_add_c(a[3],b[0],c1,c2,c3);!=
624*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
625*0Sstevel@tonic-gate	rd	%y,t_2
626*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
627*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
628*0Sstevel@tonic-gate	st	c_1,rp(3)	!r[3]=c1;
629*0Sstevel@tonic-gate
630*0Sstevel@tonic-gate	umul	a_4,b_0,t_1	!mul_add_c(a[4],b[0],c2,c3,c1);
631*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
632*0Sstevel@tonic-gate	rd	%y,t_2		!=
633*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
634*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
635*0Sstevel@tonic-gate	umul	a_3,b_1,t_1	!mul_add_c(a[3],b[1],c2,c3,c1);
636*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
637*0Sstevel@tonic-gate	rd	%y,t_2
638*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
639*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
640*0Sstevel@tonic-gate	umul	a_2,b_2,t_1	!=!mul_add_c(a[2],b[2],c2,c3,c1);
641*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
642*0Sstevel@tonic-gate	rd	%y,t_2
643*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
644*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
645*0Sstevel@tonic-gate	ld	bp(4),b_4
646*0Sstevel@tonic-gate	umul	a_1,b_3,t_1	!mul_add_c(a[1],b[3],c2,c3,c1);
647*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
648*0Sstevel@tonic-gate	rd	%y,t_2		!=
649*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
650*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
651*0Sstevel@tonic-gate	ld	bp(5),b_5
652*0Sstevel@tonic-gate	umul	a_0,b_4,t_1	!=!mul_add_c(a[0],b[4],c2,c3,c1);
653*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
654*0Sstevel@tonic-gate	rd	%y,t_2
655*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
656*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
657*0Sstevel@tonic-gate	st	c_2,rp(4)	!r[4]=c2;
658*0Sstevel@tonic-gate
659*0Sstevel@tonic-gate	umul	a_0,b_5,t_1	!mul_add_c(a[0],b[5],c3,c1,c2);
660*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
661*0Sstevel@tonic-gate	rd	%y,t_2		!=
662*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
663*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
664*0Sstevel@tonic-gate	umul	a_1,b_4,t_1	!mul_add_c(a[1],b[4],c3,c1,c2);
665*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
666*0Sstevel@tonic-gate	rd	%y,t_2
667*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
668*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
669*0Sstevel@tonic-gate	umul	a_2,b_3,t_1	!=!mul_add_c(a[2],b[3],c3,c1,c2);
670*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
671*0Sstevel@tonic-gate	rd	%y,t_2
672*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
673*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
674*0Sstevel@tonic-gate	umul	a_3,b_2,t_1	!mul_add_c(a[3],b[2],c3,c1,c2);
675*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
676*0Sstevel@tonic-gate	rd	%y,t_2
677*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
678*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
679*0Sstevel@tonic-gate	ld	ap(5),a_5
680*0Sstevel@tonic-gate	umul	a_4,b_1,t_1	!mul_add_c(a[4],b[1],c3,c1,c2);
681*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
682*0Sstevel@tonic-gate	rd	%y,t_2
683*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
684*0Sstevel@tonic-gate	ld	ap(6),a_6
685*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
686*0Sstevel@tonic-gate	umul	a_5,b_0,t_1	!mul_add_c(a[5],b[0],c3,c1,c2);
687*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
688*0Sstevel@tonic-gate	rd	%y,t_2
689*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
690*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
691*0Sstevel@tonic-gate	st	c_3,rp(5)	!r[5]=c3;
692*0Sstevel@tonic-gate
693*0Sstevel@tonic-gate	umul	a_6,b_0,t_1	!mul_add_c(a[6],b[0],c1,c2,c3);
694*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
695*0Sstevel@tonic-gate	rd	%y,t_2
696*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
697*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
698*0Sstevel@tonic-gate	umul	a_5,b_1,t_1	!=!mul_add_c(a[5],b[1],c1,c2,c3);
699*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
700*0Sstevel@tonic-gate	rd	%y,t_2
701*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
702*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
703*0Sstevel@tonic-gate	umul	a_4,b_2,t_1	!mul_add_c(a[4],b[2],c1,c2,c3);
704*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
705*0Sstevel@tonic-gate	rd	%y,t_2
706*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
707*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
708*0Sstevel@tonic-gate	umul	a_3,b_3,t_1	!mul_add_c(a[3],b[3],c1,c2,c3);
709*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
710*0Sstevel@tonic-gate	rd	%y,t_2		!=
711*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
712*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
713*0Sstevel@tonic-gate	umul	a_2,b_4,t_1	!mul_add_c(a[2],b[4],c1,c2,c3);
714*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
715*0Sstevel@tonic-gate	rd	%y,t_2
716*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
717*0Sstevel@tonic-gate	ld	bp(6),b_6
718*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
719*0Sstevel@tonic-gate	umul	a_1,b_5,t_1	!mul_add_c(a[1],b[5],c1,c2,c3);
720*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
721*0Sstevel@tonic-gate	rd	%y,t_2
722*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
723*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
724*0Sstevel@tonic-gate	ld	bp(7),b_7
725*0Sstevel@tonic-gate	umul	a_0,b_6,t_1	!mul_add_c(a[0],b[6],c1,c2,c3);
726*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
727*0Sstevel@tonic-gate	rd	%y,t_2
728*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
729*0Sstevel@tonic-gate	st	c_1,rp(6)	!r[6]=c1;
730*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
731*0Sstevel@tonic-gate
732*0Sstevel@tonic-gate	umul	a_0,b_7,t_1	!mul_add_c(a[0],b[7],c2,c3,c1);
733*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
734*0Sstevel@tonic-gate	rd	%y,t_2
735*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
736*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
737*0Sstevel@tonic-gate	umul	a_1,b_6,t_1	!mul_add_c(a[1],b[6],c2,c3,c1);
738*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
739*0Sstevel@tonic-gate	rd	%y,t_2		!=
740*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
741*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
742*0Sstevel@tonic-gate	umul	a_2,b_5,t_1	!mul_add_c(a[2],b[5],c2,c3,c1);
743*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
744*0Sstevel@tonic-gate	rd	%y,t_2
745*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
746*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
747*0Sstevel@tonic-gate	umul	a_3,b_4,t_1	!=!mul_add_c(a[3],b[4],c2,c3,c1);
748*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
749*0Sstevel@tonic-gate	rd	%y,t_2
750*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
751*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
752*0Sstevel@tonic-gate	umul	a_4,b_3,t_1	!mul_add_c(a[4],b[3],c2,c3,c1);
753*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
754*0Sstevel@tonic-gate	rd	%y,t_2
755*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
756*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
757*0Sstevel@tonic-gate	umul	a_5,b_2,t_1	!mul_add_c(a[5],b[2],c2,c3,c1);
758*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
759*0Sstevel@tonic-gate	rd	%y,t_2		!=
760*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
761*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
762*0Sstevel@tonic-gate	ld	ap(7),a_7
763*0Sstevel@tonic-gate	umul	a_6,b_1,t_1	!=!mul_add_c(a[6],b[1],c2,c3,c1);
764*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
765*0Sstevel@tonic-gate	rd	%y,t_2
766*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
767*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
768*0Sstevel@tonic-gate	umul	a_7,b_0,t_1	!mul_add_c(a[7],b[0],c2,c3,c1);
769*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
770*0Sstevel@tonic-gate	rd	%y,t_2
771*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
772*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
773*0Sstevel@tonic-gate	st	c_2,rp(7)	!r[7]=c2;
774*0Sstevel@tonic-gate
775*0Sstevel@tonic-gate	umul	a_7,b_1,t_1	!mul_add_c(a[7],b[1],c3,c1,c2);
776*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
777*0Sstevel@tonic-gate	rd	%y,t_2
778*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
779*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
780*0Sstevel@tonic-gate	umul	a_6,b_2,t_1	!=!mul_add_c(a[6],b[2],c3,c1,c2);
781*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
782*0Sstevel@tonic-gate	rd	%y,t_2
783*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
784*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
785*0Sstevel@tonic-gate	umul	a_5,b_3,t_1	!mul_add_c(a[5],b[3],c3,c1,c2);
786*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
787*0Sstevel@tonic-gate	rd	%y,t_2
788*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
789*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
790*0Sstevel@tonic-gate	umul	a_4,b_4,t_1	!mul_add_c(a[4],b[4],c3,c1,c2);
791*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
792*0Sstevel@tonic-gate	rd	%y,t_2		!=
793*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
794*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
795*0Sstevel@tonic-gate	umul	a_3,b_5,t_1	!mul_add_c(a[3],b[5],c3,c1,c2);
796*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
797*0Sstevel@tonic-gate	rd	%y,t_2
798*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
799*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
800*0Sstevel@tonic-gate	umul	a_2,b_6,t_1	!=!mul_add_c(a[2],b[6],c3,c1,c2);
801*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
802*0Sstevel@tonic-gate	rd	%y,t_2
803*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
804*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
805*0Sstevel@tonic-gate	umul	a_1,b_7,t_1	!mul_add_c(a[1],b[7],c3,c1,c2);
806*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
807*0Sstevel@tonic-gate	rd	%y,t_2
808*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!
809*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
810*0Sstevel@tonic-gate	st	c_3,rp(8)	!r[8]=c3;
811*0Sstevel@tonic-gate
812*0Sstevel@tonic-gate	umul	a_2,b_7,t_1	!mul_add_c(a[2],b[7],c1,c2,c3);
813*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
814*0Sstevel@tonic-gate	rd	%y,t_2
815*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
816*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
817*0Sstevel@tonic-gate	umul	a_3,b_6,t_1	!=!mul_add_c(a[3],b[6],c1,c2,c3);
818*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
819*0Sstevel@tonic-gate	rd	%y,t_2
820*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
821*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
822*0Sstevel@tonic-gate	umul	a_4,b_5,t_1	!mul_add_c(a[4],b[5],c1,c2,c3);
823*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
824*0Sstevel@tonic-gate	rd	%y,t_2
825*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
826*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
827*0Sstevel@tonic-gate	umul	a_5,b_4,t_1	!mul_add_c(a[5],b[4],c1,c2,c3);
828*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
829*0Sstevel@tonic-gate	rd	%y,t_2		!=
830*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
831*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
832*0Sstevel@tonic-gate	umul	a_6,b_3,t_1	!mul_add_c(a[6],b[3],c1,c2,c3);
833*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
834*0Sstevel@tonic-gate	rd	%y,t_2
835*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
836*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
837*0Sstevel@tonic-gate	umul	a_7,b_2,t_1	!=!mul_add_c(a[7],b[2],c1,c2,c3);
838*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
839*0Sstevel@tonic-gate	rd	%y,t_2
840*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
841*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
842*0Sstevel@tonic-gate	st	c_1,rp(9)	!r[9]=c1;
843*0Sstevel@tonic-gate
844*0Sstevel@tonic-gate	umul	a_7,b_3,t_1	!mul_add_c(a[7],b[3],c2,c3,c1);
845*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
846*0Sstevel@tonic-gate	rd	%y,t_2		!=
847*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
848*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
849*0Sstevel@tonic-gate	umul	a_6,b_4,t_1	!mul_add_c(a[6],b[4],c2,c3,c1);
850*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
851*0Sstevel@tonic-gate	rd	%y,t_2
852*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
853*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
854*0Sstevel@tonic-gate	umul	a_5,b_5,t_1	!=!mul_add_c(a[5],b[5],c2,c3,c1);
855*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
856*0Sstevel@tonic-gate	rd	%y,t_2
857*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
858*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
859*0Sstevel@tonic-gate	umul	a_4,b_6,t_1	!mul_add_c(a[4],b[6],c2,c3,c1);
860*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
861*0Sstevel@tonic-gate	rd	%y,t_2
862*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
863*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
864*0Sstevel@tonic-gate	umul	a_3,b_7,t_1	!mul_add_c(a[3],b[7],c2,c3,c1);
865*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
866*0Sstevel@tonic-gate	rd	%y,t_2		!=
867*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
868*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
869*0Sstevel@tonic-gate	st	c_2,rp(10)	!r[10]=c2;
870*0Sstevel@tonic-gate
871*0Sstevel@tonic-gate	umul	a_4,b_7,t_1	!=!mul_add_c(a[4],b[7],c3,c1,c2);
872*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
873*0Sstevel@tonic-gate	rd	%y,t_2
874*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
875*0Sstevel@tonic-gate	addx	%g0,%g0,c_2	!=
876*0Sstevel@tonic-gate	umul	a_5,b_6,t_1	!mul_add_c(a[5],b[6],c3,c1,c2);
877*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
878*0Sstevel@tonic-gate	rd	%y,t_2
879*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
880*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
881*0Sstevel@tonic-gate	umul	a_6,b_5,t_1	!mul_add_c(a[6],b[5],c3,c1,c2);
882*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
883*0Sstevel@tonic-gate	rd	%y,t_2		!=
884*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
885*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
886*0Sstevel@tonic-gate	umul	a_7,b_4,t_1	!mul_add_c(a[7],b[4],c3,c1,c2);
887*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
888*0Sstevel@tonic-gate	rd	%y,t_2
889*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
890*0Sstevel@tonic-gate	st	c_3,rp(11)	!r[11]=c3;
891*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
892*0Sstevel@tonic-gate
893*0Sstevel@tonic-gate	umul	a_7,b_5,t_1	!mul_add_c(a[7],b[5],c1,c2,c3);
894*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
895*0Sstevel@tonic-gate	rd	%y,t_2
896*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
897*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
898*0Sstevel@tonic-gate	umul	a_6,b_6,t_1	!mul_add_c(a[6],b[6],c1,c2,c3);
899*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
900*0Sstevel@tonic-gate	rd	%y,t_2		!=
901*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
902*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
903*0Sstevel@tonic-gate	umul	a_5,b_7,t_1	!mul_add_c(a[5],b[7],c1,c2,c3);
904*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
905*0Sstevel@tonic-gate	rd	%y,t_2
906*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
907*0Sstevel@tonic-gate	st	c_1,rp(12)	!r[12]=c1;
908*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
909*0Sstevel@tonic-gate
910*0Sstevel@tonic-gate	umul	a_6,b_7,t_1	!mul_add_c(a[6],b[7],c2,c3,c1);
911*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
912*0Sstevel@tonic-gate	rd	%y,t_2
913*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
914*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
915*0Sstevel@tonic-gate	umul	a_7,b_6,t_1	!mul_add_c(a[7],b[6],c2,c3,c1);
916*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
917*0Sstevel@tonic-gate	rd	%y,t_2		!=
918*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
919*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
920*0Sstevel@tonic-gate	st	c_2,rp(13)	!r[13]=c2;
921*0Sstevel@tonic-gate
922*0Sstevel@tonic-gate	umul	a_7,b_7,t_1	!=!mul_add_c(a[7],b[7],c3,c1,c2);
923*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
924*0Sstevel@tonic-gate	rd	%y,t_2
925*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
926*0Sstevel@tonic-gate	nop			!=
927*0Sstevel@tonic-gate	st	c_3,rp(14)	!r[14]=c3;
928*0Sstevel@tonic-gate	st	c_1,rp(15)	!r[15]=c1;
929*0Sstevel@tonic-gate
930*0Sstevel@tonic-gate	ret
931*0Sstevel@tonic-gate	restore	%g0,%g0,%o0
932*0Sstevel@tonic-gate
933*0Sstevel@tonic-gate.type	bn_mul_comba8,#function
934*0Sstevel@tonic-gate.size	bn_mul_comba8,(.-bn_mul_comba8)
935*0Sstevel@tonic-gate
936*0Sstevel@tonic-gate.align	32
937*0Sstevel@tonic-gate
938*0Sstevel@tonic-gate.global bn_mul_comba4
939*0Sstevel@tonic-gate/*
940*0Sstevel@tonic-gate * void bn_mul_comba4(r,a,b)
941*0Sstevel@tonic-gate * BN_ULONG *r,*a,*b;
942*0Sstevel@tonic-gate */
/*
 * Column-by-column ("comba") product of two 4-word operands: the loads
 * below read a[0..3] and b[0..3], and the stores write r[0..7].
 * ap()/bp()/rp(), a_N/b_N, c_N, t_N and FRAME_SIZE are macros defined
 * earlier in this file, outside this block.
 *
 * c_1, c_2 and c_3 act as a rotating three-word accumulator
 * (low, middle, high).  One mul_add_c(a[i],b[j],lo,mid,hi) step is
 *
 *	umul	a_i,b_j,t_1	low product word -> t_1, high word -> %y
 *	addcc	lo,t_1,lo	add low word, set carry
 *	rd	%y,t_2		fetch high product word
 *	addxcc	mid,t_2,mid	add high word plus carry, set carry
 *	addx	hi,%g0,hi	fold the remaining carry into hi
 *
 * For the first product of a column the last instruction is written as
 * "addx %g0,%g0,hi", which clears hi and records the carry in one step;
 * hi is the register whose previous value was stored to r[] at the end
 * of the preceding column.  When a column is complete its low word is
 * stored and the register roles rotate.
 *
 * Loads of operand words needed later are interleaved with the
 * arithmetic; ld and st do not modify the integer condition codes, so
 * they may sit between an addxcc and the addx consuming its carry.
 */
943*0Sstevel@tonic-gatebn_mul_comba4:
944*0Sstevel@tonic-gate	save	%sp,FRAME_SIZE,%sp
945*0Sstevel@tonic-gate	ld	ap(0),a_0
946*0Sstevel@tonic-gate	ld	bp(0),b_0
	/* Column 0: single product; low word is r[0], high word (%y) seeds c_2. */
947*0Sstevel@tonic-gate	umul	a_0,b_0,c_1	!=!mul_add_c(a[0],b[0],c1,c2,c3);
948*0Sstevel@tonic-gate	ld	bp(1),b_1
949*0Sstevel@tonic-gate	rd	%y,c_2
950*0Sstevel@tonic-gate	st	c_1,rp(0)	!r[0]=c1;
951*0Sstevel@tonic-gate
	/* Column 1: c_3 and c_1 hold no value yet, so they are initialised from %g0 plus carry. */
952*0Sstevel@tonic-gate	umul	a_0,b_1,t_1	!=!mul_add_c(a[0],b[1],c2,c3,c1);
953*0Sstevel@tonic-gate	ld	ap(1),a_1
954*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
955*0Sstevel@tonic-gate	rd	%y,t_2		!=
956*0Sstevel@tonic-gate	addxcc	%g0,t_2,c_3
957*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
958*0Sstevel@tonic-gate	ld	ap(2),a_2
959*0Sstevel@tonic-gate	umul	a_1,b_0,t_1	!=!mul_add_c(a[1],b[0],c2,c3,c1);
960*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
961*0Sstevel@tonic-gate	rd	%y,t_2
962*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
963*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
964*0Sstevel@tonic-gate	st	c_2,rp(1)	!r[1]=c2;
965*0Sstevel@tonic-gate
966*0Sstevel@tonic-gate	umul	a_2,b_0,t_1	!mul_add_c(a[2],b[0],c3,c1,c2);
967*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
968*0Sstevel@tonic-gate	rd	%y,t_2		!=
969*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
970*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
971*0Sstevel@tonic-gate	ld	bp(2),b_2
972*0Sstevel@tonic-gate	umul	a_1,b_1,t_1	!=!mul_add_c(a[1],b[1],c3,c1,c2);
973*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
974*0Sstevel@tonic-gate	rd	%y,t_2
975*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
976*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
977*0Sstevel@tonic-gate	ld	bp(3),b_3
978*0Sstevel@tonic-gate	umul	a_0,b_2,t_1	!mul_add_c(a[0],b[2],c3,c1,c2);
979*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
980*0Sstevel@tonic-gate	rd	%y,t_2		!=
981*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
982*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
983*0Sstevel@tonic-gate	st	c_3,rp(2)	!r[2]=c3;
984*0Sstevel@tonic-gate
985*0Sstevel@tonic-gate	umul	a_0,b_3,t_1	!=!mul_add_c(a[0],b[3],c1,c2,c3);
986*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
987*0Sstevel@tonic-gate	rd	%y,t_2
988*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
989*0Sstevel@tonic-gate	addx	%g0,%g0,c_3	!=
990*0Sstevel@tonic-gate	umul	a_1,b_2,t_1	!mul_add_c(a[1],b[2],c1,c2,c3);
991*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
992*0Sstevel@tonic-gate	rd	%y,t_2
993*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
994*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
995*0Sstevel@tonic-gate	ld	ap(3),a_3
996*0Sstevel@tonic-gate	umul	a_2,b_1,t_1	!mul_add_c(a[2],b[1],c1,c2,c3);
997*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
998*0Sstevel@tonic-gate	rd	%y,t_2
999*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1000*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1001*0Sstevel@tonic-gate	umul	a_3,b_0,t_1	!=!mul_add_c(a[3],b[0],c1,c2,c3);
1002*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1003*0Sstevel@tonic-gate	rd	%y,t_2
1004*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1005*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1006*0Sstevel@tonic-gate	st	c_1,rp(3)	!r[3]=c1;
1007*0Sstevel@tonic-gate
1008*0Sstevel@tonic-gate	umul	a_3,b_1,t_1	!mul_add_c(a[3],b[1],c2,c3,c1);
1009*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1010*0Sstevel@tonic-gate	rd	%y,t_2		!=
1011*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1012*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
1013*0Sstevel@tonic-gate	umul	a_2,b_2,t_1	!mul_add_c(a[2],b[2],c2,c3,c1);
1014*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
1015*0Sstevel@tonic-gate	rd	%y,t_2
1016*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1017*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1018*0Sstevel@tonic-gate	umul	a_1,b_3,t_1	!=!mul_add_c(a[1],b[3],c2,c3,c1);
1019*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1020*0Sstevel@tonic-gate	rd	%y,t_2
1021*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1022*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
1023*0Sstevel@tonic-gate	st	c_2,rp(4)	!r[4]=c2;
1024*0Sstevel@tonic-gate
1025*0Sstevel@tonic-gate	umul	a_2,b_3,t_1	!mul_add_c(a[2],b[3],c3,c1,c2);
1026*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1027*0Sstevel@tonic-gate	rd	%y,t_2		!=
1028*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1029*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
1030*0Sstevel@tonic-gate	umul	a_3,b_2,t_1	!mul_add_c(a[3],b[2],c3,c1,c2);
1031*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1032*0Sstevel@tonic-gate	rd	%y,t_2
1033*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
	/* The store sits between addxcc and addx; st leaves the carry flag intact. */
1034*0Sstevel@tonic-gate	st	c_3,rp(5)	!r[5]=c3;
1035*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1036*0Sstevel@tonic-gate
	/* Last column: one product; the carry out of c_2 is not propagated further. */
1037*0Sstevel@tonic-gate	umul	a_3,b_3,t_1	!mul_add_c(a[3],b[3],c1,c2,c3);
1038*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1039*0Sstevel@tonic-gate	rd	%y,t_2
1040*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1041*0Sstevel@tonic-gate	st	c_1,rp(6)	!r[6]=c1;
1042*0Sstevel@tonic-gate	st	c_2,rp(7)	!r[7]=c2;
1043*0Sstevel@tonic-gate
	/* restore runs in the delay slot of ret and writes %g0+%g0 (zero) to the caller's %o0. */
1044*0Sstevel@tonic-gate	ret
1045*0Sstevel@tonic-gate	restore	%g0,%g0,%o0
1046*0Sstevel@tonic-gate
1047*0Sstevel@tonic-gate.type	bn_mul_comba4,#function
1048*0Sstevel@tonic-gate.size	bn_mul_comba4,(.-bn_mul_comba4)
1049*0Sstevel@tonic-gate
1050*0Sstevel@tonic-gate.align	32
1051*0Sstevel@tonic-gate
1052*0Sstevel@tonic-gate.global bn_sqr_comba8
/*
 * bn_sqr_comba8: column-by-column ("comba") squaring of an 8-word
 * operand.  The loads below read a[0..7] and the stores write r[0..15].
 * NOTE(review): no C prototype is given here; by analogy with the
 * comment on bn_sqr_comba4 it is presumably
 *	void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
 * -- confirm against the C declaration.
 * ap()/rp(), a_N, c_N, t_N and FRAME_SIZE are macros defined earlier in
 * this file, outside this block.
 *
 * c_1, c_2 and c_3 act as a rotating three-word accumulator
 * (low, middle, high).  Two kinds of step appear:
 *
 *   sqr_add_c(a,i,lo,mid,hi)     adds a[i]*a[i] once:
 *	umul / addcc lo / rd %y / addxcc mid / addx hi
 *
 *   sqr_add_c2(a,i,j,lo,mid,hi)  adds a[i]*a[j] twice: after the
 *	sequence above, the addcc / addxcc / addx triple is repeated
 *	with the same t_1 (low word) and t_2 (high word), so the cross
 *	product is doubled without a second umul.
 *
 * For the first product of a column the first addx is written as
 * "addx %g0,%g0,hi", which clears hi and records the carry in one step.
 * When a column is complete its low word is stored to r[] and the
 * register roles rotate.
 *
 * ld and st do not modify the integer condition codes, so they are
 * freely scheduled between an addxcc and the addx consuming its carry.
 */
1053*0Sstevel@tonic-gatebn_sqr_comba8:
1054*0Sstevel@tonic-gate	save	%sp,FRAME_SIZE,%sp
1055*0Sstevel@tonic-gate	ld	ap(0),a_0
1056*0Sstevel@tonic-gate	ld	ap(1),a_1
	/* Column 0: a[0]*a[0]; low word is r[0], high word (%y) seeds c_2. */
1057*0Sstevel@tonic-gate	umul	a_0,a_0,c_1	!=!sqr_add_c(a,0,c1,c2,c3);
1058*0Sstevel@tonic-gate	rd	%y,c_2
1059*0Sstevel@tonic-gate	st	c_1,rp(0)	!r[0]=c1;
1060*0Sstevel@tonic-gate
1061*0Sstevel@tonic-gate	ld	ap(2),a_2
	/* Column 1: c_3 and c_1 hold no value yet, so they are initialised from %g0 plus carry. */
1062*0Sstevel@tonic-gate	umul	a_0,a_1,t_1	!=!sqr_add_c2(a,1,0,c2,c3,c1);
1063*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1064*0Sstevel@tonic-gate	rd	%y,t_2
1065*0Sstevel@tonic-gate	addxcc	%g0,t_2,c_3
1066*0Sstevel@tonic-gate	addx	%g0,%g0,c_1	!=
	/* Second accumulation of the same product (the "2" in sqr_add_c2). */
1067*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1068*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1069*0Sstevel@tonic-gate	st	c_2,rp(1)	!r[1]=c2;
1070*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
1071*0Sstevel@tonic-gate
1072*0Sstevel@tonic-gate	umul	a_2,a_0,t_1	!sqr_add_c2(a,2,0,c3,c1,c2);
1073*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1074*0Sstevel@tonic-gate	rd	%y,t_2
1075*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
1076*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
1077*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1078*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1079*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1080*0Sstevel@tonic-gate	ld	ap(3),a_3
1081*0Sstevel@tonic-gate	umul	a_1,a_1,t_1	!sqr_add_c(a,1,c3,c1,c2);
1082*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1083*0Sstevel@tonic-gate	rd	%y,t_2		!=
1084*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1085*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1086*0Sstevel@tonic-gate	st	c_3,rp(2)	!r[2]=c3;
1087*0Sstevel@tonic-gate
1088*0Sstevel@tonic-gate	umul	a_0,a_3,t_1	!=!sqr_add_c2(a,3,0,c1,c2,c3);
1089*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1090*0Sstevel@tonic-gate	rd	%y,t_2
1091*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1092*0Sstevel@tonic-gate	addx	%g0,%g0,c_3	!=
1093*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1094*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1095*0Sstevel@tonic-gate	ld	ap(4),a_4
1096*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1097*0Sstevel@tonic-gate	umul	a_1,a_2,t_1	!sqr_add_c2(a,2,1,c1,c2,c3);
1098*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1099*0Sstevel@tonic-gate	rd	%y,t_2
1100*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1101*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1102*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1103*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1104*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1105*0Sstevel@tonic-gate	st	c_1,rp(3)	!r[3]=c1;
1106*0Sstevel@tonic-gate
1107*0Sstevel@tonic-gate	umul	a_4,a_0,t_1	!sqr_add_c2(a,4,0,c2,c3,c1);
1108*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1109*0Sstevel@tonic-gate	rd	%y,t_2		!=
1110*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1111*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
1112*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1113*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1114*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1115*0Sstevel@tonic-gate	umul	a_3,a_1,t_1	!sqr_add_c2(a,3,1,c2,c3,c1);
1116*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1117*0Sstevel@tonic-gate	rd	%y,t_2		!=
1118*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1119*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1120*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1121*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1122*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1123*0Sstevel@tonic-gate	ld	ap(5),a_5
1124*0Sstevel@tonic-gate	umul	a_2,a_2,t_1	!sqr_add_c(a,2,c2,c3,c1);
1125*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
1126*0Sstevel@tonic-gate	rd	%y,t_2
1127*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1128*0Sstevel@tonic-gate	st	c_2,rp(4)	!r[4]=c2;
1129*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
1130*0Sstevel@tonic-gate
1131*0Sstevel@tonic-gate	umul	a_0,a_5,t_1	!sqr_add_c2(a,5,0,c3,c1,c2);
1132*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1133*0Sstevel@tonic-gate	rd	%y,t_2
1134*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
1135*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
1136*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1137*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1138*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1139*0Sstevel@tonic-gate	umul	a_1,a_4,t_1	!sqr_add_c2(a,4,1,c3,c1,c2);
1140*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1141*0Sstevel@tonic-gate	rd	%y,t_2
1142*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
1143*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1144*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1145*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1146*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1147*0Sstevel@tonic-gate	ld	ap(6),a_6
1148*0Sstevel@tonic-gate	umul	a_2,a_3,t_1	!sqr_add_c2(a,3,2,c3,c1,c2);
1149*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1150*0Sstevel@tonic-gate	rd	%y,t_2		!=
1151*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1152*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1153*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1154*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
1155*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1156*0Sstevel@tonic-gate	st	c_3,rp(5)	!r[5]=c3;
1157*0Sstevel@tonic-gate
1158*0Sstevel@tonic-gate	umul	a_6,a_0,t_1	!sqr_add_c2(a,6,0,c1,c2,c3);
1159*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
1160*0Sstevel@tonic-gate	rd	%y,t_2
1161*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1162*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
1163*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
1164*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1165*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1166*0Sstevel@tonic-gate	umul	a_5,a_1,t_1	!sqr_add_c2(a,5,1,c1,c2,c3);
1167*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
1168*0Sstevel@tonic-gate	rd	%y,t_2
1169*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1170*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1171*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
1172*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1173*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1174*0Sstevel@tonic-gate	umul	a_4,a_2,t_1	!sqr_add_c2(a,4,2,c1,c2,c3);
1175*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
1176*0Sstevel@tonic-gate	rd	%y,t_2
1177*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1178*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1179*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1	!=
1180*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1181*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1182*0Sstevel@tonic-gate	ld	ap(7),a_7
1183*0Sstevel@tonic-gate	umul	a_3,a_3,t_1	!=!sqr_add_c(a,3,c1,c2,c3);
1184*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1185*0Sstevel@tonic-gate	rd	%y,t_2
1186*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1187*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1188*0Sstevel@tonic-gate	st	c_1,rp(6)	!r[6]=c1;
1189*0Sstevel@tonic-gate
1190*0Sstevel@tonic-gate	umul	a_0,a_7,t_1	!sqr_add_c2(a,7,0,c2,c3,c1);
1191*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1192*0Sstevel@tonic-gate	rd	%y,t_2		!=
1193*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1194*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
1195*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1196*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1197*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1198*0Sstevel@tonic-gate	umul	a_1,a_6,t_1	!sqr_add_c2(a,6,1,c2,c3,c1);
1199*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1200*0Sstevel@tonic-gate	rd	%y,t_2		!=
1201*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1202*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1203*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1204*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1205*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1206*0Sstevel@tonic-gate	umul	a_2,a_5,t_1	!sqr_add_c2(a,5,2,c2,c3,c1);
1207*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1208*0Sstevel@tonic-gate	rd	%y,t_2		!=
1209*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1210*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1211*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1212*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1213*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1214*0Sstevel@tonic-gate	umul	a_3,a_4,t_1	!sqr_add_c2(a,4,3,c2,c3,c1);
1215*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1216*0Sstevel@tonic-gate	rd	%y,t_2		!=
1217*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1218*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1219*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1220*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1221*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1222*0Sstevel@tonic-gate	st	c_2,rp(7)	!r[7]=c2;
1223*0Sstevel@tonic-gate
1224*0Sstevel@tonic-gate	umul	a_7,a_1,t_1	!sqr_add_c2(a,7,1,c3,c1,c2);
1225*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1226*0Sstevel@tonic-gate	rd	%y,t_2
1227*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1228*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
1229*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1230*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1231*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1232*0Sstevel@tonic-gate	umul	a_6,a_2,t_1	!sqr_add_c2(a,6,2,c3,c1,c2);
1233*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1234*0Sstevel@tonic-gate	rd	%y,t_2
1235*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1236*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1237*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1238*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1239*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1240*0Sstevel@tonic-gate	umul	a_5,a_3,t_1	!sqr_add_c2(a,5,3,c3,c1,c2);
1241*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1242*0Sstevel@tonic-gate	rd	%y,t_2
1243*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1244*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1245*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1246*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1247*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1248*0Sstevel@tonic-gate	umul	a_4,a_4,t_1	!sqr_add_c(a,4,c3,c1,c2);
1249*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1250*0Sstevel@tonic-gate	rd	%y,t_2
1251*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1252*0Sstevel@tonic-gate	st	c_3,rp(8)	!r[8]=c3;
1253*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1254*0Sstevel@tonic-gate
1255*0Sstevel@tonic-gate	umul	a_2,a_7,t_1	!sqr_add_c2(a,7,2,c1,c2,c3);
1256*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1257*0Sstevel@tonic-gate	rd	%y,t_2
1258*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1259*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
1260*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1261*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1262*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1263*0Sstevel@tonic-gate	umul	a_3,a_6,t_1	!sqr_add_c2(a,6,3,c1,c2,c3);
1264*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1265*0Sstevel@tonic-gate	rd	%y,t_2
1266*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1267*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1268*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1269*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1270*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1271*0Sstevel@tonic-gate	umul	a_4,a_5,t_1	!sqr_add_c2(a,5,4,c1,c2,c3);
1272*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1273*0Sstevel@tonic-gate	rd	%y,t_2
1274*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1275*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1276*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1277*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1278*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1279*0Sstevel@tonic-gate	st	c_1,rp(9)	!r[9]=c1;
1280*0Sstevel@tonic-gate
1281*0Sstevel@tonic-gate	umul	a_7,a_3,t_1	!sqr_add_c2(a,7,3,c2,c3,c1);
1282*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1283*0Sstevel@tonic-gate	rd	%y,t_2		!=
1284*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1285*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
1286*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1287*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1288*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1289*0Sstevel@tonic-gate	umul	a_6,a_4,t_1	!sqr_add_c2(a,6,4,c2,c3,c1);
1290*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1291*0Sstevel@tonic-gate	rd	%y,t_2		!=
1292*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1293*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1294*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1295*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1296*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1297*0Sstevel@tonic-gate	umul	a_5,a_5,t_1	!sqr_add_c(a,5,c2,c3,c1);
1298*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1299*0Sstevel@tonic-gate	rd	%y,t_2		!=
1300*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1301*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1302*0Sstevel@tonic-gate	st	c_2,rp(10)	!r[10]=c2;
1303*0Sstevel@tonic-gate
1304*0Sstevel@tonic-gate	umul	a_4,a_7,t_1	!=!sqr_add_c2(a,7,4,c3,c1,c2);
1305*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1306*0Sstevel@tonic-gate	rd	%y,t_2
1307*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1308*0Sstevel@tonic-gate	addx	%g0,%g0,c_2	!=
1309*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1310*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1311*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1312*0Sstevel@tonic-gate	umul	a_5,a_6,t_1	!=!sqr_add_c2(a,6,5,c3,c1,c2);
1313*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1314*0Sstevel@tonic-gate	rd	%y,t_2
1315*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1316*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1317*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1318*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1319*0Sstevel@tonic-gate	st	c_3,rp(11)	!r[11]=c3;
1320*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1321*0Sstevel@tonic-gate
1322*0Sstevel@tonic-gate	umul	a_7,a_5,t_1	!sqr_add_c2(a,7,5,c1,c2,c3);
1323*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1324*0Sstevel@tonic-gate	rd	%y,t_2
1325*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1326*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
1327*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1328*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1329*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1330*0Sstevel@tonic-gate	umul	a_6,a_6,t_1	!sqr_add_c(a,6,c1,c2,c3);
1331*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1332*0Sstevel@tonic-gate	rd	%y,t_2
1333*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1334*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1335*0Sstevel@tonic-gate	st	c_1,rp(12)	!r[12]=c1;
1336*0Sstevel@tonic-gate
1337*0Sstevel@tonic-gate	umul	a_6,a_7,t_1	!sqr_add_c2(a,7,6,c2,c3,c1);
1338*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
1339*0Sstevel@tonic-gate	rd	%y,t_2
1340*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1341*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
1342*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2	!=
1343*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1344*0Sstevel@tonic-gate	st	c_2,rp(13)	!r[13]=c2;
1345*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
1346*0Sstevel@tonic-gate
	/* Last column: a[7]*a[7]; the carry out of c_1 is not propagated further. */
1347*0Sstevel@tonic-gate	umul	a_7,a_7,t_1	!sqr_add_c(a,7,c3,c1,c2);
1348*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1349*0Sstevel@tonic-gate	rd	%y,t_2
1350*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
1351*0Sstevel@tonic-gate	st	c_3,rp(14)	!r[14]=c3;
1352*0Sstevel@tonic-gate	st	c_1,rp(15)	!r[15]=c1;
1353*0Sstevel@tonic-gate
	/* restore runs in the delay slot of ret and writes %g0+%g0 (zero) to the caller's %o0. */
1354*0Sstevel@tonic-gate	ret
1355*0Sstevel@tonic-gate	restore	%g0,%g0,%o0
1356*0Sstevel@tonic-gate
1357*0Sstevel@tonic-gate.type	bn_sqr_comba8,#function
1358*0Sstevel@tonic-gate.size	bn_sqr_comba8,(.-bn_sqr_comba8)
1359*0Sstevel@tonic-gate
1360*0Sstevel@tonic-gate.align	32
1361*0Sstevel@tonic-gate
1362*0Sstevel@tonic-gate.global bn_sqr_comba4
1363*0Sstevel@tonic-gate/*
1364*0Sstevel@tonic-gate * void bn_sqr_comba4(r,a)
1365*0Sstevel@tonic-gate * BN_ULONG *r,*a;
1366*0Sstevel@tonic-gate */
/*
 * Column-by-column ("comba") squaring of a 4-word operand: the loads
 * below read a[0..3] and the stores write r[0..7].
 * ap()/rp(), a_N, c_N, t_N and FRAME_SIZE are macros defined earlier in
 * this file, outside this block.
 *
 * c_1, c_2 and c_3 act as a rotating three-word accumulator
 * (low, middle, high).  Two kinds of step appear:
 *
 *   sqr_add_c(a,i,lo,mid,hi)     adds a[i]*a[i] once:
 *	umul / addcc lo / rd %y / addxcc mid / addx hi
 *
 *   sqr_add_c2(a,i,j,lo,mid,hi)  adds a[i]*a[j] twice: after the
 *	sequence above, the addcc / addxcc / addx triple is repeated
 *	with the same t_1 (low word) and t_2 (high word), so the cross
 *	product is doubled without a second umul.
 *
 * For the first product of a column the first addx is written as
 * "addx %g0,%g0,hi", which clears hi and records the carry in one step.
 * ld and st do not modify the integer condition codes, so they may sit
 * between an addxcc and the addx consuming its carry.
 */
1367*0Sstevel@tonic-gatebn_sqr_comba4:
1368*0Sstevel@tonic-gate	save	%sp,FRAME_SIZE,%sp
1369*0Sstevel@tonic-gate	ld	ap(0),a_0
	/* Column 0: a[0]*a[0]; low word is r[0], high word (%y) seeds c_2. */
1370*0Sstevel@tonic-gate	umul	a_0,a_0,c_1	!sqr_add_c(a,0,c1,c2,c3);
1371*0Sstevel@tonic-gate	ld	ap(1),a_1	!=
1372*0Sstevel@tonic-gate	rd	%y,c_2
1373*0Sstevel@tonic-gate	st	c_1,rp(0)	!r[0]=c1;
1374*0Sstevel@tonic-gate
1375*0Sstevel@tonic-gate	ld	ap(2),a_2
	/* Column 1: c_3 and c_1 hold no value yet, so they are initialised from %g0 plus carry. */
1376*0Sstevel@tonic-gate	umul	a_0,a_1,t_1	!=!sqr_add_c2(a,1,0,c2,c3,c1);
1377*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1378*0Sstevel@tonic-gate	rd	%y,t_2
1379*0Sstevel@tonic-gate	addxcc	%g0,t_2,c_3
1380*0Sstevel@tonic-gate	addx	%g0,%g0,c_1	!=
	/* Second accumulation of the same product (the "2" in sqr_add_c2). */
1381*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1382*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1383*0Sstevel@tonic-gate	addx	c_1,%g0,c_1	!=
1384*0Sstevel@tonic-gate	st	c_2,rp(1)	!r[1]=c2;
1385*0Sstevel@tonic-gate
1386*0Sstevel@tonic-gate	umul	a_2,a_0,t_1	!sqr_add_c2(a,2,0,c3,c1,c2);
1387*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1388*0Sstevel@tonic-gate	rd	%y,t_2		!=
1389*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1390*0Sstevel@tonic-gate	addx	%g0,%g0,c_2
1391*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1392*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1	!=
1393*0Sstevel@tonic-gate	addx	c_2,%g0,c_2
1394*0Sstevel@tonic-gate	ld	ap(3),a_3
1395*0Sstevel@tonic-gate	umul	a_1,a_1,t_1	!sqr_add_c(a,1,c3,c1,c2);
1396*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3	!=
1397*0Sstevel@tonic-gate	rd	%y,t_2
1398*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1399*0Sstevel@tonic-gate	st	c_3,rp(2)	!r[2]=c3;
1400*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1401*0Sstevel@tonic-gate
1402*0Sstevel@tonic-gate	umul	a_0,a_3,t_1	!sqr_add_c2(a,3,0,c1,c2,c3);
1403*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1404*0Sstevel@tonic-gate	rd	%y,t_2
1405*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1406*0Sstevel@tonic-gate	addx	%g0,%g0,c_3
1407*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1408*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1409*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1410*0Sstevel@tonic-gate	umul	a_1,a_2,t_1	!sqr_add_c2(a,2,1,c1,c2,c3);
1411*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1412*0Sstevel@tonic-gate	rd	%y,t_2
1413*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1414*0Sstevel@tonic-gate	addx	c_3,%g0,c_3
1415*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1416*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2
1417*0Sstevel@tonic-gate	addx	c_3,%g0,c_3	!=
1418*0Sstevel@tonic-gate	st	c_1,rp(3)	!r[3]=c1;
1419*0Sstevel@tonic-gate
1420*0Sstevel@tonic-gate	umul	a_3,a_1,t_1	!sqr_add_c2(a,3,1,c2,c3,c1);
1421*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1422*0Sstevel@tonic-gate	rd	%y,t_2		!=
1423*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1424*0Sstevel@tonic-gate	addx	%g0,%g0,c_1
1425*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1426*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3	!=
1427*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1428*0Sstevel@tonic-gate	umul	a_2,a_2,t_1	!sqr_add_c(a,2,c2,c3,c1);
1429*0Sstevel@tonic-gate	addcc	c_2,t_1,c_2
1430*0Sstevel@tonic-gate	rd	%y,t_2		!=
1431*0Sstevel@tonic-gate	addxcc	c_3,t_2,c_3
1432*0Sstevel@tonic-gate	addx	c_1,%g0,c_1
1433*0Sstevel@tonic-gate	st	c_2,rp(4)	!r[4]=c2;
1434*0Sstevel@tonic-gate
1435*0Sstevel@tonic-gate	umul	a_2,a_3,t_1	!=!sqr_add_c2(a,3,2,c3,c1,c2);
1436*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1437*0Sstevel@tonic-gate	rd	%y,t_2
1438*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
1439*0Sstevel@tonic-gate	addx	%g0,%g0,c_2	!=
1440*0Sstevel@tonic-gate	addcc	c_3,t_1,c_3
1441*0Sstevel@tonic-gate	addxcc	c_1,t_2,c_1
	/* The store sits between addxcc and addx; st leaves the carry flag intact. */
1442*0Sstevel@tonic-gate	st	c_3,rp(5)	!r[5]=c3;
1443*0Sstevel@tonic-gate	addx	c_2,%g0,c_2	!=
1444*0Sstevel@tonic-gate
	/* Last column: a[3]*a[3]; the carry out of c_2 is not propagated further. */
1445*0Sstevel@tonic-gate	umul	a_3,a_3,t_1	!sqr_add_c(a,3,c1,c2,c3);
1446*0Sstevel@tonic-gate	addcc	c_1,t_1,c_1
1447*0Sstevel@tonic-gate	rd	%y,t_2
1448*0Sstevel@tonic-gate	addxcc	c_2,t_2,c_2	!=
1449*0Sstevel@tonic-gate	st	c_1,rp(6)	!r[6]=c1;
1450*0Sstevel@tonic-gate	st	c_2,rp(7)	!r[7]=c2;
1451*0Sstevel@tonic-gate
	/* restore runs in the delay slot of ret and writes %g0+%g0 (zero) to the caller's %o0. */
1452*0Sstevel@tonic-gate	ret
1453*0Sstevel@tonic-gate	restore	%g0,%g0,%o0
1454*0Sstevel@tonic-gate
1455*0Sstevel@tonic-gate.type	bn_sqr_comba4,#function
1456*0Sstevel@tonic-gate.size	bn_sqr_comba4,(.-bn_sqr_comba4)
1457*0Sstevel@tonic-gate
1458*0Sstevel@tonic-gate.align	32
1459