xref: /minix3/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sparcv9-mont.S (revision ebfedea0ce5bbe81e252ddf32d732e40fb633fae)
! bn_mul_mont_int -- Montgomery multiplication over vectors of 32-bit words
! (SPARC V9 assembler; "!" introduces a comment).
!
! Inputs, as the code below uses them (%o0-%o5 on entry, visible as %i0-%i5
! once "save" has shifted the register window):
!   %i0  destination vector; the final result words are stored through it
!   %i1  ap  - first operand vector
!   %i2  bp  - second operand vector; ap == bp selects the squaring path
!   %i3  np  - vector that is subtracted from the intermediate result at the
!              end (presumably the modulus)
!   %i4  address of the n0 word; replaced by the loaded word itself
!   %i5  num - number of 32-bit words; rescaled here to a byte count (num*4)
! Result: %o0 = 0 when num < 4 (nothing is computed); otherwise the routine
! returns 1 from its common tail.
!
! This entry block validates num, opens a register window, carves the
! temporary vector out of the stack, preloads the first operand words and
! dispatches to the multiply or the squaring path.
1*ebfedea0SLionel Sambuc.section	".text",#alloc,#execinstr
2*ebfedea0SLionel Sambuc
3*ebfedea0SLionel Sambuc.global	bn_mul_mont_int
4*ebfedea0SLionel Sambuc.align	32
5*ebfedea0SLionel Sambucbn_mul_mont_int:
6*ebfedea0SLionel Sambuc	cmp	%o5,4			! 128 bits minimum (4 words of 32 bits)
7*ebfedea0SLionel Sambuc	bge,pt	%icc,.Lenter
8*ebfedea0SLionel Sambuc	sethi	%hi(0xffffffff),%g1	! delay slot (runs on both paths): upper part of the word mask
9*ebfedea0SLionel Sambuc	retl				! num < 4: leaf return, no window was opened
10*ebfedea0SLionel Sambuc	clr	%o0			! delay slot: return value 0
11*ebfedea0SLionel Sambuc.align	32
12*ebfedea0SLionel Sambuc.Lenter:
13*ebfedea0SLionel Sambuc	save	%sp,-192,%sp		! new register window with a 192-byte frame
14*ebfedea0SLionel Sambuc	sll	%i5,2,%i5		! num*=4
15*ebfedea0SLionel Sambuc	or	%g1,%lo(0xffffffff),%g1	! %g1 = 0xffffffff, mask for one 32-bit word
16*ebfedea0SLionel Sambuc	ld	[%i4],%i4		! %i4 = n0 word, loaded through the pointer
17*ebfedea0SLionel Sambuc	cmp	%i1,%i2			! ap == bp?  Consumed by the "be" at the end of this block;
!					  no instruction in between writes the condition codes.
18*ebfedea0SLionel Sambuc	and	%i5,%g1,%i5		! keep only the low 32 bits of num*4
19*ebfedea0SLionel Sambuc	ld	[%i2],%l2		! bp[0]
20*ebfedea0SLionel Sambuc	nop
21*ebfedea0SLionel Sambuc
! Carve the temporary vector out of the stack.  %sp is kept biased by 2047,
! so the bias is added before the arithmetic and removed again afterwards.
! The operand loads are interleaved with the address computation.
22*ebfedea0SLionel Sambuc	add	%sp,2047,%o7		! real top of stack
23*ebfedea0SLionel Sambuc	ld	[%i1],%o0		! ap[0] ! redundant in squaring context
24*ebfedea0SLionel Sambuc	sub	%o7,%i5,%o7		! reserve num*4 bytes
25*ebfedea0SLionel Sambuc	ld	[%i1+4],%l5		! ap[1]
26*ebfedea0SLionel Sambuc	and	%o7,-1024,%o7		! round down to a 1024-byte boundary
27*ebfedea0SLionel Sambuc	ld	[%i3],%o1		! np[0]
28*ebfedea0SLionel Sambuc	sub	%o7,2047,%sp		! alloca
29*ebfedea0SLionel Sambuc	ld	[%i3+4],%l6		! np[1]
30*ebfedea0SLionel Sambuc	be,pt	%xcc,.Lbn_sqr_mont	! ap == bp (64-bit pointer compare): squaring path
31*ebfedea0SLionel Sambuc	mov	12,%l1			! delay slot (runs on both paths): j = 12, byte offset of word 3
32*ebfedea0SLionel Sambuc
! Multiply path, first pass (multiplier word bp[0]).
!
! Two carry chains run side by side:
!   %o0  ap[j]*bp[0] products plus carry
!   %o1  np[j]*m products plus the low word coming out of the %o0 chain
! where m = (low word of ap[0]*bp[0]) * n0, truncated to 32 bits.  The very
! first low word of the %o1 chain is shifted away without being stored; each
! later low word is stored to consecutive words of the temporary vector,
! which starts at %sp+2047+192.
!
! Register roles in this pass:
!   %l2 bp[0]     %l3 m          %l4 store pointer into the temporary vector
!   %l5 ap[j]     %l6 np[j]      %l1 j as a byte offset   %i5 num*4
!   %g1 0xffffffff mask          %i4 n0
!   %o3/%o4 products consumed in the current round
!   %g4/%g5 products computed one round ahead (software pipelining)
!   %o2 receives the carry out of the whole pass
! On entry %o0=ap[0], %l5=ap[1], %o1=np[0], %l6=np[1] and %l1=12 are
! expected to have been set up already.
33*ebfedea0SLionel Sambuc	mulx	%o0,%l2,%o0	! ap[0]*bp[0]
34*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4	!prologue! ap[1]*bp[0]
35*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3		! low word of ap[0]*bp[0]
36*ebfedea0SLionel Sambuc	add	%sp,2047+192,%l4	! %l4 = start of the temporary vector
37*ebfedea0SLionel Sambuc	ld	[%i1+8],%l5		!prologue! ap[2]
38*ebfedea0SLionel Sambuc
39*ebfedea0SLionel Sambuc	mulx	%i4,%o3,%l3		! "t[0]"*n0
40*ebfedea0SLionel Sambuc	and	%l3,%g1,%l3		! m, truncated to 32 bits
41*ebfedea0SLionel Sambuc
42*ebfedea0SLionel Sambuc	mulx	%o1,%l3,%o1	! np[0]*"t[0]"*n0
43*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4	!prologue! np[1]*"t[0]"*n0
44*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0		! carry of the ap*bp chain
45*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1		! add the low word of ap[0]*bp[0]
46*ebfedea0SLionel Sambuc	ld	[%i3+8],%l6		!prologue! np[2]
47*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1		! low word is dropped, only the carry is kept
48*ebfedea0SLionel Sambuc	mov	%g4,%o3		!prologue!
49*ebfedea0SLionel Sambuc
! Steady state: each round consumes the products in %o3/%o4, stores one word
! and computes the next pair of products.  Runs while j < num*4.
50*ebfedea0SLionel Sambuc.L1st:
51*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4		! product for the next round: ap word * bp[0]
52*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%g5		! product for the next round: np word * m
53*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
54*ebfedea0SLionel Sambuc	ld	[%i1+%l1],%l5		! ap[j]
55*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3		! low word of the ap*bp chain
56*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
57*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6		! np[j]
58*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
59*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
60*ebfedea0SLionel Sambuc	add	%l1,4,%l1			! j++
61*ebfedea0SLionel Sambuc	mov	%g4,%o3
62*ebfedea0SLionel Sambuc	st	%o1,[%l4]		! store the low 32 bits of the np chain
63*ebfedea0SLionel Sambuc	cmp	%l1,%i5
64*ebfedea0SLionel Sambuc	mov	%g5,%o4
65*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
66*ebfedea0SLionel Sambuc	bl	%icc,.L1st
67*ebfedea0SLionel Sambuc	add	%l4,4,%l4		! tp++ (delay slot, runs on exit as well)
68*ebfedea0SLionel Sambuc!.L1st
69*ebfedea0SLionel Sambuc
! Drain the pipeline: two more rounds without further operand loads.
70*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4	!epilogue!
71*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%g5
72*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
73*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
74*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
75*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
76*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
77*ebfedea0SLionel Sambuc	st	%o1,[%l4]
78*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
79*ebfedea0SLionel Sambuc
80*ebfedea0SLionel Sambuc	add	%g4,%o0,%o0
81*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
82*ebfedea0SLionel Sambuc	add	%g5,%o1,%o1
83*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
84*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
85*ebfedea0SLionel Sambuc	st	%o1,[%l4+4]
86*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
87*ebfedea0SLionel Sambuc
! Merge the two remaining carries into the top word; what spills past
! 32 bits is kept in %o2.
88*ebfedea0SLionel Sambuc	add	%o0,%o1,%o1
89*ebfedea0SLionel Sambuc	st	%o1,[%l4+8]
90*ebfedea0SLionel Sambuc	srlx	%o1,32,%o2
91*ebfedea0SLionel Sambuc
! Multiply path, remaining passes (multiplier words bp[1] and up).
!
! Same two carry chains as a first pass, except that the word already held in
! the temporary vector tp (at %sp+2047+192) is added into the ap*bp chain:
!   %o0  tp[j] + ap[j]*bp[i] plus carry
!   %o1  np[j]*m plus the low word coming out of the %o0 chain
! with m = (low word of tp[0] + ap[0]*bp[i]) * n0, truncated to 32 bits.
! Each stored word lands one position below the word that was read, and the
! lowest word of the %o1 chain is shifted away without being stored.
!
! Register roles:
!   %l0 i as a byte offset   %l1 j as a byte offset   %i5 num*4
!   %l2 bp[i]   %l3 m   %l4 tp pointer   %l5 ap[j]   %l6 np[j]   %l7 tp[j]
!   %o3/%o4 products consumed this round, %g4/%g5 products for the next one
!   %o2 carry out of the previous pass, updated at the end of each pass
!   %g1 0xffffffff mask      %i4 n0
92*ebfedea0SLionel Sambuc	mov	4,%l0			! i++
93*ebfedea0SLionel Sambuc	ld	[%i2+4],%l2		! bp[1]
94*ebfedea0SLionel Sambuc.Louter:
95*ebfedea0SLionel Sambuc	add	%sp,2047+192,%l4	! rewind the tp pointer to tp[0]
96*ebfedea0SLionel Sambuc	ld	[%i1],%o0		! ap[0]
97*ebfedea0SLionel Sambuc	ld	[%i1+4],%l5		! ap[1]
98*ebfedea0SLionel Sambuc	ld	[%i3],%o1		! np[0]
99*ebfedea0SLionel Sambuc	ld	[%i3+4],%l6		! np[1]
100*ebfedea0SLionel Sambuc	ld	[%l4],%g5		! tp[0]
101*ebfedea0SLionel Sambuc	ld	[%l4+4],%l7		! tp[1]
102*ebfedea0SLionel Sambuc	mov	12,%l1			! j = 12, byte offset of word 3
103*ebfedea0SLionel Sambuc
104*ebfedea0SLionel Sambuc	mulx	%o0,%l2,%o0		! ap[0]*bp[i]
105*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4	!prologue!
106*ebfedea0SLionel Sambuc	add	%g5,%o0,%o0		! + tp[0]
107*ebfedea0SLionel Sambuc	ld	[%i1+8],%l5		!prologue!
108*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3		! low word of tp[0]+ap[0]*bp[i]
109*ebfedea0SLionel Sambuc
110*ebfedea0SLionel Sambuc	mulx	%i4,%o3,%l3		! low word * n0
111*ebfedea0SLionel Sambuc	and	%l3,%g1,%l3		! m, truncated to 32 bits
112*ebfedea0SLionel Sambuc
113*ebfedea0SLionel Sambuc	mulx	%o1,%l3,%o1		! np[0]*m
114*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4	!prologue!
115*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
116*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
117*ebfedea0SLionel Sambuc	ld	[%i3+8],%l6		!prologue!
118*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1		! low word is dropped, only the carry is kept
119*ebfedea0SLionel Sambuc	mov	%g4,%o3		!prologue!
120*ebfedea0SLionel Sambuc
! Steady state, runs while j < num*4.
121*ebfedea0SLionel Sambuc.Linner:
122*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4		! product for the next round: ap word * bp[i]
123*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%g5		! product for the next round: np word * m
124*ebfedea0SLionel Sambuc	add	%l7,%o0,%o0		! add the tp word loaded earlier
125*ebfedea0SLionel Sambuc	ld	[%i1+%l1],%l5		! ap[j]
126*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
127*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
128*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6		! np[j]
129*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
130*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7		! tp[j]
131*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
132*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
133*ebfedea0SLionel Sambuc	add	%l1,4,%l1			! j++
134*ebfedea0SLionel Sambuc	mov	%g4,%o3
135*ebfedea0SLionel Sambuc	st	%o1,[%l4]		! tp[j-1]
136*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
137*ebfedea0SLionel Sambuc	mov	%g5,%o4
138*ebfedea0SLionel Sambuc	cmp	%l1,%i5
139*ebfedea0SLionel Sambuc	bl	%icc,.Linner
140*ebfedea0SLionel Sambuc	add	%l4,4,%l4		! tp++ (delay slot, runs on exit as well)
141*ebfedea0SLionel Sambuc!.Linner
142*ebfedea0SLionel Sambuc
! Drain the pipeline: two more rounds; only one further tp word is loaded.
143*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4	!epilogue!
144*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%g5
145*ebfedea0SLionel Sambuc	add	%l7,%o0,%o0
146*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
147*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7		! tp[j]
148*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
149*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
150*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
151*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
152*ebfedea0SLionel Sambuc	st	%o1,[%l4]		! tp[j-1]
153*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
154*ebfedea0SLionel Sambuc
155*ebfedea0SLionel Sambuc	add	%l7,%o0,%o0
156*ebfedea0SLionel Sambuc	add	%g4,%o0,%o0
157*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
158*ebfedea0SLionel Sambuc	add	%g5,%o1,%o1
159*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
160*ebfedea0SLionel Sambuc	st	%o1,[%l4+4]		! tp[j-1]
161*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
162*ebfedea0SLionel Sambuc	add	%l0,4,%l0			! i++
163*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
164*ebfedea0SLionel Sambuc
! Top word: both chain carries plus the carry left in %o2 by the previous
! pass; the new overflow goes back into %o2.
165*ebfedea0SLionel Sambuc	add	%o0,%o1,%o1
166*ebfedea0SLionel Sambuc	cmp	%l0,%i5			! i < num*4?  (the add/st/srlx below leave the flags alone)
167*ebfedea0SLionel Sambuc	add	%o2,%o1,%o1
168*ebfedea0SLionel Sambuc	st	%o1,[%l4+8]
169*ebfedea0SLionel Sambuc
170*ebfedea0SLionel Sambuc	srlx	%o1,32,%o2
171*ebfedea0SLionel Sambuc	bl,a	%icc,.Louter		! annulled branch: the delay slot runs only when taken
172*ebfedea0SLionel Sambuc	ld	[%i2+%l0],%l2		! bp[i]
173*ebfedea0SLionel Sambuc!.Louter
174*ebfedea0SLionel Sambuc
175*ebfedea0SLionel Sambuc	add	%l4,12,%l4		! %l4 = one past the last tp word, as the tail expects
176*ebfedea0SLionel Sambuc
! Common tail for the multiply and the squaring path.
!
! Expects: %l4 = one past the last word of the temporary vector tp,
!          %o2 = carry out of the last pass, %i5 = num*4,
!          %i0 = destination vector, %i3 = np.
! Step 1 (.Lsub):  destination[] = tp[] - np[], word by word with borrow.
! Step 2:          %o2 = %o2 - borrow; used as a select mask below.
! Step 3 (.Lcopy): copy either tp[] (mask all ones) or the destination
!                  itself (mask zero) into the destination, and overwrite
!                  every tp word with zero.
! Returns 1.
! All three pointers are advanced to the end of their vectors so that one
! negative byte index in %o7 can count up to zero.
177*ebfedea0SLionel Sambuc.Ltail:
178*ebfedea0SLionel Sambuc	add	%i3,%i5,%i3		! np  += num*4
179*ebfedea0SLionel Sambuc	add	%i0,%i5,%i0		! destination += num*4
180*ebfedea0SLionel Sambuc	mov	%l4,%i1			! overwritten by the "and" after .Lsub before %i1 is read again
181*ebfedea0SLionel Sambuc	sub	%g0,%i5,%o7		! k=-num
182*ebfedea0SLionel Sambuc	ba	.Lsub
183*ebfedea0SLionel Sambuc	subcc	%g0,%g0,%g0		! clear %icc.c
184*ebfedea0SLionel Sambuc.align	16
! The loop is closed with "brnz", which tests a register rather than the
! condition codes, and the index is advanced with a plain "add"; the carry
! written by "subccc" therefore survives from one word to the next.
185*ebfedea0SLionel Sambuc.Lsub:
186*ebfedea0SLionel Sambuc	ld	[%l4+%o7],%o0
187*ebfedea0SLionel Sambuc	ld	[%i3+%o7],%o1
188*ebfedea0SLionel Sambuc	subccc	%o0,%o1,%o1		! tp[j]-np[j]
189*ebfedea0SLionel Sambuc	add	%i0,%o7,%l0		! address of the destination word, taken before k moves on
190*ebfedea0SLionel Sambuc	add	%o7,4,%o7
191*ebfedea0SLionel Sambuc	brnz	%o7,.Lsub
192*ebfedea0SLionel Sambuc	st	%o1,[%l0]		! delay slot: store the difference
193*ebfedea0SLionel Sambuc	subc	%o2,0,%o2		! handle upmost overflow bit
! Branch-free select: %i1 = (%l4 & %o2) | (%i0 & ~%o2), i.e. tp when %o2 is
! all ones and the destination when %o2 is zero.
194*ebfedea0SLionel Sambuc	and	%l4,%o2,%i1
195*ebfedea0SLionel Sambuc	andn	%i0,%o2,%i3
196*ebfedea0SLionel Sambuc	or	%i1,%i3,%i1
197*ebfedea0SLionel Sambuc	sub	%g0,%i5,%o7		! k = -num again
198*ebfedea0SLionel Sambuc
199*ebfedea0SLionel Sambuc.Lcopy:
200*ebfedea0SLionel Sambuc	ld	[%i1+%o7],%o0		! copy or in-place refresh
201*ebfedea0SLionel Sambuc	st	%g0,[%l4+%o7]		! zap tp
202*ebfedea0SLionel Sambuc	st	%o0,[%i0+%o7]
203*ebfedea0SLionel Sambuc	add	%o7,4,%o7
204*ebfedea0SLionel Sambuc	brnz	%o7,.Lcopy
205*ebfedea0SLionel Sambuc	nop
206*ebfedea0SLionel Sambuc	mov	1,%i0			! return value 1 (becomes the caller's %o0 after restore)
207*ebfedea0SLionel Sambuc	ret
208*ebfedea0SLionel Sambuc	restore				! delay slot: pop the register window
! Squaring path (reached when ap == bp), pass 0 (multiplier word ap[0]).
!
! %l2 was loaded from bp[0], which is ap[0] on this path.  Two chains:
!   %o0  ap[j]*ap[0] products plus carry
!   %o1  np[j]*m products plus the words coming out of the %o0 chain
! with m = (low word of ap[0]*ap[0]) * n0, truncated to 32 bits.
! Every word leaving the %o0 chain after the first is doubled on its way into
! the %o1 chain: it is added to itself, the bit shifted out of the previous
! word (%i2) is or-ed in at the bottom, and the bit that overflows 32 bits
! becomes the new %i2.  To make that doubling come out right for the square
! term, the high word of ap[0]*ap[0] is halved up front and its lowest bit
! seeds %i2.  %i2 is free for this because bp is not needed separately here.
!
! Other registers: %l3 m, %l4 store pointer into the temporary vector
! (%sp+2047+192), %l5 ap[j], %l6 np[j], %l1 j as a byte offset (12 on
! entry), %i5 num*4, %g1 0xffffffff, %i4 n0, %o3/%o4 current products,
! %g4/%g5 products for the next round, %o2 receives the carry out of the pass.
209*ebfedea0SLionel Sambuc.align	32
210*ebfedea0SLionel Sambuc.Lbn_sqr_mont:
211*ebfedea0SLionel Sambuc	mulx	%l2,%l2,%o0		! ap[0]*ap[0]
212*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4		!prologue!
213*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3		! low word of ap[0]*ap[0]
214*ebfedea0SLionel Sambuc	add	%sp,2047+192,%l4	! %l4 = start of the temporary vector
215*ebfedea0SLionel Sambuc	ld	[%i1+8],%l5			!prologue!
216*ebfedea0SLionel Sambuc
217*ebfedea0SLionel Sambuc	mulx	%i4,%o3,%l3			! "t[0]"*n0
218*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
219*ebfedea0SLionel Sambuc	and	%l3,%g1,%l3		! m, truncated to 32 bits
220*ebfedea0SLionel Sambuc
221*ebfedea0SLionel Sambuc	mulx	%o1,%l3,%o1		! np[0]*"t[0]"*n0
222*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4		!prologue!
223*ebfedea0SLionel Sambuc	and	%o0,1,%i2		! lowest bit of the high word seeds the shifted-out bit
224*ebfedea0SLionel Sambuc	ld	[%i3+8],%l6			!prologue!
225*ebfedea0SLionel Sambuc	srlx	%o0,1,%o0		! halve the high word; the doubling below undoes this
226*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
227*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1		! low word is dropped, only the carry is kept
228*ebfedea0SLionel Sambuc	mov	%g4,%o3			!prologue!
229*ebfedea0SLionel Sambuc
! Steady state, runs while j < num*4.
230*ebfedea0SLionel Sambuc.Lsqr_1st:
231*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4
232*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%g5
233*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0		! ap[j]*a0+c0
234*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
235*ebfedea0SLionel Sambuc	ld	[%i1+%l1],%l5			! ap[j]
236*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
237*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6			! np[j]
238*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
239*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3		! double the word ...
240*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3		! ... and bring in the bit shifted out of the previous one
241*ebfedea0SLionel Sambuc	mov	%g5,%o4
242*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2		! bit shifted out of this word
243*ebfedea0SLionel Sambuc	add	%l1,4,%l1				! j++
244*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
245*ebfedea0SLionel Sambuc	cmp	%l1,%i5
246*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
247*ebfedea0SLionel Sambuc	st	%o1,[%l4]
248*ebfedea0SLionel Sambuc	mov	%g4,%o3
249*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
250*ebfedea0SLionel Sambuc	bl	%icc,.Lsqr_1st
251*ebfedea0SLionel Sambuc	add	%l4,4,%l4			! tp++
252*ebfedea0SLionel Sambuc!.Lsqr_1st
253*ebfedea0SLionel Sambuc
! Drain the pipeline: two more rounds without further operand loads.
254*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%g4		! epilogue
255*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%g5
256*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0		! ap[j]*a0+c0
257*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
258*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
259*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
260*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3
261*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3
262*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2
263*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
264*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
265*ebfedea0SLionel Sambuc	st	%o1,[%l4]
266*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
267*ebfedea0SLionel Sambuc
268*ebfedea0SLionel Sambuc	add	%g4,%o0,%o0		! ap[j]*a0+c0
269*ebfedea0SLionel Sambuc	add	%g5,%o1,%o1
270*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
271*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
272*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3
273*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3
274*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2
275*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
276*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
277*ebfedea0SLionel Sambuc	st	%o1,[%l4+4]
278*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
279*ebfedea0SLionel Sambuc
! Top word: the remaining %o0 carry is doubled the same way and merged with
! the %o1 carry; what spills past 32 bits is kept in %o2.
280*ebfedea0SLionel Sambuc	add	%o0,%o0,%o0
281*ebfedea0SLionel Sambuc	or	%i2,%o0,%o0
282*ebfedea0SLionel Sambuc	add	%o0,%o1,%o1
283*ebfedea0SLionel Sambuc	st	%o1,[%l4+8]
284*ebfedea0SLionel Sambuc	srlx	%o1,32,%o2
285*ebfedea0SLionel Sambuc
! Squaring path, pass 1 (multiplier word ap[1]).
!
! Works on the temporary vector tp at %sp+2047+192, which already holds the
! output of the pass before.  m = tp[0] * n0, truncated to 32 bits.
!   word 0:  only tp[0] + np[0]*m; its low word is shifted away unstored
!   word 1:  tp[1] + np[1]*m + low word of ap[1]*ap[1]  -> stored to tp[0]
!   word j>=2 (loop): tp[j] + np[j]*m + doubled ap[j]*ap[1] chain
! The ap chain in %o0 is handled as follows: the high word of the square
! term is halved up front with its lowest bit parked in %i2, and every word
! leaving the chain is doubled (added to itself, previous shifted-out bit
! or-ed in, new shifted-out bit saved in %i2) before it joins the %o1 chain.
!
! Registers: %l2 ap[1], %l3 m, %l4 tp pointer, %l5 ap[j], %l6 np[j],
! %l7 tp[j], %l1 j as a byte offset, %i5 num*4, %g1 0xffffffff, %i4 n0,
! %o3/%o4 current products, %o2 carry from the previous pass (in) and from
! this pass (out).
286*ebfedea0SLionel Sambuc	ld	[%sp+2047+192],%g4	! tp[0]
287*ebfedea0SLionel Sambuc	ld	[%sp+2047+192+4],%g5	! tp[1]
288*ebfedea0SLionel Sambuc	ld	[%sp+2047+192+8],%l7	! tp[2]
289*ebfedea0SLionel Sambuc	ld	[%i1+4],%l2			! ap[1]
290*ebfedea0SLionel Sambuc	ld	[%i1+8],%l5			! ap[2]
291*ebfedea0SLionel Sambuc	ld	[%i3],%o1			! np[0]
292*ebfedea0SLionel Sambuc	ld	[%i3+4],%l6			! np[1]
293*ebfedea0SLionel Sambuc	mulx	%i4,%g4,%l3		! tp[0]*n0
294*ebfedea0SLionel Sambuc
295*ebfedea0SLionel Sambuc	mulx	%l2,%l2,%o0		! ap[1]*ap[1]
296*ebfedea0SLionel Sambuc	and	%l3,%g1,%l3		! m, truncated to 32 bits
297*ebfedea0SLionel Sambuc
298*ebfedea0SLionel Sambuc	mulx	%o1,%l3,%o1		! np[0]*m
299*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4		! np[1]*m
300*ebfedea0SLionel Sambuc	add	%g4,%o1,%o1		! + tp[0]
301*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3		! low word of ap[1]*ap[1]
302*ebfedea0SLionel Sambuc	ld	[%i3+8],%l6			! np[2]
303*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1		! low word is dropped, only the carry is kept
304*ebfedea0SLionel Sambuc	add	%g5,%o1,%o1		! + tp[1]
305*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
306*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1		! + low word of the square term
307*ebfedea0SLionel Sambuc	and	%o0,1,%i2		! lowest bit of the high word seeds the shifted-out bit
308*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1		! + np[1]*m
309*ebfedea0SLionel Sambuc	srlx	%o0,1,%o0		! halve the high word; the doubling below undoes this
310*ebfedea0SLionel Sambuc	mov	12,%l1			! j = 12, byte offset of word 3
311*ebfedea0SLionel Sambuc	st	%o1,[%sp+2047+192]	! tp[0]=
312*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
313*ebfedea0SLionel Sambuc	add	%sp,2047+192+4,%l4	! next store goes to tp[1]
314*ebfedea0SLionel Sambuc
! Steady state, runs while j < num*4.  Unlike the other pipelined loops the
! products are written straight into %o3/%o4 and consumed in the same round.
315*ebfedea0SLionel Sambuc.Lsqr_2nd:
316*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%o3		! ap word * ap[1]
317*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4		! np word * m
318*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
319*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1		! + tp word
320*ebfedea0SLionel Sambuc	ld	[%i1+%l1],%l5			! ap[j]
321*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
322*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6			! np[j]
323*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
324*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
325*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7			! tp[j]
326*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3		! double the word ...
327*ebfedea0SLionel Sambuc	add	%l1,4,%l1				! j++
328*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3		! ... and bring in the previous shifted-out bit
329*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2		! bit shifted out of this word
330*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
331*ebfedea0SLionel Sambuc	cmp	%l1,%i5
332*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
333*ebfedea0SLionel Sambuc	st	%o1,[%l4]			! tp[j-1]
334*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
335*ebfedea0SLionel Sambuc	bl	%icc,.Lsqr_2nd
336*ebfedea0SLionel Sambuc	add	%l4,4,%l4			! tp++
337*ebfedea0SLionel Sambuc!.Lsqr_2nd
338*ebfedea0SLionel Sambuc
! Last round, for the operand words loaded in the final loop iteration.
339*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%o3
340*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4
341*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
342*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1
343*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
344*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
345*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
346*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3
347*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3
348*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2
349*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
350*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
351*ebfedea0SLionel Sambuc	st	%o1,[%l4]			! tp[j-1]
352*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
353*ebfedea0SLionel Sambuc
! Top word: doubled %o0 carry + %o1 carry + carry of the previous pass (%o2);
! the new overflow goes back into %o2.
354*ebfedea0SLionel Sambuc	add	%o0,%o0,%o0
355*ebfedea0SLionel Sambuc	or	%i2,%o0,%o0
356*ebfedea0SLionel Sambuc	add	%o0,%o1,%o1
357*ebfedea0SLionel Sambuc	add	%o2,%o1,%o1
358*ebfedea0SLionel Sambuc	st	%o1,[%l4+4]
359*ebfedea0SLionel Sambuc	srlx	%o1,32,%o2
360*ebfedea0SLionel Sambuc
! Squaring path, middle passes (multiplier word ap[i], starting at i = 2 and
! repeated while i+1 < num).
!
! Each pass works on the temporary vector tp at %sp+2047+192, with
! m = tp[0] * n0 truncated to 32 bits, and has three phases:
!   .Lsqr_inner1  words below i: only tp[j] + np[j]*m is accumulated; no
!                 ap product is added (presumably because the products
!                 involving lower words were already added, doubled, by
!                 earlier passes -- not visible from this block alone)
!   word i        the low word of ap[i]*ap[i] is added in
!   .Lsqr_inner2  words above i: ap[j]*ap[i] runs through the %o0 chain and
!                 every word leaving it is doubled (added to itself, previous
!                 shifted-out bit from %i2 or-ed in, new shifted-out bit kept
!                 in %i2) before joining the %o1 chain
! As in the earlier passes the high word of the square term is halved up
! front, with its lowest bit seeding %i2, and the lowest word of the %o1
! chain is shifted away without being stored.
!
! Registers: %l0 i as a byte offset, %l1 j as a byte offset, %i5 num*4,
! %l2 ap[i], %l3 m, %l4 tp pointer, %l5 ap[j], %l6 np[j], %l7 tp[j],
! %o3 low word of the square term / current ap product, %o4 current np
! product, %o2 carry between passes, %g1 0xffffffff, %i4 n0.
361*ebfedea0SLionel Sambuc	ld	[%sp+2047+192],%g5	! tp[0]
362*ebfedea0SLionel Sambuc	ld	[%sp+2047+192+4],%l7	! tp[1]
363*ebfedea0SLionel Sambuc	ld	[%i1+8],%l2			! ap[2]
364*ebfedea0SLionel Sambuc	ld	[%i3],%o1			! np[0]
365*ebfedea0SLionel Sambuc	ld	[%i3+4],%l6			! np[1]
366*ebfedea0SLionel Sambuc	mulx	%i4,%g5,%l3		! tp[0]*n0
367*ebfedea0SLionel Sambuc	and	%l3,%g1,%l3		! m, truncated to 32 bits
368*ebfedea0SLionel Sambuc	mov	8,%l0			! i = 8, byte offset of word 2
369*ebfedea0SLionel Sambuc
370*ebfedea0SLionel Sambuc	mulx	%l2,%l2,%o0		! ap[2]*ap[2]
371*ebfedea0SLionel Sambuc	mulx	%o1,%l3,%o1		! np[0]*m
372*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3		! low word of the square term, used once j reaches i
373*ebfedea0SLionel Sambuc	add	%g5,%o1,%o1		! + tp[0]
374*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
375*ebfedea0SLionel Sambuc	add	%sp,2047+192,%l4	! rewind the tp pointer to tp[0]
376*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1		! low word is dropped, only the carry is kept
377*ebfedea0SLionel Sambuc	and	%o0,1,%i2		! lowest bit of the high word seeds the shifted-out bit
378*ebfedea0SLionel Sambuc	srlx	%o0,1,%o0		! halve the high word; the doubling below undoes this
379*ebfedea0SLionel Sambuc	mov	4,%l1			! j = 4
380*ebfedea0SLionel Sambuc
! Phase 1: runs while j < i.
381*ebfedea0SLionel Sambuc.Lsqr_outer:
382*ebfedea0SLionel Sambuc.Lsqr_inner1:
383*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4		! np word * m
384*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1		! + tp word
385*ebfedea0SLionel Sambuc	add	%l1,4,%l1		! j++
386*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7		! next tp word
387*ebfedea0SLionel Sambuc	cmp	%l1,%l0
388*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
389*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6		! np[j]
390*ebfedea0SLionel Sambuc	st	%o1,[%l4]
391*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
392*ebfedea0SLionel Sambuc	bl	%icc,.Lsqr_inner1
393*ebfedea0SLionel Sambuc	add	%l4,4,%l4		! tp++ (delay slot)
394*ebfedea0SLionel Sambuc!.Lsqr_inner1
395*ebfedea0SLionel Sambuc
! Phase 2: word i.  %o3 still holds the low word of ap[i]*ap[i].
396*ebfedea0SLionel Sambuc	add	%l1,4,%l1
397*ebfedea0SLionel Sambuc	ld	[%i1+%l1],%l5			! ap[j]
398*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4
399*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1
400*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6			! np[j]
401*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1		! + low word of the square term
402*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7			! tp[j]
403*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
404*ebfedea0SLionel Sambuc	st	%o1,[%l4]
405*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
406*ebfedea0SLionel Sambuc
407*ebfedea0SLionel Sambuc	add	%l1,4,%l1
408*ebfedea0SLionel Sambuc	cmp	%l1,%i5
409*ebfedea0SLionel Sambuc	be,pn	%icc,.Lsqr_no_inner2	! j == num*4: no loop rounds needed, go to the final round
410*ebfedea0SLionel Sambuc	add	%l4,4,%l4		! delay slot (runs on both paths): tp++
411*ebfedea0SLionel Sambuc
! Phase 3: runs while j < num*4.
412*ebfedea0SLionel Sambuc.Lsqr_inner2:
413*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%o3		! ap word * ap[i]
414*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4		! np word * m
415*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1
416*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
417*ebfedea0SLionel Sambuc	ld	[%i1+%l1],%l5			! ap[j]
418*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
419*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6			! np[j]
420*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
421*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3		! double the word ...
422*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7			! tp[j]
423*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3		! ... and bring in the previous shifted-out bit
424*ebfedea0SLionel Sambuc	add	%l1,4,%l1				! j++
425*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2		! bit shifted out of this word
426*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
427*ebfedea0SLionel Sambuc	cmp	%l1,%i5
428*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
429*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
430*ebfedea0SLionel Sambuc	st	%o1,[%l4]			! tp[j-1]
431*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
432*ebfedea0SLionel Sambuc	bl	%icc,.Lsqr_inner2
433*ebfedea0SLionel Sambuc	add	%l4,4,%l4			! tp++
434*ebfedea0SLionel Sambuc
! Final round of the pass, for the operand words loaded last.
435*ebfedea0SLionel Sambuc.Lsqr_no_inner2:
436*ebfedea0SLionel Sambuc	mulx	%l5,%l2,%o3
437*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4
438*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1
439*ebfedea0SLionel Sambuc	add	%o3,%o0,%o0
440*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
441*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
442*ebfedea0SLionel Sambuc	add	%o3,%o3,%o3
443*ebfedea0SLionel Sambuc	or	%i2,%o3,%o3
444*ebfedea0SLionel Sambuc	srlx	%o3,32,%i2
445*ebfedea0SLionel Sambuc	and	%o3,%g1,%o3
446*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
447*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
448*ebfedea0SLionel Sambuc	st	%o1,[%l4]			! tp[j-1]
449*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
450*ebfedea0SLionel Sambuc
! Top word: doubled %o0 carry + %o1 carry + carry of the previous pass (%o2);
! the new overflow goes back into %o2.
451*ebfedea0SLionel Sambuc	add	%o0,%o0,%o0
452*ebfedea0SLionel Sambuc	or	%i2,%o0,%o0
453*ebfedea0SLionel Sambuc	add	%o0,%o1,%o1
454*ebfedea0SLionel Sambuc	add	%o2,%o1,%o1
455*ebfedea0SLionel Sambuc	st	%o1,[%l4+4]
456*ebfedea0SLionel Sambuc	srlx	%o1,32,%o2
457*ebfedea0SLionel Sambuc
! Set up the next pass: same preparation as before the loop, for the next i.
458*ebfedea0SLionel Sambuc	add	%l0,4,%l0				! i++
459*ebfedea0SLionel Sambuc	ld	[%sp+2047+192],%g5	! tp[0]
460*ebfedea0SLionel Sambuc	ld	[%sp+2047+192+4],%l7	! tp[1]
461*ebfedea0SLionel Sambuc	ld	[%i1+%l0],%l2			! ap[j]
462*ebfedea0SLionel Sambuc	ld	[%i3],%o1			! np[0]
463*ebfedea0SLionel Sambuc	ld	[%i3+4],%l6			! np[1]
464*ebfedea0SLionel Sambuc	mulx	%i4,%g5,%l3		! tp[0]*n0
465*ebfedea0SLionel Sambuc	and	%l3,%g1,%l3		! m, truncated to 32 bits
466*ebfedea0SLionel Sambuc	add	%l0,4,%g4		! i+1, for the loop test below
467*ebfedea0SLionel Sambuc
468*ebfedea0SLionel Sambuc	mulx	%l2,%l2,%o0		! ap[i]*ap[i]
469*ebfedea0SLionel Sambuc	mulx	%o1,%l3,%o1		! np[0]*m
470*ebfedea0SLionel Sambuc	and	%o0,%g1,%o3
471*ebfedea0SLionel Sambuc	add	%g5,%o1,%o1
472*ebfedea0SLionel Sambuc	srlx	%o0,32,%o0
473*ebfedea0SLionel Sambuc	add	%sp,2047+192,%l4
474*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
475*ebfedea0SLionel Sambuc	and	%o0,1,%i2
476*ebfedea0SLionel Sambuc	srlx	%o0,1,%o0
477*ebfedea0SLionel Sambuc
478*ebfedea0SLionel Sambuc	cmp	%g4,%i5			! i<num-1
479*ebfedea0SLionel Sambuc	bl	%icc,.Lsqr_outer
480*ebfedea0SLionel Sambuc	mov	4,%l1			! delay slot (runs on both paths): j = 4
481*ebfedea0SLionel Sambuc
! Squaring path, last pass (i is the final word, so no words lie above it).
!
! Expects the state prepared at the end of the preceding pass:
!   %l0 i as a byte offset, %l1 j = 4, %l3 m, %l4 = start of the temporary
!   vector, %l6 np[1], %l7 tp[1], %o1 carry of the np chain after word 0,
!   %o3 low word of ap[i]*ap[i], %o0 its high word halved, %i2 the bit lost
!   by that halving, %o2 carry of the previous pass.
! The loop handles the words below i (only tp[j] + np[j]*m is accumulated),
! then word i receives the low word of the square term, then the top word is
! formed and control passes to .Ltail with %l4 one past the last tp word.
482*ebfedea0SLionel Sambuc.Lsqr_last:
483*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4		! np word * m
484*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1		! + tp word
485*ebfedea0SLionel Sambuc	add	%l1,4,%l1		! j++
486*ebfedea0SLionel Sambuc	ld	[%l4+8],%l7		! next tp word
487*ebfedea0SLionel Sambuc	cmp	%l1,%l0			! j < i?
488*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
489*ebfedea0SLionel Sambuc	ld	[%i3+%l1],%l6		! np[j]
490*ebfedea0SLionel Sambuc	st	%o1,[%l4]
491*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
492*ebfedea0SLionel Sambuc	bl	%icc,.Lsqr_last
493*ebfedea0SLionel Sambuc	add	%l4,4,%l4		! tp++ (delay slot)
494*ebfedea0SLionel Sambuc!.Lsqr_last
495*ebfedea0SLionel Sambuc
! Word i: tp word + low word of the square term + np word * m.
496*ebfedea0SLionel Sambuc	mulx	%l6,%l3,%o4
497*ebfedea0SLionel Sambuc	add	%l7,%o1,%o1
498*ebfedea0SLionel Sambuc	add	%o3,%o1,%o1
499*ebfedea0SLionel Sambuc	add	%o4,%o1,%o1
500*ebfedea0SLionel Sambuc	st	%o1,[%l4]
501*ebfedea0SLionel Sambuc	srlx	%o1,32,%o1
502*ebfedea0SLionel Sambuc
! Top word: un-halve the high word of the square term (double it and put the
! saved bit back), add the np chain carry and the previous pass's carry.
503*ebfedea0SLionel Sambuc	add	%o0,%o0,%o0		! recover %o0
504*ebfedea0SLionel Sambuc	or	%i2,%o0,%o0
505*ebfedea0SLionel Sambuc	add	%o0,%o1,%o1
506*ebfedea0SLionel Sambuc	add	%o2,%o1,%o1
507*ebfedea0SLionel Sambuc	st	%o1,[%l4+4]
508*ebfedea0SLionel Sambuc	srlx	%o1,32,%o2		! final carry, consumed by the tail
509*ebfedea0SLionel Sambuc
510*ebfedea0SLionel Sambuc	ba	.Ltail
511*ebfedea0SLionel Sambuc	add	%l4,8,%l4		! delay slot: %l4 = one past the last word stored above
! Symbol type and size for bn_mul_mont_int, followed by a NUL-terminated
! identification string emitted right after the code.
! NOTE(review): the string spells "Multipltication"; it is left untouched
! because it is data that ends up in the object file.
512*ebfedea0SLionel Sambuc.type	bn_mul_mont_int,#function
513*ebfedea0SLionel Sambuc.size	bn_mul_mont_int,(.-bn_mul_mont_int)
514*ebfedea0SLionel Sambuc.asciz	"Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro@openssl.org>"
515*ebfedea0SLionel Sambuc.align	32
516