! Origin: /minix3/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sparcv9a-mont.S (revision ebfedea0ce5bbe81e252ddf32d732e40fb633fae)
.section	".text",#alloc,#execinstr

!----------------------------------------------------------------------
! bn_mul_mont_fpu -- Montgomery multiplication using the FPU.
!
! Incoming arguments, as the code below uses them:
!   %i0  result vector (32-bit words)
!   %i1  a[] vector
!   %i2  b[] vector
!   %i3  n[] vector (modulus)
!   %i4  pointer to two 32-bit words holding n0
!   %i5  num, vector length in 32-bit words
! Return value in %i0:
!   0  num < 4 or num odd ("unsupported input value")
!   1  multiplication completed
! NOTE(review): presumably called through OpenSSL's
!   bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the caller.
!
! State set up by this prologue:
!   %g4       n0 as a single 64-bit value, n0[1]:n0[0]
!   %l7       0xffff, mask for one 16-bit digit
!   %i5       num converted to bytes of 64-bit limbs: (num/2)*8
!   %l0       tp, first of five num-byte vectors carved from the stack
!   %l1..%l4  END addresses of the other four vectors (a low halves,
!             a high halves, n low halves, n high halves)
!   %i0..%i3  advanced by num bytes, so the loops index them with
!             negative offsets counting up to zero
!   [%sp+2047+192+48]  caller's %asi, restored before returning 1
! 2047 is the SPARC V9 stack bias; 192+64 is frame plus local scratch.
!----------------------------------------------------------------------
.global bn_mul_mont_fpu
.align  32
bn_mul_mont_fpu:
	save	%sp,-192-64,%sp

	! Reject unsupported lengths.  The branches are annulled, so
	! "clr %i0" only executes when the branch to .Lret is taken.
	cmp	%i5,4
	bl,a,pn %icc,.Lret
	clr	%i0
	andcc	%i5,1,%g0		! %i5 has to be even...
	bnz,a,pn %icc,.Lret
	clr	%i0			! signal "unsupported input value"

	srl	%i5,1,%i5		! num = count of 64-bit limbs
	sethi	%hi(0xffff),%l7
	ld	[%i4+0],%g4		! low word of n0; %g4 holds n0 from here on
	or	%l7,%lo(0xffff),%l7	! %l7 = 0xffff
	ld	[%i4+4],%o0		! high word of n0
	sllx	%o0,32,%o0
	or	%o0,%g4,%g4		! %g4=n0[1].n0[0]

	sll	%i5,3,%i5		! num*=8

	! Carve 5*num bytes below the current frame, rounded down to a
	! 2048-byte boundary.
	add	%sp,2047,%o0		! real top of stack
	sll	%i5,2,%o1
	add	%o1,%i5,%o1		! %o1=num*5
	sub	%o0,%o1,%o0
	and	%o0,-2048,%o0		! optimize TLB utilization
	sub	%o0,2047,%sp		! alloca(5*num*8)

	rd	%asi,%o7		! save %asi
	add	%sp,2047+192+64,%l0	! %l0 = tp
	add	%l0,%i5,%l1
	add	%l1,%i5,%l1	! [an]p_[lh] point at the vectors' ends !
	add	%l1,%i5,%l2
	add	%l2,%i5,%l3
	add	%l3,%i5,%l4

	wr	%g0,210,%asi	! setup %asi for 16-bit FP loads

	add	%i0,%i5,%i0		! readjust input pointers to point
	add	%i1,%i5,%i1		! at the ends too...
	add	%i2,%i5,%i2
	add	%i3,%i5,%i3

	stx	%o7,[%sp+2047+192+48]	! save %asi

!----------------------------------------------------------------------
! First outer pass (i = 0), pipeline warm-up.
!
! %l5 = i and %l6 = j are negative byte offsets from the vector ends;
! both start at -num.  The code:
!   1. builds the 64-bit limbs a[0] and b[0] from 32-bit word pairs
!      (word at +0 is the low half, word at +4 the high half) and
!      computes m = a[0]*b[0]*n0, kept in the scratch slot at
!      [%sp+2047+192+0];
!   2. loads b[0] and m into the FPU as four 16-bit digits each
!      (%f0,%f2,%f4,%f6 = b digits 0..3; %f8,%f10,%f12,%f14 = m digits
!      0..3, lowest first) via the 16-bit FP load ASI;
!   3. for j = 0 and j = 1 splits a[j], n[j] into 32-bit halves,
!      converts them to doubles (%f16/%f18 = a low/high, %f20/%f22 =
!      n low/high), saves them in the scratch vectors for later passes
!      and forms the partial products.
! Partial sums per step: %f48,%f50,%f52,%f54 have digit weights
! 2^0,2^16,2^32,2^48 and go through the scratch slots +0..+24 to the
! integer unit; %f24,%f26 (weights 2^64,2^80) are carried to the next j.
! ".word 0x..b00c20" encodes fzeros, which clears the even register of
! a pair so the 32-bit word loaded into the odd register reads as a
! zero-extended 64-bit integer for fxtod.
!----------------------------------------------------------------------
	sub	%g0,%i5,%l5		! i=-num
	sub	%g0,%i5,%l6		! j=-num

	add	%i1,%l6,%o3		! %o3 = &a[0]
	add	%i2,%l5,%o4		! %o4 = &b[0]

	ld	[%o3+4],%g1		! a[0], upper 32 bits
	ld	[%o3+0],%o0		! a[0], lower 32 bits
	ld	[%o4+4],%g5		! b[0], upper 32 bits
	sllx	%g1,32,%g1
	ld	[%o4+0],%o1		! b[0], lower 32 bits
	sllx	%g5,32,%g5
	or	%g1,%o0,%o0		! %o0 = a[0]
	or	%g5,%o1,%o1		! %o1 = b[0]

	add	%i3,%l6,%o5		! %o5 = &n[0]

	mulx	%o1,%o0,%o0		! ap[0]*bp[0]
	mulx	%g4,%o0,%o0		! ap[0]*bp[0]*n0
	stx	%o0,[%sp+2047+192+0]

	ld	[%o3+0],%f17	! load a[j] as pair of 32-bit words
	.word	0xa1b00c20	! fzeros %f16
	ld	[%o3+4],%f19
	.word	0xa5b00c20	! fzeros %f18
	ld	[%o5+0],%f21	! load n[j] as pair of 32-bit words
	.word	0xa9b00c20	! fzeros %f20
	ld	[%o5+4],%f23
	.word	0xadb00c20	! fzeros %f22

	! transfer b[i] to FPU as 4x16-bit values
	ldda	[%o4+2]%asi,%f0
	fxtod	%f16,%f16
	ldda	[%o4+0]%asi,%f2
	fxtod	%f18,%f18
	ldda	[%o4+6]%asi,%f4
	fxtod	%f20,%f20
	ldda	[%o4+4]%asi,%f6
	fxtod	%f22,%f22

	! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
	ldda	[%sp+2047+192+6]%asi,%f8
	fxtod	%f0,%f0
	ldda	[%sp+2047+192+4]%asi,%f10
	fxtod	%f2,%f2
	ldda	[%sp+2047+192+2]%asi,%f12
	fxtod	%f4,%f4
	ldda	[%sp+2047+192+0]%asi,%f14
	fxtod	%f6,%f6

	std	%f16,[%l1+%l6]		! save smashed ap[j] in double format
	fxtod	%f8,%f8
	std	%f18,[%l2+%l6]
	fxtod	%f10,%f10
	std	%f20,[%l3+%l6]		! save smashed np[j] in double format
	fxtod	%f12,%f12
	std	%f22,[%l4+%l6]
	fxtod	%f14,%f14

	! j = 0: sixteen digit products, paired up as a*b + n*m
		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
		fmuld	%f16,%f6,%f38
	faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
		fmuld	%f18,%f0,%f40
	faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
		fmuld	%f18,%f2,%f42
	faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
		fmuld	%f18,%f4,%f44
	faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
		fmuld	%f18,%f6,%f46
	faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62

	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	! Hand the four partial sums to the integer unit through the
	! scratch slots while the addresses for j = 1 are formed.
	std	%f48,[%sp+2047+192+0]
	add	%l6,8,%l6
	std	%f50,[%sp+2047+192+8]
	add	%i1,%l6,%o4		! %o4 = &a[j]
	std	%f52,[%sp+2047+192+16]
	add	%i3,%l6,%o5		! %o5 = &n[j]
	std	%f54,[%sp+2047+192+24]

	ld	[%o4+0],%f17	! load a[j] as pair of 32-bit words
	.word	0xa1b00c20	! fzeros %f16
	ld	[%o4+4],%f19
	.word	0xa5b00c20	! fzeros %f18
	ld	[%o5+0],%f21	! load n[j] as pair of 32-bit words
	.word	0xa9b00c20	! fzeros %f20
	ld	[%o5+4],%f23
	.word	0xadb00c20	! fzeros %f22

	fxtod	%f16,%f16
	fxtod	%f18,%f18
	fxtod	%f20,%f20
	fxtod	%f22,%f22

	! j = 1 products, interleaved with carry propagation through the
	! j = 0 partial sums (each %oN holds one 16-bit digit plus excess).
	ldx	[%sp+2047+192+0],%o0
		fmuld	%f16,%f0,%f32
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f20,%f8,%f48
	ldx	[%sp+2047+192+16],%o2
		fmuld	%f16,%f2,%f34
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f20,%f10,%f50

	srlx	%o0,16,%o7
	std	%f16,[%l1+%l6]		! save smashed ap[j] in double format
		fmuld	%f16,%f4,%f36
	add	%o7,%o1,%o1
	std	%f18,[%l2+%l6]
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	srlx	%o1,16,%o7
	std	%f20,[%l3+%l6]		! save smashed np[j] in double format
		fmuld	%f16,%f6,%f38
	add	%o7,%o2,%o2
	std	%f22,[%l4+%l6]
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	srlx	%o2,16,%o7
		fmuld	%f18,%f0,%f40
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	! Only the carry out of this first word is used; the instructions
	! that would assemble the 64-bit word itself are left disabled.
	! NOTE(review): presumably because the lowest word of the sum is
	! zero by construction of m -- confirm.
	!and	%o0,%l7,%o0
	!and	%o1,%l7,%o1
	!and	%o2,%l7,%o2
	!sllx	%o1,16,%o1
	!sllx	%o2,32,%o2
	!sllx	%o3,48,%o7
	!or	%o1,%o0,%o0
	!or	%o2,%o0,%o0
	!or	%o7,%o0,%o0		! 64-bit result
	srlx	%o3,16,%g1		! 34-bit carry
		fmuld	%f18,%f2,%f42

	faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
		fmuld	%f18,%f4,%f44
	faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
		fmuld	%f18,%f6,%f46
	faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62

	faddd	%f24,%f48,%f48		! fold in the sums carried from j-1
	faddd	%f26,%f50,%f50
	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	addcc	%l6,8,%l6
	std	%f52,[%sp+2047+192+16]
	bz,pn	%icc,.L1stskip		! only two limbs: go straight to the drain
	std	%f54,[%sp+2047+192+24]	! (delay slot, always executed)

!----------------------------------------------------------------------
! .L1st -- steady-state inner loop of the first outer pass.
!
! Each iteration (j in %l6, negative and counting up to 0):
!   * loads a[j] and n[j] as 32-bit halves, converts them to doubles
!     and saves them in the scratch vectors ending at %l1..%l4;
!   * forms this step's digit products with the b/m digits held in
!     %f0..%f14;
!   * meanwhile assembles the PREVIOUS step's four partial sums (read
!     back from the scratch slots) into one 64-bit word, adds the
!     running carry %g1 and stores the word at [%l0] (tp[j-1]).
! "bcs,a %xcc,.+8" followed by an add is a conditional increment: the
! annulled delay slot runs only when the carry flag is set.
!----------------------------------------------------------------------
.align	32			! incidentally already aligned !
.L1st:
	add	%i1,%l6,%o4		! %o4 = &a[j]
	add	%i3,%l6,%o5		! %o5 = &n[j]
	ld	[%o4+0],%f17	! load a[j] as pair of 32-bit words
	.word	0xa1b00c20	! fzeros %f16
	ld	[%o4+4],%f19
	.word	0xa5b00c20	! fzeros %f18
	ld	[%o5+0],%f21	! load n[j] as pair of 32-bit words
	.word	0xa9b00c20	! fzeros %f20
	ld	[%o5+4],%f23
	.word	0xadb00c20	! fzeros %f22

	fxtod	%f16,%f16
	fxtod	%f18,%f18
	fxtod	%f20,%f20
	fxtod	%f22,%f22

	ldx	[%sp+2047+192+0],%o0	! previous step's partial sums
		fmuld	%f16,%f0,%f32
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f20,%f8,%f48
	ldx	[%sp+2047+192+16],%o2
		fmuld	%f16,%f2,%f34
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f20,%f10,%f50

	srlx	%o0,16,%o7		! ripple the excess above 16 bits upward
	std	%f16,[%l1+%l6]		! save smashed ap[j] in double format
		fmuld	%f16,%f4,%f36
	add	%o7,%o1,%o1
	std	%f18,[%l2+%l6]
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	srlx	%o1,16,%o7
	std	%f20,[%l3+%l6]		! save smashed np[j] in double format
		fmuld	%f16,%f6,%f38
	add	%o7,%o2,%o2
	std	%f22,[%l4+%l6]
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	srlx	%o2,16,%o7
		fmuld	%f18,%f0,%f40
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0		! keep 16 bits of each lower digit
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
		fmuld	%f18,%f2,%f42
	sllx	%o1,16,%o1
		faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
	sllx	%o2,32,%o2
		fmuld	%f18,%f4,%f44
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
		faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
	or	%o2,%o0,%o0
		fmuld	%f18,%f6,%f46
	or	%o7,%o0,%o0		! 64-bit result
		faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62
	addcc	%g1,%o0,%o0		! add carry from the previous word
		faddd	%f24,%f48,%f48
	srlx	%o3,16,%g1		! 34-bit carry
		faddd	%f26,%f50,%f50
	bcs,a	%xcc,.+8		! carry out of the addcc above?
	add	%g1,1,%g1		! ...then bump the carry

	stx	%o0,[%l0]		! tp[j-1]=

	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	std	%f52,[%sp+2047+192+16]
	std	%f54,[%sp+2047+192+24]

	addcc	%l6,8,%l6
	bnz,pt	%icc,.L1st
	add	%l0,8,%l0		! advance tp (delay slot, always executed)

!----------------------------------------------------------------------
! .L1stskip -- drain the pipeline after the first outer pass.
!
! The last step's four partial sums are still in the scratch slots and
! the two sums carried past the last limb are in %f24/%f26.  This code
! assembles and stores the final two words of tp, leaves the overflow
! above tp[num-1] in %i4 (the n0 pointer it held was consumed when n0
! was loaded into %g4), then advances i and enters .Louter.
!----------------------------------------------------------------------
.L1stskip:
	fdtox	%f24,%f24
	fdtox	%f26,%f26

	ldx	[%sp+2047+192+0],%o0
	ldx	[%sp+2047+192+8],%o1
	ldx	[%sp+2047+192+16],%o2
	ldx	[%sp+2047+192+24],%o3

	srlx	%o0,16,%o7
	std	%f24,[%sp+2047+192+32]	! spill the two carried sums
	add	%o7,%o1,%o1
	std	%f26,[%sp+2047+192+40]
	srlx	%o1,16,%o7
	add	%o7,%o2,%o2
	srlx	%o2,16,%o7
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
	sllx	%o1,16,%o1
	sllx	%o2,32,%o2
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
	or	%o2,%o0,%o0
	or	%o7,%o0,%o0		! 64-bit result
	ldx	[%sp+2047+192+32],%o4
	addcc	%g1,%o0,%o0
	ldx	[%sp+2047+192+40],%o5
	srlx	%o3,16,%g1		! 34-bit carry
	bcs,a	%xcc,.+8		! conditional increment on carry
	add	%g1,1,%g1

	stx	%o0,[%l0]		! tp[j-1]=
	add	%l0,8,%l0

	! Top word: %o4 contributes its low 16 bits, %o5 (plus the excess
	! of %o4) everything above.
	srlx	%o4,16,%o7
	add	%o7,%o5,%o5
	and	%o4,%l7,%o4
	sllx	%o5,16,%o7
	or	%o7,%o4,%o4
	addcc	%g1,%o4,%o4
	srlx	%o5,48,%g1
	bcs,a	%xcc,.+8		! conditional increment on carry
	add	%g1,1,%g1

	mov	%g1,%i4			! %i4 = overflow above tp[num-1]
	stx	%o4,[%l0]		! tp[num-1]=

	ba	.Louter
	add	%l5,8,%l5		! i++ (delay slot)
!----------------------------------------------------------------------
! .Louter -- head of every outer pass after the first (i in %l5).
!
! Recomputes m = (a[0]*b[i] + tp[0])*n0, loads b[i] and m into the FPU
! as 16-bit digits (%f0..%f6 and %f8..%f14), then runs the first two
! j-steps of the pipeline.  Unlike the first pass, a[j] and n[j] are
! not re-split: their double-format halves are read back from the
! scratch vectors ending at %l1..%l4.
!----------------------------------------------------------------------
.align	32
.Louter:
	sub	%g0,%i5,%l6		! j=-num
	add	%sp,2047+192+64,%l0	! rewind tp

	add	%i1,%l6,%o3		! %o3 = &a[0]
	add	%i2,%l5,%o4		! %o4 = &b[i]

	ld	[%o3+4],%g1		! a[0], upper 32 bits
	ld	[%o3+0],%o0		! a[0], lower 32 bits
	ld	[%o4+4],%g5		! b[i], upper 32 bits
	sllx	%g1,32,%g1
	ld	[%o4+0],%o1		! b[i], lower 32 bits
	sllx	%g5,32,%g5
	or	%g1,%o0,%o0		! %o0 = a[0]
	or	%g5,%o1,%o1		! %o1 = b[i]

	ldx	[%l0],%o2		! tp[0]
	mulx	%o1,%o0,%o0
	addcc	%o2,%o0,%o0
	mulx	%g4,%o0,%o0		! (ap[0]*bp[i]+t[0])*n0
	stx	%o0,[%sp+2047+192+0]

	! transfer b[i] to FPU as 4x16-bit values
	ldda	[%o4+2]%asi,%f0
	ldda	[%o4+0]%asi,%f2
	ldda	[%o4+6]%asi,%f4
	ldda	[%o4+4]%asi,%f6

	! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
	ldda	[%sp+2047+192+6]%asi,%f8
	fxtod	%f0,%f0
	ldda	[%sp+2047+192+4]%asi,%f10
	fxtod	%f2,%f2
	ldda	[%sp+2047+192+2]%asi,%f12
	fxtod	%f4,%f4
	ldda	[%sp+2047+192+0]%asi,%f14
	fxtod	%f6,%f6
	ldd	[%l1+%l6],%f16		! load a[j] in double format
	fxtod	%f8,%f8
	ldd	[%l2+%l6],%f18
	fxtod	%f10,%f10
	ldd	[%l3+%l6],%f20		! load n[j] in double format
	fxtod	%f12,%f12
	ldd	[%l4+%l6],%f22
	fxtod	%f14,%f14

	! j = 0 products
		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
		fmuld	%f16,%f6,%f38
	faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
		fmuld	%f18,%f0,%f40
	faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
		fmuld	%f18,%f2,%f42
	faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
		fmuld	%f18,%f4,%f44
	faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
		fmuld	%f18,%f6,%f46
	faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62

	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	std	%f52,[%sp+2047+192+16]
	add	%l6,8,%l6
	std	%f54,[%sp+2047+192+24]

	ldd	[%l1+%l6],%f16		! load a[j] in double format
	ldd	[%l2+%l6],%f18
	ldd	[%l3+%l6],%f20		! load n[j] in double format
	ldd	[%l4+%l6],%f22

	! j = 1 products, interleaved with carry propagation through the
	! j = 0 partial sums
		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	ldx	[%sp+2047+192+0],%o0
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f16,%f6,%f38
	ldx	[%sp+2047+192+16],%o2
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f18,%f0,%f40

	srlx	%o0,16,%o7
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	add	%o7,%o1,%o1
		fmuld	%f18,%f2,%f42
	srlx	%o1,16,%o7
		faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
	add	%o7,%o2,%o2
		fmuld	%f18,%f4,%f44
	srlx	%o2,16,%o7
		faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	! The word assembled from here to the addcc below is never
	! stored; it is built only so that adding tp[0] yields the
	! carry-out that is folded into %g1.
	and	%o0,%l7,%o0
		fmuld	%f18,%f6,%f46
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
		faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62
	sllx	%o1,16,%o1
		faddd	%f24,%f48,%f48
	sllx	%o2,32,%o2
		faddd	%f26,%f50,%f50
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
		faddd	%f44,%f60,%f24	! %f60
	or	%o2,%o0,%o0
		faddd	%f46,%f62,%f26	! %f62
	or	%o7,%o0,%o0		! 64-bit result
	ldx	[%l0],%o7		! tp[0]
		faddd	%f52,%f56,%f52
	addcc	%o7,%o0,%o0
	! end of the carry-only computation
		faddd	%f54,%f58,%f54
	srlx	%o3,16,%g1		! 34-bit carry
		fdtox	%f48,%f48
	bcs,a	%xcc,.+8		! conditional increment on carry
	add	%g1,1,%g1

	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	addcc	%l6,8,%l6
	std	%f52,[%sp+2047+192+16]
	bz,pn	%icc,.Linnerskip	! only two limbs: go straight to the drain
	std	%f54,[%sp+2047+192+24]	! (delay slot, always executed)

	ba	.Linner
	nop
!----------------------------------------------------------------------
! .Linner -- steady-state inner loop of outer passes 1..num-1.
!
! Per iteration (j in %l6, negative and counting up to 0):
!   * reloads the double-format halves of a[j] and n[j] from the
!     scratch vectors ending at %l1..%l4 and forms this step's digit
!     products with the digits in %f0..%f14;
!   * assembles the previous step's partial sums into a 64-bit word,
!     adds the running carry %g1 and the old tp[j] (read from
!     [%l0+8]), and stores the result one slot lower, at [%l0].
! Both additions can carry; each "bcs,a %xcc,.+8 / add" pair bumps
! %g1 only when its addcc set the carry flag.
!----------------------------------------------------------------------
.align	32
.Linner:
	ldd	[%l1+%l6],%f16		! load a[j] in double format
	ldd	[%l2+%l6],%f18
	ldd	[%l3+%l6],%f20		! load n[j] in double format
	ldd	[%l4+%l6],%f22

		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	ldx	[%sp+2047+192+0],%o0	! previous step's partial sums
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f16,%f6,%f38
	ldx	[%sp+2047+192+16],%o2
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f18,%f0,%f40

	srlx	%o0,16,%o7
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	add	%o7,%o1,%o1
		fmuld	%f18,%f2,%f42
	srlx	%o1,16,%o7
		faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
	add	%o7,%o2,%o2
		fmuld	%f18,%f4,%f44
	srlx	%o2,16,%o7
		faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
		fmuld	%f18,%f6,%f46
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
		faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62
	sllx	%o1,16,%o1
		faddd	%f24,%f48,%f48
	sllx	%o2,32,%o2
		faddd	%f26,%f50,%f50
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
		faddd	%f44,%f60,%f24	! %f60
	or	%o2,%o0,%o0
		faddd	%f46,%f62,%f26	! %f62
	or	%o7,%o0,%o0		! 64-bit result
		faddd	%f52,%f56,%f52
	addcc	%g1,%o0,%o0		! add carry from the previous word
	ldx	[%l0+8],%o7		! tp[j]
		faddd	%f54,%f58,%f54
	srlx	%o3,16,%g1		! 34-bit carry
		fdtox	%f48,%f48
	bcs,a	%xcc,.+8		! carry from adding the old %g1
	add	%g1,1,%g1
		fdtox	%f50,%f50
	addcc	%o7,%o0,%o0		! add the old tp[j]
		fdtox	%f52,%f52
	bcs,a	%xcc,.+8		! carry from adding tp[j]
	add	%g1,1,%g1

	stx	%o0,[%l0]		! tp[j-1]
		fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	std	%f52,[%sp+2047+192+16]
	addcc	%l6,8,%l6
	std	%f54,[%sp+2047+192+24]
	bnz,pt	%icc,.Linner
	add	%l0,8,%l0		! advance tp (delay slot, always executed)

!----------------------------------------------------------------------
! .Linnerskip -- drain the pipeline at the end of an outer pass and
! close the outer loop.
!
! Assembles the last two words of this pass: the first also takes the
! old tp[j] from [%l0+8]; the top word takes the previous overflow
! held in %i4.  The new overflow is left in %i4.  When i (%l5) reaches
! zero, control falls through to set up the final subtraction: %l0 is
! moved to the end of tp, %o7 = -num, and the carry flag is cleared
! for the subccc chain in .Lsub.
!----------------------------------------------------------------------
.Linnerskip:
	fdtox	%f24,%f24
	fdtox	%f26,%f26

	ldx	[%sp+2047+192+0],%o0
	ldx	[%sp+2047+192+8],%o1
	ldx	[%sp+2047+192+16],%o2
	ldx	[%sp+2047+192+24],%o3

	srlx	%o0,16,%o7
	std	%f24,[%sp+2047+192+32]	! spill the two carried sums
	add	%o7,%o1,%o1
	std	%f26,[%sp+2047+192+40]
	srlx	%o1,16,%o7
	add	%o7,%o2,%o2
	srlx	%o2,16,%o7
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
	sllx	%o1,16,%o1
	sllx	%o2,32,%o2
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
	or	%o2,%o0,%o0
	ldx	[%sp+2047+192+32],%o4
	or	%o7,%o0,%o0		! 64-bit result
	ldx	[%sp+2047+192+40],%o5
	addcc	%g1,%o0,%o0
	ldx	[%l0+8],%o7		! tp[j]
	srlx	%o3,16,%g1		! 34-bit carry
	bcs,a	%xcc,.+8		! carry from adding the old %g1
	add	%g1,1,%g1

	addcc	%o7,%o0,%o0
	bcs,a	%xcc,.+8		! carry from adding tp[j]
	add	%g1,1,%g1

	stx	%o0,[%l0]		! tp[j-1]
	add	%l0,8,%l0

	srlx	%o4,16,%o7
	add	%o7,%o5,%o5
	and	%o4,%l7,%o4
	sllx	%o5,16,%o7
	or	%o7,%o4,%o4
	addcc	%g1,%o4,%o4
	srlx	%o5,48,%g1
	bcs,a	%xcc,.+8		! conditional increment on carry
	add	%g1,1,%g1

	addcc	%i4,%o4,%o4		! add the overflow of the previous pass
	stx	%o4,[%l0]		! tp[num-1]
	mov	%g1,%i4			! mov leaves the flags from addcc intact
	bcs,a	%xcc,.+8
	add	%i4,1,%i4

	addcc	%l5,8,%l5		! i++
	bnz	%icc,.Louter
	nop

	add	%l0,8,%l0		! adjust tp to point at the end
	orn	%g0,%g0,%g4		! %g4 = all ones (n0 is no longer needed)
	sub	%g0,%i5,%o7		! n=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c

!----------------------------------------------------------------------
! .Lsub -- result = tp - n, computed 32 bits at a time.
!
! %o7 runs from -num to 0 in steps of 8.  Each 64-bit tp word is split
! into its low half (%o0) and high half (%o1); the matching 32-bit
! words of n are subtracted with borrow (subccc) and the differences
! are stored into the result vector at %i0.  Neither "add" nor the
! register-tested "brnz" touches the condition codes, so the borrow
! survives from one iteration to the next.
! Afterwards %g4 = %i4 - borrow, where %i4 is the overflow left above
! tp: %g4 is all ones exactly when %i4 is 0 and the subtraction
! borrowed, otherwise 0 (for %i4 in {0,1}).  .Lcopy uses it as a
! select mask.
!----------------------------------------------------------------------
.align	32
.Lsub:
	ldx	[%l0+%o7],%o0		! tp word (low 32 bits used below)
	add	%i3,%o7,%g1		! %g1 = &n[...]
	ld	[%g1+0],%o2		! n, low word
	ld	[%g1+4],%o3		! n, high word
	srlx	%o0,32,%o1		! tp word, high 32 bits
	subccc	%o0,%o2,%o2
	add	%i0,%o7,%g1		! %g1 = &result[...]
	subccc	%o1,%o3,%o3
	st	%o2,[%g1+0]
	add	%o7,8,%o7
	brnz,pt	%o7,.Lsub
	st	%o3,[%g1+4]		! (delay slot, always executed)
	subc	%i4,0,%g4		! %g4 = overflow - borrow
	sub	%g0,%i5,%o7		! n=-num
	ba	.Lcopy
	nop

!----------------------------------------------------------------------
! .Lcopy -- branch-free choice between tp and the difference already
! stored in the result vector, wiping tp on the way.
!
! %g4 is a mask: all ones keeps tp, zero keeps the words that are
! currently in the result vector (%i0).  Each tp word is cleared
! (stx %g0) right after it is read.  %o7 runs from -num to 0 in steps
! of 8 and is reset to -num on exit for the loop that follows.
!----------------------------------------------------------------------
.align	32
.Lcopy:
	ldx	[%l0+%o7],%o0		! tp word
	add	%i0,%o7,%g1		! %g1 = &result[...]
	ld	[%g1+0],%o2		! current result, low word
	ld	[%g1+4],%o3		! current result, high word
	stx	%g0,[%l0+%o7]		! zap tp
	and	%o0,%g4,%o0		! tp if mask set, else 0
	srlx	%o0,32,%o1		! its high 32 bits
	andn	%o2,%g4,%o2		! result if mask clear, else 0
	andn	%o3,%g4,%o3
	or	%o2,%o0,%o0
	or	%o3,%o1,%o1
	st	%o0,[%g1+0]
	add	%o7,8,%o7
	brnz,pt	%o7,.Lcopy
	st	%o1,[%g1+4]		! (delay slot, always executed)
	sub	%g0,%i5,%o7		! n=-num

!----------------------------------------------------------------------
! .Lzap -- clear the four scratch vectors that end at %l1..%l4, then
! restore the caller's %asi and return 1.
! .Lret is also the target of the early "unsupported input" exits,
! which arrive with %i0 = 0 and before %asi was changed.
!----------------------------------------------------------------------
.Lzap:
	stx	%g0,[%l1+%o7]
	stx	%g0,[%l2+%o7]
	stx	%g0,[%l3+%o7]
	stx	%g0,[%l4+%o7]
	add	%o7,8,%o7
	brnz,pt	%o7,.Lzap
	nop

	ldx	[%sp+2047+192+48],%o7	! %asi value saved in the prologue
	wr	%g0,%o7,%asi		! restore %asi

	mov	1,%i0			! success
.Lret:
	ret
	restore
.type   bn_mul_mont_fpu,#function
.size	bn_mul_mont_fpu,(.-bn_mul_mont_fpu)
.asciz	"Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro@openssl.org>"
.align	32