xref: /netbsd-src/crypto/external/bsd/openssl.old/lib/libcrypto/arch/sparc64/aest4-sparcv9.S (revision 5dd36a3bc8bf2a9dec29ceb6349550414570c447)
1.register	%g2,#scratch
2.register	%g3,#scratch
3
4.text
5
6.globl	aes_t4_encrypt
7.align	32
8aes_t4_encrypt:
9	andcc		%o0, 7, %g1		! is input aligned?
10	andn		%o0, 7, %o0
11
12	ldx		[%o2 + 0], %g4
13	ldx		[%o2 + 8], %g5
14
15	ldx		[%o0 + 0], %o4
16	bz,pt		%icc, 1f
17	ldx		[%o0 + 8], %o5
18	ldx		[%o0 + 16], %o0
19	sll		%g1, 3, %g1
20	sub		%g0, %g1, %o3
21	sllx		%o4, %g1, %o4
22	sllx		%o5, %g1, %g1
23	srlx		%o5, %o3, %o5
24	srlx		%o0, %o3, %o3
25	or		%o5, %o4, %o4
26	or		%o3, %g1, %o5
271:
28	ld		[%o2 + 240], %o3
29	ldd		[%o2 + 16], %f12
30	ldd		[%o2 + 24], %f14
31	xor		%g4, %o4, %o4
32	xor		%g5, %o5, %o5
33	.word	0x81b0230c !movxtod	%o4,%f0
34	.word	0x85b0230d !movxtod	%o5,%f2
35	srl		%o3, 1, %o3
36	ldd		[%o2 + 32], %f16
37	sub		%o3, 1, %o3
38	ldd		[%o2 + 40], %f18
39	add		%o2, 48, %o2
40
41.Lenc:
42	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
43	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
44	ldd		[%o2 + 0], %f12
45	ldd		[%o2 + 8], %f14
46	sub		%o3,1,%o3
47	.word	0x80cc0404 !aes_eround01	%f16,%f4,%f2,%f0
48	.word	0x84cc8424 !aes_eround23	%f18,%f4,%f2,%f2
49	ldd		[%o2 + 16], %f16
50	ldd		[%o2 + 24], %f18
51	brnz,pt		%o3, .Lenc
52	add		%o2, 32, %o2
53
54	andcc		%o1, 7, %o4		! is output aligned?
55	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
56	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
57	.word	0x80cc0484 !aes_eround01_l	%f16,%f4,%f2,%f0
58	.word	0x84cc84a4 !aes_eround23_l	%f18,%f4,%f2,%f2
59
60	bnz,pn		%icc, 2f
61	nop
62
63	std		%f0, [%o1 + 0]
64	retl
65	std		%f2, [%o1 + 8]
66
672:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
68	mov		0xff, %o5
69	srl		%o5, %o4, %o5
70
71	.word	0x89b00900 !faligndata	%f0,%f0,%f4
72	.word	0x8db00902 !faligndata	%f0,%f2,%f6
73	.word	0x91b08902 !faligndata	%f2,%f2,%f8
74
75	stda		%f4, [%o1 + %o5]0xc0	! partial store
76	std		%f6, [%o1 + 8]
77	add		%o1, 16, %o1
78	orn		%g0, %o5, %o5
79	retl
80	stda		%f8, [%o1 + %o5]0xc0	! partial store
81.type	aes_t4_encrypt,#function
82.size	aes_t4_encrypt,.-aes_t4_encrypt
83
84.globl	aes_t4_decrypt
85.align	32
86aes_t4_decrypt:
87	andcc		%o0, 7, %g1		! is input aligned?
88	andn		%o0, 7, %o0
89
90	ldx		[%o2 + 0], %g4
91	ldx		[%o2 + 8], %g5
92
93	ldx		[%o0 + 0], %o4
94	bz,pt		%icc, 1f
95	ldx		[%o0 + 8], %o5
96	ldx		[%o0 + 16], %o0
97	sll		%g1, 3, %g1
98	sub		%g0, %g1, %o3
99	sllx		%o4, %g1, %o4
100	sllx		%o5, %g1, %g1
101	srlx		%o5, %o3, %o5
102	srlx		%o0, %o3, %o3
103	or		%o5, %o4, %o4
104	or		%o3, %g1, %o5
1051:
106	ld		[%o2 + 240], %o3
107	ldd		[%o2 + 16], %f12
108	ldd		[%o2 + 24], %f14
109	xor		%g4, %o4, %o4
110	xor		%g5, %o5, %o5
111	.word	0x81b0230c !movxtod	%o4,%f0
112	.word	0x85b0230d !movxtod	%o5,%f2
113	srl		%o3, 1, %o3
114	ldd		[%o2 + 32], %f16
115	sub		%o3, 1, %o3
116	ldd		[%o2 + 40], %f18
117	add		%o2, 48, %o2
118
119.Ldec:
120	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
121	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
122	ldd		[%o2 + 0], %f12
123	ldd		[%o2 + 8], %f14
124	sub		%o3,1,%o3
125	.word	0x80cc0444 !aes_dround01	%f16,%f4,%f2,%f0
126	.word	0x84cc8464 !aes_dround23	%f18,%f4,%f2,%f2
127	ldd		[%o2 + 16], %f16
128	ldd		[%o2 + 24], %f18
129	brnz,pt		%o3, .Ldec
130	add		%o2, 32, %o2
131
132	andcc		%o1, 7, %o4		! is output aligned?
133	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
134	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
135	.word	0x80cc04c4 !aes_dround01_l	%f16,%f4,%f2,%f0
136	.word	0x84cc84e4 !aes_dround23_l	%f18,%f4,%f2,%f2
137
138	bnz,pn		%icc, 2f
139	nop
140
141	std		%f0, [%o1 + 0]
142	retl
143	std		%f2, [%o1 + 8]
144
1452:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
146	mov		0xff, %o5
147	srl		%o5, %o4, %o5
148
149	.word	0x89b00900 !faligndata	%f0,%f0,%f4
150	.word	0x8db00902 !faligndata	%f0,%f2,%f6
151	.word	0x91b08902 !faligndata	%f2,%f2,%f8
152
153	stda		%f4, [%o1 + %o5]0xc0	! partial store
154	std		%f6, [%o1 + 8]
155	add		%o1, 16, %o1
156	orn		%g0, %o5, %o5
157	retl
158	stda		%f8, [%o1 + %o5]0xc0	! partial store
159.type	aes_t4_decrypt,#function
160.size	aes_t4_decrypt,.-aes_t4_decrypt
161.globl	aes_t4_set_encrypt_key
162.align	32
163aes_t4_set_encrypt_key:
164.Lset_encrypt_key:
165	and		%o0, 7, %o3
166	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
167	cmp		%o1, 192
168	ldd		[%o0 + 0], %f0
169	bl,pt		%icc,.L128
170	ldd		[%o0 + 8], %f2
171
172	be,pt		%icc,.L192
173	ldd		[%o0 + 16], %f4
174	brz,pt		%o3, .L256aligned
175	ldd		[%o0 + 24], %f6
176
177	ldd		[%o0 + 32], %f8
178	.word	0x81b00902 !faligndata	%f0,%f2,%f0
179	.word	0x85b08904 !faligndata	%f2,%f4,%f2
180	.word	0x89b10906 !faligndata	%f4,%f6,%f4
181	.word	0x8db18908 !faligndata	%f6,%f8,%f6
182.L256aligned:
183	std		%f0, [%o2 + 0]
184	.word	0x80c80106 !aes_kexpand1	%f0,%f6,0,%f0
185	std		%f2, [%o2 + 8]
186	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
187	std		%f4, [%o2 + 16]
188	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
189	std		%f6, [%o2 + 24]
190	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
191	std		%f0, [%o2 + 32]
192	.word	0x80c80306 !aes_kexpand1	%f0,%f6,1,%f0
193	std		%f2, [%o2 + 40]
194	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
195	std		%f4, [%o2 + 48]
196	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
197	std		%f6, [%o2 + 56]
198	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
199	std		%f0, [%o2 + 64]
200	.word	0x80c80506 !aes_kexpand1	%f0,%f6,2,%f0
201	std		%f2, [%o2 + 72]
202	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
203	std		%f4, [%o2 + 80]
204	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
205	std		%f6, [%o2 + 88]
206	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
207	std		%f0, [%o2 + 96]
208	.word	0x80c80706 !aes_kexpand1	%f0,%f6,3,%f0
209	std		%f2, [%o2 + 104]
210	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
211	std		%f4, [%o2 + 112]
212	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
213	std		%f6, [%o2 + 120]
214	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
215	std		%f0, [%o2 + 128]
216	.word	0x80c80906 !aes_kexpand1	%f0,%f6,4,%f0
217	std		%f2, [%o2 + 136]
218	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
219	std		%f4, [%o2 + 144]
220	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
221	std		%f6, [%o2 + 152]
222	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
223	std		%f0, [%o2 + 160]
224	.word	0x80c80b06 !aes_kexpand1	%f0,%f6,5,%f0
225	std		%f2, [%o2 + 168]
226	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
227	std		%f4, [%o2 + 176]
228	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
229	std		%f6, [%o2 + 184]
230	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
231	std		%f0, [%o2 + 192]
232	.word	0x80c80d06 !aes_kexpand1	%f0,%f6,6,%f0
233	std		%f2, [%o2 + 200]
234	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
235	std		%f4, [%o2 + 208]
236	std		%f6, [%o2 + 216]
237	std		%f0, [%o2 + 224]
238	std		%f2, [%o2 + 232]
239
240	mov		14, %o3
241	st		%o3, [%o2 + 240]
242	retl
243	xor		%o0, %o0, %o0
244
245.align	16
246.L192:
247	brz,pt		%o3, .L192aligned
248	nop
249
250	ldd		[%o0 + 24], %f6
251	.word	0x81b00902 !faligndata	%f0,%f2,%f0
252	.word	0x85b08904 !faligndata	%f2,%f4,%f2
253	.word	0x89b10906 !faligndata	%f4,%f6,%f4
254.L192aligned:
255	std		%f0, [%o2 + 0]
256	.word	0x80c80104 !aes_kexpand1	%f0,%f4,0,%f0
257	std		%f2, [%o2 + 8]
258	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
259	std		%f4, [%o2 + 16]
260	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
261	std		%f0, [%o2 + 24]
262	.word	0x80c80304 !aes_kexpand1	%f0,%f4,1,%f0
263	std		%f2, [%o2 + 32]
264	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
265	std		%f4, [%o2 + 40]
266	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
267	std		%f0, [%o2 + 48]
268	.word	0x80c80504 !aes_kexpand1	%f0,%f4,2,%f0
269	std		%f2, [%o2 + 56]
270	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
271	std		%f4, [%o2 + 64]
272	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
273	std		%f0, [%o2 + 72]
274	.word	0x80c80704 !aes_kexpand1	%f0,%f4,3,%f0
275	std		%f2, [%o2 + 80]
276	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
277	std		%f4, [%o2 + 88]
278	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
279	std		%f0, [%o2 + 96]
280	.word	0x80c80904 !aes_kexpand1	%f0,%f4,4,%f0
281	std		%f2, [%o2 + 104]
282	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
283	std		%f4, [%o2 + 112]
284	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
285	std		%f0, [%o2 + 120]
286	.word	0x80c80b04 !aes_kexpand1	%f0,%f4,5,%f0
287	std		%f2, [%o2 + 128]
288	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
289	std		%f4, [%o2 + 136]
290	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
291	std		%f0, [%o2 + 144]
292	.word	0x80c80d04 !aes_kexpand1	%f0,%f4,6,%f0
293	std		%f2, [%o2 + 152]
294	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
295	std		%f4, [%o2 + 160]
296	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
297	std		%f0, [%o2 + 168]
298	.word	0x80c80f04 !aes_kexpand1	%f0,%f4,7,%f0
299	std		%f2, [%o2 + 176]
300	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
301	std		%f4, [%o2 + 184]
302	std		%f0, [%o2 + 192]
303	std		%f2, [%o2 + 200]
304
305	mov		12, %o3
306	st		%o3, [%o2 + 240]
307	retl
308	xor		%o0, %o0, %o0
309
310.align	16
311.L128:
312	brz,pt		%o3, .L128aligned
313	nop
314
315	ldd		[%o0 + 16], %f4
316	.word	0x81b00902 !faligndata	%f0,%f2,%f0
317	.word	0x85b08904 !faligndata	%f2,%f4,%f2
318.L128aligned:
319	std		%f0, [%o2 + 0]
320	.word	0x80c80102 !aes_kexpand1	%f0,%f2,0,%f0
321	std		%f2, [%o2 + 8]
322	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
323	std		%f0, [%o2 + 16]
324	.word	0x80c80302 !aes_kexpand1	%f0,%f2,1,%f0
325	std		%f2, [%o2 + 24]
326	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
327	std		%f0, [%o2 + 32]
328	.word	0x80c80502 !aes_kexpand1	%f0,%f2,2,%f0
329	std		%f2, [%o2 + 40]
330	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
331	std		%f0, [%o2 + 48]
332	.word	0x80c80702 !aes_kexpand1	%f0,%f2,3,%f0
333	std		%f2, [%o2 + 56]
334	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
335	std		%f0, [%o2 + 64]
336	.word	0x80c80902 !aes_kexpand1	%f0,%f2,4,%f0
337	std		%f2, [%o2 + 72]
338	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
339	std		%f0, [%o2 + 80]
340	.word	0x80c80b02 !aes_kexpand1	%f0,%f2,5,%f0
341	std		%f2, [%o2 + 88]
342	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
343	std		%f0, [%o2 + 96]
344	.word	0x80c80d02 !aes_kexpand1	%f0,%f2,6,%f0
345	std		%f2, [%o2 + 104]
346	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
347	std		%f0, [%o2 + 112]
348	.word	0x80c80f02 !aes_kexpand1	%f0,%f2,7,%f0
349	std		%f2, [%o2 + 120]
350	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
351	std		%f0, [%o2 + 128]
352	.word	0x80c81102 !aes_kexpand1	%f0,%f2,8,%f0
353	std		%f2, [%o2 + 136]
354	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
355	std		%f0, [%o2 + 144]
356	.word	0x80c81302 !aes_kexpand1	%f0,%f2,9,%f0
357	std		%f2, [%o2 + 152]
358	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
359	std		%f0, [%o2 + 160]
360	std		%f2, [%o2 + 168]
361
362	mov		10, %o3
363	st		%o3, [%o2 + 240]
364	retl
365	xor		%o0, %o0, %o0
366.type	aes_t4_set_encrypt_key,#function
367.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
368
369.globl	aes_t4_set_decrypt_key
370.align	32
371aes_t4_set_decrypt_key:
372	mov		%o7, %o5
373	call		.Lset_encrypt_key
374	nop
375
376	mov		%o5, %o7
377	sll		%o3, 4, %o0		! %o3 is number of rounds
378	add		%o3, 2, %o3
379	add		%o2, %o0, %o0	! %o0=%o2+16*rounds
380	srl		%o3, 2, %o3		! %o3=(rounds+2)/4
381
382.Lkey_flip:
383	ldd		[%o2 + 0],  %f0
384	ldd		[%o2 + 8],  %f2
385	ldd		[%o2 + 16], %f4
386	ldd		[%o2 + 24], %f6
387	ldd		[%o0 + 0],  %f8
388	ldd		[%o0 + 8],  %f10
389	ldd		[%o0 - 16], %f12
390	ldd		[%o0 - 8],  %f14
391	sub		%o3, 1, %o3
392	std		%f0, [%o0 + 0]
393	std		%f2, [%o0 + 8]
394	std		%f4, [%o0 - 16]
395	std		%f6, [%o0 - 8]
396	std		%f8, [%o2 + 0]
397	std		%f10, [%o2 + 8]
398	std		%f12, [%o2 + 16]
399	std		%f14, [%o2 + 24]
400	add		%o2, 32, %o2
401	brnz		%o3, .Lkey_flip
402	sub		%o0, 32, %o0
403
404	retl
405	xor		%o0, %o0, %o0
406.type	aes_t4_set_decrypt_key,#function
407.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
408.align	32
409_aes128_encrypt_1x:
410	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
411	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
412	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
413	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
414	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
415	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
416	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
417	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
418	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
419	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
420	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
421	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
422	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
423	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
424	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
425	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
426	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
427	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
428	.word	0x80cd4484 !aes_eround01_l	%f52,%f4,%f2,%f0
429	retl
430	.word	0x84cdc4a4 !aes_eround23_l	%f54,%f4,%f2,%f2
431.type	_aes128_encrypt_1x,#function
432.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x
433
434.align	32
435_aes128_encrypt_2x:
436	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
437	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
438	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
439	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
440	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
441	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
442	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
443	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
444	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
445	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
446	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
447	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
448	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
449	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
450	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
451	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
452	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
453	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
454	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
455	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
456	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
457	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
458	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
459	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
460	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
461	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
462	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
463	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
464	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
465	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
466	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
467	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
468	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
469	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
470	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
471	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
472	.word	0x80cd4488 !aes_eround01_l	%f52,%f8,%f2,%f0
473	.word	0x84cdc4a8 !aes_eround23_l	%f54,%f8,%f2,%f2
474	.word	0x88cd4c8a !aes_eround01_l	%f52,%f10,%f6,%f4
475	retl
476	.word	0x8ccdccaa !aes_eround23_l	%f54,%f10,%f6,%f6
477.type	_aes128_encrypt_2x,#function
478.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x
479
480.align	32
481_aes128_loadkey:
482	ldx		[%i3 + 0], %g4
483	ldx		[%i3 + 8], %g5
484	ldd		[%i3 + 16], %f16
485	ldd		[%i3 + 24], %f18
486	ldd		[%i3 + 32], %f20
487	ldd		[%i3 + 40], %f22
488	ldd		[%i3 + 48], %f24
489	ldd		[%i3 + 56], %f26
490	ldd		[%i3 + 64], %f28
491	ldd		[%i3 + 72], %f30
492	ldd		[%i3 + 80], %f32
493	ldd		[%i3 + 88], %f34
494	ldd		[%i3 + 96], %f36
495	ldd		[%i3 + 104], %f38
496	ldd		[%i3 + 112], %f40
497	ldd		[%i3 + 120], %f42
498	ldd		[%i3 + 128], %f44
499	ldd		[%i3 + 136], %f46
500	ldd		[%i3 + 144], %f48
501	ldd		[%i3 + 152], %f50
502	ldd		[%i3 + 160], %f52
503	ldd		[%i3 + 168], %f54
504	retl
505	nop
506.type	_aes128_loadkey,#function
507.size	_aes128_loadkey,.-_aes128_loadkey
508_aes128_load_enckey=_aes128_loadkey
509_aes128_load_deckey=_aes128_loadkey
510
511.globl	aes128_t4_cbc_encrypt
512.align	32
513aes128_t4_cbc_encrypt:
514	save		%sp, -192, %sp
515	cmp		%i2, 0
516	be,pn		%xcc, .L128_cbc_enc_abort
517	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
518	sub		%i0, %i1, %l5	! %i0!=%i1
519	ld		[%i4 + 0], %f0
520	ld		[%i4 + 4], %f1
521	ld		[%i4 + 8], %f2
522	ld		[%i4 + 12], %f3
523	prefetch	[%i0], 20
524	prefetch	[%i0 + 63], 20
525	call		_aes128_load_enckey
526	and		%i0, 7, %l0
527	andn		%i0, 7, %i0
528	sll		%l0, 3, %l0
529	mov		64, %l1
530	mov		0xff, %l3
531	sub		%l1, %l0, %l1
532	and		%i1, 7, %l2
533	cmp		%i2, 127
534	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
535	movleu		%xcc, 0, %l5	!	%i2<128 ||
536	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
537	srl		%l3, %l2, %l3
538
539	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
540	srlx		%i2, 4, %i2
541	prefetch	[%i1], 22
542
543.L128_cbc_enc_loop:
544	ldx		[%i0 + 0], %o0
545	brz,pt		%l0, 4f
546	ldx		[%i0 + 8], %o1
547
548	ldx		[%i0 + 16], %o2
549	sllx		%o0, %l0, %o0
550	srlx		%o1, %l1, %g1
551	sllx		%o1, %l0, %o1
552	or		%g1, %o0, %o0
553	srlx		%o2, %l1, %o2
554	or		%o2, %o1, %o1
5554:
556	xor		%g4, %o0, %o0		! ^= rk[0]
557	xor		%g5, %o1, %o1
558	.word	0x99b02308 !movxtod	%o0,%f12
559	.word	0x9db02309 !movxtod	%o1,%f14
560
561	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
562	.word	0x85b38d82 !fxor	%f14,%f2,%f2
563	prefetch	[%i1 + 63], 22
564	prefetch	[%i0 + 16+63], 20
565	call		_aes128_encrypt_1x
566	add		%i0, 16, %i0
567
568	brnz,pn		%l2, 2f
569	sub		%i2, 1, %i2
570
571	std		%f0, [%i1 + 0]
572	std		%f2, [%i1 + 8]
573	brnz,pt		%i2, .L128_cbc_enc_loop
574	add		%i1, 16, %i1
575	st		%f0, [%i4 + 0]
576	st		%f1, [%i4 + 4]
577	st		%f2, [%i4 + 8]
578	st		%f3, [%i4 + 12]
579.L128_cbc_enc_abort:
580	ret
581	restore
582
583.align	16
5842:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
585						! and ~3x deterioration
586						! in inp==out case
587	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
588	.word	0x8db00902 !faligndata	%f0,%f2,%f6
589	.word	0x91b08902 !faligndata	%f2,%f2,%f8
590
591	stda		%f4, [%i1 + %l3]0xc0	! partial store
592	std		%f6, [%i1 + 8]
593	add		%i1, 16, %i1
594	orn		%g0, %l3, %l3
595	stda		%f8, [%i1 + %l3]0xc0	! partial store
596
597	brnz,pt		%i2, .L128_cbc_enc_loop+4
598	orn		%g0, %l3, %l3
599	st		%f0, [%i4 + 0]
600	st		%f1, [%i4 + 4]
601	st		%f2, [%i4 + 8]
602	st		%f3, [%i4 + 12]
603	ret
604	restore
605
606!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
607.align	32
608.L128cbc_enc_blk:
609	add	%i1, %i2, %l5
610	and	%l5, 63, %l5	! tail
611	sub	%i2, %l5, %i2
612	add	%l5, 15, %l5	! round up to 16n
613	srlx	%i2, 4, %i2
614	srl	%l5, 4, %l5
615
616.L128_cbc_enc_blk_loop:
617	ldx		[%i0 + 0], %o0
618	brz,pt		%l0, 5f
619	ldx		[%i0 + 8], %o1
620
621	ldx		[%i0 + 16], %o2
622	sllx		%o0, %l0, %o0
623	srlx		%o1, %l1, %g1
624	sllx		%o1, %l0, %o1
625	or		%g1, %o0, %o0
626	srlx		%o2, %l1, %o2
627	or		%o2, %o1, %o1
6285:
629	xor		%g4, %o0, %o0		! ^= rk[0]
630	xor		%g5, %o1, %o1
631	.word	0x99b02308 !movxtod	%o0,%f12
632	.word	0x9db02309 !movxtod	%o1,%f14
633
634	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
635	.word	0x85b38d82 !fxor	%f14,%f2,%f2
636	prefetch	[%i0 + 16+63], 20
637	call		_aes128_encrypt_1x
638	add		%i0, 16, %i0
639	sub		%i2, 1, %i2
640
641	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
642	add		%i1, 8, %i1
643	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
644	brnz,pt		%i2, .L128_cbc_enc_blk_loop
645	add		%i1, 8, %i1
646
647	membar		#StoreLoad|#StoreStore
648	brnz,pt		%l5, .L128_cbc_enc_loop
649	mov		%l5, %i2
650	st		%f0, [%i4 + 0]
651	st		%f1, [%i4 + 4]
652	st		%f2, [%i4 + 8]
653	st		%f3, [%i4 + 12]
654	ret
655	restore
656.type	aes128_t4_cbc_encrypt,#function
657.size	aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
658.globl	aes128_t4_ctr32_encrypt
659.align	32
660aes128_t4_ctr32_encrypt:
661	save		%sp, -192, %sp
662	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
663
664	prefetch	[%i0], 20
665	prefetch	[%i0 + 63], 20
666	call		_aes128_load_enckey
667	sllx		%i2, 4, %i2
668
669	ld		[%i4 + 0], %l4	! counter
670	ld		[%i4 + 4], %l5
671	ld		[%i4 + 8], %l6
672	ld		[%i4 + 12], %l7
673
674	sllx		%l4, 32, %o5
675	or		%l5, %o5, %o5
676	sllx		%l6, 32, %g1
677	xor		%o5, %g4, %g4		! ^= rk[0]
678	xor		%g1, %g5, %g5
679	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
680
681	sub		%i0, %i1, %l5	! %i0!=%i1
682	and		%i0, 7, %l0
683	andn		%i0, 7, %i0
684	sll		%l0, 3, %l0
685	mov		64, %l1
686	mov		0xff, %l3
687	sub		%l1, %l0, %l1
688	and		%i1, 7, %l2
689	cmp		%i2, 255
690	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
691	movleu		%xcc, 0, %l5	!	%i2<256 ||
692	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
693	srl		%l3, %l2, %l3
694
695	andcc		%i2, 16, %g0		! is number of blocks even?
696	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
697	bz		%icc, .L128_ctr32_loop2x
698	srlx		%i2, 4, %i2
699.L128_ctr32_loop:
700	ldx		[%i0 + 0], %o0
701	brz,pt		%l0, 4f
702	ldx		[%i0 + 8], %o1
703
704	ldx		[%i0 + 16], %o2
705	sllx		%o0, %l0, %o0
706	srlx		%o1, %l1, %g1
707	sllx		%o1, %l0, %o1
708	or		%g1, %o0, %o0
709	srlx		%o2, %l1, %o2
710	or		%o2, %o1, %o1
7114:
712	xor		%g5, %l7, %g1		! ^= rk[0]
713	add		%l7, 1, %l7
714	.word	0x85b02301 !movxtod	%g1,%f2
715	srl		%l7, 0, %l7		! clruw
716	prefetch	[%i1 + 63], 22
717	prefetch	[%i0 + 16+63], 20
718	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
719	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
720	call		_aes128_encrypt_1x+8
721	add		%i0, 16, %i0
722
723	.word	0x95b02308 !movxtod	%o0,%f10
724	.word	0x99b02309 !movxtod	%o1,%f12
725	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
726	.word	0x85b30d82 !fxor	%f12,%f2,%f2
727
728	brnz,pn		%l2, 2f
729	sub		%i2, 1, %i2
730
731	std		%f0, [%i1 + 0]
732	std		%f2, [%i1 + 8]
733	brnz,pt		%i2, .L128_ctr32_loop2x
734	add		%i1, 16, %i1
735
736	ret
737	restore
738
739.align	16
7402:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
741						! and ~3x deterioration
742						! in inp==out case
743	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
744	.word	0x8db00902 !faligndata	%f0,%f2,%f6
745	.word	0x91b08902 !faligndata	%f2,%f2,%f8
746	stda		%f4, [%i1 + %l3]0xc0	! partial store
747	std		%f6, [%i1 + 8]
748	add		%i1, 16, %i1
749	orn		%g0, %l3, %l3
750	stda		%f8, [%i1 + %l3]0xc0	! partial store
751
752	brnz,pt		%i2, .L128_ctr32_loop2x+4
753	orn		%g0, %l3, %l3
754
755	ret
756	restore
757
758!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
759.align	32
760.L128_ctr32_loop2x:
761	ldx		[%i0 + 0], %o0
762	ldx		[%i0 + 8], %o1
763	ldx		[%i0 + 16], %o2
764	brz,pt		%l0, 4f
765	ldx		[%i0 + 24], %o3
766
767	ldx		[%i0 + 32], %o4
768	sllx		%o0, %l0, %o0
769	srlx		%o1, %l1, %g1
770	or		%g1, %o0, %o0
771	sllx		%o1, %l0, %o1
772	srlx		%o2, %l1, %g1
773	or		%g1, %o1, %o1
774	sllx		%o2, %l0, %o2
775	srlx		%o3, %l1, %g1
776	or		%g1, %o2, %o2
777	sllx		%o3, %l0, %o3
778	srlx		%o4, %l1, %o4
779	or		%o4, %o3, %o3
7804:
781	xor		%g5, %l7, %g1		! ^= rk[0]
782	add		%l7, 1, %l7
783	.word	0x85b02301 !movxtod	%g1,%f2
784	srl		%l7, 0, %l7		! clruw
785	xor		%g5, %l7, %g1
786	add		%l7, 1, %l7
787	.word	0x8db02301 !movxtod	%g1,%f6
788	srl		%l7, 0, %l7		! clruw
789	prefetch	[%i1 + 63], 22
790	prefetch	[%i0 + 32+63], 20
791	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
792	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
793	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
794	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
795	call		_aes128_encrypt_2x+16
796	add		%i0, 32, %i0
797
798	.word	0x91b02308 !movxtod	%o0,%f8
799	.word	0x95b02309 !movxtod	%o1,%f10
800	.word	0x99b0230a !movxtod	%o2,%f12
801	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
802	.word	0x91b0230b !movxtod	%o3,%f8
803	.word	0x85b28d82 !fxor	%f10,%f2,%f2
804	.word	0x89b30d84 !fxor	%f12,%f4,%f4
805	.word	0x8db20d86 !fxor	%f8,%f6,%f6
806
807	brnz,pn		%l2, 2f
808	sub		%i2, 2, %i2
809
810	std		%f0, [%i1 + 0]
811	std		%f2, [%i1 + 8]
812	std		%f4, [%i1 + 16]
813	std		%f6, [%i1 + 24]
814	brnz,pt		%i2, .L128_ctr32_loop2x
815	add		%i1, 32, %i1
816
817	ret
818	restore
819
820.align	16
8212:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
822						! and ~3x deterioration
823						! in inp==out case
824	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
825	.word	0x81b00902 !faligndata	%f0,%f2,%f0
826	.word	0x85b08904 !faligndata	%f2,%f4,%f2
827	.word	0x89b10906 !faligndata	%f4,%f6,%f4
828	.word	0x8db18906 !faligndata	%f6,%f6,%f6
829
830	stda		%f8, [%i1 + %l3]0xc0	! partial store
831	std		%f0, [%i1 + 8]
832	std		%f2, [%i1 + 16]
833	std		%f4, [%i1 + 24]
834	add		%i1, 32, %i1
835	orn		%g0, %l3, %l3
836	stda		%f6, [%i1 + %l3]0xc0	! partial store
837
838	brnz,pt		%i2, .L128_ctr32_loop2x+4
839	orn		%g0, %l3, %l3
840
841	ret
842	restore
843
844!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
845.align	32
846.L128_ctr32_blk:
847	add	%i1, %i2, %l5
848	and	%l5, 63, %l5	! tail
849	sub	%i2, %l5, %i2
850	add	%l5, 15, %l5	! round up to 16n
851	srlx	%i2, 4, %i2
852	srl	%l5, 4, %l5
853	sub	%i2, 1, %i2
854	add	%l5, 1, %l5
855
856.L128_ctr32_blk_loop2x:
857	ldx		[%i0 + 0], %o0
858	ldx		[%i0 + 8], %o1
859	ldx		[%i0 + 16], %o2
860	brz,pt		%l0, 5f
861	ldx		[%i0 + 24], %o3
862
863	ldx		[%i0 + 32], %o4
864	sllx		%o0, %l0, %o0
865	srlx		%o1, %l1, %g1
866	or		%g1, %o0, %o0
867	sllx		%o1, %l0, %o1
868	srlx		%o2, %l1, %g1
869	or		%g1, %o1, %o1
870	sllx		%o2, %l0, %o2
871	srlx		%o3, %l1, %g1
872	or		%g1, %o2, %o2
873	sllx		%o3, %l0, %o3
874	srlx		%o4, %l1, %o4
875	or		%o4, %o3, %o3
8765:
877	xor		%g5, %l7, %g1		! ^= rk[0]
878	add		%l7, 1, %l7
879	.word	0x85b02301 !movxtod	%g1,%f2
880	srl		%l7, 0, %l7		! clruw
881	xor		%g5, %l7, %g1
882	add		%l7, 1, %l7
883	.word	0x8db02301 !movxtod	%g1,%f6
884	srl		%l7, 0, %l7		! clruw
885	prefetch	[%i0 + 32+63], 20
886	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
887	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
888	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
889	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
890	call		_aes128_encrypt_2x+16
891	add		%i0, 32, %i0
892	subcc		%i2, 2, %i2
893
894	.word	0x91b02308 !movxtod	%o0,%f8
895	.word	0x95b02309 !movxtod	%o1,%f10
896	.word	0x99b0230a !movxtod	%o2,%f12
897	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
898	.word	0x91b0230b !movxtod	%o3,%f8
899	.word	0x85b28d82 !fxor	%f10,%f2,%f2
900	.word	0x89b30d84 !fxor	%f12,%f4,%f4
901	.word	0x8db20d86 !fxor	%f8,%f6,%f6
902
903	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
904	add		%i1, 8, %i1
905	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
906	add		%i1, 8, %i1
907	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
908	add		%i1, 8, %i1
909	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
910	bgu,pt		%xcc, .L128_ctr32_blk_loop2x
911	add		%i1, 8, %i1
912
913	add		%l5, %i2, %i2
914	andcc		%i2, 1, %g0		! is number of blocks even?
915	membar		#StoreLoad|#StoreStore
916	bnz,pt		%icc, .L128_ctr32_loop
917	srl		%i2, 0, %i2
918	brnz,pn		%i2, .L128_ctr32_loop2x
919	nop
920
921	ret
922	restore
923.type	aes128_t4_ctr32_encrypt,#function
924.size	aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
925.globl	aes128_t4_xts_encrypt
926.align	32
927aes128_t4_xts_encrypt:
928	save		%sp, -192-16, %sp
929	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
930
931	mov		%i5, %o0
932	add		%fp, 2047-16, %o1
933	call		aes_t4_encrypt
934	mov		%i4, %o2
935
936	add		%fp, 2047-16, %l7
937	ldxa		[%l7]0x88, %g2
938	add		%fp, 2047-8, %l7
939	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak
940
941	sethi		%hi(0x76543210), %l7
942	or		%l7, %lo(0x76543210), %l7
943	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask
944
945	prefetch	[%i0], 20
946	prefetch	[%i0 + 63], 20
947	call		_aes128_load_enckey
948	and		%i2, 15,  %i5
949	and		%i2, -16, %i2
950
951	sub		%i0, %i1, %l5	! %i0!=%i1
952	and		%i0, 7, %l0
953	andn		%i0, 7, %i0
954	sll		%l0, 3, %l0
955	mov		64, %l1
956	mov		0xff, %l3
957	sub		%l1, %l0, %l1
958	and		%i1, 7, %l2
959	cmp		%i2, 255
960	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
961	movleu		%xcc, 0, %l5	!	%i2<256 ||
962	brnz,pn		%l5, .L128_xts_enblk !	%i0==%i1)
963	srl		%l3, %l2, %l3
964
965	andcc		%i2, 16, %g0		! is number of blocks even?
966	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
967	bz		%icc, .L128_xts_enloop2x
968	srlx		%i2, 4, %i2
969.L128_xts_enloop:
970	ldx		[%i0 + 0], %o0
971	brz,pt		%l0, 4f
972	ldx		[%i0 + 8], %o1
973
974	ldx		[%i0 + 16], %o2
975	sllx		%o0, %l0, %o0
976	srlx		%o1, %l1, %g1
977	sllx		%o1, %l0, %o1
978	or		%g1, %o0, %o0
979	srlx		%o2, %l1, %o2
980	or		%o2, %o1, %o1
9814:
982	.word	0x99b02302 !movxtod	%g2,%f12
983	.word	0x9db02303 !movxtod	%g3,%f14
984	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
985	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
986
987	xor		%g4, %o0, %o0		! ^= rk[0]
988	xor		%g5, %o1, %o1
989	.word	0x81b02308 !movxtod	%o0,%f0
990	.word	0x85b02309 !movxtod	%o1,%f2
991
992	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
993	.word	0x85b38d82 !fxor	%f14,%f2,%f2
994
995	prefetch	[%i1 + 63], 22
996	prefetch	[%i0 + 16+63], 20
997	call		_aes128_encrypt_1x
998	add		%i0, 16, %i0
999
1000	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
1001	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1002
1003	srax		%g3, 63, %l7		! next tweak value
1004	addcc		%g2, %g2, %g2
1005	and		%l7, 0x87, %l7
1006	.word	0x87b0c223 !addxc	%g3,%g3,%g3
1007	xor		%l7, %g2, %g2
1008
1009	brnz,pn		%l2, 2f
1010	sub		%i2, 1, %i2
1011
1012	std		%f0, [%i1 + 0]
1013	std		%f2, [%i1 + 8]
1014	brnz,pt		%i2, .L128_xts_enloop2x
1015	add		%i1, 16, %i1
1016
1017	brnz,pn		%i5, .L128_xts_ensteal
1018	nop
1019
1020	ret
1021	restore
1022
1023.align	16
10242:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1025						! and ~3x deterioration
1026						! in inp==out case
1027	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1028	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1029	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1030	stda		%f4, [%i1 + %l3]0xc0	! partial store
1031	std		%f6, [%i1 + 8]
1032	add		%i1, 16, %i1
1033	orn		%g0, %l3, %l3
1034	stda		%f8, [%i1 + %l3]0xc0	! partial store
1035
1036	brnz,pt		%i2, .L128_xts_enloop2x+4
1037	orn		%g0, %l3, %l3
1038
1039	brnz,pn		%i5, .L128_xts_ensteal
1040	nop
1041
1042	ret
1043	restore
1044
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! AES-128-XTS encrypt, two blocks per iteration (generic path).
! Register roles established by the function prologue (above this
! excerpt; mirrors the aes128_t4_xts_decrypt prologue below):
!   %i0/%i1 = inp/out, %i2 = remaining 16-byte blocks,
!   %i5 = tail bytes for ciphertext stealing (0 if none)
!   %l0 = input misalignment in bits, %l1 = 64-%l0
!   %l2 = output misalignment (bytes), %l3 = partial-store byte mask
!   %g3:%g2 = current 128-bit tweak, %g4/%g5 = rk[0] whitening words
.align	32
.L128_xts_enloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	! realign two misaligned input blocks: shift each word left by
	! %l0 bits and pull the spill-over in from the following word
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! tweak for block 0 -> %f12:%f14 (bshuffle applies the byte-swap
	! mask installed with bmask in the prologue)
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! advance tweak: multiply by x in GF(2^128), reduction 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak for block 1 -> %f8:%f10
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_encrypt_2x
	add		%i0, 32, %i0		! delay slot: advance input

	! regenerate block-1 tweak (%g3:%g2 still holds it) for the
	! post-encryption xor, then advance to the next pair's tweak
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! output misaligned?
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_xts_enloop2x
	add		%i1, 32, %i1

	! keep last ciphertext in %f0:%f2 for ciphertext stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore

! misaligned-output tail: funnel the 32 result bytes through
! faligndata and write the ragged edges with VIS partial stores
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing edge
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_enloop2x+4
	orn		%g0, %l3, %l3		! delay slot: restore mask

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore
1166
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk XTS-encrypt path taken when output is 8-byte aligned, input
! and output don't overlap and the count is large: stores go through
! ASI 0xe2 (block-init) to avoid read-for-ownership traffic.
! First split the count: %i2 = full 16-byte blocks to process here,
! %l5 = trailing blocks (up to a 64-byte cache line) finished later
! by the ordinary loop.
.align	32
.L128_xts_enblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_xts_enblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	! realign two misaligned input blocks (same as .L128_xts_enloop2x)
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	! tweak for block 0 -> %f12:%f14
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! advance tweak: multiply by x in GF(2^128), reduction 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak for block 1 -> %f8:%f10
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_encrypt_2x
	add		%i0, 32, %i0		! delay slot: advance input

	! regenerate block-1 tweak for post-xor, then advance tweak
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%xcc, .L128_xts_enblk2x
	add		%i1, 8, %i1

	! hand remaining %l5(+borrow) blocks back to the ordinary loops;
	! membar orders the block-init stores before normal stores
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_xts_enloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_xts_enloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing for encrypt: the last full ciphertext block is
! in %f0:%f2.  Park it on the stack, overwrite its head with the %i5
! remaining plaintext bytes (emitting the stolen bytes as the final
! partial block), then loop back once to re-encrypt the stack copy.
.align	32
.L128_xts_ensteal:
	std		%f0, [%fp + 2047-16]	! copy of output
	std		%f2, [%fp + 2047-8]

	srl		%l0, 3, %l0		! bit shift back to byte offset
	add		%fp, 2047-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	nop					! align

! byte-at-a-time swap: plaintext byte -> stack block,
! stolen ciphertext byte -> output tail
.L128_xts_enstealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L128_xts_enstealing
	inc		%l0

	! re-run the single-block loop on the patched stack block,
	! storing it one block back in the output
	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L128_xts_enloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret
	restore
.type	aes128_t4_xts_encrypt,#function
.size	aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
! void aes128_t4_xts_decrypt(const void *inp, void *out, size_t len,
!                            const AES_KEY *key1, const AES_KEY *key2,
!                            const void *ivec)
! (argument order inferred from the register use below — %i4 is the
!  key passed to aes_t4_encrypt, %i5 the block it encrypts; confirm
!  against the C prototype)
.globl	aes128_t4_xts_decrypt
.align	32
aes128_t4_xts_decrypt:
	save		%sp, -192-16, %sp	! extra 16 bytes: tweak/steal buffer
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! compute the initial tweak: encrypt ivec with the second key
	! into the stack buffer at %fp+2047-16
	mov		%i5, %o0
	add		%fp, 2047-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	! load tweak into %g3:%g2 (ASI 0x88 = little-endian load)
	add		%fp, 2047-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, 2047-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	! install GSR byte-swap mask used by the bshuffle ops
	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	! load decryption round keys; %i5 becomes the stealing tail size
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i2, 15,  %i5
	and		%i2, -16, %i2
	mov		0, %l7
	movrnz		%i5, 16,  %l7		! reserve one block if stealing
	sub		%i2, %l7, %i2

	! alignment bookkeeping: %l0 = input misalignment (bits),
	! %l1 = 64-%l0, %l2 = output misalignment, %l3 = edge byte mask
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%xcc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L128_xts_desteal
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_xts_deloop2x
	srlx		%i2, 4, %i2
! XTS-decrypt, one block per iteration (used to even up an odd count
! before .L128_xts_deloop2x takes over).
.L128_xts_deloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

	! realign one misaligned 16-byte block
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	! current tweak -> %f12:%f14 (byte-swapped)
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0		! delay slot: advance input

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	! advance tweak: multiply by x in GF(2^128), reduction 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f			! output misaligned?
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_xts_deloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

! misaligned-output tail for the single-block loop
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing edge
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3		! delay slot: restore mask

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore
1432
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! XTS-decrypt main loop, two blocks per iteration.  Structure is
! identical to .L128_xts_enloop2x, with _aes128_decrypt_2x in the
! middle; tweak generation is direction-independent.
.align	32
.L128_xts_deloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	! realign two misaligned input blocks
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! tweak for block 0 -> %f12:%f14
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! advance tweak: multiply by x in GF(2^128), reduction 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak for block 1 -> %f8:%f10
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0		! delay slot: advance input

	! regenerate block-1 tweak for post-xor, then advance tweak
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! output misaligned?
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_xts_deloop2x
	add		%i1, 32, %i1

	! keep last plaintext in %f0:%f2 for ciphertext stealing
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

! misaligned-output tail: same partial-store scheme as the encrypt side
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing edge
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3		! delay slot: restore mask

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore
1554
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk XTS-decrypt path (aligned output, no overlap, large count):
! block-init stores via ASI 0xe2, tail blocks deferred to the
! ordinary loops.  %i2 = blocks handled here, %l5 = deferred blocks.
.align	32
.L128_xts_deblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_xts_deblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	! realign two misaligned input blocks
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	! tweak for block 0 -> %f12:%f14
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! advance tweak: multiply by x in GF(2^128), reduction 0x87
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	! tweak for block 1 -> %f8:%f10
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0		! delay slot: advance input

	! regenerate block-1 tweak for post-xor, then advance tweak
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%xcc, .L128_xts_deblk2x
	add		%i1, 8, %i1

	! finish deferred blocks through the ordinary loops
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_xts_deloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_xts_deloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing for decrypt.  Unlike encrypt, the second-to-last
! ciphertext block must be decrypted with the *last* tweak, so the
! advanced tweak is computed into %o2:%o3 without disturbing the
! committed tweak in %g3:%g2 (which is reused for the final block).
.align	32
.L128_xts_desteal:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 8f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

	! realign one misaligned 16-byte block
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
8:
	! tweak*x -> %o2:%o3 (scratch; %g3:%g2 left intact)
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	.word	0x97b0c223 !addxc	%g3,%g3,%o3
	xor		%l7, %o2, %o2

	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	call		_aes128_decrypt_1x
	add		%i0, 16, %i0		! delay slot: advance input

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	! stash the decrypted block, then swap in the %i5 trailing
	! ciphertext bytes and emit the stolen bytes as the tail
	std		%f0, [%fp + 2047-16]
	std		%f2, [%fp + 2047-8]

	srl		%l0, 3, %l0		! bit shift back to byte offset
	add		%fp, 2047-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	add		%i1, 16, %i1
	nop					! align

.L128_xts_destealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L128_xts_destealing
	inc		%l0

	! decrypt the patched stack block with the still-current tweak
	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L128_xts_deloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret
	restore
.type	aes128_t4_xts_decrypt,#function
.size	aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
! void aes128_t4_cbc_decrypt(const void *inp, void *out, size_t len,
!                            const AES_KEY *key, void *ivec)
! %i0..%i4 = inp, out, len, key, ivec.  CBC decrypt: each plaintext
! block is the decryption of the ciphertext xored with the previous
! ciphertext block (kept in %f12:%f14); ivec is updated on exit.
.globl	aes128_t4_cbc_decrypt
.align	32
aes128_t4_cbc_decrypt:
	save		%sp, -192, %sp
	cmp		%i2, 0
	be,pn		%xcc, .L128_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i0, 7, %l0
	! alignment bookkeeping: %l0 = input misalignment (bits),
	! %l1 = 64-%l0, %l2 = output misalignment, %l3 = edge byte mask
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%xcc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_cbc_dec_loop2x
	prefetch	[%i1], 22
! single-block loop: evens up an odd block count
.L128_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 8], %o1

	! realign one misaligned 16-byte block
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0		! delay slot: advance input

	! xor with previous ciphertext, then make this block's
	! ciphertext (still in %o0:%o1) the new chaining value
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f			! output misaligned?
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_cbc_dec_loop2x
	add		%i1, 16, %i1
	! all blocks done: write back the chaining value
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L128_cbc_dec_abort:
	ret
	restore

! misaligned-output tail for the single-block loop
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing edge
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3		! delay slot: restore mask
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
1833
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! CBC-decrypt main loop, two blocks per iteration.  Chaining value
! lives in %f12:%f14; the raw ciphertext words survive in %o0..%o3
! across the decrypt call and become the next chaining inputs.
.align	32
.L128_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	! realign two misaligned input blocks
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0		! delay slot: advance input

	! block0 ^= ivec, block1 ^= ciphertext0; ciphertext1 becomes
	! the new chaining value in %f12:%f14
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f			! output misaligned?
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_cbc_dec_loop2x
	add		%i1, 32, %i1
	! all blocks done: write back the chaining value
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

! misaligned-output tail for the two-block loop
.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for trailing edge
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3		! delay slot: restore mask
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
1921
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk CBC-decrypt path (aligned output, no overlap, large count):
! block-init stores via ASI 0xe2.  %i2 = blocks handled here,
! %l5 = trailing blocks deferred to the ordinary loops.
.align	32
.L128cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f			! input 8-byte aligned?
	ldx		[%i0 + 24], %o3

	! realign two misaligned input blocks
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0		! delay slot: advance input
	subcc		%i2, 2, %i2

	! block0 ^= ivec, block1 ^= ciphertext0; ciphertext1 becomes
	! the new chaining value in %f12:%f14
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%xcc, .L128_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	! finish deferred blocks through the ordinary loops
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes128_t4_cbc_decrypt,#function
.size	aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
! _aes128_decrypt_1x: decrypt one pre-whitened block held in %f0:%f2.
! Round keys are resident in %f16..%f54 (loaded by _aes128_load_deckey).
! Each dround01/dround23 pair is one of the 10 AES-128 rounds; the
! final round uses the *_l ("last") forms.  Leaf routine: retl with
! the last instruction in the delay slot.  Clobbers %f4.
.align	32
_aes128_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd44c4 !aes_dround01_l	%f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4e4 !aes_dround23_l	%f54,%f4,%f2,%f2
.type	_aes128_decrypt_1x,#function
.size	_aes128_decrypt_1x,.-_aes128_decrypt_1x
2028
! _aes128_decrypt_2x: decrypt two pre-whitened blocks in parallel,
! block 0 in %f0:%f2 and block 1 in %f4:%f6, interleaving the two
! round streams to hide instruction latency.  Round keys in
! %f16..%f54.  Leaf routine; clobbers %f8,%f10.
.align	32
_aes128_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd44c8 !aes_dround01_l	%f52,%f8,%f2,%f0
	.word	0x84cdc4e8 !aes_dround23_l	%f54,%f8,%f2,%f2
	.word	0x88cd4cca !aes_dround01_l	%f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccea !aes_dround23_l	%f54,%f10,%f6,%f6
.type	_aes128_decrypt_2x,#function
.size	_aes128_decrypt_2x,.-_aes128_decrypt_2x
! _aes192_encrypt_1x: encrypt one pre-whitened block held in %f0:%f2.
! AES-192 = 12 rounds; round keys are resident in %f16..%f62, last
! round uses the *_l forms.  Leaf routine; clobbers %f4.
.align	32
_aes192_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x80cf4484 !aes_eround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4a4 !aes_eround23_l	%f62,%f4,%f2,%f2
.type	_aes192_encrypt_1x,#function
.size	_aes192_encrypt_1x,.-_aes192_encrypt_1x
2103
! _aes192_encrypt_2x: encrypt two pre-whitened blocks in parallel,
! block 0 in %f0:%f2, block 1 in %f4:%f6, with the 12 AES-192 round
! streams interleaved.  Round keys in %f16..%f62.  Leaf routine;
! clobbers %f8,%f10.
.align	32
_aes192_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
	.word	0x80cf4488 !aes_eround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4a8 !aes_eround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4c8a !aes_eround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccaa !aes_eround23_l	%f62,%f10,%f6,%f6
.type	_aes192_encrypt_2x,#function
.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x
2157
2158.align	32
2159_aes256_encrypt_1x:
2160	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
2161	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2162	ldd		[%i3 + 208], %f16
2163	ldd		[%i3 + 216], %f18
2164	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
2165	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
2166	ldd		[%i3 + 224], %f20
2167	ldd		[%i3 + 232], %f22
2168	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
2169	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
2170	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
2171	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
2172	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
2173	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
2174	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
2175	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
2176	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
2177	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
2178	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
2179	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
2180	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
2181	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
2182	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
2183	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
2184	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
2185	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
2186	.word	0x80cf4404 !aes_eround01	%f60,%f4,%f2,%f0
2187	.word	0x84cfc424 !aes_eround23	%f62,%f4,%f2,%f2
2188	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
2189	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2190	ldd		[%i3 + 16], %f16
2191	ldd		[%i3 + 24], %f18
2192	.word	0x80cd0484 !aes_eround01_l	%f20,%f4,%f2,%f0
2193	.word	0x84cd84a4 !aes_eround23_l	%f22,%f4,%f2,%f2
2194	ldd		[%i3 + 32], %f20
2195	retl
2196	ldd		[%i3 + 40], %f22
2197.type	_aes256_encrypt_1x,#function
2198.size	_aes256_encrypt_1x,.-_aes256_encrypt_1x
2199
2200.align	32
2201_aes256_encrypt_2x:
2202	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
2203	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2204	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
2205	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
2206	ldd		[%i3 + 208], %f16
2207	ldd		[%i3 + 216], %f18
2208	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
2209	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
2210	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
2211	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
2212	ldd		[%i3 + 224], %f20
2213	ldd		[%i3 + 232], %f22
2214	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
2215	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
2216	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
2217	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
2218	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
2219	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
2220	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
2221	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
2222	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
2223	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
2224	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
2225	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
2226	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
2227	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
2228	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
2229	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
2230	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
2231	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
2232	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
2233	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
2234	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
2235	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
2236	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
2237	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
2238	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
2239	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
2240	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
2241	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
2242	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
2243	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
2244	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
2245	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
2246	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
2247	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
2248	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
2249	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
2250	.word	0x80cf4408 !aes_eround01	%f60,%f8,%f2,%f0
2251	.word	0x84cfc428 !aes_eround23	%f62,%f8,%f2,%f2
2252	.word	0x88cf4c0a !aes_eround01	%f60,%f10,%f6,%f4
2253	.word	0x8ccfcc2a !aes_eround23	%f62,%f10,%f6,%f6
2254	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
2255	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2256	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
2257	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
2258	ldd		[%i3 + 16], %f16
2259	ldd		[%i3 + 24], %f18
2260	.word	0x80cd0488 !aes_eround01_l	%f20,%f8,%f2,%f0
2261	.word	0x84cd84a8 !aes_eround23_l	%f22,%f8,%f2,%f2
2262	.word	0x88cd0c8a !aes_eround01_l	%f20,%f10,%f6,%f4
2263	.word	0x8ccd8caa !aes_eround23_l	%f22,%f10,%f6,%f6
2264	ldd		[%i3 + 32], %f20
2265	retl
2266	ldd		[%i3 + 40], %f22
2267.type	_aes256_encrypt_2x,#function
2268.size	_aes256_encrypt_2x,.-_aes256_encrypt_2x
2269
2270.align	32
2271_aes192_loadkey:
2272	ldx		[%i3 + 0], %g4
2273	ldx		[%i3 + 8], %g5
2274	ldd		[%i3 + 16], %f16
2275	ldd		[%i3 + 24], %f18
2276	ldd		[%i3 + 32], %f20
2277	ldd		[%i3 + 40], %f22
2278	ldd		[%i3 + 48], %f24
2279	ldd		[%i3 + 56], %f26
2280	ldd		[%i3 + 64], %f28
2281	ldd		[%i3 + 72], %f30
2282	ldd		[%i3 + 80], %f32
2283	ldd		[%i3 + 88], %f34
2284	ldd		[%i3 + 96], %f36
2285	ldd		[%i3 + 104], %f38
2286	ldd		[%i3 + 112], %f40
2287	ldd		[%i3 + 120], %f42
2288	ldd		[%i3 + 128], %f44
2289	ldd		[%i3 + 136], %f46
2290	ldd		[%i3 + 144], %f48
2291	ldd		[%i3 + 152], %f50
2292	ldd		[%i3 + 160], %f52
2293	ldd		[%i3 + 168], %f54
2294	ldd		[%i3 + 176], %f56
2295	ldd		[%i3 + 184], %f58
2296	ldd		[%i3 + 192], %f60
2297	ldd		[%i3 + 200], %f62
2298	retl
2299	nop
2300.type	_aes192_loadkey,#function
2301.size	_aes192_loadkey,.-_aes192_loadkey
2302_aes256_loadkey=_aes192_loadkey
2303_aes192_load_enckey=_aes192_loadkey
2304_aes192_load_deckey=_aes192_loadkey
2305_aes256_load_enckey=_aes192_loadkey
2306_aes256_load_deckey=_aes192_loadkey
2307.globl	aes256_t4_cbc_encrypt
2308.align	32
2309aes256_t4_cbc_encrypt:
2310	save		%sp, -192, %sp
2311	cmp		%i2, 0
2312	be,pn		%xcc, .L256_cbc_enc_abort
2313	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2314	sub		%i0, %i1, %l5	! %i0!=%i1
2315	ld		[%i4 + 0], %f0
2316	ld		[%i4 + 4], %f1
2317	ld		[%i4 + 8], %f2
2318	ld		[%i4 + 12], %f3
2319	prefetch	[%i0], 20
2320	prefetch	[%i0 + 63], 20
2321	call		_aes256_load_enckey
2322	and		%i0, 7, %l0
2323	andn		%i0, 7, %i0
2324	sll		%l0, 3, %l0
2325	mov		64, %l1
2326	mov		0xff, %l3
2327	sub		%l1, %l0, %l1
2328	and		%i1, 7, %l2
2329	cmp		%i2, 127
2330	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2331	movleu		%xcc, 0, %l5	!	%i2<128 ||
2332	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
2333	srl		%l3, %l2, %l3
2334
2335	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2336	srlx		%i2, 4, %i2
2337	prefetch	[%i1], 22
2338
2339.L256_cbc_enc_loop:
2340	ldx		[%i0 + 0], %o0
2341	brz,pt		%l0, 4f
2342	ldx		[%i0 + 8], %o1
2343
2344	ldx		[%i0 + 16], %o2
2345	sllx		%o0, %l0, %o0
2346	srlx		%o1, %l1, %g1
2347	sllx		%o1, %l0, %o1
2348	or		%g1, %o0, %o0
2349	srlx		%o2, %l1, %o2
2350	or		%o2, %o1, %o1
23514:
2352	xor		%g4, %o0, %o0		! ^= rk[0]
2353	xor		%g5, %o1, %o1
2354	.word	0x99b02308 !movxtod	%o0,%f12
2355	.word	0x9db02309 !movxtod	%o1,%f14
2356
2357	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2358	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2359	prefetch	[%i1 + 63], 22
2360	prefetch	[%i0 + 16+63], 20
2361	call		_aes256_encrypt_1x
2362	add		%i0, 16, %i0
2363
2364	brnz,pn		%l2, 2f
2365	sub		%i2, 1, %i2
2366
2367	std		%f0, [%i1 + 0]
2368	std		%f2, [%i1 + 8]
2369	brnz,pt		%i2, .L256_cbc_enc_loop
2370	add		%i1, 16, %i1
2371	st		%f0, [%i4 + 0]
2372	st		%f1, [%i4 + 4]
2373	st		%f2, [%i4 + 8]
2374	st		%f3, [%i4 + 12]
2375.L256_cbc_enc_abort:
2376	ret
2377	restore
2378
2379.align	16
23802:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2381						! and ~3x deterioration
2382						! in inp==out case
2383	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2384	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2385	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2386
2387	stda		%f4, [%i1 + %l3]0xc0	! partial store
2388	std		%f6, [%i1 + 8]
2389	add		%i1, 16, %i1
2390	orn		%g0, %l3, %l3
2391	stda		%f8, [%i1 + %l3]0xc0	! partial store
2392
2393	brnz,pt		%i2, .L256_cbc_enc_loop+4
2394	orn		%g0, %l3, %l3
2395	st		%f0, [%i4 + 0]
2396	st		%f1, [%i4 + 4]
2397	st		%f2, [%i4 + 8]
2398	st		%f3, [%i4 + 12]
2399	ret
2400	restore
2401
2402!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2403.align	32
2404.L256cbc_enc_blk:
2405	add	%i1, %i2, %l5
2406	and	%l5, 63, %l5	! tail
2407	sub	%i2, %l5, %i2
2408	add	%l5, 15, %l5	! round up to 16n
2409	srlx	%i2, 4, %i2
2410	srl	%l5, 4, %l5
2411
2412.L256_cbc_enc_blk_loop:
2413	ldx		[%i0 + 0], %o0
2414	brz,pt		%l0, 5f
2415	ldx		[%i0 + 8], %o1
2416
2417	ldx		[%i0 + 16], %o2
2418	sllx		%o0, %l0, %o0
2419	srlx		%o1, %l1, %g1
2420	sllx		%o1, %l0, %o1
2421	or		%g1, %o0, %o0
2422	srlx		%o2, %l1, %o2
2423	or		%o2, %o1, %o1
24245:
2425	xor		%g4, %o0, %o0		! ^= rk[0]
2426	xor		%g5, %o1, %o1
2427	.word	0x99b02308 !movxtod	%o0,%f12
2428	.word	0x9db02309 !movxtod	%o1,%f14
2429
2430	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2431	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2432	prefetch	[%i0 + 16+63], 20
2433	call		_aes256_encrypt_1x
2434	add		%i0, 16, %i0
2435	sub		%i2, 1, %i2
2436
2437	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2438	add		%i1, 8, %i1
2439	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2440	brnz,pt		%i2, .L256_cbc_enc_blk_loop
2441	add		%i1, 8, %i1
2442
2443	membar		#StoreLoad|#StoreStore
2444	brnz,pt		%l5, .L256_cbc_enc_loop
2445	mov		%l5, %i2
2446	st		%f0, [%i4 + 0]
2447	st		%f1, [%i4 + 4]
2448	st		%f2, [%i4 + 8]
2449	st		%f3, [%i4 + 12]
2450	ret
2451	restore
2452.type	aes256_t4_cbc_encrypt,#function
2453.size	aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
2454.globl	aes192_t4_cbc_encrypt
2455.align	32
2456aes192_t4_cbc_encrypt:
2457	save		%sp, -192, %sp
2458	cmp		%i2, 0
2459	be,pn		%xcc, .L192_cbc_enc_abort
2460	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2461	sub		%i0, %i1, %l5	! %i0!=%i1
2462	ld		[%i4 + 0], %f0
2463	ld		[%i4 + 4], %f1
2464	ld		[%i4 + 8], %f2
2465	ld		[%i4 + 12], %f3
2466	prefetch	[%i0], 20
2467	prefetch	[%i0 + 63], 20
2468	call		_aes192_load_enckey
2469	and		%i0, 7, %l0
2470	andn		%i0, 7, %i0
2471	sll		%l0, 3, %l0
2472	mov		64, %l1
2473	mov		0xff, %l3
2474	sub		%l1, %l0, %l1
2475	and		%i1, 7, %l2
2476	cmp		%i2, 127
2477	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2478	movleu		%xcc, 0, %l5	!	%i2<128 ||
2479	brnz,pn		%l5, .L192cbc_enc_blk	!	%i0==%i1)
2480	srl		%l3, %l2, %l3
2481
2482	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2483	srlx		%i2, 4, %i2
2484	prefetch	[%i1], 22
2485
2486.L192_cbc_enc_loop:
2487	ldx		[%i0 + 0], %o0
2488	brz,pt		%l0, 4f
2489	ldx		[%i0 + 8], %o1
2490
2491	ldx		[%i0 + 16], %o2
2492	sllx		%o0, %l0, %o0
2493	srlx		%o1, %l1, %g1
2494	sllx		%o1, %l0, %o1
2495	or		%g1, %o0, %o0
2496	srlx		%o2, %l1, %o2
2497	or		%o2, %o1, %o1
24984:
2499	xor		%g4, %o0, %o0		! ^= rk[0]
2500	xor		%g5, %o1, %o1
2501	.word	0x99b02308 !movxtod	%o0,%f12
2502	.word	0x9db02309 !movxtod	%o1,%f14
2503
2504	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2505	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2506	prefetch	[%i1 + 63], 22
2507	prefetch	[%i0 + 16+63], 20
2508	call		_aes192_encrypt_1x
2509	add		%i0, 16, %i0
2510
2511	brnz,pn		%l2, 2f
2512	sub		%i2, 1, %i2
2513
2514	std		%f0, [%i1 + 0]
2515	std		%f2, [%i1 + 8]
2516	brnz,pt		%i2, .L192_cbc_enc_loop
2517	add		%i1, 16, %i1
2518	st		%f0, [%i4 + 0]
2519	st		%f1, [%i4 + 4]
2520	st		%f2, [%i4 + 8]
2521	st		%f3, [%i4 + 12]
2522.L192_cbc_enc_abort:
2523	ret
2524	restore
2525
2526.align	16
25272:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2528						! and ~3x deterioration
2529						! in inp==out case
2530	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2531	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2532	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2533
2534	stda		%f4, [%i1 + %l3]0xc0	! partial store
2535	std		%f6, [%i1 + 8]
2536	add		%i1, 16, %i1
2537	orn		%g0, %l3, %l3
2538	stda		%f8, [%i1 + %l3]0xc0	! partial store
2539
2540	brnz,pt		%i2, .L192_cbc_enc_loop+4
2541	orn		%g0, %l3, %l3
2542	st		%f0, [%i4 + 0]
2543	st		%f1, [%i4 + 4]
2544	st		%f2, [%i4 + 8]
2545	st		%f3, [%i4 + 12]
2546	ret
2547	restore
2548
2549!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2550.align	32
2551.L192cbc_enc_blk:
2552	add	%i1, %i2, %l5
2553	and	%l5, 63, %l5	! tail
2554	sub	%i2, %l5, %i2
2555	add	%l5, 15, %l5	! round up to 16n
2556	srlx	%i2, 4, %i2
2557	srl	%l5, 4, %l5
2558
2559.L192_cbc_enc_blk_loop:
2560	ldx		[%i0 + 0], %o0
2561	brz,pt		%l0, 5f
2562	ldx		[%i0 + 8], %o1
2563
2564	ldx		[%i0 + 16], %o2
2565	sllx		%o0, %l0, %o0
2566	srlx		%o1, %l1, %g1
2567	sllx		%o1, %l0, %o1
2568	or		%g1, %o0, %o0
2569	srlx		%o2, %l1, %o2
2570	or		%o2, %o1, %o1
25715:
2572	xor		%g4, %o0, %o0		! ^= rk[0]
2573	xor		%g5, %o1, %o1
2574	.word	0x99b02308 !movxtod	%o0,%f12
2575	.word	0x9db02309 !movxtod	%o1,%f14
2576
2577	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2578	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2579	prefetch	[%i0 + 16+63], 20
2580	call		_aes192_encrypt_1x
2581	add		%i0, 16, %i0
2582	sub		%i2, 1, %i2
2583
2584	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2585	add		%i1, 8, %i1
2586	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2587	brnz,pt		%i2, .L192_cbc_enc_blk_loop
2588	add		%i1, 8, %i1
2589
2590	membar		#StoreLoad|#StoreStore
2591	brnz,pt		%l5, .L192_cbc_enc_loop
2592	mov		%l5, %i2
2593	st		%f0, [%i4 + 0]
2594	st		%f1, [%i4 + 4]
2595	st		%f2, [%i4 + 8]
2596	st		%f3, [%i4 + 12]
2597	ret
2598	restore
2599.type	aes192_t4_cbc_encrypt,#function
2600.size	aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
2601.globl	aes256_t4_ctr32_encrypt
2602.align	32
2603aes256_t4_ctr32_encrypt:
2604	save		%sp, -192, %sp
2605	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2606
2607	prefetch	[%i0], 20
2608	prefetch	[%i0 + 63], 20
2609	call		_aes256_load_enckey
2610	sllx		%i2, 4, %i2
2611
2612	ld		[%i4 + 0], %l4	! counter
2613	ld		[%i4 + 4], %l5
2614	ld		[%i4 + 8], %l6
2615	ld		[%i4 + 12], %l7
2616
2617	sllx		%l4, 32, %o5
2618	or		%l5, %o5, %o5
2619	sllx		%l6, 32, %g1
2620	xor		%o5, %g4, %g4		! ^= rk[0]
2621	xor		%g1, %g5, %g5
2622	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
2623
2624	sub		%i0, %i1, %l5	! %i0!=%i1
2625	and		%i0, 7, %l0
2626	andn		%i0, 7, %i0
2627	sll		%l0, 3, %l0
2628	mov		64, %l1
2629	mov		0xff, %l3
2630	sub		%l1, %l0, %l1
2631	and		%i1, 7, %l2
2632	cmp		%i2, 255
2633	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2634	movleu		%xcc, 0, %l5	!	%i2<256 ||
2635	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
2636	srl		%l3, %l2, %l3
2637
2638	andcc		%i2, 16, %g0		! is number of blocks even?
2639	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2640	bz		%icc, .L256_ctr32_loop2x
2641	srlx		%i2, 4, %i2
2642.L256_ctr32_loop:
2643	ldx		[%i0 + 0], %o0
2644	brz,pt		%l0, 4f
2645	ldx		[%i0 + 8], %o1
2646
2647	ldx		[%i0 + 16], %o2
2648	sllx		%o0, %l0, %o0
2649	srlx		%o1, %l1, %g1
2650	sllx		%o1, %l0, %o1
2651	or		%g1, %o0, %o0
2652	srlx		%o2, %l1, %o2
2653	or		%o2, %o1, %o1
26544:
2655	xor		%g5, %l7, %g1		! ^= rk[0]
2656	add		%l7, 1, %l7
2657	.word	0x85b02301 !movxtod	%g1,%f2
2658	srl		%l7, 0, %l7		! clruw
2659	prefetch	[%i1 + 63], 22
2660	prefetch	[%i0 + 16+63], 20
2661	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
2662	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
2663	call		_aes256_encrypt_1x+8
2664	add		%i0, 16, %i0
2665
2666	.word	0x95b02308 !movxtod	%o0,%f10
2667	.word	0x99b02309 !movxtod	%o1,%f12
2668	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
2669	.word	0x85b30d82 !fxor	%f12,%f2,%f2
2670
2671	brnz,pn		%l2, 2f
2672	sub		%i2, 1, %i2
2673
2674	std		%f0, [%i1 + 0]
2675	std		%f2, [%i1 + 8]
2676	brnz,pt		%i2, .L256_ctr32_loop2x
2677	add		%i1, 16, %i1
2678
2679	ret
2680	restore
2681
2682.align	16
26832:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2684						! and ~3x deterioration
2685						! in inp==out case
2686	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2687	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2688	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2689	stda		%f4, [%i1 + %l3]0xc0	! partial store
2690	std		%f6, [%i1 + 8]
2691	add		%i1, 16, %i1
2692	orn		%g0, %l3, %l3
2693	stda		%f8, [%i1 + %l3]0xc0	! partial store
2694
2695	brnz,pt		%i2, .L256_ctr32_loop2x+4
2696	orn		%g0, %l3, %l3
2697
2698	ret
2699	restore
2700
2701!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2702.align	32
2703.L256_ctr32_loop2x:
2704	ldx		[%i0 + 0], %o0
2705	ldx		[%i0 + 8], %o1
2706	ldx		[%i0 + 16], %o2
2707	brz,pt		%l0, 4f
2708	ldx		[%i0 + 24], %o3
2709
2710	ldx		[%i0 + 32], %o4
2711	sllx		%o0, %l0, %o0
2712	srlx		%o1, %l1, %g1
2713	or		%g1, %o0, %o0
2714	sllx		%o1, %l0, %o1
2715	srlx		%o2, %l1, %g1
2716	or		%g1, %o1, %o1
2717	sllx		%o2, %l0, %o2
2718	srlx		%o3, %l1, %g1
2719	or		%g1, %o2, %o2
2720	sllx		%o3, %l0, %o3
2721	srlx		%o4, %l1, %o4
2722	or		%o4, %o3, %o3
27234:
2724	xor		%g5, %l7, %g1		! ^= rk[0]
2725	add		%l7, 1, %l7
2726	.word	0x85b02301 !movxtod	%g1,%f2
2727	srl		%l7, 0, %l7		! clruw
2728	xor		%g5, %l7, %g1
2729	add		%l7, 1, %l7
2730	.word	0x8db02301 !movxtod	%g1,%f6
2731	srl		%l7, 0, %l7		! clruw
2732	prefetch	[%i1 + 63], 22
2733	prefetch	[%i0 + 32+63], 20
2734	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
2735	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
2736	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
2737	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
2738	call		_aes256_encrypt_2x+16
2739	add		%i0, 32, %i0
2740
2741	.word	0x91b02308 !movxtod	%o0,%f8
2742	.word	0x95b02309 !movxtod	%o1,%f10
2743	.word	0x99b0230a !movxtod	%o2,%f12
2744	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2745	.word	0x91b0230b !movxtod	%o3,%f8
2746	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2747	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2748	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2749
2750	brnz,pn		%l2, 2f
2751	sub		%i2, 2, %i2
2752
2753	std		%f0, [%i1 + 0]
2754	std		%f2, [%i1 + 8]
2755	std		%f4, [%i1 + 16]
2756	std		%f6, [%i1 + 24]
2757	brnz,pt		%i2, .L256_ctr32_loop2x
2758	add		%i1, 32, %i1
2759
2760	ret
2761	restore
2762
2763.align	16
27642:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2765						! and ~3x deterioration
2766						! in inp==out case
2767	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
2768	.word	0x81b00902 !faligndata	%f0,%f2,%f0
2769	.word	0x85b08904 !faligndata	%f2,%f4,%f2
2770	.word	0x89b10906 !faligndata	%f4,%f6,%f4
2771	.word	0x8db18906 !faligndata	%f6,%f6,%f6
2772
2773	stda		%f8, [%i1 + %l3]0xc0	! partial store
2774	std		%f0, [%i1 + 8]
2775	std		%f2, [%i1 + 16]
2776	std		%f4, [%i1 + 24]
2777	add		%i1, 32, %i1
2778	orn		%g0, %l3, %l3
2779	stda		%f6, [%i1 + %l3]0xc0	! partial store
2780
2781	brnz,pt		%i2, .L256_ctr32_loop2x+4
2782	orn		%g0, %l3, %l3
2783
2784	ret
2785	restore
2786
2787!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2788.align	32
2789.L256_ctr32_blk:
2790	add	%i1, %i2, %l5
2791	and	%l5, 63, %l5	! tail
2792	sub	%i2, %l5, %i2
2793	add	%l5, 15, %l5	! round up to 16n
2794	srlx	%i2, 4, %i2
2795	srl	%l5, 4, %l5
2796	sub	%i2, 1, %i2
2797	add	%l5, 1, %l5
2798
2799.L256_ctr32_blk_loop2x:
2800	ldx		[%i0 + 0], %o0
2801	ldx		[%i0 + 8], %o1
2802	ldx		[%i0 + 16], %o2
2803	brz,pt		%l0, 5f
2804	ldx		[%i0 + 24], %o3
2805
2806	ldx		[%i0 + 32], %o4
2807	sllx		%o0, %l0, %o0
2808	srlx		%o1, %l1, %g1
2809	or		%g1, %o0, %o0
2810	sllx		%o1, %l0, %o1
2811	srlx		%o2, %l1, %g1
2812	or		%g1, %o1, %o1
2813	sllx		%o2, %l0, %o2
2814	srlx		%o3, %l1, %g1
2815	or		%g1, %o2, %o2
2816	sllx		%o3, %l0, %o3
2817	srlx		%o4, %l1, %o4
2818	or		%o4, %o3, %o3
28195:
2820	xor		%g5, %l7, %g1		! ^= rk[0]
2821	add		%l7, 1, %l7
2822	.word	0x85b02301 !movxtod	%g1,%f2
2823	srl		%l7, 0, %l7		! clruw
2824	xor		%g5, %l7, %g1
2825	add		%l7, 1, %l7
2826	.word	0x8db02301 !movxtod	%g1,%f6
2827	srl		%l7, 0, %l7		! clruw
2828	prefetch	[%i0 + 32+63], 20
2829	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
2830	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
2831	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
2832	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
2833	call		_aes256_encrypt_2x+16
2834	add		%i0, 32, %i0
2835	subcc		%i2, 2, %i2
2836
2837	.word	0x91b02308 !movxtod	%o0,%f8
2838	.word	0x95b02309 !movxtod	%o1,%f10
2839	.word	0x99b0230a !movxtod	%o2,%f12
2840	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2841	.word	0x91b0230b !movxtod	%o3,%f8
2842	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2843	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2844	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2845
2846	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2847	add		%i1, 8, %i1
2848	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2849	add		%i1, 8, %i1
2850	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2851	add		%i1, 8, %i1
2852	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2853	bgu,pt		%xcc, .L256_ctr32_blk_loop2x
2854	add		%i1, 8, %i1
2855
2856	add		%l5, %i2, %i2
2857	andcc		%i2, 1, %g0		! is number of blocks even?
2858	membar		#StoreLoad|#StoreStore
2859	bnz,pt		%icc, .L256_ctr32_loop
2860	srl		%i2, 0, %i2
2861	brnz,pn		%i2, .L256_ctr32_loop2x
2862	nop
2863
2864	ret
2865	restore
2866.type	aes256_t4_ctr32_encrypt,#function
2867.size	aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
2868.globl	aes256_t4_xts_encrypt
2869.align	32
2870aes256_t4_xts_encrypt:
2871	save		%sp, -192-16, %sp
2872	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2873
2874	mov		%i5, %o0
2875	add		%fp, 2047-16, %o1
2876	call		aes_t4_encrypt
2877	mov		%i4, %o2
2878
2879	add		%fp, 2047-16, %l7
2880	ldxa		[%l7]0x88, %g2
2881	add		%fp, 2047-8, %l7
2882	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak
2883
2884	sethi		%hi(0x76543210), %l7
2885	or		%l7, %lo(0x76543210), %l7
2886	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask
2887
2888	prefetch	[%i0], 20
2889	prefetch	[%i0 + 63], 20
2890	call		_aes256_load_enckey
2891	and		%i2, 15,  %i5
2892	and		%i2, -16, %i2
2893
2894	sub		%i0, %i1, %l5	! %i0!=%i1
2895	and		%i0, 7, %l0
2896	andn		%i0, 7, %i0
2897	sll		%l0, 3, %l0
2898	mov		64, %l1
2899	mov		0xff, %l3
2900	sub		%l1, %l0, %l1
2901	and		%i1, 7, %l2
2902	cmp		%i2, 255
2903	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2904	movleu		%xcc, 0, %l5	!	%i2<256 ||
2905	brnz,pn		%l5, .L256_xts_enblk !	%i0==%i1)
2906	srl		%l3, %l2, %l3
2907
2908	andcc		%i2, 16, %g0		! is number of blocks even?
2909	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2910	bz		%icc, .L256_xts_enloop2x
2911	srlx		%i2, 4, %i2
2912.L256_xts_enloop:
2913	ldx		[%i0 + 0], %o0
2914	brz,pt		%l0, 4f
2915	ldx		[%i0 + 8], %o1
2916
2917	ldx		[%i0 + 16], %o2
2918	sllx		%o0, %l0, %o0
2919	srlx		%o1, %l1, %g1
2920	sllx		%o1, %l0, %o1
2921	or		%g1, %o0, %o0
2922	srlx		%o2, %l1, %o2
2923	or		%o2, %o1, %o1
29244:
2925	.word	0x99b02302 !movxtod	%g2,%f12
2926	.word	0x9db02303 !movxtod	%g3,%f14
2927	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
2928	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
2929
2930	xor		%g4, %o0, %o0		! ^= rk[0]
2931	xor		%g5, %o1, %o1
2932	.word	0x81b02308 !movxtod	%o0,%f0
2933	.word	0x85b02309 !movxtod	%o1,%f2
2934
2935	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
2936	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2937
2938	prefetch	[%i1 + 63], 22
2939	prefetch	[%i0 + 16+63], 20
2940	call		_aes256_encrypt_1x
2941	add		%i0, 16, %i0
2942
2943	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
2944	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2945
2946	srax		%g3, 63, %l7		! next tweak value
2947	addcc		%g2, %g2, %g2
2948	and		%l7, 0x87, %l7
2949	.word	0x87b0c223 !addxc	%g3,%g3,%g3
2950	xor		%l7, %g2, %g2
2951
2952	brnz,pn		%l2, 2f
2953	sub		%i2, 1, %i2
2954
2955	std		%f0, [%i1 + 0]
2956	std		%f2, [%i1 + 8]
2957	brnz,pt		%i2, .L256_xts_enloop2x
2958	add		%i1, 16, %i1
2959
2960	brnz,pn		%i5, .L256_xts_ensteal
2961	nop
2962
2963	ret
2964	restore
2965
2966.align	16
29672:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2968						! and ~3x deterioration
2969						! in inp==out case
2970	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2971	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2972	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2973	stda		%f4, [%i1 + %l3]0xc0	! partial store
2974	std		%f6, [%i1 + 8]
2975	add		%i1, 16, %i1
2976	orn		%g0, %l3, %l3
2977	stda		%f8, [%i1 + %l3]0xc0	! partial store
2978
2979	brnz,pt		%i2, .L256_xts_enloop2x+4
2980	orn		%g0, %l3, %l3
2981
2982	brnz,pn		%i5, .L256_xts_ensteal
2983	nop
2984
2985	ret
2986	restore
2987
2988!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2989.align	32
2990.L256_xts_enloop2x:
2991	ldx		[%i0 + 0], %o0
2992	ldx		[%i0 + 8], %o1
2993	ldx		[%i0 + 16], %o2
2994	brz,pt		%l0, 4f
2995	ldx		[%i0 + 24], %o3
2996
2997	ldx		[%i0 + 32], %o4
2998	sllx		%o0, %l0, %o0
2999	srlx		%o1, %l1, %g1
3000	or		%g1, %o0, %o0
3001	sllx		%o1, %l0, %o1
3002	srlx		%o2, %l1, %g1
3003	or		%g1, %o1, %o1
3004	sllx		%o2, %l0, %o2
3005	srlx		%o3, %l1, %g1
3006	or		%g1, %o2, %o2
3007	sllx		%o3, %l0, %o3
3008	srlx		%o4, %l1, %o4
3009	or		%o4, %o3, %o3
30104:
3011	.word	0x99b02302 !movxtod	%g2,%f12
3012	.word	0x9db02303 !movxtod	%g3,%f14
3013	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3014	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3015
3016	srax		%g3, 63, %l7		! next tweak value
3017	addcc		%g2, %g2, %g2
3018	and		%l7, 0x87, %l7
3019	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3020	xor		%l7, %g2, %g2
3021
3022	.word	0x91b02302 !movxtod	%g2,%f8
3023	.word	0x95b02303 !movxtod	%g3,%f10
3024	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3025	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3026
3027	xor		%g4, %o0, %o0		! ^= rk[0]
3028	xor		%g5, %o1, %o1
3029	xor		%g4, %o2, %o2		! ^= rk[0]
3030	xor		%g5, %o3, %o3
3031	.word	0x81b02308 !movxtod	%o0,%f0
3032	.word	0x85b02309 !movxtod	%o1,%f2
3033	.word	0x89b0230a !movxtod	%o2,%f4
3034	.word	0x8db0230b !movxtod	%o3,%f6
3035
3036	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3037	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3038	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
3039	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3040
3041	prefetch	[%i1 + 63], 22
3042	prefetch	[%i0 + 32+63], 20
3043	call		_aes256_encrypt_2x
3044	add		%i0, 32, %i0
3045
3046	.word	0x91b02302 !movxtod	%g2,%f8
3047	.word	0x95b02303 !movxtod	%g3,%f10
3048
3049	srax		%g3, 63, %l7		! next tweak value
3050	addcc		%g2, %g2, %g2
3051	and		%l7, 0x87, %l7
3052	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3053	xor		%l7, %g2, %g2
3054
3055	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3056	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3057
3058	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3059	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3060	.word	0x89b20d84 !fxor	%f8,%f4,%f4
3061	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3062
3063	brnz,pn		%l2, 2f
3064	sub		%i2, 2, %i2
3065
3066	std		%f0, [%i1 + 0]
3067	std		%f2, [%i1 + 8]
3068	std		%f4, [%i1 + 16]
3069	std		%f6, [%i1 + 24]
3070	brnz,pt		%i2, .L256_xts_enloop2x
3071	add		%i1, 32, %i1
3072
3073	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3074	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3075	brnz,pn		%i5, .L256_xts_ensteal
3076	nop
3077
3078	ret
3079	restore
3080
3081.align	16
30822:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
3083						! and ~3x deterioration
3084						! in inp==out case
3085	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
3086	.word	0x95b00902 !faligndata	%f0,%f2,%f10
3087	.word	0x99b08904 !faligndata	%f2,%f4,%f12
3088	.word	0x9db10906 !faligndata	%f4,%f6,%f14
3089	.word	0x81b18906 !faligndata	%f6,%f6,%f0
3090
3091	stda		%f8, [%i1 + %l3]0xc0	! partial store
3092	std		%f10, [%i1 + 8]
3093	std		%f12, [%i1 + 16]
3094	std		%f14, [%i1 + 24]
3095	add		%i1, 32, %i1
3096	orn		%g0, %l3, %l3
3097	stda		%f0, [%i1 + %l3]0xc0	! partial store
3098
3099	brnz,pt		%i2, .L256_xts_enloop2x+4
3100	orn		%g0, %l3, %l3
3101
3102	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3103	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3104	brnz,pn		%i5, .L256_xts_ensteal
3105	nop
3106
3107	ret
3108	restore
3109
3110!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3111.align	32
3112.L256_xts_enblk:
3113	add	%i1, %i2, %l5
3114	and	%l5, 63, %l5	! tail
3115	sub	%i2, %l5, %i2
3116	add	%l5, 15, %l5	! round up to 16n
3117	srlx	%i2, 4, %i2
3118	srl	%l5, 4, %l5
3119	sub	%i2, 1, %i2
3120	add	%l5, 1, %l5
3121
3122.L256_xts_enblk2x:
3123	ldx		[%i0 + 0], %o0
3124	ldx		[%i0 + 8], %o1
3125	ldx		[%i0 + 16], %o2
3126	brz,pt		%l0, 5f
3127	ldx		[%i0 + 24], %o3
3128
3129	ldx		[%i0 + 32], %o4
3130	sllx		%o0, %l0, %o0
3131	srlx		%o1, %l1, %g1
3132	or		%g1, %o0, %o0
3133	sllx		%o1, %l0, %o1
3134	srlx		%o2, %l1, %g1
3135	or		%g1, %o1, %o1
3136	sllx		%o2, %l0, %o2
3137	srlx		%o3, %l1, %g1
3138	or		%g1, %o2, %o2
3139	sllx		%o3, %l0, %o3
3140	srlx		%o4, %l1, %o4
3141	or		%o4, %o3, %o3
31425:
3143	.word	0x99b02302 !movxtod	%g2,%f12
3144	.word	0x9db02303 !movxtod	%g3,%f14
3145	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3146	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3147
3148	srax		%g3, 63, %l7		! next tweak value
3149	addcc		%g2, %g2, %g2
3150	and		%l7, 0x87, %l7
3151	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3152	xor		%l7, %g2, %g2
3153
3154	.word	0x91b02302 !movxtod	%g2,%f8
3155	.word	0x95b02303 !movxtod	%g3,%f10
3156	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3157	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3158
3159	xor		%g4, %o0, %o0		! ^= rk[0]
3160	xor		%g5, %o1, %o1
3161	xor		%g4, %o2, %o2		! ^= rk[0]
3162	xor		%g5, %o3, %o3
3163	.word	0x81b02308 !movxtod	%o0,%f0
3164	.word	0x85b02309 !movxtod	%o1,%f2
3165	.word	0x89b0230a !movxtod	%o2,%f4
3166	.word	0x8db0230b !movxtod	%o3,%f6
3167
3168	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3169	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3170	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
3171	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3172
3173	prefetch	[%i0 + 32+63], 20
3174	call		_aes256_encrypt_2x
3175	add		%i0, 32, %i0
3176
3177	.word	0x91b02302 !movxtod	%g2,%f8
3178	.word	0x95b02303 !movxtod	%g3,%f10
3179
3180	srax		%g3, 63, %l7		! next tweak value
3181	addcc		%g2, %g2, %g2
3182	and		%l7, 0x87, %l7
3183	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3184	xor		%l7, %g2, %g2
3185
3186	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3187	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3188
3189	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3190	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3191	.word	0x89b20d84 !fxor	%f8,%f4,%f4
3192	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3193
3194	subcc		%i2, 2, %i2
3195	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3196	add		%i1, 8, %i1
3197	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3198	add		%i1, 8, %i1
3199	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3200	add		%i1, 8, %i1
3201	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3202	bgu,pt		%xcc, .L256_xts_enblk2x
3203	add		%i1, 8, %i1
3204
3205	add		%l5, %i2, %i2
3206	andcc		%i2, 1, %g0		! is number of blocks even?
3207	membar		#StoreLoad|#StoreStore
3208	bnz,pt		%icc, .L256_xts_enloop
3209	srl		%i2, 0, %i2
3210	brnz,pn		%i2, .L256_xts_enloop2x
3211	nop
3212
3213	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3214	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3215	brnz,pn		%i5, .L256_xts_ensteal
3216	nop
3217
3218	ret
3219	restore
3220!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3221.align	32
3222.L256_xts_ensteal:
3223	std		%f0, [%fp + 2047-16]	! copy of output
3224	std		%f2, [%fp + 2047-8]
3225
3226	srl		%l0, 3, %l0
3227	add		%fp, 2047-16, %l7
3228	add		%i0, %l0, %i0	! original %i0+%i2&-15
3229	add		%i1, %l2, %i1	! original %i1+%i2&-15
3230	mov		0, %l0
3231	nop					! align
3232
3233.L256_xts_enstealing:
3234	ldub		[%i0 + %l0], %o0
3235	ldub		[%l7  + %l0], %o1
3236	dec		%i5
3237	stb		%o0, [%l7  + %l0]
3238	stb		%o1, [%i1 + %l0]
3239	brnz		%i5, .L256_xts_enstealing
3240	inc		%l0
3241
3242	mov		%l7, %i0
3243	sub		%i1, 16, %i1
3244	mov		0, %l0
3245	sub		%i1, %l2, %i1
3246	ba		.L256_xts_enloop	! one more time
3247	mov		1, %i2				! %i5 is 0
3248	ret
3249	restore
3250.type	aes256_t4_xts_encrypt,#function
3251.size	aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
!======================================================================
! aes256_t4_xts_decrypt — AES-256-XTS decryption using the SPARC T4
! hardware AES instructions (emitted as raw .word encodings; the
! decoded mnemonic follows each "!").
! Register roles, inferred from the code below — NOTE(review): confirm
! against the t4 perlasm generator:
!   %i0 = inp, %i1 = out, %i2 = length in bytes, %i4 = "key2" schedule
!   used to encrypt the sector tweak, %i5 = ivec (sector number) on
!   entry; the AES-256 decryption round keys are loaded by
!   _aes256_load_deckey (rk[0] ends up in %g4/%g5, later rounds in FP
!   registers used by _aes256_decrypt_1x/_2x).
! Locals: %g3:%g2 = running 128-bit tweak; %i5 is reused as the
!   residual byte count for ciphertext stealing; %l0 = input
!   misalignment in bits, %l1 = 64-%l0; %l2 = output misalignment;
!   %l3 = partial-store byte mask; %l5 = inp-out distance (cleared to
!   force the generic path when the bulk path is unusable).
!======================================================================
.globl	aes256_t4_xts_decrypt
.align	32
aes256_t4_xts_decrypt:
	save		%sp, -192-16, %sp	! extra 16 bytes: tweak/steal scratch at %fp+2047-16
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	! Initial tweak = AES-encrypt(ivec) under the secondary key (%i4),
	! written to the stack scratch slot, then read back little-endian.
	mov		%i5, %o0
	add		%fp, 2047-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, 2047-16, %l7
	ldxa		[%l7]0x88, %g2		! 0x88 = little-endian load ASI
	add		%fp, 2047-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	! Program the GSR mask so bshuffle performs a byte swap
	! (tweak is kept in little-endian form in %g3:%g2).
	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_deckey
	and		%i2, 15,  %i5		! %i5 = residual bytes (ciphertext stealing)
	and		%i2, -16, %i2
	mov		0, %l7
	movrnz		%i5, 16,  %l7
	sub		%i2, %l7, %i2		! hold back last full block if stealing

	! Alignment bookkeeping; decide between generic and bulk paths.
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0		! %l0 = input misalignment in bits
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
	and		%i1, 7, %l2		! %l2 = output misalignment in bytes
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%xcc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L256_xts_desteal
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_xts_deloop2x
	srlx		%i2, 4, %i2
.L256_xts_deloop:
	! Single-block path (odd leading block; also re-entered once from
	! the ciphertext-stealing tail with %i2 == 1).
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! Funnel-shift three 8-byte loads into one aligned 16-byte block.
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	! Advance tweak: 128-bit left shift, XOR 0x87 on carry-out
	! (multiplication by x in GF(2^128), XTS polynomial).
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_xts_deloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for the tail bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_deloop2x+4
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main two-blocks-per-iteration decrypt loop.
.align	32
.L256_xts_deloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! Funnel-shift five 8-byte loads into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! Tweak for the second block of the pair.
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

	! Post-whitening with the same two tweaks, then advance the tweak
	! once more for the next pair.
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_xts_deloop2x
	add		%i1, 32, %i1

	! Keep last plaintext block in %f0/%f2 for possible stealing.
	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for the tail bytes
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_xts_deloop2x+4
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: large, mutually aligned buffers; output is written with
! block-init stores (ASI 0xe2) to avoid fetching destination lines.
! %i2 = blocks-2 for the loop countdown, %l5 = tail blocks + 1 handled
! by the generic loops afterwards.
.align	32
.L256_xts_deblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_xts_deblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	! Funnel-shift five 8-byte loads into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	! Tweak for the second block of the pair.
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

	! Post-whitening and tweak advance, as in the generic 2x loop.
	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%xcc, .L256_xts_deblk2x
	add		%i1, 8, %i1

	! Hand the tail back to the generic loops; membar orders the
	! block-init stores against subsequent normal accesses.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_xts_deloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_xts_deloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L256_xts_desteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext-stealing tail: decrypt the last full block with the
! SECOND-to-last tweak (computed into %o2:%o3 without disturbing
! %g3:%g2), buffer it on the stack, splice in the %i5 residual input
! bytes, then jump back to the single-block loop to decrypt the
! completed block with the remaining tweak.
.align	32
.L256_xts_desteal:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 8f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
8:
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	.word	0x97b0c223 !addxc	%g3,%g3,%o3
	xor		%l7, %o2, %o2

	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	std		%f0, [%fp + 2047-16]	! buffered plaintext of last full block
	std		%f2, [%fp + 2047-8]

	srl		%l0, 3, %l0
	add		%fp, 2047-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	add		%i1, 16, %i1
	nop					! align

.L256_xts_destealing:
	! Byte-swap loop: residual input bytes replace the head of the
	! buffered block; displaced plaintext bytes become the output tail.
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L256_xts_destealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L256_xts_deloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret					! not reached: ba above is unconditional
	restore
.type	aes256_t4_xts_decrypt,#function
.size	aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
!======================================================================
! aes192_t4_ctr32_encrypt — AES-192 in CTR mode with a 32-bit counter
! (only the last 32-bit word of the counter block is incremented; the
! upper 96 bits stay fixed, as the "ctr32" name implies).
! Register roles, inferred from the code below — NOTE(review): confirm
! against the t4 perlasm generator:
!   %i0 = inp, %i1 = out, %i2 = length in 16-byte blocks (scaled to
!   bytes by the sllx below), %i4 = counter block; round keys loaded by
!   _aes192_load_enckey (rk[0] in %g4/%g5, rounds in %f16+).
! The rk[0] XOR is folded into the counter words up front, so each
! block only needs the per-block counter XOR before the first round.
!======================================================================
.globl	aes192_t4_ctr32_encrypt
.align	32
aes192_t4_ctr32_encrypt:
	save		%sp, -192, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_enckey
	sllx		%i2, 4, %i2		! blocks -> bytes

	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7	! %l7 = 32-bit counter word

	! Pre-fold rk[0] into the fixed 96 bits of the counter block.
	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]
	xor		%g1, %g5, %g5
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

	! Alignment bookkeeping; decide between generic and bulk paths.
	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0		! %l0 = input misalignment in bits
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
	and		%i1, 7, %l2		! %l2 = output misalignment in bytes
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%xcc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_ctr32_loop2x
	srlx		%i2, 4, %i2
.L192_ctr32_loop:
	! Single-block path (odd leading block).
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! Funnel-shift three 8-byte loads into one aligned 16-byte block.
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7		! bump 32-bit counter
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	! First round done here; remaining rounds in the shared tail.
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes192_encrypt_1x+8
	add		%i0, 16, %i0

	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_ctr32_loop2x
	add		%i1, 16, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for the tail bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main two-blocks-per-iteration loop.
.align	32
.L192_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! Funnel-shift five 8-byte loads into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! Two consecutive counter values -> %f2 and %f6.
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	! First round done here; remaining rounds in the shared tail.
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_ctr32_loop2x
	add		%i1, 32, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for the tail bytes
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: large, mutually aligned buffers; output written with
! block-init stores (ASI 0xe2). Tail is handed back to the generic
! loops after a membar.
.align	32
.L192_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	! Funnel-shift five 8-byte loads into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	! Two consecutive counter values -> %f2 and %f6.
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%xcc, .L192_ctr32_blk_loop2x
	add		%i1, 8, %i1

	! Hand the tail back to the generic loops; membar orders the
	! block-init stores against subsequent normal accesses.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_ctr32_loop2x
	nop

	ret
	restore
.type	aes192_t4_ctr32_encrypt,#function
.size	aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
!======================================================================
! aes192_t4_cbc_decrypt — AES-192 CBC-mode decryption.
! Register roles, inferred from the code below — NOTE(review): confirm
! against the t4 perlasm generator:
!   %i0 = inp, %i1 = out, %i2 = length in bytes, %i4 = ivec pointer;
!   round keys loaded by _aes192_load_deckey (rk[0] in %g4/%g5).
! The current IV / previous ciphertext block is kept in %f12-%f15 and
! the updated IV is written back through %i4 on every exit path.
! Returns immediately when len == 0.
!======================================================================
.globl	aes192_t4_cbc_decrypt
.align	32
aes192_t4_cbc_decrypt:
	save		%sp, -192, %sp
	cmp		%i2, 0
	be,pn		%xcc, .L192_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_deckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0		! %l0 = input misalignment in bits
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1		! %l1 = 64 - %l0
	and		%i1, 7, %l2		! %l2 = output misalignment in bytes
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%xcc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3		! %l3 = partial-store byte mask

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_cbc_dec_loop2x
	prefetch	[%i1], 22
.L192_cbc_dec_loop:
	! Single-block path (odd leading block).
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	! Funnel-shift three 8-byte loads into one aligned 16-byte block.
	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	! %o0/%o1 keep the raw ciphertext (it becomes the next IV).
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes192_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12		! next IV = this ciphertext
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]	! write back final IV
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L192_cbc_dec_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3		! invert mask for the tail bytes
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]	! write back final IV
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main two-blocks-per-iteration loop.
.align	32
.L192_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	! Funnel-shift five 8-byte loads into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	! %o0-%o3 keep the raw ciphertext (feeds the IV chain below).
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0

	! Unchain: block0 ^= old IV, block1 ^= ciphertext0;
	! new IV = ciphertext1 (%o2:%o3).
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]	! write back final IV
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3		! invert mask for the tail bytes
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]	! write back final IV
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: large, mutually aligned buffers; output written with
! block-init stores (ASI 0xe2). Tail is handed back to the generic
! loops after a membar.
.align	32
.L192cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	! Funnel-shift five 8-byte loads into two aligned 16-byte blocks.
	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	! Unchain and advance the IV, as in the generic 2x loop.
	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%xcc, .L192_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	! Hand the tail back to the generic loops; membar orders the
	! block-init stores against subsequent normal accesses.
	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes192_t4_cbc_decrypt,#function
.size	aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
4213.globl	aes256_t4_cbc_decrypt
4214.align	32
4215aes256_t4_cbc_decrypt:
4216	save		%sp, -192, %sp
4217	cmp		%i2, 0
4218	be,pn		%xcc, .L256_cbc_dec_abort
4219	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
4220	sub		%i0, %i1, %l5	! %i0!=%i1
4221	ld		[%i4 + 0], %f12	! load ivec
4222	ld		[%i4 + 4], %f13
4223	ld		[%i4 + 8], %f14
4224	ld		[%i4 + 12], %f15
4225	prefetch	[%i0], 20
4226	prefetch	[%i0 + 63], 20
4227	call		_aes256_load_deckey
4228	and		%i0, 7, %l0
4229	andn		%i0, 7, %i0
4230	sll		%l0, 3, %l0
4231	mov		64, %l1
4232	mov		0xff, %l3
4233	sub		%l1, %l0, %l1
4234	and		%i1, 7, %l2
4235	cmp		%i2, 255
4236	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
4237	movleu		%xcc, 0, %l5	!	%i2<256 ||
4238	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
4239	srl		%l3, %l2, %l3
4240
4241	andcc		%i2, 16, %g0		! is number of blocks even?
4242	srlx		%i2, 4, %i2
4243	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
4244	bz		%icc, .L256_cbc_dec_loop2x
4245	prefetch	[%i1], 22
4246.L256_cbc_dec_loop:
4247	ldx		[%i0 + 0], %o0
4248	brz,pt		%l0, 4f
4249	ldx		[%i0 + 8], %o1
4250
4251	ldx		[%i0 + 16], %o2
4252	sllx		%o0, %l0, %o0
4253	srlx		%o1, %l1, %g1
4254	sllx		%o1, %l0, %o1
4255	or		%g1, %o0, %o0
4256	srlx		%o2, %l1, %o2
4257	or		%o2, %o1, %o1
42584:
4259	xor		%g4, %o0, %o2		! ^= rk[0]
4260	xor		%g5, %o1, %o3
4261	.word	0x81b0230a !movxtod	%o2,%f0
4262	.word	0x85b0230b !movxtod	%o3,%f2
4263
4264	prefetch	[%i1 + 63], 22
4265	prefetch	[%i0 + 16+63], 20
4266	call		_aes256_decrypt_1x
4267	add		%i0, 16, %i0
4268
4269	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4270	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4271	.word	0x99b02308 !movxtod	%o0,%f12
4272	.word	0x9db02309 !movxtod	%o1,%f14
4273
4274	brnz,pn		%l2, 2f
4275	sub		%i2, 1, %i2
4276
4277	std		%f0, [%i1 + 0]
4278	std		%f2, [%i1 + 8]
4279	brnz,pt		%i2, .L256_cbc_dec_loop2x
4280	add		%i1, 16, %i1
4281	st		%f12, [%i4 + 0]
4282	st		%f13, [%i4 + 4]
4283	st		%f14, [%i4 + 8]
4284	st		%f15, [%i4 + 12]
4285.L256_cbc_dec_abort:
4286	ret
4287	restore
4288
4289.align	16
42902:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
4291						! and ~3x deterioration
4292						! in inp==out case
4293	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
4294	.word	0x8db00902 !faligndata	%f0,%f2,%f6
4295	.word	0x91b08902 !faligndata	%f2,%f2,%f8
4296
4297	stda		%f4, [%i1 + %l3]0xc0	! partial store
4298	std		%f6, [%i1 + 8]
4299	add		%i1, 16, %i1
4300	orn		%g0, %l3, %l3
4301	stda		%f8, [%i1 + %l3]0xc0	! partial store
4302
4303	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
4304	orn		%g0, %l3, %l3
4305	st		%f12, [%i4 + 0]
4306	st		%f13, [%i4 + 4]
4307	st		%f14, [%i4 + 8]
4308	st		%f15, [%i4 + 12]
4309	ret
4310	restore
4311
4312!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4313.align	32
4314.L256_cbc_dec_loop2x:
4315	ldx		[%i0 + 0], %o0
4316	ldx		[%i0 + 8], %o1
4317	ldx		[%i0 + 16], %o2
4318	brz,pt		%l0, 4f
4319	ldx		[%i0 + 24], %o3
4320
4321	ldx		[%i0 + 32], %o4
4322	sllx		%o0, %l0, %o0
4323	srlx		%o1, %l1, %g1
4324	or		%g1, %o0, %o0
4325	sllx		%o1, %l0, %o1
4326	srlx		%o2, %l1, %g1
4327	or		%g1, %o1, %o1
4328	sllx		%o2, %l0, %o2
4329	srlx		%o3, %l1, %g1
4330	or		%g1, %o2, %o2
4331	sllx		%o3, %l0, %o3
4332	srlx		%o4, %l1, %o4
4333	or		%o4, %o3, %o3
43344:
4335	xor		%g4, %o0, %o4		! ^= rk[0]
4336	xor		%g5, %o1, %o5
4337	.word	0x81b0230c !movxtod	%o4,%f0
4338	.word	0x85b0230d !movxtod	%o5,%f2
4339	xor		%g4, %o2, %o4
4340	xor		%g5, %o3, %o5
4341	.word	0x89b0230c !movxtod	%o4,%f4
4342	.word	0x8db0230d !movxtod	%o5,%f6
4343
4344	prefetch	[%i1 + 63], 22
4345	prefetch	[%i0 + 32+63], 20
4346	call		_aes256_decrypt_2x
4347	add		%i0, 32, %i0
4348
4349	.word	0x91b02308 !movxtod	%o0,%f8
4350	.word	0x95b02309 !movxtod	%o1,%f10
4351	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4352	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4353	.word	0x99b0230a !movxtod	%o2,%f12
4354	.word	0x9db0230b !movxtod	%o3,%f14
4355	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4356	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4357
4358	brnz,pn		%l2, 2f
4359	sub		%i2, 2, %i2
4360
4361	std		%f0, [%i1 + 0]
4362	std		%f2, [%i1 + 8]
4363	std		%f4, [%i1 + 16]
4364	std		%f6, [%i1 + 24]
4365	brnz,pt		%i2, .L256_cbc_dec_loop2x
4366	add		%i1, 32, %i1
4367	st		%f12, [%i4 + 0]
4368	st		%f13, [%i4 + 4]
4369	st		%f14, [%i4 + 8]
4370	st		%f15, [%i4 + 12]
4371	ret
4372	restore
4373
4374.align	16
43752:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
4376						! and ~3x deterioration
4377						! in inp==out case
4378	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
4379	.word	0x81b00902 !faligndata	%f0,%f2,%f0
4380	.word	0x85b08904 !faligndata	%f2,%f4,%f2
4381	.word	0x89b10906 !faligndata	%f4,%f6,%f4
4382	.word	0x8db18906 !faligndata	%f6,%f6,%f6
4383	stda		%f8, [%i1 + %l3]0xc0	! partial store
4384	std		%f0, [%i1 + 8]
4385	std		%f2, [%i1 + 16]
4386	std		%f4, [%i1 + 24]
4387	add		%i1, 32, %i1
4388	orn		%g0, %l3, %l3
4389	stda		%f6, [%i1 + %l3]0xc0	! partial store
4390
4391	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
4392	orn		%g0, %l3, %l3
4393	st		%f12, [%i4 + 0]
4394	st		%f13, [%i4 + 4]
4395	st		%f14, [%i4 + 8]
4396	st		%f15, [%i4 + 12]
4397	ret
4398	restore
4399
4400!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4401.align	32
4402.L256cbc_dec_blk:
4403	add	%i1, %i2, %l5
4404	and	%l5, 63, %l5	! tail
4405	sub	%i2, %l5, %i2
4406	add	%l5, 15, %l5	! round up to 16n
4407	srlx	%i2, 4, %i2
4408	srl	%l5, 4, %l5
4409	sub	%i2, 1, %i2
4410	add	%l5, 1, %l5
4411
4412.L256_cbc_dec_blk_loop2x:
4413	ldx		[%i0 + 0], %o0
4414	ldx		[%i0 + 8], %o1
4415	ldx		[%i0 + 16], %o2
4416	brz,pt		%l0, 5f
4417	ldx		[%i0 + 24], %o3
4418
4419	ldx		[%i0 + 32], %o4
4420	sllx		%o0, %l0, %o0
4421	srlx		%o1, %l1, %g1
4422	or		%g1, %o0, %o0
4423	sllx		%o1, %l0, %o1
4424	srlx		%o2, %l1, %g1
4425	or		%g1, %o1, %o1
4426	sllx		%o2, %l0, %o2
4427	srlx		%o3, %l1, %g1
4428	or		%g1, %o2, %o2
4429	sllx		%o3, %l0, %o3
4430	srlx		%o4, %l1, %o4
4431	or		%o4, %o3, %o3
44325:
4433	xor		%g4, %o0, %o4		! ^= rk[0]
4434	xor		%g5, %o1, %o5
4435	.word	0x81b0230c !movxtod	%o4,%f0
4436	.word	0x85b0230d !movxtod	%o5,%f2
4437	xor		%g4, %o2, %o4
4438	xor		%g5, %o3, %o5
4439	.word	0x89b0230c !movxtod	%o4,%f4
4440	.word	0x8db0230d !movxtod	%o5,%f6
4441
4442	prefetch	[%i0 + 32+63], 20
4443	call		_aes256_decrypt_2x
4444	add		%i0, 32, %i0
4445	subcc		%i2, 2, %i2
4446
4447	.word	0x91b02308 !movxtod	%o0,%f8
4448	.word	0x95b02309 !movxtod	%o1,%f10
4449	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4450	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4451	.word	0x99b0230a !movxtod	%o2,%f12
4452	.word	0x9db0230b !movxtod	%o3,%f14
4453	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4454	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4455
4456	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4457	add		%i1, 8, %i1
4458	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4459	add		%i1, 8, %i1
4460	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4461	add		%i1, 8, %i1
4462	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4463	bgu,pt		%xcc, .L256_cbc_dec_blk_loop2x
4464	add		%i1, 8, %i1
4465
4466	add		%l5, %i2, %i2
4467	andcc		%i2, 1, %g0		! is number of blocks even?
4468	membar		#StoreLoad|#StoreStore
4469	bnz,pt		%icc, .L256_cbc_dec_loop
4470	srl		%i2, 0, %i2
4471	brnz,pn		%i2, .L256_cbc_dec_loop2x
4472	nop
4473	st		%f12, [%i4 + 0]	! write out ivec
4474	st		%f13, [%i4 + 4]
4475	st		%f14, [%i4 + 8]
4476	st		%f15, [%i4 + 12]
4477	ret
4478	restore
4479.type	aes256_t4_cbc_decrypt,#function
4480.size	aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
.align	32
!----------------------------------------------------------------------
! _aes256_decrypt_1x: run one 16-byte block through the 14 AES-256
! decryption rounds.  Leaf routine (returns via retl, no new window).
!
! In:      %f0:%f2  = input block (presumably already XORed with rk[0]
!                     by the caller, as the visible _2x call sites do
!                     -- TODO confirm for the 1x callers outside view)
!          %f16-%f62 = round-key material preloaded by the caller
!          %i3      = key schedule base pointer
! Out:     %f0:%f2  = decrypted block
! Scratch: %f4
!
! AES-256 needs 15 round keys (240 bytes) -- more than fits in
! %f16-%f62 -- so the keys for the final two rounds are fetched from
! [%i3+208..232] mid-stream, and %f16-%f22 are restored from
! [%i3+16..40] on the way out (last load in the retl delay slot) so
! the caller's preloaded register state survives for the next call.
!----------------------------------------------------------------------
_aes256_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 208], %f16	! fetch round keys that could not
	ldd		[%i3 + 216], %f18	! stay register-resident
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	! middle rounds, all from register-resident keys (%f24-%f62)
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x80cf4444 !aes_dround01	%f60,%f4,%f2,%f0
	.word	0x84cfc464 !aes_dround23	%f62,%f4,%f2,%f2
	! round 13 uses the keys reloaded into %f16/%f18 above
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 16], %f16	! restore caller's %f16-%f22 ...
	ldd		[%i3 + 24], %f18
	! final round: the "_l" forms fold in the last-round transform
	.word	0x80cd04c4 !aes_dround01_l	%f20,%f4,%f2,%f0
	.word	0x84cd84e4 !aes_dround23_l	%f22,%f4,%f2,%f2
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22	! ... last restore in delay slot
.type	_aes256_decrypt_1x,#function
.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x
4522
.align	32
!----------------------------------------------------------------------
! _aes256_decrypt_2x: decrypt two independent 16-byte blocks in
! parallel through the 14 AES-256 rounds, interleaving the two opcode
! streams to hide instruction latency.  Leaf routine (retl).
!
! In:      %f0:%f2  = block 0 (already XORed with rk[0] by the caller:
!                     see the "^= rk[0]" xor sequences at the visible
!                     call sites above)
!          %f4:%f6  = block 1 (likewise pre-XORed)
!          %f16-%f62 = round-key material preloaded by the caller
!          %i3      = key schedule base pointer
! Out:     %f0:%f2, %f4:%f6 = decrypted blocks
! Scratch: %f8, %f10
!
! Same key-schedule juggling as _aes256_decrypt_1x: the keys for the
! last two rounds come from [%i3+208..232], and %f16-%f22 are restored
! from [%i3+16..40] before/at return so the caller's preloaded state
! is preserved across calls.
!----------------------------------------------------------------------
_aes256_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 208], %f16	! fetch round keys that could not
	ldd		[%i3 + 216], %f18	! stay register-resident
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	! middle rounds, both blocks interleaved, keys from %f24-%f62
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	.word	0x80cf4448 !aes_dround01	%f60,%f8,%f2,%f0
	.word	0x84cfc468 !aes_dround23	%f62,%f8,%f2,%f2
	.word	0x88cf4c4a !aes_dround01	%f60,%f10,%f6,%f4
	.word	0x8ccfcc6a !aes_dround23	%f62,%f10,%f6,%f6
	! round 13 uses the keys reloaded into %f16/%f18 above
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 16], %f16	! restore caller's %f16-%f22 ...
	ldd		[%i3 + 24], %f18
	! final round: the "_l" forms fold in the last-round transform
	.word	0x80cd04c8 !aes_dround01_l	%f20,%f8,%f2,%f0
	.word	0x84cd84e8 !aes_dround23_l	%f22,%f8,%f2,%f2
	.word	0x88cd0cca !aes_dround01_l	%f20,%f10,%f6,%f4
	.word	0x8ccd8cea !aes_dround23_l	%f22,%f10,%f6,%f6
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22	! ... last restore in delay slot
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x
4592
.align	32
!----------------------------------------------------------------------
! _aes192_decrypt_1x: run one 16-byte block through the 12 AES-192
! decryption rounds.  Leaf routine (retl).
!
! In:      %f0:%f2  = input block (presumably already XORed with rk[0]
!                     by the caller, as the 256-bit _2x call sites
!                     above do -- TODO confirm, callers outside view)
!          %f16-%f62 = round keys preloaded by the caller
! Out:     %f0:%f2  = decrypted block
! Scratch: %f4
!
! Unlike the 256-bit variant, all the AES-192 key material fits in
! %f16-%f62, so this routine touches no memory at all; the final
! "_l" round executes in the retl delay slot.
!----------------------------------------------------------------------
_aes192_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	! final round ("_l" = last-round transform); second half in delay slot
	.word	0x80cf44c4 !aes_dround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4e4 !aes_dround23_l	%f62,%f4,%f2,%f2
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x
4622
.align	32
!----------------------------------------------------------------------
! _aes192_decrypt_2x: decrypt two independent 16-byte blocks in
! parallel through the 12 AES-192 rounds, interleaving the two opcode
! streams to hide instruction latency.  Leaf routine (retl).
!
! In:      %f0:%f2  = block 0 (presumably pre-XORed with rk[0] by the
!                     caller, as the 256-bit _2x call sites above do
!                     -- TODO confirm, callers outside view)
!          %f4:%f6  = block 1 (likewise)
!          %f16-%f62 = round keys preloaded by the caller
! Out:     %f0:%f2, %f4:%f6 = decrypted blocks
! Scratch: %f8, %f10
!
! All AES-192 key material fits in %f16-%f62, so no memory traffic;
! the last "_l" round instruction executes in the retl delay slot.
!----------------------------------------------------------------------
_aes192_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	! final round ("_l" = last-round transform); last piece in delay slot
	.word	0x80cf44c8 !aes_dround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4e8 !aes_dround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4cca !aes_dround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccea !aes_dround23_l	%f62,%f10,%f6,%f6
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
4676.asciz	"AES for SPARC T4, David S. Miller, Andy Polyakov"
4677.align	4
4678