.text

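! aes_t4_encrypt: encrypt one 16-byte block.  Per the SPARC calling
! convention: %o0 = input block, %o1 = output block, %o2 = expanded
! key schedule (round count at offset 240, OpenSSL AES_KEY layout).
! Misaligned input is fixed up with shift/or below; misaligned output
! with faligndata plus partial stores.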
.globl	aes_t4_encrypt
.align	32
aes_t4_encrypt:
	andcc		%o0, 7, %g1		! is input aligned?
	andn		%o0, 7, %o0

	ldx		[%o2 + 0], %g4
	ldx		[%o2 + 8], %g5

	ldx		[%o0 + 0], %o4
	bz,pt		%icc, 1f
	ldx		[%o0 + 8], %o5
	ldx		[%o0 + 16], %o0
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		%o0, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[%o2 + 240], %o3
	ldd		[%o2 + 16], %f12
	ldd		[%o2 + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	srl		%o3, 1, %o3
	ldd		[%o2 + 32], %f16
	sub		%o3, 1, %o3
	ldd		[%o2 + 40], %f18
	add		%o2, 48, %o2

.Lenc:
	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
	ldd		[%o2 + 0], %f12
	ldd		[%o2 + 8], %f14
	sub		%o3,1,%o3
	.word	0x80cc0404 !aes_eround01	%f16,%f4,%f2,%f0
	.word	0x84cc8424 !aes_eround23	%f18,%f4,%f2,%f2
	ldd		[%o2 + 16], %f16
	ldd		[%o2 + 24], %f18
	brnz,pt		%o3, .Lenc
	add		%o2, 32, %o2

	andcc		%o1, 7, %o4		! is output aligned?
	.word	0x88cb0400 !aes_eround01	%f12,%f0,%f2,%f4
	.word	0x84cb8420 !aes_eround23	%f14,%f0,%f2,%f2
	.word	0x80cc0484 !aes_eround01_l	%f16,%f4,%f2,%f0
	.word	0x84cc84a4 !aes_eround23_l	%f18,%f4,%f2,%f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [%o1 + 0]
	retl
	std		%f2, [%o1 + 8]

2:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
	mov		0xff, %o5
	srl		%o5, %o4, %o5

	.word	0x89b00900 !faligndata	%f0,%f0,%f4
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%o1 + %o5]0xc0	! partial store
	std		%f6, [%o1 + 8]
	add		%o1, 16, %o1
	orn		%g0, %o5, %o5
	retl
	stda		%f8, [%o1 + %o5]0xc0	! partial store
.type	aes_t4_encrypt,#function
.size	aes_t4_encrypt,.-aes_t4_encrypt

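! aes_t4_decrypt: same arguments and alignment handling as
! aes_t4_encrypt above, but with the dround (inverse cipher) opcodes
! and a key schedule prepared by aes_t4_set_decrypt_key.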
.globl	aes_t4_decrypt
.align	32
aes_t4_decrypt:
	andcc		%o0, 7, %g1		! is input aligned?
	andn		%o0, 7, %o0

	ldx		[%o2 + 0], %g4
	ldx		[%o2 + 8], %g5

	ldx		[%o0 + 0], %o4
	bz,pt		%icc, 1f
	ldx		[%o0 + 8], %o5
	ldx		[%o0 + 16], %o0
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		%o0, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[%o2 + 240], %o3
	ldd		[%o2 + 16], %f12
	ldd		[%o2 + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	srl		%o3, 1, %o3
	ldd		[%o2 + 32], %f16
	sub		%o3, 1, %o3
	ldd		[%o2 + 40], %f18
	add		%o2, 48, %o2

.Ldec:
	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
	ldd		[%o2 + 0], %f12
	ldd		[%o2 + 8], %f14
	sub		%o3,1,%o3
	.word	0x80cc0444 !aes_dround01	%f16,%f4,%f2,%f0
	.word	0x84cc8464 !aes_dround23	%f18,%f4,%f2,%f2
	ldd		[%o2 + 16], %f16
	ldd		[%o2 + 24], %f18
	brnz,pt		%o3, .Ldec
	add		%o2, 32, %o2

	andcc		%o1, 7, %o4		! is output aligned?
	.word	0x88cb0440 !aes_dround01	%f12,%f0,%f2,%f4
	.word	0x84cb8460 !aes_dround23	%f14,%f0,%f2,%f2
	.word	0x80cc04c4 !aes_dround01_l	%f16,%f4,%f2,%f0
	.word	0x84cc84e4 !aes_dround23_l	%f18,%f4,%f2,%f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [%o1 + 0]
	retl
	std		%f2, [%o1 + 8]

2:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
	mov		0xff, %o5
	srl		%o5, %o4, %o5

	.word	0x89b00900 !faligndata	%f0,%f0,%f4
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%o1 + %o5]0xc0	! partial store
	std		%f6, [%o1 + 8]
	add		%o1, 16, %o1
	orn		%g0, %o5, %o5
	retl
	stda		%f8, [%o1 + %o5]0xc0	! partial store
.type	aes_t4_decrypt,#function
.size	aes_t4_decrypt,.-aes_t4_decrypt
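! aes_t4_set_encrypt_key: expand a user key with the T4 kexpand
! opcodes.  %o0 = user key, %o1 = key length in bits (128/192/256),
! %o2 = output AES_KEY; the round count (10/12/14) is stored at
! offset 240 and 0 is returned to signal success.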
.globl	aes_t4_set_encrypt_key
.align	32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
	and		%o0, 7, %o3
	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
	cmp		%o1, 192
	ldd		[%o0 + 0], %f0
	bl,pt		%icc,.L128
	ldd		[%o0 + 8], %f2

	be,pt		%icc,.L192
	ldd		[%o0 + 16], %f4
	brz,pt		%o3, .L256aligned
	ldd		[%o0 + 24], %f6

	ldd		[%o0 + 32], %f8
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18908 !faligndata	%f6,%f8,%f6
.L256aligned:
	std		%f0, [%o2 + 0]
	.word	0x80c80106 !aes_kexpand1	%f0,%f6,0,%f0
	std		%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 16]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 24]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 32]
	.word	0x80c80306 !aes_kexpand1	%f0,%f6,1,%f0
	std		%f2, [%o2 + 40]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 48]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 56]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 64]
	.word	0x80c80506 !aes_kexpand1	%f0,%f6,2,%f0
	std		%f2, [%o2 + 72]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 80]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 88]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 96]
	.word	0x80c80706 !aes_kexpand1	%f0,%f6,3,%f0
	std		%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 112]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 120]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 128]
	.word	0x80c80906 !aes_kexpand1	%f0,%f6,4,%f0
	std		%f2, [%o2 + 136]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 144]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 152]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 160]
	.word	0x80c80b06 !aes_kexpand1	%f0,%f6,5,%f0
	std		%f2, [%o2 + 168]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 176]
	.word	0x89b12602 !aes_kexpand0	%f4,%f2,%f4
	std		%f6, [%o2 + 184]
	.word	0x8db1a624 !aes_kexpand2	%f6,%f4,%f6
	std		%f0, [%o2 + 192]
	.word	0x80c80d06 !aes_kexpand1	%f0,%f6,6,%f0
	std		%f2, [%o2 + 200]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 208]
	std		%f6, [%o2 + 216]
	std		%f0, [%o2 + 224]
	std		%f2, [%o2 + 232]

	mov		14, %o3
	st		%o3, [%o2 + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L192:
	brz,pt		%o3, .L192aligned
	nop

	ldd		[%o0 + 24], %f6
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
.L192aligned:
	std		%f0, [%o2 + 0]
	.word	0x80c80104 !aes_kexpand1	%f0,%f4,0,%f0
	std		%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 16]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 24]
	.word	0x80c80304 !aes_kexpand1	%f0,%f4,1,%f0
	std		%f2, [%o2 + 32]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 40]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 48]
	.word	0x80c80504 !aes_kexpand1	%f0,%f4,2,%f0
	std		%f2, [%o2 + 56]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 64]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 72]
	.word	0x80c80704 !aes_kexpand1	%f0,%f4,3,%f0
	std		%f2, [%o2 + 80]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 88]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 96]
	.word	0x80c80904 !aes_kexpand1	%f0,%f4,4,%f0
	std		%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 112]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 120]
	.word	0x80c80b04 !aes_kexpand1	%f0,%f4,5,%f0
	std		%f2, [%o2 + 128]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 136]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 144]
	.word	0x80c80d04 !aes_kexpand1	%f0,%f4,6,%f0
	std		%f2, [%o2 + 152]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 160]
	.word	0x89b12622 !aes_kexpand2	%f4,%f2,%f4
	std		%f0, [%o2 + 168]
	.word	0x80c80f04 !aes_kexpand1	%f0,%f4,7,%f0
	std		%f2, [%o2 + 176]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f4, [%o2 + 184]
	std		%f0, [%o2 + 192]
	std		%f2, [%o2 + 200]

	mov		12, %o3
	st		%o3, [%o2 + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L128:
	brz,pt		%o3, .L128aligned
	nop

	ldd		[%o0 + 16], %f4
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
.L128aligned:
	std		%f0, [%o2 + 0]
	.word	0x80c80102 !aes_kexpand1	%f0,%f2,0,%f0
	std		%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 16]
	.word	0x80c80302 !aes_kexpand1	%f0,%f2,1,%f0
	std		%f2, [%o2 + 24]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 32]
	.word	0x80c80502 !aes_kexpand1	%f0,%f2,2,%f0
	std		%f2, [%o2 + 40]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 48]
	.word	0x80c80702 !aes_kexpand1	%f0,%f2,3,%f0
	std		%f2, [%o2 + 56]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 64]
	.word	0x80c80902 !aes_kexpand1	%f0,%f2,4,%f0
	std		%f2, [%o2 + 72]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 80]
	.word	0x80c80b02 !aes_kexpand1	%f0,%f2,5,%f0
	std		%f2, [%o2 + 88]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 96]
	.word	0x80c80d02 !aes_kexpand1	%f0,%f2,6,%f0
	std		%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 112]
	.word	0x80c80f02 !aes_kexpand1	%f0,%f2,7,%f0
	std		%f2, [%o2 + 120]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 128]
	.word	0x80c81102 !aes_kexpand1	%f0,%f2,8,%f0
	std		%f2, [%o2 + 136]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 144]
	.word	0x80c81302 !aes_kexpand1	%f0,%f2,9,%f0
	std		%f2, [%o2 + 152]
	.word	0x85b0a620 !aes_kexpand2	%f2,%f0,%f2
	std		%f0, [%o2 + 160]
	std		%f2, [%o2 + 168]

	mov		10, %o3
	st		%o3, [%o2 + 240]
	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_encrypt_key,#function
.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key

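! aes_t4_set_decrypt_key: the decryption schedule is the encryption
! schedule with the 16-byte round keys reversed, so expand via
! .Lset_encrypt_key and then swap round keys end-to-end in .Lkey_flip.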
.globl	aes_t4_set_decrypt_key
.align	32
aes_t4_set_decrypt_key:
	mov		%o7, %o5
	call		.Lset_encrypt_key
	nop

	mov		%o5, %o7
	sll		%o3, 4, %o0		! %o3 is number of rounds
	add		%o3, 2, %o3
	add		%o2, %o0, %o0	! %o0=%o2+16*rounds
	srl		%o3, 2, %o3		! %o3=(rounds+2)/4

.Lkey_flip:
	ldd		[%o2 + 0],  %f0
	ldd		[%o2 + 8],  %f2
	ldd		[%o2 + 16], %f4
	ldd		[%o2 + 24], %f6
	ldd		[%o0 + 0],  %f8
	ldd		[%o0 + 8],  %f10
	ldd		[%o0 - 16], %f12
	ldd		[%o0 - 8],  %f14
	sub		%o3, 1, %o3
	std		%f0, [%o0 + 0]
	std		%f2, [%o0 + 8]
	std		%f4, [%o0 - 16]
	std		%f6, [%o0 - 8]
	std		%f8, [%o2 + 0]
	std		%f10, [%o2 + 8]
	std		%f12, [%o2 + 16]
	std		%f14, [%o2 + 24]
	add		%o2, 32, %o2
	brnz		%o3, .Lkey_flip
	sub		%o0, 32, %o0

	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_decrypt_key,#function
.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
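! _aes128_encrypt_1x (and _2x below): ten rounds over one (or two)
! blocks already XORed with rk[0].  Blocks live in %f0:%f2 (and
! %f4:%f6); round keys 1..10 must be preloaded in %f16-%f54 by
! _aes128_loadkey.  The final round uses the *_l opcodes.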
.align	32
_aes128_encrypt_1x:
	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x80cd4484 !aes_eround01_l	%f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4a4 !aes_eround23_l	%f54,%f4,%f2,%f2
.type	_aes128_encrypt_1x,#function
.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x

.align	32
_aes128_encrypt_2x:
	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
	.word	0x80cd4488 !aes_eround01_l	%f52,%f8,%f2,%f0
	.word	0x84cdc4a8 !aes_eround23_l	%f54,%f8,%f2,%f2
	.word	0x88cd4c8a !aes_eround01_l	%f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccaa !aes_eround23_l	%f54,%f10,%f6,%f6
.type	_aes128_encrypt_2x,#function
.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x

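! _aes128_loadkey: cache the whole AES-128 schedule in registers from
! the AES_KEY at %i3: rk[0] goes to %g4:%g5, rounds 1..10 to
! %f16-%f54.  The enc/dec entry points are aliases since the register
! layout is identical for both directions.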
.align	32
_aes128_loadkey:
	ldx		[%i3 + 0], %g4
	ldx		[%i3 + 8], %g5
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	ldd		[%i3 + 32], %f20
	ldd		[%i3 + 40], %f22
	ldd		[%i3 + 48], %f24
	ldd		[%i3 + 56], %f26
	ldd		[%i3 + 64], %f28
	ldd		[%i3 + 72], %f30
	ldd		[%i3 + 80], %f32
	ldd		[%i3 + 88], %f34
	ldd		[%i3 + 96], %f36
	ldd		[%i3 + 104], %f38
	ldd		[%i3 + 112], %f40
	ldd		[%i3 + 120], %f42
	ldd		[%i3 + 128], %f44
	ldd		[%i3 + 136], %f46
	ldd		[%i3 + 144], %f48
	ldd		[%i3 + 152], %f50
	ldd		[%i3 + 160], %f52
	ldd		[%i3 + 168], %f54
	retl
	nop
.type	_aes128_loadkey,#function
.size	_aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey

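! aes128_t4_cbc_encrypt: %i0 = in, %i1 = out, %i2 = length in bytes,
! %i3 = key schedule, %i4 = ivec.  When the output is 8-byte aligned,
! the length is at least 128 and in != out, the block-store path
! (.L128cbc_enc_blk) is taken; otherwise the generic loop handles
! arbitrary alignment with partial stores.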
.globl	aes128_t4_cbc_encrypt
.align	32
aes128_t4_cbc_encrypt:
	save		%sp, -112, %sp
	cmp		%i2, 0
	be,pn		%icc, .L128_cbc_enc_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f0
	ld		[%i4 + 4], %f1
	ld		[%i4 + 8], %f2
	ld		[%i4 + 12], %f3
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_enckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 127
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<128 ||
	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	srlx		%i2, 4, %i2
	prefetch	[%i1], 22

.L128_cbc_enc_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_encrypt_1x
	add		%i0, 16, %i0

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_cbc_enc_loop
	add		%i1, 16, %i1
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
.L128_cbc_enc_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_enc_loop+4
	orn		%g0, %l3, %l3
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L128_cbc_enc_blk_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 5f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
5:
	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	prefetch	[%i0 + 16+63], 20
	call		_aes128_encrypt_1x
	add		%i0, 16, %i0
	sub		%i2, 1, %i2

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt		%i2, .L128_cbc_enc_blk_loop
	add		%i1, 8, %i1

	membar		#StoreLoad|#StoreStore
	brnz,pt		%l5, .L128_cbc_enc_loop
	mov		%l5, %i2
	st		%f0, [%i4 + 0]
	st		%f1, [%i4 + 4]
	st		%f2, [%i4 + 8]
	st		%f3, [%i4 + 12]
	ret
	restore
.type	aes128_t4_cbc_encrypt,#function
.size	aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
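! aes128_t4_ctr32_encrypt: %i0 = in, %i1 = out, %i2 = number of
! blocks, %i3 = key schedule, %i4 = counter block.  Only the low
! 32-bit word of the counter (kept in %l7) is incremented, with the
! "clruw" srl wrapping it modulo 2^32; even runs of blocks go through
! _aes128_encrypt_2x two at a time.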
.globl	aes128_t4_ctr32_encrypt
.align	32
aes128_t4_ctr32_encrypt:
	save		%sp, -112, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_enckey
	sllx		%i2, 4, %i2

	ld		[%i4 + 0], %l4	! counter
	ld		[%i4 + 4], %l5
	ld		[%i4 + 8], %l6
	ld		[%i4 + 12], %l7

	sllx		%l4, 32, %o5
	or		%l5, %o5, %o5
	sllx		%l6, 32, %g1
	xor		%o5, %g4, %g4		! ^= rk[0]
	xor		%g1, %g5, %g5
	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_ctr32_loop2x
	srlx		%i2, 4, %i2
.L128_ctr32_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	call		_aes128_encrypt_1x+8
	add		%i0, 16, %i0

	.word	0x95b02308 !movxtod	%o0,%f10
	.word	0x99b02309 !movxtod	%o1,%f12
	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
	.word	0x85b30d82 !fxor	%f12,%f2,%f2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_ctr32_loop2x
	add		%i1, 16, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_ctr32_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes128_encrypt_2x+16
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_ctr32_loop2x
	add		%i1, 32, %i1

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_ctr32_loop2x+4
	orn		%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes128_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L128_ctr32_blk_loop2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_ctr32_loop2x
	nop

	ret
	restore
.type	aes128_t4_ctr32_encrypt,#function
.size	aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
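! aes128_t4_xts_encrypt: %i0 = in, %i1 = out, %i2 = length in bytes,
! %i3 = data key, %i4 = tweak key, %i5 = ivec.  The initial tweak is
! computed by encrypting the ivec with the tweak key; a trailing
! partial block is handled by ciphertext stealing (.L128_xts_ensteal).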
.globl	aes128_t4_xts_encrypt
.align	32
aes128_t4_xts_encrypt:
	save		%sp, -112-16, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	mov		%i5, %o0
	add		%fp, 0-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, 0-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, 0-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_enckey
	and		%i2, 15,  %i5
	and		%i2, -16, %i2

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_xts_enblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_xts_enloop2x
	srlx		%i2, 4, %i2
.L128_xts_enloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_encrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

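! Advance the tweak in %g3:%g2 by multiplying by x in GF(2^128):
! shift the 128-bit value left one bit (addcc/addxc propagate the
! carry between halves) and fold the reduction constant 0x87 into
! the low byte when the top bit falls out.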
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_xts_enloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_enloop2x+4
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_xts_enloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_encrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_xts_enloop2x
	add		%i1, 32, %i1

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_enloop2x+4
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_xts_enblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_xts_enblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_encrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L128_xts_enblk2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_xts_enloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_xts_enloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_ensteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
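! Ciphertext stealing: the last full ciphertext block is parked on
! the stack, its leading bytes are exchanged with the %i5 leftover
! input bytes, and .L128_xts_enloop is entered one more time to
! encrypt the reassembled block in place of the stolen one.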
.align	32
.L128_xts_ensteal:
	std		%f0, [%fp + 0-16]	! copy of output
	std		%f2, [%fp + 0-8]

	srl		%l0, 3, %l0
	add		%fp, 0-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	nop					! align

.L128_xts_enstealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L128_xts_enstealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L128_xts_enloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret
	restore
.type	aes128_t4_xts_encrypt,#function
.size	aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
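! aes128_t4_xts_decrypt: same arguments as aes128_t4_xts_encrypt (the
! tweak is still computed with aes_t4_encrypt).  When the length is
! not a multiple of 16, one complete block is held back so that
! .L128_xts_desteal can apply the tweaks in the order the
! ciphertext-stealing layout requires.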
.globl	aes128_t4_xts_decrypt
.align	32
aes128_t4_xts_decrypt:
	save		%sp, -112-16, %sp
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9

	mov		%i5, %o0
	add		%fp, 0-16, %o1
	call		aes_t4_encrypt
	mov		%i4, %o2

	add		%fp, 0-16, %l7
	ldxa		[%l7]0x88, %g2
	add		%fp, 0-8, %l7
	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak

	sethi		%hi(0x76543210), %l7
	or		%l7, %lo(0x76543210), %l7
	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask

	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i2, 15,  %i5
	and		%i2, -16, %i2
	mov		0, %l7
	movrnz		%i5, 16,  %l7
	sub		%i2, %l7, %i2

	sub		%i0, %i1, %l5	! %i0!=%i1
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128_xts_deblk !	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	brz,pn		%i2, .L128_xts_desteal
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_xts_deloop2x
	srlx		%i2, 4, %i2
.L128_xts_deloop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_xts_deloop2x
	add		%i1, 16, %i1

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8
	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3

	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_xts_deloop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_xts_deloop2x
	add		%i1, 32, %i1

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x95b00902 !faligndata	%f0,%f2,%f10
	.word	0x99b08904 !faligndata	%f2,%f4,%f12
	.word	0x9db10906 !faligndata	%f4,%f6,%f14
	.word	0x81b18906 !faligndata	%f6,%f6,%f0

	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f10, [%i1 + 8]
	std		%f12, [%i1 + 16]
	std		%f14, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f0, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_xts_deloop2x+4
	orn		%g0, %l3, %l3

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_xts_deblk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_xts_deblk2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	.word	0x99b02302 !movxtod	%g2,%f12
	.word	0x9db02303 !movxtod	%g3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10
	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	xor		%g4, %o2, %o2		! ^= rk[0]
	xor		%g5, %o3, %o3
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2
	.word	0x89b0230a !movxtod	%o2,%f4
	.word	0x8db0230b !movxtod	%o3,%f6

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02302 !movxtod	%g2,%f8
	.word	0x95b02303 !movxtod	%g3,%f10

	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %g2
	and		%l7, 0x87, %l7
	.word	0x87b0c223 !addxc	%g3,%g3,%g3
	xor		%l7, %g2, %g2

	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
	.word	0x95b2898a !bshuffle	%f10,%f10,%f10

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	subcc		%i2, 2, %i2
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L128_xts_deblk2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_xts_deloop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_xts_deloop2x
	nop

	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
	brnz,pn		%i5, .L128_xts_desteal
	nop

	ret
	restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_xts_desteal:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 8f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
8:
	srax		%g3, 63, %l7		! next tweak value
	addcc		%g2, %g2, %o2
	and		%l7, 0x87, %l7
	.word	0x97b0c223 !addxc	%g3,%g3,%o3
	xor		%l7, %o2, %o2

	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
	.word	0x9db3898e !bshuffle	%f14,%f14,%f14

	xor		%g4, %o0, %o0		! ^= rk[0]
	xor		%g5, %o1, %o1
	.word	0x81b02308 !movxtod	%o0,%f0
	.word	0x85b02309 !movxtod	%o1,%f2

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
	.word	0x85b38d82 !fxor	%f14,%f2,%f2

	std		%f0, [%fp + 0-16]
	std		%f2, [%fp + 0-8]

	srl		%l0, 3, %l0
	add		%fp, 0-16, %l7
	add		%i0, %l0, %i0	! original %i0+%i2&-15
	add		%i1, %l2, %i1	! original %i1+%i2&-15
	mov		0, %l0
	add		%i1, 16, %i1
	nop					! align

.L128_xts_destealing:
	ldub		[%i0 + %l0], %o0
	ldub		[%l7  + %l0], %o1
	dec		%i5
	stb		%o0, [%l7  + %l0]
	stb		%o1, [%i1 + %l0]
	brnz		%i5, .L128_xts_destealing
	inc		%l0

	mov		%l7, %i0
	sub		%i1, 16, %i1
	mov		0, %l0
	sub		%i1, %l2, %i1
	ba		.L128_xts_deloop	! one more time
	mov		1, %i2				! %i5 is 0
	ret
	restore
.type	aes128_t4_xts_decrypt,#function
.size	aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
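! aes128_t4_cbc_decrypt: %i0 = in, %i1 = out, %i2 = length in bytes,
! %i3 = key schedule, %i4 = ivec.  The previous ciphertext block is
! carried in %f12:%f14 for the post-decrypt XOR and the final value
! is written back to %i4 as the updated ivec.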
.globl	aes128_t4_cbc_decrypt
.align	32
aes128_t4_cbc_decrypt:
	save		%sp, -112, %sp
	cmp		%i2, 0
	be,pn		%icc, .L128_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes128_load_deckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L128_cbc_dec_loop2x
	prefetch	[%i1], 22
.L128_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes128_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L128_cbc_dec_loop2x
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L128_cbc_dec_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L128_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes128_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L128_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L128_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L128_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes128_t4_cbc_decrypt,#function
.size	aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
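! _aes128_decrypt_1x/_2x mirror the encrypt helpers above with the
! dround opcodes, pulling the inverted schedule (as produced by
! aes_t4_set_decrypt_key) from %f16-%f54.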
.align	32
_aes128_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd44c4 !aes_dround01_l	%f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4e4 !aes_dround23_l	%f54,%f4,%f2,%f2
.type	_aes128_decrypt_1x,#function
.size	_aes128_decrypt_1x,.-_aes128_decrypt_1x

.align	32
_aes128_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd44c8 !aes_dround01_l	%f52,%f8,%f2,%f0
	.word	0x84cdc4e8 !aes_dround23_l	%f54,%f8,%f2,%f2
	.word	0x88cd4cca !aes_dround01_l	%f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccea !aes_dround23_l	%f54,%f10,%f6,%f6
.type	_aes128_decrypt_2x,#function
.size	_aes128_decrypt_2x,.-_aes128_decrypt_2x
2071.align	32
2072_aes192_encrypt_1x:
2073	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
2074	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2075	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
2076	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
2077	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
2078	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
2079	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
2080	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
2081	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
2082	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
2083	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
2084	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
2085	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
2086	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
2087	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
2088	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
2089	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
2090	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
2091	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
2092	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
2093	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
2094	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
2095	.word	0x80cf4484 !aes_eround01_l	%f60,%f4,%f2,%f0
2096	retl
2097	.word	0x84cfc4a4 !aes_eround23_l	%f62,%f4,%f2,%f2
2098.type	_aes192_encrypt_1x,#function
2099.size	_aes192_encrypt_1x,.-_aes192_encrypt_1x
2100
2101.align	32
2102_aes192_encrypt_2x:
2103	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
2104	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2105	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
2106	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
2107	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
2108	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
2109	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
2110	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
2111	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
2112	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
2113	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
2114	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
2115	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
2116	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
2117	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
2118	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
2119	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
2120	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
2121	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
2122	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
2123	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
2124	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
2125	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
2126	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
2127	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
2128	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
2129	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
2130	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
2131	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
2132	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
2133	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
2134	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
2135	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
2136	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
2137	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
2138	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
2139	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
2140	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
2141	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
2142	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
2143	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
2144	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
2145	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
2146	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
2147	.word	0x80cf4488 !aes_eround01_l	%f60,%f8,%f2,%f0
2148	.word	0x84cfc4a8 !aes_eround23_l	%f62,%f8,%f2,%f2
2149	.word	0x88cf4c8a !aes_eround01_l	%f60,%f10,%f6,%f4
2150	retl
2151	.word	0x8ccfccaa !aes_eround23_l	%f62,%f10,%f6,%f6
2152.type	_aes192_encrypt_2x,#function
2153.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x
2154
2155.align	32
2156_aes256_encrypt_1x:
2157	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
2158	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2159	ldd		[%i3 + 208], %f16
2160	ldd		[%i3 + 216], %f18
2161	.word	0x80cd0404 !aes_eround01	%f20,%f4,%f2,%f0
2162	.word	0x84cd8424 !aes_eround23	%f22,%f4,%f2,%f2
2163	ldd		[%i3 + 224], %f20
2164	ldd		[%i3 + 232], %f22
2165	.word	0x88ce0400 !aes_eround01	%f24,%f0,%f2,%f4
2166	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
2167	.word	0x80cf0404 !aes_eround01	%f28,%f4,%f2,%f0
2168	.word	0x84cf8424 !aes_eround23	%f30,%f4,%f2,%f2
2169	.word	0x88c84400 !aes_eround01	%f32,%f0,%f2,%f4
2170	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
2171	.word	0x80c94404 !aes_eround01	%f36,%f4,%f2,%f0
2172	.word	0x84c9c424 !aes_eround23	%f38,%f4,%f2,%f2
2173	.word	0x88ca4400 !aes_eround01	%f40,%f0,%f2,%f4
2174	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
2175	.word	0x80cb4404 !aes_eround01	%f44,%f4,%f2,%f0
2176	.word	0x84cbc424 !aes_eround23	%f46,%f4,%f2,%f2
2177	.word	0x88cc4400 !aes_eround01	%f48,%f0,%f2,%f4
2178	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
2179	.word	0x80cd4404 !aes_eround01	%f52,%f4,%f2,%f0
2180	.word	0x84cdc424 !aes_eround23	%f54,%f4,%f2,%f2
2181	.word	0x88ce4400 !aes_eround01	%f56,%f0,%f2,%f4
2182	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
2183	.word	0x80cf4404 !aes_eround01	%f60,%f4,%f2,%f0
2184	.word	0x84cfc424 !aes_eround23	%f62,%f4,%f2,%f2
2185	.word	0x88cc0400 !aes_eround01	%f16,%f0,%f2,%f4
2186	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2187	ldd		[%i3 + 16], %f16
2188	ldd		[%i3 + 24], %f18
2189	.word	0x80cd0484 !aes_eround01_l	%f20,%f4,%f2,%f0
2190	.word	0x84cd84a4 !aes_eround23_l	%f22,%f4,%f2,%f2
2191	ldd		[%i3 + 32], %f20
2192	retl
2193	ldd		[%i3 + 40], %f22
2194.type	_aes256_encrypt_1x,#function
2195.size	_aes256_encrypt_1x,.-_aes256_encrypt_1x
2196
2197.align	32
2198_aes256_encrypt_2x:
2199	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
2200	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2201	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
2202	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
2203	ldd		[%i3 + 208], %f16
2204	ldd		[%i3 + 216], %f18
2205	.word	0x80cd0408 !aes_eround01	%f20,%f8,%f2,%f0
2206	.word	0x84cd8428 !aes_eround23	%f22,%f8,%f2,%f2
2207	.word	0x88cd0c0a !aes_eround01	%f20,%f10,%f6,%f4
2208	.word	0x8ccd8c2a !aes_eround23	%f22,%f10,%f6,%f6
2209	ldd		[%i3 + 224], %f20
2210	ldd		[%i3 + 232], %f22
2211	.word	0x90ce0400 !aes_eround01	%f24,%f0,%f2,%f8
2212	.word	0x84ce8420 !aes_eround23	%f26,%f0,%f2,%f2
2213	.word	0x94ce0c04 !aes_eround01	%f24,%f4,%f6,%f10
2214	.word	0x8cce8c24 !aes_eround23	%f26,%f4,%f6,%f6
2215	.word	0x80cf0408 !aes_eround01	%f28,%f8,%f2,%f0
2216	.word	0x84cf8428 !aes_eround23	%f30,%f8,%f2,%f2
2217	.word	0x88cf0c0a !aes_eround01	%f28,%f10,%f6,%f4
2218	.word	0x8ccf8c2a !aes_eround23	%f30,%f10,%f6,%f6
2219	.word	0x90c84400 !aes_eround01	%f32,%f0,%f2,%f8
2220	.word	0x84c8c420 !aes_eround23	%f34,%f0,%f2,%f2
2221	.word	0x94c84c04 !aes_eround01	%f32,%f4,%f6,%f10
2222	.word	0x8cc8cc24 !aes_eround23	%f34,%f4,%f6,%f6
2223	.word	0x80c94408 !aes_eround01	%f36,%f8,%f2,%f0
2224	.word	0x84c9c428 !aes_eround23	%f38,%f8,%f2,%f2
2225	.word	0x88c94c0a !aes_eround01	%f36,%f10,%f6,%f4
2226	.word	0x8cc9cc2a !aes_eround23	%f38,%f10,%f6,%f6
2227	.word	0x90ca4400 !aes_eround01	%f40,%f0,%f2,%f8
2228	.word	0x84cac420 !aes_eround23	%f42,%f0,%f2,%f2
2229	.word	0x94ca4c04 !aes_eround01	%f40,%f4,%f6,%f10
2230	.word	0x8ccacc24 !aes_eround23	%f42,%f4,%f6,%f6
2231	.word	0x80cb4408 !aes_eround01	%f44,%f8,%f2,%f0
2232	.word	0x84cbc428 !aes_eround23	%f46,%f8,%f2,%f2
2233	.word	0x88cb4c0a !aes_eround01	%f44,%f10,%f6,%f4
2234	.word	0x8ccbcc2a !aes_eround23	%f46,%f10,%f6,%f6
2235	.word	0x90cc4400 !aes_eround01	%f48,%f0,%f2,%f8
2236	.word	0x84ccc420 !aes_eround23	%f50,%f0,%f2,%f2
2237	.word	0x94cc4c04 !aes_eround01	%f48,%f4,%f6,%f10
2238	.word	0x8ccccc24 !aes_eround23	%f50,%f4,%f6,%f6
2239	.word	0x80cd4408 !aes_eround01	%f52,%f8,%f2,%f0
2240	.word	0x84cdc428 !aes_eround23	%f54,%f8,%f2,%f2
2241	.word	0x88cd4c0a !aes_eround01	%f52,%f10,%f6,%f4
2242	.word	0x8ccdcc2a !aes_eround23	%f54,%f10,%f6,%f6
2243	.word	0x90ce4400 !aes_eround01	%f56,%f0,%f2,%f8
2244	.word	0x84cec420 !aes_eround23	%f58,%f0,%f2,%f2
2245	.word	0x94ce4c04 !aes_eround01	%f56,%f4,%f6,%f10
2246	.word	0x8ccecc24 !aes_eround23	%f58,%f4,%f6,%f6
2247	.word	0x80cf4408 !aes_eround01	%f60,%f8,%f2,%f0
2248	.word	0x84cfc428 !aes_eround23	%f62,%f8,%f2,%f2
2249	.word	0x88cf4c0a !aes_eround01	%f60,%f10,%f6,%f4
2250	.word	0x8ccfcc2a !aes_eround23	%f62,%f10,%f6,%f6
2251	.word	0x90cc0400 !aes_eround01	%f16,%f0,%f2,%f8
2252	.word	0x84cc8420 !aes_eround23	%f18,%f0,%f2,%f2
2253	.word	0x94cc0c04 !aes_eround01	%f16,%f4,%f6,%f10
2254	.word	0x8ccc8c24 !aes_eround23	%f18,%f4,%f6,%f6
2255	ldd		[%i3 + 16], %f16
2256	ldd		[%i3 + 24], %f18
2257	.word	0x80cd0488 !aes_eround01_l	%f20,%f8,%f2,%f0
2258	.word	0x84cd84a8 !aes_eround23_l	%f22,%f8,%f2,%f2
2259	.word	0x88cd0c8a !aes_eround01_l	%f20,%f10,%f6,%f4
2260	.word	0x8ccd8caa !aes_eround23_l	%f22,%f10,%f6,%f6
2261	ldd		[%i3 + 32], %f20
2262	retl
2263	ldd		[%i3 + 40], %f22
2264.type	_aes256_encrypt_2x,#function
2265.size	_aes256_encrypt_2x,.-_aes256_encrypt_2x
2266
2267.align	32
2268_aes192_loadkey:
2269	ldx		[%i3 + 0], %g4
2270	ldx		[%i3 + 8], %g5
2271	ldd		[%i3 + 16], %f16
2272	ldd		[%i3 + 24], %f18
2273	ldd		[%i3 + 32], %f20
2274	ldd		[%i3 + 40], %f22
2275	ldd		[%i3 + 48], %f24
2276	ldd		[%i3 + 56], %f26
2277	ldd		[%i3 + 64], %f28
2278	ldd		[%i3 + 72], %f30
2279	ldd		[%i3 + 80], %f32
2280	ldd		[%i3 + 88], %f34
2281	ldd		[%i3 + 96], %f36
2282	ldd		[%i3 + 104], %f38
2283	ldd		[%i3 + 112], %f40
2284	ldd		[%i3 + 120], %f42
2285	ldd		[%i3 + 128], %f44
2286	ldd		[%i3 + 136], %f46
2287	ldd		[%i3 + 144], %f48
2288	ldd		[%i3 + 152], %f50
2289	ldd		[%i3 + 160], %f52
2290	ldd		[%i3 + 168], %f54
2291	ldd		[%i3 + 176], %f56
2292	ldd		[%i3 + 184], %f58
2293	ldd		[%i3 + 192], %f60
2294	ldd		[%i3 + 200], %f62
2295	retl
2296	nop
2297.type	_aes192_loadkey,#function
2298.size	_aes192_loadkey,.-_aes192_loadkey
2299_aes256_loadkey=_aes192_loadkey
2300_aes192_load_enckey=_aes192_loadkey
2301_aes192_load_deckey=_aes192_loadkey
2302_aes256_load_enckey=_aes192_loadkey
2303_aes256_load_deckey=_aes192_loadkey
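! A single loader covers the 192- and 256-bit schedules (the 256-bit
! helpers page in their two extra round keys themselves) and both
! directions, since the T4 round instructions, not the key layout,
! distinguish encrypt from decrypt.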
2304.globl	aes256_t4_cbc_encrypt
2305.align	32
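! void aes256_t4_cbc_encrypt(const unsigned char *inp, unsigned char
!     *out, size_t len, const AES_KEY *key, unsigned char *ivec);
! (editorial sketch of the C-level view, inferred from register use:
! %i0=inp, %i1=out, %i2=byte length, %i3=key schedule, %i4=ivec; the
! authoritative prototype is in the calling C code.)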
2306aes256_t4_cbc_encrypt:
2307	save		%sp, -112, %sp
2308	cmp		%i2, 0
2309	be,pn		%icc, .L256_cbc_enc_abort
2310	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2311	sub		%i0, %i1, %l5	! %l5 = inp-out; 0 iff %i0==%i1
2312	ld		[%i4 + 0], %f0
2313	ld		[%i4 + 4], %f1
2314	ld		[%i4 + 8], %f2
2315	ld		[%i4 + 12], %f3
2316	prefetch	[%i0], 20
2317	prefetch	[%i0 + 63], 20
2318	call		_aes256_load_enckey
2319	and		%i0, 7, %l0
2320	andn		%i0, 7, %i0
2321	sll		%l0, 3, %l0
2322	mov		64, %l1
2323	mov		0xff, %l3
2324	sub		%l1, %l0, %l1
2325	and		%i1, 7, %l2
2326	cmp		%i2, 127
2327	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2328	movleu		%icc, 0, %l5	!	%i2<128 ||
2329	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
2330	srl		%l3, %l2, %l3
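! Path selection: %l3 = 0xff >> (out&7) becomes the mask for partial
! stores on a misaligned out.  The conditional moves zero %l5 so the
! block-store path is taken only if out is 8-byte aligned, len >= 128
! and inp != out; anything else falls through to the general loop.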
2331
2332	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2333	srlx		%i2, 4, %i2
2334	prefetch	[%i1], 22
2335
2336.L256_cbc_enc_loop:
2337	ldx		[%i0 + 0], %o0
2338	brz,pt		%l0, 4f
2339	ldx		[%i0 + 8], %o1
2340
2341	ldx		[%i0 + 16], %o2
2342	sllx		%o0, %l0, %o0
2343	srlx		%o1, %l1, %g1
2344	sllx		%o1, %l0, %o1
2345	or		%g1, %o0, %o0
2346	srlx		%o2, %l1, %o2
2347	or		%o2, %o1, %o1
23484:
2349	xor		%g4, %o0, %o0		! ^= rk[0]
2350	xor		%g5, %o1, %o1
2351	.word	0x99b02308 !movxtod	%o0,%f12
2352	.word	0x9db02309 !movxtod	%o1,%f14
2353
2354	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2355	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2356	prefetch	[%i1 + 63], 22
2357	prefetch	[%i0 + 16+63], 20
2358	call		_aes256_encrypt_1x
2359	add		%i0, 16, %i0
2360
2361	brnz,pn		%l2, 2f
2362	sub		%i2, 1, %i2
2363
2364	std		%f0, [%i1 + 0]
2365	std		%f2, [%i1 + 8]
2366	brnz,pt		%i2, .L256_cbc_enc_loop
2367	add		%i1, 16, %i1
2368	st		%f0, [%i4 + 0]
2369	st		%f1, [%i4 + 4]
2370	st		%f2, [%i4 + 8]
2371	st		%f3, [%i4 + 12]
2372.L256_cbc_enc_abort:
2373	ret
2374	restore
2375
2376.align	16
23772:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2378						! and ~3x deterioration
2379						! in inp==out case
2380	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2381	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2382	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2383
2384	stda		%f4, [%i1 + %l3]0xc0	! partial store
2385	std		%f6, [%i1 + 8]
2386	add		%i1, 16, %i1
2387	orn		%g0, %l3, %l3
2388	stda		%f8, [%i1 + %l3]0xc0	! partial store
2389
2390	brnz,pt		%i2, .L256_cbc_enc_loop+4
2391	orn		%g0, %l3, %l3
2392	st		%f0, [%i4 + 0]
2393	st		%f1, [%i4 + 4]
2394	st		%f2, [%i4 + 8]
2395	st		%f3, [%i4 + 12]
2396	ret
2397	restore
2398
2399!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2400.align	32
2401.L256cbc_enc_blk:
2402	add	%i1, %i2, %l5
2403	and	%l5, 63, %l5	! tail
2404	sub	%i2, %l5, %i2
2405	add	%l5, 15, %l5	! round up to 16n
2406	srlx	%i2, 4, %i2
2407	srl	%l5, 4, %l5
2408
2409.L256_cbc_enc_blk_loop:
2410	ldx		[%i0 + 0], %o0
2411	brz,pt		%l0, 5f
2412	ldx		[%i0 + 8], %o1
2413
2414	ldx		[%i0 + 16], %o2
2415	sllx		%o0, %l0, %o0
2416	srlx		%o1, %l1, %g1
2417	sllx		%o1, %l0, %o1
2418	or		%g1, %o0, %o0
2419	srlx		%o2, %l1, %o2
2420	or		%o2, %o1, %o1
24215:
2422	xor		%g4, %o0, %o0		! ^= rk[0]
2423	xor		%g5, %o1, %o1
2424	.word	0x99b02308 !movxtod	%o0,%f12
2425	.word	0x9db02309 !movxtod	%o1,%f14
2426
2427	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2428	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2429	prefetch	[%i0 + 16+63], 20
2430	call		_aes256_encrypt_1x
2431	add		%i0, 16, %i0
2432	sub		%i2, 1, %i2
2433
2434	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2435	add		%i1, 8, %i1
2436	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2437	brnz,pt		%i2, .L256_cbc_enc_blk_loop
2438	add		%i1, 8, %i1
2439
2440	membar		#StoreLoad|#StoreStore
2441	brnz,pt		%l5, .L256_cbc_enc_loop
2442	mov		%l5, %i2
2443	st		%f0, [%i4 + 0]
2444	st		%f1, [%i4 + 4]
2445	st		%f2, [%i4 + 8]
2446	st		%f3, [%i4 + 12]
2447	ret
2448	restore
2449.type	aes256_t4_cbc_encrypt,#function
2450.size	aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
2451.globl	aes192_t4_cbc_encrypt
2452.align	32
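! Identical flow to aes256_t4_cbc_encrypt above, using the 12-round
! 192-bit schedule.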
2453aes192_t4_cbc_encrypt:
2454	save		%sp, -112, %sp
2455	cmp		%i2, 0
2456	be,pn		%icc, .L192_cbc_enc_abort
2457	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2458	sub		%i0, %i1, %l5	! %l5 = inp-out; 0 iff %i0==%i1
2459	ld		[%i4 + 0], %f0
2460	ld		[%i4 + 4], %f1
2461	ld		[%i4 + 8], %f2
2462	ld		[%i4 + 12], %f3
2463	prefetch	[%i0], 20
2464	prefetch	[%i0 + 63], 20
2465	call		_aes192_load_enckey
2466	and		%i0, 7, %l0
2467	andn		%i0, 7, %i0
2468	sll		%l0, 3, %l0
2469	mov		64, %l1
2470	mov		0xff, %l3
2471	sub		%l1, %l0, %l1
2472	and		%i1, 7, %l2
2473	cmp		%i2, 127
2474	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2475	movleu		%icc, 0, %l5	!	%i2<128 ||
2476	brnz,pn		%l5, .L192cbc_enc_blk	!	%i0==%i1)
2477	srl		%l3, %l2, %l3
2478
2479	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2480	srlx		%i2, 4, %i2
2481	prefetch	[%i1], 22
2482
2483.L192_cbc_enc_loop:
2484	ldx		[%i0 + 0], %o0
2485	brz,pt		%l0, 4f
2486	ldx		[%i0 + 8], %o1
2487
2488	ldx		[%i0 + 16], %o2
2489	sllx		%o0, %l0, %o0
2490	srlx		%o1, %l1, %g1
2491	sllx		%o1, %l0, %o1
2492	or		%g1, %o0, %o0
2493	srlx		%o2, %l1, %o2
2494	or		%o2, %o1, %o1
24954:
2496	xor		%g4, %o0, %o0		! ^= rk[0]
2497	xor		%g5, %o1, %o1
2498	.word	0x99b02308 !movxtod	%o0,%f12
2499	.word	0x9db02309 !movxtod	%o1,%f14
2500
2501	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2502	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2503	prefetch	[%i1 + 63], 22
2504	prefetch	[%i0 + 16+63], 20
2505	call		_aes192_encrypt_1x
2506	add		%i0, 16, %i0
2507
2508	brnz,pn		%l2, 2f
2509	sub		%i2, 1, %i2
2510
2511	std		%f0, [%i1 + 0]
2512	std		%f2, [%i1 + 8]
2513	brnz,pt		%i2, .L192_cbc_enc_loop
2514	add		%i1, 16, %i1
2515	st		%f0, [%i4 + 0]
2516	st		%f1, [%i4 + 4]
2517	st		%f2, [%i4 + 8]
2518	st		%f3, [%i4 + 12]
2519.L192_cbc_enc_abort:
2520	ret
2521	restore
2522
2523.align	16
25242:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2525						! and ~3x deterioration
2526						! in inp==out case
2527	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2528	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2529	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2530
2531	stda		%f4, [%i1 + %l3]0xc0	! partial store
2532	std		%f6, [%i1 + 8]
2533	add		%i1, 16, %i1
2534	orn		%g0, %l3, %l3
2535	stda		%f8, [%i1 + %l3]0xc0	! partial store
2536
2537	brnz,pt		%i2, .L192_cbc_enc_loop+4
2538	orn		%g0, %l3, %l3
2539	st		%f0, [%i4 + 0]
2540	st		%f1, [%i4 + 4]
2541	st		%f2, [%i4 + 8]
2542	st		%f3, [%i4 + 12]
2543	ret
2544	restore
2545
2546!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2547.align	32
2548.L192cbc_enc_blk:
2549	add	%i1, %i2, %l5
2550	and	%l5, 63, %l5	! tail
2551	sub	%i2, %l5, %i2
2552	add	%l5, 15, %l5	! round up to 16n
2553	srlx	%i2, 4, %i2
2554	srl	%l5, 4, %l5
2555
2556.L192_cbc_enc_blk_loop:
2557	ldx		[%i0 + 0], %o0
2558	brz,pt		%l0, 5f
2559	ldx		[%i0 + 8], %o1
2560
2561	ldx		[%i0 + 16], %o2
2562	sllx		%o0, %l0, %o0
2563	srlx		%o1, %l1, %g1
2564	sllx		%o1, %l0, %o1
2565	or		%g1, %o0, %o0
2566	srlx		%o2, %l1, %o2
2567	or		%o2, %o1, %o1
25685:
2569	xor		%g4, %o0, %o0		! ^= rk[0]
2570	xor		%g5, %o1, %o1
2571	.word	0x99b02308 !movxtod	%o0,%f12
2572	.word	0x9db02309 !movxtod	%o1,%f14
2573
2574	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
2575	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2576	prefetch	[%i0 + 16+63], 20
2577	call		_aes192_encrypt_1x
2578	add		%i0, 16, %i0
2579	sub		%i2, 1, %i2
2580
2581	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2582	add		%i1, 8, %i1
2583	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2584	brnz,pt		%i2, .L192_cbc_enc_blk_loop
2585	add		%i1, 8, %i1
2586
2587	membar		#StoreLoad|#StoreStore
2588	brnz,pt		%l5, .L192_cbc_enc_loop
2589	mov		%l5, %i2
2590	st		%f0, [%i4 + 0]
2591	st		%f1, [%i4 + 4]
2592	st		%f2, [%i4 + 8]
2593	st		%f3, [%i4 + 12]
2594	ret
2595	restore
2596.type	aes192_t4_cbc_encrypt,#function
2597.size	aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
2598.globl	aes256_t4_ctr32_encrypt
2599.align	32
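! void aes256_t4_ctr32_encrypt(const unsigned char *inp, unsigned char
!     *out, size_t blocks, const AES_KEY *key, const unsigned char *ivec);
! (editorial sketch: %i2 arrives as a block count and is scaled to
! bytes below; only the low 32 bits of the counter are ever
! incremented, as the CTR32 name implies.)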
2600aes256_t4_ctr32_encrypt:
2601	save		%sp, -112, %sp
2602	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2603
2604	prefetch	[%i0], 20
2605	prefetch	[%i0 + 63], 20
2606	call		_aes256_load_enckey
2607	sllx		%i2, 4, %i2
2608
2609	ld		[%i4 + 0], %l4	! counter
2610	ld		[%i4 + 4], %l5
2611	ld		[%i4 + 8], %l6
2612	ld		[%i4 + 12], %l7
2613
2614	sllx		%l4, 32, %o5
2615	or		%l5, %o5, %o5
2616	sllx		%l6, 32, %g1
2617	xor		%o5, %g4, %g4		! ^= rk[0]
2618	xor		%g1, %g5, %g5
2619	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
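! Fold the first round key into the counter block once: %f14 holds
! iv[0..7] ^ rk[0..7] and never changes, while %g5 holds iv[8..11] ^
! rk[8..15] with the counter word clear, so each block below costs one
! xor plus a 32-bit increment (only the low counter word wraps).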
2620
2621	sub		%i0, %i1, %l5	! %l5 = inp-out; 0 iff %i0==%i1
2622	and		%i0, 7, %l0
2623	andn		%i0, 7, %i0
2624	sll		%l0, 3, %l0
2625	mov		64, %l1
2626	mov		0xff, %l3
2627	sub		%l1, %l0, %l1
2628	and		%i1, 7, %l2
2629	cmp		%i2, 255
2630	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2631	movleu		%icc, 0, %l5	!	%i2<256 ||
2632	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
2633	srl		%l3, %l2, %l3
2634
2635	andcc		%i2, 16, %g0		! is number of blocks even?
2636	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2637	bz		%icc, .L256_ctr32_loop2x
2638	srlx		%i2, 4, %i2
2639.L256_ctr32_loop:
2640	ldx		[%i0 + 0], %o0
2641	brz,pt		%l0, 4f
2642	ldx		[%i0 + 8], %o1
2643
2644	ldx		[%i0 + 16], %o2
2645	sllx		%o0, %l0, %o0
2646	srlx		%o1, %l1, %g1
2647	sllx		%o1, %l0, %o1
2648	or		%g1, %o0, %o0
2649	srlx		%o2, %l1, %o2
2650	or		%o2, %o1, %o1
26514:
2652	xor		%g5, %l7, %g1		! ^= rk[0]
2653	add		%l7, 1, %l7
2654	.word	0x85b02301 !movxtod	%g1,%f2
2655	srl		%l7, 0, %l7		! clruw
2656	prefetch	[%i1 + 63], 22
2657	prefetch	[%i0 + 16+63], 20
2658	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
2659	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
2660	call		_aes256_encrypt_1x+8
2661	add		%i0, 16, %i0
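! The +8 entry skips the helper's first aes_eround01/23 pair, which was
! just issued inline against the counter block (%f14:%f2); the 2x calls
! below enter at +16 for the same reason.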
2662
2663	.word	0x95b02308 !movxtod	%o0,%f10
2664	.word	0x99b02309 !movxtod	%o1,%f12
2665	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
2666	.word	0x85b30d82 !fxor	%f12,%f2,%f2
2667
2668	brnz,pn		%l2, 2f
2669	sub		%i2, 1, %i2
2670
2671	std		%f0, [%i1 + 0]
2672	std		%f2, [%i1 + 8]
2673	brnz,pt		%i2, .L256_ctr32_loop2x
2674	add		%i1, 16, %i1
2675
2676	ret
2677	restore
2678
2679.align	16
26802:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2681						! and ~3x deterioration
2682						! in inp==out case
2683	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2684	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2685	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2686	stda		%f4, [%i1 + %l3]0xc0	! partial store
2687	std		%f6, [%i1 + 8]
2688	add		%i1, 16, %i1
2689	orn		%g0, %l3, %l3
2690	stda		%f8, [%i1 + %l3]0xc0	! partial store
2691
2692	brnz,pt		%i2, .L256_ctr32_loop2x+4
2693	orn		%g0, %l3, %l3
2694
2695	ret
2696	restore
2697
2698!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2699.align	32
2700.L256_ctr32_loop2x:
2701	ldx		[%i0 + 0], %o0
2702	ldx		[%i0 + 8], %o1
2703	ldx		[%i0 + 16], %o2
2704	brz,pt		%l0, 4f
2705	ldx		[%i0 + 24], %o3
2706
2707	ldx		[%i0 + 32], %o4
2708	sllx		%o0, %l0, %o0
2709	srlx		%o1, %l1, %g1
2710	or		%g1, %o0, %o0
2711	sllx		%o1, %l0, %o1
2712	srlx		%o2, %l1, %g1
2713	or		%g1, %o1, %o1
2714	sllx		%o2, %l0, %o2
2715	srlx		%o3, %l1, %g1
2716	or		%g1, %o2, %o2
2717	sllx		%o3, %l0, %o3
2718	srlx		%o4, %l1, %o4
2719	or		%o4, %o3, %o3
27204:
2721	xor		%g5, %l7, %g1		! ^= rk[0]
2722	add		%l7, 1, %l7
2723	.word	0x85b02301 !movxtod	%g1,%f2
2724	srl		%l7, 0, %l7		! clruw
2725	xor		%g5, %l7, %g1
2726	add		%l7, 1, %l7
2727	.word	0x8db02301 !movxtod	%g1,%f6
2728	srl		%l7, 0, %l7		! clruw
2729	prefetch	[%i1 + 63], 22
2730	prefetch	[%i0 + 32+63], 20
2731	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
2732	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
2733	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
2734	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
2735	call		_aes256_encrypt_2x+16
2736	add		%i0, 32, %i0
2737
2738	.word	0x91b02308 !movxtod	%o0,%f8
2739	.word	0x95b02309 !movxtod	%o1,%f10
2740	.word	0x99b0230a !movxtod	%o2,%f12
2741	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2742	.word	0x91b0230b !movxtod	%o3,%f8
2743	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2744	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2745	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2746
2747	brnz,pn		%l2, 2f
2748	sub		%i2, 2, %i2
2749
2750	std		%f0, [%i1 + 0]
2751	std		%f2, [%i1 + 8]
2752	std		%f4, [%i1 + 16]
2753	std		%f6, [%i1 + 24]
2754	brnz,pt		%i2, .L256_ctr32_loop2x
2755	add		%i1, 32, %i1
2756
2757	ret
2758	restore
2759
2760.align	16
27612:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2762						! and ~3x deterioration
2763						! in inp==out case
2764	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
2765	.word	0x81b00902 !faligndata	%f0,%f2,%f0
2766	.word	0x85b08904 !faligndata	%f2,%f4,%f2
2767	.word	0x89b10906 !faligndata	%f4,%f6,%f4
2768	.word	0x8db18906 !faligndata	%f6,%f6,%f6
2769
2770	stda		%f8, [%i1 + %l3]0xc0	! partial store
2771	std		%f0, [%i1 + 8]
2772	std		%f2, [%i1 + 16]
2773	std		%f4, [%i1 + 24]
2774	add		%i1, 32, %i1
2775	orn		%g0, %l3, %l3
2776	stda		%f6, [%i1 + %l3]0xc0	! partial store
2777
2778	brnz,pt		%i2, .L256_ctr32_loop2x+4
2779	orn		%g0, %l3, %l3
2780
2781	ret
2782	restore
2783
2784!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2785.align	32
2786.L256_ctr32_blk:
2787	add	%i1, %i2, %l5
2788	and	%l5, 63, %l5	! tail
2789	sub	%i2, %l5, %i2
2790	add	%l5, 15, %l5	! round up to 16n
2791	srlx	%i2, 4, %i2
2792	srl	%l5, 4, %l5
2793	sub	%i2, 1, %i2
2794	add	%l5, 1, %l5
2795
2796.L256_ctr32_blk_loop2x:
2797	ldx		[%i0 + 0], %o0
2798	ldx		[%i0 + 8], %o1
2799	ldx		[%i0 + 16], %o2
2800	brz,pt		%l0, 5f
2801	ldx		[%i0 + 24], %o3
2802
2803	ldx		[%i0 + 32], %o4
2804	sllx		%o0, %l0, %o0
2805	srlx		%o1, %l1, %g1
2806	or		%g1, %o0, %o0
2807	sllx		%o1, %l0, %o1
2808	srlx		%o2, %l1, %g1
2809	or		%g1, %o1, %o1
2810	sllx		%o2, %l0, %o2
2811	srlx		%o3, %l1, %g1
2812	or		%g1, %o2, %o2
2813	sllx		%o3, %l0, %o3
2814	srlx		%o4, %l1, %o4
2815	or		%o4, %o3, %o3
28165:
2817	xor		%g5, %l7, %g1		! ^= rk[0]
2818	add		%l7, 1, %l7
2819	.word	0x85b02301 !movxtod	%g1,%f2
2820	srl		%l7, 0, %l7		! clruw
2821	xor		%g5, %l7, %g1
2822	add		%l7, 1, %l7
2823	.word	0x8db02301 !movxtod	%g1,%f6
2824	srl		%l7, 0, %l7		! clruw
2825	prefetch	[%i0 + 32+63], 20
2826	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
2827	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
2828	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
2829	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
2830	call		_aes256_encrypt_2x+16
2831	add		%i0, 32, %i0
2832	subcc		%i2, 2, %i2
2833
2834	.word	0x91b02308 !movxtod	%o0,%f8
2835	.word	0x95b02309 !movxtod	%o1,%f10
2836	.word	0x99b0230a !movxtod	%o2,%f12
2837	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2838	.word	0x91b0230b !movxtod	%o3,%f8
2839	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2840	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2841	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2842
2843	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2844	add		%i1, 8, %i1
2845	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2846	add		%i1, 8, %i1
2847	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2848	add		%i1, 8, %i1
2849	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2850	bgu,pt		%icc, .L256_ctr32_blk_loop2x
2851	add		%i1, 8, %i1
2852
2853	add		%l5, %i2, %i2
2854	andcc		%i2, 1, %g0		! is number of blocks even?
2855	membar		#StoreLoad|#StoreStore
2856	bnz,pt		%icc, .L256_ctr32_loop
2857	srl		%i2, 0, %i2
2858	brnz,pn		%i2, .L256_ctr32_loop2x
2859	nop
2860
2861	ret
2862	restore
2863.type	aes256_t4_ctr32_encrypt,#function
2864.size	aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
2865.globl	aes256_t4_xts_encrypt
2866.align	32
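! void aes256_t4_xts_encrypt(const unsigned char *inp, unsigned char
!     *out, size_t len, const AES_KEY *key1, const AES_KEY *key2,
!     const unsigned char *ivec);
! (editorial sketch inferred from register use: %i3 = data key, %i4 =
! tweak key, %i5 = 16-byte sector IV; len need not be a multiple of 16,
! the remainder being handled by ciphertext stealing.)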
2867aes256_t4_xts_encrypt:
2868	save		%sp, -112-16, %sp
2869	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2870
2871	mov		%i5, %o0
2872	add		%fp, 0-16, %o1
2873	call		aes_t4_encrypt
2874	mov		%i4, %o2
2875
2876	add		%fp, 0-16, %l7
2877	ldxa		[%l7]0x88, %g2
2878	add		%fp, 0-8, %l7
2879	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak
2880
2881	sethi		%hi(0x76543210), %l7
2882	or		%l7, %lo(0x76543210), %l7
2883	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask
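! The tweak is E_key2(iv), computed into the stack scratch at %fp-16
! and read back through ASI 0x88 (primary little-endian) so the
! GF(2^128) arithmetic can run byte-reversed in %g3:%g2; bmask with
! 0x76543210 programs GSR so the bshuffle ops below undo that byte swap
! whenever a tweak value moves into the FP registers.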
2884
2885	prefetch	[%i0], 20
2886	prefetch	[%i0 + 63], 20
2887	call		_aes256_load_enckey
2888	and		%i2, 15,  %i5
2889	and		%i2, -16, %i2
2890
2891	sub		%i0, %i1, %l5	! %l5 = inp-out; 0 iff %i0==%i1
2892	and		%i0, 7, %l0
2893	andn		%i0, 7, %i0
2894	sll		%l0, 3, %l0
2895	mov		64, %l1
2896	mov		0xff, %l3
2897	sub		%l1, %l0, %l1
2898	and		%i1, 7, %l2
2899	cmp		%i2, 255
2900	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2901	movleu		%icc, 0, %l5	!	%i2<256 ||
2902	brnz,pn		%l5, .L256_xts_enblk !	%i0==%i1)
2903	srl		%l3, %l2, %l3
2904
2905	andcc		%i2, 16, %g0		! is number of blocks even?
2906	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2907	bz		%icc, .L256_xts_enloop2x
2908	srlx		%i2, 4, %i2
2909.L256_xts_enloop:
2910	ldx		[%i0 + 0], %o0
2911	brz,pt		%l0, 4f
2912	ldx		[%i0 + 8], %o1
2913
2914	ldx		[%i0 + 16], %o2
2915	sllx		%o0, %l0, %o0
2916	srlx		%o1, %l1, %g1
2917	sllx		%o1, %l0, %o1
2918	or		%g1, %o0, %o0
2919	srlx		%o2, %l1, %o2
2920	or		%o2, %o1, %o1
29214:
2922	.word	0x99b02302 !movxtod	%g2,%f12
2923	.word	0x9db02303 !movxtod	%g3,%f14
2924	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
2925	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
2926
2927	xor		%g4, %o0, %o0		! ^= rk[0]
2928	xor		%g5, %o1, %o1
2929	.word	0x81b02308 !movxtod	%o0,%f0
2930	.word	0x85b02309 !movxtod	%o1,%f2
2931
2932	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
2933	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2934
2935	prefetch	[%i1 + 63], 22
2936	prefetch	[%i0 + 16+63], 20
2937	call		_aes256_encrypt_1x
2938	add		%i0, 16, %i0
2939
2940	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
2941	.word	0x85b38d82 !fxor	%f14,%f2,%f2
2942
2943	srax		%g3, 63, %l7		! next tweak value
2944	addcc		%g2, %g2, %g2
2945	and		%l7, 0x87, %l7
2946	.word	0x87b0c223 !addxc	%g3,%g3,%g3
2947	xor		%l7, %g2, %g2
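! Tweak update, T = T*x in GF(2^128) mod x^128+x^7+x^2+x+1: srax/and
! yield 0x87 iff the top bit of %g3 is set, addcc/addxc (add with carry
! across the two halves) perform the 128-bit left shift, and the xor
! folds the polynomial into the low byte.  Roughly, in C terms:
! c = lo>>63; p = (hi>>63) ? 0x87 : 0; hi = hi<<1 | c; lo = lo<<1 ^ p;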
2948
2949	brnz,pn		%l2, 2f
2950	sub		%i2, 1, %i2
2951
2952	std		%f0, [%i1 + 0]
2953	std		%f2, [%i1 + 8]
2954	brnz,pt		%i2, .L256_xts_enloop2x
2955	add		%i1, 16, %i1
2956
2957	brnz,pn		%i5, .L256_xts_ensteal
2958	nop
2959
2960	ret
2961	restore
2962
2963.align	16
29642:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2965						! and ~3x deterioration
2966						! in inp==out case
2967	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2968	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2969	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2970	stda		%f4, [%i1 + %l3]0xc0	! partial store
2971	std		%f6, [%i1 + 8]
2972	add		%i1, 16, %i1
2973	orn		%g0, %l3, %l3
2974	stda		%f8, [%i1 + %l3]0xc0	! partial store
2975
2976	brnz,pt		%i2, .L256_xts_enloop2x+4
2977	orn		%g0, %l3, %l3
2978
2979	brnz,pn		%i5, .L256_xts_ensteal
2980	nop
2981
2982	ret
2983	restore
2984
2985!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2986.align	32
2987.L256_xts_enloop2x:
2988	ldx		[%i0 + 0], %o0
2989	ldx		[%i0 + 8], %o1
2990	ldx		[%i0 + 16], %o2
2991	brz,pt		%l0, 4f
2992	ldx		[%i0 + 24], %o3
2993
2994	ldx		[%i0 + 32], %o4
2995	sllx		%o0, %l0, %o0
2996	srlx		%o1, %l1, %g1
2997	or		%g1, %o0, %o0
2998	sllx		%o1, %l0, %o1
2999	srlx		%o2, %l1, %g1
3000	or		%g1, %o1, %o1
3001	sllx		%o2, %l0, %o2
3002	srlx		%o3, %l1, %g1
3003	or		%g1, %o2, %o2
3004	sllx		%o3, %l0, %o3
3005	srlx		%o4, %l1, %o4
3006	or		%o4, %o3, %o3
30074:
3008	.word	0x99b02302 !movxtod	%g2,%f12
3009	.word	0x9db02303 !movxtod	%g3,%f14
3010	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3011	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3012
3013	srax		%g3, 63, %l7		! next tweak value
3014	addcc		%g2, %g2, %g2
3015	and		%l7, 0x87, %l7
3016	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3017	xor		%l7, %g2, %g2
3018
3019	.word	0x91b02302 !movxtod	%g2,%f8
3020	.word	0x95b02303 !movxtod	%g3,%f10
3021	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3022	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3023
3024	xor		%g4, %o0, %o0		! ^= rk[0]
3025	xor		%g5, %o1, %o1
3026	xor		%g4, %o2, %o2		! ^= rk[0]
3027	xor		%g5, %o3, %o3
3028	.word	0x81b02308 !movxtod	%o0,%f0
3029	.word	0x85b02309 !movxtod	%o1,%f2
3030	.word	0x89b0230a !movxtod	%o2,%f4
3031	.word	0x8db0230b !movxtod	%o3,%f6
3032
3033	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3034	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3035	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
3036	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3037
3038	prefetch	[%i1 + 63], 22
3039	prefetch	[%i0 + 32+63], 20
3040	call		_aes256_encrypt_2x
3041	add		%i0, 32, %i0
3042
3043	.word	0x91b02302 !movxtod	%g2,%f8
3044	.word	0x95b02303 !movxtod	%g3,%f10
3045
3046	srax		%g3, 63, %l7		! next tweak value
3047	addcc		%g2, %g2, %g2
3048	and		%l7, 0x87, %l7
3049	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3050	xor		%l7, %g2, %g2
3051
3052	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3053	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3054
3055	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3056	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3057	.word	0x89b20d84 !fxor	%f8,%f4,%f4
3058	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3059
3060	brnz,pn		%l2, 2f
3061	sub		%i2, 2, %i2
3062
3063	std		%f0, [%i1 + 0]
3064	std		%f2, [%i1 + 8]
3065	std		%f4, [%i1 + 16]
3066	std		%f6, [%i1 + 24]
3067	brnz,pt		%i2, .L256_xts_enloop2x
3068	add		%i1, 32, %i1
3069
3070	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3071	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3072	brnz,pn		%i5, .L256_xts_ensteal
3073	nop
3074
3075	ret
3076	restore
3077
3078.align	16
30792:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
3080						! and ~3x deterioration
3081						! in inp==out case
3082	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
3083	.word	0x95b00902 !faligndata	%f0,%f2,%f10
3084	.word	0x99b08904 !faligndata	%f2,%f4,%f12
3085	.word	0x9db10906 !faligndata	%f4,%f6,%f14
3086	.word	0x81b18906 !faligndata	%f6,%f6,%f0
3087
3088	stda		%f8, [%i1 + %l3]0xc0	! partial store
3089	std		%f10, [%i1 + 8]
3090	std		%f12, [%i1 + 16]
3091	std		%f14, [%i1 + 24]
3092	add		%i1, 32, %i1
3093	orn		%g0, %l3, %l3
3094	stda		%f0, [%i1 + %l3]0xc0	! partial store
3095
3096	brnz,pt		%i2, .L256_xts_enloop2x+4
3097	orn		%g0, %l3, %l3
3098
3099	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3100	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3101	brnz,pn		%i5, .L256_xts_ensteal
3102	nop
3103
3104	ret
3105	restore
3106
3107!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3108.align	32
3109.L256_xts_enblk:
3110	add	%i1, %i2, %l5
3111	and	%l5, 63, %l5	! tail
3112	sub	%i2, %l5, %i2
3113	add	%l5, 15, %l5	! round up to 16n
3114	srlx	%i2, 4, %i2
3115	srl	%l5, 4, %l5
3116	sub	%i2, 1, %i2
3117	add	%l5, 1, %l5
3118
3119.L256_xts_enblk2x:
3120	ldx		[%i0 + 0], %o0
3121	ldx		[%i0 + 8], %o1
3122	ldx		[%i0 + 16], %o2
3123	brz,pt		%l0, 5f
3124	ldx		[%i0 + 24], %o3
3125
3126	ldx		[%i0 + 32], %o4
3127	sllx		%o0, %l0, %o0
3128	srlx		%o1, %l1, %g1
3129	or		%g1, %o0, %o0
3130	sllx		%o1, %l0, %o1
3131	srlx		%o2, %l1, %g1
3132	or		%g1, %o1, %o1
3133	sllx		%o2, %l0, %o2
3134	srlx		%o3, %l1, %g1
3135	or		%g1, %o2, %o2
3136	sllx		%o3, %l0, %o3
3137	srlx		%o4, %l1, %o4
3138	or		%o4, %o3, %o3
31395:
3140	.word	0x99b02302 !movxtod	%g2,%f12
3141	.word	0x9db02303 !movxtod	%g3,%f14
3142	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3143	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3144
3145	srax		%g3, 63, %l7		! next tweak value
3146	addcc		%g2, %g2, %g2
3147	and		%l7, 0x87, %l7
3148	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3149	xor		%l7, %g2, %g2
3150
3151	.word	0x91b02302 !movxtod	%g2,%f8
3152	.word	0x95b02303 !movxtod	%g3,%f10
3153	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3154	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3155
3156	xor		%g4, %o0, %o0		! ^= rk[0]
3157	xor		%g5, %o1, %o1
3158	xor		%g4, %o2, %o2		! ^= rk[0]
3159	xor		%g5, %o3, %o3
3160	.word	0x81b02308 !movxtod	%o0,%f0
3161	.word	0x85b02309 !movxtod	%o1,%f2
3162	.word	0x89b0230a !movxtod	%o2,%f4
3163	.word	0x8db0230b !movxtod	%o3,%f6
3164
3165	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3166	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3167	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
3168	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3169
3170	prefetch	[%i0 + 32+63], 20
3171	call		_aes256_encrypt_2x
3172	add		%i0, 32, %i0
3173
3174	.word	0x91b02302 !movxtod	%g2,%f8
3175	.word	0x95b02303 !movxtod	%g3,%f10
3176
3177	srax		%g3, 63, %l7		! next tweak value
3178	addcc		%g2, %g2, %g2
3179	and		%l7, 0x87, %l7
3180	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3181	xor		%l7, %g2, %g2
3182
3183	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3184	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3185
3186	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3187	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3188	.word	0x89b20d84 !fxor	%f8,%f4,%f4
3189	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3190
3191	subcc		%i2, 2, %i2
3192	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3193	add		%i1, 8, %i1
3194	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3195	add		%i1, 8, %i1
3196	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3197	add		%i1, 8, %i1
3198	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3199	bgu,pt		%icc, .L256_xts_enblk2x
3200	add		%i1, 8, %i1
3201
3202	add		%l5, %i2, %i2
3203	andcc		%i2, 1, %g0		! is number of blocks even?
3204	membar		#StoreLoad|#StoreStore
3205	bnz,pt		%icc, .L256_xts_enloop
3206	srl		%i2, 0, %i2
3207	brnz,pn		%i2, .L256_xts_enloop2x
3208	nop
3209
3210	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3211	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3212	brnz,pn		%i5, .L256_xts_ensteal
3213	nop
3214
3215	ret
3216	restore
3217!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3218.align	32
3219.L256_xts_ensteal:
3220	std		%f0, [%fp + 0-16]	! copy of output
3221	std		%f2, [%fp + 0-8]
3222
3223	srl		%l0, 3, %l0
3224	add		%fp, 0-16, %l7
3225	add		%i0, %l0, %i0	! original %i0 + (%i2&-16)
3226	add		%i1, %l2, %i1	! original %i1 + (%i2&-16)
3227	mov		0, %l0
3228	nop					! align
3229
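! Ciphertext stealing for the trailing len&15 bytes: the last full
! ciphertext block sits at %fp-16.  The loop swaps bytes (each stolen
! ciphertext byte goes out as the partial block while the new input
! byte back-fills the stash), then the patched block is encrypted once
! more via .L256_xts_enloop with %i2 = 1.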
3230.L256_xts_enstealing:
3231	ldub		[%i0 + %l0], %o0
3232	ldub		[%l7  + %l0], %o1
3233	dec		%i5
3234	stb		%o0, [%l7  + %l0]
3235	stb		%o1, [%i1 + %l0]
3236	brnz		%i5, .L256_xts_enstealing
3237	inc		%l0
3238
3239	mov		%l7, %i0
3240	sub		%i1, 16, %i1
3241	mov		0, %l0
3242	sub		%i1, %l2, %i1
3243	ba		.L256_xts_enloop	! one more time
3244	mov		1, %i2				! %i5 is 0
3245	ret
3246	restore
3247.type	aes256_t4_xts_encrypt,#function
3248.size	aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
3249.globl	aes256_t4_xts_decrypt
3250.align	32
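! Decryption twin of aes256_t4_xts_encrypt.  Note the tweak is still
! produced with aes_t4_encrypt (the tweak side of XTS always encrypts);
! only the data path switches to the decrypt schedule.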
3251aes256_t4_xts_decrypt:
3252	save		%sp, -112-16, %sp
3253	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
3254
3255	mov		%i5, %o0
3256	add		%fp, 0-16, %o1
3257	call		aes_t4_encrypt
3258	mov		%i4, %o2
3259
3260	add		%fp, 0-16, %l7
3261	ldxa		[%l7]0x88, %g2
3262	add		%fp, 0-8, %l7
3263	ldxa		[%l7]0x88, %g3		! %g3:%g2 is tweak
3264
3265	sethi		%hi(0x76543210), %l7
3266	or		%l7, %lo(0x76543210), %l7
3267	.word	0x81b5c320 !bmask	%l7,%g0,%g0		! byte swap mask
3268
3269	prefetch	[%i0], 20
3270	prefetch	[%i0 + 63], 20
3271	call		_aes256_load_deckey
3272	and		%i2, 15,  %i5
3273	and		%i2, -16, %i2
3274	mov		0, %l7
3275	movrnz		%i5, 16,  %l7
3276	sub		%i2, %l7, %i2
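! If stealing is pending (len&15 != 0), hold one full block back from
! the bulk loops; it must be processed together with the tail in
! .L256_xts_desteal, which needs the tweak order inverted.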
3277
3278	sub		%i0, %i1, %l5	! %l5 = inp-out; 0 iff %i0==%i1
3279	and		%i0, 7, %l0
3280	andn		%i0, 7, %i0
3281	sll		%l0, 3, %l0
3282	mov		64, %l1
3283	mov		0xff, %l3
3284	sub		%l1, %l0, %l1
3285	and		%i1, 7, %l2
3286	cmp		%i2, 255
3287	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
3288	movleu		%icc, 0, %l5	!	%i2<256 ||
3289	brnz,pn		%l5, .L256_xts_deblk !	%i0==%i1)
3290	srl		%l3, %l2, %l3
3291
3292	andcc		%i2, 16, %g0		! is number of blocks even?
3293	brz,pn		%i2, .L256_xts_desteal
3294	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
3295	bz		%icc, .L256_xts_deloop2x
3296	srlx		%i2, 4, %i2
3297.L256_xts_deloop:
3298	ldx		[%i0 + 0], %o0
3299	brz,pt		%l0, 4f
3300	ldx		[%i0 + 8], %o1
3301
3302	ldx		[%i0 + 16], %o2
3303	sllx		%o0, %l0, %o0
3304	srlx		%o1, %l1, %g1
3305	sllx		%o1, %l0, %o1
3306	or		%g1, %o0, %o0
3307	srlx		%o2, %l1, %o2
3308	or		%o2, %o1, %o1
33094:
3310	.word	0x99b02302 !movxtod	%g2,%f12
3311	.word	0x9db02303 !movxtod	%g3,%f14
3312	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3313	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3314
3315	xor		%g4, %o0, %o0		! ^= rk[0]
3316	xor		%g5, %o1, %o1
3317	.word	0x81b02308 !movxtod	%o0,%f0
3318	.word	0x85b02309 !movxtod	%o1,%f2
3319
3320	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3321	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3322
3323	prefetch	[%i1 + 63], 22
3324	prefetch	[%i0 + 16+63], 20
3325	call		_aes256_decrypt_1x
3326	add		%i0, 16, %i0
3327
3328	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3329	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3330
3331	srax		%g3, 63, %l7		! next tweak value
3332	addcc		%g2, %g2, %g2
3333	and		%l7, 0x87, %l7
3334	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3335	xor		%l7, %g2, %g2
3336
3337	brnz,pn		%l2, 2f
3338	sub		%i2, 1, %i2
3339
3340	std		%f0, [%i1 + 0]
3341	std		%f2, [%i1 + 8]
3342	brnz,pt		%i2, .L256_xts_deloop2x
3343	add		%i1, 16, %i1
3344
3345	brnz,pn		%i5, .L256_xts_desteal
3346	nop
3347
3348	ret
3349	restore
3350
3351.align	16
33522:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
3353						! and ~3x deterioration
3354						! in inp==out case
3355	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
3356	.word	0x8db00902 !faligndata	%f0,%f2,%f6
3357	.word	0x91b08902 !faligndata	%f2,%f2,%f8
3358	stda		%f4, [%i1 + %l3]0xc0	! partial store
3359	std		%f6, [%i1 + 8]
3360	add		%i1, 16, %i1
3361	orn		%g0, %l3, %l3
3362	stda		%f8, [%i1 + %l3]0xc0	! partial store
3363
3364	brnz,pt		%i2, .L256_xts_deloop2x+4
3365	orn		%g0, %l3, %l3
3366
3367	brnz,pn		%i5, .L256_xts_desteal
3368	nop
3369
3370	ret
3371	restore
3372
3373!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3374.align	32
3375.L256_xts_deloop2x:
3376	ldx		[%i0 + 0], %o0
3377	ldx		[%i0 + 8], %o1
3378	ldx		[%i0 + 16], %o2
3379	brz,pt		%l0, 4f
3380	ldx		[%i0 + 24], %o3
3381
3382	ldx		[%i0 + 32], %o4
3383	sllx		%o0, %l0, %o0
3384	srlx		%o1, %l1, %g1
3385	or		%g1, %o0, %o0
3386	sllx		%o1, %l0, %o1
3387	srlx		%o2, %l1, %g1
3388	or		%g1, %o1, %o1
3389	sllx		%o2, %l0, %o2
3390	srlx		%o3, %l1, %g1
3391	or		%g1, %o2, %o2
3392	sllx		%o3, %l0, %o3
3393	srlx		%o4, %l1, %o4
3394	or		%o4, %o3, %o3
33954:
3396	.word	0x99b02302 !movxtod	%g2,%f12
3397	.word	0x9db02303 !movxtod	%g3,%f14
3398	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3399	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3400
3401	srax		%g3, 63, %l7		! next tweak value
3402	addcc		%g2, %g2, %g2
3403	and		%l7, 0x87, %l7
3404	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3405	xor		%l7, %g2, %g2
3406
3407	.word	0x91b02302 !movxtod	%g2,%f8
3408	.word	0x95b02303 !movxtod	%g3,%f10
3409	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3410	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3411
3412	xor		%g4, %o0, %o0		! ^= rk[0]
3413	xor		%g5, %o1, %o1
3414	xor		%g4, %o2, %o2		! ^= rk[0]
3415	xor		%g5, %o3, %o3
3416	.word	0x81b02308 !movxtod	%o0,%f0
3417	.word	0x85b02309 !movxtod	%o1,%f2
3418	.word	0x89b0230a !movxtod	%o2,%f4
3419	.word	0x8db0230b !movxtod	%o3,%f6
3420
3421	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3422	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3423	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
3424	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3425
3426	prefetch	[%i1 + 63], 22
3427	prefetch	[%i0 + 32+63], 20
3428	call		_aes256_decrypt_2x
3429	add		%i0, 32, %i0
3430
3431	.word	0x91b02302 !movxtod	%g2,%f8
3432	.word	0x95b02303 !movxtod	%g3,%f10
3433
3434	srax		%g3, 63, %l7		! next tweak value
3435	addcc		%g2, %g2, %g2
3436	and		%l7, 0x87, %l7
3437	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3438	xor		%l7, %g2, %g2
3439
3440	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3441	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3442
3443	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3444	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3445	.word	0x89b20d84 !fxor	%f8,%f4,%f4
3446	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3447
3448	brnz,pn		%l2, 2f
3449	sub		%i2, 2, %i2
3450
3451	std		%f0, [%i1 + 0]
3452	std		%f2, [%i1 + 8]
3453	std		%f4, [%i1 + 16]
3454	std		%f6, [%i1 + 24]
3455	brnz,pt		%i2, .L256_xts_deloop2x
3456	add		%i1, 32, %i1
3457
3458	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3459	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3460	brnz,pn		%i5, .L256_xts_desteal
3461	nop
3462
3463	ret
3464	restore
3465
3466.align	16
34672:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
3468						! and ~3x deterioration
3469						! in inp==out case
3470	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
3471	.word	0x95b00902 !faligndata	%f0,%f2,%f10
3472	.word	0x99b08904 !faligndata	%f2,%f4,%f12
3473	.word	0x9db10906 !faligndata	%f4,%f6,%f14
3474	.word	0x81b18906 !faligndata	%f6,%f6,%f0
3475
3476	stda		%f8, [%i1 + %l3]0xc0	! partial store
3477	std		%f10, [%i1 + 8]
3478	std		%f12, [%i1 + 16]
3479	std		%f14, [%i1 + 24]
3480	add		%i1, 32, %i1
3481	orn		%g0, %l3, %l3
3482	stda		%f0, [%i1 + %l3]0xc0	! partial store
3483
3484	brnz,pt		%i2, .L256_xts_deloop2x+4
3485	orn		%g0, %l3, %l3
3486
3487	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3488	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3489	brnz,pn		%i5, .L256_xts_desteal
3490	nop
3491
3492	ret
3493	restore
3494
3495!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3496.align	32
3497.L256_xts_deblk:
3498	add	%i1, %i2, %l5
3499	and	%l5, 63, %l5	! tail
3500	sub	%i2, %l5, %i2
3501	add	%l5, 15, %l5	! round up to 16n
3502	srlx	%i2, 4, %i2
3503	srl	%l5, 4, %l5
3504	sub	%i2, 1, %i2
3505	add	%l5, 1, %l5
3506
3507.L256_xts_deblk2x:
3508	ldx		[%i0 + 0], %o0
3509	ldx		[%i0 + 8], %o1
3510	ldx		[%i0 + 16], %o2
3511	brz,pt		%l0, 5f
3512	ldx		[%i0 + 24], %o3
3513
3514	ldx		[%i0 + 32], %o4
3515	sllx		%o0, %l0, %o0
3516	srlx		%o1, %l1, %g1
3517	or		%g1, %o0, %o0
3518	sllx		%o1, %l0, %o1
3519	srlx		%o2, %l1, %g1
3520	or		%g1, %o1, %o1
3521	sllx		%o2, %l0, %o2
3522	srlx		%o3, %l1, %g1
3523	or		%g1, %o2, %o2
3524	sllx		%o3, %l0, %o3
3525	srlx		%o4, %l1, %o4
3526	or		%o4, %o3, %o3
35275:
3528	.word	0x99b02302 !movxtod	%g2,%f12
3529	.word	0x9db02303 !movxtod	%g3,%f14
3530	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3531	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
3532
3533	srax		%g3, 63, %l7		! next tweak value
3534	addcc		%g2, %g2, %g2
3535	and		%l7, 0x87, %l7
3536	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3537	xor		%l7, %g2, %g2
3538
3539	.word	0x91b02302 !movxtod	%g2,%f8
3540	.word	0x95b02303 !movxtod	%g3,%f10
3541	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3542	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3543
3544	xor		%g4, %o0, %o0		! ^= rk[0]
3545	xor		%g5, %o1, %o1
3546	xor		%g4, %o2, %o2		! ^= rk[0]
3547	xor		%g5, %o3, %o3
3548	.word	0x81b02308 !movxtod	%o0,%f0
3549	.word	0x85b02309 !movxtod	%o1,%f2
3550	.word	0x89b0230a !movxtod	%o2,%f4
3551	.word	0x8db0230b !movxtod	%o3,%f6
3552
3553	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3554	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3555	.word	0x89b20d84 !fxor	%f8,%f4,%f4		! ^= tweak[0]
3556	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3557
3558	prefetch	[%i0 + 32+63], 20
3559	call		_aes256_decrypt_2x
3560	add		%i0, 32, %i0
3561
3562	.word	0x91b02302 !movxtod	%g2,%f8
3563	.word	0x95b02303 !movxtod	%g3,%f10
3564
3565	srax		%g3, 63, %l7		! next tweak value
3566	addcc		%g2, %g2, %g2
3567	and		%l7, 0x87, %l7
3568	.word	0x87b0c223 !addxc	%g3,%g3,%g3
3569	xor		%l7, %g2, %g2
3570
3571	.word	0x91b20988 !bshuffle	%f8,%f8,%f8
3572	.word	0x95b2898a !bshuffle	%f10,%f10,%f10
3573
3574	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3575	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3576	.word	0x89b20d84 !fxor	%f8,%f4,%f4
3577	.word	0x8db28d86 !fxor	%f10,%f6,%f6
3578
3579	subcc		%i2, 2, %i2
3580	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3581	add		%i1, 8, %i1
3582	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3583	add		%i1, 8, %i1
3584	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3585	add		%i1, 8, %i1
3586	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3587	bgu,pt		%icc, .L256_xts_deblk2x
3588	add		%i1, 8, %i1
3589
3590	add		%l5, %i2, %i2
3591	andcc		%i2, 1, %g0		! is number of blocks even?
3592	membar		#StoreLoad|#StoreStore
3593	bnz,pt		%icc, .L256_xts_deloop
3594	srl		%i2, 0, %i2
3595	brnz,pn		%i2, .L256_xts_deloop2x
3596	nop
3597
3598	.word	0x81b00f04 !fsrc2	%f0,%f4,%f0
3599	.word	0x85b00f06 !fsrc2	%f0,%f6,%f2
3600	brnz,pn		%i5, .L256_xts_desteal
3601	nop
3602
3603	ret
3604	restore
3605!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3606.align	32
3607.L256_xts_desteal:
3608	ldx		[%i0 + 0], %o0
3609	brz,pt		%l0, 8f
3610	ldx		[%i0 + 8], %o1
3611
3612	ldx		[%i0 + 16], %o2
3613	sllx		%o0, %l0, %o0
3614	srlx		%o1, %l1, %g1
3615	sllx		%o1, %l0, %o1
3616	or		%g1, %o0, %o0
3617	srlx		%o2, %l1, %o2
3618	or		%o2, %o1, %o1
36198:
3620	srax		%g3, 63, %l7		! next tweak value
3621	addcc		%g2, %g2, %o2
3622	and		%l7, 0x87, %l7
3623	.word	0x97b0c223 !addxc	%g3,%g3,%o3
3624	xor		%l7, %o2, %o2
3625
3626	.word	0x99b0230a !movxtod	%o2,%f12
3627	.word	0x9db0230b !movxtod	%o3,%f14
3628	.word	0x99b3098c !bshuffle	%f12,%f12,%f12
3629	.word	0x9db3898e !bshuffle	%f14,%f14,%f14
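! Decrypt-side stealing order: the last full ciphertext block is
! decrypted with the *next* tweak, built into %o2:%o3 above, while the
! current tweak is parked in %g2:%g3 for the trailing partial block,
! which loops back through .L256_xts_deloop after the byte swap below.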
3630
3631	xor		%g4, %o0, %o0		! ^= rk[0]
3632	xor		%g5, %o1, %o1
3633	.word	0x81b02308 !movxtod	%o0,%f0
3634	.word	0x85b02309 !movxtod	%o1,%f2
3635
3636	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3637	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3638
3639	call		_aes256_decrypt_1x
3640	add		%i0, 16, %i0
3641
3642	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= tweak[0]
3643	.word	0x85b38d82 !fxor	%f14,%f2,%f2
3644
3645	std		%f0, [%fp + 0-16]
3646	std		%f2, [%fp + 0-8]
3647
3648	srl		%l0, 3, %l0
3649	add		%fp, 0-16, %l7
3650	add		%i0, %l0, %i0	! original %i0 + (%i2&-16)
3651	add		%i1, %l2, %i1	! original %i1 + (%i2&-16)
3652	mov		0, %l0
3653	add		%i1, 16, %i1
3654	nop					! align
3655
3656.L256_xts_destealing:
3657	ldub		[%i0 + %l0], %o0
3658	ldub		[%l7  + %l0], %o1
3659	dec		%i5
3660	stb		%o0, [%l7  + %l0]
3661	stb		%o1, [%i1 + %l0]
3662	brnz		%i5, .L256_xts_destealing
3663	inc		%l0
3664
3665	mov		%l7, %i0
3666	sub		%i1, 16, %i1
3667	mov		0, %l0
3668	sub		%i1, %l2, %i1
3669	ba		.L256_xts_deloop	! one more time
3670	mov		1, %i2				! %i5 is 0
3671	ret
3672	restore
3673.type	aes256_t4_xts_decrypt,#function
3674.size	aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
3675.globl	aes192_t4_ctr32_encrypt
3676.align	32
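! Identical flow to aes256_t4_ctr32_encrypt above, using the 12-round
! 192-bit schedule.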
3677aes192_t4_ctr32_encrypt:
3678	save		%sp, -112, %sp
3679	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
3680
3681	prefetch	[%i0], 20
3682	prefetch	[%i0 + 63], 20
3683	call		_aes192_load_enckey
3684	sllx		%i2, 4, %i2
3685
3686	ld		[%i4 + 0], %l4	! counter
3687	ld		[%i4 + 4], %l5
3688	ld		[%i4 + 8], %l6
3689	ld		[%i4 + 12], %l7
3690
3691	sllx		%l4, 32, %o5
3692	or		%l5, %o5, %o5
3693	sllx		%l6, 32, %g1
3694	xor		%o5, %g4, %g4		! ^= rk[0]
3695	xor		%g1, %g5, %g5
3696	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
3697
3698	sub		%i0, %i1, %l5	! %l5 = inp-out; 0 iff %i0==%i1
3699	and		%i0, 7, %l0
3700	andn		%i0, 7, %i0
3701	sll		%l0, 3, %l0
3702	mov		64, %l1
3703	mov		0xff, %l3
3704	sub		%l1, %l0, %l1
3705	and		%i1, 7, %l2
3706	cmp		%i2, 255
3707	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
3708	movleu		%icc, 0, %l5	!	%i2<256 ||
3709	brnz,pn		%l5, .L192_ctr32_blk	!	%i0==%i1)
3710	srl		%l3, %l2, %l3
3711
3712	andcc		%i2, 16, %g0		! is number of blocks even?
3713	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
3714	bz		%icc, .L192_ctr32_loop2x
3715	srlx		%i2, 4, %i2
3716.L192_ctr32_loop:
3717	ldx		[%i0 + 0], %o0
3718	brz,pt		%l0, 4f
3719	ldx		[%i0 + 8], %o1
3720
3721	ldx		[%i0 + 16], %o2
3722	sllx		%o0, %l0, %o0
3723	srlx		%o1, %l1, %g1
3724	sllx		%o1, %l0, %o1
3725	or		%g1, %o0, %o0
3726	srlx		%o2, %l1, %o2
3727	or		%o2, %o1, %o1
37284:
3729	xor		%g5, %l7, %g1		! ^= rk[0]
3730	add		%l7, 1, %l7
3731	.word	0x85b02301 !movxtod	%g1,%f2
3732	srl		%l7, 0, %l7		! clruw
3733	prefetch	[%i1 + 63], 22
3734	prefetch	[%i0 + 16+63], 20
3735	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
3736	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
3737	call		_aes192_encrypt_1x+8
3738	add		%i0, 16, %i0
3739
3740	.word	0x95b02308 !movxtod	%o0,%f10
3741	.word	0x99b02309 !movxtod	%o1,%f12
3742	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
3743	.word	0x85b30d82 !fxor	%f12,%f2,%f2
3744
3745	brnz,pn		%l2, 2f
3746	sub		%i2, 1, %i2
3747
3748	std		%f0, [%i1 + 0]
3749	std		%f2, [%i1 + 8]
3750	brnz,pt		%i2, .L192_ctr32_loop2x
3751	add		%i1, 16, %i1
3752
3753	ret
3754	restore
3755
3756.align	16
37572:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
3758						! and ~3x deterioration
3759						! in inp==out case
3760	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
3761	.word	0x8db00902 !faligndata	%f0,%f2,%f6
3762	.word	0x91b08902 !faligndata	%f2,%f2,%f8
3763	stda		%f4, [%i1 + %l3]0xc0	! partial store
3764	std		%f6, [%i1 + 8]
3765	add		%i1, 16, %i1
3766	orn		%g0, %l3, %l3
3767	stda		%f8, [%i1 + %l3]0xc0	! partial store
3768
3769	brnz,pt		%i2, .L192_ctr32_loop2x+4
3770	orn		%g0, %l3, %l3
3771
3772	ret
3773	restore
3774
3775!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3776.align	32
3777.L192_ctr32_loop2x:
3778	ldx		[%i0 + 0], %o0
3779	ldx		[%i0 + 8], %o1
3780	ldx		[%i0 + 16], %o2
3781	brz,pt		%l0, 4f
3782	ldx		[%i0 + 24], %o3
3783
3784	ldx		[%i0 + 32], %o4
3785	sllx		%o0, %l0, %o0
3786	srlx		%o1, %l1, %g1
3787	or		%g1, %o0, %o0
3788	sllx		%o1, %l0, %o1
3789	srlx		%o2, %l1, %g1
3790	or		%g1, %o1, %o1
3791	sllx		%o2, %l0, %o2
3792	srlx		%o3, %l1, %g1
3793	or		%g1, %o2, %o2
3794	sllx		%o3, %l0, %o3
3795	srlx		%o4, %l1, %o4
3796	or		%o4, %o3, %o3
37974:
3798	xor		%g5, %l7, %g1		! ^= rk[0]
3799	add		%l7, 1, %l7
3800	.word	0x85b02301 !movxtod	%g1,%f2
3801	srl		%l7, 0, %l7		! clruw
3802	xor		%g5, %l7, %g1
3803	add		%l7, 1, %l7
3804	.word	0x8db02301 !movxtod	%g1,%f6
3805	srl		%l7, 0, %l7		! clruw
3806	prefetch	[%i1 + 63], 22
3807	prefetch	[%i0 + 32+63], 20
3808	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
3809	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
3810	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
3811	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
3812	call		_aes192_encrypt_2x+16
3813	add		%i0, 32, %i0
3814
3815	.word	0x91b02308 !movxtod	%o0,%f8
3816	.word	0x95b02309 !movxtod	%o1,%f10
3817	.word	0x99b0230a !movxtod	%o2,%f12
3818	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
3819	.word	0x91b0230b !movxtod	%o3,%f8
3820	.word	0x85b28d82 !fxor	%f10,%f2,%f2
3821	.word	0x89b30d84 !fxor	%f12,%f4,%f4
3822	.word	0x8db20d86 !fxor	%f8,%f6,%f6
3823
3824	brnz,pn		%l2, 2f
3825	sub		%i2, 2, %i2
3826
3827	std		%f0, [%i1 + 0]
3828	std		%f2, [%i1 + 8]
3829	std		%f4, [%i1 + 16]
3830	std		%f6, [%i1 + 24]
3831	brnz,pt		%i2, .L192_ctr32_loop2x
3832	add		%i1, 32, %i1
3833
3834	ret
3835	restore
3836
3837.align	16
38382:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
3839						! and ~3x deterioration
3840						! in inp==out case
3841	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
3842	.word	0x81b00902 !faligndata	%f0,%f2,%f0
3843	.word	0x85b08904 !faligndata	%f2,%f4,%f2
3844	.word	0x89b10906 !faligndata	%f4,%f6,%f4
3845	.word	0x8db18906 !faligndata	%f6,%f6,%f6
3846
3847	stda		%f8, [%i1 + %l3]0xc0	! partial store
3848	std		%f0, [%i1 + 8]
3849	std		%f2, [%i1 + 16]
3850	std		%f4, [%i1 + 24]
3851	add		%i1, 32, %i1
3852	orn		%g0, %l3, %l3
3853	stda		%f6, [%i1 + %l3]0xc0	! partial store
3854
3855	brnz,pt		%i2, .L192_ctr32_loop2x+4
3856	orn		%g0, %l3, %l3
3857
3858	ret
3859	restore
3860
3861!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L192_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_ctr32_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g5, %l7, %g1		! ^= rk[0]
	add		%l7, 1, %l7
	.word	0x85b02301 !movxtod	%g1,%f2
	srl		%l7, 0, %l7		! clruw
	xor		%g5, %l7, %g1
	add		%l7, 1, %l7
	.word	0x8db02301 !movxtod	%g1,%f6
	srl		%l7, 0, %l7		! clruw
	prefetch	[%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
	call		_aes192_encrypt_2x+16
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
	.word	0x91b0230b !movxtod	%o3,%f8
	.word	0x85b28d82 !fxor	%f10,%f2,%f2
	.word	0x89b30d84 !fxor	%f12,%f4,%f4
	.word	0x8db20d86 !fxor	%f8,%f6,%f6

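! ASI 0xe2 is a T4 block-init store (per the inline comments): it
! allocates the destination line without fetching its old contents,
! which is the payoff of this path on large outputs.  Such stores
! are presumably also why the membar #StoreLoad|#StoreStore below is
! needed before control returns to the ordinary loops.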
	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L192_ctr32_blk_loop2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_ctr32_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_ctr32_loop2x
	nop

	ret
	restore
.type	aes192_t4_ctr32_encrypt,#function
.size	aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
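! CBC decrypt entry.  Judging by the loads below and by how the
! helpers index %i3, the arguments are inp=%i0, out=%i1, len=%i2,
! key=%i3, ivec=%i4.  The movrnz/movleu pair clears %l5 (= inp-out)
! when the output is unaligned or len < 256, so the block-store path
! is taken only for long requests with aligned output and inp!=out;
! everything else runs through the 1x/2x loops.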
.globl	aes192_t4_cbc_decrypt
.align	32
aes192_t4_cbc_decrypt:
	save		%sp, -112, %sp
	cmp		%i2, 0
	be,pn		%icc, .L192_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes192_load_deckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L192cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L192_cbc_dec_loop2x
	prefetch	[%i1], 22
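! CBC chaining: P[i] = D(C[i]) ^ C[i-1].  The previous ciphertext
! block lives in %f12/%f14 (seeded from ivec above) and is refreshed
! from the raw input words after every decrypt, which is why each
! round trip ends with a pair of movxtod into %f12/%f14; on exit the
! last ciphertext block is stored back to ivec.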
.L192_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes192_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L192_cbc_dec_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L192_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L192_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L192_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
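! Bulk CBC-decrypt path; same structure as the CTR block path above:
! block-init stores for the 64-byte bulk, then a membar and the
! ordinary loops for the tail.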
.align	32
.L192cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L192_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes192_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L192_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L192_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L192_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes192_t4_cbc_decrypt,#function
.size	aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
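! aes256_t4_cbc_decrypt mirrors the 192-bit routine above instruction
! for instruction; only the helpers change (_aes256_load_deckey and
! _aes256_decrypt_{1x,2x}, i.e. 14 rounds instead of 12).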
.globl	aes256_t4_cbc_decrypt
.align	32
aes256_t4_cbc_decrypt:
	save		%sp, -112, %sp
	cmp		%i2, 0
	be,pn		%icc, .L256_cbc_dec_abort
	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub		%i0, %i1, %l5	! %i0!=%i1
	ld		[%i4 + 0], %f12	! load ivec
	ld		[%i4 + 4], %f13
	ld		[%i4 + 8], %f14
	ld		[%i4 + 12], %f15
	prefetch	[%i0], 20
	prefetch	[%i0 + 63], 20
	call		_aes256_load_deckey
	and		%i0, 7, %l0
	andn		%i0, 7, %i0
	sll		%l0, 3, %l0
	mov		64, %l1
	mov		0xff, %l3
	sub		%l1, %l0, %l1
	and		%i1, 7, %l2
	cmp		%i2, 255
	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
	movleu		%icc, 0, %l5	!	%i2<256 ||
	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
	srl		%l3, %l2, %l3

	andcc		%i2, 16, %g0		! is number of blocks even?
	srlx		%i2, 4, %i2
	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
	bz		%icc, .L256_cbc_dec_loop2x
	prefetch	[%i1], 22
.L256_cbc_dec_loop:
	ldx		[%i0 + 0], %o0
	brz,pt		%l0, 4f
	ldx		[%i0 + 8], %o1

	ldx		[%i0 + 16], %o2
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	sllx		%o1, %l0, %o1
	or		%g1, %o0, %o0
	srlx		%o2, %l1, %o2
	or		%o2, %o1, %o1
4:
	xor		%g4, %o0, %o2		! ^= rk[0]
	xor		%g5, %o1, %o3
	.word	0x81b0230a !movxtod	%o2,%f0
	.word	0x85b0230b !movxtod	%o3,%f2

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 16+63], 20
	call		_aes256_decrypt_1x
	add		%i0, 16, %i0

	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b02308 !movxtod	%o0,%f12
	.word	0x9db02309 !movxtod	%o1,%f14

	brnz,pn		%l2, 2f
	sub		%i2, 1, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	brnz,pt		%i2, .L256_cbc_dec_loop2x
	add		%i1, 16, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
.L256_cbc_dec_abort:
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
	.word	0x8db00902 !faligndata	%f0,%f2,%f6
	.word	0x91b08902 !faligndata	%f2,%f2,%f8

	stda		%f4, [%i1 + %l3]0xc0	! partial store
	std		%f6, [%i1 + 8]
	add		%i1, 16, %i1
	orn		%g0, %l3, %l3
	stda		%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L256_cbc_dec_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 4f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
4:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i1 + 63], 22
	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	brnz,pn		%l2, 2f
	sub		%i2, 2, %i2

	std		%f0, [%i1 + 0]
	std		%f2, [%i1 + 8]
	std		%f4, [%i1 + 16]
	std		%f6, [%i1 + 24]
	brnz,pt		%i2, .L256_cbc_dec_loop2x
	add		%i1, 32, %i1
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

.align	16
2:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
	.word	0x81b00902 !faligndata	%f0,%f2,%f0
	.word	0x85b08904 !faligndata	%f2,%f4,%f2
	.word	0x89b10906 !faligndata	%f4,%f6,%f4
	.word	0x8db18906 !faligndata	%f6,%f6,%f6
	stda		%f8, [%i1 + %l3]0xc0	! partial store
	std		%f0, [%i1 + 8]
	std		%f2, [%i1 + 16]
	std		%f4, [%i1 + 24]
	add		%i1, 32, %i1
	orn		%g0, %l3, %l3
	stda		%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
	orn		%g0, %l3, %l3
	st		%f12, [%i4 + 0]
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L256cbc_dec_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5	! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5	! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L256_cbc_dec_blk_loop2x:
	ldx		[%i0 + 0], %o0
	ldx		[%i0 + 8], %o1
	ldx		[%i0 + 16], %o2
	brz,pt		%l0, 5f
	ldx		[%i0 + 24], %o3

	ldx		[%i0 + 32], %o4
	sllx		%o0, %l0, %o0
	srlx		%o1, %l1, %g1
	or		%g1, %o0, %o0
	sllx		%o1, %l0, %o1
	srlx		%o2, %l1, %g1
	or		%g1, %o1, %o1
	sllx		%o2, %l0, %o2
	srlx		%o3, %l1, %g1
	or		%g1, %o2, %o2
	sllx		%o3, %l0, %o3
	srlx		%o4, %l1, %o4
	or		%o4, %o3, %o3
5:
	xor		%g4, %o0, %o4		! ^= rk[0]
	xor		%g5, %o1, %o5
	.word	0x81b0230c !movxtod	%o4,%f0
	.word	0x85b0230d !movxtod	%o5,%f2
	xor		%g4, %o2, %o4
	xor		%g5, %o3, %o5
	.word	0x89b0230c !movxtod	%o4,%f4
	.word	0x8db0230d !movxtod	%o5,%f6

	prefetch	[%i0 + 32+63], 20
	call		_aes256_decrypt_2x
	add		%i0, 32, %i0
	subcc		%i2, 2, %i2

	.word	0x91b02308 !movxtod	%o0,%f8
	.word	0x95b02309 !movxtod	%o1,%f10
	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
	.word	0x85b38d82 !fxor	%f14,%f2,%f2
	.word	0x99b0230a !movxtod	%o2,%f12
	.word	0x9db0230b !movxtod	%o3,%f14
	.word	0x89b20d84 !fxor	%f8,%f4,%f4
	.word	0x8db28d86 !fxor	%f10,%f6,%f6

	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add		%i1, 8, %i1
	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt		%icc, .L256_cbc_dec_blk_loop2x
	add		%i1, 8, %i1

	add		%l5, %i2, %i2
	andcc		%i2, 1, %g0		! is number of blocks even?
	membar		#StoreLoad|#StoreStore
	bnz,pt		%icc, .L256_cbc_dec_loop
	srl		%i2, 0, %i2
	brnz,pn		%i2, .L256_cbc_dec_loop2x
	nop
	st		%f12, [%i4 + 0]	! write out ivec
	st		%f13, [%i4 + 4]
	st		%f14, [%i4 + 8]
	st		%f15, [%i4 + 12]
	ret
	restore
.type	aes256_t4_cbc_decrypt,#function
.size	aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
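! Round helpers.  _aes256_load_deckey (defined elsewhere in this
! file) parks the decryption schedule in %f16..%f62, but 14 rounds
! need rk[1]..rk[14] = 28 doublewords and only 24 even FP registers
! are available.  Hence rk[13]/rk[14] are fetched from the schedule
! (offsets 208..232) mid-flight, and rk[1]/rk[2] (offsets 16..40)
! are reloaded at the end so %f16..%f22 are ready for the next call;
! rk[0] was already folded in on the integer side.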
.align	32
_aes256_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 208], %f16
	ldd		[%i3 + 216], %f18
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x80cf4444 !aes_dround01	%f60,%f4,%f2,%f0
	.word	0x84cfc464 !aes_dround23	%f62,%f4,%f2,%f2
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	.word	0x80cd04c4 !aes_dround01_l	%f20,%f4,%f2,%f0
	.word	0x84cd84e4 !aes_dround23_l	%f22,%f4,%f2,%f2
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22
.type	_aes256_decrypt_1x,#function
.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x
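! _aes256_decrypt_2x pushes two blocks through the same schedule with
! the rounds interleaved pairwise, so one block's dround pair can
! issue while the other's is still in flight; %f8/%f10 carry the
! extra intermediate state.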

.align	32
_aes256_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 208], %f16
	ldd		[%i3 + 216], %f18
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	ldd		[%i3 + 224], %f20
	ldd		[%i3 + 232], %f22
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	.word	0x80cf4448 !aes_dround01	%f60,%f8,%f2,%f0
	.word	0x84cfc468 !aes_dround23	%f62,%f8,%f2,%f2
	.word	0x88cf4c4a !aes_dround01	%f60,%f10,%f6,%f4
	.word	0x8ccfcc6a !aes_dround23	%f62,%f10,%f6,%f6
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	ldd		[%i3 + 16], %f16
	ldd		[%i3 + 24], %f18
	.word	0x80cd04c8 !aes_dround01_l	%f20,%f8,%f2,%f0
	.word	0x84cd84e8 !aes_dround23_l	%f22,%f8,%f2,%f2
	.word	0x88cd0cca !aes_dround01_l	%f20,%f10,%f6,%f4
	.word	0x8ccd8cea !aes_dround23_l	%f22,%f10,%f6,%f6
	ldd		[%i3 + 32], %f20
	retl
	ldd		[%i3 + 40], %f22
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x
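! The 192-bit helpers need only rk[1]..rk[12] = 24 doublewords, which
! is exactly %f16..%f62, so unlike the 256-bit versions they run
! without any key reloads.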

.align	32
_aes192_decrypt_1x:
	.word	0x88cc0440 !aes_dround01	%f16,%f0,%f2,%f4
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x80cd0444 !aes_dround01	%f20,%f4,%f2,%f0
	.word	0x84cd8464 !aes_dround23	%f22,%f4,%f2,%f2
	.word	0x88ce0440 !aes_dround01	%f24,%f0,%f2,%f4
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x80cf0444 !aes_dround01	%f28,%f4,%f2,%f0
	.word	0x84cf8464 !aes_dround23	%f30,%f4,%f2,%f2
	.word	0x88c84440 !aes_dround01	%f32,%f0,%f2,%f4
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x80c94444 !aes_dround01	%f36,%f4,%f2,%f0
	.word	0x84c9c464 !aes_dround23	%f38,%f4,%f2,%f2
	.word	0x88ca4440 !aes_dround01	%f40,%f0,%f2,%f4
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x80cb4444 !aes_dround01	%f44,%f4,%f2,%f0
	.word	0x84cbc464 !aes_dround23	%f46,%f4,%f2,%f2
	.word	0x88cc4440 !aes_dround01	%f48,%f0,%f2,%f4
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x80cd4444 !aes_dround01	%f52,%f4,%f2,%f0
	.word	0x84cdc464 !aes_dround23	%f54,%f4,%f2,%f2
	.word	0x88ce4440 !aes_dround01	%f56,%f0,%f2,%f4
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x80cf44c4 !aes_dround01_l	%f60,%f4,%f2,%f0
	retl
	.word	0x84cfc4e4 !aes_dround23_l	%f62,%f4,%f2,%f2
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x

.align	32
_aes192_decrypt_2x:
	.word	0x90cc0440 !aes_dround01	%f16,%f0,%f2,%f8
	.word	0x84cc8460 !aes_dround23	%f18,%f0,%f2,%f2
	.word	0x94cc0c44 !aes_dround01	%f16,%f4,%f6,%f10
	.word	0x8ccc8c64 !aes_dround23	%f18,%f4,%f6,%f6
	.word	0x80cd0448 !aes_dround01	%f20,%f8,%f2,%f0
	.word	0x84cd8468 !aes_dround23	%f22,%f8,%f2,%f2
	.word	0x88cd0c4a !aes_dround01	%f20,%f10,%f6,%f4
	.word	0x8ccd8c6a !aes_dround23	%f22,%f10,%f6,%f6
	.word	0x90ce0440 !aes_dround01	%f24,%f0,%f2,%f8
	.word	0x84ce8460 !aes_dround23	%f26,%f0,%f2,%f2
	.word	0x94ce0c44 !aes_dround01	%f24,%f4,%f6,%f10
	.word	0x8cce8c64 !aes_dround23	%f26,%f4,%f6,%f6
	.word	0x80cf0448 !aes_dround01	%f28,%f8,%f2,%f0
	.word	0x84cf8468 !aes_dround23	%f30,%f8,%f2,%f2
	.word	0x88cf0c4a !aes_dround01	%f28,%f10,%f6,%f4
	.word	0x8ccf8c6a !aes_dround23	%f30,%f10,%f6,%f6
	.word	0x90c84440 !aes_dround01	%f32,%f0,%f2,%f8
	.word	0x84c8c460 !aes_dround23	%f34,%f0,%f2,%f2
	.word	0x94c84c44 !aes_dround01	%f32,%f4,%f6,%f10
	.word	0x8cc8cc64 !aes_dround23	%f34,%f4,%f6,%f6
	.word	0x80c94448 !aes_dround01	%f36,%f8,%f2,%f0
	.word	0x84c9c468 !aes_dround23	%f38,%f8,%f2,%f2
	.word	0x88c94c4a !aes_dround01	%f36,%f10,%f6,%f4
	.word	0x8cc9cc6a !aes_dround23	%f38,%f10,%f6,%f6
	.word	0x90ca4440 !aes_dround01	%f40,%f0,%f2,%f8
	.word	0x84cac460 !aes_dround23	%f42,%f0,%f2,%f2
	.word	0x94ca4c44 !aes_dround01	%f40,%f4,%f6,%f10
	.word	0x8ccacc64 !aes_dround23	%f42,%f4,%f6,%f6
	.word	0x80cb4448 !aes_dround01	%f44,%f8,%f2,%f0
	.word	0x84cbc468 !aes_dround23	%f46,%f8,%f2,%f2
	.word	0x88cb4c4a !aes_dround01	%f44,%f10,%f6,%f4
	.word	0x8ccbcc6a !aes_dround23	%f46,%f10,%f6,%f6
	.word	0x90cc4440 !aes_dround01	%f48,%f0,%f2,%f8
	.word	0x84ccc460 !aes_dround23	%f50,%f0,%f2,%f2
	.word	0x94cc4c44 !aes_dround01	%f48,%f4,%f6,%f10
	.word	0x8ccccc64 !aes_dround23	%f50,%f4,%f6,%f6
	.word	0x80cd4448 !aes_dround01	%f52,%f8,%f2,%f0
	.word	0x84cdc468 !aes_dround23	%f54,%f8,%f2,%f2
	.word	0x88cd4c4a !aes_dround01	%f52,%f10,%f6,%f4
	.word	0x8ccdcc6a !aes_dround23	%f54,%f10,%f6,%f6
	.word	0x90ce4440 !aes_dround01	%f56,%f0,%f2,%f8
	.word	0x84cec460 !aes_dround23	%f58,%f0,%f2,%f2
	.word	0x94ce4c44 !aes_dround01	%f56,%f4,%f6,%f10
	.word	0x8ccecc64 !aes_dround23	%f58,%f4,%f6,%f6
	.word	0x80cf44c8 !aes_dround01_l	%f60,%f8,%f2,%f0
	.word	0x84cfc4e8 !aes_dround23_l	%f62,%f8,%f2,%f2
	.word	0x88cf4cca !aes_dround01_l	%f60,%f10,%f6,%f4
	retl
	.word	0x8ccfccea !aes_dround23_l	%f62,%f10,%f6,%f6
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
.asciz	"AES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
