xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/aesfx-sparcv9.S (revision 1b3d6f93806f8821fe459e13ad13e605b37c6d43)
1#ifndef __ASSEMBLER__
2# define __ASSEMBLER__ 1
3#endif
4#include "crypto/sparc_arch.h"
5
6#define LOCALS (STACK_BIAS+STACK_FRAME)
7
8.text
9
!-----------------------------------------------------------------------
! void aes_fx_encrypt(const void *in, void *out, const AES_KEY *key)
!   %o0 = in   - 16-byte block, ANY alignment (handled below)
!   %o1 = out  - 16-byte block, ANY alignment
!   %o2 = key  - round-key schedule; round count is at offset 240
! Encrypts one block with Fujitsu SPARC64 X AES instructions.  The FX
! opcodes (faesencx/faesenclx/fshiftorx/fxor) are emitted as raw .word
! values because stock assemblers don't know them; the trailing
! "!mnemonic" comment on each .word is the decoded instruction.
! Clobbers: %o0-%o5, %g1, %f0-%f14; leaf routine (no register window).
!-----------------------------------------------------------------------
10.globl	aes_fx_encrypt
11.align	32
12aes_fx_encrypt:
13	and		%o0, 7, %o4		! is input aligned?
14	andn		%o0, 7, %o0
15	ldd		[%o2 +  0], %f6	! round[0]
16	ldd		[%o2 +  8], %f8
17	mov		%o7, %g1
18	ld		[%o2 + 240], %o3
19
! Take the PC into %o7 to address the .Linp_align table PC-relatively;
! the caller's return address was stashed in %g1 above.
201:	call		.+8
21	add		%o7, .Linp_align-1b, %o7

22
23	sll		%o4, 3, %o4
24	ldd		[%o0 + 0], %f0		! load input
25	brz,pt		%o4, .Lenc_inp_aligned
26	ldd		[%o0 + 8], %f2
27
! Misaligned input: read one extra doubleword and funnel-shift the
! three doublewords left into place; %f14 holds the per-offset shift
! parameters selected from .Linp_align by (in & 7) * 8.
28	ldd		[%o7 + %o4], %f14	! shift left params
29	ldd		[%o0 + 16], %f4
30	.word	0x81b81d62 !fshiftorx	%f0,%f2,%f14,%f0
31	.word	0x85b89d64 !fshiftorx	%f2,%f4,%f14,%f2
32
33.Lenc_inp_aligned:
34	ldd		[%o2 + 16], %f10	! round[1]
35	ldd		[%o2 + 24], %f12
36
37	.word	0x81b00d86 !fxor	%f0,%f6,%f0		! ^=round[0]
38	.word	0x85b08d88 !fxor	%f2,%f8,%f2
39	ldd		[%o2 + 32], %f6	! round[2]
40	ldd		[%o2 + 40], %f8
41	add		%o2, 32, %o2
42	sub		%o3, 4, %o3
43
! Main loop: two AES rounds per iteration, alternating key pairs in
! %f10/%f12 and %f6/%f8 so key loads overlap the faesencx latency.
! %o3 counts remaining rounds; brnz,a executes the delay-slot sub
! only when the branch is taken.
44.Loop_enc:
45	fmovd		%f0, %f4
46	.word	0x81b0920a !faesencx	%f2,%f10,%f0
47	.word	0x85b1120c !faesencx	%f4,%f12,%f2
48	ldd		[%o2 + 16], %f10
49	ldd		[%o2 + 24], %f12
50	add		%o2, 32, %o2
51
52	fmovd		%f0, %f4
53	.word	0x81b09206 !faesencx	%f2,%f6,%f0
54	.word	0x85b11208 !faesencx	%f4,%f8,%f2
55	ldd		[%o2 +  0], %f6
56	ldd		[%o2 +  8], %f8
57
58	brnz,a		%o3, .Loop_enc
59	sub		%o3, 2, %o3
60
! Prepare output-alignment state: %o5 = byte-enable mask for a partial
! store of the leading (out & 7) bytes, %o4 = shift-table row offset;
! andcc leaves the alignment test result in %icc for the bnz below.
61	andcc		%o1, 7, %o4		! is output aligned?
62	andn		%o1, 7, %o1
63	mov		0xff, %o5
64	srl		%o5, %o4, %o5
65	add		%o7, 64, %o7
66	sll		%o4, 3, %o4
67
68	fmovd		%f0, %f4
69	.word	0x81b0920a !faesencx	%f2,%f10,%f0
70	.word	0x85b1120c !faesencx	%f4,%f12,%f2
71	ldd		[%o7 + %o4], %f14	! shift right params
72
! Last round (faesenclx = final round, no MixColumns).
73	fmovd		%f0, %f4
74	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
75	.word	0x85b11248 !faesenclx	%f4,%f8,%f2
76
77	bnz,pn		%icc, .Lenc_out_unaligned
78	mov		%g1, %o7
79
80	std		%f0, [%o1 + 0]
81	retl
82	std		%f2, [%o1 + 8]
83
! Misaligned output: funnel-shift the result right across three
! doublewords and write the ragged edges with partial stores
! (stda with ASI 0xc0, byte-masked by %o5 / its complement %o4).
84.align	16
85.Lenc_out_unaligned:
86	add		%o1, 16, %o0
87	orn		%g0, %o5, %o4
88	.word	0x89b81d60 !fshiftorx	%f0,%f0,%f14,%f4
89	.word	0x8db81d62 !fshiftorx	%f0,%f2,%f14,%f6
90	.word	0x91b89d62 !fshiftorx	%f2,%f2,%f14,%f8
91
92	stda		%f4, [%o1 + %o5]0xc0	! partial store
93	std		%f6, [%o1 + 8]
94	stda		%f8, [%o0 + %o4]0xc0	! partial store
95	retl
96	nop
97.type	aes_fx_encrypt,#function
98.size	aes_fx_encrypt,.-aes_fx_encrypt
99
!-----------------------------------------------------------------------
! void aes_fx_decrypt(const void *in, void *out, const AES_KEY *key)
!   %o0 = in   - 16-byte ciphertext block, ANY alignment
!   %o1 = out  - 16-byte plaintext block, ANY alignment
!   %o2 = key  - (decryption) round-key schedule; rounds at offset 240
! Exact mirror of aes_fx_encrypt above, using the decrypting FX opcodes
! faesdecx / faesdeclx instead of faesencx / faesenclx.  See the
! encrypt routine for the alignment-handling commentary.
! Clobbers: %o0-%o5, %g1, %f0-%f14; leaf routine.
!-----------------------------------------------------------------------
100.globl	aes_fx_decrypt
101.align	32
102aes_fx_decrypt:
103	and		%o0, 7, %o4		! is input aligned?
104	andn		%o0, 7, %o0
105	ldd		[%o2 +  0], %f6	! round[0]
106	ldd		[%o2 +  8], %f8
107	mov		%o7, %g1
108	ld		[%o2 + 240], %o3
109
! PC-relative address of .Linp_align; caller's %o7 saved in %g1.
1101:	call		.+8
111	add		%o7, .Linp_align-1b, %o7

112
113	sll		%o4, 3, %o4
114	ldd		[%o0 + 0], %f0		! load input
115	brz,pt		%o4, .Ldec_inp_aligned
116	ldd		[%o0 + 8], %f2
117
! Misaligned input: funnel-shift three doublewords into %f0/%f2.
118	ldd		[%o7 + %o4], %f14	! shift left params
119	ldd		[%o0 + 16], %f4
120	.word	0x81b81d62 !fshiftorx	%f0,%f2,%f14,%f0
121	.word	0x85b89d64 !fshiftorx	%f2,%f4,%f14,%f2
122
123.Ldec_inp_aligned:
124	ldd		[%o2 + 16], %f10	! round[1]
125	ldd		[%o2 + 24], %f12
126
127	.word	0x81b00d86 !fxor	%f0,%f6,%f0		! ^=round[0]
128	.word	0x85b08d88 !fxor	%f2,%f8,%f2
129	ldd		[%o2 + 32], %f6	! round[2]
130	ldd		[%o2 + 40], %f8
131	add		%o2, 32, %o2
132	sub		%o3, 4, %o3
133
! Two decryption rounds per iteration; key pairs alternate between
! %f10/%f12 and %f6/%f8.  %o3 = rounds remaining.
134.Loop_dec:
135	fmovd		%f0, %f4
136	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
137	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
138	ldd		[%o2 + 16], %f10
139	ldd		[%o2 + 24], %f12
140	add		%o2, 32, %o2
141
142	fmovd		%f0, %f4
143	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
144	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
145	ldd		[%o2 +  0], %f6
146	ldd		[%o2 +  8], %f8
147
148	brnz,a		%o3, .Loop_dec
149	sub		%o3, 2, %o3
150
! Output-alignment setup (same scheme as encrypt): %o5 = partial-store
! byte mask, %icc holds the aligned/unaligned test for bnz below.
151	andcc		%o1, 7, %o4		! is output aligned?
152	andn		%o1, 7, %o1
153	mov		0xff, %o5
154	srl		%o5, %o4, %o5
155	add		%o7, 64, %o7
156	sll		%o4, 3, %o4
157
158	fmovd		%f0, %f4
159	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
160	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
161	ldd		[%o7 + %o4], %f14	! shift right params
162
! Final decryption round (faesdeclx).
163	fmovd		%f0, %f4
164	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
165	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2
166
167	bnz,pn		%icc, .Ldec_out_unaligned
168	mov		%g1, %o7
169
170	std		%f0, [%o1 + 0]
171	retl
172	std		%f2, [%o1 + 8]
173
! Misaligned output: shift right and use masked partial stores for the
! ragged first and last doublewords.
174.align	16
175.Ldec_out_unaligned:
176	add		%o1, 16, %o0
177	orn		%g0, %o5, %o4
178	.word	0x89b81d60 !fshiftorx	%f0,%f0,%f14,%f4
179	.word	0x8db81d62 !fshiftorx	%f0,%f2,%f14,%f6
180	.word	0x91b89d62 !fshiftorx	%f2,%f2,%f14,%f8
181
182	stda		%f4, [%o1 + %o5]0xc0	! partial store
183	std		%f6, [%o1 + 8]
184	stda		%f8, [%o0 + %o4]0xc0	! partial store
185	retl
186	nop
187.type	aes_fx_decrypt,#function
188.size	aes_fx_decrypt,.-aes_fx_decrypt
!-----------------------------------------------------------------------
! int aes_fx_set_decrypt_key(const void *userKey, int bits, AES_KEY *key)
! Trampoline into the shared key-schedule code: sets %o4 = -1 so
! .Lset_encrypt_key writes the schedule backwards (from the end of the
! key buffer toward the start), producing a decryption schedule.
! The branch's delay slot carries the mov; the retl/nop pair after it
! is unreachable padding.
!-----------------------------------------------------------------------
189.globl	aes_fx_set_decrypt_key
190.align	32
191aes_fx_set_decrypt_key:
192	b		.Lset_encrypt_key
193	mov		-1, %o4
194	retl
195	nop
196.type	aes_fx_set_decrypt_key,#function
197.size	aes_fx_set_decrypt_key,.-aes_fx_set_decrypt_key
198
!-----------------------------------------------------------------------
! int aes_fx_set_encrypt_key(const void *userKey, int bits, AES_KEY *key)
!   %o0 = userKey - raw key bytes, ANY alignment
!   %o1 = bits    - 128, 192 or 256 (dispatched via cmp %o1,192)
!   %o2 = key     - output schedule; round count stored at offset 240
! Shared body for both set_encrypt_key (%o4 = +1) and, via the
! trampoline above, set_decrypt_key (%o4 = -1).  The direction trick:
!   - "and %o4, <10|12|14>*16, %o3" yields 0 for encrypt (start of
!     buffer) or rounds*16 for decrypt (end of buffer), and
!   - "sllx %o4, 4, %o4" yields the per-round step, +16 or -16,
! so the very same unrolled faeskeyx sequence writes the schedule
! forwards or backwards.  Always returns 0 in %o0.
! Key expansion itself is done in hardware by faeskeyx; the immediate
! operand (16..25 / 0x00 / 0x01) selects the round-constant/step.
!-----------------------------------------------------------------------
199.globl	aes_fx_set_encrypt_key
200.align	32
201aes_fx_set_encrypt_key:
202	mov		1, %o4
203	nop
204.Lset_encrypt_key:
205	and		%o0, 7, %o3
206	andn		%o0, 7, %o0
207	sll		%o3, 3, %o3
208	mov		%o7, %g1
209
! PC-relative access to the .Linp_align shift table, as in encrypt.
2101:	call		.+8
211	add		%o7, .Linp_align-1b, %o7

212
213	ldd		[%o7 + %o3], %f10	! shift left params
214	mov		%g1, %o7
215
! Dispatch on key size; the ldd's in the delay slots speculatively
! load further key doublewords (harmless for shorter keys).
216	cmp		%o1, 192
217	ldd		[%o0 + 0], %f0
218	bl,pt		%icc, .L128
219	ldd		[%o0 + 8], %f2
220
221	be,pt		%icc, .L192
222	ldd		[%o0 + 16], %f4
223	brz,pt		%o3, .L256aligned
224	ldd		[%o0 + 24], %f6
225
! Misaligned 256-bit key: funnel-shift five doublewords into %f0-%f6.
226	ldd		[%o0 + 32], %f8
227	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
228	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
229	.word	0x89b91566 !fshiftorx	%f4,%f6,%f10,%f4
230	.word	0x8db99568 !fshiftorx	%f6,%f8,%f10,%f6
231
! AES-256: 14 rounds; schedule generated fully unrolled.
232.L256aligned:
233	mov		14, %o1
234	and		%o4, 224, %o3
235	st		%o1, [%o2 + 240]	! store rounds
236	add		%o2, %o3, %o2	! start or end of key schedule
237	sllx		%o4, 4, %o4		! 16 or -16
238	std		%f0, [%o2 + 0]
239	.word	0x81b19290 !faeskeyx	%f6,16,%f0
240	std		%f2, [%o2 + 8]
241	add		%o2, %o4, %o2
242	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
243	std		%f4, [%o2 + 0]
244	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
245	std		%f6, [%o2 + 8]
246	add		%o2, %o4, %o2
247	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
248	std		%f0, [%o2 + 0]
249	.word	0x81b19291 !faeskeyx	%f6,17,%f0
250	std		%f2, [%o2 + 8]
251	add		%o2, %o4, %o2
252	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
253	std		%f4, [%o2 + 0]
254	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
255	std		%f6, [%o2 + 8]
256	add		%o2, %o4, %o2
257	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
258	std		%f0, [%o2 + 0]
259	.word	0x81b19292 !faeskeyx	%f6,18,%f0
260	std		%f2, [%o2 + 8]
261	add		%o2, %o4, %o2
262	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
263	std		%f4, [%o2 + 0]
264	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
265	std		%f6, [%o2 + 8]
266	add		%o2, %o4, %o2
267	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
268	std		%f0, [%o2 + 0]
269	.word	0x81b19293 !faeskeyx	%f6,19,%f0
270	std		%f2, [%o2 + 8]
271	add		%o2, %o4, %o2
272	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
273	std		%f4, [%o2 + 0]
274	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
275	std		%f6, [%o2 + 8]
276	add		%o2, %o4, %o2
277	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
278	std		%f0, [%o2 + 0]
279	.word	0x81b19294 !faeskeyx	%f6,20,%f0
280	std		%f2, [%o2 + 8]
281	add		%o2, %o4, %o2
282	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
283	std		%f4, [%o2 + 0]
284	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
285	std		%f6, [%o2 + 8]
286	add		%o2, %o4, %o2
287	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
288	std		%f0, [%o2 + 0]
289	.word	0x81b19295 !faeskeyx	%f6,21,%f0
290	std		%f2, [%o2 + 8]
291	add		%o2, %o4, %o2
292	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
293	std		%f4, [%o2 + 0]
294	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
295	std		%f6, [%o2 + 8]
296	add		%o2, %o4, %o2
297	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
298	std		%f0, [%o2 + 0]
299	.word	0x81b19296 !faeskeyx	%f6,22,%f0
300	std		%f2, [%o2 + 8]
301	add		%o2, %o4, %o2
302	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
303	std		%f4,[%o2 + 0]
304	std		%f6,[%o2 + 8]
305	add		%o2, %o4, %o2
306	std		%f0,[%o2 + 0]
307	std		%f2,[%o2 + 8]
308	retl
309	xor		%o0, %o0, %o0		! return 0
310
! AES-192: 12 rounds; %f0-%f4 hold the 24-byte key.
311.align	16
312.L192:
313	brz,pt		%o3, .L192aligned
314	nop

315
316	ldd		[%o0 + 24], %f6
317	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
318	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
319	.word	0x89b91566 !fshiftorx	%f4,%f6,%f10,%f4
320
321.L192aligned:
322	mov		12, %o1
323	and		%o4, 192, %o3
324	st		%o1, [%o2 + 240]	! store rounds
325	add		%o2, %o3, %o2	! start or end of key schedule
326	sllx		%o4, 4, %o4		! 16 or -16
327	std		%f0, [%o2 + 0]
328	.word	0x81b11290 !faeskeyx	%f4,16,%f0
329	std		%f2, [%o2 + 8]
330	add		%o2, %o4, %o2
331	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
332	std		%f4, [%o2 + 0]
333	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
334	std		%f0, [%o2 + 8]
335	add		%o2, %o4, %o2
336	.word	0x81b11291 !faeskeyx	%f4,17,%f0
337	std		%f2, [%o2 + 0]
338	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
339	std		%f4, [%o2 + 8]
340	add		%o2, %o4, %o2
341	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
342	std		%f0, [%o2 + 0]
343	.word	0x81b11292 !faeskeyx	%f4,18,%f0
344	std		%f2, [%o2 + 8]
345	add		%o2, %o4, %o2
346	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
347	std		%f4, [%o2 + 0]
348	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
349	std		%f0, [%o2 + 8]
350	add		%o2, %o4, %o2
351	.word	0x81b11293 !faeskeyx	%f4,19,%f0
352	std		%f2, [%o2 + 0]
353	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
354	std		%f4, [%o2 + 8]
355	add		%o2, %o4, %o2
356	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
357	std		%f0, [%o2 + 0]
358	.word	0x81b11294 !faeskeyx	%f4,20,%f0
359	std		%f2, [%o2 + 8]
360	add		%o2, %o4, %o2
361	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
362	std		%f4, [%o2 + 0]
363	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
364	std		%f0, [%o2 + 8]
365	add		%o2, %o4, %o2
366	.word	0x81b11295 !faeskeyx	%f4,21,%f0
367	std		%f2, [%o2 + 0]
368	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
369	std		%f4, [%o2 + 8]
370	add		%o2, %o4, %o2
371	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
372	std		%f0, [%o2 + 0]
373	.word	0x81b11296 !faeskeyx	%f4,22,%f0
374	std		%f2, [%o2 + 8]
375	add		%o2, %o4, %o2
376	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
377	std		%f4, [%o2 + 0]
378	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
379	std		%f0, [%o2 + 8]
380	add		%o2, %o4, %o2
381	.word	0x81b11297 !faeskeyx	%f4,23,%f0
382	std		%f2, [%o2 + 0]
383	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
384	std		%f4, [%o2 + 8]
385	add		%o2, %o4, %o2
386	std		%f0, [%o2 + 0]
387	std		%f2, [%o2 + 8]
388	retl
389	xor		%o0, %o0, %o0		! return 0
390
! AES-128: 10 rounds; %f0/%f2 hold the 16-byte key.
391.align	16
392.L128:
393	brz,pt		%o3, .L128aligned
394	nop

395
396	ldd		[%o0 + 16], %f4
397	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
398	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
399
400.L128aligned:
401	mov		10, %o1
402	and		%o4, 160, %o3
403	st		%o1, [%o2 + 240]	! store rounds
404	add		%o2, %o3, %o2	! start or end of key schedule
405	sllx		%o4, 4, %o4		! 16 or -16
406	std		%f0, [%o2 + 0]
407	.word	0x81b09290 !faeskeyx	%f2,16,%f0
408	std		%f2, [%o2 + 8]
409	add		%o2, %o4, %o2
410	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
411	std		%f0, [%o2 + 0]
412	.word	0x81b09291 !faeskeyx	%f2,17,%f0
413	std		%f2, [%o2 + 8]
414	add		%o2, %o4, %o2
415	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
416	std		%f0, [%o2 + 0]
417	.word	0x81b09292 !faeskeyx	%f2,18,%f0
418	std		%f2, [%o2 + 8]
419	add		%o2, %o4, %o2
420	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
421	std		%f0, [%o2 + 0]
422	.word	0x81b09293 !faeskeyx	%f2,19,%f0
423	std		%f2, [%o2 + 8]
424	add		%o2, %o4, %o2
425	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
426	std		%f0, [%o2 + 0]
427	.word	0x81b09294 !faeskeyx	%f2,20,%f0
428	std		%f2, [%o2 + 8]
429	add		%o2, %o4, %o2
430	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
431	std		%f0, [%o2 + 0]
432	.word	0x81b09295 !faeskeyx	%f2,21,%f0
433	std		%f2, [%o2 + 8]
434	add		%o2, %o4, %o2
435	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
436	std		%f0, [%o2 + 0]
437	.word	0x81b09296 !faeskeyx	%f2,22,%f0
438	std		%f2, [%o2 + 8]
439	add		%o2, %o4, %o2
440	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
441	std		%f0, [%o2 + 0]
442	.word	0x81b09297 !faeskeyx	%f2,23,%f0
443	std		%f2, [%o2 + 8]
444	add		%o2, %o4, %o2
445	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
446	std		%f0, [%o2 + 0]
447	.word	0x81b09298 !faeskeyx	%f2,24,%f0
448	std		%f2, [%o2 + 8]
449	add		%o2, %o4, %o2
450	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
451	std		%f0, [%o2 + 0]
452	.word	0x81b09299 !faeskeyx	%f2,25,%f0
453	std		%f2, [%o2 + 8]
454	add		%o2, %o4, %o2
455	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
456	std		%f0, [%o2 + 0]
457	std		%f2, [%o2 + 8]
458	retl
459	xor		%o0, %o0, %o0		! return 0
460.type	aes_fx_set_encrypt_key,#function
461.size	aes_fx_set_encrypt_key,.-aes_fx_set_encrypt_key
!-----------------------------------------------------------------------
! void aes_fx_cbc_encrypt(const void *inp, void *out, size_t len,
!                         const AES_KEY *key, void *ivec, int enc)
!   %i0 = inp  - input, ANY alignment; %i1 = out, ANY alignment
!   %i2 = len  - bytes; converted to whole 16-byte blocks (srln ...,4)
!   %i3 = key  - round keys, count at offset 240
!   %i4 = ivec - 16-byte IV, read at entry, updated at exit
!   %i5 = enc  - nonzero = encrypt, zero = branch to .Lcbc_decrypt
! Software-pipelined CBC.  The next input block is pre-loaded each
! iteration; the doubleword just past the current block is fetched with
! a non-faulting load (ldda ...0x82) so reading ahead can never trap at
! the end of a mapped region.  %l3 is the input advance: 16 normally,
! forced to 0 on the final block (movrz %i2) so the prefetch never
! walks past the buffer.  Misaligned output is handled with fshiftorx
! plus masked partial stores (stda ...0xc0), carrying the previous
! ciphertext doubleword in %f34 across iterations.
! Register roles (both directions): %l0 = rounds*16, %l2 = key cursor,
! %l4/%l6 = input/output misalignment (bits), %l5 = out&7 flag,
! %f20/%f22 = round[0], %f24/%f26 = round[last], %f28-%f32 = input
! pipeline, %f36 = fshiftorx parameters.  "srln" is a width-dependent
! shift spelled by the perlasm wrapper (NOTE(review): defined outside
! this file - presumably srl/srlx per ABI; confirm in sparc_arch.h).
!-----------------------------------------------------------------------
462.globl	aes_fx_cbc_encrypt
463.align	32
464aes_fx_cbc_encrypt:
465	save		%sp, -STACK_FRAME-16, %sp
466	srln		%i2, 4, %i2
467	and		%i0, 7, %l4
468	andn		%i0, 7, %i0
469	brz,pn		%i2, .Lcbc_no_data
470	sll		%l4, 3, %l4

471
! PC-relative address of .Linp_align (see aes_fx_encrypt).
4721:	call		.+8
473	add		%o7, .Linp_align-1b, %o7

474
475	ld		[%i3 + 240], %l0
476	and		%i1, 7, %l5
477	ld		[%i4 + 0], %f0		! load ivec
478	andn		%i1, 7, %i1
479	ld		[%i4 + 4], %f1
480	sll		%l5, 3, %l6
481	ld		[%i4 + 8], %f2
482	ld		[%i4 + 12], %f3
483
484	sll		%l0, 4, %l0
485	add		%l0, %i3, %l2
486	ldd		[%i3 + 0], %f20	! round[0]
487	ldd		[%i3 + 8], %f22
488
489	add		%i0, 16, %i0
490	sub		%i2,  1, %i2
491	ldd		[%l2 + 0], %f24	! round[last]
492	ldd		[%l2 + 8], %f26
493
! %l3 = 16 while more blocks remain, 0 on the last one (stops the
! read-ahead at the buffer end).
494	mov		16, %l3
495	movrz		%i2, 0, %l3
496	ldd		[%i3 + 16], %f10	! round[1]
497	ldd		[%i3 + 24], %f12
498
499	ldd		[%o7 + %l4], %f36	! shift left params
500	add		%o7, 64, %o7
501	ldd		[%i0 - 16], %f28	! load input
502	ldd		[%i0 -  8], %f30
503	ldda		[%i0]0x82, %f32	! non-faulting load
504	brz		%i5, .Lcbc_decrypt
505	add		%i0, %l3, %i0	! inp+=16

506
507	.word	0x81b50d80 !fxor	%f20,%f0,%f0		! ivec^=round[0]
508	.word	0x85b58d82 !fxor	%f22,%f2,%f2
509	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
510	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
511	nop
512
! CBC encrypt main loop: chain value lives in %f0/%f2; each iteration
! XORs in the (realigned) plaintext and runs the full round sequence.
513.Loop_cbc_enc:
514	.word	0x81b70d80 !fxor	%f28,%f0,%f0		! inp^ivec^round[0]
515	.word	0x85b78d82 !fxor	%f30,%f2,%f2
516	ldd		[%i3 + 32], %f6	! round[2]
517	ldd		[%i3 + 40], %f8
518	add		%i3, 32, %l2
519	sub		%l0, 16*6, %l1
520
521.Lcbc_enc:
522	fmovd		%f0, %f4
523	.word	0x81b0920a !faesencx	%f2,%f10,%f0
524	.word	0x85b1120c !faesencx	%f4,%f12,%f2
525	ldd		[%l2 + 16], %f10
526	ldd		[%l2 + 24], %f12
527	add		%l2, 32, %l2
528
529	fmovd		%f0, %f4
530	.word	0x81b09206 !faesencx	%f2,%f6,%f0
531	.word	0x85b11208 !faesencx	%f4,%f8,%f2
532	ldd		[%l2 + 0], %f6
533	ldd		[%l2 + 8], %f8
534
535	brnz,a		%l1, .Lcbc_enc
536	sub		%l1, 16*2, %l1

537
538	fmovd		%f0, %f4
539	.word	0x81b0920a !faesencx	%f2,%f10,%f0
540	.word	0x85b1120c !faesencx	%f4,%f12,%f2
541	ldd		[%l2 + 16], %f10	! round[last-1]
542	ldd		[%l2 + 24], %f12
543
! Pipeline turn-over: pull in the next input block while the tail
! rounds of the current one are still in flight.
544	movrz		%i2, 0, %l3
545	fmovd		%f32, %f28
546	ldd		[%i0 - 8], %f30	! load next input block
547	ldda		[%i0]0x82, %f32	! non-faulting load
548	add		%i0, %l3, %i0	! inp+=16
549
550	fmovd		%f0, %f4
551	.word	0x81b09206 !faesencx	%f2,%f6,%f0
552	.word	0x85b11208 !faesencx	%f4,%f8,%f2
553
554	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
555	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
556
557	fmovd		%f0, %f4
558	.word	0x81b0920a !faesencx	%f2,%f10,%f0
559	.word	0x85b1120c !faesencx	%f4,%f12,%f2
560	ldd		[%i3 + 16], %f10	! round[1]
561	ldd		[%i3 + 24], %f12
562
563	.word	0xb9b50d9c !fxor	%f20,%f28,%f28	! inp^=round[0]
564	.word	0xbdb58d9e !fxor	%f22,%f30,%f30
565
566	fmovd		%f0, %f4
567	.word	0x81b09258 !faesenclx	%f2,%f24,%f0
568	.word	0x85b1125a !faesenclx	%f4,%f26,%f2
569
570	brnz,pn		%l5, .Lcbc_enc_unaligned_out
571	nop

571
573	std		%f0, [%i1 + 0]
574	std		%f2, [%i1 + 8]
575	add		%i1, 16, %i1
576
577	brnz,a		%i2, .Loop_cbc_enc
578	sub		%i2, 1, %i2

579
! Done: final ciphertext block (still in %f0-%f3) is the new IV.
580	st		%f0, [%i4 + 0]		! output ivec
581	st		%f1, [%i4 + 4]
582	st		%f2, [%i4 + 8]
583	st		%f3, [%i4 + 12]
584
585.Lcbc_no_data:
586	ret
587	restore

588
! First unaligned-output block: emit the leading partial doubleword
! with a masked store, then fall into the steady-state unaligned loop.
! %l6 becomes the (inverted) byte-enable mask for the trailing edge.
589.align	32
590.Lcbc_enc_unaligned_out:
591	ldd		[%o7 + %l6], %f36	! shift right params
592	mov		0xff, %l6
593	srl		%l6, %l5, %l6
594	sub		%g0, %l4, %l5
595
596	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
597	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
598
599	stda		%f6, [%i1 + %l6]0xc0	! partial store
600	orn		%g0, %l6, %l6
601	std		%f8, [%i1 + 8]
602	add		%i1, 16, %i1
603	brz		%i2, .Lcbc_enc_unaligned_out_done
604	sub		%i2, 1, %i2
605	b		.Loop_cbc_enc_unaligned_out
606	nop

607
! Steady-state unaligned-output loop.  Because fshiftorx needs
! consecutive doublewords, the input realignment here is redone with
! integer shifts (%o0/%o1 via sllx/srlx) and staged through the stack
! at [%sp + LOCALS]; %f34 carries the previous ciphertext doubleword
! so each store covers a full aligned 16 bytes.
608.align	32
609.Loop_cbc_enc_unaligned_out:
610	fmovd		%f2, %f34
611	.word	0x81b70d80 !fxor	%f28,%f0,%f0		! inp^ivec^round[0]
612	.word	0x85b78d82 !fxor	%f30,%f2,%f2
613	ldd		[%i3 + 32], %f6	! round[2]
614	ldd		[%i3 + 40], %f8
615
616	fmovd		%f0, %f4
617	.word	0x81b0920a !faesencx	%f2,%f10,%f0
618	.word	0x85b1120c !faesencx	%f4,%f12,%f2
619	ldd		[%i3 + 48], %f10	! round[3]
620	ldd		[%i3 + 56], %f12
621
622	ldx		[%i0 - 16], %o0
623	ldx		[%i0 -  8], %o1
624	brz		%l4, .Lcbc_enc_aligned_inp
625	movrz		%i2, 0, %l3

626
627	ldx		[%i0], %o2
628	sllx		%o0, %l4, %o0
629	srlx		%o1, %l5, %g1
630	sllx		%o1, %l4, %o1
631	or		%g1, %o0, %o0
632	srlx		%o2, %l5, %o2
633	or		%o2, %o1, %o1
634
635.Lcbc_enc_aligned_inp:
636	fmovd		%f0, %f4
637	.word	0x81b09206 !faesencx	%f2,%f6,%f0
638	.word	0x85b11208 !faesencx	%f4,%f8,%f2
639	ldd		[%i3 + 64], %f6	! round[4]
640	ldd		[%i3 + 72], %f8
641	add		%i3, 64, %l2
642	sub		%l0, 16*8, %l1
643
644	stx		%o0, [%sp + LOCALS + 0]
645	stx		%o1, [%sp + LOCALS + 8]
646	add		%i0, %l3, %i0	! inp+=16
647	nop
648
649.Lcbc_enc_unaligned:
650	fmovd		%f0, %f4
651	.word	0x81b0920a !faesencx	%f2,%f10,%f0
652	.word	0x85b1120c !faesencx	%f4,%f12,%f2
653	ldd		[%l2 + 16], %f10
654	ldd		[%l2 + 24], %f12
655	add		%l2, 32, %l2
656
657	fmovd		%f0, %f4
658	.word	0x81b09206 !faesencx	%f2,%f6,%f0
659	.word	0x85b11208 !faesencx	%f4,%f8,%f2
660	ldd		[%l2 + 0], %f6
661	ldd		[%l2 + 8], %f8
662
663	brnz,a		%l1, .Lcbc_enc_unaligned
664	sub		%l1, 16*2, %l1

665
666	fmovd		%f0, %f4
667	.word	0x81b0920a !faesencx	%f2,%f10,%f0
668	.word	0x85b1120c !faesencx	%f4,%f12,%f2
669	ldd		[%l2 + 16], %f10	! round[last-1]
670	ldd		[%l2 + 24], %f12
671
672	fmovd		%f0, %f4
673	.word	0x81b09206 !faesencx	%f2,%f6,%f0
674	.word	0x85b11208 !faesencx	%f4,%f8,%f2
675
676	ldd		[%sp + LOCALS + 0], %f28
677	ldd		[%sp + LOCALS + 8], %f30
678
679	fmovd		%f0, %f4
680	.word	0x81b0920a !faesencx	%f2,%f10,%f0
681	.word	0x85b1120c !faesencx	%f4,%f12,%f2
682	ldd		[%i3 + 16], %f10	! round[1]
683	ldd		[%i3 + 24], %f12
684
685	.word	0xb9b50d9c !fxor	%f20,%f28,%f28	! inp^=round[0]
686	.word	0xbdb58d9e !fxor	%f22,%f30,%f30
687
688	fmovd		%f0, %f4
689	.word	0x81b09258 !faesenclx	%f2,%f24,%f0
690	.word	0x85b1125a !faesenclx	%f4,%f26,%f2
691
692	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
693	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
694	std		%f6, [%i1 + 0]
695	std		%f8, [%i1 + 8]
696	add		%i1, 16, %i1
697
698	brnz,a		%i2, .Loop_cbc_enc_unaligned_out
699	sub		%i2, 1, %i2

700
! Flush the trailing partial doubleword, then store the final
! ciphertext block as the new IV.
701.Lcbc_enc_unaligned_out_done:
702	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
703	stda		%f8, [%i1 + %l6]0xc0	! partial store
704
705	st		%f0, [%i4 + 0]		! output ivec
706	st		%f1, [%i4 + 4]
707	st		%f2, [%i4 + 8]
708	st		%f3, [%i4 + 12]
709
710	ret
711	restore

712
! CBC decrypt.  %f16/%f18 carry the previous ciphertext (initially the
! IV) to XOR into each decrypted block; otherwise the same pipeline
! structure as the encrypt side.
713.align	32
714.Lcbc_decrypt:
715	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
716	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
717	fmovd		%f0, %f16
718	fmovd		%f2, %f18
719
720.Loop_cbc_dec:
721	.word	0x81b70d94 !fxor	%f28,%f20,%f0	! inp^round[0]
722	.word	0x85b78d96 !fxor	%f30,%f22,%f2
723	ldd		[%i3 + 32], %f6	! round[2]
724	ldd		[%i3 + 40], %f8
725	add		%i3, 32, %l2
726	sub		%l0, 16*6, %l1
727
728.Lcbc_dec:
729	fmovd		%f0, %f4
730	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
731	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
732	ldd		[%l2 + 16], %f10
733	ldd		[%l2 + 24], %f12
734	add		%l2, 32, %l2
735
736	fmovd		%f0, %f4
737	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
738	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
739	ldd		[%l2 + 0], %f6
740	ldd		[%l2 + 8], %f8
741
742	brnz,a		%l1, .Lcbc_dec
743	sub		%l1, 16*2, %l1

744
745	fmovd		%f0, %f4
746	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
747	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
748	ldd		[%l2 + 16], %f10	! round[last-1]
749	ldd		[%l2 + 24], %f12
750
! Fold round[last] into the chain value ahead of faesdeclx, and rotate
! the chain: current ciphertext becomes next iteration's XOR source.
751	fmovd		%f0, %f4
752	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
753	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
754	.word	0x8db40d98 !fxor	%f16,%f24,%f6	! ivec^round[last]
755	.word	0x91b48d9a !fxor	%f18,%f26,%f8
756	fmovd		%f28, %f16
757	fmovd		%f30, %f18
758
759	movrz		%i2, 0, %l3
760	fmovd		%f32, %f28
761	ldd		[%i0 - 8], %f30	! load next input block
762	ldda		[%i0]0x82, %f32	! non-faulting load
763	add		%i0, %l3, %i0	! inp+=16
764
765	fmovd		%f0, %f4
766	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
767	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
768	ldd		[%i3 + 16], %f10	! round[1]
769	ldd		[%i3 + 24], %f12
770
771	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
772	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
773
774	fmovd		%f0, %f4
775	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
776	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2
777
778	brnz,pn		%l5, .Lcbc_dec_unaligned_out
779	nop

780
781	std		%f0, [%i1 + 0]
782	std		%f2, [%i1 + 8]
783	add		%i1, 16, %i1
784
785	brnz,a		%i2, .Loop_cbc_dec
786	sub		%i2, 1, %i2

787
! Done: last ciphertext block (carried in %f16-%f19) is the new IV.
788	st		%f16,    [%i4 + 0]	! output ivec
789	st		%f17, [%i4 + 4]
790	st		%f18,    [%i4 + 8]
791	st		%f19, [%i4 + 12]
792
793	ret
794	restore

795
! Unaligned-output path for decrypt - same masked partial-store scheme
! as .Lcbc_enc_unaligned_out above.
796.align	32
797.Lcbc_dec_unaligned_out:
798	ldd		[%o7 + %l6], %f36	! shift right params
799	mov		0xff, %l6
800	srl		%l6, %l5, %l6
801	sub		%g0, %l4, %l5
802
803	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
804	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
805
806	stda		%f6, [%i1 + %l6]0xc0	! partial store
807	orn		%g0, %l6, %l6
808	std		%f8, [%i1 + 8]
809	add		%i1, 16, %i1
810	brz		%i2, .Lcbc_dec_unaligned_out_done
811	sub		%i2, 1, %i2
812	b		.Loop_cbc_dec_unaligned_out
813	nop

814
! Steady-state unaligned-output decrypt loop; input realignment via
! integer shifts and a staging slot at [%sp + LOCALS], previous output
! doubleword carried in %f34.
815.align	32
816.Loop_cbc_dec_unaligned_out:
817	fmovd		%f2, %f34
818	.word	0x81b70d94 !fxor	%f28,%f20,%f0	! inp^round[0]
819	.word	0x85b78d96 !fxor	%f30,%f22,%f2
820	ldd		[%i3 + 32], %f6	! round[2]
821	ldd		[%i3 + 40], %f8
822
823	fmovd		%f0, %f4
824	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
825	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
826	ldd		[%i3 + 48], %f10	! round[3]
827	ldd		[%i3 + 56], %f12
828
829	ldx		[%i0 - 16], %o0
830	ldx		[%i0 - 8], %o1
831	brz		%l4, .Lcbc_dec_aligned_inp
832	movrz		%i2, 0, %l3

833
834	ldx		[%i0], %o2
835	sllx		%o0, %l4, %o0
836	srlx		%o1, %l5, %g1
837	sllx		%o1, %l4, %o1
838	or		%g1, %o0, %o0
839	srlx		%o2, %l5, %o2
840	or		%o2, %o1, %o1
841
842.Lcbc_dec_aligned_inp:
843	fmovd		%f0, %f4
844	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
845	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
846	ldd		[%i3 + 64], %f6	! round[4]
847	ldd		[%i3 + 72], %f8
848	add		%i3, 64, %l2
849	sub		%l0, 16*8, %l1
850
851	stx		%o0, [%sp + LOCALS + 0]
852	stx		%o1, [%sp + LOCALS + 8]
853	add		%i0, %l3, %i0	! inp+=16
854	nop
855
856.Lcbc_dec_unaligned:
857	fmovd		%f0, %f4
858	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
859	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
860	ldd		[%l2 + 16], %f10
861	ldd		[%l2 + 24], %f12
862	add		%l2, 32, %l2
863
864	fmovd		%f0, %f4
865	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
866	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
867	ldd		[%l2 + 0], %f6
868	ldd		[%l2 + 8], %f8
869
870	brnz,a		%l1, .Lcbc_dec_unaligned
871	sub		%l1, 16*2, %l1

872
873	fmovd		%f0, %f4
874	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
875	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
876	ldd		[%l2 + 16], %f10	! round[last-1]
877	ldd		[%l2 + 24], %f12
878
879	fmovd		%f0, %f4
880	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
881	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
882
883	.word	0x8db40d98 !fxor	%f16,%f24,%f6	! ivec^round[last]
884	.word	0x91b48d9a !fxor	%f18,%f26,%f8
885	fmovd		%f28, %f16
886	fmovd		%f30, %f18
887	ldd		[%sp + LOCALS + 0], %f28
888	ldd		[%sp + LOCALS + 8], %f30
889
890	fmovd		%f0, %f4
891	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
892	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
893	ldd		[%i3 + 16], %f10	! round[1]
894	ldd		[%i3 + 24], %f12
895
896	fmovd		%f0, %f4
897	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
898	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2
899
900	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
901	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
902	std		%f6, [%i1 + 0]
903	std		%f8, [%i1 + 8]
904	add		%i1, 16, %i1
905
906	brnz,a		%i2, .Loop_cbc_dec_unaligned_out
907	sub		%i2, 1, %i2

908
! Flush trailing partial doubleword, then write the IV for the caller.
909.Lcbc_dec_unaligned_out_done:
910	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
911	stda		%f8, [%i1 + %l6]0xc0	! partial store
912
913	st		%f16,    [%i4 + 0]	! output ivec
914	st		%f17, [%i4 + 4]
915	st		%f18,    [%i4 + 8]
916	st		%f19, [%i4 + 12]
917
918	ret
919	restore
920.type	aes_fx_cbc_encrypt,#function
921.size	aes_fx_cbc_encrypt,.-aes_fx_cbc_encrypt
!-----------------------------------------------------------------------
! void aes_fx_ctr32_encrypt_blocks(const void *inp, void *out,
!                                  size_t blocks, const AES_KEY *key,
!                                  const void *ivec)
!   %i0 = inp    - input, ANY alignment; %i1 = out, ANY alignment
!   %i2 = blocks - number of whole 16-byte blocks
!   %i3 = key    - round keys, count at offset 240
!   %i4 = ivec   - 16-byte counter block, loaded into %f16-%f19
! CTR mode: each block encrypts the counter, XORs the keystream with
! the (realigned, pipelined) input, and bumps the counter with
! fpadd32 %f18,%f14 where %f14 = .Lone = {0,1} - i.e. only the low
! 32-bit word is incremented, with no carry into the upper words
! (standard ctr32 semantics; 32-bit wrap is the caller's concern -
! NOTE(review): confirm against the caller's overflow handling).
! %f14 is loaded from [%o7 + 128]: .Linp_align (64 B) + .Lout_align
! (64 B) = .Lone.  Same pipelined structure, non-faulting read-ahead
! (ldda ...0x82) and masked partial stores as aes_fx_cbc_encrypt.
!-----------------------------------------------------------------------
922.globl	aes_fx_ctr32_encrypt_blocks
923.align	32
924aes_fx_ctr32_encrypt_blocks:
925	save		%sp, -STACK_FRAME-16, %sp
926	srln		%i2, 0, %i2
927	and		%i0, 7, %l4
928	andn		%i0, 7, %i0
929	brz,pn		%i2, .Lctr32_no_data
930	sll		%l4, 3, %l4

931
! PC-relative base of the alignment tables, as elsewhere in this file.
932.Lpic:	call		.+8
933	add		%o7, .Linp_align - .Lpic, %o7

934
935	ld		[%i3 + 240], %l0
936	and		%i1, 7, %l5
937	ld		[%i4 +  0], %f16	! load counter
938	andn		%i1, 7, %i1
939	ld		[%i4 +  4], %f17
940	sll		%l5, 3, %l6
941	ld		[%i4 +  8], %f18
942	ld		[%i4 + 12], %f19
943	ldd		[%o7 + 128], %f14
944
945	sll		%l0, 4, %l0
946	add		%l0, %i3, %l2
947	ldd		[%i3 + 0], %f20	! round[0]
948	ldd		[%i3 + 8], %f22
949
950	add		%i0, 16, %i0
951	sub		%i2, 1, %i2
952	ldd		[%i3 + 16], %f10	! round[1]
953	ldd		[%i3 + 24], %f12
954
! %l3 = input advance: 16 normally, 0 on the last block so the
! read-ahead below never dereferences past the buffer.
955	mov		16, %l3
956	movrz		%i2, 0, %l3
957	ldd		[%l2 + 0], %f24	! round[last]
958	ldd		[%l2 + 8], %f26
959
960	ldd		[%o7 + %l4], %f36	! shiftleft params
961	add		%o7, 64, %o7
962	ldd		[%i0 - 16], %f28	! load input
963	ldd		[%i0 -  8], %f30
964	ldda		[%i0]0x82, %f32	! non-faulting load
965	add		%i0, %l3, %i0	! inp+=16
966
967	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
968	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
969
! Main CTR loop: encrypt counter (%f16/%f18) under the key schedule.
970.Loop_ctr32:
971	.word	0x81b40d94 !fxor	%f16,%f20,%f0	! counter^round[0]
972	.word	0x85b48d96 !fxor	%f18,%f22,%f2
973	ldd		[%i3 + 32], %f6	! round[2]
974	ldd		[%i3 + 40], %f8
975	add		%i3, 32, %l2
976	sub		%l0, 16*6, %l1
977
978.Lctr32_enc:
979	fmovd		%f0, %f4
980	.word	0x81b0920a !faesencx	%f2,%f10,%f0
981	.word	0x85b1120c !faesencx	%f4,%f12,%f2
982	ldd		[%l2 + 16], %f10
983	ldd		[%l2 + 24], %f12
984	add		%l2, 32, %l2
985
986	fmovd		%f0, %f4
987	.word	0x81b09206 !faesencx	%f2,%f6,%f0
988	.word	0x85b11208 !faesencx	%f4,%f8,%f2
989	ldd		[%l2 + 0], %f6
990	ldd		[%l2 + 8], %f8
991
992	brnz,a		%l1, .Lctr32_enc
993	sub		%l1, 16*2, %l1

994
995	fmovd		%f0, %f4
996	.word	0x81b0920a !faesencx	%f2,%f10,%f0
997	.word	0x85b1120c !faesencx	%f4,%f12,%f2
998	ldd		[%l2 + 16], %f10	! round[last-1]
999	ldd		[%l2 + 24], %f12
1000
! Mix the input into the last-round key material: faesenclx with
! (inp ^ round[last]) both finishes the encryption and applies the
! keystream XOR in one step.
1001	fmovd		%f0, %f4
1002	.word	0x81b09206 !faesencx	%f2,%f6,%f0
1003	.word	0x85b11208 !faesencx	%f4,%f8,%f2
1004	.word	0x8db70d98 !fxor	%f28,%f24,%f6	! inp^round[last]
1005	.word	0x91b78d9a !fxor	%f30,%f26,%f8
1006
1007	movrz		%i2, 0, %l3
1008	fmovd		%f32, %f28
1009	ldd		[%i0 - 8], %f30	! load next input block
1010	ldda		[%i0]0x82, %f32	! non-faulting load
1011	add		%i0, %l3, %i0	! inp+=16
1012
1013	fmovd		%f0, %f4
1014	.word	0x81b0920a !faesencx	%f2,%f10,%f0
1015	.word	0x85b1120c !faesencx	%f4,%f12,%f2
1016	ldd		[%i3 + 16], %f10	! round[1]
1017	ldd		[%i3 + 24], %f12
1018
1019	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
1020	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
1021	.word	0xa5b48a4e !fpadd32	%f18,%f14,%f18	! increment counter
1022
1023	fmovd		%f0, %f4
1024	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
1025	.word	0x85b11248 !faesenclx	%f4,%f8,%f2
1026
1027	brnz,pn		%l5, .Lctr32_unaligned_out
1028	nop

1028
1030	std		%f0, [%i1 + 0]
1031	std		%f2, [%i1 + 8]
1032	add		%i1, 16, %i1
1033
1034	brnz,a		%i2, .Loop_ctr32
1035	sub		%i2, 1, %i2

1036
1037.Lctr32_no_data:
1038	ret
1039	restore

1040
! First unaligned-output block: masked partial store for the leading
! edge, then fall into the steady-state unaligned loop.
1041.align	32
1042.Lctr32_unaligned_out:
1043	ldd		[%o7 + %l6], %f36	! shift right params
1044	mov		0xff, %l6
1045	srl		%l6, %l5, %l6
1046	sub		%g0, %l4, %l5
1047
1048	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
1049	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
1050
1051	stda		%f6, [%i1 + %l6]0xc0	! partial store
1052	orn		%g0, %l6, %l6
1053	std		%f8, [%i1 + 8]
1054	add		%i1, 16, %i1
1055	brz		%i2, .Lctr32_unaligned_out_done
1056	sub		%i2, 1, %i2
1057	b		.Loop_ctr32_unaligned_out
1058	nop

1059
! Steady-state unaligned-output CTR loop; input realigned with integer
! shifts through [%sp + LOCALS], previous output doubleword in %f34.
1060.align	32
1061.Loop_ctr32_unaligned_out:
1062	fmovd		%f2, %f34
1063	.word	0x81b40d94 !fxor	%f16,%f20,%f0	! counter^round[0]
1064	.word	0x85b48d96 !fxor	%f18,%f22,%f2
1065	ldd		[%i3 + 32], %f6	! round[2]
1066	ldd		[%i3 + 40], %f8
1067
1068	fmovd		%f0, %f4
1069	.word	0x81b0920a !faesencx	%f2,%f10,%f0
1070	.word	0x85b1120c !faesencx	%f4,%f12,%f2
1071	ldd		[%i3 + 48], %f10	! round[3]
1072	ldd		[%i3 + 56], %f12
1073
1074	ldx		[%i0 - 16], %o0
1075	ldx		[%i0 -  8], %o1
1076	brz		%l4, .Lctr32_aligned_inp
1077	movrz		%i2, 0, %l3

1078
1079	ldx		[%i0], %o2
1080	sllx		%o0, %l4, %o0
1081	srlx		%o1, %l5, %g1
1082	sllx		%o1, %l4, %o1
1083	or		%g1, %o0, %o0
1084	srlx		%o2, %l5, %o2
1085	or		%o2, %o1, %o1
1086
1087.Lctr32_aligned_inp:
1088	fmovd		%f0, %f4
1089	.word	0x81b09206 !faesencx	%f2,%f6,%f0
1090	.word	0x85b11208 !faesencx	%f4,%f8,%f2
1091	ldd		[%i3 + 64], %f6	! round[4]
1092	ldd		[%i3 + 72], %f8
1093	add		%i3, 64, %l2
1094	sub		%l0, 16*8, %l1
1095
1096	stx		%o0, [%sp + LOCALS + 0]
1097	stx		%o1, [%sp + LOCALS + 8]
1098	add		%i0, %l3, %i0	! inp+=16
1099	nop
1100
1101.Lctr32_enc_unaligned:
1102	fmovd		%f0, %f4
1103	.word	0x81b0920a !faesencx	%f2,%f10,%f0
1104	.word	0x85b1120c !faesencx	%f4,%f12,%f2
1105	ldd		[%l2 + 16], %f10
1106	ldd		[%l2 + 24], %f12
1107	add		%l2, 32, %l2
1108
1109	fmovd		%f0, %f4
1110	.word	0x81b09206 !faesencx	%f2,%f6,%f0
1111	.word	0x85b11208 !faesencx	%f4,%f8,%f2
1112	ldd		[%l2 + 0], %f6
1113	ldd		[%l2 + 8], %f8
1114
1115	brnz,a		%l1, .Lctr32_enc_unaligned
1116	sub		%l1, 16*2, %l1

1117
1118	fmovd		%f0, %f4
1119	.word	0x81b0920a !faesencx	%f2,%f10,%f0
1120	.word	0x85b1120c !faesencx	%f4,%f12,%f2
1121	ldd		[%l2 + 16], %f10	! round[last-1]
1122	ldd		[%l2 + 24], %f12
1123	.word	0xa5b48a4e !fpadd32	%f18,%f14,%f18	! increment counter
1124
1125	fmovd		%f0, %f4
1126	.word	0x81b09206 !faesencx	%f2,%f6,%f0
1127	.word	0x85b11208 !faesencx	%f4,%f8,%f2
1128	.word	0x8db70d98 !fxor	%f28,%f24,%f6	! inp^round[last]
1129	.word	0x91b78d9a !fxor	%f30,%f26,%f8
1130	ldd		[%sp + LOCALS + 0], %f28
1131	ldd		[%sp + LOCALS + 8], %f30
1132
1133	fmovd		%f0, %f4
1134	.word	0x81b0920a !faesencx	%f2,%f10,%f0
1135	.word	0x85b1120c !faesencx	%f4,%f12,%f2
1136	ldd		[%i3 + 16], %f10	! round[1]
1137	ldd		[%i3 + 24], %f12
1138
1139	fmovd		%f0, %f4
1140	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
1141	.word	0x85b11248 !faesenclx	%f4,%f8,%f2
1142
1143	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
1144	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
1145	std		%f6, [%i1 + 0]
1146	std		%f8, [%i1 + 8]
1147	add		%i1, 16, %i1
1148
1149	brnz,a		%i2, .Loop_ctr32_unaligned_out
1150	sub		%i2, 1, %i2

1151
! Flush the trailing partial doubleword.  Note: unlike CBC, the
! counter is NOT written back to %i4 here.
1152.Lctr32_unaligned_out_done:
1153	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
1154	stda		%f8, [%i1 + %l6]0xc0	! partial store
1155
1156	ret
1157	restore
1158.type	aes_fx_ctr32_encrypt_blocks,#function
1159.size	aes_fx_ctr32_encrypt_blocks,.-aes_fx_ctr32_encrypt_blocks
1160
! fshiftorx parameter tables.  Each table has 8 rows of 8 bytes, one
! row per misalignment 0..7; code indexes them as (ptr & 7) * 8 and
! loads the row with a single ldd.  .Lout_align sits exactly 64 bytes
! after .Linp_align (hence the "add %o7, 64, %o7" in the callers) and
! .Lone 128 bytes after it (the ctr32 "ldd [%o7 + 128], %f14").
1161.align	32
1162.Linp_align:		! fshiftorx parameters for left shift toward %rs1
1163	.byte	0, 0, 64,  0,	0, 64,  0, -64
1164	.byte	0, 0, 56,  8,	0, 56,  8, -56
1165	.byte	0, 0, 48, 16,	0, 48, 16, -48
1166	.byte	0, 0, 40, 24,	0, 40, 24, -40
1167	.byte	0, 0, 32, 32,	0, 32, 32, -32
1168	.byte	0, 0, 24, 40,	0, 24, 40, -24
1169	.byte	0, 0, 16, 48,	0, 16, 48, -16
1170	.byte	0, 0,  8, 56,	0,  8, 56, -8
1171.Lout_align:		! fshiftorx parameters for right shift toward %rs2
1172	.byte	0, 0,  0, 64,	0,  0, 64,   0
1173	.byte	0, 0,  8, 56,	0,  8, 56,  -8
1174	.byte	0, 0, 16, 48,	0, 16, 48, -16
1175	.byte	0, 0, 24, 40,	0, 24, 40, -24
1176	.byte	0, 0, 32, 32,	0, 32, 32, -32
1177	.byte	0, 0, 40, 24,	0, 40, 24, -40
1178	.byte	0, 0, 48, 16,	0, 48, 16, -48
1179	.byte	0, 0, 56,  8,	0, 56,  8, -56
! {0,1} increment vector for the ctr32 fpadd32 (bumps low 32-bit word).
1180.Lone:
1181	.word	0, 1
1182.asciz	"AES for Fujitsu SPARC64 X, CRYPTOGAMS by <appro@openssl.org>"
1183.align	4
1184