! xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/cmllt4-sparcv9.S (revision 1b3d6f93806f8821fe459e13ad13e605b37c6d43)
1#ifndef __ASSEMBLER__
2# define __ASSEMBLER__ 1
3#endif
4#include "crypto/sparc_arch.h"
5
6.text
7
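! The entry points below are hand-scheduled for the SPARC T4 Camellia
! opcodes (emitted as .word constants for older assemblers).  Reading the
! code, cmll_t4_encrypt appears to take %o0 = 16-byte input block,
! %o1 = output block and %o2 = key schedule, with the pre-whitening key
! at %o2+0/+8, round keys from %o2+16 on, and grandRounds (3 or 4) at
! %o2+272.  Unaligned input is merged with shifts; unaligned output is
! written via faligndata and partial stores.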
8.globl	cmll_t4_encrypt
9.align	32
10cmll_t4_encrypt:
11	andcc		%o0, 7, %g1		! is input aligned?
12	andn		%o0, 7, %o0
13
14	ldx		[%o2 + 0], %g4
15	ldx		[%o2 + 8], %g5
16
17	ldx		[%o0 + 0], %o4
18	bz,pt		%icc, 1f
19	ldx		[%o0 + 8], %o5
20	ldx		[%o0 + 16], %o0
21	sll		%g1, 3, %g1
22	sub		%g0, %g1, %o3
23	sllx		%o4, %g1, %o4
24	sllx		%o5, %g1, %g1
25	srlx		%o5, %o3, %o5
26	srlx		%o0, %o3, %o3
27	or		%o5, %o4, %o4
28	or		%o3, %g1, %o5
291:
30	ld		[%o2 + 272], %o3	! grandRounds, 3 or 4
31	ldd		[%o2 + 16], %f12
32	ldd		[%o2 + 24], %f14
33	xor		%g4, %o4, %o4
34	xor		%g5, %o5, %o5
35	ldd		[%o2 + 32], %f16
36	ldd		[%o2 + 40], %f18
37	.word	0x81b0230c !movxtod	%o4,%f0
38	.word	0x85b0230d !movxtod	%o5,%f2
39	ldd		[%o2 + 48], %f20
40	ldd		[%o2 + 56], %f22
41	sub		%o3, 1, %o3
42	ldd		[%o2 + 64], %f24
43	ldd		[%o2 + 72], %f26
44	add		%o2, 80, %o2
45
46.Lenc:
47	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
48	ldd		[%o2 + 0], %f12
49	sub		%o3,1,%o3
50	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
51	ldd		[%o2 + 8], %f14
52	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
53	ldd		[%o2 + 16], %f16
54	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
55	ldd		[%o2 + 24], %f18
56	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
57	ldd		[%o2 + 32], %f20
58	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
59	ldd		[%o2 + 40], %f22
60	.word	0x81b62780 !camellia_fl	%f24,%f0,%f0
61	ldd		[%o2 + 48], %f24
62	.word	0x85b6a7a2 !camellia_fli	%f26,%f2,%f2
63	ldd		[%o2 + 56], %f26
64	brnz,pt		%o3, .Lenc
65	add		%o2, 64, %o2
66
67	andcc		%o1, 7, %o4		! is output aligned?
68	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
69	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
70	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
71	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
72	.word	0x88cd0182 !camellia_f	%f20,%f2,%f0,%f4
73	.word	0x84cd8980 !camellia_f	%f22,%f0,%f4,%f2
74	.word	0x81b60d84 !fxor	%f24,%f4,%f0
75	.word	0x85b68d82 !fxor	%f26,%f2,%f2
76
77	bnz,pn		%icc, 2f
78	nop
79
80	std		%f0, [%o1 + 0]
81	retl
82	std		%f2, [%o1 + 8]
83
842:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
85	mov		0xff, %o5
86	srl		%o5, %o4, %o5
87
88	.word	0x89b00900 !faligndata	%f0,%f0,%f4
89	.word	0x8db00902 !faligndata	%f0,%f2,%f6
90	.word	0x91b08902 !faligndata	%f2,%f2,%f8
91
92	stda		%f4, [%o1 + %o5]0xc0	! partial store
93	std		%f6, [%o1 + 8]
94	add		%o1, 16, %o1
95	orn		%g0, %o5, %o5
96	retl
97	stda		%f8, [%o1 + %o5]0xc0	! partial store
98.type	cmll_t4_encrypt,#function
99.size	cmll_t4_encrypt,.-cmll_t4_encrypt
100
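! cmll_t4_decrypt: same register interface as cmll_t4_encrypt, but %o2 is
! first advanced by grandRounds*64 bytes so the round keys can be read
! back-to-front with the negative offsets below.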
101.globl	cmll_t4_decrypt
102.align	32
103cmll_t4_decrypt:
104	ld		[%o2 + 272], %o3	! grandRounds, 3 or 4
105	andcc		%o0, 7, %g1		! is input aligned?
106	andn		%o0, 7, %o0
107
108	sll		%o3, 6, %o3
109	add		%o3, %o2, %o2
110
111	ldx		[%o0 + 0], %o4
112	bz,pt		%icc, 1f
113	ldx		[%o0 + 8], %o5
114	ldx		[%o0 + 16], %o0
115	sll		%g1, 3, %g1
116	sub		%g0, %g1, %g4
117	sllx		%o4, %g1, %o4
118	sllx		%o5, %g1, %g1
119	srlx		%o5, %g4, %o5
120	srlx		%o0, %g4, %g4
121	or		%o5, %o4, %o4
122	or		%g4, %g1, %o5
1231:
124	ldx		[%o2 + 0], %g4
125	ldx		[%o2 + 8], %g5
126	ldd		[%o2 - 8], %f12
127	ldd		[%o2 - 16], %f14
128	xor		%g4, %o4, %o4
129	xor		%g5, %o5, %o5
130	ldd		[%o2 - 24], %f16
131	ldd		[%o2 - 32], %f18
132	.word	0x81b0230c !movxtod	%o4,%f0
133	.word	0x85b0230d !movxtod	%o5,%f2
134	ldd		[%o2 - 40], %f20
135	ldd		[%o2 - 48], %f22
136	sub		%o3, 64, %o3
137	ldd		[%o2 - 56], %f24
138	ldd		[%o2 - 64], %f26
139	sub		%o2, 64, %o2
140
141.Ldec:
142	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
143	ldd		[%o2 - 8], %f12
144	sub		%o3, 64, %o3
145	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
146	ldd		[%o2 - 16], %f14
147	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
148	ldd		[%o2 - 24], %f16
149	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
150	ldd		[%o2 - 32], %f18
151	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
152	ldd		[%o2 - 40], %f20
153	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
154	ldd		[%o2 - 48], %f22
155	.word	0x81b62780 !camellia_fl	%f24,%f0,%f0
156	ldd		[%o2 - 56], %f24
157	.word	0x85b6a7a2 !camellia_fli	%f26,%f2,%f2
158	ldd		[%o2 - 64], %f26
159	brnz,pt		%o3, .Ldec
160	sub		%o2, 64, %o2
161
162	andcc		%o1, 7, %o4		! is output aligned?
163	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
164	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
165	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
166	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
167	.word	0x88cd0182 !camellia_f	%f20,%f2,%f0,%f4
168	.word	0x84cd8980 !camellia_f	%f22,%f0,%f4,%f2
169	.word	0x81b68d84 !fxor	%f26,%f4,%f0
170	.word	0x85b60d82 !fxor	%f24,%f2,%f2
171
172	bnz,pn		%icc, 2f
173	nop
174
175	std		%f0, [%o1 + 0]
176	retl
177	std		%f2, [%o1 + 8]
178
1792:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
180	mov		0xff, %o5
181	srl		%o5, %o4, %o5
182
183	.word	0x89b00900 !faligndata	%f0,%f0,%f4
184	.word	0x8db00902 !faligndata	%f0,%f2,%f6
185	.word	0x91b08902 !faligndata	%f2,%f2,%f8
186
187	stda		%f4, [%o1 + %o5]0xc0	! partial store
188	std		%f6, [%o1 + 8]
189	add		%o1, 16, %o1
190	orn		%g0, %o5, %o5
191	retl
192	stda		%f8, [%o1 + %o5]0xc0	! partial store
193.type	cmll_t4_decrypt,#function
194.size	cmll_t4_decrypt,.-cmll_t4_decrypt
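! cmll_t4_set_key: judging from the comparisons below, %o0 is the user
! key (possibly unaligned, fixed up with alignaddr/faligndata), %o1 the
! key size in bits (128/192/256) and %o2 the output schedule.  The
! sllx/srlx/or sequences implement the 128-bit rotations of the Camellia
! key schedule, grandRounds (3 or 4) is stored at %o2+0x110, and the
! routine returns 0 in %o0.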
195.globl	cmll_t4_set_key
196.align	32
197cmll_t4_set_key:
198	and		%o0, 7, %o3
199	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
200	cmp		%o1, 192
201	ldd		[%o0 + 0], %f0
202	bl,pt		%icc,.L128
203	ldd		[%o0 + 8], %f2
204
205	be,pt		%icc,.L192
206	ldd		[%o0 + 16], %f4
207
208	brz,pt		%o3, .L256aligned
209	ldd		[%o0 + 24], %f6
210
211	ldd		[%o0 + 32], %f8
212	.word	0x81b00902 !faligndata	%f0,%f2,%f0
213	.word	0x85b08904 !faligndata	%f2,%f4,%f2
214	.word	0x89b10906 !faligndata	%f4,%f6,%f4
215	b		.L256aligned
216	.word	0x8db18908 !faligndata	%f6,%f8,%f6
217
218.align	16
219.L192:
220	brz,a,pt	%o3, .L256aligned
221	.word	0x8db00cc4 !fnot2	%f0,%f4,%f6
222
223	ldd		[%o0 + 24], %f6
224	nop
225	.word	0x81b00902 !faligndata	%f0,%f2,%f0
226	.word	0x85b08904 !faligndata	%f2,%f4,%f2
227	.word	0x89b10906 !faligndata	%f4,%f6,%f4
228	.word	0x8db00cc4 !fnot2	%f0,%f4,%f6
229
230.L256aligned:
231	std		%f0, [%o2 + 0]		! k[0, 1]
232	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
233	std		%f2, [%o2 + 8]		! k[2, 3]
234	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
235	.word	0x81b10d80 !fxor	%f4,%f0,%f0
236	b		.L128key
237	.word	0x85b18d82 !fxor	%f6,%f2,%f2
238
239.align	16
240.L128:
241	brz,pt		%o3, .L128aligned
242	nop
243
244	ldd		[%o0 + 16], %f4
245	nop
246	.word	0x81b00902 !faligndata	%f0,%f2,%f0
247	.word	0x85b08904 !faligndata	%f2,%f4,%f2
248
249.L128aligned:
250	std		%f0, [%o2 + 0]		! k[0, 1]
251	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
252	std		%f2, [%o2 + 8]		! k[2, 3]
253	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
254
255.L128key:
256	mov		%o7, %o5
2571:	call		.+8
258	add		%o7, SIGMA-1b, %o4
259	mov		%o5, %o7
260
261	ldd		[%o4 + 0], %f16
262	ldd		[%o4 + 8], %f18
263	ldd		[%o4 + 16], %f20
264	ldd		[%o4 + 24], %f22
265
266	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
267	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
268	.word	0x81b70d80 !fxor	%f28,%f0,%f0
269	.word	0x85b78d82 !fxor	%f30,%f2,%f2
270	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
271	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
272
273	bge,pn		%icc, .L256key
274	nop
275	std	%f0, [%o2 + 0x10]	! k[ 4,  5]
276	std	%f2, [%o2 + 0x18]	! k[ 6,  7]
277
278	.word	0x99b02200 !movdtox	%f0,%o4
279	.word	0x9bb02202 !movdtox	%f2,%o5
280	srlx	%o4, 64-15, %g4
281	sllx	%o4, 15, %o4
282	srlx	%o5, 64-15, %g5
283	sllx	%o5, 15, %o5
284	or	%o4, %g5, %o4
285	or	%o5, %g4, %o5
286	stx	%o4, [%o2 + 0x30]	! k[12, 13]
287	stx	%o5, [%o2 + 0x38]	! k[14, 15]
288	srlx	%o4, 64-15, %g4
289	sllx	%o4, 15, %o4
290	srlx	%o5, 64-15, %g5
291	sllx	%o5, 15, %o5
292	or	%o4, %g5, %o4
293	or	%o5, %g4, %o5
294	stx	%o4, [%o2 + 0x40]	! k[16, 17]
295	stx	%o5, [%o2 + 0x48]	! k[18, 19]
296	srlx	%o4, 64-15, %g4
297	sllx	%o4, 15, %o4
298	srlx	%o5, 64-15, %g5
299	sllx	%o5, 15, %o5
300	or	%o4, %g5, %o4
301	or	%o5, %g4, %o5
302	stx	%o4, [%o2 + 0x60]	! k[24, 25]
303	srlx	%o4, 64-15, %g4
304	sllx	%o4, 15, %o4
305	srlx	%o5, 64-15, %g5
306	sllx	%o5, 15, %o5
307	or	%o4, %g5, %o4
308	or	%o5, %g4, %o5
309	stx	%o4, [%o2 + 0x70]	! k[28, 29]
310	stx	%o5, [%o2 + 0x78]	! k[30, 31]
311	srlx	%o4, 64-34, %g4
312	sllx	%o4, 34, %o4
313	srlx	%o5, 64-34, %g5
314	sllx	%o5, 34, %o5
315	or	%o4, %g5, %o4
316	or	%o5, %g4, %o5
317	stx	%o4, [%o2 + 0xa0]	! k[40, 41]
318	stx	%o5, [%o2 + 0xa8]	! k[42, 43]
319	srlx	%o4, 64-17, %g4
320	sllx	%o4, 17, %o4
321	srlx	%o5, 64-17, %g5
322	sllx	%o5, 17, %o5
323	or	%o4, %g5, %o4
324	or	%o5, %g4, %o5
325	stx	%o4, [%o2 + 0xc0]	! k[48, 49]
326	stx	%o5, [%o2 + 0xc8]	! k[50, 51]
327
328	.word	0x99b0221c !movdtox	%f28,%o4		! k[ 0,  1]
329	.word	0x9bb0221e !movdtox	%f30,%o5		! k[ 2,  3]
330	srlx	%o4, 64-15, %g4
331	sllx	%o4, 15, %o4
332	srlx	%o5, 64-15, %g5
333	sllx	%o5, 15, %o5
334	or	%o4, %g5, %o4
335	or	%o5, %g4, %o5
336	stx	%o4, [%o2 + 0x20]	! k[ 8,  9]
337	stx	%o5, [%o2 + 0x28]	! k[10, 11]
338	srlx	%o4, 64-30, %g4
339	sllx	%o4, 30, %o4
340	srlx	%o5, 64-30, %g5
341	sllx	%o5, 30, %o5
342	or	%o4, %g5, %o4
343	or	%o5, %g4, %o5
344	stx	%o4, [%o2 + 0x50]	! k[20, 21]
345	stx	%o5, [%o2 + 0x58]	! k[22, 23]
346	srlx	%o4, 64-15, %g4
347	sllx	%o4, 15, %o4
348	srlx	%o5, 64-15, %g5
349	sllx	%o5, 15, %o5
350	or	%o4, %g5, %o4
351	or	%o5, %g4, %o5
352	stx	%o5, [%o2 + 0x68]	! k[26, 27]
353	srlx	%o4, 64-17, %g4
354	sllx	%o4, 17, %o4
355	srlx	%o5, 64-17, %g5
356	sllx	%o5, 17, %o5
357	or	%o4, %g5, %o4
358	or	%o5, %g4, %o5
359	stx	%o4, [%o2 + 0x80]	! k[32, 33]
360	stx	%o5, [%o2 + 0x88]	! k[34, 35]
361	srlx	%o4, 64-17, %g4
362	sllx	%o4, 17, %o4
363	srlx	%o5, 64-17, %g5
364	sllx	%o5, 17, %o5
365	or	%o4, %g5, %o4
366	or	%o5, %g4, %o5
367	stx	%o4, [%o2 + 0x90]	! k[36, 37]
368	stx	%o5, [%o2 + 0x98]	! k[38, 39]
369	srlx	%o4, 64-17, %g4
370	sllx	%o4, 17, %o4
371	srlx	%o5, 64-17, %g5
372	sllx	%o5, 17, %o5
373	or	%o4, %g5, %o4
374	or	%o5, %g4, %o5
375	stx	%o4, [%o2 + 0xb0]	! k[44, 45]
376	stx	%o5, [%o2 + 0xb8]	! k[46, 47]
377
378	mov		3, %o3
379	st		%o3, [%o2 + 0x110]
380	retl
381	xor		%o0, %o0, %o0
382
383.align	16
384.L256key:
385	ldd		[%o4 + 32], %f24
386	ldd		[%o4 + 40], %f26
387
388	std		%f0, [%o2 + 0x30]	! k[12, 13]
389	std		%f2, [%o2 + 0x38]	! k[14, 15]
390
391	.word	0x81b10d80 !fxor	%f4,%f0,%f0
392	.word	0x85b18d82 !fxor	%f6,%f2,%f2
393	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
394	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
395
396	std	%f0, [%o2 + 0x10]	! k[ 4,  5]
397	std	%f2, [%o2 + 0x18]	! k[ 6,  7]
398
399	.word	0x99b02200 !movdtox	%f0,%o4
400	.word	0x9bb02202 !movdtox	%f2,%o5
401	srlx	%o4, 64-30, %g4
402	sllx	%o4, 30, %o4
403	srlx	%o5, 64-30, %g5
404	sllx	%o5, 30, %o5
405	or	%o4, %g5, %o4
406	or	%o5, %g4, %o5
407	stx	%o4, [%o2 + 0x50]	! k[20, 21]
408	stx	%o5, [%o2 + 0x58]	! k[22, 23]
409	srlx	%o4, 64-30, %g4
410	sllx	%o4, 30, %o4
411	srlx	%o5, 64-30, %g5
412	sllx	%o5, 30, %o5
413	or	%o4, %g5, %o4
414	or	%o5, %g4, %o5
415	stx	%o4, [%o2 + 0xa0]	! k[40, 41]
416	stx	%o5, [%o2 + 0xa8]	! k[42, 43]
417	srlx	%o4, 64-51, %g4
418	sllx	%o4, 51, %o4
419	srlx	%o5, 64-51, %g5
420	sllx	%o5, 51, %o5
421	or	%o4, %g5, %o4
422	or	%o5, %g4, %o5
423	stx	%o4, [%o2 + 0x100]	! k[64, 65]
424	stx	%o5, [%o2 + 0x108]	! k[66, 67]
425
426	.word	0x99b02204 !movdtox	%f4,%o4		! k[ 8,  9]
427	.word	0x9bb02206 !movdtox	%f6,%o5		! k[10, 11]
428	srlx	%o4, 64-15, %g4
429	sllx	%o4, 15, %o4
430	srlx	%o5, 64-15, %g5
431	sllx	%o5, 15, %o5
432	or	%o4, %g5, %o4
433	or	%o5, %g4, %o5
434	stx	%o4, [%o2 + 0x20]	! k[ 8,  9]
435	stx	%o5, [%o2 + 0x28]	! k[10, 11]
436	srlx	%o4, 64-15, %g4
437	sllx	%o4, 15, %o4
438	srlx	%o5, 64-15, %g5
439	sllx	%o5, 15, %o5
440	or	%o4, %g5, %o4
441	or	%o5, %g4, %o5
442	stx	%o4, [%o2 + 0x40]	! k[16, 17]
443	stx	%o5, [%o2 + 0x48]	! k[18, 19]
444	srlx	%o4, 64-30, %g4
445	sllx	%o4, 30, %o4
446	srlx	%o5, 64-30, %g5
447	sllx	%o5, 30, %o5
448	or	%o4, %g5, %o4
449	or	%o5, %g4, %o5
450	stx	%o4, [%o2 + 0x90]	! k[36, 37]
451	stx	%o5, [%o2 + 0x98]	! k[38, 39]
452	srlx	%o4, 64-34, %g4
453	sllx	%o4, 34, %o4
454	srlx	%o5, 64-34, %g5
455	sllx	%o5, 34, %o5
456	or	%o4, %g5, %o4
457	or	%o5, %g4, %o5
458	stx	%o4, [%o2 + 0xd0]	! k[52, 53]
459	stx	%o5, [%o2 + 0xd8]	! k[54, 55]
460	ldx	[%o2 + 0x30], %o4	! k[12, 13]
461	ldx	[%o2 + 0x38], %o5	! k[14, 15]
462	srlx	%o4, 64-15, %g4
463	sllx	%o4, 15, %o4
464	srlx	%o5, 64-15, %g5
465	sllx	%o5, 15, %o5
466	or	%o4, %g5, %o4
467	or	%o5, %g4, %o5
468	stx	%o4, [%o2 + 0x30]	! k[12, 13]
469	stx	%o5, [%o2 + 0x38]	! k[14, 15]
470	srlx	%o4, 64-30, %g4
471	sllx	%o4, 30, %o4
472	srlx	%o5, 64-30, %g5
473	sllx	%o5, 30, %o5
474	or	%o4, %g5, %o4
475	or	%o5, %g4, %o5
476	stx	%o4, [%o2 + 0x70]	! k[28, 29]
477	stx	%o5, [%o2 + 0x78]	! k[30, 31]
478	srlx	%o4, 32, %g4
479	srlx	%o5, 32, %g5
480	st	%o4, [%o2 + 0xc0]	! k[48]
481	st	%g5, [%o2 + 0xc4]	! k[49]
482	st	%o5, [%o2 + 0xc8]	! k[50]
483	st	%g4, [%o2 + 0xcc]	! k[51]
484	srlx	%o4, 64-49, %g4
485	sllx	%o4, 49, %o4
486	srlx	%o5, 64-49, %g5
487	sllx	%o5, 49, %o5
488	or	%o4, %g5, %o4
489	or	%o5, %g4, %o5
490	stx	%o4, [%o2 + 0xe0]	! k[56, 57]
491	stx	%o5, [%o2 + 0xe8]	! k[58, 59]
492
493	.word	0x99b0221c !movdtox	%f28,%o4		! k[ 0,  1]
494	.word	0x9bb0221e !movdtox	%f30,%o5		! k[ 2,  3]
495	srlx	%o4, 64-45, %g4
496	sllx	%o4, 45, %o4
497	srlx	%o5, 64-45, %g5
498	sllx	%o5, 45, %o5
499	or	%o4, %g5, %o4
500	or	%o5, %g4, %o5
501	stx	%o4, [%o2 + 0x60]	! k[24, 25]
502	stx	%o5, [%o2 + 0x68]	! k[26, 27]
503	srlx	%o4, 64-15, %g4
504	sllx	%o4, 15, %o4
505	srlx	%o5, 64-15, %g5
506	sllx	%o5, 15, %o5
507	or	%o4, %g5, %o4
508	or	%o5, %g4, %o5
509	stx	%o4, [%o2 + 0x80]	! k[32, 33]
510	stx	%o5, [%o2 + 0x88]	! k[34, 35]
511	srlx	%o4, 64-17, %g4
512	sllx	%o4, 17, %o4
513	srlx	%o5, 64-17, %g5
514	sllx	%o5, 17, %o5
515	or	%o4, %g5, %o4
516	or	%o5, %g4, %o5
517	stx	%o4, [%o2 + 0xb0]	! k[44, 45]
518	stx	%o5, [%o2 + 0xb8]	! k[46, 47]
519	srlx	%o4, 64-34, %g4
520	sllx	%o4, 34, %o4
521	srlx	%o5, 64-34, %g5
522	sllx	%o5, 34, %o5
523	or	%o4, %g5, %o4
524	or	%o5, %g4, %o5
525	stx	%o4, [%o2 + 0xf0]	! k[60, 61]
526	stx	%o5, [%o2 + 0xf8]	! k[62, 63]
527
528	mov		4, %o3
529	st		%o3, [%o2 + 0x110]
530	retl
531	xor		%o0, %o0, %o0
532.type	cmll_t4_set_key,#function
533.size	cmll_t4_set_key,.-cmll_t4_set_key
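! Camellia key-schedule constants Sigma1..Sigma6, loaded PC-relative by
! cmll_t4_set_key above.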
534.align	32
535SIGMA:
536	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
537	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
538	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
539.type	SIGMA,#object
540.size	SIGMA,.-SIGMA
541.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
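! Internal helpers for the bulk CBC/CTR entry points further down.  They
! run inside the caller's register window: the key pointer is expected in
! %i3, the first whitening key is returned in %g4/%g5 and the round keys
! are kept resident in %f16-%f62.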
542.align	32
543_cmll128_load_enckey:
544	ldx		[%i3 + 0], %g4
545	ldx		[%i3 + 8], %g5
546	ldd		[%i3 + 16], %f16
547	ldd		[%i3 + 24], %f18
548	ldd		[%i3 + 32], %f20
549	ldd		[%i3 + 40], %f22
550	ldd		[%i3 + 48], %f24
551	ldd		[%i3 + 56], %f26
552	ldd		[%i3 + 64], %f28
553	ldd		[%i3 + 72], %f30
554	ldd		[%i3 + 80], %f32
555	ldd		[%i3 + 88], %f34
556	ldd		[%i3 + 96], %f36
557	ldd		[%i3 + 104], %f38
558	ldd		[%i3 + 112], %f40
559	ldd		[%i3 + 120], %f42
560	ldd		[%i3 + 128], %f44
561	ldd		[%i3 + 136], %f46
562	ldd		[%i3 + 144], %f48
563	ldd		[%i3 + 152], %f50
564	ldd		[%i3 + 160], %f52
565	ldd		[%i3 + 168], %f54
566	ldd		[%i3 + 176], %f56
567	ldd		[%i3 + 184], %f58
568	ldd		[%i3 + 192], %f60
569	ldd		[%i3 + 200], %f62
570	retl
571	nop
572.type	_cmll128_load_enckey,#function
573.size	_cmll128_load_enckey,.-_cmll128_load_enckey
574_cmll256_load_enckey=_cmll128_load_enckey
575
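! The decrypt-key loaders read the schedule in reverse order so that the
! encrypt-order round helpers (note the _cmll128_decrypt_1x alias below)
! can be reused for decryption.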
576.align	32
577_cmll256_load_deckey:
578	ldd		[%i3 + 64], %f62
579	ldd		[%i3 + 72], %f60
580	b		.Load_deckey
581	add		%i3, 64, %i3
582_cmll128_load_deckey:
583	ldd		[%i3 + 0], %f60
584	ldd		[%i3 + 8], %f62
585.Load_deckey:
586	ldd		[%i3 + 16], %f58
587	ldd		[%i3 + 24], %f56
588	ldd		[%i3 + 32], %f54
589	ldd		[%i3 + 40], %f52
590	ldd		[%i3 + 48], %f50
591	ldd		[%i3 + 56], %f48
592	ldd		[%i3 + 64], %f46
593	ldd		[%i3 + 72], %f44
594	ldd		[%i3 + 80], %f42
595	ldd		[%i3 + 88], %f40
596	ldd		[%i3 + 96], %f38
597	ldd		[%i3 + 104], %f36
598	ldd		[%i3 + 112], %f34
599	ldd		[%i3 + 120], %f32
600	ldd		[%i3 + 128], %f30
601	ldd		[%i3 + 136], %f28
602	ldd		[%i3 + 144], %f26
603	ldd		[%i3 + 152], %f24
604	ldd		[%i3 + 160], %f22
605	ldd		[%i3 + 168], %f20
606	ldd		[%i3 + 176], %f18
607	ldd		[%i3 + 184], %f16
608	ldx		[%i3 + 192], %g4
609	retl
610	ldx		[%i3 + 200], %g5
611.type	_cmll256_load_deckey,#function
612.size	_cmll256_load_deckey,.-_cmll256_load_deckey
613
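! 18-round Camellia body for the 128-bit schedule.  The _2x variant below
! interleaves two independent blocks (%f0/%f2 and %f4/%f6), presumably to
! overlap the latency of the camellia_f instructions.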
614.align	32
615_cmll128_encrypt_1x:
616	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
617	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
618	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
619	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
620	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
621	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
622	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
623	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
624	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
625	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
626	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
627	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
628	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
629	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
630	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
631	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
632	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
633	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
634	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
635	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
636	.word	0x88ce4182 !camellia_f	%f56,%f2,%f0,%f4
637	.word	0x84cec980 !camellia_f	%f58,%f0,%f4,%f2
638	.word	0x81b74d84 !fxor	%f60,%f4,%f0
639	retl
640	.word	0x85b7cd82 !fxor	%f62,%f2,%f2
641.type	_cmll128_encrypt_1x,#function
642.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
643_cmll128_decrypt_1x=_cmll128_encrypt_1x
644
645.align	32
646_cmll128_encrypt_2x:
647	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
648	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
649	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
650	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
651	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
652	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
653	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
654	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
655	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
656	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
657	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
658	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
659	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
660	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
661	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
662	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
663	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
664	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
665	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
666	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
667	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
668	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
669	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
670	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
671	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
672	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
673	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
674	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
675	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
676	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
677	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
678	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
679	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
680	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
681	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
682	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
683	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
684	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
685	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
686	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
687	.word	0x90ce4182 !camellia_f	%f56,%f2,%f0,%f8
688	.word	0x94ce4986 !camellia_f	%f56,%f6,%f4,%f10
689	.word	0x84ced180 !camellia_f	%f58,%f0,%f8,%f2
690	.word	0x8cced584 !camellia_f	%f58,%f4,%f10,%f6
691	.word	0x81b74d88 !fxor	%f60,%f8,%f0
692	.word	0x89b74d8a !fxor	%f60,%f10,%f4
693	.word	0x85b7cd82 !fxor	%f62,%f2,%f2
694	retl
695	.word	0x8db7cd86 !fxor	%f62,%f6,%f6
696.type	_cmll128_encrypt_2x,#function
697.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
698_cmll128_decrypt_2x=_cmll128_encrypt_2x
699
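! 24-round body used for the larger (grandRounds = 4) schedules: the
! extra round keys are reloaded from %i3+208..264 on the fly, and
! %f16-%f30 are restored from the start of the schedule before returning
! so the next call sees the loader's values again.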
700.align	32
701_cmll256_encrypt_1x:
702	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
703	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
704	ldd		[%i3 + 208], %f16
705	ldd		[%i3 + 216], %f18
706	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
707	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
708	ldd		[%i3 + 224], %f20
709	ldd		[%i3 + 232], %f22
710	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
711	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
712	ldd		[%i3 + 240], %f24
713	ldd		[%i3 + 248], %f26
714	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
715	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
716	ldd		[%i3 + 256], %f28
717	ldd		[%i3 + 264], %f30
718	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
719	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
720	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
721	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
722	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
723	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
724	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
725	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
726	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
727	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
728	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
729	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
730	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
731	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
732	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
733	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
734	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
735	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
736	ldd		[%i3 + 16], %f16
737	ldd		[%i3 + 24], %f18
738	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
739	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
740	ldd		[%i3 + 32], %f20
741	ldd		[%i3 + 40], %f22
742	.word	0x88ce0182 !camellia_f	%f24,%f2,%f0,%f4
743	.word	0x84ce8980 !camellia_f	%f26,%f0,%f4,%f2
744	ldd		[%i3 + 48], %f24
745	ldd		[%i3 + 56], %f26
746	.word	0x81b70d84 !fxor	%f28,%f4,%f0
747	.word	0x85b78d82 !fxor	%f30,%f2,%f2
748	ldd		[%i3 + 64], %f28
749	retl
750	ldd		[%i3 + 72], %f30
751.type	_cmll256_encrypt_1x,#function
752.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x
753
754.align	32
755_cmll256_encrypt_2x:
756	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
757	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
758	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
759	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
760	ldd		[%i3 + 208], %f16
761	ldd		[%i3 + 216], %f18
762	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
763	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
764	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
765	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
766	ldd		[%i3 + 224], %f20
767	ldd		[%i3 + 232], %f22
768	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
769	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
770	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
771	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
772	ldd		[%i3 + 240], %f24
773	ldd		[%i3 + 248], %f26
774	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
775	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
776	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
777	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
778	ldd		[%i3 + 256], %f28
779	ldd		[%i3 + 264], %f30
780	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
781	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
782	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
783	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
784	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
785	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
786	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
787	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
788	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
789	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
790	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
791	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
792	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
793	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
794	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
795	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
796	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
797	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
798	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
799	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
800	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
801	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
802	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
803	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
804	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
805	.word	0x8cce4986 !camellia_f	%f56,%f6,%f4,%f6
806	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
807	.word	0x88cecd84 !camellia_f	%f58,%f4,%f6,%f4
808	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
809	.word	0x89b76784 !camellia_fl	%f60,%f4,%f4
810	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
811	.word	0x8db7e7a6 !camellia_fli	%f62,%f6,%f6
812	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
813	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
814	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
815	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
816	ldd		[%i3 + 16], %f16
817	ldd		[%i3 + 24], %f18
818	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
819	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
820	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
821	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
822	ldd		[%i3 + 32], %f20
823	ldd		[%i3 + 40], %f22
824	.word	0x90ce0182 !camellia_f	%f24,%f2,%f0,%f8
825	.word	0x94ce0986 !camellia_f	%f24,%f6,%f4,%f10
826	.word	0x84ce9180 !camellia_f	%f26,%f0,%f8,%f2
827	.word	0x8cce9584 !camellia_f	%f26,%f4,%f10,%f6
828	ldd		[%i3 + 48], %f24
829	ldd		[%i3 + 56], %f26
830	.word	0x81b70d88 !fxor	%f28,%f8,%f0
831	.word	0x89b70d8a !fxor	%f28,%f10,%f4
832	.word	0x85b78d82 !fxor	%f30,%f2,%f2
833	.word	0x8db78d86 !fxor	%f30,%f6,%f6
834	ldd		[%i3 + 64], %f28
835	retl
836	ldd		[%i3 + 72], %f30
837.type	_cmll256_encrypt_2x,#function
838.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x
839
840.align	32
841_cmll256_decrypt_1x:
842	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
843	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
844	ldd		[%i3 - 8], %f16
845	ldd		[%i3 - 16], %f18
846	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
847	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
848	ldd		[%i3 - 24], %f20
849	ldd		[%i3 - 32], %f22
850	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
851	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
852	ldd		[%i3 - 40], %f24
853	ldd		[%i3 - 48], %f26
854	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
855	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
856	ldd		[%i3 - 56], %f28
857	ldd		[%i3 - 64], %f30
858	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
859	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
860	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
861	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
862	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
863	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
864	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
865	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
866	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
867	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
868	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
869	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
870	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
871	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
872	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
873	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
874	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
875	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
876	ldd		[%i3 + 184], %f16
877	ldd		[%i3 + 176], %f18
878	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
879	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
880	ldd		[%i3 + 168], %f20
881	ldd		[%i3 + 160], %f22
882	.word	0x88ce0182 !camellia_f	%f24,%f2,%f0,%f4
883	.word	0x84ce8980 !camellia_f	%f26,%f0,%f4,%f2
884	ldd		[%i3 + 152], %f24
885	ldd		[%i3 + 144], %f26
886	.word	0x81b78d84 !fxor	%f30,%f4,%f0
887	.word	0x85b70d82 !fxor	%f28,%f2,%f2
888	ldd		[%i3 + 136], %f28
889	retl
890	ldd		[%i3 + 128], %f30
891.type	_cmll256_decrypt_1x,#function
892.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x
893
894.align	32
895_cmll256_decrypt_2x:
896	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
897	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
898	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
899	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
900	ldd		[%i3 - 8], %f16
901	ldd		[%i3 - 16], %f18
902	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
903	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
904	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
905	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
906	ldd		[%i3 - 24], %f20
907	ldd		[%i3 - 32], %f22
908	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
909	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
910	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
911	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
912	ldd		[%i3 - 40], %f24
913	ldd		[%i3 - 48], %f26
914	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
915	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
916	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
917	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
918	ldd		[%i3 - 56], %f28
919	ldd		[%i3 - 64], %f30
920	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
921	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
922	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
923	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
924	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
925	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
926	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
927	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
928	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
929	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
930	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
931	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
932	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
933	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
934	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
935	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
936	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
937	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
938	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
939	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
940	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
941	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
942	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
943	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
944	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
945	.word	0x8cce4986 !camellia_f	%f56,%f6,%f4,%f6
946	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
947	.word	0x88cecd84 !camellia_f	%f58,%f4,%f6,%f4
948	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
949	.word	0x89b76784 !camellia_fl	%f60,%f4,%f4
950	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
951	.word	0x8db7e7a6 !camellia_fli	%f62,%f6,%f6
952	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
953	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
954	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
955	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
956	ldd		[%i3 + 184], %f16
957	ldd		[%i3 + 176], %f18
958	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
959	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
960	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
961	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
962	ldd		[%i3 + 168], %f20
963	ldd		[%i3 + 160], %f22
964	.word	0x90ce0182 !camellia_f	%f24,%f2,%f0,%f8
965	.word	0x94ce0986 !camellia_f	%f24,%f6,%f4,%f10
966	.word	0x84ce9180 !camellia_f	%f26,%f0,%f8,%f2
967	.word	0x8cce9584 !camellia_f	%f26,%f4,%f10,%f6
968	ldd		[%i3 + 152], %f24
969	ldd		[%i3 + 144], %f26
970	.word	0x81b78d88 !fxor	%f30,%f8,%f0
971	.word	0x89b78d8a !fxor	%f30,%f10,%f4
972	.word	0x85b70d82 !fxor	%f28,%f2,%f2
973	.word	0x8db70d86 !fxor	%f28,%f6,%f6
974	ldd		[%i3 + 136], %f28
975	retl
976	ldd		[%i3 + 128], %f30
977.type	_cmll256_decrypt_2x,#function
978.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
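! Bulk CBC encryption, 128-bit schedule.  The argument layout appears to
! be %i0 = in, %i1 = out, %i2 = length in bytes, %i3 = key schedule and
! %i4 = 16-byte IV (written back on return).  Large (>= 128 byte),
! non-overlapping requests with 8-byte-aligned output take the
! .L128cbc_enc_blk path and its ASI_BLK_INIT stores; everything else
! falls through to the one-block-at-a-time loop with faligndata and
! partial-store output handling.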
979.globl	cmll128_t4_cbc_encrypt
980.align	32
981cmll128_t4_cbc_encrypt:
982	save		%sp, -STACK_FRAME, %sp
983	cmp		%i2, 0
984	be,pn		SIZE_T_CC, .L128_cbc_enc_abort
985	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
986	sub		%i0, %i1, %l5	! %i0!=%i1
987	ld		[%i4 + 0], %f0
988	ld		[%i4 + 4], %f1
989	ld		[%i4 + 8], %f2
990	ld		[%i4 + 12], %f3
991	prefetch	[%i0], 20
992	prefetch	[%i0 + 63], 20
993	call		_cmll128_load_enckey
994	and		%i0, 7, %l0
995	andn		%i0, 7, %i0
996	sll		%l0, 3, %l0
997	mov		64, %l1
998	mov		0xff, %l3
999	sub		%l1, %l0, %l1
1000	and		%i1, 7, %l2
1001	cmp		%i2, 127
1002	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1003	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
1004	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
1005	srl		%l3, %l2, %l3
1006
1007	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1008	srlx		%i2, 4, %i2
1009	prefetch	[%i1], 22
1010
1011.L128_cbc_enc_loop:
1012	ldx		[%i0 + 0], %o0
1013	brz,pt		%l0, 4f
1014	ldx		[%i0 + 8], %o1
1015
1016	ldx		[%i0 + 16], %o2
1017	sllx		%o0, %l0, %o0
1018	srlx		%o1, %l1, %g1
1019	sllx		%o1, %l0, %o1
1020	or		%g1, %o0, %o0
1021	srlx		%o2, %l1, %o2
1022	or		%o2, %o1, %o1
10234:
1024	xor		%g4, %o0, %o0		! ^= rk[0]
1025	xor		%g5, %o1, %o1
1026	.word	0x99b02308 !movxtod	%o0,%f12
1027	.word	0x9db02309 !movxtod	%o1,%f14
1028
1029	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1030	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1031	prefetch	[%i1 + 63], 22
1032	prefetch	[%i0 + 16+63], 20
1033	call		_cmll128_encrypt_1x
1034	add		%i0, 16, %i0
1035
1036	brnz,pn		%l2, 2f
1037	sub		%i2, 1, %i2
1038
1039	std		%f0, [%i1 + 0]
1040	std		%f2, [%i1 + 8]
1041	brnz,pt		%i2, .L128_cbc_enc_loop
1042	add		%i1, 16, %i1
1043	st		%f0, [%i4 + 0]
1044	st		%f1, [%i4 + 4]
1045	st		%f2, [%i4 + 8]
1046	st		%f3, [%i4 + 12]
1047.L128_cbc_enc_abort:
1048	ret
1049	restore
1050
1051.align	16
10522:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1053						! and ~3x deterioration
1054						! in inp==out case
1055	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1056	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1057	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1058
1059	stda		%f4, [%i1 + %l3]0xc0	! partial store
1060	std		%f6, [%i1 + 8]
1061	add		%i1, 16, %i1
1062	orn		%g0, %l3, %l3
1063	stda		%f8, [%i1 + %l3]0xc0	! partial store
1064
1065	brnz,pt		%i2, .L128_cbc_enc_loop+4
1066	orn		%g0, %l3, %l3
1067	st		%f0, [%i4 + 0]
1068	st		%f1, [%i4 + 4]
1069	st		%f2, [%i4 + 8]
1070	st		%f3, [%i4 + 12]
1071	ret
1072	restore
1073
1074!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1075.align	32
1076.L128cbc_enc_blk:
1077	add	%i1, %i2, %l5
1078	and	%l5, 63, %l5	! tail
1079	sub	%i2, %l5, %i2
1080	add	%l5, 15, %l5	! round up to 16n
1081	srlx	%i2, 4, %i2
1082	srl	%l5, 4, %l5
1083
1084.L128_cbc_enc_blk_loop:
1085	ldx		[%i0 + 0], %o0
1086	brz,pt		%l0, 5f
1087	ldx		[%i0 + 8], %o1
1088
1089	ldx		[%i0 + 16], %o2
1090	sllx		%o0, %l0, %o0
1091	srlx		%o1, %l1, %g1
1092	sllx		%o1, %l0, %o1
1093	or		%g1, %o0, %o0
1094	srlx		%o2, %l1, %o2
1095	or		%o2, %o1, %o1
10965:
1097	xor		%g4, %o0, %o0		! ^= rk[0]
1098	xor		%g5, %o1, %o1
1099	.word	0x99b02308 !movxtod	%o0,%f12
1100	.word	0x9db02309 !movxtod	%o1,%f14
1101
1102	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1103	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1104	prefetch	[%i0 + 16+63], 20
1105	call		_cmll128_encrypt_1x
1106	add		%i0, 16, %i0
1107	sub		%i2, 1, %i2
1108
1109	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1110	add		%i1, 8, %i1
1111	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1112	brnz,pt		%i2, .L128_cbc_enc_blk_loop
1113	add		%i1, 8, %i1
1114
1115	membar		#StoreLoad|#StoreStore
1116	brnz,pt		%l5, .L128_cbc_enc_loop
1117	mov		%l5, %i2
1118	st		%f0, [%i4 + 0]
1119	st		%f1, [%i4 + 4]
1120	st		%f2, [%i4 + 8]
1121	st		%f3, [%i4 + 12]
1122	ret
1123	restore
1124.type	cmll128_t4_cbc_encrypt,#function
1125.size	cmll128_t4_cbc_encrypt,.-cmll128_t4_cbc_encrypt
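! Same structure as cmll128_t4_cbc_encrypt, but built on the _cmll256_*
! helpers, which page in the upper part of the key schedule per block.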
1126.globl	cmll256_t4_cbc_encrypt
1127.align	32
1128cmll256_t4_cbc_encrypt:
1129	save		%sp, -STACK_FRAME, %sp
1130	cmp		%i2, 0
1131	be,pn		SIZE_T_CC, .L256_cbc_enc_abort
1132	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1133	sub		%i0, %i1, %l5	! %i0!=%i1
1134	ld		[%i4 + 0], %f0
1135	ld		[%i4 + 4], %f1
1136	ld		[%i4 + 8], %f2
1137	ld		[%i4 + 12], %f3
1138	prefetch	[%i0], 20
1139	prefetch	[%i0 + 63], 20
1140	call		_cmll256_load_enckey
1141	and		%i0, 7, %l0
1142	andn		%i0, 7, %i0
1143	sll		%l0, 3, %l0
1144	mov		64, %l1
1145	mov		0xff, %l3
1146	sub		%l1, %l0, %l1
1147	and		%i1, 7, %l2
1148	cmp		%i2, 127
1149	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1150	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
1151	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
1152	srl		%l3, %l2, %l3
1153
1154	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1155	srlx		%i2, 4, %i2
1156	prefetch	[%i1], 22
1157
1158.L256_cbc_enc_loop:
1159	ldx		[%i0 + 0], %o0
1160	brz,pt		%l0, 4f
1161	ldx		[%i0 + 8], %o1
1162
1163	ldx		[%i0 + 16], %o2
1164	sllx		%o0, %l0, %o0
1165	srlx		%o1, %l1, %g1
1166	sllx		%o1, %l0, %o1
1167	or		%g1, %o0, %o0
1168	srlx		%o2, %l1, %o2
1169	or		%o2, %o1, %o1
11704:
1171	xor		%g4, %o0, %o0		! ^= rk[0]
1172	xor		%g5, %o1, %o1
1173	.word	0x99b02308 !movxtod	%o0,%f12
1174	.word	0x9db02309 !movxtod	%o1,%f14
1175
1176	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1177	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1178	prefetch	[%i1 + 63], 22
1179	prefetch	[%i0 + 16+63], 20
1180	call		_cmll256_encrypt_1x
1181	add		%i0, 16, %i0
1182
1183	brnz,pn		%l2, 2f
1184	sub		%i2, 1, %i2
1185
1186	std		%f0, [%i1 + 0]
1187	std		%f2, [%i1 + 8]
1188	brnz,pt		%i2, .L256_cbc_enc_loop
1189	add		%i1, 16, %i1
1190	st		%f0, [%i4 + 0]
1191	st		%f1, [%i4 + 4]
1192	st		%f2, [%i4 + 8]
1193	st		%f3, [%i4 + 12]
1194.L256_cbc_enc_abort:
1195	ret
1196	restore
1197
1198.align	16
11992:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1200						! and ~3x deterioration
1201						! in inp==out case
1202	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1203	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1204	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1205
1206	stda		%f4, [%i1 + %l3]0xc0	! partial store
1207	std		%f6, [%i1 + 8]
1208	add		%i1, 16, %i1
1209	orn		%g0, %l3, %l3
1210	stda		%f8, [%i1 + %l3]0xc0	! partial store
1211
1212	brnz,pt		%i2, .L256_cbc_enc_loop+4
1213	orn		%g0, %l3, %l3
1214	st		%f0, [%i4 + 0]
1215	st		%f1, [%i4 + 4]
1216	st		%f2, [%i4 + 8]
1217	st		%f3, [%i4 + 12]
1218	ret
1219	restore
1220
1221!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1222.align	32
1223.L256cbc_enc_blk:
1224	add	%i1, %i2, %l5
1225	and	%l5, 63, %l5	! tail
1226	sub	%i2, %l5, %i2
1227	add	%l5, 15, %l5	! round up to 16n
1228	srlx	%i2, 4, %i2
1229	srl	%l5, 4, %l5
1230
1231.L256_cbc_enc_blk_loop:
1232	ldx		[%i0 + 0], %o0
1233	brz,pt		%l0, 5f
1234	ldx		[%i0 + 8], %o1
1235
1236	ldx		[%i0 + 16], %o2
1237	sllx		%o0, %l0, %o0
1238	srlx		%o1, %l1, %g1
1239	sllx		%o1, %l0, %o1
1240	or		%g1, %o0, %o0
1241	srlx		%o2, %l1, %o2
1242	or		%o2, %o1, %o1
12435:
1244	xor		%g4, %o0, %o0		! ^= rk[0]
1245	xor		%g5, %o1, %o1
1246	.word	0x99b02308 !movxtod	%o0,%f12
1247	.word	0x9db02309 !movxtod	%o1,%f14
1248
1249	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1250	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1251	prefetch	[%i0 + 16+63], 20
1252	call		_cmll256_encrypt_1x
1253	add		%i0, 16, %i0
1254	sub		%i2, 1, %i2
1255
1256	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1257	add		%i1, 8, %i1
1258	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1259	brnz,pt		%i2, .L256_cbc_enc_blk_loop
1260	add		%i1, 8, %i1
1261
1262	membar		#StoreLoad|#StoreStore
1263	brnz,pt		%l5, .L256_cbc_enc_loop
1264	mov		%l5, %i2
1265	st		%f0, [%i4 + 0]
1266	st		%f1, [%i4 + 4]
1267	st		%f2, [%i4 + 8]
1268	st		%f3, [%i4 + 12]
1269	ret
1270	restore
1271.type	cmll256_t4_cbc_encrypt,#function
1272.size	cmll256_t4_cbc_encrypt,.-cmll256_t4_cbc_encrypt
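! CBC decryption takes the same arguments as the encrypt routines.  The
! previous ciphertext block is carried in %f12/%f14 for the IV chaining,
! and blocks are decrypted two at a time (.L128_cbc_dec_loop2x) whenever
! the remaining count allows it.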
1273.globl	cmll128_t4_cbc_decrypt
1274.align	32
1275cmll128_t4_cbc_decrypt:
1276	save		%sp, -STACK_FRAME, %sp
1277	cmp		%i2, 0
1278	be,pn		SIZE_T_CC, .L128_cbc_dec_abort
1279	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1280	sub		%i0, %i1, %l5	! %i0!=%i1
1281	ld		[%i4 + 0], %f12	! load ivec
1282	ld		[%i4 + 4], %f13
1283	ld		[%i4 + 8], %f14
1284	ld		[%i4 + 12], %f15
1285	prefetch	[%i0], 20
1286	prefetch	[%i0 + 63], 20
1287	call		_cmll128_load_deckey
1288	and		%i0, 7, %l0
1289	andn		%i0, 7, %i0
1290	sll		%l0, 3, %l0
1291	mov		64, %l1
1292	mov		0xff, %l3
1293	sub		%l1, %l0, %l1
1294	and		%i1, 7, %l2
1295	cmp		%i2, 255
1296	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1297	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
1298	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
1299	srl		%l3, %l2, %l3
1300
1301	andcc		%i2, 16, %g0		! is number of blocks even?
1302	srlx		%i2, 4, %i2
1303	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1304	bz		%icc, .L128_cbc_dec_loop2x
1305	prefetch	[%i1], 22
1306.L128_cbc_dec_loop:
1307	ldx		[%i0 + 0], %o0
1308	brz,pt		%l0, 4f
1309	ldx		[%i0 + 8], %o1
1310
1311	ldx		[%i0 + 16], %o2
1312	sllx		%o0, %l0, %o0
1313	srlx		%o1, %l1, %g1
1314	sllx		%o1, %l0, %o1
1315	or		%g1, %o0, %o0
1316	srlx		%o2, %l1, %o2
1317	or		%o2, %o1, %o1
13184:
1319	xor		%g4, %o0, %o2		! ^= rk[0]
1320	xor		%g5, %o1, %o3
1321	.word	0x81b0230a !movxtod	%o2,%f0
1322	.word	0x85b0230b !movxtod	%o3,%f2
1323
1324	prefetch	[%i1 + 63], 22
1325	prefetch	[%i0 + 16+63], 20
1326	call		_cmll128_decrypt_1x
1327	add		%i0, 16, %i0
1328
1329	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1330	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1331	.word	0x99b02308 !movxtod	%o0,%f12
1332	.word	0x9db02309 !movxtod	%o1,%f14
1333
1334	brnz,pn		%l2, 2f
1335	sub		%i2, 1, %i2
1336
1337	std		%f0, [%i1 + 0]
1338	std		%f2, [%i1 + 8]
1339	brnz,pt		%i2, .L128_cbc_dec_loop2x
1340	add		%i1, 16, %i1
1341	st		%f12, [%i4 + 0]
1342	st		%f13, [%i4 + 4]
1343	st		%f14, [%i4 + 8]
1344	st		%f15, [%i4 + 12]
1345.L128_cbc_dec_abort:
1346	ret
1347	restore
1348
1349.align	16
13502:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1351						! and ~3x deterioration
1352						! in inp==out case
1353	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1354	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1355	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1356
1357	stda		%f4, [%i1 + %l3]0xc0	! partial store
1358	std		%f6, [%i1 + 8]
1359	add		%i1, 16, %i1
1360	orn		%g0, %l3, %l3
1361	stda		%f8, [%i1 + %l3]0xc0	! partial store
1362
1363	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1364	orn		%g0, %l3, %l3
1365	st		%f12, [%i4 + 0]
1366	st		%f13, [%i4 + 4]
1367	st		%f14, [%i4 + 8]
1368	st		%f15, [%i4 + 12]
1369	ret
1370	restore
1371
1372!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1373.align	32
1374.L128_cbc_dec_loop2x:
1375	ldx		[%i0 + 0], %o0
1376	ldx		[%i0 + 8], %o1
1377	ldx		[%i0 + 16], %o2
1378	brz,pt		%l0, 4f
1379	ldx		[%i0 + 24], %o3
1380
1381	ldx		[%i0 + 32], %o4
1382	sllx		%o0, %l0, %o0
1383	srlx		%o1, %l1, %g1
1384	or		%g1, %o0, %o0
1385	sllx		%o1, %l0, %o1
1386	srlx		%o2, %l1, %g1
1387	or		%g1, %o1, %o1
1388	sllx		%o2, %l0, %o2
1389	srlx		%o3, %l1, %g1
1390	or		%g1, %o2, %o2
1391	sllx		%o3, %l0, %o3
1392	srlx		%o4, %l1, %o4
1393	or		%o4, %o3, %o3
13944:
1395	xor		%g4, %o0, %o4		! ^= rk[0]
1396	xor		%g5, %o1, %o5
1397	.word	0x81b0230c !movxtod	%o4,%f0
1398	.word	0x85b0230d !movxtod	%o5,%f2
1399	xor		%g4, %o2, %o4
1400	xor		%g5, %o3, %o5
1401	.word	0x89b0230c !movxtod	%o4,%f4
1402	.word	0x8db0230d !movxtod	%o5,%f6
1403
1404	prefetch	[%i1 + 63], 22
1405	prefetch	[%i0 + 32+63], 20
1406	call		_cmll128_decrypt_2x
1407	add		%i0, 32, %i0
1408
1409	.word	0x91b02308 !movxtod	%o0,%f8
1410	.word	0x95b02309 !movxtod	%o1,%f10
1411	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1412	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1413	.word	0x99b0230a !movxtod	%o2,%f12
1414	.word	0x9db0230b !movxtod	%o3,%f14
1415	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1416	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1417
1418	brnz,pn		%l2, 2f
1419	sub		%i2, 2, %i2
1420
1421	std		%f0, [%i1 + 0]
1422	std		%f2, [%i1 + 8]
1423	std		%f4, [%i1 + 16]
1424	std		%f6, [%i1 + 24]
1425	brnz,pt		%i2, .L128_cbc_dec_loop2x
1426	add		%i1, 32, %i1
1427	st		%f12, [%i4 + 0]
1428	st		%f13, [%i4 + 4]
1429	st		%f14, [%i4 + 8]
1430	st		%f15, [%i4 + 12]
1431	ret
1432	restore
1433
1434.align	16
14352:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1436						! and ~3x deterioration
1437						! in inp==out case
1438	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1439	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1440	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1441	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1442	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1443	stda		%f8, [%i1 + %l3]0xc0	! partial store
1444	std		%f0, [%i1 + 8]
1445	std		%f2, [%i1 + 16]
1446	std		%f4, [%i1 + 24]
1447	add		%i1, 32, %i1
1448	orn		%g0, %l3, %l3
1449	stda		%f6, [%i1 + %l3]0xc0	! partial store
1450
1451	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1452	orn		%g0, %l3, %l3
1453	st		%f12, [%i4 + 0]
1454	st		%f13, [%i4 + 4]
1455	st		%f14, [%i4 + 8]
1456	st		%f15, [%i4 + 12]
1457	ret
1458	restore
1459
1460!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1461.align	32
1462.L128cbc_dec_blk:
1463	add	%i1, %i2, %l5
1464	and	%l5, 63, %l5	! tail
1465	sub	%i2, %l5, %i2
1466	add	%l5, 15, %l5	! round up to 16n
1467	srlx	%i2, 4, %i2
1468	srl	%l5, 4, %l5
1469	sub	%i2, 1, %i2
1470	add	%l5, 1, %l5
1471
1472.L128_cbc_dec_blk_loop2x:
1473	ldx		[%i0 + 0], %o0
1474	ldx		[%i0 + 8], %o1
1475	ldx		[%i0 + 16], %o2
1476	brz,pt		%l0, 5f
1477	ldx		[%i0 + 24], %o3
1478
1479	ldx		[%i0 + 32], %o4
1480	sllx		%o0, %l0, %o0
1481	srlx		%o1, %l1, %g1
1482	or		%g1, %o0, %o0
1483	sllx		%o1, %l0, %o1
1484	srlx		%o2, %l1, %g1
1485	or		%g1, %o1, %o1
1486	sllx		%o2, %l0, %o2
1487	srlx		%o3, %l1, %g1
1488	or		%g1, %o2, %o2
1489	sllx		%o3, %l0, %o3
1490	srlx		%o4, %l1, %o4
1491	or		%o4, %o3, %o3
14925:
1493	xor		%g4, %o0, %o4		! ^= rk[0]
1494	xor		%g5, %o1, %o5
1495	.word	0x81b0230c !movxtod	%o4,%f0
1496	.word	0x85b0230d !movxtod	%o5,%f2
1497	xor		%g4, %o2, %o4
1498	xor		%g5, %o3, %o5
1499	.word	0x89b0230c !movxtod	%o4,%f4
1500	.word	0x8db0230d !movxtod	%o5,%f6
1501
1502	prefetch	[%i0 + 32+63], 20
1503	call		_cmll128_decrypt_2x
1504	add		%i0, 32, %i0
1505	subcc		%i2, 2, %i2
1506
1507	.word	0x91b02308 !movxtod	%o0,%f8
1508	.word	0x95b02309 !movxtod	%o1,%f10
1509	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1510	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1511	.word	0x99b0230a !movxtod	%o2,%f12
1512	.word	0x9db0230b !movxtod	%o3,%f14
1513	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1514	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1515
1516	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1517	add		%i1, 8, %i1
1518	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1519	add		%i1, 8, %i1
1520	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1521	add		%i1, 8, %i1
1522	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1523	bgu,pt		SIZE_T_CC, .L128_cbc_dec_blk_loop2x
1524	add		%i1, 8, %i1
1525
1526	add		%l5, %i2, %i2
1527	andcc		%i2, 1, %g0		! is number of blocks even?
1528	membar		#StoreLoad|#StoreStore
1529	bnz,pt		%icc, .L128_cbc_dec_loop
1530	srl		%i2, 0, %i2
1531	brnz,pn		%i2, .L128_cbc_dec_loop2x
1532	nop
1533	st		%f12, [%i4 + 0]	! write out ivec
1534	st		%f13, [%i4 + 4]
1535	st		%f14, [%i4 + 8]
1536	st		%f15, [%i4 + 12]
1537	ret
1538	restore
1539.type	cmll128_t4_cbc_decrypt,#function
1540.size	cmll128_t4_cbc_decrypt,.-cmll128_t4_cbc_decrypt
1541.globl	cmll256_t4_cbc_decrypt
1542.align	32
1543cmll256_t4_cbc_decrypt:
1544	save		%sp, -STACK_FRAME, %sp
1545	cmp		%i2, 0
1546	be,pn		SIZE_T_CC, .L256_cbc_dec_abort
1547	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1548	sub		%i0, %i1, %l5	! %i0!=%i1
1549	ld		[%i4 + 0], %f12	! load ivec
1550	ld		[%i4 + 4], %f13
1551	ld		[%i4 + 8], %f14
1552	ld		[%i4 + 12], %f15
1553	prefetch	[%i0], 20
1554	prefetch	[%i0 + 63], 20
1555	call		_cmll256_load_deckey
1556	and		%i0, 7, %l0
1557	andn		%i0, 7, %i0
1558	sll		%l0, 3, %l0
1559	mov		64, %l1
1560	mov		0xff, %l3
1561	sub		%l1, %l0, %l1
1562	and		%i1, 7, %l2
1563	cmp		%i2, 255
1564	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1565	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
1566	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
1567	srl		%l3, %l2, %l3
1568
1569	andcc		%i2, 16, %g0		! is number of blocks even?
1570	srlx		%i2, 4, %i2
1571	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1572	bz		%icc, .L256_cbc_dec_loop2x
1573	prefetch	[%i1], 22
1574.L256_cbc_dec_loop:
1575	ldx		[%i0 + 0], %o0
1576	brz,pt		%l0, 4f
1577	ldx		[%i0 + 8], %o1
1578
1579	ldx		[%i0 + 16], %o2
1580	sllx		%o0, %l0, %o0
1581	srlx		%o1, %l1, %g1
1582	sllx		%o1, %l0, %o1
1583	or		%g1, %o0, %o0
1584	srlx		%o2, %l1, %o2
1585	or		%o2, %o1, %o1
15864:
1587	xor		%g4, %o0, %o2		! ^= rk[0]
1588	xor		%g5, %o1, %o3
1589	.word	0x81b0230a !movxtod	%o2,%f0
1590	.word	0x85b0230b !movxtod	%o3,%f2
1591
1592	prefetch	[%i1 + 63], 22
1593	prefetch	[%i0 + 16+63], 20
1594	call		_cmll256_decrypt_1x
1595	add		%i0, 16, %i0
1596
1597	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1598	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1599	.word	0x99b02308 !movxtod	%o0,%f12
1600	.word	0x9db02309 !movxtod	%o1,%f14
1601
1602	brnz,pn		%l2, 2f
1603	sub		%i2, 1, %i2
1604
1605	std		%f0, [%i1 + 0]
1606	std		%f2, [%i1 + 8]
1607	brnz,pt		%i2, .L256_cbc_dec_loop2x
1608	add		%i1, 16, %i1
1609	st		%f12, [%i4 + 0]
1610	st		%f13, [%i4 + 4]
1611	st		%f14, [%i4 + 8]
1612	st		%f15, [%i4 + 12]
1613.L256_cbc_dec_abort:
1614	ret
1615	restore
1616
1617.align	16
16182:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1619						! and ~3x deterioration
1620						! in inp==out case
1621	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1622	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1623	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1624
1625	stda		%f4, [%i1 + %l3]0xc0	! partial store
1626	std		%f6, [%i1 + 8]
1627	add		%i1, 16, %i1
1628	orn		%g0, %l3, %l3
1629	stda		%f8, [%i1 + %l3]0xc0	! partial store
1630
1631	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
1632	orn		%g0, %l3, %l3
1633	st		%f12, [%i4 + 0]
1634	st		%f13, [%i4 + 4]
1635	st		%f14, [%i4 + 8]
1636	st		%f15, [%i4 + 12]
1637	ret
1638	restore
1639
1640!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1641.align	32
1642.L256_cbc_dec_loop2x:
1643	ldx		[%i0 + 0], %o0
1644	ldx		[%i0 + 8], %o1
1645	ldx		[%i0 + 16], %o2
1646	brz,pt		%l0, 4f
1647	ldx		[%i0 + 24], %o3
1648
1649	ldx		[%i0 + 32], %o4
1650	sllx		%o0, %l0, %o0
1651	srlx		%o1, %l1, %g1
1652	or		%g1, %o0, %o0
1653	sllx		%o1, %l0, %o1
1654	srlx		%o2, %l1, %g1
1655	or		%g1, %o1, %o1
1656	sllx		%o2, %l0, %o2
1657	srlx		%o3, %l1, %g1
1658	or		%g1, %o2, %o2
1659	sllx		%o3, %l0, %o3
1660	srlx		%o4, %l1, %o4
1661	or		%o4, %o3, %o3
16624:
1663	xor		%g4, %o0, %o4		! ^= rk[0]
1664	xor		%g5, %o1, %o5
1665	.word	0x81b0230c !movxtod	%o4,%f0
1666	.word	0x85b0230d !movxtod	%o5,%f2
1667	xor		%g4, %o2, %o4
1668	xor		%g5, %o3, %o5
1669	.word	0x89b0230c !movxtod	%o4,%f4
1670	.word	0x8db0230d !movxtod	%o5,%f6
1671
1672	prefetch	[%i1 + 63], 22
1673	prefetch	[%i0 + 32+63], 20
1674	call		_cmll256_decrypt_2x
1675	add		%i0, 32, %i0
1676
1677	.word	0x91b02308 !movxtod	%o0,%f8
1678	.word	0x95b02309 !movxtod	%o1,%f10
1679	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1680	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1681	.word	0x99b0230a !movxtod	%o2,%f12
1682	.word	0x9db0230b !movxtod	%o3,%f14
1683	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1684	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1685
1686	brnz,pn		%l2, 2f
1687	sub		%i2, 2, %i2
1688
1689	std		%f0, [%i1 + 0]
1690	std		%f2, [%i1 + 8]
1691	std		%f4, [%i1 + 16]
1692	std		%f6, [%i1 + 24]
1693	brnz,pt		%i2, .L256_cbc_dec_loop2x
1694	add		%i1, 32, %i1
1695	st		%f12, [%i4 + 0]
1696	st		%f13, [%i4 + 4]
1697	st		%f14, [%i4 + 8]
1698	st		%f15, [%i4 + 12]
1699	ret
1700	restore
1701
1702.align	16
17032:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1704						! and ~3x deterioration
1705						! in inp==out case
1706	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1707	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1708	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1709	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1710	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1711	stda		%f8, [%i1 + %l3]0xc0	! partial store
1712	std		%f0, [%i1 + 8]
1713	std		%f2, [%i1 + 16]
1714	std		%f4, [%i1 + 24]
1715	add		%i1, 32, %i1
1716	orn		%g0, %l3, %l3
1717	stda		%f6, [%i1 + %l3]0xc0	! partial store
1718
1719	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
1720	orn		%g0, %l3, %l3
1721	st		%f12, [%i4 + 0]
1722	st		%f13, [%i4 + 4]
1723	st		%f14, [%i4 + 8]
1724	st		%f15, [%i4 + 12]
1725	ret
1726	restore
1727
1728!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1729.align	32
1730.L256cbc_dec_blk:
1731	add	%i1, %i2, %l5
1732	and	%l5, 63, %l5	! tail
1733	sub	%i2, %l5, %i2
1734	add	%l5, 15, %l5	! round up to 16n
1735	srlx	%i2, 4, %i2
1736	srl	%l5, 4, %l5
1737	sub	%i2, 1, %i2
1738	add	%l5, 1, %l5
1739
1740.L256_cbc_dec_blk_loop2x:
1741	ldx		[%i0 + 0], %o0
1742	ldx		[%i0 + 8], %o1
1743	ldx		[%i0 + 16], %o2
1744	brz,pt		%l0, 5f
1745	ldx		[%i0 + 24], %o3
1746
1747	ldx		[%i0 + 32], %o4
1748	sllx		%o0, %l0, %o0
1749	srlx		%o1, %l1, %g1
1750	or		%g1, %o0, %o0
1751	sllx		%o1, %l0, %o1
1752	srlx		%o2, %l1, %g1
1753	or		%g1, %o1, %o1
1754	sllx		%o2, %l0, %o2
1755	srlx		%o3, %l1, %g1
1756	or		%g1, %o2, %o2
1757	sllx		%o3, %l0, %o3
1758	srlx		%o4, %l1, %o4
1759	or		%o4, %o3, %o3
17605:
1761	xor		%g4, %o0, %o4		! ^= rk[0]
1762	xor		%g5, %o1, %o5
1763	.word	0x81b0230c !movxtod	%o4,%f0
1764	.word	0x85b0230d !movxtod	%o5,%f2
1765	xor		%g4, %o2, %o4
1766	xor		%g5, %o3, %o5
1767	.word	0x89b0230c !movxtod	%o4,%f4
1768	.word	0x8db0230d !movxtod	%o5,%f6
1769
1770	prefetch	[%i0 + 32+63], 20
1771	call		_cmll256_decrypt_2x
1772	add		%i0, 32, %i0
1773	subcc		%i2, 2, %i2
1774
1775	.word	0x91b02308 !movxtod	%o0,%f8
1776	.word	0x95b02309 !movxtod	%o1,%f10
1777	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1778	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1779	.word	0x99b0230a !movxtod	%o2,%f12
1780	.word	0x9db0230b !movxtod	%o3,%f14
1781	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1782	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1783
1784	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1785	add		%i1, 8, %i1
1786	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1787	add		%i1, 8, %i1
1788	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1789	add		%i1, 8, %i1
1790	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1791	bgu,pt		SIZE_T_CC, .L256_cbc_dec_blk_loop2x
1792	add		%i1, 8, %i1
1793
1794	add		%l5, %i2, %i2
1795	andcc		%i2, 1, %g0		! is number of blocks even?
1796	membar		#StoreLoad|#StoreStore
1797	bnz,pt		%icc, .L256_cbc_dec_loop
1798	srl		%i2, 0, %i2
1799	brnz,pn		%i2, .L256_cbc_dec_loop2x
1800	nop
1801	st		%f12, [%i4 + 0]	! write out ivec
1802	st		%f13, [%i4 + 4]
1803	st		%f14, [%i4 + 8]
1804	st		%f15, [%i4 + 12]
1805	ret
1806	restore
1807.type	cmll256_t4_cbc_decrypt,#function
1808.size	cmll256_t4_cbc_decrypt,.-cmll256_t4_cbc_decrypt
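! CTR32 mode.  Here %i2 appears to be a block count rather than a byte
! count (it is immediately shifted left by 4), %i4 points at the 16-byte
! counter block, and only the low 32 bits of the counter, kept in %l7,
! are incremented.  The first two rounds are peeled and fused with the
! counter/key whitening, which is why the round helpers are entered at
! +8/+16.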
1809.globl	cmll128_t4_ctr32_encrypt
1810.align	32
1811cmll128_t4_ctr32_encrypt:
1812	save		%sp, -STACK_FRAME, %sp
1813	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1814
1815	prefetch	[%i0], 20
1816	prefetch	[%i0 + 63], 20
1817	call		_cmll128_load_enckey
1818	sllx		%i2, 4, %i2
1819
1820	ld		[%i4 + 0], %l4	! counter
1821	ld		[%i4 + 4], %l5
1822	ld		[%i4 + 8], %l6
1823	ld		[%i4 + 12], %l7
1824
1825	sllx		%l4, 32, %o5
1826	or		%l5, %o5, %o5
1827	sllx		%l6, 32, %g1
1828	xor		%o5, %g4, %g4		! ^= rk[0]
1829	xor		%g1, %g5, %g5
1830	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
1831
1832	sub		%i0, %i1, %l5	! %i0!=%i1
1833	and		%i0, 7, %l0
1834	andn		%i0, 7, %i0
1835	sll		%l0, 3, %l0
1836	mov		64, %l1
1837	mov		0xff, %l3
1838	sub		%l1, %l0, %l1
1839	and		%i1, 7, %l2
1840	cmp		%i2, 255
1841	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1842	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
1843	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
1844	srl		%l3, %l2, %l3
1845
1846	andcc		%i2, 16, %g0		! is number of blocks even?
1847	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1848	bz		%icc, .L128_ctr32_loop2x
1849	srlx		%i2, 4, %i2
1850.L128_ctr32_loop:
1851	ldx		[%i0 + 0], %o0
1852	brz,pt		%l0, 4f
1853	ldx		[%i0 + 8], %o1
1854
1855	ldx		[%i0 + 16], %o2
1856	sllx		%o0, %l0, %o0
1857	srlx		%o1, %l1, %g1
1858	sllx		%o1, %l0, %o1
1859	or		%g1, %o0, %o0
1860	srlx		%o2, %l1, %o2
1861	or		%o2, %o1, %o1
18624:
1863	xor		%g5, %l7, %g1		! ^= rk[0]
1864	add		%l7, 1, %l7
1865	.word	0x85b02301 !movxtod	%g1,%f2
1866	srl		%l7, 0, %l7		! clruw
1867	prefetch	[%i1 + 63], 22
1868	prefetch	[%i0 + 16+63], 20
1869	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
1870	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
1871	call		_cmll128_encrypt_1x+8
1872	add		%i0, 16, %i0
1873
1874	.word	0x95b02308 !movxtod	%o0,%f10
1875	.word	0x99b02309 !movxtod	%o1,%f12
1876	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
1877	.word	0x85b30d82 !fxor	%f12,%f2,%f2
1878
1879	brnz,pn		%l2, 2f
1880	sub		%i2, 1, %i2
1881
1882	std		%f0, [%i1 + 0]
1883	std		%f2, [%i1 + 8]
1884	brnz,pt		%i2, .L128_ctr32_loop2x
1885	add		%i1, 16, %i1
1886
1887	ret
1888	restore
1889
1890.align	16
18912:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1892						! and ~3x deterioration
1893						! in inp==out case
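! Misaligned output: repack %f0:%f2 across the doubleword boundary with
! faligndata (alignaddrl set the offset earlier), then store the first and
! last partial doublewords with edge-masked partial stores (%l3 and its
! complement).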
1894	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1895	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1896	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1897	stda		%f4, [%i1 + %l3]0xc0	! partial store
1898	std		%f6, [%i1 + 8]
1899	add		%i1, 16, %i1
1900	orn		%g0, %l3, %l3
1901	stda		%f8, [%i1 + %l3]0xc0	! partial store
1902
1903	brnz,pt		%i2, .L128_ctr32_loop2x+4
1904	orn		%g0, %l3, %l3
1905
1906	ret
1907	restore
1908
1909!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
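! Main path: two blocks per iteration.  Two consecutive counter values are
! whitened into %f2 and %f6, the first two F-rounds of each are issued
! inline, and _cmll128_encrypt_2x is entered 16 bytes in (presumably past
! the four instructions already done here); the results are XORed with
! 32 bytes of input.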
1910.align	32
1911.L128_ctr32_loop2x:
1912	ldx		[%i0 + 0], %o0
1913	ldx		[%i0 + 8], %o1
1914	ldx		[%i0 + 16], %o2
1915	brz,pt		%l0, 4f
1916	ldx		[%i0 + 24], %o3
1917
1918	ldx		[%i0 + 32], %o4
1919	sllx		%o0, %l0, %o0
1920	srlx		%o1, %l1, %g1
1921	or		%g1, %o0, %o0
1922	sllx		%o1, %l0, %o1
1923	srlx		%o2, %l1, %g1
1924	or		%g1, %o1, %o1
1925	sllx		%o2, %l0, %o2
1926	srlx		%o3, %l1, %g1
1927	or		%g1, %o2, %o2
1928	sllx		%o3, %l0, %o3
1929	srlx		%o4, %l1, %o4
1930	or		%o4, %o3, %o3
19314:
1932	xor		%g5, %l7, %g1		! ^= rk[0]
1933	add		%l7, 1, %l7
1934	.word	0x85b02301 !movxtod	%g1,%f2
1935	srl		%l7, 0, %l7		! clruw
1936	xor		%g5, %l7, %g1
1937	add		%l7, 1, %l7
1938	.word	0x8db02301 !movxtod	%g1,%f6
1939	srl		%l7, 0, %l7		! clruw
1940	prefetch	[%i1 + 63], 22
1941	prefetch	[%i0 + 32+63], 20
1942	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
1943	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
1944	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
1945	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
1946	call		_cmll128_encrypt_2x+16
1947	add		%i0, 32, %i0
1948
1949	.word	0x91b02308 !movxtod	%o0,%f8
1950	.word	0x95b02309 !movxtod	%o1,%f10
1951	.word	0x99b0230a !movxtod	%o2,%f12
1952	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
1953	.word	0x91b0230b !movxtod	%o3,%f8
1954	.word	0x85b28d82 !fxor	%f10,%f2,%f2
1955	.word	0x89b30d84 !fxor	%f12,%f4,%f4
1956	.word	0x8db20d86 !fxor	%f8,%f6,%f6
1957
1958	brnz,pn		%l2, 2f
1959	sub		%i2, 2, %i2
1960
1961	std		%f0, [%i1 + 0]
1962	std		%f2, [%i1 + 8]
1963	std		%f4, [%i1 + 16]
1964	std		%f6, [%i1 + 24]
1965	brnz,pt		%i2, .L128_ctr32_loop2x
1966	add		%i1, 32, %i1
1967
1968	ret
1969	restore
1970
1971.align	16
19722:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1973						! and ~3x deterioration
1974						! in inp==out case
1975	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1976	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1977	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1978	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1979	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1980
1981	stda		%f8, [%i1 + %l3]0xc0	! partial store
1982	std		%f0, [%i1 + 8]
1983	std		%f2, [%i1 + 16]
1984	std		%f4, [%i1 + 24]
1985	add		%i1, 32, %i1
1986	orn		%g0, %l3, %l3
1987	stda		%f6, [%i1 + %l3]0xc0	! partial store
1988
1989	brnz,pt		%i2, .L128_ctr32_loop2x+4
1990	orn		%g0, %l3, %l3
1991
1992	ret
1993	restore
1994
1995!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
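! Bulk path for large, mutually aligned jobs: encrypt two blocks per
! iteration and write the output with block-initializing stores (ASI 0xe2).
! The arithmetic below splits off the sub-64-byte tail of the output as a
! block count in %l5, handled later by the ordinary loops; the -1/+1 bias
! lets the subcc/bgu loop below exit with any leftover in %i2, which is
! added back to the tail afterwards.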
1996.align	32
1997.L128_ctr32_blk:
1998	add	%i1, %i2, %l5
1999	and	%l5, 63, %l5	! tail
2000	sub	%i2, %l5, %i2
2001	add	%l5, 15, %l5	! round up to 16n
2002	srlx	%i2, 4, %i2
2003	srl	%l5, 4, %l5
2004	sub	%i2, 1, %i2
2005	add	%l5, 1, %l5
2006
2007.L128_ctr32_blk_loop2x:
2008	ldx		[%i0 + 0], %o0
2009	ldx		[%i0 + 8], %o1
2010	ldx		[%i0 + 16], %o2
2011	brz,pt		%l0, 5f
2012	ldx		[%i0 + 24], %o3
2013
2014	ldx		[%i0 + 32], %o4
2015	sllx		%o0, %l0, %o0
2016	srlx		%o1, %l1, %g1
2017	or		%g1, %o0, %o0
2018	sllx		%o1, %l0, %o1
2019	srlx		%o2, %l1, %g1
2020	or		%g1, %o1, %o1
2021	sllx		%o2, %l0, %o2
2022	srlx		%o3, %l1, %g1
2023	or		%g1, %o2, %o2
2024	sllx		%o3, %l0, %o3
2025	srlx		%o4, %l1, %o4
2026	or		%o4, %o3, %o3
20275:
2028	xor		%g5, %l7, %g1		! ^= rk[0]
2029	add		%l7, 1, %l7
2030	.word	0x85b02301 !movxtod	%g1,%f2
2031	srl		%l7, 0, %l7		! clruw
2032	xor		%g5, %l7, %g1
2033	add		%l7, 1, %l7
2034	.word	0x8db02301 !movxtod	%g1,%f6
2035	srl		%l7, 0, %l7		! clruw
2036	prefetch	[%i0 + 32+63], 20
2037	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2038	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2039	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2040	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2041	call		_cmll128_encrypt_2x+16
2042	add		%i0, 32, %i0
2043	subcc		%i2, 2, %i2
2044
2045	.word	0x91b02308 !movxtod	%o0,%f8
2046	.word	0x95b02309 !movxtod	%o1,%f10
2047	.word	0x99b0230a !movxtod	%o2,%f12
2048	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2049	.word	0x91b0230b !movxtod	%o3,%f8
2050	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2051	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2052	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2053
2054	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2055	add		%i1, 8, %i1
2056	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2057	add		%i1, 8, %i1
2058	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2059	add		%i1, 8, %i1
2060	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2061	bgu,pt		SIZE_T_CC, .L128_ctr32_blk_loop2x
2062	add		%i1, 8, %i1
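! Block-store loop done: fold the leftover count back into the tail
! (%i2 += %l5).  The membar is presumably needed because block-initializing
! stores are not ordered like normal stores; an odd block is finished by
! the 1x loop and the rest by the 2x loop, with srl clearing the upper
! half of %i2 in the delay slot.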
2063
2064	add		%l5, %i2, %i2
2065	andcc		%i2, 1, %g0		! is number of blocks even?
2066	membar		#StoreLoad|#StoreStore
2067	bnz,pt		%icc, .L128_ctr32_loop
2068	srl		%i2, 0, %i2
2069	brnz,pn		%i2, .L128_ctr32_loop2x
2070	nop
2071
2072	ret
2073	restore
2074.type	cmll128_t4_ctr32_encrypt,#function
2075.size	cmll128_t4_ctr32_encrypt,.-cmll128_t4_ctr32_encrypt
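! cmll256_t4_ctr32_encrypt below follows the same structure as the 128-bit
! routine above, differing only in the key-schedule loader and shared
! encrypt bodies it calls (_cmll256_load_enckey, _cmll256_encrypt_1x and
! _cmll256_encrypt_2x).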
2076.globl	cmll256_t4_ctr32_encrypt
2077.align	32
2078cmll256_t4_ctr32_encrypt:
2079	save		%sp, -STACK_FRAME, %sp
2080	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2081
2082	prefetch	[%i0], 20
2083	prefetch	[%i0 + 63], 20
2084	call		_cmll256_load_enckey
2085	sllx		%i2, 4, %i2
2086
2087	ld		[%i4 + 0], %l4	! counter
2088	ld		[%i4 + 4], %l5
2089	ld		[%i4 + 8], %l6
2090	ld		[%i4 + 12], %l7
2091
2092	sllx		%l4, 32, %o5
2093	or		%l5, %o5, %o5
2094	sllx		%l6, 32, %g1
2095	xor		%o5, %g4, %g4		! ^= rk[0]
2096	xor		%g1, %g5, %g5
2097	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
2098
2099	sub		%i0, %i1, %l5	! %i0!=%i1
2100	and		%i0, 7, %l0
2101	andn		%i0, 7, %i0
2102	sll		%l0, 3, %l0
2103	mov		64, %l1
2104	mov		0xff, %l3
2105	sub		%l1, %l0, %l1
2106	and		%i1, 7, %l2
2107	cmp		%i2, 255
2108	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2109	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
2110	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
2111	srl		%l3, %l2, %l3
2112
2113	andcc		%i2, 16, %g0		! is number of blocks even?
2114	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2115	bz		%icc, .L256_ctr32_loop2x
2116	srlx		%i2, 4, %i2
2117.L256_ctr32_loop:
2118	ldx		[%i0 + 0], %o0
2119	brz,pt		%l0, 4f
2120	ldx		[%i0 + 8], %o1
2121
2122	ldx		[%i0 + 16], %o2
2123	sllx		%o0, %l0, %o0
2124	srlx		%o1, %l1, %g1
2125	sllx		%o1, %l0, %o1
2126	or		%g1, %o0, %o0
2127	srlx		%o2, %l1, %o2
2128	or		%o2, %o1, %o1
21294:
2130	xor		%g5, %l7, %g1		! ^= rk[0]
2131	add		%l7, 1, %l7
2132	.word	0x85b02301 !movxtod	%g1,%f2
2133	srl		%l7, 0, %l7		! clruw
2134	prefetch	[%i1 + 63], 22
2135	prefetch	[%i0 + 16+63], 20
2136	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2137	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2138	call		_cmll256_encrypt_1x+8
2139	add		%i0, 16, %i0
2140
2141	.word	0x95b02308 !movxtod	%o0,%f10
2142	.word	0x99b02309 !movxtod	%o1,%f12
2143	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
2144	.word	0x85b30d82 !fxor	%f12,%f2,%f2
2145
2146	brnz,pn		%l2, 2f
2147	sub		%i2, 1, %i2
2148
2149	std		%f0, [%i1 + 0]
2150	std		%f2, [%i1 + 8]
2151	brnz,pt		%i2, .L256_ctr32_loop2x
2152	add		%i1, 16, %i1
2153
2154	ret
2155	restore
2156
2157.align	16
21582:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2159						! and ~3x deterioration
2160						! in inp==out case
2161	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2162	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2163	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2164	stda		%f4, [%i1 + %l3]0xc0	! partial store
2165	std		%f6, [%i1 + 8]
2166	add		%i1, 16, %i1
2167	orn		%g0, %l3, %l3
2168	stda		%f8, [%i1 + %l3]0xc0	! partial store
2169
2170	brnz,pt		%i2, .L256_ctr32_loop2x+4
2171	orn		%g0, %l3, %l3
2172
2173	ret
2174	restore
2175
2176!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2177.align	32
2178.L256_ctr32_loop2x:
2179	ldx		[%i0 + 0], %o0
2180	ldx		[%i0 + 8], %o1
2181	ldx		[%i0 + 16], %o2
2182	brz,pt		%l0, 4f
2183	ldx		[%i0 + 24], %o3
2184
2185	ldx		[%i0 + 32], %o4
2186	sllx		%o0, %l0, %o0
2187	srlx		%o1, %l1, %g1
2188	or		%g1, %o0, %o0
2189	sllx		%o1, %l0, %o1
2190	srlx		%o2, %l1, %g1
2191	or		%g1, %o1, %o1
2192	sllx		%o2, %l0, %o2
2193	srlx		%o3, %l1, %g1
2194	or		%g1, %o2, %o2
2195	sllx		%o3, %l0, %o3
2196	srlx		%o4, %l1, %o4
2197	or		%o4, %o3, %o3
21984:
2199	xor		%g5, %l7, %g1		! ^= rk[0]
2200	add		%l7, 1, %l7
2201	.word	0x85b02301 !movxtod	%g1,%f2
2202	srl		%l7, 0, %l7		! clruw
2203	xor		%g5, %l7, %g1
2204	add		%l7, 1, %l7
2205	.word	0x8db02301 !movxtod	%g1,%f6
2206	srl		%l7, 0, %l7		! clruw
2207	prefetch	[%i1 + 63], 22
2208	prefetch	[%i0 + 32+63], 20
2209	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2210	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2211	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2212	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2213	call		_cmll256_encrypt_2x+16
2214	add		%i0, 32, %i0
2215
2216	.word	0x91b02308 !movxtod	%o0,%f8
2217	.word	0x95b02309 !movxtod	%o1,%f10
2218	.word	0x99b0230a !movxtod	%o2,%f12
2219	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2220	.word	0x91b0230b !movxtod	%o3,%f8
2221	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2222	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2223	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2224
2225	brnz,pn		%l2, 2f
2226	sub		%i2, 2, %i2
2227
2228	std		%f0, [%i1 + 0]
2229	std		%f2, [%i1 + 8]
2230	std		%f4, [%i1 + 16]
2231	std		%f6, [%i1 + 24]
2232	brnz,pt		%i2, .L256_ctr32_loop2x
2233	add		%i1, 32, %i1
2234
2235	ret
2236	restore
2237
2238.align	16
22392:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2240						! and ~3x deterioration
2241						! in inp==out case
2242	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
2243	.word	0x81b00902 !faligndata	%f0,%f2,%f0
2244	.word	0x85b08904 !faligndata	%f2,%f4,%f2
2245	.word	0x89b10906 !faligndata	%f4,%f6,%f4
2246	.word	0x8db18906 !faligndata	%f6,%f6,%f6
2247
2248	stda		%f8, [%i1 + %l3]0xc0	! partial store
2249	std		%f0, [%i1 + 8]
2250	std		%f2, [%i1 + 16]
2251	std		%f4, [%i1 + 24]
2252	add		%i1, 32, %i1
2253	orn		%g0, %l3, %l3
2254	stda		%f6, [%i1 + %l3]0xc0	! partial store
2255
2256	brnz,pt		%i2, .L256_ctr32_loop2x+4
2257	orn		%g0, %l3, %l3
2258
2259	ret
2260	restore
2261
2262!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2263.align	32
2264.L256_ctr32_blk:
2265	add	%i1, %i2, %l5
2266	and	%l5, 63, %l5	! tail
2267	sub	%i2, %l5, %i2
2268	add	%l5, 15, %l5	! round up to 16n
2269	srlx	%i2, 4, %i2
2270	srl	%l5, 4, %l5
2271	sub	%i2, 1, %i2
2272	add	%l5, 1, %l5
2273
2274.L256_ctr32_blk_loop2x:
2275	ldx		[%i0 + 0], %o0
2276	ldx		[%i0 + 8], %o1
2277	ldx		[%i0 + 16], %o2
2278	brz,pt		%l0, 5f
2279	ldx		[%i0 + 24], %o3
2280
2281	ldx		[%i0 + 32], %o4
2282	sllx		%o0, %l0, %o0
2283	srlx		%o1, %l1, %g1
2284	or		%g1, %o0, %o0
2285	sllx		%o1, %l0, %o1
2286	srlx		%o2, %l1, %g1
2287	or		%g1, %o1, %o1
2288	sllx		%o2, %l0, %o2
2289	srlx		%o3, %l1, %g1
2290	or		%g1, %o2, %o2
2291	sllx		%o3, %l0, %o3
2292	srlx		%o4, %l1, %o4
2293	or		%o4, %o3, %o3
22945:
2295	xor		%g5, %l7, %g1		! ^= rk[0]
2296	add		%l7, 1, %l7
2297	.word	0x85b02301 !movxtod	%g1,%f2
2298	srl		%l7, 0, %l7		! clruw
2299	xor		%g5, %l7, %g1
2300	add		%l7, 1, %l7
2301	.word	0x8db02301 !movxtod	%g1,%f6
2302	srl		%l7, 0, %l7		! clruw
2303	prefetch	[%i0 + 32+63], 20
2304	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2305	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2306	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2307	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2308	call		_cmll256_encrypt_2x+16
2309	add		%i0, 32, %i0
2310	subcc		%i2, 2, %i2
2311
2312	.word	0x91b02308 !movxtod	%o0,%f8
2313	.word	0x95b02309 !movxtod	%o1,%f10
2314	.word	0x99b0230a !movxtod	%o2,%f12
2315	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2316	.word	0x91b0230b !movxtod	%o3,%f8
2317	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2318	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2319	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2320
2321	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2322	add		%i1, 8, %i1
2323	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2324	add		%i1, 8, %i1
2325	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2326	add		%i1, 8, %i1
2327	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2328	bgu,pt		SIZE_T_CC, .L256_ctr32_blk_loop2x
2329	add		%i1, 8, %i1
2330
2331	add		%l5, %i2, %i2
2332	andcc		%i2, 1, %g0		! is number of blocks even?
2333	membar		#StoreLoad|#StoreStore
2334	bnz,pt		%icc, .L256_ctr32_loop
2335	srl		%i2, 0, %i2
2336	brnz,pn		%i2, .L256_ctr32_loop2x
2337	nop
2338
2339	ret
2340	restore
2341.type	cmll256_t4_ctr32_encrypt,#function
2342.size	cmll256_t4_ctr32_encrypt,.-cmll256_t4_ctr32_encrypt
2343