! Source: /netbsd-src/crypto/external/bsd/openssl.old/lib/libcrypto/arch/sparc/cmllt4-sparcv9.S (revision 4724848cf0da353df257f730694b7882798e5daf)
1#include "sparc_arch.h"
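! Note: STACK_FRAME, SIZE_T_CC and the srln pseudo-op used by the bulk-mode
! entry points further down are presumably supplied by sparc_arch.h; they are
! not defined in this file.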
2
3.text
4
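! The camellia_f/camellia_fl/camellia_fli, movxtod/movdtox and related T4/VIS
! instructions in this file are emitted as raw .word constants with the
! intended mnemonic in a trailing comment, presumably so the file still
! assembles with toolchains that lack the T4 crypto extensions.
!
! cmll_t4_encrypt appears to take (in = %o0, out = %o1, key schedule = %o2),
! judging by the loads and stores below: it copes with misaligned input by
! shifting, runs grandRounds (read from key + 272) groups of six rounds plus
! FL/FLI, and falls back to faligndata with partial stores when the output is
! not 8-byte aligned.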
5.globl	cmll_t4_encrypt
6.align	32
7cmll_t4_encrypt:
8	andcc		%o0, 7, %g1		! is input aligned?
9	andn		%o0, 7, %o0
10
11	ldx		[%o2 + 0], %g4
12	ldx		[%o2 + 8], %g5
13
14	ldx		[%o0 + 0], %o4
15	bz,pt		%icc, 1f
16	ldx		[%o0 + 8], %o5
17	ldx		[%o0 + 16], %o0
18	sll		%g1, 3, %g1
19	sub		%g0, %g1, %o3
20	sllx		%o4, %g1, %o4
21	sllx		%o5, %g1, %g1
22	srlx		%o5, %o3, %o5
23	srlx		%o0, %o3, %o3
24	or		%o5, %o4, %o4
25	or		%o3, %g1, %o5
261:
27	ld		[%o2 + 272], %o3	! grandRounds, 3 or 4
28	ldd		[%o2 + 16], %f12
29	ldd		[%o2 + 24], %f14
30	xor		%g4, %o4, %o4
31	xor		%g5, %o5, %o5
32	ldd		[%o2 + 32], %f16
33	ldd		[%o2 + 40], %f18
34	.word	0x81b0230c !movxtod	%o4,%f0
35	.word	0x85b0230d !movxtod	%o5,%f2
36	ldd		[%o2 + 48], %f20
37	ldd		[%o2 + 56], %f22
38	sub		%o3, 1, %o3
39	ldd		[%o2 + 64], %f24
40	ldd		[%o2 + 72], %f26
41	add		%o2, 80, %o2
42
43.Lenc:
44	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
45	ldd		[%o2 + 0], %f12
46	sub		%o3,1,%o3
47	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
48	ldd		[%o2 + 8], %f14
49	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
50	ldd		[%o2 + 16], %f16
51	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
52	ldd		[%o2 + 24], %f18
53	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
54	ldd		[%o2 + 32], %f20
55	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
56	ldd		[%o2 + 40], %f22
57	.word	0x81b62780 !camellia_fl	%f24,%f0,%f0
58	ldd		[%o2 + 48], %f24
59	.word	0x85b6a7a2 !camellia_fli	%f26,%f2,%f2
60	ldd		[%o2 + 56], %f26
61	brnz,pt		%o3, .Lenc
62	add		%o2, 64, %o2
63
64	andcc		%o1, 7, %o4		! is output aligned?
65	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
66	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
67	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
68	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
69	.word	0x88cd0182 !camellia_f	%f20,%f2,%f0,%f4
70	.word	0x84cd8980 !camellia_f	%f22,%f0,%f4,%f2
71	.word	0x81b60d84 !fxor	%f24,%f4,%f0
72	.word	0x85b68d82 !fxor	%f26,%f2,%f2
73
74	bnz,pn		%icc, 2f
75	nop
76
77	std		%f0, [%o1 + 0]
78	retl
79	std		%f2, [%o1 + 8]
80
812:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
82	mov		0xff, %o5
83	srl		%o5, %o4, %o5
84
85	.word	0x89b00900 !faligndata	%f0,%f0,%f4
86	.word	0x8db00902 !faligndata	%f0,%f2,%f6
87	.word	0x91b08902 !faligndata	%f2,%f2,%f8
88
89	stda		%f4, [%o1 + %o5]0xc0	! partial store
90	std		%f6, [%o1 + 8]
91	add		%o1, 16, %o1
92	orn		%g0, %o5, %o5
93	retl
94	stda		%f8, [%o1 + %o5]0xc0	! partial store
95.type	cmll_t4_encrypt,#function
96.size	cmll_t4_encrypt,.-cmll_t4_encrypt
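! cmll_t4_decrypt mirrors the encrypt path: it advances the schedule pointer
! by grandRounds*64 bytes and reads the subkeys back at negative offsets, so
! the same round sequence runs with the subkeys in reverse order (and with the
! output whitening pair swapped).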
97
98.globl	cmll_t4_decrypt
99.align	32
100cmll_t4_decrypt:
101	ld		[%o2 + 272], %o3	! grandRounds, 3 or 4
102	andcc		%o0, 7, %g1		! is input aligned?
103	andn		%o0, 7, %o0
104
105	sll		%o3, 6, %o3
106	add		%o3, %o2, %o2
107
108	ldx		[%o0 + 0], %o4
109	bz,pt		%icc, 1f
110	ldx		[%o0 + 8], %o5
111	ldx		[%o0 + 16], %o0
112	sll		%g1, 3, %g1
113	sub		%g0, %g1, %g4
114	sllx		%o4, %g1, %o4
115	sllx		%o5, %g1, %g1
116	srlx		%o5, %g4, %o5
117	srlx		%o0, %g4, %g4
118	or		%o5, %o4, %o4
119	or		%g4, %g1, %o5
1201:
121	ldx		[%o2 + 0], %g4
122	ldx		[%o2 + 8], %g5
123	ldd		[%o2 - 8], %f12
124	ldd		[%o2 - 16], %f14
125	xor		%g4, %o4, %o4
126	xor		%g5, %o5, %o5
127	ldd		[%o2 - 24], %f16
128	ldd		[%o2 - 32], %f18
129	.word	0x81b0230c !movxtod	%o4,%f0
130	.word	0x85b0230d !movxtod	%o5,%f2
131	ldd		[%o2 - 40], %f20
132	ldd		[%o2 - 48], %f22
133	sub		%o3, 64, %o3
134	ldd		[%o2 - 56], %f24
135	ldd		[%o2 - 64], %f26
136	sub		%o2, 64, %o2
137
138.Ldec:
139	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
140	ldd		[%o2 - 8], %f12
141	sub		%o3, 64, %o3
142	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
143	ldd		[%o2 - 16], %f14
144	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
145	ldd		[%o2 - 24], %f16
146	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
147	ldd		[%o2 - 32], %f18
148	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
149	ldd		[%o2 - 40], %f20
150	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
151	ldd		[%o2 - 48], %f22
152	.word	0x81b62780 !camellia_fl	%f24,%f0,%f0
153	ldd		[%o2 - 56], %f24
154	.word	0x85b6a7a2 !camellia_fli	%f26,%f2,%f2
155	ldd		[%o2 - 64], %f26
156	brnz,pt		%o3, .Ldec
157	sub		%o2, 64, %o2
158
159	andcc		%o1, 7, %o4		! is output aligned?
160	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
161	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
162	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
163	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
164	.word	0x88cd0182 !camellia_f	%f20,%f2,%f0,%f4
165	.word	0x84cd8980 !camellia_f	%f22,%f0,%f4,%f2
166	.word	0x81b68d84 !fxor	%f26,%f4,%f0
167	.word	0x85b60d82 !fxor	%f24,%f2,%f2
168
169	bnz,pn		%icc, 2f
170	nop
171
172	std		%f0, [%o1 + 0]
173	retl
174	std		%f2, [%o1 + 8]
175
1762:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
177	mov		0xff, %o5
178	srl		%o5, %o4, %o5
179
180	.word	0x89b00900 !faligndata	%f0,%f0,%f4
181	.word	0x8db00902 !faligndata	%f0,%f2,%f6
182	.word	0x91b08902 !faligndata	%f2,%f2,%f8
183
184	stda		%f4, [%o1 + %o5]0xc0	! partial store
185	std		%f6, [%o1 + 8]
186	add		%o1, 16, %o1
187	orn		%g0, %o5, %o5
188	retl
189	stda		%f8, [%o1 + %o5]0xc0	! partial store
190.type	cmll_t4_decrypt,#function
191.size	cmll_t4_decrypt,.-cmll_t4_decrypt
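! cmll_t4_set_key appears to take (user key = %o0, bits = %o1, schedule = %o2):
! it branches on %o1 (< 192 -> .L128, == 192 -> .L192, otherwise 256-bit),
! tolerates a misaligned key via alignaddr/faligndata, expands the key with
! the SIGMA constants below, and stores grandRounds (3 or 4) at offset 0x110,
! the same 272-byte offset the encrypt/decrypt entry points read.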
192.globl	cmll_t4_set_key
193.align	32
194cmll_t4_set_key:
195	and		%o0, 7, %o3
196	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
197	cmp		%o1, 192
198	ldd		[%o0 + 0], %f0
199	bl,pt		%icc,.L128
200	ldd		[%o0 + 8], %f2
201
202	be,pt		%icc,.L192
203	ldd		[%o0 + 16], %f4
204
205	brz,pt		%o3, .L256aligned
206	ldd		[%o0 + 24], %f6
207
208	ldd		[%o0 + 32], %f8
209	.word	0x81b00902 !faligndata	%f0,%f2,%f0
210	.word	0x85b08904 !faligndata	%f2,%f4,%f2
211	.word	0x89b10906 !faligndata	%f4,%f6,%f4
212	b		.L256aligned
213	.word	0x8db18908 !faligndata	%f6,%f8,%f6
214
215.align	16
216.L192:
217	brz,a,pt	%o3, .L256aligned
218	.word	0x8db00cc4 !fnot2	%f0,%f4,%f6
219
220	ldd		[%o0 + 24], %f6
221	nop
222	.word	0x81b00902 !faligndata	%f0,%f2,%f0
223	.word	0x85b08904 !faligndata	%f2,%f4,%f2
224	.word	0x89b10906 !faligndata	%f4,%f6,%f4
225	.word	0x8db00cc4 !fnot2	%f0,%f4,%f6
226
227.L256aligned:
228	std		%f0, [%o2 + 0]		! k[0, 1]
229	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
230	std		%f2, [%o2 + 8]		! k[2, 3]
231	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
232	.word	0x81b10d80 !fxor	%f4,%f0,%f0
233	b		.L128key
234	.word	0x85b18d82 !fxor	%f6,%f2,%f2
235
236.align	16
237.L128:
238	brz,pt		%o3, .L128aligned
239	nop
240
241	ldd		[%o0 + 16], %f4
242	nop
243	.word	0x81b00902 !faligndata	%f0,%f2,%f0
244	.word	0x85b08904 !faligndata	%f2,%f4,%f2
245
246.L128aligned:
247	std		%f0, [%o2 + 0]		! k[0, 1]
248	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
249	std		%f2, [%o2 + 8]		! k[2, 3]
250	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
251
252.L128key:
253	mov		%o7, %o5
2541:	call		.+8
255	add		%o7, SIGMA-1b, %o4
256	mov		%o5, %o7
257
258	ldd		[%o4 + 0], %f16
259	ldd		[%o4 + 8], %f18
260	ldd		[%o4 + 16], %f20
261	ldd		[%o4 + 24], %f22
262
263	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
264	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
265	.word	0x81b70d80 !fxor	%f28,%f0,%f0
266	.word	0x85b78d82 !fxor	%f30,%f2,%f2
267	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
268	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
269
270	bge,pn		%icc, .L256key
271	nop
272	std	%f0, [%o2 + 0x10]	! k[ 4,  5]
273	std	%f2, [%o2 + 0x18]	! k[ 6,  7]
274
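! Each srlx/sllx/or sextet below rotates the 128-bit value held in the
! %o4:%o5 pair left by the stated count (15, 17, 30, 34, ...), feeding the
! bits shifted out of one half into the other, and stores the result as the
! next pair of subkeys; the amounts are consistent with Camellia's KL/KA
! rotation schedule.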
275	.word	0x99b02200 !movdtox	%f0,%o4
276	.word	0x9bb02202 !movdtox	%f2,%o5
277	srlx	%o4, 64-15, %g4
278	sllx	%o4, 15, %o4
279	srlx	%o5, 64-15, %g5
280	sllx	%o5, 15, %o5
281	or	%o4, %g5, %o4
282	or	%o5, %g4, %o5
283	stx	%o4, [%o2 + 0x30]	! k[12, 13]
284	stx	%o5, [%o2 + 0x38]	! k[14, 15]
285	srlx	%o4, 64-15, %g4
286	sllx	%o4, 15, %o4
287	srlx	%o5, 64-15, %g5
288	sllx	%o5, 15, %o5
289	or	%o4, %g5, %o4
290	or	%o5, %g4, %o5
291	stx	%o4, [%o2 + 0x40]	! k[16, 17]
292	stx	%o5, [%o2 + 0x48]	! k[18, 19]
293	srlx	%o4, 64-15, %g4
294	sllx	%o4, 15, %o4
295	srlx	%o5, 64-15, %g5
296	sllx	%o5, 15, %o5
297	or	%o4, %g5, %o4
298	or	%o5, %g4, %o5
299	stx	%o4, [%o2 + 0x60]	! k[24, 25]
300	srlx	%o4, 64-15, %g4
301	sllx	%o4, 15, %o4
302	srlx	%o5, 64-15, %g5
303	sllx	%o5, 15, %o5
304	or	%o4, %g5, %o4
305	or	%o5, %g4, %o5
306	stx	%o4, [%o2 + 0x70]	! k[28, 29]
307	stx	%o5, [%o2 + 0x78]	! k[30, 31]
308	srlx	%o4, 64-34, %g4
309	sllx	%o4, 34, %o4
310	srlx	%o5, 64-34, %g5
311	sllx	%o5, 34, %o5
312	or	%o4, %g5, %o4
313	or	%o5, %g4, %o5
314	stx	%o4, [%o2 + 0xa0]	! k[40, 41]
315	stx	%o5, [%o2 + 0xa8]	! k[42, 43]
316	srlx	%o4, 64-17, %g4
317	sllx	%o4, 17, %o4
318	srlx	%o5, 64-17, %g5
319	sllx	%o5, 17, %o5
320	or	%o4, %g5, %o4
321	or	%o5, %g4, %o5
322	stx	%o4, [%o2 + 0xc0]	! k[48, 49]
323	stx	%o5, [%o2 + 0xc8]	! k[50, 51]
324
325	.word	0x99b0221c !movdtox	%f28,%o4		! k[ 0,  1]
326	.word	0x9bb0221e !movdtox	%f30,%o5		! k[ 2,  3]
327	srlx	%o4, 64-15, %g4
328	sllx	%o4, 15, %o4
329	srlx	%o5, 64-15, %g5
330	sllx	%o5, 15, %o5
331	or	%o4, %g5, %o4
332	or	%o5, %g4, %o5
333	stx	%o4, [%o2 + 0x20]	! k[ 8,  9]
334	stx	%o5, [%o2 + 0x28]	! k[10, 11]
335	srlx	%o4, 64-30, %g4
336	sllx	%o4, 30, %o4
337	srlx	%o5, 64-30, %g5
338	sllx	%o5, 30, %o5
339	or	%o4, %g5, %o4
340	or	%o5, %g4, %o5
341	stx	%o4, [%o2 + 0x50]	! k[20, 21]
342	stx	%o5, [%o2 + 0x58]	! k[22, 23]
343	srlx	%o4, 64-15, %g4
344	sllx	%o4, 15, %o4
345	srlx	%o5, 64-15, %g5
346	sllx	%o5, 15, %o5
347	or	%o4, %g5, %o4
348	or	%o5, %g4, %o5
349	stx	%o5, [%o2 + 0x68]	! k[26, 27]
350	srlx	%o4, 64-17, %g4
351	sllx	%o4, 17, %o4
352	srlx	%o5, 64-17, %g5
353	sllx	%o5, 17, %o5
354	or	%o4, %g5, %o4
355	or	%o5, %g4, %o5
356	stx	%o4, [%o2 + 0x80]	! k[32, 33]
357	stx	%o5, [%o2 + 0x88]	! k[34, 35]
358	srlx	%o4, 64-17, %g4
359	sllx	%o4, 17, %o4
360	srlx	%o5, 64-17, %g5
361	sllx	%o5, 17, %o5
362	or	%o4, %g5, %o4
363	or	%o5, %g4, %o5
364	stx	%o4, [%o2 + 0x90]	! k[36, 37]
365	stx	%o5, [%o2 + 0x98]	! k[38, 39]
366	srlx	%o4, 64-17, %g4
367	sllx	%o4, 17, %o4
368	srlx	%o5, 64-17, %g5
369	sllx	%o5, 17, %o5
370	or	%o4, %g5, %o4
371	or	%o5, %g4, %o5
372	stx	%o4, [%o2 + 0xb0]	! k[44, 45]
373	stx	%o5, [%o2 + 0xb8]	! k[46, 47]
374
375	mov		3, %o3
376	st		%o3, [%o2 + 0x110]
377	retl
378	xor		%o0, %o0, %o0
379
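! .L256key is reached for 192/256-bit keys (the condition codes still hold the
! earlier cmp against 192): two further camellia_f rounds with SIGMA5/SIGMA6
! produce the second intermediate key, extra subkeys are derived from it and
! from the second half of the user key, and grandRounds is set to 4.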
380.align	16
381.L256key:
382	ldd		[%o4 + 32], %f24
383	ldd		[%o4 + 40], %f26
384
385	std		%f0, [%o2 + 0x30]	! k[12, 13]
386	std		%f2, [%o2 + 0x38]	! k[14, 15]
387
388	.word	0x81b10d80 !fxor	%f4,%f0,%f0
389	.word	0x85b18d82 !fxor	%f6,%f2,%f2
390	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
391	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
392
393	std	%f0, [%o2 + 0x10]	! k[ 4,  5]
394	std	%f2, [%o2 + 0x18]	! k[ 6,  7]
395
396	.word	0x99b02200 !movdtox	%f0,%o4
397	.word	0x9bb02202 !movdtox	%f2,%o5
398	srlx	%o4, 64-30, %g4
399	sllx	%o4, 30, %o4
400	srlx	%o5, 64-30, %g5
401	sllx	%o5, 30, %o5
402	or	%o4, %g5, %o4
403	or	%o5, %g4, %o5
404	stx	%o4, [%o2 + 0x50]	! k[20, 21]
405	stx	%o5, [%o2 + 0x58]	! k[22, 23]
406	srlx	%o4, 64-30, %g4
407	sllx	%o4, 30, %o4
408	srlx	%o5, 64-30, %g5
409	sllx	%o5, 30, %o5
410	or	%o4, %g5, %o4
411	or	%o5, %g4, %o5
412	stx	%o4, [%o2 + 0xa0]	! k[40, 41]
413	stx	%o5, [%o2 + 0xa8]	! k[42, 43]
414	srlx	%o4, 64-51, %g4
415	sllx	%o4, 51, %o4
416	srlx	%o5, 64-51, %g5
417	sllx	%o5, 51, %o5
418	or	%o4, %g5, %o4
419	or	%o5, %g4, %o5
420	stx	%o4, [%o2 + 0x100]	! k[64, 65]
421	stx	%o5, [%o2 + 0x108]	! k[66, 67]
422
423	.word	0x99b02204 !movdtox	%f4,%o4		! k[ 8,  9]
424	.word	0x9bb02206 !movdtox	%f6,%o5		! k[10, 11]
425	srlx	%o4, 64-15, %g4
426	sllx	%o4, 15, %o4
427	srlx	%o5, 64-15, %g5
428	sllx	%o5, 15, %o5
429	or	%o4, %g5, %o4
430	or	%o5, %g4, %o5
431	stx	%o4, [%o2 + 0x20]	! k[ 8,  9]
432	stx	%o5, [%o2 + 0x28]	! k[10, 11]
433	srlx	%o4, 64-15, %g4
434	sllx	%o4, 15, %o4
435	srlx	%o5, 64-15, %g5
436	sllx	%o5, 15, %o5
437	or	%o4, %g5, %o4
438	or	%o5, %g4, %o5
439	stx	%o4, [%o2 + 0x40]	! k[16, 17]
440	stx	%o5, [%o2 + 0x48]	! k[18, 19]
441	srlx	%o4, 64-30, %g4
442	sllx	%o4, 30, %o4
443	srlx	%o5, 64-30, %g5
444	sllx	%o5, 30, %o5
445	or	%o4, %g5, %o4
446	or	%o5, %g4, %o5
447	stx	%o4, [%o2 + 0x90]	! k[36, 37]
448	stx	%o5, [%o2 + 0x98]	! k[38, 39]
449	srlx	%o4, 64-34, %g4
450	sllx	%o4, 34, %o4
451	srlx	%o5, 64-34, %g5
452	sllx	%o5, 34, %o5
453	or	%o4, %g5, %o4
454	or	%o5, %g4, %o5
455	stx	%o4, [%o2 + 0xd0]	! k[52, 53]
456	stx	%o5, [%o2 + 0xd8]	! k[54, 55]
457	ldx	[%o2 + 0x30], %o4	! k[12, 13]
458	ldx	[%o2 + 0x38], %o5	! k[14, 15]
459	srlx	%o4, 64-15, %g4
460	sllx	%o4, 15, %o4
461	srlx	%o5, 64-15, %g5
462	sllx	%o5, 15, %o5
463	or	%o4, %g5, %o4
464	or	%o5, %g4, %o5
465	stx	%o4, [%o2 + 0x30]	! k[12, 13]
466	stx	%o5, [%o2 + 0x38]	! k[14, 15]
467	srlx	%o4, 64-30, %g4
468	sllx	%o4, 30, %o4
469	srlx	%o5, 64-30, %g5
470	sllx	%o5, 30, %o5
471	or	%o4, %g5, %o4
472	or	%o5, %g4, %o5
473	stx	%o4, [%o2 + 0x70]	! k[28, 29]
474	stx	%o5, [%o2 + 0x78]	! k[30, 31]
475	srlx	%o4, 32, %g4
476	srlx	%o5, 32, %g5
477	st	%o4, [%o2 + 0xc0]	! k[48]
478	st	%g5, [%o2 + 0xc4]	! k[49]
479	st	%o5, [%o2 + 0xc8]	! k[50]
480	st	%g4, [%o2 + 0xcc]	! k[51]
481	srlx	%o4, 64-49, %g4
482	sllx	%o4, 49, %o4
483	srlx	%o5, 64-49, %g5
484	sllx	%o5, 49, %o5
485	or	%o4, %g5, %o4
486	or	%o5, %g4, %o5
487	stx	%o4, [%o2 + 0xe0]	! k[56, 57]
488	stx	%o5, [%o2 + 0xe8]	! k[58, 59]
489
490	.word	0x99b0221c !movdtox	%f28,%o4		! k[ 0,  1]
491	.word	0x9bb0221e !movdtox	%f30,%o5		! k[ 2,  3]
492	srlx	%o4, 64-45, %g4
493	sllx	%o4, 45, %o4
494	srlx	%o5, 64-45, %g5
495	sllx	%o5, 45, %o5
496	or	%o4, %g5, %o4
497	or	%o5, %g4, %o5
498	stx	%o4, [%o2 + 0x60]	! k[24, 25]
499	stx	%o5, [%o2 + 0x68]	! k[26, 27]
500	srlx	%o4, 64-15, %g4
501	sllx	%o4, 15, %o4
502	srlx	%o5, 64-15, %g5
503	sllx	%o5, 15, %o5
504	or	%o4, %g5, %o4
505	or	%o5, %g4, %o5
506	stx	%o4, [%o2 + 0x80]	! k[32, 33]
507	stx	%o5, [%o2 + 0x88]	! k[34, 35]
508	srlx	%o4, 64-17, %g4
509	sllx	%o4, 17, %o4
510	srlx	%o5, 64-17, %g5
511	sllx	%o5, 17, %o5
512	or	%o4, %g5, %o4
513	or	%o5, %g4, %o5
514	stx	%o4, [%o2 + 0xb0]	! k[44, 45]
515	stx	%o5, [%o2 + 0xb8]	! k[46, 47]
516	srlx	%o4, 64-34, %g4
517	sllx	%o4, 34, %o4
518	srlx	%o5, 64-34, %g5
519	sllx	%o5, 34, %o5
520	or	%o4, %g5, %o4
521	or	%o5, %g4, %o5
522	stx	%o4, [%o2 + 0xf0]	! k[60, 61]
523	stx	%o5, [%o2 + 0xf8]	! k[62, 63]
524
525	mov		4, %o3
526	st		%o3, [%o2 + 0x110]
527	retl
528	xor		%o0, %o0, %o0
529.type	cmll_t4_set_key,#function
530.size	cmll_t4_set_key,.-cmll_t4_set_key
531.align	32
532SIGMA:
533	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
534	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
535	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
536.type	SIGMA,#object
537.size	SIGMA,.-SIGMA
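! SIGMA holds the six 64-bit Camellia key-schedule constants (Sigma1..Sigma6);
! cmll_t4_set_key locates it PC-relative via the call .+8 sequence above.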
538.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
539.align	32
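! The _cmll*_load_* helpers below are called from the bulk-mode entry points
! (after save, so the key schedule pointer is %i3): they pull the whitening
! pair into %g4/%g5 and the round subkeys into %f16-%f62 once per call, so the
! per-block loops avoid reloading the schedule.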
540_cmll128_load_enckey:
541	ldx		[%i3 + 0], %g4
542	ldx		[%i3 + 8], %g5
543	ldd		[%i3 + 16], %f16
544	ldd		[%i3 + 24], %f18
545	ldd		[%i3 + 32], %f20
546	ldd		[%i3 + 40], %f22
547	ldd		[%i3 + 48], %f24
548	ldd		[%i3 + 56], %f26
549	ldd		[%i3 + 64], %f28
550	ldd		[%i3 + 72], %f30
551	ldd		[%i3 + 80], %f32
552	ldd		[%i3 + 88], %f34
553	ldd		[%i3 + 96], %f36
554	ldd		[%i3 + 104], %f38
555	ldd		[%i3 + 112], %f40
556	ldd		[%i3 + 120], %f42
557	ldd		[%i3 + 128], %f44
558	ldd		[%i3 + 136], %f46
559	ldd		[%i3 + 144], %f48
560	ldd		[%i3 + 152], %f50
561	ldd		[%i3 + 160], %f52
562	ldd		[%i3 + 168], %f54
563	ldd		[%i3 + 176], %f56
564	ldd		[%i3 + 184], %f58
565	ldd		[%i3 + 192], %f60
566	ldd		[%i3 + 200], %f62
567	retl
568	nop
569.type	_cmll128_load_enckey,#function
570.size	_cmll128_load_enckey,.-_cmll128_load_enckey
571_cmll256_load_enckey=_cmll128_load_enckey
572
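! The decrypt loaders fill the same registers with the schedule reversed (low
! offsets land in the high FP registers and the final ldx pair becomes
! %g4/%g5), so the shared round helpers run the rounds in decryption order;
! _cmll256_load_deckey first picks up its pair from 64 bytes further in and
! advances %i3 by 64 before falling into the common .Load_deckey path.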
573.align	32
574_cmll256_load_deckey:
575	ldd		[%i3 + 64], %f62
576	ldd		[%i3 + 72], %f60
577	b		.Load_deckey
578	add		%i3, 64, %i3
579_cmll128_load_deckey:
580	ldd		[%i3 + 0], %f60
581	ldd		[%i3 + 8], %f62
582.Load_deckey:
583	ldd		[%i3 + 16], %f58
584	ldd		[%i3 + 24], %f56
585	ldd		[%i3 + 32], %f54
586	ldd		[%i3 + 40], %f52
587	ldd		[%i3 + 48], %f50
588	ldd		[%i3 + 56], %f48
589	ldd		[%i3 + 64], %f46
590	ldd		[%i3 + 72], %f44
591	ldd		[%i3 + 80], %f42
592	ldd		[%i3 + 88], %f40
593	ldd		[%i3 + 96], %f38
594	ldd		[%i3 + 104], %f36
595	ldd		[%i3 + 112], %f34
596	ldd		[%i3 + 120], %f32
597	ldd		[%i3 + 128], %f30
598	ldd		[%i3 + 136], %f28
599	ldd		[%i3 + 144], %f26
600	ldd		[%i3 + 152], %f24
601	ldd		[%i3 + 160], %f22
602	ldd		[%i3 + 168], %f20
603	ldd		[%i3 + 176], %f18
604	ldd		[%i3 + 184], %f16
605	ldx		[%i3 + 192], %g4
606	retl
607	ldx		[%i3 + 200], %g5
608.type	_cmll256_load_deckey,#function
609.size	_cmll256_load_deckey,.-_cmll256_load_deckey
610
611.align	32
612_cmll128_encrypt_1x:
613	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
614	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
615	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
616	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
617	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
618	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
619	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
620	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
621	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
622	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
623	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
624	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
625	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
626	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
627	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
628	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
629	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
630	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
631	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
632	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
633	.word	0x88ce4182 !camellia_f	%f56,%f2,%f0,%f4
634	.word	0x84cec980 !camellia_f	%f58,%f0,%f4,%f2
635	.word	0x81b74d84 !fxor	%f60,%f4,%f0
636	retl
637	.word	0x85b7cd82 !fxor	%f62,%f2,%f2
638.type	_cmll128_encrypt_1x,#function
639.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
640_cmll128_decrypt_1x=_cmll128_encrypt_1x
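! _cmll128_decrypt_1x can alias _cmll128_encrypt_1x because Camellia's Feistel
! structure is the same in both directions; the deckey loader above already
! presented the subkeys in reverse order.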
641
642.align	32
643_cmll128_encrypt_2x:
644	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
645	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
646	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
647	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
648	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
649	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
650	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
651	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
652	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
653	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
654	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
655	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
656	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
657	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
658	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
659	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
660	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
661	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
662	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
663	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
664	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
665	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
666	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
667	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
668	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
669	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
670	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
671	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
672	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
673	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
674	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
675	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
676	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
677	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
678	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
679	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
680	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
681	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
682	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
683	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
684	.word	0x90ce4182 !camellia_f	%f56,%f2,%f0,%f8
685	.word	0x94ce4986 !camellia_f	%f56,%f6,%f4,%f10
686	.word	0x84ced180 !camellia_f	%f58,%f0,%f8,%f2
687	.word	0x8cced584 !camellia_f	%f58,%f4,%f10,%f6
688	.word	0x81b74d88 !fxor	%f60,%f8,%f0
689	.word	0x89b74d8a !fxor	%f60,%f10,%f4
690	.word	0x85b7cd82 !fxor	%f62,%f2,%f2
691	retl
692	.word	0x8db7cd86 !fxor	%f62,%f6,%f6
693.type	_cmll128_encrypt_2x,#function
694.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
695_cmll128_decrypt_2x=_cmll128_encrypt_2x
696
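! _cmll128_encrypt_2x runs the same schedule over two independent blocks
! (%f0/%f2 and %f4/%f6) with the instructions interleaved, presumably to
! overlap camellia_f latencies in the bulk-mode loops.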
697.align	32
698_cmll256_encrypt_1x:
699	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
700	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
701	ldd		[%i3 + 208], %f16
702	ldd		[%i3 + 216], %f18
703	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
704	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
705	ldd		[%i3 + 224], %f20
706	ldd		[%i3 + 232], %f22
707	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
708	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
709	ldd		[%i3 + 240], %f24
710	ldd		[%i3 + 248], %f26
711	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
712	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
713	ldd		[%i3 + 256], %f28
714	ldd		[%i3 + 264], %f30
715	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
716	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
717	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
718	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
719	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
720	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
721	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
722	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
723	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
724	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
725	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
726	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
727	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
728	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
729	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
730	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
731	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
732	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
733	ldd		[%i3 + 16], %f16
734	ldd		[%i3 + 24], %f18
735	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
736	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
737	ldd		[%i3 + 32], %f20
738	ldd		[%i3 + 40], %f22
739	.word	0x88ce0182 !camellia_f	%f24,%f2,%f0,%f4
740	.word	0x84ce8980 !camellia_f	%f26,%f0,%f4,%f2
741	ldd		[%i3 + 48], %f24
742	ldd		[%i3 + 56], %f26
743	.word	0x81b70d84 !fxor	%f28,%f4,%f0
744	.word	0x85b78d82 !fxor	%f30,%f2,%f2
745	ldd		[%i3 + 64], %f28
746	retl
747	ldd		[%i3 + 72], %f30
748.type	_cmll256_encrypt_1x,#function
749.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x
750
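! The 256-bit round helpers need more subkey material than fits in %f16-%f62,
! so they reload %f16-%f30 from the tail of the schedule mid-sequence and then
! restore the head values before returning, leaving the registers ready for
! the next block.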
751.align	32
752_cmll256_encrypt_2x:
753	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
754	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
755	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
756	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
757	ldd		[%i3 + 208], %f16
758	ldd		[%i3 + 216], %f18
759	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
760	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
761	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
762	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
763	ldd		[%i3 + 224], %f20
764	ldd		[%i3 + 232], %f22
765	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
766	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
767	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
768	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
769	ldd		[%i3 + 240], %f24
770	ldd		[%i3 + 248], %f26
771	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
772	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
773	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
774	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
775	ldd		[%i3 + 256], %f28
776	ldd		[%i3 + 264], %f30
777	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
778	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
779	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
780	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
781	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
782	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
783	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
784	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
785	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
786	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
787	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
788	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
789	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
790	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
791	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
792	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
793	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
794	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
795	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
796	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
797	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
798	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
799	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
800	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
801	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
802	.word	0x8cce4986 !camellia_f	%f56,%f6,%f4,%f6
803	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
804	.word	0x88cecd84 !camellia_f	%f58,%f4,%f6,%f4
805	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
806	.word	0x89b76784 !camellia_fl	%f60,%f4,%f4
807	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
808	.word	0x8db7e7a6 !camellia_fli	%f62,%f6,%f6
809	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
810	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
811	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
812	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
813	ldd		[%i3 + 16], %f16
814	ldd		[%i3 + 24], %f18
815	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
816	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
817	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
818	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
819	ldd		[%i3 + 32], %f20
820	ldd		[%i3 + 40], %f22
821	.word	0x90ce0182 !camellia_f	%f24,%f2,%f0,%f8
822	.word	0x94ce0986 !camellia_f	%f24,%f6,%f4,%f10
823	.word	0x84ce9180 !camellia_f	%f26,%f0,%f8,%f2
824	.word	0x8cce9584 !camellia_f	%f26,%f4,%f10,%f6
825	ldd		[%i3 + 48], %f24
826	ldd		[%i3 + 56], %f26
827	.word	0x81b70d88 !fxor	%f28,%f8,%f0
828	.word	0x89b70d8a !fxor	%f28,%f10,%f4
829	.word	0x85b78d82 !fxor	%f30,%f2,%f2
830	.word	0x8db78d86 !fxor	%f30,%f6,%f6
831	ldd		[%i3 + 64], %f28
832	retl
833	ldd		[%i3 + 72], %f30
834.type	_cmll256_encrypt_2x,#function
835.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x
836
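! The 256-bit decrypt helpers do the same juggling relative to the pointer set
! up by _cmll256_load_deckey: the extra subkeys come from negative offsets and
! the head registers are restored from [%i3 + 128..184] at the end.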
837.align	32
838_cmll256_decrypt_1x:
839	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
840	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
841	ldd		[%i3 - 8], %f16
842	ldd		[%i3 - 16], %f18
843	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
844	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
845	ldd		[%i3 - 24], %f20
846	ldd		[%i3 - 32], %f22
847	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
848	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
849	ldd		[%i3 - 40], %f24
850	ldd		[%i3 - 48], %f26
851	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
852	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
853	ldd		[%i3 - 56], %f28
854	ldd		[%i3 - 64], %f30
855	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
856	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
857	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
858	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
859	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
860	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
861	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
862	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
863	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
864	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
865	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
866	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
867	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
868	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
869	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
870	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
871	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
872	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
873	ldd		[%i3 + 184], %f16
874	ldd		[%i3 + 176], %f18
875	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
876	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
877	ldd		[%i3 + 168], %f20
878	ldd		[%i3 + 160], %f22
879	.word	0x88ce0182 !camellia_f	%f24,%f2,%f0,%f4
880	.word	0x84ce8980 !camellia_f	%f26,%f0,%f4,%f2
881	ldd		[%i3 + 152], %f24
882	ldd		[%i3 + 144], %f26
883	.word	0x81b78d84 !fxor	%f30,%f4,%f0
884	.word	0x85b70d82 !fxor	%f28,%f2,%f2
885	ldd		[%i3 + 136], %f28
886	retl
887	ldd		[%i3 + 128], %f30
888.type	_cmll256_decrypt_1x,#function
889.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x
890
891.align	32
892_cmll256_decrypt_2x:
893	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
894	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
895	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
896	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
897	ldd		[%i3 - 8], %f16
898	ldd		[%i3 - 16], %f18
899	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
900	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
901	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
902	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
903	ldd		[%i3 - 24], %f20
904	ldd		[%i3 - 32], %f22
905	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
906	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
907	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
908	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
909	ldd		[%i3 - 40], %f24
910	ldd		[%i3 - 48], %f26
911	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
912	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
913	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
914	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
915	ldd		[%i3 - 56], %f28
916	ldd		[%i3 - 64], %f30
917	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
918	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
919	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
920	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
921	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
922	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
923	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
924	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
925	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
926	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
927	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
928	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
929	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
930	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
931	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
932	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
933	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
934	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
935	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
936	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
937	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
938	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
939	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
940	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
941	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
942	.word	0x8cce4986 !camellia_f	%f56,%f6,%f4,%f6
943	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
944	.word	0x88cecd84 !camellia_f	%f58,%f4,%f6,%f4
945	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
946	.word	0x89b76784 !camellia_fl	%f60,%f4,%f4
947	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
948	.word	0x8db7e7a6 !camellia_fli	%f62,%f6,%f6
949	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
950	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
951	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
952	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
953	ldd		[%i3 + 184], %f16
954	ldd		[%i3 + 176], %f18
955	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
956	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
957	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
958	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
959	ldd		[%i3 + 168], %f20
960	ldd		[%i3 + 160], %f22
961	.word	0x90ce0182 !camellia_f	%f24,%f2,%f0,%f8
962	.word	0x94ce0986 !camellia_f	%f24,%f6,%f4,%f10
963	.word	0x84ce9180 !camellia_f	%f26,%f0,%f8,%f2
964	.word	0x8cce9584 !camellia_f	%f26,%f4,%f10,%f6
965	ldd		[%i3 + 152], %f24
966	ldd		[%i3 + 144], %f26
967	.word	0x81b78d88 !fxor	%f30,%f8,%f0
968	.word	0x89b78d8a !fxor	%f30,%f10,%f4
969	.word	0x85b70d82 !fxor	%f28,%f2,%f2
970	.word	0x8db70d86 !fxor	%f28,%f6,%f6
971	ldd		[%i3 + 136], %f28
972	retl
973	ldd		[%i3 + 128], %f30
974.type	_cmll256_decrypt_2x,#function
975.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
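! The bulk-mode entry points below appear to follow the usual OpenSSL layout
! (inp = %i0, out = %i1, len = %i2, key schedule = %i3, ivec = %i4), judging
! by the loads.  cmll128_t4_cbc_encrypt keeps the running IV in %f0-%f3,
! pre-whitens each input block with rk[0] (%g4/%g5), XORs in the IV, calls the
! 1x round helper, and writes the final IV back to [%i4].  Three store paths
! are used: plain std for aligned output, faligndata plus partial stda for a
! misaligned destination, and an ASI_BLK_INIT path (.L128cbc_enc_blk) when the
! output is 8-byte aligned, inp != out and len >= 128.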
976.globl	cmll128_t4_cbc_encrypt
977.align	32
978cmll128_t4_cbc_encrypt:
979	save		%sp, -STACK_FRAME, %sp
980	cmp		%i2, 0
981	be,pn		SIZE_T_CC, .L128_cbc_enc_abort
982	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
983	sub		%i0, %i1, %l5	! %i0!=%i1
984	ld		[%i4 + 0], %f0
985	ld		[%i4 + 4], %f1
986	ld		[%i4 + 8], %f2
987	ld		[%i4 + 12], %f3
988	prefetch	[%i0], 20
989	prefetch	[%i0 + 63], 20
990	call		_cmll128_load_enckey
991	and		%i0, 7, %l0
992	andn		%i0, 7, %i0
993	sll		%l0, 3, %l0
994	mov		64, %l1
995	mov		0xff, %l3
996	sub		%l1, %l0, %l1
997	and		%i1, 7, %l2
998	cmp		%i2, 127
999	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1000	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
1001	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
1002	srl		%l3, %l2, %l3
1003
1004	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1005	srlx		%i2, 4, %i2
1006	prefetch	[%i1], 22
1007
1008.L128_cbc_enc_loop:
1009	ldx		[%i0 + 0], %o0
1010	brz,pt		%l0, 4f
1011	ldx		[%i0 + 8], %o1
1012
1013	ldx		[%i0 + 16], %o2
1014	sllx		%o0, %l0, %o0
1015	srlx		%o1, %l1, %g1
1016	sllx		%o1, %l0, %o1
1017	or		%g1, %o0, %o0
1018	srlx		%o2, %l1, %o2
1019	or		%o2, %o1, %o1
10204:
1021	xor		%g4, %o0, %o0		! ^= rk[0]
1022	xor		%g5, %o1, %o1
1023	.word	0x99b02308 !movxtod	%o0,%f12
1024	.word	0x9db02309 !movxtod	%o1,%f14
1025
1026	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1027	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1028	prefetch	[%i1 + 63], 22
1029	prefetch	[%i0 + 16+63], 20
1030	call		_cmll128_encrypt_1x
1031	add		%i0, 16, %i0
1032
1033	brnz,pn		%l2, 2f
1034	sub		%i2, 1, %i2
1035
1036	std		%f0, [%i1 + 0]
1037	std		%f2, [%i1 + 8]
1038	brnz,pt		%i2, .L128_cbc_enc_loop
1039	add		%i1, 16, %i1
1040	st		%f0, [%i4 + 0]
1041	st		%f1, [%i4 + 4]
1042	st		%f2, [%i4 + 8]
1043	st		%f3, [%i4 + 12]
1044.L128_cbc_enc_abort:
1045	ret
1046	restore
1047
1048.align	16
10492:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1050						! and ~3x deterioration
1051						! in inp==out case
1052	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1053	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1054	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1055
1056	stda		%f4, [%i1 + %l3]0xc0	! partial store
1057	std		%f6, [%i1 + 8]
1058	add		%i1, 16, %i1
1059	orn		%g0, %l3, %l3
1060	stda		%f8, [%i1 + %l3]0xc0	! partial store
1061
1062	brnz,pt		%i2, .L128_cbc_enc_loop+4
1063	orn		%g0, %l3, %l3
1064	st		%f0, [%i4 + 0]
1065	st		%f1, [%i4 + 4]
1066	st		%f2, [%i4 + 8]
1067	st		%f3, [%i4 + 12]
1068	ret
1069	restore
1070
1071!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1072.align	32
1073.L128cbc_enc_blk:
1074	add	%i1, %i2, %l5
1075	and	%l5, 63, %l5	! tail
1076	sub	%i2, %l5, %i2
1077	add	%l5, 15, %l5	! round up to 16n
1078	srlx	%i2, 4, %i2
1079	srl	%l5, 4, %l5
1080
1081.L128_cbc_enc_blk_loop:
1082	ldx		[%i0 + 0], %o0
1083	brz,pt		%l0, 5f
1084	ldx		[%i0 + 8], %o1
1085
1086	ldx		[%i0 + 16], %o2
1087	sllx		%o0, %l0, %o0
1088	srlx		%o1, %l1, %g1
1089	sllx		%o1, %l0, %o1
1090	or		%g1, %o0, %o0
1091	srlx		%o2, %l1, %o2
1092	or		%o2, %o1, %o1
10935:
1094	xor		%g4, %o0, %o0		! ^= rk[0]
1095	xor		%g5, %o1, %o1
1096	.word	0x99b02308 !movxtod	%o0,%f12
1097	.word	0x9db02309 !movxtod	%o1,%f14
1098
1099	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1100	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1101	prefetch	[%i0 + 16+63], 20
1102	call		_cmll128_encrypt_1x
1103	add		%i0, 16, %i0
1104	sub		%i2, 1, %i2
1105
1106	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1107	add		%i1, 8, %i1
1108	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1109	brnz,pt		%i2, .L128_cbc_enc_blk_loop
1110	add		%i1, 8, %i1
1111
1112	membar		#StoreLoad|#StoreStore
1113	brnz,pt		%l5, .L128_cbc_enc_loop
1114	mov		%l5, %i2
1115	st		%f0, [%i4 + 0]
1116	st		%f1, [%i4 + 4]
1117	st		%f2, [%i4 + 8]
1118	st		%f3, [%i4 + 12]
1119	ret
1120	restore
1121.type	cmll128_t4_cbc_encrypt,#function
1122.size	cmll128_t4_cbc_encrypt,.-cmll128_t4_cbc_encrypt
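! cmll256_t4_cbc_encrypt is structurally identical to the 128-bit version
! above; it only swaps in the 256-bit key loader and round helper.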
1123.globl	cmll256_t4_cbc_encrypt
1124.align	32
1125cmll256_t4_cbc_encrypt:
1126	save		%sp, -STACK_FRAME, %sp
1127	cmp		%i2, 0
1128	be,pn		SIZE_T_CC, .L256_cbc_enc_abort
1129	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1130	sub		%i0, %i1, %l5	! %i0!=%i1
1131	ld		[%i4 + 0], %f0
1132	ld		[%i4 + 4], %f1
1133	ld		[%i4 + 8], %f2
1134	ld		[%i4 + 12], %f3
1135	prefetch	[%i0], 20
1136	prefetch	[%i0 + 63], 20
1137	call		_cmll256_load_enckey
1138	and		%i0, 7, %l0
1139	andn		%i0, 7, %i0
1140	sll		%l0, 3, %l0
1141	mov		64, %l1
1142	mov		0xff, %l3
1143	sub		%l1, %l0, %l1
1144	and		%i1, 7, %l2
1145	cmp		%i2, 127
1146	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1147	movleu		SIZE_T_CC, 0, %l5	!	%i2<128 ||
1148	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
1149	srl		%l3, %l2, %l3
1150
1151	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1152	srlx		%i2, 4, %i2
1153	prefetch	[%i1], 22
1154
1155.L256_cbc_enc_loop:
1156	ldx		[%i0 + 0], %o0
1157	brz,pt		%l0, 4f
1158	ldx		[%i0 + 8], %o1
1159
1160	ldx		[%i0 + 16], %o2
1161	sllx		%o0, %l0, %o0
1162	srlx		%o1, %l1, %g1
1163	sllx		%o1, %l0, %o1
1164	or		%g1, %o0, %o0
1165	srlx		%o2, %l1, %o2
1166	or		%o2, %o1, %o1
11674:
1168	xor		%g4, %o0, %o0		! ^= rk[0]
1169	xor		%g5, %o1, %o1
1170	.word	0x99b02308 !movxtod	%o0,%f12
1171	.word	0x9db02309 !movxtod	%o1,%f14
1172
1173	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1174	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1175	prefetch	[%i1 + 63], 22
1176	prefetch	[%i0 + 16+63], 20
1177	call		_cmll256_encrypt_1x
1178	add		%i0, 16, %i0
1179
1180	brnz,pn		%l2, 2f
1181	sub		%i2, 1, %i2
1182
1183	std		%f0, [%i1 + 0]
1184	std		%f2, [%i1 + 8]
1185	brnz,pt		%i2, .L256_cbc_enc_loop
1186	add		%i1, 16, %i1
1187	st		%f0, [%i4 + 0]
1188	st		%f1, [%i4 + 4]
1189	st		%f2, [%i4 + 8]
1190	st		%f3, [%i4 + 12]
1191.L256_cbc_enc_abort:
1192	ret
1193	restore
1194
1195.align	16
11962:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1197						! and ~3x deterioration
1198						! in inp==out case
1199	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1200	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1201	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1202
1203	stda		%f4, [%i1 + %l3]0xc0	! partial store
1204	std		%f6, [%i1 + 8]
1205	add		%i1, 16, %i1
1206	orn		%g0, %l3, %l3
1207	stda		%f8, [%i1 + %l3]0xc0	! partial store
1208
1209	brnz,pt		%i2, .L256_cbc_enc_loop+4
1210	orn		%g0, %l3, %l3
1211	st		%f0, [%i4 + 0]
1212	st		%f1, [%i4 + 4]
1213	st		%f2, [%i4 + 8]
1214	st		%f3, [%i4 + 12]
1215	ret
1216	restore
1217
1218!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1219.align	32
1220.L256cbc_enc_blk:
1221	add	%i1, %i2, %l5
1222	and	%l5, 63, %l5	! tail
1223	sub	%i2, %l5, %i2
1224	add	%l5, 15, %l5	! round up to 16n
1225	srlx	%i2, 4, %i2
1226	srl	%l5, 4, %l5
1227
1228.L256_cbc_enc_blk_loop:
1229	ldx		[%i0 + 0], %o0
1230	brz,pt		%l0, 5f
1231	ldx		[%i0 + 8], %o1
1232
1233	ldx		[%i0 + 16], %o2
1234	sllx		%o0, %l0, %o0
1235	srlx		%o1, %l1, %g1
1236	sllx		%o1, %l0, %o1
1237	or		%g1, %o0, %o0
1238	srlx		%o2, %l1, %o2
1239	or		%o2, %o1, %o1
12405:
1241	xor		%g4, %o0, %o0		! ^= rk[0]
1242	xor		%g5, %o1, %o1
1243	.word	0x99b02308 !movxtod	%o0,%f12
1244	.word	0x9db02309 !movxtod	%o1,%f14
1245
1246	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1247	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1248	prefetch	[%i0 + 16+63], 20
1249	call		_cmll256_encrypt_1x
1250	add		%i0, 16, %i0
1251	sub		%i2, 1, %i2
1252
1253	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1254	add		%i1, 8, %i1
1255	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1256	brnz,pt		%i2, .L256_cbc_enc_blk_loop
1257	add		%i1, 8, %i1
1258
1259	membar		#StoreLoad|#StoreStore
1260	brnz,pt		%l5, .L256_cbc_enc_loop
1261	mov		%l5, %i2
1262	st		%f0, [%i4 + 0]
1263	st		%f1, [%i4 + 4]
1264	st		%f2, [%i4 + 8]
1265	st		%f3, [%i4 + 12]
1266	ret
1267	restore
1268.type	cmll256_t4_cbc_encrypt,#function
1269.size	cmll256_t4_cbc_encrypt,.-cmll256_t4_cbc_encrypt
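! CBC decryption is parallelizable, so the decrypt entry points prefer the 2x
! helper: the andcc %i2, 16 test peels one block when the block count is odd,
! the previous ciphertext (the next IV) is carried in %f12-%f15, and a
! >= 256-byte ASI_BLK_INIT path handles the large aligned, non-overlapping
! case.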
1270.globl	cmll128_t4_cbc_decrypt
1271.align	32
1272cmll128_t4_cbc_decrypt:
1273	save		%sp, -STACK_FRAME, %sp
1274	cmp		%i2, 0
1275	be,pn		SIZE_T_CC, .L128_cbc_dec_abort
1276	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1277	sub		%i0, %i1, %l5	! %i0!=%i1
1278	ld		[%i4 + 0], %f12	! load ivec
1279	ld		[%i4 + 4], %f13
1280	ld		[%i4 + 8], %f14
1281	ld		[%i4 + 12], %f15
1282	prefetch	[%i0], 20
1283	prefetch	[%i0 + 63], 20
1284	call		_cmll128_load_deckey
1285	and		%i0, 7, %l0
1286	andn		%i0, 7, %i0
1287	sll		%l0, 3, %l0
1288	mov		64, %l1
1289	mov		0xff, %l3
1290	sub		%l1, %l0, %l1
1291	and		%i1, 7, %l2
1292	cmp		%i2, 255
1293	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1294	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
1295	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
1296	srl		%l3, %l2, %l3
1297
1298	andcc		%i2, 16, %g0		! is number of blocks even?
1299	srlx		%i2, 4, %i2
1300	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1301	bz		%icc, .L128_cbc_dec_loop2x
1302	prefetch	[%i1], 22
1303.L128_cbc_dec_loop:
1304	ldx		[%i0 + 0], %o0
1305	brz,pt		%l0, 4f
1306	ldx		[%i0 + 8], %o1
1307
1308	ldx		[%i0 + 16], %o2
1309	sllx		%o0, %l0, %o0
1310	srlx		%o1, %l1, %g1
1311	sllx		%o1, %l0, %o1
1312	or		%g1, %o0, %o0
1313	srlx		%o2, %l1, %o2
1314	or		%o2, %o1, %o1
13154:
1316	xor		%g4, %o0, %o2		! ^= rk[0]
1317	xor		%g5, %o1, %o3
1318	.word	0x81b0230a !movxtod	%o2,%f0
1319	.word	0x85b0230b !movxtod	%o3,%f2
1320
1321	prefetch	[%i1 + 63], 22
1322	prefetch	[%i0 + 16+63], 20
1323	call		_cmll128_decrypt_1x
1324	add		%i0, 16, %i0
1325
1326	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1327	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1328	.word	0x99b02308 !movxtod	%o0,%f12
1329	.word	0x9db02309 !movxtod	%o1,%f14
1330
1331	brnz,pn		%l2, 2f
1332	sub		%i2, 1, %i2
1333
1334	std		%f0, [%i1 + 0]
1335	std		%f2, [%i1 + 8]
1336	brnz,pt		%i2, .L128_cbc_dec_loop2x
1337	add		%i1, 16, %i1
1338	st		%f12, [%i4 + 0]
1339	st		%f13, [%i4 + 4]
1340	st		%f14, [%i4 + 8]
1341	st		%f15, [%i4 + 12]
1342.L128_cbc_dec_abort:
1343	ret
1344	restore
1345
1346.align	16
13472:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1348						! and ~3x deterioration
1349						! in inp==out case
1350	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1351	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1352	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1353
1354	stda		%f4, [%i1 + %l3]0xc0	! partial store
1355	std		%f6, [%i1 + 8]
1356	add		%i1, 16, %i1
1357	orn		%g0, %l3, %l3
1358	stda		%f8, [%i1 + %l3]0xc0	! partial store
1359
1360	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1361	orn		%g0, %l3, %l3
1362	st		%f12, [%i4 + 0]
1363	st		%f13, [%i4 + 4]
1364	st		%f14, [%i4 + 8]
1365	st		%f15, [%i4 + 12]
1366	ret
1367	restore
1368
1369!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1370.align	32
1371.L128_cbc_dec_loop2x:
1372	ldx		[%i0 + 0], %o0
1373	ldx		[%i0 + 8], %o1
1374	ldx		[%i0 + 16], %o2
1375	brz,pt		%l0, 4f
1376	ldx		[%i0 + 24], %o3
1377
1378	ldx		[%i0 + 32], %o4
1379	sllx		%o0, %l0, %o0
1380	srlx		%o1, %l1, %g1
1381	or		%g1, %o0, %o0
1382	sllx		%o1, %l0, %o1
1383	srlx		%o2, %l1, %g1
1384	or		%g1, %o1, %o1
1385	sllx		%o2, %l0, %o2
1386	srlx		%o3, %l1, %g1
1387	or		%g1, %o2, %o2
1388	sllx		%o3, %l0, %o3
1389	srlx		%o4, %l1, %o4
1390	or		%o4, %o3, %o3
13914:
1392	xor		%g4, %o0, %o4		! ^= rk[0]
1393	xor		%g5, %o1, %o5
1394	.word	0x81b0230c !movxtod	%o4,%f0
1395	.word	0x85b0230d !movxtod	%o5,%f2
1396	xor		%g4, %o2, %o4
1397	xor		%g5, %o3, %o5
1398	.word	0x89b0230c !movxtod	%o4,%f4
1399	.word	0x8db0230d !movxtod	%o5,%f6
1400
1401	prefetch	[%i1 + 63], 22
1402	prefetch	[%i0 + 32+63], 20
1403	call		_cmll128_decrypt_2x
1404	add		%i0, 32, %i0
1405
1406	.word	0x91b02308 !movxtod	%o0,%f8
1407	.word	0x95b02309 !movxtod	%o1,%f10
1408	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1409	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1410	.word	0x99b0230a !movxtod	%o2,%f12
1411	.word	0x9db0230b !movxtod	%o3,%f14
1412	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1413	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1414
1415	brnz,pn		%l2, 2f
1416	sub		%i2, 2, %i2
1417
1418	std		%f0, [%i1 + 0]
1419	std		%f2, [%i1 + 8]
1420	std		%f4, [%i1 + 16]
1421	std		%f6, [%i1 + 24]
1422	brnz,pt		%i2, .L128_cbc_dec_loop2x
1423	add		%i1, 32, %i1
1424	st		%f12, [%i4 + 0]
1425	st		%f13, [%i4 + 4]
1426	st		%f14, [%i4 + 8]
1427	st		%f15, [%i4 + 12]
1428	ret
1429	restore
1430
1431.align	16
14322:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1433						! and ~3x deterioration
1434						! in inp==out case
1435	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1436	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1437	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1438	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1439	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1440	stda		%f8, [%i1 + %l3]0xc0	! partial store
1441	std		%f0, [%i1 + 8]
1442	std		%f2, [%i1 + 16]
1443	std		%f4, [%i1 + 24]
1444	add		%i1, 32, %i1
1445	orn		%g0, %l3, %l3
1446	stda		%f6, [%i1 + %l3]0xc0	! partial store
1447
1448	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1449	orn		%g0, %l3, %l3
1450	st		%f12, [%i4 + 0]
1451	st		%f13, [%i4 + 4]
1452	st		%f14, [%i4 + 8]
1453	st		%f15, [%i4 + 12]
1454	ret
1455	restore
1456
1457!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1458.align	32
1459.L128cbc_dec_blk:
1460	add	%i1, %i2, %l5
1461	and	%l5, 63, %l5	! tail
1462	sub	%i2, %l5, %i2
1463	add	%l5, 15, %l5	! round up to 16n
1464	srlx	%i2, 4, %i2
1465	srl	%l5, 4, %l5
1466	sub	%i2, 1, %i2
1467	add	%l5, 1, %l5
1468
1469.L128_cbc_dec_blk_loop2x:
1470	ldx		[%i0 + 0], %o0
1471	ldx		[%i0 + 8], %o1
1472	ldx		[%i0 + 16], %o2
1473	brz,pt		%l0, 5f
1474	ldx		[%i0 + 24], %o3
1475
1476	ldx		[%i0 + 32], %o4
1477	sllx		%o0, %l0, %o0
1478	srlx		%o1, %l1, %g1
1479	or		%g1, %o0, %o0
1480	sllx		%o1, %l0, %o1
1481	srlx		%o2, %l1, %g1
1482	or		%g1, %o1, %o1
1483	sllx		%o2, %l0, %o2
1484	srlx		%o3, %l1, %g1
1485	or		%g1, %o2, %o2
1486	sllx		%o3, %l0, %o3
1487	srlx		%o4, %l1, %o4
1488	or		%o4, %o3, %o3
14895:
1490	xor		%g4, %o0, %o4		! ^= rk[0]
1491	xor		%g5, %o1, %o5
1492	.word	0x81b0230c !movxtod	%o4,%f0
1493	.word	0x85b0230d !movxtod	%o5,%f2
1494	xor		%g4, %o2, %o4
1495	xor		%g5, %o3, %o5
1496	.word	0x89b0230c !movxtod	%o4,%f4
1497	.word	0x8db0230d !movxtod	%o5,%f6
1498
1499	prefetch	[%i0 + 32+63], 20
1500	call		_cmll128_decrypt_2x
1501	add		%i0, 32, %i0
1502	subcc		%i2, 2, %i2
1503
1504	.word	0x91b02308 !movxtod	%o0,%f8
1505	.word	0x95b02309 !movxtod	%o1,%f10
1506	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1507	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1508	.word	0x99b0230a !movxtod	%o2,%f12
1509	.word	0x9db0230b !movxtod	%o3,%f14
1510	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1511	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1512
1513	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1514	add		%i1, 8, %i1
1515	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1516	add		%i1, 8, %i1
1517	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1518	add		%i1, 8, %i1
1519	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1520	bgu,pt		SIZE_T_CC, .L128_cbc_dec_blk_loop2x
1521	add		%i1, 8, %i1
1522
1523	add		%l5, %i2, %i2
1524	andcc		%i2, 1, %g0		! is number of blocks even?
1525	membar		#StoreLoad|#StoreStore
1526	bnz,pt		%icc, .L128_cbc_dec_loop
1527	srl		%i2, 0, %i2
1528	brnz,pn		%i2, .L128_cbc_dec_loop2x
1529	nop
1530	st		%f12, [%i4 + 0]	! write out ivec
1531	st		%f13, [%i4 + 4]
1532	st		%f14, [%i4 + 8]
1533	st		%f15, [%i4 + 12]
1534	ret
1535	restore
1536.type	cmll128_t4_cbc_decrypt,#function
1537.size	cmll128_t4_cbc_decrypt,.-cmll128_t4_cbc_decrypt
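! cmll256_t4_cbc_decrypt repeats the same structure with the 256-bit loader
! and round helpers.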
1538.globl	cmll256_t4_cbc_decrypt
1539.align	32
1540cmll256_t4_cbc_decrypt:
1541	save		%sp, -STACK_FRAME, %sp
1542	cmp		%i2, 0
1543	be,pn		SIZE_T_CC, .L256_cbc_dec_abort
1544	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1545	sub		%i0, %i1, %l5	! %i0!=%i1
1546	ld		[%i4 + 0], %f12	! load ivec
1547	ld		[%i4 + 4], %f13
1548	ld		[%i4 + 8], %f14
1549	ld		[%i4 + 12], %f15
1550	prefetch	[%i0], 20
1551	prefetch	[%i0 + 63], 20
1552	call		_cmll256_load_deckey
1553	and		%i0, 7, %l0
1554	andn		%i0, 7, %i0
1555	sll		%l0, 3, %l0
1556	mov		64, %l1
1557	mov		0xff, %l3
1558	sub		%l1, %l0, %l1
1559	and		%i1, 7, %l2
1560	cmp		%i2, 255
1561	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1562	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
1563	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
1564	srl		%l3, %l2, %l3
1565
1566	andcc		%i2, 16, %g0		! is number of blocks even?
1567	srlx		%i2, 4, %i2
1568	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1569	bz		%icc, .L256_cbc_dec_loop2x
1570	prefetch	[%i1], 22
1571.L256_cbc_dec_loop:
1572	ldx		[%i0 + 0], %o0
1573	brz,pt		%l0, 4f
1574	ldx		[%i0 + 8], %o1
1575
1576	ldx		[%i0 + 16], %o2
1577	sllx		%o0, %l0, %o0
1578	srlx		%o1, %l1, %g1
1579	sllx		%o1, %l0, %o1
1580	or		%g1, %o0, %o0
1581	srlx		%o2, %l1, %o2
1582	or		%o2, %o1, %o1
15834:
1584	xor		%g4, %o0, %o2		! ^= rk[0]
1585	xor		%g5, %o1, %o3
1586	.word	0x81b0230a !movxtod	%o2,%f0
1587	.word	0x85b0230b !movxtod	%o3,%f2
1588
1589	prefetch	[%i1 + 63], 22
1590	prefetch	[%i0 + 16+63], 20
1591	call		_cmll256_decrypt_1x
1592	add		%i0, 16, %i0
1593
1594	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1595	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1596	.word	0x99b02308 !movxtod	%o0,%f12
1597	.word	0x9db02309 !movxtod	%o1,%f14
1598
1599	brnz,pn		%l2, 2f
1600	sub		%i2, 1, %i2
1601
1602	std		%f0, [%i1 + 0]
1603	std		%f2, [%i1 + 8]
1604	brnz,pt		%i2, .L256_cbc_dec_loop2x
1605	add		%i1, 16, %i1
1606	st		%f12, [%i4 + 0]
1607	st		%f13, [%i4 + 4]
1608	st		%f14, [%i4 + 8]
1609	st		%f15, [%i4 + 12]
1610.L256_cbc_dec_abort:
1611	ret
1612	restore
1613
1614.align	16
16152:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1616						! and ~3x deterioration
1617						! in inp==out case
1618	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1619	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1620	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1621
1622	stda		%f4, [%i1 + %l3]0xc0	! partial store
1623	std		%f6, [%i1 + 8]
1624	add		%i1, 16, %i1
1625	orn		%g0, %l3, %l3
1626	stda		%f8, [%i1 + %l3]0xc0	! partial store
1627
1628	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
1629	orn		%g0, %l3, %l3
1630	st		%f12, [%i4 + 0]
1631	st		%f13, [%i4 + 4]
1632	st		%f14, [%i4 + 8]
1633	st		%f15, [%i4 + 12]
1634	ret
1635	restore
1636
1637!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1638.align	32
1639.L256_cbc_dec_loop2x:
1640	ldx		[%i0 + 0], %o0
1641	ldx		[%i0 + 8], %o1
1642	ldx		[%i0 + 16], %o2
1643	brz,pt		%l0, 4f
1644	ldx		[%i0 + 24], %o3
1645
1646	ldx		[%i0 + 32], %o4
1647	sllx		%o0, %l0, %o0
1648	srlx		%o1, %l1, %g1
1649	or		%g1, %o0, %o0
1650	sllx		%o1, %l0, %o1
1651	srlx		%o2, %l1, %g1
1652	or		%g1, %o1, %o1
1653	sllx		%o2, %l0, %o2
1654	srlx		%o3, %l1, %g1
1655	or		%g1, %o2, %o2
1656	sllx		%o3, %l0, %o3
1657	srlx		%o4, %l1, %o4
1658	or		%o4, %o3, %o3
16594:
1660	xor		%g4, %o0, %o4		! ^= rk[0]
1661	xor		%g5, %o1, %o5
1662	.word	0x81b0230c !movxtod	%o4,%f0
1663	.word	0x85b0230d !movxtod	%o5,%f2
1664	xor		%g4, %o2, %o4
1665	xor		%g5, %o3, %o5
1666	.word	0x89b0230c !movxtod	%o4,%f4
1667	.word	0x8db0230d !movxtod	%o5,%f6
1668
1669	prefetch	[%i1 + 63], 22
1670	prefetch	[%i0 + 32+63], 20
1671	call		_cmll256_decrypt_2x
1672	add		%i0, 32, %i0
1673
1674	.word	0x91b02308 !movxtod	%o0,%f8
1675	.word	0x95b02309 !movxtod	%o1,%f10
1676	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1677	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1678	.word	0x99b0230a !movxtod	%o2,%f12
1679	.word	0x9db0230b !movxtod	%o3,%f14
1680	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1681	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1682
1683	brnz,pn		%l2, 2f
1684	sub		%i2, 2, %i2
1685
1686	std		%f0, [%i1 + 0]
1687	std		%f2, [%i1 + 8]
1688	std		%f4, [%i1 + 16]
1689	std		%f6, [%i1 + 24]
1690	brnz,pt		%i2, .L256_cbc_dec_loop2x
1691	add		%i1, 32, %i1
1692	st		%f12, [%i4 + 0]
1693	st		%f13, [%i4 + 4]
1694	st		%f14, [%i4 + 8]
1695	st		%f15, [%i4 + 12]
1696	ret
1697	restore
1698
1699.align	16
17002:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1701						! and ~3x deterioration
1702						! in inp==out case
1703	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1704	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1705	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1706	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1707	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1708	stda		%f8, [%i1 + %l3]0xc0	! partial store
1709	std		%f0, [%i1 + 8]
1710	std		%f2, [%i1 + 16]
1711	std		%f4, [%i1 + 24]
1712	add		%i1, 32, %i1
1713	orn		%g0, %l3, %l3
1714	stda		%f6, [%i1 + %l3]0xc0	! partial store
1715
1716	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
1717	orn		%g0, %l3, %l3
1718	st		%f12, [%i4 + 0]
1719	st		%f13, [%i4 + 4]
1720	st		%f14, [%i4 + 8]
1721	st		%f15, [%i4 + 12]
1722	ret
1723	restore
1724
1725!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1726.align	32
1727.L256cbc_dec_blk:
1728	add	%i1, %i2, %l5
1729	and	%l5, 63, %l5	! tail
1730	sub	%i2, %l5, %i2
1731	add	%l5, 15, %l5	! round up to 16n
1732	srlx	%i2, 4, %i2
1733	srl	%l5, 4, %l5
1734	sub	%i2, 1, %i2
1735	add	%l5, 1, %l5
1736
1737.L256_cbc_dec_blk_loop2x:
1738	ldx		[%i0 + 0], %o0
1739	ldx		[%i0 + 8], %o1
1740	ldx		[%i0 + 16], %o2
1741	brz,pt		%l0, 5f
1742	ldx		[%i0 + 24], %o3
1743
1744	ldx		[%i0 + 32], %o4
1745	sllx		%o0, %l0, %o0
1746	srlx		%o1, %l1, %g1
1747	or		%g1, %o0, %o0
1748	sllx		%o1, %l0, %o1
1749	srlx		%o2, %l1, %g1
1750	or		%g1, %o1, %o1
1751	sllx		%o2, %l0, %o2
1752	srlx		%o3, %l1, %g1
1753	or		%g1, %o2, %o2
1754	sllx		%o3, %l0, %o3
1755	srlx		%o4, %l1, %o4
1756	or		%o4, %o3, %o3
17575:
1758	xor		%g4, %o0, %o4		! ^= rk[0]
1759	xor		%g5, %o1, %o5
1760	.word	0x81b0230c !movxtod	%o4,%f0
1761	.word	0x85b0230d !movxtod	%o5,%f2
1762	xor		%g4, %o2, %o4
1763	xor		%g5, %o3, %o5
1764	.word	0x89b0230c !movxtod	%o4,%f4
1765	.word	0x8db0230d !movxtod	%o5,%f6
1766
1767	prefetch	[%i0 + 32+63], 20
1768	call		_cmll256_decrypt_2x
1769	add		%i0, 32, %i0
1770	subcc		%i2, 2, %i2
1771
1772	.word	0x91b02308 !movxtod	%o0,%f8
1773	.word	0x95b02309 !movxtod	%o1,%f10
1774	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1775	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1776	.word	0x99b0230a !movxtod	%o2,%f12
1777	.word	0x9db0230b !movxtod	%o3,%f14
1778	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1779	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1780
1781	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1782	add		%i1, 8, %i1
1783	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1784	add		%i1, 8, %i1
1785	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1786	add		%i1, 8, %i1
1787	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1788	bgu,pt		SIZE_T_CC, .L256_cbc_dec_blk_loop2x
1789	add		%i1, 8, %i1
1790
1791	add		%l5, %i2, %i2
1792	andcc		%i2, 1, %g0		! is number of blocks even?
1793	membar		#StoreLoad|#StoreStore
1794	bnz,pt		%icc, .L256_cbc_dec_loop
1795	srl		%i2, 0, %i2
1796	brnz,pn		%i2, .L256_cbc_dec_loop2x
1797	nop
1798	st		%f12, [%i4 + 0]	! write out ivec
1799	st		%f13, [%i4 + 4]
1800	st		%f14, [%i4 + 8]
1801	st		%f15, [%i4 + 12]
1802	ret
1803	restore
1804.type	cmll256_t4_cbc_decrypt,#function
1805.size	cmll256_t4_cbc_decrypt,.-cmll256_t4_cbc_decrypt
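! cmll128_t4_ctr32_encrypt implements 32-bit counter mode: the top 96 bits of
! the counter block are folded into the rk[0] whitening once up front (the
! pre-whitened high half stays in %f14), only the low word in %l7 is
! incremented and truncated (srl ..., 0) per block, and the first round pair
! is computed inline before entering the shared helpers at
! _cmll128_encrypt_1x+8 and _cmll128_encrypt_2x+16, which skip the rounds
! already done.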
1806.globl	cmll128_t4_ctr32_encrypt
1807.align	32
1808cmll128_t4_ctr32_encrypt:
1809	save		%sp, -STACK_FRAME, %sp
1810	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1811
1812	prefetch	[%i0], 20
1813	prefetch	[%i0 + 63], 20
1814	call		_cmll128_load_enckey
1815	sllx		%i2, 4, %i2
1816
1817	ld		[%i4 + 0], %l4	! counter
1818	ld		[%i4 + 4], %l5
1819	ld		[%i4 + 8], %l6
1820	ld		[%i4 + 12], %l7
1821
1822	sllx		%l4, 32, %o5
1823	or		%l5, %o5, %o5
1824	sllx		%l6, 32, %g1
1825	xor		%o5, %g4, %g4		! ^= rk[0]
1826	xor		%g1, %g5, %g5
1827	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
1828
1829	sub		%i0, %i1, %l5	! %i0!=%i1
1830	and		%i0, 7, %l0
1831	andn		%i0, 7, %i0
1832	sll		%l0, 3, %l0
1833	mov		64, %l1
1834	mov		0xff, %l3
1835	sub		%l1, %l0, %l1
1836	and		%i1, 7, %l2
1837	cmp		%i2, 255
1838	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1839	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
1840	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
1841	srl		%l3, %l2, %l3
1842
1843	andcc		%i2, 16, %g0		! is number of blocks even?
1844	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1845	bz		%icc, .L128_ctr32_loop2x
1846	srlx		%i2, 4, %i2
1847.L128_ctr32_loop:
1848	ldx		[%i0 + 0], %o0
1849	brz,pt		%l0, 4f
1850	ldx		[%i0 + 8], %o1
1851
1852	ldx		[%i0 + 16], %o2
1853	sllx		%o0, %l0, %o0
1854	srlx		%o1, %l1, %g1
1855	sllx		%o1, %l0, %o1
1856	or		%g1, %o0, %o0
1857	srlx		%o2, %l1, %o2
1858	or		%o2, %o1, %o1
18594:
1860	xor		%g5, %l7, %g1		! ^= rk[0]
1861	add		%l7, 1, %l7
1862	.word	0x85b02301 !movxtod	%g1,%f2
1863	srl		%l7, 0, %l7		! clruw
1864	prefetch	[%i1 + 63], 22
1865	prefetch	[%i0 + 16+63], 20
1866	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
1867	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
1868	call		_cmll128_encrypt_1x+8
1869	add		%i0, 16, %i0
1870
1871	.word	0x95b02308 !movxtod	%o0,%f10
1872	.word	0x99b02309 !movxtod	%o1,%f12
1873	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
1874	.word	0x85b30d82 !fxor	%f12,%f2,%f2
1875
1876	brnz,pn		%l2, 2f
1877	sub		%i2, 1, %i2
1878
1879	std		%f0, [%i1 + 0]
1880	std		%f2, [%i1 + 8]
1881	brnz,pt		%i2, .L128_ctr32_loop2x
1882	add		%i1, 16, %i1
1883
1884	ret
1885	restore
1886
1887.align	16
18882:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1889						! and ~3x deterioration
1890						! in inp==out case
1891	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1892	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1893	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1894	stda		%f4, [%i1 + %l3]0xc0	! partial store
1895	std		%f6, [%i1 + 8]
1896	add		%i1, 16, %i1
1897	orn		%g0, %l3, %l3
1898	stda		%f8, [%i1 + %l3]0xc0	! partial store
1899
1900	brnz,pt		%i2, .L128_ctr32_loop2x+4
1901	orn		%g0, %l3, %l3
1902
1903	ret
1904	restore
1905
1906!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
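! Two blocks per iteration: generate two consecutive counter values,
! run the first rounds inline against the pre-whitened counter halves
! (%f14 high, %f2/%f6 low), then enter the shared _cmll128_encrypt_2x
! body just past the rounds already performed here.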
1907.align	32
1908.L128_ctr32_loop2x:
1909	ldx		[%i0 + 0], %o0
1910	ldx		[%i0 + 8], %o1
1911	ldx		[%i0 + 16], %o2
1912	brz,pt		%l0, 4f
1913	ldx		[%i0 + 24], %o3
1914
1915	ldx		[%i0 + 32], %o4
1916	sllx		%o0, %l0, %o0
1917	srlx		%o1, %l1, %g1
1918	or		%g1, %o0, %o0
1919	sllx		%o1, %l0, %o1
1920	srlx		%o2, %l1, %g1
1921	or		%g1, %o1, %o1
1922	sllx		%o2, %l0, %o2
1923	srlx		%o3, %l1, %g1
1924	or		%g1, %o2, %o2
1925	sllx		%o3, %l0, %o3
1926	srlx		%o4, %l1, %o4
1927	or		%o4, %o3, %o3
19284:
1929	xor		%g5, %l7, %g1		! ^= rk[0]
1930	add		%l7, 1, %l7
1931	.word	0x85b02301 !movxtod	%g1,%f2
1932	srl		%l7, 0, %l7		! clruw
1933	xor		%g5, %l7, %g1
1934	add		%l7, 1, %l7
1935	.word	0x8db02301 !movxtod	%g1,%f6
1936	srl		%l7, 0, %l7		! clruw
1937	prefetch	[%i1 + 63], 22
1938	prefetch	[%i0 + 32+63], 20
1939	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
1940	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
1941	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
1942	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
1943	call		_cmll128_encrypt_2x+16
1944	add		%i0, 32, %i0
1945
1946	.word	0x91b02308 !movxtod	%o0,%f8
1947	.word	0x95b02309 !movxtod	%o1,%f10
1948	.word	0x99b0230a !movxtod	%o2,%f12
1949	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
1950	.word	0x91b0230b !movxtod	%o3,%f8
1951	.word	0x85b28d82 !fxor	%f10,%f2,%f2
1952	.word	0x89b30d84 !fxor	%f12,%f4,%f4
1953	.word	0x8db20d86 !fxor	%f8,%f6,%f6
1954
1955	brnz,pn		%l2, 2f
1956	sub		%i2, 2, %i2
1957
1958	std		%f0, [%i1 + 0]
1959	std		%f2, [%i1 + 8]
1960	std		%f4, [%i1 + 16]
1961	std		%f6, [%i1 + 24]
1962	brnz,pt		%i2, .L128_ctr32_loop2x
1963	add		%i1, 32, %i1
1964
1965	ret
1966	restore
1967
1968.align	16
19692:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1970						! and ~3x deterioration
1971						! in inp==out case
1972	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1973	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1974	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1975	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1976	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1977
1978	stda		%f8, [%i1 + %l3]0xc0	! partial store
1979	std		%f0, [%i1 + 8]
1980	std		%f2, [%i1 + 16]
1981	std		%f4, [%i1 + 24]
1982	add		%i1, 32, %i1
1983	orn		%g0, %l3, %l3
1984	stda		%f6, [%i1 + %l3]0xc0	! partial store
1985
1986	brnz,pt		%i2, .L128_ctr32_loop2x+4
1987	orn		%g0, %l3, %l3
1988
1989	ret
1990	restore
1991
1992!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
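! Bulk variant: taken only when the output is 8-byte aligned, input
! and output do not overlap, and at least 256 bytes remain.  Results
! are written with 8-byte stores to ASI 0xe2 (block-initializing,
! T4-specific), which initialize the destination cache lines instead
! of fetching them, so the output past the last 64-byte boundary,
! rounded up to whole blocks plus one extra block, is carved off here
! and deferred to the ordinary loops above.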
1993.align	32
1994.L128_ctr32_blk:
1995	add	%i1, %i2, %l5
1996	and	%l5, 63, %l5	! tail
1997	sub	%i2, %l5, %i2
1998	add	%l5, 15, %l5	! round up to 16n
1999	srlx	%i2, 4, %i2
2000	srl	%l5, 4, %l5
2001	sub	%i2, 1, %i2
2002	add	%l5, 1, %l5
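! Same two-block key-stream generation as .L128_ctr32_loop2x, but the
! results go out through 8-byte block-initializing stores and the loop
! count (%i2, now in blocks) excludes the deferred tail.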
2003
2004.L128_ctr32_blk_loop2x:
2005	ldx		[%i0 + 0], %o0
2006	ldx		[%i0 + 8], %o1
2007	ldx		[%i0 + 16], %o2
2008	brz,pt		%l0, 5f
2009	ldx		[%i0 + 24], %o3
2010
2011	ldx		[%i0 + 32], %o4
2012	sllx		%o0, %l0, %o0
2013	srlx		%o1, %l1, %g1
2014	or		%g1, %o0, %o0
2015	sllx		%o1, %l0, %o1
2016	srlx		%o2, %l1, %g1
2017	or		%g1, %o1, %o1
2018	sllx		%o2, %l0, %o2
2019	srlx		%o3, %l1, %g1
2020	or		%g1, %o2, %o2
2021	sllx		%o3, %l0, %o3
2022	srlx		%o4, %l1, %o4
2023	or		%o4, %o3, %o3
20245:
2025	xor		%g5, %l7, %g1		! ^= rk[0]
2026	add		%l7, 1, %l7
2027	.word	0x85b02301 !movxtod	%g1,%f2
2028	srl		%l7, 0, %l7		! clruw
2029	xor		%g5, %l7, %g1
2030	add		%l7, 1, %l7
2031	.word	0x8db02301 !movxtod	%g1,%f6
2032	srl		%l7, 0, %l7		! clruw
2033	prefetch	[%i0 + 32+63], 20
2034	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2035	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2036	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2037	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2038	call		_cmll128_encrypt_2x+16
2039	add		%i0, 32, %i0
2040	subcc		%i2, 2, %i2
2041
2042	.word	0x91b02308 !movxtod	%o0,%f8
2043	.word	0x95b02309 !movxtod	%o1,%f10
2044	.word	0x99b0230a !movxtod	%o2,%f12
2045	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2046	.word	0x91b0230b !movxtod	%o3,%f8
2047	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2048	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2049	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2050
2051	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2052	add		%i1, 8, %i1
2053	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2054	add		%i1, 8, %i1
2055	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2056	add		%i1, 8, %i1
2057	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2058	bgu,pt		SIZE_T_CC, .L128_ctr32_blk_loop2x
2059	add		%i1, 8, %i1
2060
2061	add		%l5, %i2, %i2
2062	andcc		%i2, 1, %g0		! is number of blocks even?
2063	membar		#StoreLoad|#StoreStore
2064	bnz,pt		%icc, .L128_ctr32_loop
2065	srl		%i2, 0, %i2
2066	brnz,pn		%i2, .L128_ctr32_loop2x
2067	nop
2068
2069	ret
2070	restore
2071.type	cmll128_t4_ctr32_encrypt,#function
2072.size	cmll128_t4_ctr32_encrypt,.-cmll128_t4_ctr32_encrypt
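! Camellia-256 CTR32 encryption: same interface and control flow as
! cmll128_t4_ctr32_encrypt above, but it loads the larger key schedule
! (_cmll256_load_enckey) and runs the longer _cmll256_encrypt_1x and
! _cmll256_encrypt_2x bodies.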
2073.globl	cmll256_t4_ctr32_encrypt
2074.align	32
2075cmll256_t4_ctr32_encrypt:
2076	save		%sp, -STACK_FRAME, %sp
2077	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2078
2079	prefetch	[%i0], 20
2080	prefetch	[%i0 + 63], 20
2081	call		_cmll256_load_enckey
2082	sllx		%i2, 4, %i2
2083
2084	ld		[%i4 + 0], %l4	! counter
2085	ld		[%i4 + 4], %l5
2086	ld		[%i4 + 8], %l6
2087	ld		[%i4 + 12], %l7
2088
2089	sllx		%l4, 32, %o5
2090	or		%l5, %o5, %o5
2091	sllx		%l6, 32, %g1
2092	xor		%o5, %g4, %g4		! ^= rk[0]
2093	xor		%g1, %g5, %g5
2094	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
2095
2096	sub		%i0, %i1, %l5	! %i0!=%i1
2097	and		%i0, 7, %l0
2098	andn		%i0, 7, %i0
2099	sll		%l0, 3, %l0
2100	mov		64, %l1
2101	mov		0xff, %l3
2102	sub		%l1, %l0, %l1
2103	and		%i1, 7, %l2
2104	cmp		%i2, 255
2105	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2106	movleu		SIZE_T_CC, 0, %l5	!	%i2<256 ||
2107	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
2108	srl		%l3, %l2, %l3
2109
2110	andcc		%i2, 16, %g0		! is number of blocks even?
2111	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2112	bz		%icc, .L256_ctr32_loop2x
2113	srlx		%i2, 4, %i2
2114.L256_ctr32_loop:
2115	ldx		[%i0 + 0], %o0
2116	brz,pt		%l0, 4f
2117	ldx		[%i0 + 8], %o1
2118
2119	ldx		[%i0 + 16], %o2
2120	sllx		%o0, %l0, %o0
2121	srlx		%o1, %l1, %g1
2122	sllx		%o1, %l0, %o1
2123	or		%g1, %o0, %o0
2124	srlx		%o2, %l1, %o2
2125	or		%o2, %o1, %o1
21264:
2127	xor		%g5, %l7, %g1		! ^= rk[0]
2128	add		%l7, 1, %l7
2129	.word	0x85b02301 !movxtod	%g1,%f2
2130	srl		%l7, 0, %l7		! clruw
2131	prefetch	[%i1 + 63], 22
2132	prefetch	[%i0 + 16+63], 20
2133	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2134	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2135	call		_cmll256_encrypt_1x+8
2136	add		%i0, 16, %i0
2137
2138	.word	0x95b02308 !movxtod	%o0,%f10
2139	.word	0x99b02309 !movxtod	%o1,%f12
2140	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
2141	.word	0x85b30d82 !fxor	%f12,%f2,%f2
2142
2143	brnz,pn		%l2, 2f
2144	sub		%i2, 1, %i2
2145
2146	std		%f0, [%i1 + 0]
2147	std		%f2, [%i1 + 8]
2148	brnz,pt		%i2, .L256_ctr32_loop2x
2149	add		%i1, 16, %i1
2150
2151	ret
2152	restore
2153
2154.align	16
21552:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2156						! and ~3x deterioration
2157						! in inp==out case
2158	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2159	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2160	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2161	stda		%f4, [%i1 + %l3]0xc0	! partial store
2162	std		%f6, [%i1 + 8]
2163	add		%i1, 16, %i1
2164	orn		%g0, %l3, %l3
2165	stda		%f8, [%i1 + %l3]0xc0	! partial store
2166
2167	brnz,pt		%i2, .L256_ctr32_loop2x+4
2168	orn		%g0, %l3, %l3
2169
2170	ret
2171	restore
2172
2173!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2174.align	32
2175.L256_ctr32_loop2x:
2176	ldx		[%i0 + 0], %o0
2177	ldx		[%i0 + 8], %o1
2178	ldx		[%i0 + 16], %o2
2179	brz,pt		%l0, 4f
2180	ldx		[%i0 + 24], %o3
2181
2182	ldx		[%i0 + 32], %o4
2183	sllx		%o0, %l0, %o0
2184	srlx		%o1, %l1, %g1
2185	or		%g1, %o0, %o0
2186	sllx		%o1, %l0, %o1
2187	srlx		%o2, %l1, %g1
2188	or		%g1, %o1, %o1
2189	sllx		%o2, %l0, %o2
2190	srlx		%o3, %l1, %g1
2191	or		%g1, %o2, %o2
2192	sllx		%o3, %l0, %o3
2193	srlx		%o4, %l1, %o4
2194	or		%o4, %o3, %o3
21954:
2196	xor		%g5, %l7, %g1		! ^= rk[0]
2197	add		%l7, 1, %l7
2198	.word	0x85b02301 !movxtod	%g1,%f2
2199	srl		%l7, 0, %l7		! clruw
2200	xor		%g5, %l7, %g1
2201	add		%l7, 1, %l7
2202	.word	0x8db02301 !movxtod	%g1,%f6
2203	srl		%l7, 0, %l7		! clruw
2204	prefetch	[%i1 + 63], 22
2205	prefetch	[%i0 + 32+63], 20
2206	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2207	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2208	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2209	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2210	call		_cmll256_encrypt_2x+16
2211	add		%i0, 32, %i0
2212
2213	.word	0x91b02308 !movxtod	%o0,%f8
2214	.word	0x95b02309 !movxtod	%o1,%f10
2215	.word	0x99b0230a !movxtod	%o2,%f12
2216	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2217	.word	0x91b0230b !movxtod	%o3,%f8
2218	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2219	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2220	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2221
2222	brnz,pn		%l2, 2f
2223	sub		%i2, 2, %i2
2224
2225	std		%f0, [%i1 + 0]
2226	std		%f2, [%i1 + 8]
2227	std		%f4, [%i1 + 16]
2228	std		%f6, [%i1 + 24]
2229	brnz,pt		%i2, .L256_ctr32_loop2x
2230	add		%i1, 32, %i1
2231
2232	ret
2233	restore
2234
2235.align	16
22362:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2237						! and ~3x deterioration
2238						! in inp==out case
2239	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
2240	.word	0x81b00902 !faligndata	%f0,%f2,%f0
2241	.word	0x85b08904 !faligndata	%f2,%f4,%f2
2242	.word	0x89b10906 !faligndata	%f4,%f6,%f4
2243	.word	0x8db18906 !faligndata	%f6,%f6,%f6
2244
2245	stda		%f8, [%i1 + %l3]0xc0	! partial store
2246	std		%f0, [%i1 + 8]
2247	std		%f2, [%i1 + 16]
2248	std		%f4, [%i1 + 24]
2249	add		%i1, 32, %i1
2250	orn		%g0, %l3, %l3
2251	stda		%f6, [%i1 + %l3]0xc0	! partial store
2252
2253	brnz,pt		%i2, .L256_ctr32_loop2x+4
2254	orn		%g0, %l3, %l3
2255
2256	ret
2257	restore
2258
2259!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
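! Bulk variant, identical in structure to .L128_ctr32_blk above apart
! from the 256-bit encrypt body.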
2260.align	32
2261.L256_ctr32_blk:
2262	add	%i1, %i2, %l5
2263	and	%l5, 63, %l5	! tail
2264	sub	%i2, %l5, %i2
2265	add	%l5, 15, %l5	! round up to 16n
2266	srlx	%i2, 4, %i2
2267	srl	%l5, 4, %l5
2268	sub	%i2, 1, %i2
2269	add	%l5, 1, %l5
2270
2271.L256_ctr32_blk_loop2x:
2272	ldx		[%i0 + 0], %o0
2273	ldx		[%i0 + 8], %o1
2274	ldx		[%i0 + 16], %o2
2275	brz,pt		%l0, 5f
2276	ldx		[%i0 + 24], %o3
2277
2278	ldx		[%i0 + 32], %o4
2279	sllx		%o0, %l0, %o0
2280	srlx		%o1, %l1, %g1
2281	or		%g1, %o0, %o0
2282	sllx		%o1, %l0, %o1
2283	srlx		%o2, %l1, %g1
2284	or		%g1, %o1, %o1
2285	sllx		%o2, %l0, %o2
2286	srlx		%o3, %l1, %g1
2287	or		%g1, %o2, %o2
2288	sllx		%o3, %l0, %o3
2289	srlx		%o4, %l1, %o4
2290	or		%o4, %o3, %o3
22915:
2292	xor		%g5, %l7, %g1		! ^= rk[0]
2293	add		%l7, 1, %l7
2294	.word	0x85b02301 !movxtod	%g1,%f2
2295	srl		%l7, 0, %l7		! clruw
2296	xor		%g5, %l7, %g1
2297	add		%l7, 1, %l7
2298	.word	0x8db02301 !movxtod	%g1,%f6
2299	srl		%l7, 0, %l7		! clruw
2300	prefetch	[%i0 + 32+63], 20
2301	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2302	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2303	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2304	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2305	call		_cmll256_encrypt_2x+16
2306	add		%i0, 32, %i0
2307	subcc		%i2, 2, %i2
2308
2309	.word	0x91b02308 !movxtod	%o0,%f8
2310	.word	0x95b02309 !movxtod	%o1,%f10
2311	.word	0x99b0230a !movxtod	%o2,%f12
2312	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2313	.word	0x91b0230b !movxtod	%o3,%f8
2314	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2315	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2316	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2317
2318	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2319	add		%i1, 8, %i1
2320	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2321	add		%i1, 8, %i1
2322	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2323	add		%i1, 8, %i1
2324	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2325	bgu,pt		SIZE_T_CC, .L256_ctr32_blk_loop2x
2326	add		%i1, 8, %i1
2327
2328	add		%l5, %i2, %i2
2329	andcc		%i2, 1, %g0		! is number of blocks even?
2330	membar		#StoreLoad|#StoreStore
2331	bnz,pt		%icc, .L256_ctr32_loop
2332	srl		%i2, 0, %i2
2333	brnz,pn		%i2, .L256_ctr32_loop2x
2334	nop
2335
2336	ret
2337	restore
2338.type	cmll256_t4_ctr32_encrypt,#function
2339.size	cmll256_t4_ctr32_encrypt,.-cmll256_t4_ctr32_encrypt
2340