xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
/* File-level directives: record the source file name, select the code section, and embed an identification string. */
1.file	"aesni-x86.s"
2.text
/* NUL-terminated identification string; it is emitted into .text ahead of the first function. */
3.ascii	"AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>\0"
/*
 * aesni_encrypt: encrypt a single 16-byte block with the AES-NI round
 * instructions.
 *
 * Stack arguments (relative to %esp on entry):
 *    4(%esp)  pointer to the 16-byte input block
 *    8(%esp)  pointer that receives the 16-byte result
 *   12(%esp)  pointer to the key schedule: 16-byte round keys starting at
 *             offset 0, loop count at offset 240
 *
 * The count at offset 240 is the number of aesenc steps executed before the
 * final aesenclast (the key-setup code in this file stores 9, 11 or 13).
 *
 * Clobbers: %eax, %ecx, %edx, %xmm0, %xmm3, %xmm4, flags.
 */
4.globl	aesni_encrypt
5.type	aesni_encrypt,@function
6.align	16
7aesni_encrypt:
8.L_aesni_encrypt_begin:
9	movl	4(%esp),%eax
10	movl	12(%esp),%edx
11	movups	(%eax),%xmm0
12	movl	240(%edx),%ecx
	/* The input block is already in %xmm0, so %eax is reused for the output pointer. */
13	movl	8(%esp),%eax
14	movups	(%edx),%xmm3
15	movups	16(%edx),%xmm4
16	leal	32(%edx),%edx
	/* Initial whitening: XOR the block with round key 0. */
17	pxor	%xmm3,%xmm0
18.L000enc1_loop:
19	aesenc	%xmm4,%xmm0
	/* decl sets ZF for the jnz below; the movups and leal in between leave the flags alone. */
20	decl	%ecx
21	movups	(%edx),%xmm4
22	leal	16(%edx),%edx
23	jnz	.L000enc1_loop
	/* %xmm4 now holds the last round key fetched by the loop. */
24	aesenclast	%xmm4,%xmm0
25	movups	%xmm0,(%eax)
26	ret
27.size	aesni_encrypt,.-.L_aesni_encrypt_begin
/*
 * aesni_decrypt: decrypt a single 16-byte block with the AES-NI round
 * instructions.
 *
 * Stack arguments (relative to %esp on entry):
 *    4(%esp)  pointer to the 16-byte input block
 *    8(%esp)  pointer that receives the 16-byte result
 *   12(%esp)  pointer to the key schedule: 16-byte round keys starting at
 *             offset 0, loop count at offset 240
 *
 * The round keys are consumed in ascending address order, so the schedule
 * must already be laid out for decryption (aesni_set_decrypt_key in this
 * file produces such a layout).
 *
 * Clobbers: %eax, %ecx, %edx, %xmm0, %xmm3, %xmm4, flags.
 */
28.globl	aesni_decrypt
29.type	aesni_decrypt,@function
30.align	16
31aesni_decrypt:
32.L_aesni_decrypt_begin:
33	movl	4(%esp),%eax
34	movl	12(%esp),%edx
35	movups	(%eax),%xmm0
36	movl	240(%edx),%ecx
	/* The input block is already in %xmm0, so %eax is reused for the output pointer. */
37	movl	8(%esp),%eax
38	movups	(%edx),%xmm3
39	movups	16(%edx),%xmm4
40	leal	32(%edx),%edx
	/* Initial whitening: XOR the block with the first key of the schedule. */
41	pxor	%xmm3,%xmm0
42.L001dec1_loop:
43	aesdec	%xmm4,%xmm0
	/* decl sets ZF for the jnz below; the movups and leal in between leave the flags alone. */
44	decl	%ecx
45	movups	(%edx),%xmm4
46	leal	16(%edx),%edx
47	jnz	.L001dec1_loop
	/* %xmm4 now holds the last round key fetched by the loop. */
48	aesdeclast	%xmm4,%xmm0
49	movups	%xmm0,(%eax)
50	ret
51.size	aesni_decrypt,.-.L_aesni_decrypt_begin
/*
 * _aesni_encrypt3: internal helper that encrypts three blocks in parallel.
 *
 * Register interface (no stack arguments are read):
 *   In:  %edx = key schedule pointer
 *        %ecx = loop count taken from the schedule
 *        %xmm0, %xmm1, %xmm2 = the three blocks
 *   Out: %xmm0, %xmm1, %xmm2 = the processed blocks
 *   Clobbers: %ecx, %edx, %xmm3, %xmm4, flags
 *
 * The loop applies two round keys per iteration and runs count>>1 times; one
 * more aesenc and the aesenclast follow the loop. For the odd counts written
 * by the key setup in this file (9/11/13) that is "count" aesenc steps plus
 * the final aesenclast.
 */
52.type	_aesni_encrypt3,@function
53.align	16
54_aesni_encrypt3:
55	movups	(%edx),%xmm3
	/* Halve the count: each loop iteration below consumes two round keys. */
56	shrl	$1,%ecx
57	movups	16(%edx),%xmm4
58	leal	32(%edx),%edx
	/* Initial whitening of all three blocks with round key 0. */
59	pxor	%xmm3,%xmm0
60	pxor	%xmm3,%xmm1
61	pxor	%xmm3,%xmm2
	/* Jump over the alignment padding placed in front of the loop head. */
62	jmp	.L002enc3_loop
63.align	16
64.L002enc3_loop:
	/* Key loads are interleaved with the rounds; %xmm3 and %xmm4 alternate as the current round key. */
65	aesenc	%xmm4,%xmm0
66	movups	(%edx),%xmm3
67	aesenc	%xmm4,%xmm1
	/* decl sets ZF for the jnz at the bottom; none of the instructions in between modify the flags. */
68	decl	%ecx
69	aesenc	%xmm4,%xmm2
70	movups	16(%edx),%xmm4
71	aesenc	%xmm3,%xmm0
72	leal	32(%edx),%edx
73	aesenc	%xmm3,%xmm1
74	aesenc	%xmm3,%xmm2
75	jnz	.L002enc3_loop
	/* Tail: one more ordinary round with %xmm4, then the final round with the key loaded into %xmm3. */
76	aesenc	%xmm4,%xmm0
77	movups	(%edx),%xmm3
78	aesenc	%xmm4,%xmm1
79	aesenc	%xmm4,%xmm2
80	aesenclast	%xmm3,%xmm0
81	aesenclast	%xmm3,%xmm1
82	aesenclast	%xmm3,%xmm2
83	ret
84.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: internal helper that decrypts three blocks in parallel.
 *
 * Register interface (no stack arguments are read):
 *   In:  %edx = key schedule pointer (keys are consumed in ascending order)
 *        %ecx = loop count taken from the schedule
 *        %xmm0, %xmm1, %xmm2 = the three blocks
 *   Out: %xmm0, %xmm1, %xmm2 = the processed blocks
 *   Clobbers: %ecx, %edx, %xmm3, %xmm4, flags
 *
 * The loop applies two round keys per iteration and runs count>>1 times; one
 * more aesdec and the aesdeclast follow the loop.
 */
85.type	_aesni_decrypt3,@function
86.align	16
87_aesni_decrypt3:
88	movups	(%edx),%xmm3
	/* Halve the count: each loop iteration below consumes two round keys. */
89	shrl	$1,%ecx
90	movups	16(%edx),%xmm4
91	leal	32(%edx),%edx
	/* Initial whitening of all three blocks with the first key of the schedule. */
92	pxor	%xmm3,%xmm0
93	pxor	%xmm3,%xmm1
94	pxor	%xmm3,%xmm2
	/* Jump over the alignment padding placed in front of the loop head. */
95	jmp	.L003dec3_loop
96.align	16
97.L003dec3_loop:
	/* Key loads are interleaved with the rounds; %xmm3 and %xmm4 alternate as the current round key. */
98	aesdec	%xmm4,%xmm0
99	movups	(%edx),%xmm3
100	aesdec	%xmm4,%xmm1
	/* decl sets ZF for the jnz at the bottom; none of the instructions in between modify the flags. */
101	decl	%ecx
102	aesdec	%xmm4,%xmm2
103	movups	16(%edx),%xmm4
104	aesdec	%xmm3,%xmm0
105	leal	32(%edx),%edx
106	aesdec	%xmm3,%xmm1
107	aesdec	%xmm3,%xmm2
108	jnz	.L003dec3_loop
	/* Tail: one more ordinary round with %xmm4, then the final round with the key loaded into %xmm3. */
109	aesdec	%xmm4,%xmm0
110	movups	(%edx),%xmm3
111	aesdec	%xmm4,%xmm1
112	aesdec	%xmm4,%xmm2
113	aesdeclast	%xmm3,%xmm0
114	aesdeclast	%xmm3,%xmm1
115	aesdeclast	%xmm3,%xmm2
116	ret
117.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: internal helper that encrypts four blocks in parallel.
 *
 * Register interface (no stack arguments are read):
 *   In:  %edx = key schedule pointer
 *        %ecx = loop count taken from the schedule
 *        %xmm0, %xmm1, %xmm2, %xmm7 = the four blocks
 *   Out: %xmm0, %xmm1, %xmm2, %xmm7 = the processed blocks
 *   Clobbers: %ecx, %edx, %xmm3, %xmm4, flags
 *
 * Same structure as the three-block helper: count>>1 iterations of two
 * rounds each, then one more aesenc and the aesenclast. The loop label keeps
 * the "enc3" name even though four blocks are processed.
 */
118.type	_aesni_encrypt4,@function
119.align	16
120_aesni_encrypt4:
121	movups	(%edx),%xmm3
122	movups	16(%edx),%xmm4
	/* Halve the count: each loop iteration below consumes two round keys. */
123	shrl	$1,%ecx
124	leal	32(%edx),%edx
	/* Initial whitening of all four blocks with round key 0. */
125	pxor	%xmm3,%xmm0
126	pxor	%xmm3,%xmm1
127	pxor	%xmm3,%xmm2
128	pxor	%xmm3,%xmm7
	/* Jump over the alignment padding placed in front of the loop head. */
129	jmp	.L004enc3_loop
130.align	16
131.L004enc3_loop:
132	aesenc	%xmm4,%xmm0
133	movups	(%edx),%xmm3
134	aesenc	%xmm4,%xmm1
	/* decl sets ZF for the jnz at the bottom; none of the instructions in between modify the flags. */
135	decl	%ecx
136	aesenc	%xmm4,%xmm2
137	aesenc	%xmm4,%xmm7
138	movups	16(%edx),%xmm4
139	aesenc	%xmm3,%xmm0
140	leal	32(%edx),%edx
141	aesenc	%xmm3,%xmm1
142	aesenc	%xmm3,%xmm2
143	aesenc	%xmm3,%xmm7
144	jnz	.L004enc3_loop
	/* Tail: one more ordinary round with %xmm4, then the final round with the key loaded into %xmm3. */
145	aesenc	%xmm4,%xmm0
146	movups	(%edx),%xmm3
147	aesenc	%xmm4,%xmm1
148	aesenc	%xmm4,%xmm2
149	aesenc	%xmm4,%xmm7
150	aesenclast	%xmm3,%xmm0
151	aesenclast	%xmm3,%xmm1
152	aesenclast	%xmm3,%xmm2
153	aesenclast	%xmm3,%xmm7
154	ret
155.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: internal helper that decrypts four blocks in parallel.
 *
 * Register interface (no stack arguments are read):
 *   In:  %edx = key schedule pointer (keys are consumed in ascending order)
 *        %ecx = loop count taken from the schedule
 *        %xmm0, %xmm1, %xmm2, %xmm7 = the four blocks
 *   Out: %xmm0, %xmm1, %xmm2, %xmm7 = the processed blocks
 *   Clobbers: %ecx, %edx, %xmm3, %xmm4, flags
 *
 * Same structure as the three-block helper: count>>1 iterations of two
 * rounds each, then one more aesdec and the aesdeclast. The loop label keeps
 * the "dec3" name even though four blocks are processed.
 */
156.type	_aesni_decrypt4,@function
157.align	16
158_aesni_decrypt4:
159	movups	(%edx),%xmm3
160	movups	16(%edx),%xmm4
	/* Halve the count: each loop iteration below consumes two round keys. */
161	shrl	$1,%ecx
162	leal	32(%edx),%edx
	/* Initial whitening of all four blocks with the first key of the schedule. */
163	pxor	%xmm3,%xmm0
164	pxor	%xmm3,%xmm1
165	pxor	%xmm3,%xmm2
166	pxor	%xmm3,%xmm7
	/* Jump over the alignment padding placed in front of the loop head. */
167	jmp	.L005dec3_loop
168.align	16
169.L005dec3_loop:
170	aesdec	%xmm4,%xmm0
171	movups	(%edx),%xmm3
172	aesdec	%xmm4,%xmm1
	/* decl sets ZF for the jnz at the bottom; none of the instructions in between modify the flags. */
173	decl	%ecx
174	aesdec	%xmm4,%xmm2
175	aesdec	%xmm4,%xmm7
176	movups	16(%edx),%xmm4
177	aesdec	%xmm3,%xmm0
178	leal	32(%edx),%edx
179	aesdec	%xmm3,%xmm1
180	aesdec	%xmm3,%xmm2
181	aesdec	%xmm3,%xmm7
182	jnz	.L005dec3_loop
	/* Tail: one more ordinary round with %xmm4, then the final round with the key loaded into %xmm3. */
183	aesdec	%xmm4,%xmm0
184	movups	(%edx),%xmm3
185	aesdec	%xmm4,%xmm1
186	aesdec	%xmm4,%xmm2
187	aesdec	%xmm4,%xmm7
188	aesdeclast	%xmm3,%xmm0
189	aesdeclast	%xmm3,%xmm1
190	aesdeclast	%xmm3,%xmm2
191	aesdeclast	%xmm3,%xmm7
192	ret
193.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * aesni_ecb_encrypt: process a buffer block by block (ECB), encrypting or
 * decrypting depending on a flag.
 *
 * Stack arguments (offsets valid after the four register pushes below):
 *   20(%esp)  source pointer
 *   24(%esp)  destination pointer
 *   28(%esp)  length in bytes
 *   32(%esp)  key schedule pointer (loop count at offset 240)
 *   36(%esp)  direction flag: nonzero takes the encrypt path, zero the
 *             decrypt path
 *
 * A length below 16 returns immediately; otherwise the length is rounded
 * down to a multiple of 16, so trailing bytes are ignored.
 *
 * Register roles: %esi = source, %edi = destination, %eax = bytes remaining,
 * %ebp / %ebx = saved key pointer / count, %edx / %ecx = working copies that
 * the helpers consume and that are reloaded after every call.
 * %ebp, %ebx, %esi and %edi are saved on entry and restored before returning.
 */
194.globl	aesni_ecb_encrypt
195.type	aesni_ecb_encrypt,@function
196.align	16
197aesni_ecb_encrypt:
198.L_aesni_ecb_encrypt_begin:
199	pushl	%ebp
200	pushl	%ebx
201	pushl	%esi
202	pushl	%edi
203	movl	20(%esp),%esi
204	movl	24(%esp),%edi
205	movl	28(%esp),%eax
206	movl	32(%esp),%edx
207	movl	36(%esp),%ecx
208	cmpl	$16,%eax
209	jb	.L006ecb_ret
210	andl	$-16,%eax
	/* Test the direction flag now; the three movl instructions that follow do not change the flags, so the jz still sees this result. */
211	testl	%ecx,%ecx
212	movl	240(%edx),%ecx
213	movl	%edx,%ebp
214	movl	%ecx,%ebx
215	jz	.L007ecb_decrypt
	/* Bias the length by 64 so the three-block loop only runs while more than 64 bytes remain; the tail then handles 16..64 bytes. */
216	subl	$64,%eax
217	jbe	.L008ecb_enc_tail
218	jmp	.L009ecb_enc_loop3
219.align	16
220.L009ecb_enc_loop3:
221	movups	(%esi),%xmm0
222	movups	16(%esi),%xmm1
223	movups	32(%esi),%xmm2
224	call	_aesni_encrypt3
	/* subl sets the flags consumed by the ja at the bottom; the leal/movups/movl in between do not modify them. */
225	subl	$48,%eax
226	leal	48(%esi),%esi
227	leal	48(%edi),%edi
228	movups	%xmm0,-48(%edi)
	/* Reload the key pointer and count that the helper advanced/consumed. */
229	movl	%ebp,%edx
230	movups	%xmm1,-32(%edi)
231	movl	%ebx,%ecx
232	movups	%xmm2,-16(%edi)
233	ja	.L009ecb_enc_loop3
234.L008ecb_enc_tail:
	/* Undo the bias: %eax is now the number of bytes left. */
235	addl	$64,%eax
236	jz	.L006ecb_ret
	/* Each cmpl is followed by a movups load before its je; movups does not affect the flags. */
237	cmpl	$16,%eax
238	movups	(%esi),%xmm0
239	je	.L010ecb_enc_one
240	cmpl	$32,%eax
241	movups	16(%esi),%xmm1
242	je	.L011ecb_enc_two
243	cmpl	$48,%eax
244	movups	32(%esi),%xmm2
245	je	.L012ecb_enc_three
	/* Four blocks remain. */
246	movups	48(%esi),%xmm7
247	call	_aesni_encrypt4
248	movups	%xmm0,(%edi)
249	movups	%xmm1,16(%edi)
250	movups	%xmm2,32(%edi)
251	movups	%xmm7,48(%edi)
252	jmp	.L006ecb_ret
253.align	16
254.L010ecb_enc_one:
	/* Single block: inline one-block encryption using %edx/%ecx as key pointer/count. */
255	movups	(%edx),%xmm3
256	movups	16(%edx),%xmm4
257	leal	32(%edx),%edx
258	pxor	%xmm3,%xmm0
259.L013enc1_loop:
260	aesenc	%xmm4,%xmm0
261	decl	%ecx
262	movups	(%edx),%xmm4
263	leal	16(%edx),%edx
264	jnz	.L013enc1_loop
265	aesenclast	%xmm4,%xmm0
266	movups	%xmm0,(%edi)
267	jmp	.L006ecb_ret
268.align	16
269.L011ecb_enc_two:
	/* Two blocks: %xmm2 was not loaded on this path; the three-block helper still processes it and that result is discarded. */
270	call	_aesni_encrypt3
271	movups	%xmm0,(%edi)
272	movups	%xmm1,16(%edi)
273	jmp	.L006ecb_ret
274.align	16
275.L012ecb_enc_three:
276	call	_aesni_encrypt3
277	movups	%xmm0,(%edi)
278	movups	%xmm1,16(%edi)
279	movups	%xmm2,32(%edi)
280	jmp	.L006ecb_ret
281.align	16
282.L007ecb_decrypt:
	/* Decrypt path: same structure as the encrypt path above, using the decrypt helpers. */
283	subl	$64,%eax
284	jbe	.L014ecb_dec_tail
285	jmp	.L015ecb_dec_loop3
286.align	16
287.L015ecb_dec_loop3:
288	movups	(%esi),%xmm0
289	movups	16(%esi),%xmm1
290	movups	32(%esi),%xmm2
291	call	_aesni_decrypt3
	/* subl sets the flags consumed by the ja at the bottom; the leal/movups/movl in between do not modify them. */
292	subl	$48,%eax
293	leal	48(%esi),%esi
294	leal	48(%edi),%edi
295	movups	%xmm0,-48(%edi)
	/* Reload the key pointer and count that the helper advanced/consumed. */
296	movl	%ebp,%edx
297	movups	%xmm1,-32(%edi)
298	movl	%ebx,%ecx
299	movups	%xmm2,-16(%edi)
300	ja	.L015ecb_dec_loop3
301.L014ecb_dec_tail:
	/* Undo the bias: %eax is now the number of bytes left. */
302	addl	$64,%eax
303	jz	.L006ecb_ret
304	cmpl	$16,%eax
305	movups	(%esi),%xmm0
306	je	.L016ecb_dec_one
307	cmpl	$32,%eax
308	movups	16(%esi),%xmm1
309	je	.L017ecb_dec_two
310	cmpl	$48,%eax
311	movups	32(%esi),%xmm2
312	je	.L018ecb_dec_three
	/* Four blocks remain. */
313	movups	48(%esi),%xmm7
314	call	_aesni_decrypt4
315	movups	%xmm0,(%edi)
316	movups	%xmm1,16(%edi)
317	movups	%xmm2,32(%edi)
318	movups	%xmm7,48(%edi)
319	jmp	.L006ecb_ret
320.align	16
321.L016ecb_dec_one:
	/* Single block: inline one-block decryption using %edx/%ecx as key pointer/count. */
322	movups	(%edx),%xmm3
323	movups	16(%edx),%xmm4
324	leal	32(%edx),%edx
325	pxor	%xmm3,%xmm0
326.L019dec1_loop:
327	aesdec	%xmm4,%xmm0
328	decl	%ecx
329	movups	(%edx),%xmm4
330	leal	16(%edx),%edx
331	jnz	.L019dec1_loop
332	aesdeclast	%xmm4,%xmm0
333	movups	%xmm0,(%edi)
334	jmp	.L006ecb_ret
335.align	16
336.L017ecb_dec_two:
	/* Two blocks: %xmm2 was not loaded on this path; the three-block helper still processes it and that result is discarded. */
337	call	_aesni_decrypt3
338	movups	%xmm0,(%edi)
339	movups	%xmm1,16(%edi)
340	jmp	.L006ecb_ret
341.align	16
342.L018ecb_dec_three:
343	call	_aesni_decrypt3
344	movups	%xmm0,(%edi)
345	movups	%xmm1,16(%edi)
346	movups	%xmm2,32(%edi)
	/* Falls through into the common exit. */
347.L006ecb_ret:
348	popl	%edi
349	popl	%esi
350	popl	%ebx
351	popl	%ebp
352	ret
353.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
/*
 * aesni_cbc_encrypt: CBC-mode encryption or decryption of a buffer.
 *
 * Stack arguments (offsets valid after the four register pushes below):
 *   20(%esp)  source pointer
 *   24(%esp)  destination pointer
 *   28(%esp)  length in bytes
 *   32(%esp)  key schedule pointer (loop count at offset 240)
 *   36(%esp)  pointer to the 16-byte chaining value (IV); rewritten with the
 *             new chaining value before returning
 *   40(%esp)  direction flag: nonzero takes the encrypt path, zero the
 *             decrypt path
 *
 * Register roles: %esi = source, %edi = destination, %eax = bytes remaining,
 * %ebp / %ebx = saved key pointer / count, %edx / %ecx = working copies,
 * %xmm5 = chaining value that is written back at .L020cbc_ret.
 * %ebp, %ebx, %esi and %edi are saved on entry and restored before returning.
 *
 * Zero-length calls leave through .Lcbc_abort, which sits after the IV
 * write-back. At that point %xmm5 has not been loaded from the IV yet, so
 * going through .L020cbc_ret would overwrite the caller's IV with whatever
 * happened to be in the register.
 *
 * Encrypt path: a trailing partial block is copied to the destination,
 * zero-padded to 16 bytes and encrypted in place.
 * Decrypt path: a trailing partial block is decrypted as a full block and
 * only the remaining (length & 15) bytes are copied out.
 * NOTE(review): the decrypt tail uses full 16-byte loads from the source even
 * when the final block is partial - confirm callers tolerate that.
 */
354.globl	aesni_cbc_encrypt
355.type	aesni_cbc_encrypt,@function
356.align	16
357aesni_cbc_encrypt:
358.L_aesni_cbc_encrypt_begin:
359	pushl	%ebp
360	pushl	%ebx
361	pushl	%esi
362	pushl	%edi
363	movl	20(%esp),%esi
364	movl	24(%esp),%edi
365	movl	28(%esp),%eax
366	movl	32(%esp),%edx
367	testl	%eax,%eax
368	movl	36(%esp),%ebp
	/* Nothing to do for an empty buffer: skip the IV write-back, %xmm5 is not valid yet. */
369	jz	.Lcbc_abort
	/* cmpl sets ZF for the je below; the movups/movl instructions in between leave the flags alone. */
370	cmpl	$0,40(%esp)
371	movups	(%ebp),%xmm5
372	movl	240(%edx),%ecx
373	movl	%edx,%ebp
374	movl	%ecx,%ebx
375	je	.L021cbc_decrypt
	/* Encrypt: %xmm0 carries the running chaining value, starting with the IV. */
376	movaps	%xmm5,%xmm0
377	cmpl	$16,%eax
378	jb	.L022cbc_enc_tail
379	subl	$16,%eax
380	jmp	.L023cbc_enc_loop
381.align	16
382.L023cbc_enc_loop:
	/* XOR the next input block into the chaining value, then encrypt it with the inline one-block loop. */
383	movups	(%esi),%xmm5
384	leal	16(%esi),%esi
385	pxor	%xmm5,%xmm0
386	movups	(%edx),%xmm3
387	movups	16(%edx),%xmm4
388	leal	32(%edx),%edx
389	pxor	%xmm3,%xmm0
390.L024enc1_loop:
391	aesenc	%xmm4,%xmm0
392	decl	%ecx
393	movups	(%edx),%xmm4
394	leal	16(%edx),%edx
395	jnz	.L024enc1_loop
396	aesenclast	%xmm4,%xmm0
	/* subl sets CF for the jnc below; the leal/movl/movups in between do not modify the flags. */
397	subl	$16,%eax
398	leal	16(%edi),%edi
399	movl	%ebx,%ecx
400	movl	%ebp,%edx
401	movups	%xmm0,-16(%edi)
402	jnc	.L023cbc_enc_loop
	/* Undo the last subtraction: nonzero means a partial block is left. */
403	addl	$16,%eax
404	jnz	.L022cbc_enc_tail
	/* Done: the last ciphertext block becomes the new chaining value. */
405	movaps	%xmm0,%xmm5
406	jmp	.L020cbc_ret
407.L022cbc_enc_tail:
408	movl	%eax,%ecx
	/* Bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb - copy the %ecx leftover bytes from source to destination. */
409.long	2767451785
410	movl	$16,%ecx
411	subl	%eax,%ecx
412	xorl	%eax,%eax
	/* Bytes 89 F6 F3 AA: movl %esi,%esi ; rep stosb - zero-fill the rest of the 16-byte block (%al is 0). */
413.long	2868115081
	/* Point both source and destination at the padded block and run the loop once more; %eax is 0, so the loop exits after it. */
414	leal	-16(%edi),%edi
415	movl	%ebx,%ecx
416	movl	%edi,%esi
417	movl	%ebp,%edx
418	jmp	.L023cbc_enc_loop
419.align	16
420.L021cbc_decrypt:
	/* Bias the length by 64 so the three-block loop only runs while more than 64 bytes remain. */
421	subl	$64,%eax
422	jbe	.L025cbc_dec_tail
423	jmp	.L026cbc_dec_loop3
424.align	16
425.L026cbc_dec_loop3:
426	movups	(%esi),%xmm0
427	movups	16(%esi),%xmm1
428	movups	32(%esi),%xmm2
	/* Keep copies of ciphertext blocks 0 and 1: they are the chaining values for blocks 1 and 2. */
429	movaps	%xmm0,%xmm6
430	movaps	%xmm1,%xmm7
431	call	_aesni_decrypt3
	/* subl sets the flags consumed by the ja at the bottom; none of the instructions in between modify them. */
432	subl	$48,%eax
433	leal	48(%esi),%esi
434	leal	48(%edi),%edi
435	pxor	%xmm5,%xmm0
436	pxor	%xmm6,%xmm1
	/* The third ciphertext block of this group becomes the chaining value for the next group. */
437	movups	-16(%esi),%xmm5
438	pxor	%xmm7,%xmm2
439	movups	%xmm0,-48(%edi)
440	movl	%ebx,%ecx
441	movups	%xmm1,-32(%edi)
442	movl	%ebp,%edx
443	movups	%xmm2,-16(%edi)
444	ja	.L026cbc_dec_loop3
445.L025cbc_dec_tail:
	/* Undo the bias: %eax is now the number of bytes left. */
446	addl	$64,%eax
447	jz	.L020cbc_ret
	/* Dispatch on how many blocks remain; jbe is used so a partial final block rounds up to the next case. */
448	movups	(%esi),%xmm0
449	cmpl	$16,%eax
450	movaps	%xmm0,%xmm6
451	jbe	.L027cbc_dec_one
452	movups	16(%esi),%xmm1
453	cmpl	$32,%eax
454	movaps	%xmm1,%xmm7
455	jbe	.L028cbc_dec_two
456	movups	32(%esi),%xmm2
457	cmpl	$48,%eax
458	jbe	.L029cbc_dec_three
	/* Four blocks: %xmm7 is needed for the fourth block, so ciphertext blocks 1 and 2 are reloaded from memory after the call. */
459	movups	48(%esi),%xmm7
460	call	_aesni_decrypt4
461	movups	16(%esi),%xmm3
462	movups	32(%esi),%xmm4
463	pxor	%xmm5,%xmm0
464	pxor	%xmm6,%xmm1
465	movups	48(%esi),%xmm5
466	movups	%xmm0,(%edi)
467	pxor	%xmm3,%xmm2
468	pxor	%xmm4,%xmm7
469	movups	%xmm1,16(%edi)
470	movups	%xmm2,32(%edi)
	/* The last plaintext block is left in %xmm0 for the common tail, which stores it fully or partially. */
471	movaps	%xmm7,%xmm0
472	leal	48(%edi),%edi
473	jmp	.L030cbc_dec_tail_collected
474.L027cbc_dec_one:
475	movups	(%edx),%xmm3
476	movups	16(%edx),%xmm4
477	leal	32(%edx),%edx
478	pxor	%xmm3,%xmm0
479.L031dec1_loop:
480	aesdec	%xmm4,%xmm0
481	decl	%ecx
482	movups	(%edx),%xmm4
483	leal	16(%edx),%edx
484	jnz	.L031dec1_loop
485	aesdeclast	%xmm4,%xmm0
486	pxor	%xmm5,%xmm0
	/* The ciphertext block just consumed becomes the new chaining value. */
487	movaps	%xmm6,%xmm5
488	jmp	.L030cbc_dec_tail_collected
489.L028cbc_dec_two:
	/* %xmm2 was not loaded on this path; the three-block helper still processes it and that result is discarded. */
490	call	_aesni_decrypt3
491	pxor	%xmm5,%xmm0
492	pxor	%xmm6,%xmm1
493	movups	%xmm0,(%edi)
494	movaps	%xmm1,%xmm0
495	movaps	%xmm7,%xmm5
496	leal	16(%edi),%edi
497	jmp	.L030cbc_dec_tail_collected
498.L029cbc_dec_three:
499	call	_aesni_decrypt3
500	pxor	%xmm5,%xmm0
501	pxor	%xmm6,%xmm1
502	pxor	%xmm7,%xmm2
503	movups	%xmm0,(%edi)
504	movups	%xmm1,16(%edi)
505	movaps	%xmm2,%xmm0
506	movups	32(%esi),%xmm5
507	leal	32(%edi),%edi
508.L030cbc_dec_tail_collected:
	/* %xmm0 = last plaintext block, %edi = where it goes. Store all 16 bytes unless the length was not a multiple of 16. */
509	andl	$15,%eax
510	jnz	.L032cbc_dec_tail_partial
511	movups	%xmm0,(%edi)
512	jmp	.L020cbc_ret
513.L032cbc_dec_tail_partial:
	/* Spill the block to a 16-byte aligned scratch slot on the stack (movaps needs the alignment); %ebp keeps the original %esp. */
514	movl	%esp,%ebp
515	subl	$16,%esp
516	andl	$-16,%esp
517	movaps	%xmm0,(%esp)
518	movl	%esp,%esi
519	movl	%eax,%ecx
	/* Bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb - copy only the leftover byte count to the destination. */
520.long	2767451785
521	movl	%ebp,%esp
522.L020cbc_ret:
	/* Write the updated chaining value back through the IV pointer argument. */
523	movl	36(%esp),%ebp
524	movups	%xmm5,(%ebp)
	/* Exit used by zero-length calls: restores registers without touching the IV. */
.Lcbc_abort:
525	popl	%edi
526	popl	%esi
527	popl	%ebx
528	popl	%ebp
529	ret
530.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
/*
 * _aesni_set_encrypt_key: internal key-expansion routine with register
 * arguments.
 *
 *   In:  %eax = pointer to the user key bytes
 *        %ecx = key size in bits (128, 192 or 256)
 *        %edx = pointer to the schedule buffer to fill
 *   Out: %eax = 0 on success, -1 if either pointer is zero, -2 for any other
 *               key size
 *        %ecx = 9, 11 or 13 on success; the same value is stored at offset
 *               240 of the schedule
 *   Clobbers: %edx, %xmm0-%xmm5 (depending on key size), flags
 *
 * Round keys are written consecutively from offset 0 of the schedule. Each
 * aeskeygenassist is followed by a call to a local subroutine that stores the
 * key material produced so far and derives the next round key. The *_cold
 * entry points skip the store because the initial key bytes were already
 * written by the caller of the subroutine.
 */
531.type	_aesni_set_encrypt_key,@function
532.align	16
533_aesni_set_encrypt_key:
534	testl	%eax,%eax
535	jz	.L033bad_pointer
536	testl	%edx,%edx
537	jz	.L033bad_pointer
538	movups	(%eax),%xmm0
	/* %xmm4 is used as scratch by the shufps sequences in the subroutines and must start as zero. */
539	pxor	%xmm4,%xmm4
540	leal	16(%edx),%edx
541	cmpl	$256,%ecx
542	je	.L03414rounds
543	cmpl	$192,%ecx
544	je	.L03512rounds
545	cmpl	$128,%ecx
546	jne	.L036bad_keybits
547.align	16
548.L03710rounds:
	/* 128-bit key: the user key is round key 0; ten aeskeygenassist steps produce keys 1..10. */
549	movl	$9,%ecx
550	movups	%xmm0,-16(%edx)
551	aeskeygenassist	$1,%xmm0,%xmm1
552	call	.L038key_128_cold
553	aeskeygenassist	$2,%xmm0,%xmm1
554	call	.L039key_128
555	aeskeygenassist	$4,%xmm0,%xmm1
556	call	.L039key_128
557	aeskeygenassist	$8,%xmm0,%xmm1
558	call	.L039key_128
559	aeskeygenassist	$16,%xmm0,%xmm1
560	call	.L039key_128
561	aeskeygenassist	$32,%xmm0,%xmm1
562	call	.L039key_128
563	aeskeygenassist	$64,%xmm0,%xmm1
564	call	.L039key_128
565	aeskeygenassist	$128,%xmm0,%xmm1
566	call	.L039key_128
567	aeskeygenassist	$27,%xmm0,%xmm1
568	call	.L039key_128
569	aeskeygenassist	$54,%xmm0,%xmm1
570	call	.L039key_128
	/* Store the last round key; %edx is at schedule offset 160 here, so 80(%edx) is offset 240. */
571	movups	%xmm0,(%edx)
572	movl	%ecx,80(%edx)
573	xorl	%eax,%eax
574	ret
575.align	16
576.L039key_128:
	/* Store the round key computed by the previous step, then fall into the derivation of the next one. */
577	movups	%xmm0,(%edx)
578	leal	16(%edx),%edx
579.L038key_128_cold:
	/* Derive the next round key in %xmm0 from the current key and the aeskeygenassist result in %xmm1. */
580	shufps	$16,%xmm0,%xmm4
581	pxor	%xmm4,%xmm0
582	shufps	$140,%xmm0,%xmm4
583	pxor	%xmm4,%xmm0
584	pshufd	$255,%xmm1,%xmm1
585	pxor	%xmm1,%xmm0
586	ret
587.align	16
588.L03512rounds:
	/* 192-bit key: the upper 8 key bytes are kept in the low half of %xmm2. */
589	movq	16(%eax),%xmm2
590	movl	$11,%ecx
591	movups	%xmm0,-16(%edx)
592	aeskeygenassist	$1,%xmm2,%xmm1
593	call	.L040key_192a_cold
594	aeskeygenassist	$2,%xmm2,%xmm1
595	call	.L041key_192b
596	aeskeygenassist	$4,%xmm2,%xmm1
597	call	.L042key_192a
598	aeskeygenassist	$8,%xmm2,%xmm1
599	call	.L041key_192b
600	aeskeygenassist	$16,%xmm2,%xmm1
601	call	.L042key_192a
602	aeskeygenassist	$32,%xmm2,%xmm1
603	call	.L041key_192b
604	aeskeygenassist	$64,%xmm2,%xmm1
605	call	.L042key_192a
606	aeskeygenassist	$128,%xmm2,%xmm1
607	call	.L041key_192b
	/* Store the last round key; %edx is at schedule offset 192 here, so 48(%edx) is offset 240. */
608	movups	%xmm0,(%edx)
609	movl	%ecx,48(%edx)
610	xorl	%eax,%eax
611	ret
612.align	16
613.L042key_192a:
	/* "a" step: store 16 bytes from %xmm0, then fall through (across the alignment padding) into the derivation. */
614	movups	%xmm0,(%edx)
615	leal	16(%edx),%edx
616.align	16
617.L040key_192a_cold:
	/* Save the current %xmm2 in %xmm5; the following "b" step combines it with the next %xmm0. */
618	movaps	%xmm2,%xmm5
619.L043key_192b_warm:
	/* Advance the 192-bit key state held in %xmm0 (16 bytes) and %xmm2 (low 8 bytes). */
620	shufps	$16,%xmm0,%xmm4
621	movaps	%xmm2,%xmm3
622	pxor	%xmm4,%xmm0
623	shufps	$140,%xmm0,%xmm4
624	pslldq	$4,%xmm3
625	pxor	%xmm4,%xmm0
626	pshufd	$85,%xmm1,%xmm1
627	pxor	%xmm3,%xmm2
628	pxor	%xmm1,%xmm0
629	pshufd	$255,%xmm0,%xmm3
630	pxor	%xmm3,%xmm2
631	ret
632.align	16
633.L041key_192b:
	/* "b" step: assemble and store 32 bytes from %xmm5, %xmm0 and %xmm2, then derive the next state. */
634	movaps	%xmm0,%xmm3
635	shufps	$68,%xmm0,%xmm5
636	movups	%xmm5,(%edx)
637	shufps	$78,%xmm2,%xmm3
638	movups	%xmm3,16(%edx)
639	leal	32(%edx),%edx
640	jmp	.L043key_192b_warm
641.align	16
642.L03414rounds:
	/* 256-bit key: both 16-byte halves of the user key are stored as the first two round keys. */
643	movups	16(%eax),%xmm2
644	movl	$13,%ecx
645	leal	16(%edx),%edx
646	movups	%xmm0,-32(%edx)
647	movups	%xmm2,-16(%edx)
648	aeskeygenassist	$1,%xmm2,%xmm1
649	call	.L044key_256a_cold
650	aeskeygenassist	$1,%xmm0,%xmm1
651	call	.L045key_256b
652	aeskeygenassist	$2,%xmm2,%xmm1
653	call	.L046key_256a
654	aeskeygenassist	$2,%xmm0,%xmm1
655	call	.L045key_256b
656	aeskeygenassist	$4,%xmm2,%xmm1
657	call	.L046key_256a
658	aeskeygenassist	$4,%xmm0,%xmm1
659	call	.L045key_256b
660	aeskeygenassist	$8,%xmm2,%xmm1
661	call	.L046key_256a
662	aeskeygenassist	$8,%xmm0,%xmm1
663	call	.L045key_256b
664	aeskeygenassist	$16,%xmm2,%xmm1
665	call	.L046key_256a
666	aeskeygenassist	$16,%xmm0,%xmm1
667	call	.L045key_256b
668	aeskeygenassist	$32,%xmm2,%xmm1
669	call	.L046key_256a
670	aeskeygenassist	$32,%xmm0,%xmm1
671	call	.L045key_256b
672	aeskeygenassist	$64,%xmm2,%xmm1
673	call	.L046key_256a
	/* Store the last round key; %edx is at schedule offset 224 here, so 16(%edx) is offset 240. */
674	movups	%xmm0,(%edx)
675	movl	%ecx,16(%edx)
676	xorl	%eax,%eax
677	ret
678.align	16
679.L046key_256a:
	/* "a" step: store the key held in %xmm2, then derive the next even-numbered key into %xmm0. */
680	movups	%xmm2,(%edx)
681	leal	16(%edx),%edx
682.L044key_256a_cold:
683	shufps	$16,%xmm0,%xmm4
684	pxor	%xmm4,%xmm0
685	shufps	$140,%xmm0,%xmm4
686	pxor	%xmm4,%xmm0
687	pshufd	$255,%xmm1,%xmm1
688	pxor	%xmm1,%xmm0
689	ret
690.align	16
691.L045key_256b:
	/* "b" step: store the key held in %xmm0, then derive the next odd-numbered key into %xmm2. */
692	movups	%xmm0,(%edx)
693	leal	16(%edx),%edx
694	shufps	$16,%xmm2,%xmm4
695	pxor	%xmm4,%xmm2
696	shufps	$140,%xmm2,%xmm4
697	pxor	%xmm4,%xmm2
698	pshufd	$170,%xmm1,%xmm1
699	pxor	%xmm1,%xmm2
700	ret
701.align	4
702.L033bad_pointer:
	/* Either the user key pointer or the schedule pointer was zero. */
703	movl	$-1,%eax
704	ret
705.align	4
706.L036bad_keybits:
	/* Key size was not 128, 192 or 256 bits. */
707	movl	$-2,%eax
708	ret
709.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
/*
 * aesni_set_encrypt_key: exported wrapper around _aesni_set_encrypt_key.
 *
 * Stack arguments (relative to %esp on entry):
 *    4(%esp)  pointer to the user key bytes
 *    8(%esp)  key size in bits
 *   12(%esp)  pointer to the schedule buffer to fill
 *
 * Loads the three arguments into the registers the internal routine expects
 * and returns its result in %eax unchanged (0, -1 or -2).
 */
710.globl	aesni_set_encrypt_key
711.type	aesni_set_encrypt_key,@function
712.align	16
713aesni_set_encrypt_key:
714.L_aesni_set_encrypt_key_begin:
715	movl	4(%esp),%eax
716	movl	8(%esp),%ecx
717	movl	12(%esp),%edx
718	call	_aesni_set_encrypt_key
719	ret
720.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
/*
 * aesni_set_decrypt_key: build a key schedule for decryption.
 *
 * Stack arguments (relative to %esp on entry):
 *    4(%esp)  pointer to the user key bytes
 *    8(%esp)  key size in bits
 *   12(%esp)  pointer to the schedule buffer to fill
 *
 * First expands the key with _aesni_set_encrypt_key. On success it reverses
 * the order of the round keys in place and applies aesimc to every key except
 * the first and the last, then returns 0 in %eax. If the expansion fails, its
 * nonzero result is returned unchanged and the schedule is not post-processed.
 *
 * Clobbers: %eax, %ecx, %edx, %xmm0, %xmm1, flags, plus whatever the
 * expansion routine clobbers.
 */
721.globl	aesni_set_decrypt_key
722.type	aesni_set_decrypt_key,@function
723.align	16
724aesni_set_decrypt_key:
725.L_aesni_set_decrypt_key_begin:
726	movl	4(%esp),%eax
727	movl	8(%esp),%ecx
728	movl	12(%esp),%edx
729	call	_aesni_set_encrypt_key
	/* The expansion routine advanced %edx; reload the schedule base. */
730	movl	12(%esp),%edx
	/* On success %ecx is the stored count; count*16 + 16 is the byte offset of the last round key. */
731	shll	$4,%ecx
732	testl	%eax,%eax
733	jnz	.L047dec_key_ret
734	leal	16(%edx,%ecx,1),%eax
	/* Swap the first and last round keys as they are, without aesimc. */
735	movups	(%edx),%xmm0
736	movups	(%eax),%xmm1
737	movups	%xmm0,(%eax)
738	movups	%xmm1,(%edx)
739	leal	16(%edx),%edx
740	leal	-16(%eax),%eax
741.L048dec_key_inverse:
	/* Walk inward from both ends: transform a pair of keys with aesimc and store each in the other's slot. */
742	movups	(%edx),%xmm0
743	movups	(%eax),%xmm1
744	aesimc	%xmm0,%xmm0
745	aesimc	%xmm1,%xmm1
746	leal	16(%edx),%edx
747	leal	-16(%eax),%eax
	/* cmpl sets the flags for the ja below; the two movups stores in between do not modify them. */
748	cmpl	%edx,%eax
749	movups	%xmm0,16(%eax)
750	movups	%xmm1,-16(%edx)
751	ja	.L048dec_key_inverse
	/* The pointers have met: transform the middle key in place. */
752	movups	(%edx),%xmm0
753	aesimc	%xmm0,%xmm0
754	movups	%xmm0,(%edx)
755	xorl	%eax,%eax
756.L047dec_key_ret:
757	ret
758.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
759