xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S (revision b45fa494daa2ba02187711d31a4144faf0993066)
1.file	"aesni-x86.s"
2.text
/*
 * aesni_encrypt: encrypt one 16-byte block with the AES-NI instructions.
 *
 * Stack arguments (read relative to %esp on entry):
 *    4(%esp)  pointer to the 16-byte input block
 *    8(%esp)  pointer to the 16-byte output block
 *   12(%esp)  pointer to the key schedule; the 32-bit loop count is read
 *             from byte offset 240 of it
 *
 * Clobbers %eax, %ecx, %edx, %xmm0, %xmm3, %xmm4 and the flags.
 */
3.globl	aesni_encrypt
4.type	aesni_encrypt,@function
5.align	16
6aesni_encrypt:
7.L_aesni_encrypt_begin:
8	movl	4(%esp),%eax
9	movl	12(%esp),%edx
10	movups	(%eax),%xmm0
11	movl	240(%edx),%ecx
/* %eax is reused: from here on it holds the output pointer. */
12	movl	8(%esp),%eax
/* Round key 0 is XORed into the block; round key 1 is preloaded for the loop. */
13	movups	(%edx),%xmm3
14	movups	16(%edx),%xmm4
15	leal	32(%edx),%edx
16	pxor	%xmm3,%xmm0
/*
 * One aesenc per iteration, %ecx iterations in total.  The next round key
 * is fetched between decl and jnz; movups and leal do not modify the
 * flags, so jnz still tests the result of decl.
 */
17.L000enc1_loop:
18	aesenc	%xmm4,%xmm0
19	decl	%ecx
20	movups	(%edx),%xmm4
21	leal	16(%edx),%edx
22	jnz	.L000enc1_loop
/* The key fetched on the last iteration is used for the final round. */
23	aesenclast	%xmm4,%xmm0
24	movups	%xmm0,(%eax)
25	ret
26.size	aesni_encrypt,.-.L_aesni_encrypt_begin
/*
 * aesni_decrypt: decrypt one 16-byte block with the AES-NI instructions.
 *
 * Stack arguments (read relative to %esp on entry):
 *    4(%esp)  pointer to the 16-byte input block
 *    8(%esp)  pointer to the 16-byte output block
 *   12(%esp)  pointer to the key schedule; the 32-bit loop count is read
 *             from byte offset 240 of it
 *
 * The round keys are consumed front to back with aesdec/aesdeclast.
 * NOTE(review): this presumably expects a schedule prepared by
 * aesni_set_decrypt_key - confirm against callers.
 *
 * Clobbers %eax, %ecx, %edx, %xmm0, %xmm3, %xmm4 and the flags.
 */
27.globl	aesni_decrypt
28.type	aesni_decrypt,@function
29.align	16
30aesni_decrypt:
31.L_aesni_decrypt_begin:
32	movl	4(%esp),%eax
33	movl	12(%esp),%edx
34	movups	(%eax),%xmm0
35	movl	240(%edx),%ecx
/* %eax is reused: from here on it holds the output pointer. */
36	movl	8(%esp),%eax
/* First key is XORed into the block; the second is preloaded for the loop. */
37	movups	(%edx),%xmm3
38	movups	16(%edx),%xmm4
39	leal	32(%edx),%edx
40	pxor	%xmm3,%xmm0
/*
 * One aesdec per iteration, %ecx iterations in total.  movups and leal
 * leave the flags alone, so jnz still tests the result of decl.
 */
41.L001dec1_loop:
42	aesdec	%xmm4,%xmm0
43	decl	%ecx
44	movups	(%edx),%xmm4
45	leal	16(%edx),%edx
46	jnz	.L001dec1_loop
/* The key fetched on the last iteration is used for the final round. */
47	aesdeclast	%xmm4,%xmm0
48	movups	%xmm0,(%eax)
49	ret
50.size	aesni_decrypt,.-.L_aesni_decrypt_begin
/*
 * _aesni_encrypt3: file-local helper that encrypts three blocks at once.
 *
 * Register interface (nothing is read from the stack):
 *   in   %edx          key schedule pointer
 *        %ecx          loop count; it is halved here because every loop
 *                      iteration applies two round keys
 *        %xmm0-%xmm2   the three input blocks
 *   out  %xmm0-%xmm2   the three processed blocks
 *
 * Clobbers %ecx, %edx, %xmm3, %xmm4 and the flags.
 */
51.type	_aesni_encrypt3,@function
52.align	16
53_aesni_encrypt3:
54	movups	(%edx),%xmm3
55	shrl	$1,%ecx
56	movups	16(%edx),%xmm4
57	leal	32(%edx),%edx
/* XOR the first round key into all three blocks. */
58	pxor	%xmm3,%xmm0
59	pxor	%xmm3,%xmm1
60	pxor	%xmm3,%xmm2
/* Jump over whatever padding the .align below inserts. */
61	jmp	.L002enc3_loop
62.align	16
/*
 * Two rounds per iteration.  %xmm4 and %xmm3 alternate as the current
 * round key, and the key loads are interleaved with the aesenc chain.
 * The jnz consumes the flags set by decl: the movups, aesenc and leal
 * instructions in between do not modify them.
 */
63.L002enc3_loop:
64	aesenc	%xmm4,%xmm0
65	movups	(%edx),%xmm3
66	aesenc	%xmm4,%xmm1
67	decl	%ecx
68	aesenc	%xmm4,%xmm2
69	movups	16(%edx),%xmm4
70	aesenc	%xmm3,%xmm0
71	leal	32(%edx),%edx
72	aesenc	%xmm3,%xmm1
73	aesenc	%xmm3,%xmm2
74	jnz	.L002enc3_loop
/* One more regular round with the key left in %xmm4, then the final round. */
75	aesenc	%xmm4,%xmm0
76	movups	(%edx),%xmm3
77	aesenc	%xmm4,%xmm1
78	aesenc	%xmm4,%xmm2
79	aesenclast	%xmm3,%xmm0
80	aesenclast	%xmm3,%xmm1
81	aesenclast	%xmm3,%xmm2
82	ret
83.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: file-local helper that decrypts three blocks at once.
 *
 * Register interface (nothing is read from the stack):
 *   in   %edx          key schedule pointer
 *        %ecx          loop count; it is halved here because every loop
 *                      iteration applies two round keys
 *        %xmm0-%xmm2   the three input blocks
 *   out  %xmm0-%xmm2   the three processed blocks
 *
 * Clobbers %ecx, %edx, %xmm3, %xmm4 and the flags.
 */
84.type	_aesni_decrypt3,@function
85.align	16
86_aesni_decrypt3:
87	movups	(%edx),%xmm3
88	shrl	$1,%ecx
89	movups	16(%edx),%xmm4
90	leal	32(%edx),%edx
/* XOR the first key of the schedule into all three blocks. */
91	pxor	%xmm3,%xmm0
92	pxor	%xmm3,%xmm1
93	pxor	%xmm3,%xmm2
/* Jump over whatever padding the .align below inserts. */
94	jmp	.L003dec3_loop
95.align	16
/*
 * Two rounds per iteration.  %xmm4 and %xmm3 alternate as the current
 * round key, and the key loads are interleaved with the aesdec chain.
 * The jnz consumes the flags set by decl: the movups, aesdec and leal
 * instructions in between do not modify them.
 */
96.L003dec3_loop:
97	aesdec	%xmm4,%xmm0
98	movups	(%edx),%xmm3
99	aesdec	%xmm4,%xmm1
100	decl	%ecx
101	aesdec	%xmm4,%xmm2
102	movups	16(%edx),%xmm4
103	aesdec	%xmm3,%xmm0
104	leal	32(%edx),%edx
105	aesdec	%xmm3,%xmm1
106	aesdec	%xmm3,%xmm2
107	jnz	.L003dec3_loop
/* One more regular round with the key left in %xmm4, then the final round. */
108	aesdec	%xmm4,%xmm0
109	movups	(%edx),%xmm3
110	aesdec	%xmm4,%xmm1
111	aesdec	%xmm4,%xmm2
112	aesdeclast	%xmm3,%xmm0
113	aesdeclast	%xmm3,%xmm1
114	aesdeclast	%xmm3,%xmm2
115	ret
116.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: file-local helper that encrypts four blocks at once.
 *
 * Register interface (nothing is read from the stack):
 *   in   %edx                 key schedule pointer
 *        %ecx                 loop count; it is halved here because every
 *                             loop iteration applies two round keys
 *        %xmm0-%xmm2, %xmm7   the four input blocks
 *   out  %xmm0-%xmm2, %xmm7   the four processed blocks
 *
 * Clobbers %ecx, %edx, %xmm3, %xmm4 and the flags.
 * The loop label says "enc3" although four blocks are processed.
 */
117.type	_aesni_encrypt4,@function
118.align	16
119_aesni_encrypt4:
120	movups	(%edx),%xmm3
121	movups	16(%edx),%xmm4
122	shrl	$1,%ecx
123	leal	32(%edx),%edx
/* XOR the first round key into all four blocks. */
124	pxor	%xmm3,%xmm0
125	pxor	%xmm3,%xmm1
126	pxor	%xmm3,%xmm2
127	pxor	%xmm3,%xmm7
/* Jump over whatever padding the .align below inserts. */
128	jmp	.L004enc3_loop
129.align	16
/*
 * Two rounds per iteration, with %xmm4 and %xmm3 alternating as the
 * current round key.  The jnz consumes the flags set by decl: none of
 * the instructions in between modify them.
 */
130.L004enc3_loop:
131	aesenc	%xmm4,%xmm0
132	movups	(%edx),%xmm3
133	aesenc	%xmm4,%xmm1
134	decl	%ecx
135	aesenc	%xmm4,%xmm2
136	aesenc	%xmm4,%xmm7
137	movups	16(%edx),%xmm4
138	aesenc	%xmm3,%xmm0
139	leal	32(%edx),%edx
140	aesenc	%xmm3,%xmm1
141	aesenc	%xmm3,%xmm2
142	aesenc	%xmm3,%xmm7
143	jnz	.L004enc3_loop
/* One more regular round with the key left in %xmm4, then the final round. */
144	aesenc	%xmm4,%xmm0
145	movups	(%edx),%xmm3
146	aesenc	%xmm4,%xmm1
147	aesenc	%xmm4,%xmm2
148	aesenc	%xmm4,%xmm7
149	aesenclast	%xmm3,%xmm0
150	aesenclast	%xmm3,%xmm1
151	aesenclast	%xmm3,%xmm2
152	aesenclast	%xmm3,%xmm7
153	ret
154.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: file-local helper that decrypts four blocks at once.
 *
 * Register interface (nothing is read from the stack):
 *   in   %edx                 key schedule pointer
 *        %ecx                 loop count; it is halved here because every
 *                             loop iteration applies two round keys
 *        %xmm0-%xmm2, %xmm7   the four input blocks
 *   out  %xmm0-%xmm2, %xmm7   the four processed blocks
 *
 * Clobbers %ecx, %edx, %xmm3, %xmm4 and the flags.
 * The loop label says "dec3" although four blocks are processed.
 */
155.type	_aesni_decrypt4,@function
156.align	16
157_aesni_decrypt4:
158	movups	(%edx),%xmm3
159	movups	16(%edx),%xmm4
160	shrl	$1,%ecx
161	leal	32(%edx),%edx
/* XOR the first key of the schedule into all four blocks. */
162	pxor	%xmm3,%xmm0
163	pxor	%xmm3,%xmm1
164	pxor	%xmm3,%xmm2
165	pxor	%xmm3,%xmm7
/* Jump over whatever padding the .align below inserts. */
166	jmp	.L005dec3_loop
167.align	16
/*
 * Two rounds per iteration, with %xmm4 and %xmm3 alternating as the
 * current round key.  The jnz consumes the flags set by decl: none of
 * the instructions in between modify them.
 */
168.L005dec3_loop:
169	aesdec	%xmm4,%xmm0
170	movups	(%edx),%xmm3
171	aesdec	%xmm4,%xmm1
172	decl	%ecx
173	aesdec	%xmm4,%xmm2
174	aesdec	%xmm4,%xmm7
175	movups	16(%edx),%xmm4
176	aesdec	%xmm3,%xmm0
177	leal	32(%edx),%edx
178	aesdec	%xmm3,%xmm1
179	aesdec	%xmm3,%xmm2
180	aesdec	%xmm3,%xmm7
181	jnz	.L005dec3_loop
/* One more regular round with the key left in %xmm4, then the final round. */
182	aesdec	%xmm4,%xmm0
183	movups	(%edx),%xmm3
184	aesdec	%xmm4,%xmm1
185	aesdec	%xmm4,%xmm2
186	aesdec	%xmm4,%xmm7
187	aesdeclast	%xmm3,%xmm0
188	aesdeclast	%xmm3,%xmm1
189	aesdeclast	%xmm3,%xmm2
190	aesdeclast	%xmm3,%xmm7
191	ret
192.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * aesni_ecb_encrypt: process a buffer block by block, each 16-byte block
 * independently of the others (ECB).
 *
 * Stack arguments (offsets as seen after the four pushes below):
 *   20(%esp)  input pointer          -> %esi
 *   24(%esp)  output pointer         -> %edi
 *   28(%esp)  length in bytes        -> %eax
 *   32(%esp)  key schedule pointer   -> %edx, with a copy kept in %ebp
 *   36(%esp)  direction flag: non-zero takes the encrypt path, zero the
 *             decrypt path
 *
 * A length below 16 returns without writing anything; otherwise the
 * length is rounded down to a multiple of 16.  The bulk of the data is
 * handled three blocks per iteration, and the last one to four blocks are
 * handled by a tail dispatcher.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored.  %eax, %ecx, %edx,
 * %xmm0-%xmm4, %xmm7 and the flags are clobbered.
 */
193.globl	aesni_ecb_encrypt
194.type	aesni_ecb_encrypt,@function
195.align	16
196aesni_ecb_encrypt:
197.L_aesni_ecb_encrypt_begin:
198	pushl	%ebp
199	pushl	%ebx
200	pushl	%esi
201	pushl	%edi
202	movl	20(%esp),%esi
203	movl	24(%esp),%edi
204	movl	28(%esp),%eax
205	movl	32(%esp),%edx
206	movl	36(%esp),%ecx
207	cmpl	$16,%eax
208	jb	.L006ecb_ret
209	andl	$-16,%eax
/*
 * testl sets ZF from the direction flag.  The three movl instructions that
 * follow do not touch the flags, so the jz below still acts on it.  %ecx
 * is then reloaded with the loop count from offset 240 of the key
 * schedule; %ebp and %ebx keep pristine copies of the key pointer and the
 * count because the helper routines advance %edx and consume %ecx.
 */
210	testl	%ecx,%ecx
211	movl	240(%edx),%ecx
212	movl	%edx,%ebp
213	movl	%ecx,%ebx
214	jz	.L007ecb_decrypt
/*
 * Encrypt path.  The byte count is biased by -64 so that the three-block
 * loop stops while one to four blocks are still left for the tail.
 */
215	subl	$64,%eax
216	jbe	.L008ecb_enc_tail
217	jmp	.L009ecb_enc_loop3
218.align	16
219.L009ecb_enc_loop3:
220	movups	(%esi),%xmm0
221	movups	16(%esi),%xmm1
222	movups	32(%esi),%xmm2
223	call	_aesni_encrypt3
/*
 * The flags from this subl feed the ja at the bottom of the loop: the
 * leal, movups and movl instructions in between leave them unchanged.
 */
224	subl	$48,%eax
225	leal	48(%esi),%esi
226	leal	48(%edi),%edi
227	movups	%xmm0,-48(%edi)
228	movl	%ebp,%edx
229	movups	%xmm1,-32(%edi)
230	movl	%ebx,%ecx
231	movups	%xmm2,-16(%edi)
232	ja	.L009ecb_enc_loop3
/* Undo the -64 bias; %eax is now the number of bytes still to process. */
233.L008ecb_enc_tail:
234	addl	$64,%eax
235	jz	.L006ecb_ret
/*
 * Dispatch on 16, 32, 48 or (by fall-through) 64 remaining bytes.  Each
 * cmpl is followed by a movups load, which does not alter the flags seen
 * by the je.
 */
236	cmpl	$16,%eax
237	movups	(%esi),%xmm0
238	je	.L010ecb_enc_one
239	cmpl	$32,%eax
240	movups	16(%esi),%xmm1
241	je	.L011ecb_enc_two
242	cmpl	$48,%eax
243	movups	32(%esi),%xmm2
244	je	.L012ecb_enc_three
245	movups	48(%esi),%xmm7
246	call	_aesni_encrypt4
247	movups	%xmm0,(%edi)
248	movups	%xmm1,16(%edi)
249	movups	%xmm2,32(%edi)
250	movups	%xmm7,48(%edi)
251	jmp	.L006ecb_ret
252.align	16
/* Single remaining block: inline one-block encryption. */
253.L010ecb_enc_one:
254	movups	(%edx),%xmm3
255	movups	16(%edx),%xmm4
256	leal	32(%edx),%edx
257	pxor	%xmm3,%xmm0
258.L013enc1_loop:
259	aesenc	%xmm4,%xmm0
260	decl	%ecx
261	movups	(%edx),%xmm4
262	leal	16(%edx),%edx
263	jnz	.L013enc1_loop
264	aesenclast	%xmm4,%xmm0
265	movups	%xmm0,(%edi)
266	jmp	.L006ecb_ret
267.align	16
/*
 * Two remaining blocks: the three-block helper is reused.  %xmm2 is not
 * loaded on this path and its result is not stored.
 */
268.L011ecb_enc_two:
269	call	_aesni_encrypt3
270	movups	%xmm0,(%edi)
271	movups	%xmm1,16(%edi)
272	jmp	.L006ecb_ret
273.align	16
274.L012ecb_enc_three:
275	call	_aesni_encrypt3
276	movups	%xmm0,(%edi)
277	movups	%xmm1,16(%edi)
278	movups	%xmm2,32(%edi)
279	jmp	.L006ecb_ret
280.align	16
/* Decrypt path: same structure as the encrypt path above. */
281.L007ecb_decrypt:
282	subl	$64,%eax
283	jbe	.L014ecb_dec_tail
284	jmp	.L015ecb_dec_loop3
285.align	16
286.L015ecb_dec_loop3:
287	movups	(%esi),%xmm0
288	movups	16(%esi),%xmm1
289	movups	32(%esi),%xmm2
290	call	_aesni_decrypt3
/* Flags from this subl are consumed by the ja at the bottom of the loop. */
291	subl	$48,%eax
292	leal	48(%esi),%esi
293	leal	48(%edi),%edi
294	movups	%xmm0,-48(%edi)
295	movl	%ebp,%edx
296	movups	%xmm1,-32(%edi)
297	movl	%ebx,%ecx
298	movups	%xmm2,-16(%edi)
299	ja	.L015ecb_dec_loop3
/* Undo the -64 bias; %eax is now the number of bytes still to process. */
300.L014ecb_dec_tail:
301	addl	$64,%eax
302	jz	.L006ecb_ret
303	cmpl	$16,%eax
304	movups	(%esi),%xmm0
305	je	.L016ecb_dec_one
306	cmpl	$32,%eax
307	movups	16(%esi),%xmm1
308	je	.L017ecb_dec_two
309	cmpl	$48,%eax
310	movups	32(%esi),%xmm2
311	je	.L018ecb_dec_three
312	movups	48(%esi),%xmm7
313	call	_aesni_decrypt4
314	movups	%xmm0,(%edi)
315	movups	%xmm1,16(%edi)
316	movups	%xmm2,32(%edi)
317	movups	%xmm7,48(%edi)
318	jmp	.L006ecb_ret
319.align	16
/* Single remaining block: inline one-block decryption. */
320.L016ecb_dec_one:
321	movups	(%edx),%xmm3
322	movups	16(%edx),%xmm4
323	leal	32(%edx),%edx
324	pxor	%xmm3,%xmm0
325.L019dec1_loop:
326	aesdec	%xmm4,%xmm0
327	decl	%ecx
328	movups	(%edx),%xmm4
329	leal	16(%edx),%edx
330	jnz	.L019dec1_loop
331	aesdeclast	%xmm4,%xmm0
332	movups	%xmm0,(%edi)
333	jmp	.L006ecb_ret
334.align	16
/* Two remaining blocks: three-block helper, third result discarded. */
335.L017ecb_dec_two:
336	call	_aesni_decrypt3
337	movups	%xmm0,(%edi)
338	movups	%xmm1,16(%edi)
339	jmp	.L006ecb_ret
340.align	16
/* Three remaining blocks; falls through into the common epilogue. */
341.L018ecb_dec_three:
342	call	_aesni_decrypt3
343	movups	%xmm0,(%edi)
344	movups	%xmm1,16(%edi)
345	movups	%xmm2,32(%edi)
/* Common exit: restore the saved registers in reverse push order. */
346.L006ecb_ret:
347	popl	%edi
348	popl	%esi
349	popl	%ebx
350	popl	%ebp
351	ret
352.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
/*
 * aesni_cbc_encrypt: CBC-mode encryption or decryption of a buffer.
 *
 * Stack arguments (offsets as seen after the four pushes below):
 *   20(%esp)  input pointer          -> %esi
 *   24(%esp)  output pointer         -> %edi
 *   28(%esp)  length in bytes        -> %eax
 *   32(%esp)  key schedule pointer   -> %edx, with a copy kept in %ebp
 *   36(%esp)  pointer to the 16-byte IV, read on entry and overwritten
 *             with the new chaining value on exit
 *   40(%esp)  direction flag: zero takes the decrypt path, anything else
 *             the encrypt path
 *
 * Encryption handles one block per iteration; a trailing partial block is
 * zero-padded in the output buffer and then encrypted in place.
 * Decryption handles three blocks per iteration, then a tail of up to
 * four blocks; a trailing partial block is written with only its
 * (length & 15) bytes.
 *
 * A zero length returns immediately through .L049cbc_abort WITHOUT
 * touching the IV buffer.  At that point %xmm5 has not been loaded from
 * the IV yet, so the regular exit at .L020cbc_ret would store whatever the
 * caller left in %xmm5 over the IV.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored.  %eax, %ecx, %edx,
 * %xmm0-%xmm7 and the flags are clobbered.
 */
353.globl	aesni_cbc_encrypt
354.type	aesni_cbc_encrypt,@function
355.align	16
356aesni_cbc_encrypt:
357.L_aesni_cbc_encrypt_begin:
358	pushl	%ebp
359	pushl	%ebx
360	pushl	%esi
361	pushl	%edi
362	movl	20(%esp),%esi
363	movl	24(%esp),%edi
364	movl	28(%esp),%eax
365	movl	32(%esp),%edx
/* movl does not change the flags, so the jz still tests the length. */
366	testl	%eax,%eax
367	movl	36(%esp),%ebp
/* Nothing to do: leave without storing the not-yet-loaded %xmm5. */
368	jz	.L049cbc_abort
/*
 * The flags from this cmpl are consumed by the je five instructions
 * further down; the movups and movl instructions in between preserve
 * them.  %xmm5 holds the chaining value from here on.  %ebp and %ebx keep
 * pristine copies of the key pointer and the loop count (offset 240 of
 * the key schedule).
 */
369	cmpl	$0,40(%esp)
370	movups	(%ebp),%xmm5
371	movl	240(%edx),%ecx
372	movl	%edx,%ebp
373	movl	%ecx,%ebx
374	je	.L021cbc_decrypt
/* Encrypt path: %xmm0 carries the previous ciphertext block (IV at first). */
375	movaps	%xmm5,%xmm0
376	cmpl	$16,%eax
377	jb	.L022cbc_enc_tail
378	subl	$16,%eax
379	jmp	.L023cbc_enc_loop
380.align	16
381.L023cbc_enc_loop:
382	movups	(%esi),%xmm5
383	leal	16(%esi),%esi
/* Chain: XOR the plaintext block into the previous ciphertext block. */
384	pxor	%xmm5,%xmm0
385	movups	(%edx),%xmm3
386	movups	16(%edx),%xmm4
387	leal	32(%edx),%edx
388	pxor	%xmm3,%xmm0
389.L024enc1_loop:
390	aesenc	%xmm4,%xmm0
391	decl	%ecx
392	movups	(%edx),%xmm4
393	leal	16(%edx),%edx
394	jnz	.L024enc1_loop
395	aesenclast	%xmm4,%xmm0
/*
 * The carry out of this subl decides the jnc below; the leal, movl and
 * movups instructions in between do not modify the flags.
 */
396	subl	$16,%eax
397	leal	16(%edi),%edi
398	movl	%ebx,%ecx
399	movl	%ebp,%edx
400	movups	%xmm0,-16(%edi)
401	jnc	.L023cbc_enc_loop
/* Undo the last subtraction; a non-zero rest is a trailing partial block. */
402	addl	$16,%eax
403	jnz	.L022cbc_enc_tail
/* Last ciphertext block becomes the chaining value written back on exit. */
404	movaps	%xmm0,%xmm5
405	jmp	.L020cbc_ret
/*
 * Partial final block of %eax bytes (fewer than 16).  The input bytes are
 * copied to the output, the block is padded with zero bytes up to 16, and
 * the encrypt loop is re-entered with input and output both pointing at
 * that padded block and %eax = 0.
 */
406.L022cbc_enc_tail:
407	movl	%eax,%ecx
/* Encoded bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb */
408.long	2767451785
409	movl	$16,%ecx
410	subl	%eax,%ecx
411	xorl	%eax,%eax
/* Encoded bytes 89 F6 F3 AA: movl %esi,%esi ; rep stosb (stores %al = 0) */
412.long	2868115081
413	leal	-16(%edi),%edi
414	movl	%ebx,%ecx
415	movl	%edi,%esi
416	movl	%ebp,%edx
417	jmp	.L023cbc_enc_loop
418.align	16
/*
 * Decrypt path.  The byte count is biased by -64 so that the three-block
 * loop stops while up to four blocks are still left for the tail.
 */
419.L021cbc_decrypt:
420	subl	$64,%eax
421	jbe	.L025cbc_dec_tail
422	jmp	.L026cbc_dec_loop3
423.align	16
424.L026cbc_dec_loop3:
425	movups	(%esi),%xmm0
426	movups	16(%esi),%xmm1
427	movups	32(%esi),%xmm2
/* Keep copies of ciphertext blocks 0 and 1 for chaining after decryption. */
428	movaps	%xmm0,%xmm6
429	movaps	%xmm1,%xmm7
430	call	_aesni_decrypt3
/* Flags from this subl are consumed by the ja at the bottom of the loop. */
431	subl	$48,%eax
432	leal	48(%esi),%esi
433	leal	48(%edi),%edi
434	pxor	%xmm5,%xmm0
435	pxor	%xmm6,%xmm1
/*
 * Ciphertext block 2 is re-read from the input as the next chaining value
 * before the corresponding output block is stored.
 */
436	movups	-16(%esi),%xmm5
437	pxor	%xmm7,%xmm2
438	movups	%xmm0,-48(%edi)
439	movl	%ebx,%ecx
440	movups	%xmm1,-32(%edi)
441	movl	%ebp,%edx
442	movups	%xmm2,-16(%edi)
443	ja	.L026cbc_dec_loop3
/* Undo the -64 bias; %eax is now the number of bytes still to process. */
444.L025cbc_dec_tail:
445	addl	$64,%eax
446	jz	.L020cbc_ret
/*
 * Dispatch on up to 16, 32, 48 or more remaining bytes.  The movaps
 * between each cmpl and jbe does not alter the flags.  Every block,
 * including a partial last one, is loaded as a full 16 bytes.
 */
447	movups	(%esi),%xmm0
448	cmpl	$16,%eax
449	movaps	%xmm0,%xmm6
450	jbe	.L027cbc_dec_one
451	movups	16(%esi),%xmm1
452	cmpl	$32,%eax
453	movaps	%xmm1,%xmm7
454	jbe	.L028cbc_dec_two
455	movups	32(%esi),%xmm2
456	cmpl	$48,%eax
457	jbe	.L029cbc_dec_three
/*
 * Four blocks.  %xmm7 is needed for the fourth block, so ciphertext blocks
 * 1 and 2 are re-read from the input for chaining, and block 3 becomes the
 * new chaining value.
 */
458	movups	48(%esi),%xmm7
459	call	_aesni_decrypt4
460	movups	16(%esi),%xmm3
461	movups	32(%esi),%xmm4
462	pxor	%xmm5,%xmm0
463	pxor	%xmm6,%xmm1
464	movups	48(%esi),%xmm5
465	movups	%xmm0,(%edi)
466	pxor	%xmm3,%xmm2
467	pxor	%xmm4,%xmm7
468	movups	%xmm1,16(%edi)
469	movups	%xmm2,32(%edi)
/* The last block is left in %xmm0 and stored by the code at tail_collected. */
470	movaps	%xmm7,%xmm0
471	leal	48(%edi),%edi
472	jmp	.L030cbc_dec_tail_collected
/* One block: inline one-block decryption, then chain with %xmm5. */
473.L027cbc_dec_one:
474	movups	(%edx),%xmm3
475	movups	16(%edx),%xmm4
476	leal	32(%edx),%edx
477	pxor	%xmm3,%xmm0
478.L031dec1_loop:
479	aesdec	%xmm4,%xmm0
480	decl	%ecx
481	movups	(%edx),%xmm4
482	leal	16(%edx),%edx
483	jnz	.L031dec1_loop
484	aesdeclast	%xmm4,%xmm0
485	pxor	%xmm5,%xmm0
486	movaps	%xmm6,%xmm5
487	jmp	.L030cbc_dec_tail_collected
/* Two blocks: three-block helper is reused, third result is ignored. */
488.L028cbc_dec_two:
489	call	_aesni_decrypt3
490	pxor	%xmm5,%xmm0
491	pxor	%xmm6,%xmm1
492	movups	%xmm0,(%edi)
493	movaps	%xmm1,%xmm0
494	movaps	%xmm7,%xmm5
495	leal	16(%edi),%edi
496	jmp	.L030cbc_dec_tail_collected
497.L029cbc_dec_three:
498	call	_aesni_decrypt3
499	pxor	%xmm5,%xmm0
500	pxor	%xmm6,%xmm1
501	pxor	%xmm7,%xmm2
502	movups	%xmm0,(%edi)
503	movups	%xmm1,16(%edi)
504	movaps	%xmm2,%xmm0
505	movups	32(%esi),%xmm5
506	leal	32(%edi),%edi
/*
 * Here %xmm0 is the last plaintext block, %edi points at its destination
 * and %xmm5 is the new chaining value.  If the length is a multiple of 16
 * the block is stored whole.
 */
507.L030cbc_dec_tail_collected:
508	andl	$15,%eax
509	jnz	.L032cbc_dec_tail_partial
510	movups	%xmm0,(%edi)
511	jmp	.L020cbc_ret
/*
 * Partial last block: spill %xmm0 to a 16-byte-aligned scratch slot below
 * the stack pointer and copy only %eax bytes of it to the output.  %ebp
 * holds the original %esp while the stack is realigned.
 */
512.L032cbc_dec_tail_partial:
513	movl	%esp,%ebp
514	subl	$16,%esp
515	andl	$-16,%esp
516	movaps	%xmm0,(%esp)
517	movl	%esp,%esi
518	movl	%eax,%ecx
/* Encoded bytes 89 F6 F3 A4: movl %esi,%esi ; rep movsb */
519.long	2767451785
520	movl	%ebp,%esp
/* Regular exit: write the chaining value in %xmm5 back to the IV buffer. */
521.L020cbc_ret:
522	movl	36(%esp),%ebp
523	movups	%xmm5,(%ebp)
/* Zero-length exit joins here, after the IV write-back. */
.L049cbc_abort:
524	popl	%edi
525	popl	%esi
526	popl	%ebx
527	popl	%ebp
528	ret
529.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
/*
 * _aesni_set_encrypt_key: file-local key-expansion routine.
 *
 * Register interface (nothing is read from the stack):
 *   in   %eax   pointer to the user key bytes
 *        %ecx   key size in bits: 128, 192 or 256
 *        %edx   pointer to the key schedule to fill in
 *   out  %eax   0 on success, -1 if either pointer is null,
 *               -2 if the key size is not one of the three above
 *        %ecx   on success, the value also stored at byte offset 240 of
 *               the key schedule: 9, 11 or 13
 *
 * The round keys are written consecutively from offset 0 of the schedule.
 * Clobbers %edx, %xmm0-%xmm5 and the flags.
 *
 * The aeskeygenassist immediates 1, 2, 4, ... 128, 27, 54 are the AES
 * round constants.
 */
530.type	_aesni_set_encrypt_key,@function
531.align	16
532_aesni_set_encrypt_key:
533	testl	%eax,%eax
534	jz	.L033bad_pointer
535	testl	%edx,%edx
536	jz	.L033bad_pointer
/*
 * %xmm0 = first 16 key bytes.  %xmm4 is cleared once here; the shufps
 * steps further down keep element 0 of %xmm4 in place, so its low dword
 * stays zero across all expansion steps.
 */
537	movups	(%eax),%xmm0
538	pxor	%xmm4,%xmm4
539	leal	16(%edx),%edx
540	cmpl	$256,%ecx
541	je	.L03414rounds
542	cmpl	$192,%ecx
543	je	.L03512rounds
544	cmpl	$128,%ecx
545	jne	.L036bad_keybits
546.align	16
/*
 * 128-bit key.  Round key 0 is the user key itself.  The first expansion
 * step enters at key_128_cold, which skips the store, and the following
 * nine steps store the previous round key before computing the next one.
 * The last round key is stored explicitly after the final call.
 */
547.L03710rounds:
548	movl	$9,%ecx
549	movups	%xmm0,-16(%edx)
550	aeskeygenassist	$1,%xmm0,%xmm1
551	call	.L038key_128_cold
552	aeskeygenassist	$2,%xmm0,%xmm1
553	call	.L039key_128
554	aeskeygenassist	$4,%xmm0,%xmm1
555	call	.L039key_128
556	aeskeygenassist	$8,%xmm0,%xmm1
557	call	.L039key_128
558	aeskeygenassist	$16,%xmm0,%xmm1
559	call	.L039key_128
560	aeskeygenassist	$32,%xmm0,%xmm1
561	call	.L039key_128
562	aeskeygenassist	$64,%xmm0,%xmm1
563	call	.L039key_128
564	aeskeygenassist	$128,%xmm0,%xmm1
565	call	.L039key_128
566	aeskeygenassist	$27,%xmm0,%xmm1
567	call	.L039key_128
568	aeskeygenassist	$54,%xmm0,%xmm1
569	call	.L039key_128
570	movups	%xmm0,(%edx)
/* %edx is now schedule+160, so this store lands at schedule+240. */
571	movl	%ecx,80(%edx)
572	xorl	%eax,%eax
573	ret
574.align	16
/* Store the current round key, advance, then fall into the expansion step. */
575.L039key_128:
576	movups	%xmm0,(%edx)
577	leal	16(%edx),%edx
/*
 * Expansion step: the two shufps/pxor pairs fold shifted copies of the
 * previous round key into %xmm0, then dword 3 of the aeskeygenassist
 * result is broadcast (pshufd $255) and XORed in.
 */
578.L038key_128_cold:
579	shufps	$16,%xmm0,%xmm4
580	pxor	%xmm4,%xmm0
581	shufps	$140,%xmm0,%xmm4
582	pxor	%xmm4,%xmm0
583	pshufd	$255,%xmm1,%xmm1
584	pxor	%xmm1,%xmm0
585	ret
586.align	16
/*
 * 192-bit key.  %xmm2 receives the remaining 8 key bytes (movq clears the
 * upper half).  Steps alternate between key_192a, which stores 16 bytes,
 * and key_192b, which stores 32 bytes; the first step enters at
 * key_192a_cold and stores nothing.
 */
587.L03512rounds:
588	movq	16(%eax),%xmm2
589	movl	$11,%ecx
590	movups	%xmm0,-16(%edx)
591	aeskeygenassist	$1,%xmm2,%xmm1
592	call	.L040key_192a_cold
593	aeskeygenassist	$2,%xmm2,%xmm1
594	call	.L041key_192b
595	aeskeygenassist	$4,%xmm2,%xmm1
596	call	.L042key_192a
597	aeskeygenassist	$8,%xmm2,%xmm1
598	call	.L041key_192b
599	aeskeygenassist	$16,%xmm2,%xmm1
600	call	.L042key_192a
601	aeskeygenassist	$32,%xmm2,%xmm1
602	call	.L041key_192b
603	aeskeygenassist	$64,%xmm2,%xmm1
604	call	.L042key_192a
605	aeskeygenassist	$128,%xmm2,%xmm1
606	call	.L041key_192b
607	movups	%xmm0,(%edx)
/* %edx is now schedule+192, so this store lands at schedule+240. */
608	movl	%ecx,48(%edx)
609	xorl	%eax,%eax
610	ret
611.align	16
612.L042key_192a:
613	movups	%xmm0,(%edx)
614	leal	16(%edx),%edx
615.align	16
/* Save %xmm2 in %xmm5; key_192b later combines it with the next %xmm0. */
616.L040key_192a_cold:
617	movaps	%xmm2,%xmm5
/*
 * Common 192-bit expansion step: updates %xmm0 as in the 128-bit case but
 * with dword 1 of the aeskeygenassist result (pshufd $85), then updates
 * the extra words held in %xmm2 using dword 3 of the new %xmm0.
 */
618.L043key_192b_warm:
619	shufps	$16,%xmm0,%xmm4
620	movaps	%xmm2,%xmm3
621	pxor	%xmm4,%xmm0
622	shufps	$140,%xmm0,%xmm4
623	pslldq	$4,%xmm3
624	pxor	%xmm4,%xmm0
625	pshufd	$85,%xmm1,%xmm1
626	pxor	%xmm3,%xmm2
627	pxor	%xmm1,%xmm0
628	pshufd	$255,%xmm0,%xmm3
629	pxor	%xmm3,%xmm2
630	ret
631.align	16
/*
 * Repack the saved %xmm5, %xmm0 and %xmm2 into two 16-byte round keys,
 * store both, advance by 32 bytes and continue with the common step.
 */
632.L041key_192b:
633	movaps	%xmm0,%xmm3
634	shufps	$68,%xmm0,%xmm5
635	movups	%xmm5,(%edx)
636	shufps	$78,%xmm2,%xmm3
637	movups	%xmm3,16(%edx)
638	leal	32(%edx),%edx
639	jmp	.L043key_192b_warm
640.align	16
/*
 * 256-bit key.  %xmm0 and %xmm2 hold the two key halves, which are round
 * keys 0 and 1.  Steps alternate between key_256a (derives a new %xmm0)
 * and key_256b (derives a new %xmm2); the first step enters at
 * key_256a_cold and stores nothing.
 */
641.L03414rounds:
642	movups	16(%eax),%xmm2
643	movl	$13,%ecx
644	leal	16(%edx),%edx
645	movups	%xmm0,-32(%edx)
646	movups	%xmm2,-16(%edx)
647	aeskeygenassist	$1,%xmm2,%xmm1
648	call	.L044key_256a_cold
649	aeskeygenassist	$1,%xmm0,%xmm1
650	call	.L045key_256b
651	aeskeygenassist	$2,%xmm2,%xmm1
652	call	.L046key_256a
653	aeskeygenassist	$2,%xmm0,%xmm1
654	call	.L045key_256b
655	aeskeygenassist	$4,%xmm2,%xmm1
656	call	.L046key_256a
657	aeskeygenassist	$4,%xmm0,%xmm1
658	call	.L045key_256b
659	aeskeygenassist	$8,%xmm2,%xmm1
660	call	.L046key_256a
661	aeskeygenassist	$8,%xmm0,%xmm1
662	call	.L045key_256b
663	aeskeygenassist	$16,%xmm2,%xmm1
664	call	.L046key_256a
665	aeskeygenassist	$16,%xmm0,%xmm1
666	call	.L045key_256b
667	aeskeygenassist	$32,%xmm2,%xmm1
668	call	.L046key_256a
669	aeskeygenassist	$32,%xmm0,%xmm1
670	call	.L045key_256b
671	aeskeygenassist	$64,%xmm2,%xmm1
672	call	.L046key_256a
673	movups	%xmm0,(%edx)
/* %edx is now schedule+224, so this store lands at schedule+240. */
674	movl	%ecx,16(%edx)
675	xorl	%eax,%eax
676	ret
677.align	16
/* Store %xmm2, advance, then derive the next %xmm0 (same step as 128-bit). */
678.L046key_256a:
679	movups	%xmm2,(%edx)
680	leal	16(%edx),%edx
681.L044key_256a_cold:
682	shufps	$16,%xmm0,%xmm4
683	pxor	%xmm4,%xmm0
684	shufps	$140,%xmm0,%xmm4
685	pxor	%xmm4,%xmm0
686	pshufd	$255,%xmm1,%xmm1
687	pxor	%xmm1,%xmm0
688	ret
689.align	16
/*
 * Store %xmm0, advance, then derive the next %xmm2.  This variant
 * broadcasts dword 2 of the aeskeygenassist result (pshufd $170).
 */
690.L045key_256b:
691	movups	%xmm0,(%edx)
692	leal	16(%edx),%edx
693	shufps	$16,%xmm2,%xmm4
694	pxor	%xmm4,%xmm2
695	shufps	$140,%xmm2,%xmm4
696	pxor	%xmm4,%xmm2
697	pshufd	$170,%xmm1,%xmm1
698	pxor	%xmm1,%xmm2
699	ret
700.align	4
/* Error exits. */
701.L033bad_pointer:
702	movl	$-1,%eax
703	ret
704.align	4
705.L036bad_keybits:
706	movl	$-2,%eax
707	ret
708.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
/*
 * aesni_set_encrypt_key: public entry point for key expansion.
 *
 * Stack arguments (read relative to %esp on entry):
 *    4(%esp)  pointer to the user key bytes   -> %eax
 *    8(%esp)  key size in bits                -> %ecx
 *   12(%esp)  pointer to the key schedule     -> %edx
 *
 * Loads the arguments into the registers that _aesni_set_encrypt_key
 * expects and returns that routine's %eax unchanged.
 */
709.globl	aesni_set_encrypt_key
710.type	aesni_set_encrypt_key,@function
711.align	16
712aesni_set_encrypt_key:
713.L_aesni_set_encrypt_key_begin:
714	movl	4(%esp),%eax
715	movl	8(%esp),%ecx
716	movl	12(%esp),%edx
717	call	_aesni_set_encrypt_key
718	ret
719.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
/*
 * aesni_set_decrypt_key: build a key schedule for the aesdec-based
 * routines.
 *
 * Stack arguments (read relative to %esp on entry):
 *    4(%esp)  pointer to the user key bytes   -> %eax
 *    8(%esp)  key size in bits                -> %ecx
 *   12(%esp)  pointer to the key schedule     -> %edx
 *
 * First the encryption schedule is generated by _aesni_set_encrypt_key.
 * On success the order of the round keys is then reversed in place, and
 * aesimc is applied to every round key except the first and the last.
 *
 * Returns 0 in %eax on success; on failure the non-zero value returned by
 * _aesni_set_encrypt_key is passed through and the schedule is not
 * post-processed.  Clobbers %ecx, %edx, %xmm0, %xmm1 and the flags, in
 * addition to whatever _aesni_set_encrypt_key clobbers.
 */
720.globl	aesni_set_decrypt_key
721.type	aesni_set_decrypt_key,@function
722.align	16
723aesni_set_decrypt_key:
724.L_aesni_set_decrypt_key_begin:
725	movl	4(%esp),%eax
726	movl	8(%esp),%ecx
727	movl	12(%esp),%edx
728	call	_aesni_set_encrypt_key
/*
 * Reload the schedule pointer (the callee advanced %edx) and scale the
 * count returned in %ecx by 16, the size of one round key.  On the error
 * path %ecx is not meaningful, but it is not used there.
 */
729	movl	12(%esp),%edx
730	shll	$4,%ecx
731	testl	%eax,%eax
732	jnz	.L047dec_key_ret
/* %edx -> first round key, %eax -> last round key; swap the two. */
733	leal	16(%edx,%ecx,1),%eax
734	movups	(%edx),%xmm0
735	movups	(%eax),%xmm1
736	movups	%xmm0,(%eax)
737	movups	%xmm1,(%edx)
738	leal	16(%edx),%edx
739	leal	-16(%eax),%eax
/*
 * Walk inwards from both ends, swapping each pair of round keys after
 * transforming both with aesimc.  The cmpl flags are consumed by the ja
 * below; the two movups stores in between do not modify them.
 */
740.L048dec_key_inverse:
741	movups	(%edx),%xmm0
742	movups	(%eax),%xmm1
743	aesimc	%xmm0,%xmm0
744	aesimc	%xmm1,%xmm1
745	leal	16(%edx),%edx
746	leal	-16(%eax),%eax
747	cmpl	%edx,%eax
748	movups	%xmm0,16(%eax)
749	movups	%xmm1,-16(%edx)
750	ja	.L048dec_key_inverse
/* The middle round key has no partner: transform it in place. */
751	movups	(%edx),%xmm0
752	aesimc	%xmm0,%xmm0
753	movups	%xmm0,(%edx)
754	xorl	%eax,%eax
755.L047dec_key_ret:
756	ret
757.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
/*
 * Identification string embedded after the code, NUL-terminated ASCII:
 * "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
 */
758.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
759.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
760.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
761.byte	115,108,46,111,114,103,62,0
762