/*
 * xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc/chacha-ppc.S (revision e0ea3921ea68e51b93ffc215f08ae1647c8e1796)
 *
 * ChaCha20 for 32-bit PowerPC (GNU as syntax, bare register numbers).
 * The provenance line above was a bare text line and every following line
 * carried a fused listing line number; both were code-browser residue and
 * are not part of the program.
 */
.machine	"any"
.text

/*
 * ChaCha20_ctr32_int -- scalar (GPR-only) entry point.
 *
 * Register arguments as used by this file:
 *   r3 = output pointer          (written by __ChaCha20_1x)
 *   r4 = input pointer           (read by __ChaCha20_1x)
 *   r5 = length in bytes         (zero returns immediately)
 *   r6 = pointer to 8 key words  (read by __ChaCha20_1x)
 *   r7 = pointer to 4 words loaded into r11,r12,r14,r15
 *        (presumably the 32-bit block counter followed by the nonce;
 *        r11 is the word that __ChaCha20_1x increments per block)
 *
 * Frame layout (160 bytes):
 *   24..87    64-byte scratch used by __ChaCha20_1x for a partial block
 *   88..156   saved r14..r31
 *   164       LR, stored in the LR save word of the caller frame (160+4)
 *
 * __ChaCha20_ctr32_int is a second, non-exported label on the same address;
 * ChaCha20_ctr32_vmx branches to it for short inputs.
 */
.globl	ChaCha20_ctr32_int
.type	ChaCha20_ctr32_int,@function
.align	5
ChaCha20_ctr32_int:
__ChaCha20_ctr32_int:
	cmplwi	5,0			/* nothing to do for len == 0 */
	beqlr

	stwu	1,-160(1)
	mflr	0

	stw	14,88(1)
	stw	15,92(1)
	stw	16,96(1)
	stw	17,100(1)
	stw	18,104(1)
	stw	19,108(1)
	stw	20,112(1)
	stw	21,116(1)
	stw	22,120(1)
	stw	23,124(1)
	stw	24,128(1)
	stw	25,132(1)
	stw	26,136(1)
	stw	27,140(1)
	stw	28,144(1)
	stw	29,148(1)
	stw	30,152(1)
	stw	31,156(1)
	stw	0,164(1)

	/* Last state row: four words taken from the r7 argument. */
	lwz	11,0(7)
	lwz	12,4(7)
	lwz	14,8(7)
	lwz	15,12(7)

	bl	__ChaCha20_1x

	lwz	0,164(1)
	lwz	14,88(1)
	lwz	15,92(1)
	lwz	16,96(1)
	lwz	17,100(1)
	lwz	18,104(1)
	lwz	19,108(1)
	lwz	20,112(1)
	lwz	21,116(1)
	lwz	22,120(1)
	lwz	23,124(1)
	lwz	24,128(1)
	lwz	25,132(1)
	lwz	26,136(1)
	lwz	27,140(1)
	lwz	28,144(1)
	lwz	29,148(1)
	lwz	30,152(1)
	lwz	31,156(1)
	mtlr	0
	addi	1,1,160
	blr
/*
 * Trailer data emitted by the generator after the return; never executed.
 * It looks like a traceback-table tag (18 saved GPRs, 5 parameters) --
 * TODO confirm the field meanings against the ABI before relying on them.
 */
.long	0
.byte	0,12,4,1,0x80,18,5,0
.long	0


/*
 * __ChaCha20_1x -- process r5 bytes, one 64-byte block per outer iteration,
 * entirely in general-purpose registers.
 *
 * In:   r3 = out, r4 = in, r5 = remaining length (must be non-zero),
 *       r6 = pointer to 8 key words,
 *       r11,r12,r14,r15 = last state row (r11 is incremented per block).
 * Uses: r0, r7-r10, r16-r31, CTR, CR0, XER[CA]; in the tail also r11/r12.
 *       The callers in this file save and restore r14-r31 around the call.
 * Stack: the partial-block path stores the key stream at 24..87(r1), so the
 *       caller frame must reserve that area.  It is overwritten afterwards.
 *
 * State mapping inside the round loop (rows of the 4x4 ChaCha matrix):
 *   a = r16-r19 (constants), b = r20-r23 and c = r24-r27 (key),
 *   d = r28-r31 (counter/nonce).  r7-r10 keep a copy of key words 0-3 for
 *   the final feed-forward.
 */
.align	5
__ChaCha20_1x:
.Loop_outer:
	/* Row a: the four words spelling "expand 32-byte k". */
	lis	16,0x6170
	lis	17,0x3320
	lis	18,0x7962
	lis	19,0x6b20
	ori	16,16,0x7865
	ori	17,17,0x646e
	ori	18,18,0x2d32
	ori	19,19,0x6574

	li	0,10			/* 10 double rounds = 20 rounds */
	lwz	20,0(6)
	lwz	21,4(6)
	lwz	22,8(6)
	lwz	23,12(6)
	lwz	24,16(6)
	mr	28,11
	lwz	25,20(6)
	mr	29,12
	lwz	26,24(6)
	mr	30,14
	lwz	27,28(6)
	mr	31,15

	mr	7,20
	mr	8,21
	mr	9,22
	mr	10,23

	mtctr	0
.Loop:
	/* Column round: a += b; d ^= a; d <<<= 16 */
	add	16,16,20
	add	17,17,21
	add	18,18,22
	add	19,19,23
	xor	28,28,16
	xor	29,29,17
	xor	30,30,18
	xor	31,31,19
	rotlwi	28,28,16
	rotlwi	29,29,16
	rotlwi	30,30,16
	rotlwi	31,31,16
	/* c += d; b ^= c; b <<<= 12 */
	add	24,24,28
	add	25,25,29
	add	26,26,30
	add	27,27,31
	xor	20,20,24
	xor	21,21,25
	xor	22,22,26
	xor	23,23,27
	rotlwi	20,20,12
	rotlwi	21,21,12
	rotlwi	22,22,12
	rotlwi	23,23,12
	/* a += b; d ^= a; d <<<= 8 */
	add	16,16,20
	add	17,17,21
	add	18,18,22
	add	19,19,23
	xor	28,28,16
	xor	29,29,17
	xor	30,30,18
	xor	31,31,19
	rotlwi	28,28,8
	rotlwi	29,29,8
	rotlwi	30,30,8
	rotlwi	31,31,8
	/* c += d; b ^= c; b <<<= 7 */
	add	24,24,28
	add	25,25,29
	add	26,26,30
	add	27,27,31
	xor	20,20,24
	xor	21,21,25
	xor	22,22,26
	xor	23,23,27
	rotlwi	20,20,7
	rotlwi	21,21,7
	rotlwi	22,22,7
	rotlwi	23,23,7
	/*
	 * Diagonal round: same four steps, with rows b, c and d addressed
	 * one, two and three columns to the right of row a.
	 */
	add	16,16,21
	add	17,17,22
	add	18,18,23
	add	19,19,20
	xor	31,31,16
	xor	28,28,17
	xor	29,29,18
	xor	30,30,19
	rotlwi	31,31,16
	rotlwi	28,28,16
	rotlwi	29,29,16
	rotlwi	30,30,16
	add	26,26,31
	add	27,27,28
	add	24,24,29
	add	25,25,30
	xor	21,21,26
	xor	22,22,27
	xor	23,23,24
	xor	20,20,25
	rotlwi	21,21,12
	rotlwi	22,22,12
	rotlwi	23,23,12
	rotlwi	20,20,12
	add	16,16,21
	add	17,17,22
	add	18,18,23
	add	19,19,20
	xor	31,31,16
	xor	28,28,17
	xor	29,29,18
	xor	30,30,19
	rotlwi	31,31,8
	rotlwi	28,28,8
	rotlwi	29,29,8
	rotlwi	30,30,8
	add	26,26,31
	add	27,27,28
	add	24,24,29
	add	25,25,30
	xor	21,21,26
	xor	22,22,27
	xor	23,23,24
	xor	20,20,25
	rotlwi	21,21,7
	rotlwi	22,22,7
	rotlwi	23,23,7
	rotlwi	20,20,7
	bc	16,0,.Loop		/* bdnz: decrement CTR, loop while non-zero */

	/*
	 * len -= 64 with the carry recorded in XER[CA]; the addi/addis below
	 * do not touch CA, so subfe. can turn it into CR0 a few lines later.
	 */
	subic	5,5,64
	/* Feed-forward: add the original input state back in. */
	addi	16,16,0x7865
	addi	17,17,0x646e
	addi	18,18,0x2d32
	addi	19,19,0x6574
	addis	16,16,0x6170
	addis	17,17,0x3320
	addis	18,18,0x7962
	addis	19,19,0x6b20

	subfe.	0,0,0			/* r0 = borrow ? -1 : 0, sets CR0 */
	add	20,20,7
	lwz	7,16(6)
	add	21,21,8
	lwz	8,20(6)
	add	22,22,9
	lwz	9,24(6)
	add	23,23,10
	lwz	10,28(6)
	add	24,24,7
	add	25,25,8
	add	26,26,9
	add	27,27,10

	add	28,28,11
	add	29,29,12
	add	30,30,14
	add	31,31,15
	addi	11,11,1			/* advance the block counter word */
	/*
	 * Byte-reverse all 16 key-stream words (rotlwi + two rlwimi per
	 * word) so that word loads/stores on this big-endian target see
	 * the little-endian byte order of the ChaCha key stream.
	 */
	mr	7,16
	rotlwi	16,16,8
	rlwimi	16,7,24,0,7
	rlwimi	16,7,24,16,23
	mr	8,17
	rotlwi	17,17,8
	rlwimi	17,8,24,0,7
	rlwimi	17,8,24,16,23
	mr	9,18
	rotlwi	18,18,8
	rlwimi	18,9,24,0,7
	rlwimi	18,9,24,16,23
	mr	10,19
	rotlwi	19,19,8
	rlwimi	19,10,24,0,7
	rlwimi	19,10,24,16,23
	mr	7,20
	rotlwi	20,20,8
	rlwimi	20,7,24,0,7
	rlwimi	20,7,24,16,23
	mr	8,21
	rotlwi	21,21,8
	rlwimi	21,8,24,0,7
	rlwimi	21,8,24,16,23
	mr	9,22
	rotlwi	22,22,8
	rlwimi	22,9,24,0,7
	rlwimi	22,9,24,16,23
	mr	10,23
	rotlwi	23,23,8
	rlwimi	23,10,24,0,7
	rlwimi	23,10,24,16,23
	mr	7,24
	rotlwi	24,24,8
	rlwimi	24,7,24,0,7
	rlwimi	24,7,24,16,23
	mr	8,25
	rotlwi	25,25,8
	rlwimi	25,8,24,0,7
	rlwimi	25,8,24,16,23
	mr	9,26
	rotlwi	26,26,8
	rlwimi	26,9,24,0,7
	rlwimi	26,9,24,16,23
	mr	10,27
	rotlwi	27,27,8
	rlwimi	27,10,24,0,7
	rlwimi	27,10,24,16,23
	mr	7,28
	rotlwi	28,28,8
	rlwimi	28,7,24,0,7
	rlwimi	28,7,24,16,23
	mr	8,29
	rotlwi	29,29,8
	rlwimi	29,8,24,0,7
	rlwimi	29,8,24,16,23
	mr	9,30
	rotlwi	30,30,8
	rlwimi	30,9,24,0,7
	rlwimi	30,9,24,16,23
	mr	10,31
	rotlwi	31,31,8
	rlwimi	31,10,24,0,7
	rlwimi	31,10,24,16,23
	bne	.Ltail			/* len - 64 borrowed: fewer than 64 bytes left */

	/*
	 * Full block: out[0..63] = in[0..63] ^ key stream, with loads,
	 * xors and stores interleaved.  cmplwi sets CR0 for the loop branch
	 * at the bottom; nothing in between modifies CR0.
	 */
	lwz	7,0(4)
	lwz	8,4(4)
	cmplwi	5,0
	lwz	9,8(4)
	lwz	10,12(4)
	xor	16,16,7
	lwz	7,16(4)
	xor	17,17,8
	lwz	8,20(4)
	xor	18,18,9
	lwz	9,24(4)
	xor	19,19,10
	lwz	10,28(4)
	xor	20,20,7
	lwz	7,32(4)
	xor	21,21,8
	lwz	8,36(4)
	xor	22,22,9
	lwz	9,40(4)
	xor	23,23,10
	lwz	10,44(4)
	xor	24,24,7
	lwz	7,48(4)
	xor	25,25,8
	lwz	8,52(4)
	xor	26,26,9
	lwz	9,56(4)
	xor	27,27,10
	lwz	10,60(4)
	xor	28,28,7
	stw	16,0(3)
	xor	29,29,8
	stw	17,4(3)
	xor	30,30,9
	stw	18,8(3)
	xor	31,31,10
	stw	19,12(3)
	stw	20,16(3)
	stw	21,20(3)
	stw	22,24(3)
	stw	23,28(3)
	stw	24,32(3)
	stw	25,36(3)
	stw	26,40(3)
	stw	27,44(3)
	stw	28,48(3)
	stw	29,52(3)
	stw	30,56(3)
	addi	4,4,64
	stw	31,60(3)
	addi	3,3,64

	bne	.Loop_outer		/* more input remains */

	blr

.align	4
.Ltail:
	/*
	 * Partial final block.  r5 is restored to the real byte count and
	 * used as the loop count; pointers are biased by -1 for the
	 * pre-incrementing lbzu/stbu forms.
	 */
	addi	5,5,64
	subi	4,4,1
	subi	3,3,1
	addi	7,1,24-1
	mtctr	5

	/* Spill the key-stream block to the stack scratch area. */
	stw	16,24(1)
	stw	17,28(1)
	stw	18,32(1)
	stw	19,36(1)
	stw	20,40(1)
	stw	21,44(1)
	stw	22,48(1)
	stw	23,52(1)
	stw	24,56(1)
	stw	25,60(1)
	stw	26,64(1)
	stw	27,68(1)
	stw	28,72(1)
	stw	29,76(1)
	stw	30,80(1)
	stw	31,84(1)

.Loop_tail:
	lbzu	11,1(4)			/* input byte */
	lbzu	16,1(7)			/* key-stream byte */
	xor	12,11,16
	stbu	12,1(3)
	bc	16,0,.Loop_tail		/* bdnz */

	/* Overwrite the spilled key stream (with the stack pointer value). */
	stw	1,24(1)
	stw	1,28(1)
	stw	1,32(1)
	stw	1,36(1)
	stw	1,40(1)
	stw	1,44(1)
	stw	1,48(1)
	stw	1,52(1)
	stw	1,56(1)
	stw	1,60(1)
	stw	1,64(1)
	stw	1,68(1)
	stw	1,72(1)
	stw	1,76(1)
	stw	1,80(1)
	stw	1,84(1)

	blr
/* Trailer data emitted by the generator; never executed. */
.long	0
.byte	0,12,0x14,0,0,0,0,0

/*
 * ChaCha20_ctr32_vmx -- AltiVec/VMX entry point.
 *
 * Same register arguments as ChaCha20_ctr32_int (r3 out, r4 in, r5 len,
 * r6 key words, r7 counter/nonce words).  Inputs shorter than 256 bytes are
 * handed straight to the scalar routine.
 *
 * Each outer iteration produces 256 bytes: one block is computed in
 * r16-r31 exactly as in __ChaCha20_1x, interleaved with three blocks held
 * in v0-v3, v4-v7 and v8-v11.  Whatever is left below 256 bytes at the end
 * is finished by __ChaCha20_1x.
 *
 * Frame layout (320 bytes):
 *   24..87     scratch for the __ChaCha20_1x tail
 *   96..239    saved v23..v31 (stvx/lvx ignore the low four address bits,
 *              so index values 103/119 address offsets 96/112)
 *   244        saved VRSAVE
 *   248..316   saved r14..r31
 *   324        LR (LR save word of the caller frame)
 *
 * Long-lived vector registers:
 *   v12 = constants row, v13/v14 = key rows,
 *   v15/v16/v17 = counter rows for the three vector blocks (+1,+2,+3),
 *   v18 = {4,0,0,0} counter step, v19/v23 = vperm masks rotating each
 *   word left by 16 and by 8, v24 = input alignment permute,
 *   v25 = output permute, v26 = output merge mask,
 *   v27/v28 = splatted rotate counts 12 and 7 inside the round loop.
 */
.globl	ChaCha20_ctr32_vmx
.type	ChaCha20_ctr32_vmx,@function
.align	5
ChaCha20_ctr32_vmx:
	cmplwi	5,256
	blt	__ChaCha20_ctr32_int

	stwu	1,-320(1)
	mflr	0
	li	10,103
	li	11,119
	mfspr	12,256			/* SPR 256 = VRSAVE */
	stvx	23,10,1
	addi	10,10,32
	stvx	24,11,1
	addi	11,11,32
	stvx	25,10,1
	addi	10,10,32
	stvx	26,11,1
	addi	11,11,32
	stvx	27,10,1
	addi	10,10,32
	stvx	28,11,1
	addi	11,11,32
	stvx	29,10,1
	addi	10,10,32
	stvx	30,11,1
	stvx	31,10,1
	stw	12,244(1)
	stw	14,248(1)
	stw	15,252(1)
	stw	16,256(1)
	stw	17,260(1)
	stw	18,264(1)
	stw	19,268(1)
	stw	20,272(1)
	stw	21,276(1)
	stw	22,280(1)
	stw	23,284(1)
	stw	24,288(1)
	stw	25,292(1)
	stw	26,296(1)
	stw	27,300(1)
	stw	28,304(1)
	stw	29,308(1)
	stw	30,312(1)
	stw	31,316(1)
	li	12,-4096+511		/* VRSAVE mask 0xfffff1ff: v0-v19, v23-v31 */
	stw	0, 324(1)
	mtspr	256,12

	bl	.Lconsts		/* r12 = address of .Lsigma (clobbers r0) */
	li	16,16
	li	17,32
	li	18,48
	li	19,64
	li	20,31
	li	21,15

	/* Possibly unaligned key and counter: load both halves, permute. */
	lvx	13,0,6
	lvsl	29,0,6
	lvx	14,16,6
	lvx	27,20,6

	lvx	15,0,7
	lvsl	30,0,7
	lvx	28,21,7

	lvx	12,0,12			/* constants row */
	lvx	17,16,12		/* {1,0,0,0}, temporary */
	lvx	18,17,12		/* {4,0,0,0} */
	lvx	19,18,12		/* rotate-by-16 byte mask */
	lvx	23,19,12		/* rotate-by-8 byte mask */

	vperm	13,13,14,29
	vperm	14,14,27,29
	vperm	15,15,28,30

	/* Scalar copy of the counter row, plus vector counters +1, +2, +3. */
	lwz	11,0(7)
	lwz	12,4(7)
	vadduwm	15,15,17
	lwz	14,8(7)
	vadduwm	16,15,17
	lwz	15,12(7)
	vadduwm	17,16,17

	/* Alignment permutes for input (v24) and output (v25), merge mask v26. */
	vxor	29,29,29
	vspltisw	26,-1
	lvsl	24,0,4
	lvsr	25,0,3
	vperm	26,29,26,25

	/*
	 * r16 = 16 here, so lvsl yields the identity 0x00..0f; xor with 3
	 * turns it into a per-word byte reversal that is folded into the
	 * input and output permutes.
	 */
	lvsl	29,0,16
	vspltisb	30,3
	vxor	29,29,30
	vxor	25,25,30
	vperm	24,24,24,29

	li	0,10
	b	.Loop_outer_vmx

.align	4
.Loop_outer_vmx:
	/* Reload the scalar state and copy the vector state for 3 blocks. */
	lis	16,0x6170
	lis	17,0x3320
	vor	0,12,12
	lis	18,0x7962
	lis	19,0x6b20
	vor	4,12,12
	ori	16,16,0x7865
	ori	17,17,0x646e
	vor	8,12,12
	ori	18,18,0x2d32
	ori	19,19,0x6574
	vor	1,13,13

	lwz	20,0(6)
	vor	5,13,13
	lwz	21,4(6)
	vor	9,13,13
	lwz	22,8(6)
	vor	2,14,14
	lwz	23,12(6)
	vor	6,14,14
	lwz	24,16(6)
	vor	10,14,14
	mr	28,11
	lwz	25,20(6)
	vor	3,15,15
	mr	29,12
	lwz	26,24(6)
	vor	7,16,16
	mr	30,14
	lwz	27,28(6)
	vor	11,17,17
	mr	31,15

	mr	7,20
	mr	8,21
	mr	9,22
	mr	10,23

	vspltisw	27,12
	vspltisw	28,7

	mtctr	0
	nop
.Loop_vmx:
	/*
	 * One double round for all four blocks.  Vector rows rotate by 16
	 * and 8 through vperm (v19, v23) and by 12 and 7 through vrlw
	 * (v27, v28).  The vsldoi groups rotate rows b, c and d by whole
	 * words to move between column and diagonal form.
	 */
	vadduwm	0,0,1
	vadduwm	4,4,5
	vadduwm	8,8,9
	add	16,16,20
	add	17,17,21
	add	18,18,22
	vxor	3,3,0
	vxor	7,7,4
	vxor	11,11,8
	add	19,19,23
	xor	28,28,16
	xor	29,29,17
	vperm	3,3,3,19
	vperm	7,7,7,19
	vperm	11,11,11,19
	xor	30,30,18
	xor	31,31,19
	rotlwi	28,28,16
	vadduwm	2,2,3
	vadduwm	6,6,7
	vadduwm	10,10,11
	rotlwi	29,29,16
	rotlwi	30,30,16
	rotlwi	31,31,16
	vxor	1,1,2
	vxor	5,5,6
	vxor	9,9,10
	add	24,24,28
	add	25,25,29
	add	26,26,30
	vrlw	1,1,27
	vrlw	5,5,27
	vrlw	9,9,27
	add	27,27,31
	xor	20,20,24
	xor	21,21,25
	vadduwm	0,0,1
	vadduwm	4,4,5
	vadduwm	8,8,9
	xor	22,22,26
	xor	23,23,27
	rotlwi	20,20,12
	vxor	3,3,0
	vxor	7,7,4
	vxor	11,11,8
	rotlwi	21,21,12
	rotlwi	22,22,12
	rotlwi	23,23,12
	vperm	3,3,3,23
	vperm	7,7,7,23
	vperm	11,11,11,23
	add	16,16,20
	add	17,17,21
	add	18,18,22
	vadduwm	2,2,3
	vadduwm	6,6,7
	vadduwm	10,10,11
	add	19,19,23
	xor	28,28,16
	xor	29,29,17
	vxor	1,1,2
	vxor	5,5,6
	vxor	9,9,10
	xor	30,30,18
	xor	31,31,19
	rotlwi	28,28,8
	vrlw	1,1,28
	vrlw	5,5,28
	vrlw	9,9,28
	rotlwi	29,29,8
	rotlwi	30,30,8
	rotlwi	31,31,8
	vsldoi	2,2,2, 16-8
	vsldoi	6,6,6, 16-8
	vsldoi	10,10,10, 16-8
	add	24,24,28
	add	25,25,29
	add	26,26,30
	vsldoi	1,1,1, 16-12
	vsldoi	5,5,5, 16-12
	vsldoi	9,9,9, 16-12
	add	27,27,31
	xor	20,20,24
	xor	21,21,25
	vsldoi	3,3,3, 16-4
	vsldoi	7,7,7, 16-4
	vsldoi	11,11,11, 16-4
	xor	22,22,26
	xor	23,23,27
	rotlwi	20,20,7
	rotlwi	21,21,7
	rotlwi	22,22,7
	rotlwi	23,23,7
	/* Second half: diagonal round. */
	vadduwm	0,0,1
	vadduwm	4,4,5
	vadduwm	8,8,9
	add	16,16,21
	add	17,17,22
	add	18,18,23
	vxor	3,3,0
	vxor	7,7,4
	vxor	11,11,8
	add	19,19,20
	xor	31,31,16
	xor	28,28,17
	vperm	3,3,3,19
	vperm	7,7,7,19
	vperm	11,11,11,19
	xor	29,29,18
	xor	30,30,19
	rotlwi	31,31,16
	vadduwm	2,2,3
	vadduwm	6,6,7
	vadduwm	10,10,11
	rotlwi	28,28,16
	rotlwi	29,29,16
	rotlwi	30,30,16
	vxor	1,1,2
	vxor	5,5,6
	vxor	9,9,10
	add	26,26,31
	add	27,27,28
	add	24,24,29
	vrlw	1,1,27
	vrlw	5,5,27
	vrlw	9,9,27
	add	25,25,30
	xor	21,21,26
	xor	22,22,27
	vadduwm	0,0,1
	vadduwm	4,4,5
	vadduwm	8,8,9
	xor	23,23,24
	xor	20,20,25
	rotlwi	21,21,12
	vxor	3,3,0
	vxor	7,7,4
	vxor	11,11,8
	rotlwi	22,22,12
	rotlwi	23,23,12
	rotlwi	20,20,12
	vperm	3,3,3,23
	vperm	7,7,7,23
	vperm	11,11,11,23
	add	16,16,21
	add	17,17,22
	add	18,18,23
	vadduwm	2,2,3
	vadduwm	6,6,7
	vadduwm	10,10,11
	add	19,19,20
	xor	31,31,16
	xor	28,28,17
	vxor	1,1,2
	vxor	5,5,6
	vxor	9,9,10
	xor	29,29,18
	xor	30,30,19
	rotlwi	31,31,8
	vrlw	1,1,28
	vrlw	5,5,28
	vrlw	9,9,28
	rotlwi	28,28,8
	rotlwi	29,29,8
	rotlwi	30,30,8
	vsldoi	2,2,2, 16-8
	vsldoi	6,6,6, 16-8
	vsldoi	10,10,10, 16-8
	add	26,26,31
	add	27,27,28
	add	24,24,29
	vsldoi	1,1,1, 16-4
	vsldoi	5,5,5, 16-4
	vsldoi	9,9,9, 16-4
	add	25,25,30
	xor	21,21,26
	xor	22,22,27
	vsldoi	3,3,3, 16-12
	vsldoi	7,7,7, 16-12
	vsldoi	11,11,11, 16-12
	xor	23,23,24
	xor	20,20,25
	rotlwi	21,21,7
	rotlwi	22,22,7
	rotlwi	23,23,7
	rotlwi	20,20,7
	bc	16,0,.Loop_vmx		/* bdnz */

	/* Scalar block feed-forward (same as in __ChaCha20_1x). */
	subi	5,5,256
	addi	16,16,0x7865
	addi	17,17,0x646e
	addi	18,18,0x2d32
	addi	19,19,0x6574
	addis	16,16,0x6170
	addis	17,17,0x3320
	addis	18,18,0x7962
	addis	19,19,0x6b20
	add	20,20,7
	lwz	7,16(6)
	add	21,21,8
	lwz	8,20(6)
	add	22,22,9
	lwz	9,24(6)
	add	23,23,10
	lwz	10,28(6)
	add	24,24,7
	add	25,25,8
	add	26,26,9
	add	27,27,10
	add	28,28,11
	add	29,29,12
	add	30,30,14
	add	31,31,15

	/* Vector block feed-forward. */
	vadduwm	0,0,12
	vadduwm	4,4,12
	vadduwm	8,8,12
	vadduwm	1,1,13
	vadduwm	5,5,13
	vadduwm	9,9,13
	vadduwm	2,2,14
	vadduwm	6,6,14
	vadduwm	10,10,14
	vadduwm	3,3,15
	vadduwm	7,7,16
	vadduwm	11,11,17

	/* Four blocks consumed: advance every counter by 4. */
	addi	11,11,4
	vadduwm	15,15,18
	vadduwm	16,16,18
	vadduwm	17,17,18

	/* Byte-reverse the 16 scalar key-stream words. */
	mr	7,16
	rotlwi	16,16,8
	rlwimi	16,7,24,0,7
	rlwimi	16,7,24,16,23
	mr	8,17
	rotlwi	17,17,8
	rlwimi	17,8,24,0,7
	rlwimi	17,8,24,16,23
	mr	9,18
	rotlwi	18,18,8
	rlwimi	18,9,24,0,7
	rlwimi	18,9,24,16,23
	mr	10,19
	rotlwi	19,19,8
	rlwimi	19,10,24,0,7
	rlwimi	19,10,24,16,23
	mr	7,20
	rotlwi	20,20,8
	rlwimi	20,7,24,0,7
	rlwimi	20,7,24,16,23
	mr	8,21
	rotlwi	21,21,8
	rlwimi	21,8,24,0,7
	rlwimi	21,8,24,16,23
	mr	9,22
	rotlwi	22,22,8
	rlwimi	22,9,24,0,7
	rlwimi	22,9,24,16,23
	mr	10,23
	rotlwi	23,23,8
	rlwimi	23,10,24,0,7
	rlwimi	23,10,24,16,23
	mr	7,24
	rotlwi	24,24,8
	rlwimi	24,7,24,0,7
	rlwimi	24,7,24,16,23
	mr	8,25
	rotlwi	25,25,8
	rlwimi	25,8,24,0,7
	rlwimi	25,8,24,16,23
	mr	9,26
	rotlwi	26,26,8
	rlwimi	26,9,24,0,7
	rlwimi	26,9,24,16,23
	mr	10,27
	rotlwi	27,27,8
	rlwimi	27,10,24,0,7
	rlwimi	27,10,24,16,23
	mr	7,28
	rotlwi	28,28,8
	rlwimi	28,7,24,0,7
	rlwimi	28,7,24,16,23
	mr	8,29
	rotlwi	29,29,8
	rlwimi	29,8,24,0,7
	rlwimi	29,8,24,16,23
	mr	9,30
	rotlwi	30,30,8
	rlwimi	30,9,24,0,7
	rlwimi	30,9,24,16,23
	mr	10,31
	rotlwi	31,31,8
	rlwimi	31,10,24,0,7
	rlwimi	31,10,24,16,23
	/* First 64 bytes: scalar block, word loads and stores. */
	lwz	7,0(4)
	lwz	8,4(4)
	lwz	9,8(4)
	lwz	10,12(4)
	xor	16,16,7
	lwz	7,16(4)
	xor	17,17,8
	lwz	8,20(4)
	xor	18,18,9
	lwz	9,24(4)
	xor	19,19,10
	lwz	10,28(4)
	xor	20,20,7
	lwz	7,32(4)
	xor	21,21,8
	lwz	8,36(4)
	xor	22,22,9
	lwz	9,40(4)
	xor	23,23,10
	lwz	10,44(4)
	xor	24,24,7
	lwz	7,48(4)
	xor	25,25,8
	lwz	8,52(4)
	xor	26,26,9
	lwz	9,56(4)
	xor	27,27,10
	lwz	10,60(4)
	xor	28,28,7
	stw	16,0(3)
	xor	29,29,8
	stw	17,4(3)
	xor	30,30,9
	stw	18,8(3)
	xor	31,31,10
	stw	19,12(3)
	addi	4,4,64
	stw	20,16(3)
	li	7,16
	stw	21,20(3)
	li	8,32
	stw	22,24(3)
	li	9,48
	stw	23,28(3)
	li	10,64
	stw	24,32(3)
	stw	25,36(3)
	stw	26,40(3)
	stw	27,44(3)
	stw	28,48(3)
	stw	29,52(3)
	stw	30,56(3)
	stw	31,60(3)
	addi	3,3,64

	/*
	 * Remaining 192 bytes: vector blocks.  Input is fetched with
	 * aligned lvx and realigned through v24; each group loads one
	 * extra vector to cover the misaligned end.
	 */
	lvx	27,0,4
	lvx	28,7,4
	lvx	29,8,4
	lvx	30,9,4
	lvx	31,10,4
	addi	4,4,64

	vperm	27,27,28,24
	vperm	28,28,29,24
	vperm	29,29,30,24
	vperm	30,30,31,24
	vxor	0,0,27
	vxor	1,1,28
	lvx	28,7,4
	vxor	2,2,29
	lvx	29,8,4
	vxor	3,3,30
	lvx	30,9,4
	lvx	27,10,4
	addi	4,4,64
	li	10,63			/* last extra load at +63: stays inside the input */
	vperm	0,0,0,25
	vperm	1,1,1,25
	vperm	2,2,2,25
	vperm	3,3,3,25

	vperm	31,31,28,24
	vperm	28,28,29,24
	vperm	29,29,30,24
	vperm	30,30,27,24
	vxor	4,4,31
	vxor	5,5,28
	lvx	28,7,4
	vxor	6,6,29
	lvx	29,8,4
	vxor	7,7,30
	lvx	30,9,4
	lvx	31,10,4
	addi	4,4,64
	vperm	4,4,4,25
	vperm	5,5,5,25
	vperm	6,6,6,25
	vperm	7,7,7,25

	vperm	27,27,28,24
	vperm	28,28,29,24
	vperm	29,29,30,24
	vperm	30,30,31,24
	vxor	8,8,27
	vxor	9,9,28
	vxor	10,10,29
	vxor	11,11,30
	vperm	8,8,8,25
	vperm	9,9,9,25
	vperm	10,10,10,25
	vperm	11,11,11,25

	andi.	17,3,15			/* r17 = output misalignment, CR0 for the beq below */
	mr	16,3			/* r16 = start of the vector output */

	/* Merge neighbouring result vectors for the 11 interior stores. */
	vsel	27,0,1,26
	vsel	28,1,2,26
	vsel	29,2,3,26
	vsel	30,3,4,26
	vsel	1,4,5,26
	vsel	2,5,6,26
	vsel	3,6,7,26
	vsel	4,7,8,26
	vsel	5,8,9,26
	vsel	6,9,10,26
	vsel	7,10,11,26


	stvx	27,7,3
	stvx	28,8,3
	stvx	29,9,3
	addi	3,3,64
	stvx	30,0,3
	stvx	1,7,3
	stvx	2,8,3
	stvx	3,9,3
	addi	3,3,64
	stvx	4,0,3
	stvx	5,7,3
	stvx	6,8,3
	stvx	7,9,3
	addi	3,3,64

	beq	.Laligned_vmx

	/*
	 * Misaligned output: the first and last partial vectors are written
	 * one byte at a time so bytes outside the buffer are not touched.
	 */
	sub	18,3,17
	li	19,0
.Lunaligned_tail_vmx:
	stvebx	11,19,18
	addi	19,19,1
	cmpw	19,17
	bne	.Lunaligned_tail_vmx

	sub	18,16,17
.Lunaligned_head_vmx:
	stvebx	0,17,18
	cmpwi	17,15
	addi	17,17,1
	bne	.Lunaligned_head_vmx

	cmplwi	5,255			/* at least 256 bytes left? */
	bgt	.Loop_outer_vmx

	b	.Ldone_vmx

.align	4
.Laligned_vmx:
	stvx	0,0,16			/* aligned output: first vector stored whole */

	cmplwi	5,255
	bgt	.Loop_outer_vmx
	nop

.Ldone_vmx:
	cmplwi	5,0
	bnel	__ChaCha20_1x		/* finish the remaining 1..255 bytes */

	lwz	12,244(1)
	li	10,103
	li	11,119
	mtspr	256,12
	lvx	23,10,1
	addi	10,10,32
	lvx	24,11,1
	addi	11,11,32
	lvx	25,10,1
	addi	10,10,32
	lvx	26,11,1
	addi	11,11,32
	lvx	27,10,1
	addi	10,10,32
	lvx	28,11,1
	addi	11,11,32
	lvx	29,10,1
	addi	10,10,32
	lvx	30,11,1
	lvx	31,10,1
	lwz	0, 324(1)
	lwz	14,248(1)
	lwz	15,252(1)
	lwz	16,256(1)
	lwz	17,260(1)
	lwz	18,264(1)
	lwz	19,268(1)
	lwz	20,272(1)
	lwz	21,276(1)
	lwz	22,280(1)
	lwz	23,284(1)
	lwz	24,288(1)
	lwz	25,292(1)
	lwz	26,296(1)
	lwz	27,300(1)
	lwz	28,304(1)
	lwz	29,308(1)
	lwz	30,312(1)
	lwz	31,316(1)
	mtlr	0
	addi	1,1,320
	blr
/* Trailer data emitted by the generator; never executed. */
.long	0
.byte	0,12,0x04,1,0x80,18,5,0
.long	0


/*
 * ChaCha20_ctr32_vsx -- VSX/POWER8 entry point.
 *
 * Same register arguments as the other entry points (r3 out, r4 in,
 * r5 len, r6 key words, r7 counter/nonce words).  No GPRs are saved;
 * only r0 and r6-r12 are used as scratch.
 *
 * Four blocks are computed at once in "vertical" form: each of v0-v15
 * holds one state word for four consecutive blocks (v0-v3 row a, v4-v7
 * row b, v8-v11 row c, v12-v15 row d).  After 20 rounds the 4x4 word
 * groups are transposed back into per-block rows.
 *
 * Long-lived registers:
 *   v16-v19 = input state rows (v19 with its counter word cleared),
 *   v26     = per-lane block counters (counter + {0,1,2,3}),
 *   v27-v30 = rotate counts inside the round loop, temporaries outside,
 *   v31     = per-word byte-reversal permute.
 *
 * Instructions given as .long are raw encodings, presumably so that
 * assemblers without VSX/POWER8 mnemonics can still build the file; the
 * decoded form is shown next to each one.
 *
 * Frame layout (200 bytes): 24..87 tail scratch, saved v26-v31 above it
 * (stvx/lvx round the address down to 16 bytes), VRSAVE at 196, LR at 204.
 */
.globl	ChaCha20_ctr32_vsx
.type	ChaCha20_ctr32_vsx,@function
.align	5
ChaCha20_ctr32_vsx:
	stwu	1,-200(1)
	mflr	0
	li	10,103
	li	11,119
	mfspr	12,256			/* SPR 256 = VRSAVE */
	stvx	26,10,1
	addi	10,10,32
	stvx	27,11,1
	addi	11,11,32
	stvx	28,10,1
	addi	10,10,32
	stvx	29,11,1
	addi	11,11,32
	stvx	30,10,1
	stvx	31,11,1
	stw	12,196(1)
	li	12,-4096+63		/* VRSAVE mask 0xfffff03f: v0-v19, v26-v31 */
	stw	0, 204(1)
	mtspr	256,12

	bl	.Lconsts		/* r12 = address of .Lsigma (clobbers r0) */
.long	0x7E006619		/* lxvw4x v16,0,r12   constants row */
	addi	12,12,0x50		/* r12 -> splatted constants at .Lsigma+80 */
	li	8,16
	li	9,32
	li	10,48
	li	11,64

.long	0x7E203619		/* lxvw4x v17,0,r6    key words 0-3 */
.long	0x7E483619		/* lxvw4x v18,r8,r6   key words 4-7 */
.long	0x7E603E19		/* lxvw4x v19,0,r7    counter/nonce row */

	vxor	27,27,27
.long	0x7F8B6619		/* lxvw4x v28,r11,r12 {0,1,2,3} */
	vspltw	26,19,0			/* counter word in every lane */
	vsldoi	19,19,27,4		/* these two shifts clear word 0 of v19 */
	vsldoi	19,27,19,12
	vadduwm	26,26,28		/* lanes = counter+0 .. counter+3 */

	/* r8 = 16, so lvsl gives 0x00..0f; xor 3 = byte reversal per word. */
	lvsl	31,0,8
	vspltisb	27,3
	vxor	31,31,27

	li	0,10
	mtctr	0
	b	.Loop_outer_vsx

.align	5
.Loop_outer_vsx:
	/* Row a: pre-splatted constants. */
	lvx	0,0,12
	lvx	1,8,12
	lvx	2,9,12
	lvx	3,10,12

	/* Rows b and c: splat each key word across the four lanes. */
	vspltw	4,17,0
	vspltw	5,17,1
	vspltw	6,17,2
	vspltw	7,17,3

	vspltw	8,18,0
	vspltw	9,18,1
	vspltw	10,18,2
	vspltw	11,18,3

	/* Row d: per-lane counters plus the three remaining words. */
	vor	12,26,26
	vspltw	13,19,1
	vspltw	14,19,2
	vspltw	15,19,3

	/* Rotate counts; vrlw uses the low 5 bits, so -16 rotates by 16. */
	vspltisw	27,-16
	vspltisw	28,12
	vspltisw	29,8
	vspltisw	30,7

.Loop_vsx:
	/* Column round. */
	vadduwm	0,0,4
	vadduwm	1,1,5
	vadduwm	2,2,6
	vadduwm	3,3,7
	vxor	12,12,0
	vxor	13,13,1
	vxor	14,14,2
	vxor	15,15,3
	vrlw	12,12,27
	vrlw	13,13,27
	vrlw	14,14,27
	vrlw	15,15,27
	vadduwm	8,8,12
	vadduwm	9,9,13
	vadduwm	10,10,14
	vadduwm	11,11,15
	vxor	4,4,8
	vxor	5,5,9
	vxor	6,6,10
	vxor	7,7,11
	vrlw	4,4,28
	vrlw	5,5,28
	vrlw	6,6,28
	vrlw	7,7,28
	vadduwm	0,0,4
	vadduwm	1,1,5
	vadduwm	2,2,6
	vadduwm	3,3,7
	vxor	12,12,0
	vxor	13,13,1
	vxor	14,14,2
	vxor	15,15,3
	vrlw	12,12,29
	vrlw	13,13,29
	vrlw	14,14,29
	vrlw	15,15,29
	vadduwm	8,8,12
	vadduwm	9,9,13
	vadduwm	10,10,14
	vadduwm	11,11,15
	vxor	4,4,8
	vxor	5,5,9
	vxor	6,6,10
	vxor	7,7,11
	vrlw	4,4,30
	vrlw	5,5,30
	vrlw	6,6,30
	vrlw	7,7,30
	/* Diagonal round: same steps on shifted register selections. */
	vadduwm	0,0,5
	vadduwm	1,1,6
	vadduwm	2,2,7
	vadduwm	3,3,4
	vxor	15,15,0
	vxor	12,12,1
	vxor	13,13,2
	vxor	14,14,3
	vrlw	15,15,27
	vrlw	12,12,27
	vrlw	13,13,27
	vrlw	14,14,27
	vadduwm	10,10,15
	vadduwm	11,11,12
	vadduwm	8,8,13
	vadduwm	9,9,14
	vxor	5,5,10
	vxor	6,6,11
	vxor	7,7,8
	vxor	4,4,9
	vrlw	5,5,28
	vrlw	6,6,28
	vrlw	7,7,28
	vrlw	4,4,28
	vadduwm	0,0,5
	vadduwm	1,1,6
	vadduwm	2,2,7
	vadduwm	3,3,4
	vxor	15,15,0
	vxor	12,12,1
	vxor	13,13,2
	vxor	14,14,3
	vrlw	15,15,29
	vrlw	12,12,29
	vrlw	13,13,29
	vrlw	14,14,29
	vadduwm	10,10,15
	vadduwm	11,11,12
	vadduwm	8,8,13
	vadduwm	9,9,14
	vxor	5,5,10
	vxor	6,6,11
	vxor	7,7,8
	vxor	4,4,9
	vrlw	5,5,30
	vrlw	6,6,30
	vrlw	7,7,30
	vrlw	4,4,30
	bc	16,0,.Loop_vsx		/* bdnz */

	vadduwm	12,12,26		/* feed-forward of the per-lane counters */

	/* Transpose row a (v0-v3) from per-word to per-block layout. */
.long	0x13600F8C		/* vmrgew  v27,v0,v1 */
.long	0x13821F8C		/* vmrgew  v28,v2,v3 */
.long	0x10000E8C		/* vmrgow  v0,v0,v1 */
.long	0x10421E8C		/* vmrgow  v2,v2,v3 */
.long	0x13A42F8C		/* vmrgew  v29,v4,v5 */
.long	0x13C63F8C		/* vmrgew  v30,v6,v7 */
.long	0xF0201057		/* xxpermdi v1,v0,v2,0 */
.long	0xF0601357		/* xxpermdi v3,v0,v2,3 */
.long	0xF01BE057		/* xxpermdi v0,v27,v28,0 */
.long	0xF05BE357		/* xxpermdi v2,v27,v28,3 */

	/* Transpose row b (v4-v7). */
.long	0x10842E8C		/* vmrgow  v4,v4,v5 */
.long	0x10C63E8C		/* vmrgow  v6,v6,v7 */
.long	0x13684F8C		/* vmrgew  v27,v8,v9 */
.long	0x138A5F8C		/* vmrgew  v28,v10,v11 */
.long	0xF0A43057		/* xxpermdi v5,v4,v6,0 */
.long	0xF0E43357		/* xxpermdi v7,v4,v6,3 */
.long	0xF09DF057		/* xxpermdi v4,v29,v30,0 */
.long	0xF0DDF357		/* xxpermdi v6,v29,v30,3 */

	/* Transpose row c (v8-v11). */
.long	0x11084E8C		/* vmrgow  v8,v8,v9 */
.long	0x114A5E8C		/* vmrgow  v10,v10,v11 */
.long	0x13AC6F8C		/* vmrgew  v29,v12,v13 */
.long	0x13CE7F8C		/* vmrgew  v30,v14,v15 */
.long	0xF1285057		/* xxpermdi v9,v8,v10,0 */
.long	0xF1685357		/* xxpermdi v11,v8,v10,3 */
.long	0xF11BE057		/* xxpermdi v8,v27,v28,0 */
.long	0xF15BE357		/* xxpermdi v10,v27,v28,3 */

	/* Transpose row d (v12-v15) and step the lane counters by 4. */
.long	0x118C6E8C		/* vmrgow  v12,v12,v13 */
.long	0x11CE7E8C		/* vmrgow  v14,v14,v15 */
	vspltisw	27,4
	vadduwm	26,26,27
.long	0xF1AC7057		/* xxpermdi v13,v12,v14,0 */
.long	0xF1EC7357		/* xxpermdi v15,v12,v14,3 */
.long	0xF19DF057		/* xxpermdi v12,v29,v30,0 */
.long	0xF1DDF357		/* xxpermdi v14,v29,v30,3 */

	/* Block 0: feed-forward, byte-reverse, then xor with the input. */
	vadduwm	0,0,16
	vadduwm	4,4,17
	vadduwm	8,8,18
	vadduwm	12,12,19

	vperm	0,0,0,31
	vperm	4,4,4,31
	vperm	8,8,8,31
	vperm	12,12,12,31

	cmplwi	5,0x40			/* CR0 also feeds the beq after the stores */
	blt	.Ltail_vsx

.long	0x7F602619		/* lxvw4x v27,0,r4 */
.long	0x7F882619		/* lxvw4x v28,r8,r4 */
.long	0x7FA92619		/* lxvw4x v29,r9,r4 */
.long	0x7FCA2619		/* lxvw4x v30,r10,r4 */

	vxor	27,27,0
	vxor	28,28,4
	vxor	29,29,8
	vxor	30,30,12

.long	0x7F601F19		/* stxvw4x v27,0,r3 */
.long	0x7F881F19		/* stxvw4x v28,r8,r3 */
	addi	4,4,0x40
.long	0x7FA91F19		/* stxvw4x v29,r9,r3 */
	subi	5,5,0x40
.long	0x7FCA1F19		/* stxvw4x v30,r10,r3 */
	addi	3,3,0x40
	beq	.Ldone_vsx		/* len was exactly 64: finished */

	/* Block 1: rows are in v1, v5, v9, v13. */
	vadduwm	0,1,16
	vadduwm	4,5,17
	vadduwm	8,9,18
	vadduwm	12,13,19

	vperm	0,0,0,31
	vperm	4,4,4,31
	vperm	8,8,8,31
	vperm	12,12,12,31

	cmplwi	5,0x40
	blt	.Ltail_vsx

.long	0x7F602619		/* lxvw4x v27,0,r4 */
.long	0x7F882619		/* lxvw4x v28,r8,r4 */
.long	0x7FA92619		/* lxvw4x v29,r9,r4 */
.long	0x7FCA2619		/* lxvw4x v30,r10,r4 */

	vxor	27,27,0
	vxor	28,28,4
	vxor	29,29,8
	vxor	30,30,12

.long	0x7F601F19		/* stxvw4x v27,0,r3 */
.long	0x7F881F19		/* stxvw4x v28,r8,r3 */
	addi	4,4,0x40
.long	0x7FA91F19		/* stxvw4x v29,r9,r3 */
	subi	5,5,0x40
.long	0x7FCA1F19		/* stxvw4x v30,r10,r3 */
	addi	3,3,0x40
	beq	.Ldone_vsx

	/* Block 2: rows are in v2, v6, v10, v14. */
	vadduwm	0,2,16
	vadduwm	4,6,17
	vadduwm	8,10,18
	vadduwm	12,14,19

	vperm	0,0,0,31
	vperm	4,4,4,31
	vperm	8,8,8,31
	vperm	12,12,12,31

	cmplwi	5,0x40
	blt	.Ltail_vsx

.long	0x7F602619		/* lxvw4x v27,0,r4 */
.long	0x7F882619		/* lxvw4x v28,r8,r4 */
.long	0x7FA92619		/* lxvw4x v29,r9,r4 */
.long	0x7FCA2619		/* lxvw4x v30,r10,r4 */

	vxor	27,27,0
	vxor	28,28,4
	vxor	29,29,8
	vxor	30,30,12

.long	0x7F601F19		/* stxvw4x v27,0,r3 */
.long	0x7F881F19		/* stxvw4x v28,r8,r3 */
	addi	4,4,0x40
.long	0x7FA91F19		/* stxvw4x v29,r9,r3 */
	subi	5,5,0x40
.long	0x7FCA1F19		/* stxvw4x v30,r10,r3 */
	addi	3,3,0x40
	beq	.Ldone_vsx

	/* Block 3: rows are in v3, v7, v11, v15. */
	vadduwm	0,3,16
	vadduwm	4,7,17
	vadduwm	8,11,18
	vadduwm	12,15,19

	vperm	0,0,0,31
	vperm	4,4,4,31
	vperm	8,8,8,31
	vperm	12,12,12,31

	cmplwi	5,0x40
	blt	.Ltail_vsx

.long	0x7F602619		/* lxvw4x v27,0,r4 */
.long	0x7F882619		/* lxvw4x v28,r8,r4 */
.long	0x7FA92619		/* lxvw4x v29,r9,r4 */
.long	0x7FCA2619		/* lxvw4x v30,r10,r4 */

	vxor	27,27,0
	vxor	28,28,4
	vxor	29,29,8
	vxor	30,30,12

.long	0x7F601F19		/* stxvw4x v27,0,r3 */
.long	0x7F881F19		/* stxvw4x v28,r8,r3 */
	addi	4,4,0x40
.long	0x7FA91F19		/* stxvw4x v29,r9,r3 */
	subi	5,5,0x40
.long	0x7FCA1F19		/* stxvw4x v30,r10,r3 */
	addi	3,3,0x40
	mtctr	0			/* r0 is still 10: re-arm the round counter */
	bne	.Loop_outer_vsx

.Ldone_vsx:
	lwz	12,196(1)
	li	10,103
	li	11,119
	lwz	0, 204(1)
	mtspr	256,12
	lvx	26,10,1
	addi	10,10,32
	lvx	27,11,1
	addi	11,11,32
	lvx	28,10,1
	addi	10,10,32
	lvx	29,11,1
	addi	11,11,32
	lvx	30,10,1
	lvx	31,11,1
	mtlr	0
	addi	1,1,200
	blr

.align	4
.Ltail_vsx:
	/*
	 * Fewer than 64 bytes left (r5 is non-zero here): spill the current
	 * key-stream block to the stack scratch and xor byte by byte.
	 */
	addi	11,1,24
	mtctr	5
.long	0x7C005F19		/* stxvw4x v0,0,r11 */
.long	0x7C885F19		/* stxvw4x v4,r8,r11 */
.long	0x7D095F19		/* stxvw4x v8,r9,r11 */
.long	0x7D8A5F19		/* stxvw4x v12,r10,r11 */
	subi	12,11,1
	subi	4,4,1
	subi	3,3,1

.Loop_tail_vsx:
	lbzu	6,1(12)			/* key-stream byte */
	lbzu	7,1(4)			/* input byte */
	xor	6,6,7
	stbu	6,1(3)
	bc	16,0,.Loop_tail_vsx	/* bdnz */

	/* Overwrite the spilled key stream with the (public) constants row. */
.long	0x7E005F19		/* stxvw4x v16,0,r11 */
.long	0x7E085F19		/* stxvw4x v16,r8,r11 */
.long	0x7E095F19		/* stxvw4x v16,r9,r11 */
.long	0x7E0A5F19		/* stxvw4x v16,r10,r11 */

	b	.Ldone_vsx
/* Trailer data emitted by the generator; never executed. */
.long	0
.byte	0,12,0x04,1,0x80,0,5,0
.long	0

/*
 * .Lconsts -- return the run-time address of .Lsigma in r12.
 *
 * Position-independent: bcl 20,31,$+4 branches to the next instruction and
 * leaves that address in LR.  The caller LR is parked in r0 and restored
 * before returning.  Clobbers r0 and r12.
 *
 * The constant 56 is the distance from the "mflr 12" instruction to
 * .Lsigma: 4 remaining instructions (16 bytes) + 12 bytes of trailer data
 * + 28 bytes of padding.  Any change to this sequence must keep that
 * distance in sync.
 */
.align	5
.Lconsts:
	mflr	0
	bcl	20,31,$+4
	mflr	12
	addi	12,12,56
	mtlr	0
	blr
.long	0
.byte	0,12,0x14,0,0,0,0,0
.space	28
/*
 * Constant table, byte offsets relative to .Lsigma:
 *     0  ChaCha constants ("expand 32-byte k" as four 32-bit words)
 *    16  {1,0,0,0}  counter increment by one block
 *    32  {4,0,0,0}  counter increment by four blocks
 *    48  vperm mask rotating every 32-bit word left by 16 bits
 *    64  vperm mask rotating every 32-bit word left by 8 bits
 *    80  the four constant words, each replicated across a vector (VSX path)
 *   144  {0,1,2,3}  per-lane block offsets (VSX path)
 * followed by a NUL-terminated identification string:
 * "ChaCha20 for PowerPC/AltiVec,CRYPTOGAMS by <appro@openssl.org>".
 */
.Lsigma:
.long	0x61707865,0x3320646e,0x79622d32,0x6b206574
.long	1,0,0,0
.long	4,0,0,0
.long	0x02030001,0x06070405,0x0a0b0809,0x0e0f0c0d
.long	0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c
.long	0x61707865,0x61707865,0x61707865,0x61707865
.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
.long	0,1,2,3
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
