#include "arm_arch.h"
#include "arm_asm.h"

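@ GHASH for ARMv8 Crypto Extensions, 32-bit (AArch32) mode, from the
@ CRYPTOGAMS ghashv8-armx.pl generator. GF(2^128) multiplication uses
@ the polynomial-multiply instruction, emitted as raw .inst words below
@ so that assemblers without crypto-extension mnemonics can still build
@ this file.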
.text
.fpu	neon
.code	32
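@ void gcm_init_v8(u128 Htable[16], const u64 H[2])
@ (prototype as declared in OpenSSL's crypto/modes/gcm128.c)
@
@ AAPCS arguments: r0 = Htable (output), r1 = H (hash subkey).
@ Precomputes the "twisted" subkey: H is rotated left by one bit and,
@ when the shifted-out bit is set, xor-ed with the reduction constant
@ 0xc2...01; the result is stored at Htable[0] for the multiply loop.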
.global	gcm_init_v8
.type	gcm_init_v8,%function
.align	4
gcm_init_v8:
	vld1.64		{q9},[r1]		@ load H
	vmov.i8		q8,#0xe1
	vext.8		q3,q9,q9,#8
	vshl.i64	q8,q8,#57
	vshr.u64	q10,q8,#63
	vext.8		q8,q10,q8,#8		@ t0=0xc2....01
	vdup.32	q9,d18[1]
	vshr.u64	q11,q3,#63
	vshr.s32	q9,q9,#31		@ broadcast carry bit
	vand		q11,q11,q8
	vshl.i64	q3,q3,#1
	vext.8		q11,q11,q11,#8
	vand		q8,q8,q9
	vorr		q3,q3,q11		@ H<<<=1
	veor		q3,q3,q8		@ twisted H
	vst1.64		{q3},[r0]

	RET
.size	gcm_init_v8,.-gcm_init_v8

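@ void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16])
@ (prototype as declared in OpenSSL's crypto/modes/gcm128.c)
@
@ Computes Xi = (Xi * H) mod p for a single block by setting up a zero
@ byte count and branching into the shared .Lgmult_v8 tail below, so
@ the loop body runs exactly once.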
.global	gcm_gmult_v8
.type	gcm_gmult_v8,%function
.align	4
gcm_gmult_v8:
	vld1.64		{q9},[r0]		@ load Xi
	vmov.i8		q11,#0xe1
	vld1.64		{q12},[r1]		@ load twisted H
	vshl.u64	q11,q11,#57
#ifndef __ARMEB__
	vrev64.8	q9,q9
#endif
	vext.8		q13,q12,q12,#8
	mov		r3,#0
	vext.8		q3,q9,q9,#8
	mov		r12,#0
	veor		q13,q13,q12		@ Karatsuba pre-processing
	mov		r2,r0
	b		.Lgmult_v8
.size	gcm_gmult_v8,.-gcm_gmult_v8

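@ void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
@                   size_t len)
@ (prototype as declared in OpenSSL's crypto/modes/gcm128.c)
@
@ AAPCS arguments: r0 = Xi (in/out), r1 = Htable, r2 = input pointer,
@ r3 = byte count, a multiple of 16. For each 16-byte block it computes
@ Xi = (Xi ^ inp) * H mod p. The post-increment r12 is forced to 0 on
@ the final block so the look-ahead load never reads past the input.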
.global	gcm_ghash_v8
.type	gcm_ghash_v8,%function
.align	4
gcm_ghash_v8:
	vld1.64		{q0},[r0]		@ load [rotated] Xi
	subs		r3,r3,#16
	vmov.i8		q11,#0xe1
	mov		r12,#16
	vld1.64		{q12},[r1]		@ load twisted H
	moveq	r12,#0
	vext.8		q0,q0,q0,#8
	vshl.u64	q11,q11,#57
	vld1.64		{q9},[r2],r12	@ load [rotated] inp
	vext.8		q13,q12,q12,#8
#ifndef __ARMEB__
	vrev64.8	q0,q0
	vrev64.8	q9,q9
#endif
	veor		q13,q13,q12		@ Karatsuba pre-processing
	vext.8		q3,q9,q9,#8
	b		.Loop_v8

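@ Main loop: one GF(2^128) multiply-and-reduce per 16-byte block. The
@ 128x128-bit carry-less product is formed Karatsuba-style from three
@ 64x64-bit polynomial multiplies, then reduced modulo
@ x^128 + x^7 + x^2 + x + 1 in two folding phases using the 0xc2...01
@ constant held in q11.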
.align	4
.Loop_v8:
	vext.8		q10,q0,q0,#8
	veor		q3,q3,q0		@ inp^=Xi
	veor		q9,q9,q10		@ q9 is rotated inp^Xi

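@ The .inst words below are the raw encodings of vmull.p64 (pmull):
@   q0 = H.lo * Xi.lo                 (low product)
@   q2 = H.hi * Xi.hi                 (high product)
@   q1 = (H.lo+H.hi) * (Xi.lo+Xi.hi)  (Karatsuba middle term)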
.Lgmult_v8:
	.inst	0xf2a80e86	@ pmull q0,q12,q3		@ H.lo·Xi.lo
	veor		q9,q9,q3		@ Karatsuba pre-processing
	.inst	0xf2a94e87	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
	subs		r3,r3,#16
	.inst	0xf2aa2ea2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
	moveq	r12,#0

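@ Karatsuba post-processing: fold q1 back into q0/q2 to form the
@ 256-bit product Xh:Xm:Xl (q2:q1:q0); the next block's load and the
@ first reduction multiply are interleaved here to hide latency.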
	vext.8		q9,q0,q2,#8		@ Karatsuba post-processing
	veor		q10,q0,q2
	veor		q1,q1,q9
	 vld1.64	{q9},[r2],r12	@ load [rotated] inp
	veor		q1,q1,q10
	.inst	0xf2e04e26	@ pmull q10,q0,q11		@ 1st phase

	vmov		d4,d3		@ Xh|Xm - 256-bit result
	vmov		d3,d0		@ Xm is rotated Xl
#ifndef __ARMEB__
	 vrev64.8	q9,q9
#endif
	veor		q0,q1,q10
	 vext.8		q3,q9,q9,#8

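@ Second reduction phase: multiply the folded low half by the
@ reduction constant once more and combine with the high half,
@ leaving the fully reduced Xi in q0.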
	vext.8		q10,q0,q0,#8		@ 2nd phase
	.inst	0xf2a00e26	@ pmull q0,q0,q11
	veor		q10,q10,q2
	veor		q0,q0,q10
	bhs		.Loop_v8

#ifndef __ARMEB__
	vrev64.8	q0,q0
#endif
	vext.8		q0,q0,q0,#8
	vst1.64		{q0},[r0]		@ write out Xi

	RET
.size	gcm_ghash_v8,.-gcm_ghash_v8
.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align  2