@ xref: /minix3/crypto/external/bsd/openssl/lib/libcrypto/arch/arm/bsaes-armv7.S
@ (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)

@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
@ granted.
@ ====================================================================

@ Bit-sliced AES for ARM NEON
@
@ February 2012.
@
@ This implementation is a direct adaptation of the bsaes-x86_64
@ module for ARM NEON, except that this module is endian-neutral [in
@ the sense that it can be compiled for either endianness] by courtesy
@ of vld1.8's neutrality. The initial version didn't implement an
@ interface to OpenSSL, only low-level primitives and unsupported
@ entry points, just enough to collect performance results, which for
@ the Cortex-A8 core are:
@
@ encrypt	19.5 cycles per byte processed with 128-bit key
@ decrypt	22.1 cycles per byte processed with 128-bit key
@ key conv.	440  cycles per 128-bit key/0.18 of 8x block
@
@ Snapdragon S4 encrypts a byte in 17.6 cycles and decrypts in 19.7,
@ which is [much] worse than anticipated (for further details see
@ http://www.openssl.org/~appro/Snapdragon-S4.html).
@
@ Cortex-A15 manages 14.2/16.1 cycles [where the integer-only code
@ manages 20.0 cycles].
@
@ When comparing to x86_64 results keep in mind that the NEON unit is
@ [mostly] single-issue and thus can't [fully] benefit from
@ instruction-level parallelism. And when comparing to aes-armv4
@ results keep in mind the key schedule conversion overhead (see
@ bsaes-x86_64.pl for further details)...
@
@						<appro@openssl.org>

@ April-August 2013
@
@ Add CBC, CTR and XTS subroutines, adapt for kernel use.
@
@					<ard.biesheuvel@linaro.org>
#ifndef __KERNEL__
# include "arm_arch.h"
# include "arm_asm.h"

@ Userland build: d8-d15 are callee-saved under the AAPCS VFP rules,
@ so entry points that use them must spill/restore 0x40 bytes.
# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
# define VFP_ABI_FRAME	0x40
#else
@ Kernel build: caller (crypto glue) is responsible for NEON state,
@ so the VFP save/restore macros collapse to nothing.
# define VFP_ABI_PUSH
# define VFP_ABI_POP
# define VFP_ABI_FRAME	0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
#endif

#ifdef __thumb__
@ Thumb 'adr' has enough reach here; alias the adrl pseudo-op to it.
# define adrl adr
#endif

#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.text
.syntax	unified 	@ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
.thumb
#else
.code   32
#endif
81*0a6a1f1dSLionel Sambuc
@-----------------------------------------------------------------------
@ _bsaes_decrypt8 — decrypt eight AES blocks in parallel, bit-sliced.
@
@ In:	q0-q7	eight 128-bit input blocks
@	r4	pointer to the bit-sliced key schedule (round 0 key
@		first); advanced past each round key as consumed
@	r5	round count (used as loop counter, clobbered)
@ Out:	q0-q7	eight decrypted blocks; note the block-to-register
@		mapping on exit is permuted relative to entry (see the
@		final round-key XOR order) — callers un-shuffle.
@ Clobbers: r6, q8-q15, flags.
@ RET is a macro from arm_asm.h.  @ NOTE(review): assumed — confirm.
@
@ Layout: input bitslicing, then the round loop (.Ldec_loop) runs
@ InvShiftRows (vtbl permutations with .LISR/.LISRM0), the bit-sliced
@ inverse S-box (.Ldec_sbox), and InvMixColumns; .Ldec_done un-slices
@ and applies the last round key.
@-----------------------------------------------------------------------
.type	_bsaes_decrypt8,%function
.align	4
_bsaes_decrypt8:
	adr	r6,_bsaes_decrypt8
	vldmia	r4!, {q9}		@ round 0 key
	add	r6,r6,#.LM0ISR-_bsaes_decrypt8

	vldmia	r6!, {q8}		@ .LM0ISR
	veor	q10, q0, q9	@ xor with round0 key
	veor	q11, q1, q9
	 vtbl.8	d0, {q10}, d16
	 vtbl.8	d1, {q10}, d17
	veor	q12, q2, q9
	 vtbl.8	d2, {q11}, d16
	 vtbl.8	d3, {q11}, d17
	veor	q13, q3, q9
	 vtbl.8	d4, {q12}, d16
	 vtbl.8	d5, {q12}, d17
	veor	q14, q4, q9
	 vtbl.8	d6, {q13}, d16
	 vtbl.8	d7, {q13}, d17
	veor	q15, q5, q9
	 vtbl.8	d8, {q14}, d16
	 vtbl.8	d9, {q14}, d17
	veor	q10, q6, q9
	 vtbl.8	d10, {q15}, d16
	 vtbl.8	d11, {q15}, d17
	veor	q11, q7, q9
	 vtbl.8	d12, {q10}, d16
	 vtbl.8	d13, {q10}, d17
	 vtbl.8	d14, {q11}, d16
	 vtbl.8	d15, {q11}, d17
	@ Bitslice the eight states: swap bit planes across registers
	@ using shift/xor/mask ladders at strides 1, 2 and 4.
	vmov.i8	q8,#0x55			@ compose .LBS0
	vmov.i8	q9,#0x33			@ compose .LBS1
	vshr.u64	q10, q6, #1
	 vshr.u64	q11, q4, #1
	veor		q10, q10, q7
	 veor		q11, q11, q5
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q7, q7, q10
	vshl.u64	q10, q10, #1
	 veor		q5, q5, q11
	 vshl.u64	q11, q11, #1
	veor		q6, q6, q10
	 veor		q4, q4, q11
	vshr.u64	q10, q2, #1
	 vshr.u64	q11, q0, #1
	veor		q10, q10, q3
	 veor		q11, q11, q1
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q3, q3, q10
	vshl.u64	q10, q10, #1
	 veor		q1, q1, q11
	 vshl.u64	q11, q11, #1
	veor		q2, q2, q10
	 veor		q0, q0, q11
	vmov.i8	q8,#0x0f			@ compose .LBS2
	vshr.u64	q10, q5, #2
	 vshr.u64	q11, q4, #2
	veor		q10, q10, q7
	 veor		q11, q11, q6
	vand		q10, q10, q9
	 vand		q11, q11, q9
	veor		q7, q7, q10
	vshl.u64	q10, q10, #2
	 veor		q6, q6, q11
	 vshl.u64	q11, q11, #2
	veor		q5, q5, q10
	 veor		q4, q4, q11
	vshr.u64	q10, q1, #2
	 vshr.u64	q11, q0, #2
	veor		q10, q10, q3
	 veor		q11, q11, q2
	vand		q10, q10, q9
	 vand		q11, q11, q9
	veor		q3, q3, q10
	vshl.u64	q10, q10, #2
	 veor		q2, q2, q11
	 vshl.u64	q11, q11, #2
	veor		q1, q1, q10
	 veor		q0, q0, q11
	vshr.u64	q10, q3, #4
	 vshr.u64	q11, q2, #4
	veor		q10, q10, q7
	 veor		q11, q11, q6
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q7, q7, q10
	vshl.u64	q10, q10, #4
	 veor		q6, q6, q11
	 vshl.u64	q11, q11, #4
	veor		q3, q3, q10
	 veor		q2, q2, q11
	vshr.u64	q10, q1, #4
	 vshr.u64	q11, q0, #4
	veor		q10, q10, q5
	 veor		q11, q11, q4
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q5, q5, q10
	vshl.u64	q10, q10, #4
	 veor		q4, q4, q11
	 vshl.u64	q11, q11, #4
	veor		q1, q1, q10
	 veor		q0, q0, q11
	sub	r5,r5,#1
	b	.Ldec_sbox
.align	4
.Ldec_loop:
	@ Per-round: XOR the round key into each state, then apply the
	@ InvShiftRows byte permutation held in q12 via vtbl.
	vldmia	r4!, {q8-q11}
	veor	q8, q8, q0
	veor	q9, q9, q1
	vtbl.8	d0, {q8}, d24
	vtbl.8	d1, {q8}, d25
	vldmia	r4!, {q8}
	veor	q10, q10, q2
	vtbl.8	d2, {q9}, d24
	vtbl.8	d3, {q9}, d25
	vldmia	r4!, {q9}
	veor	q11, q11, q3
	vtbl.8	d4, {q10}, d24
	vtbl.8	d5, {q10}, d25
	vldmia	r4!, {q10}
	vtbl.8	d6, {q11}, d24
	vtbl.8	d7, {q11}, d25
	vldmia	r4!, {q11}
	veor	q8, q8, q4
	veor	q9, q9, q5
	vtbl.8	d8, {q8}, d24
	vtbl.8	d9, {q8}, d25
	veor	q10, q10, q6
	vtbl.8	d10, {q9}, d24
	vtbl.8	d11, {q9}, d25
	veor	q11, q11, q7
	vtbl.8	d12, {q10}, d24
	vtbl.8	d13, {q10}, d25
	vtbl.8	d14, {q11}, d24
	vtbl.8	d15, {q11}, d25
.Ldec_sbox:
	@ Bit-sliced inverse S-box: pure boolean network over the eight
	@ bit planes (do not reorder — statement order carries the data
	@ dependencies of the shared subexpressions).
	 veor	q1, q1, q4
	veor	q3, q3, q4

	veor	q4, q4, q7
	 veor	q1, q1, q6
	veor	q2, q2, q7
	veor	q6, q6, q4

	veor	q0, q0, q1
	veor	q2, q2, q5
	 veor	q7, q7, q6
	veor	q3, q3, q0
	veor	q5, q5, q0
	veor	q1, q1, q3
	veor	q11, q3, q0
	veor	q10, q7, q4
	veor	q9, q1, q6
	veor	q13, q4, q0
	 vmov	q8, q10
	veor	q12, q5, q2

	vorr	q10, q10, q9
	veor	q15, q11, q8
	vand	q14, q11, q12
	vorr	q11, q11, q12
	veor	q12, q12, q9
	vand	q8, q8, q9
	veor	q9, q6, q2
	vand	q15, q15, q12
	vand	q13, q13, q9
	veor	q9, q3, q7
	veor	q12, q1, q5
	veor	q11, q11, q13
	veor	q10, q10, q13
	vand	q13, q9, q12
	vorr	q9, q9, q12
	veor	q11, q11, q15
	veor	q8, q8, q13
	veor	q10, q10, q14
	veor	q9, q9, q15
	veor	q8, q8, q14
	vand	q12, q4, q6
	veor	q9, q9, q14
	vand	q13, q0, q2
	vand	q14, q7, q1
	vorr	q15, q3, q5
	veor	q11, q11, q12
	veor	q9, q9, q14
	veor	q8, q8, q15
	veor	q10, q10, q13

	@ Inv_GF16 	0, 	1, 	2, 	3, s0, s1, s2, s3

	@ new smaller inversion

	vand	q14, q11, q9
	vmov	q12, q8

	veor	q13, q10, q14
	veor	q15, q8, q14
	veor	q14, q8, q14	@ q14=q15

	vbsl	q13, q9, q8
	vbsl	q15, q11, q10
	veor	q11, q11, q10

	vbsl	q12, q13, q14
	vbsl	q8, q14, q13

	vand	q14, q12, q15
	veor	q9, q9, q8

	veor	q14, q14, q11
	veor	q12, q5, q2
	veor	q8, q1, q6
	veor 	q10, q15, q14
	vand	q10, q10, q5
	veor	q5, q5, q1
	vand	q11, q1, q15
	vand	q5, q5, q14
	veor	q1, q11, q10
	veor	q5, q5, q11
	veor	q15, q15, q13
	veor	q14, q14, q9
	veor	q11, q15, q14
	 veor 	q10, q13, q9
	vand	q11, q11, q12
	 vand	q10, q10, q2
	veor	q12, q12, q8
	 veor	q2, q2, q6
	vand	q8, q8, q15
	 vand	q6, q6, q13
	vand	q12, q12, q14
	 vand	q2, q2, q9
	veor	q8, q8, q12
	 veor	q2, q2, q6
	veor	q12, q12, q11
	 veor	q6, q6, q10
	veor	q5, q5, q12
	veor	q2, q2, q12
	veor	q1, q1, q8
	veor	q6, q6, q8

	veor	q12, q3, q0
	veor	q8, q7, q4
	veor	q11, q15, q14
	 veor 	q10, q13, q9
	vand	q11, q11, q12
	 vand	q10, q10, q0
	veor	q12, q12, q8
	 veor	q0, q0, q4
	vand	q8, q8, q15
	 vand	q4, q4, q13
	vand	q12, q12, q14
	 vand	q0, q0, q9
	veor	q8, q8, q12
	 veor	q0, q0, q4
	veor	q12, q12, q11
	 veor	q4, q4, q10
	veor	q15, q15, q13
	veor	q14, q14, q9
	veor 	q10, q15, q14
	vand	q10, q10, q3
	veor	q3, q3, q7
	vand	q11, q7, q15
	vand	q3, q3, q14
	veor	q7, q11, q10
	veor	q3, q3, q11
	veor	q3, q3, q12
	veor	q0, q0, q12
	veor	q7, q7, q8
	veor	q4, q4, q8
	veor	q1, q1, q7
	veor	q6, q6, q5

	veor	q4, q4, q1
	veor	q2, q2, q7
	veor	q5, q5, q7
	veor	q4, q4, q2
	 veor 	q7, q7, q0
	veor	q4, q4, q5
	 veor	q3, q3, q6
	 veor	q6, q6, q1
	veor	q3, q3, q4

	veor	q4, q4, q0
	veor	q7, q7, q3
	subs	r5,r5,#1
	bcc	.Ldec_done
	@ InvMixColumns, bit-sliced:
	@ multiplication by 0x05-0x00-0x04-0x00
	vext.8	q8, q0, q0, #8
	vext.8	q14, q3, q3, #8
	vext.8	q15, q5, q5, #8
	veor	q8, q8, q0
	vext.8	q9, q1, q1, #8
	veor	q14, q14, q3
	vext.8	q10, q6, q6, #8
	veor	q15, q15, q5
	vext.8	q11, q4, q4, #8
	veor	q9, q9, q1
	vext.8	q12, q2, q2, #8
	veor	q10, q10, q6
	vext.8	q13, q7, q7, #8
	veor	q11, q11, q4
	veor	q12, q12, q2
	veor	q13, q13, q7

	 veor	q0, q0, q14
	 veor	q1, q1, q14
	 veor	q6, q6, q8
	 veor	q2, q2, q10
	 veor	q4, q4, q9
	 veor	q1, q1, q15
	 veor	q6, q6, q15
	 veor	q2, q2, q14
	 veor	q7, q7, q11
	 veor	q4, q4, q14
	 veor	q3, q3, q12
	 veor	q2, q2, q15
	 veor	q7, q7, q15
	 veor	q5, q5, q13
	vext.8	q8, q0, q0, #12	@ x0 <<< 32
	vext.8	q9, q1, q1, #12
	 veor	q0, q0, q8		@ x0 ^ (x0 <<< 32)
	vext.8	q10, q6, q6, #12
	 veor	q1, q1, q9
	vext.8	q11, q4, q4, #12
	 veor	q6, q6, q10
	vext.8	q12, q2, q2, #12
	 veor	q4, q4, q11
	vext.8	q13, q7, q7, #12
	 veor	q2, q2, q12
	vext.8	q14, q3, q3, #12
	 veor	q7, q7, q13
	vext.8	q15, q5, q5, #12
	 veor	q3, q3, q14

	veor	q9, q9, q0
	 veor	q5, q5, q15
	 vext.8	q0, q0, q0, #8		@ (x0 ^ (x0 <<< 32)) <<< 64)
	veor	q10, q10, q1
	veor	q8, q8, q5
	veor	q9, q9, q5
	 vext.8	q1, q1, q1, #8
	veor	q13, q13, q2
	 veor	q0, q0, q8
	veor	q14, q14, q7
	 veor	q1, q1, q9
	 vext.8	q8, q2, q2, #8
	veor	q12, q12, q4
	 vext.8	q9, q7, q7, #8
	veor	q15, q15, q3
	 vext.8	q2, q4, q4, #8
	veor	q11, q11, q6
	 vext.8	q7, q5, q5, #8
	veor	q12, q12, q5
	 vext.8	q4, q3, q3, #8
	veor	q11, q11, q5
	 vext.8	q3, q6, q6, #8
	veor	q5, q9, q13
	veor	q11, q11, q2
	veor	q7, q7, q15
	veor	q6, q4, q14
	veor	q4, q8, q12
	veor	q2, q3, q10
	vmov	q3, q11
	 @ vmov	q5, q9
	@ Select the permutation for the next iteration: .LISR for inner
	@ rounds, .LISRM0 for the final trip through .Ldec_loop.
	vldmia	r6, {q12}		@ .LISR
	ite	eq				@ Thumb2 thing, sanity check in ARM
	addeq	r6,r6,#0x10
	bne	.Ldec_loop
	vldmia	r6, {q12}		@ .LISRM0
	b	.Ldec_loop
.align	4
.Ldec_done:
	@ Un-bitslice (inverse of the input transform, strides 1/2/4)
	@ and XOR in the last round key.
	vmov.i8	q8,#0x55			@ compose .LBS0
	vmov.i8	q9,#0x33			@ compose .LBS1
	vshr.u64	q10, q3, #1
	 vshr.u64	q11, q2, #1
	veor		q10, q10, q5
	 veor		q11, q11, q7
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q5, q5, q10
	vshl.u64	q10, q10, #1
	 veor		q7, q7, q11
	 vshl.u64	q11, q11, #1
	veor		q3, q3, q10
	 veor		q2, q2, q11
	vshr.u64	q10, q6, #1
	 vshr.u64	q11, q0, #1
	veor		q10, q10, q4
	 veor		q11, q11, q1
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q4, q4, q10
	vshl.u64	q10, q10, #1
	 veor		q1, q1, q11
	 vshl.u64	q11, q11, #1
	veor		q6, q6, q10
	 veor		q0, q0, q11
	vmov.i8	q8,#0x0f			@ compose .LBS2
	vshr.u64	q10, q7, #2
	 vshr.u64	q11, q2, #2
	veor		q10, q10, q5
	 veor		q11, q11, q3
	vand		q10, q10, q9
	 vand		q11, q11, q9
	veor		q5, q5, q10
	vshl.u64	q10, q10, #2
	 veor		q3, q3, q11
	 vshl.u64	q11, q11, #2
	veor		q7, q7, q10
	 veor		q2, q2, q11
	vshr.u64	q10, q1, #2
	 vshr.u64	q11, q0, #2
	veor		q10, q10, q4
	 veor		q11, q11, q6
	vand		q10, q10, q9
	 vand		q11, q11, q9
	veor		q4, q4, q10
	vshl.u64	q10, q10, #2
	 veor		q6, q6, q11
	 vshl.u64	q11, q11, #2
	veor		q1, q1, q10
	 veor		q0, q0, q11
	vshr.u64	q10, q4, #4
	 vshr.u64	q11, q6, #4
	veor		q10, q10, q5
	 veor		q11, q11, q3
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q5, q5, q10
	vshl.u64	q10, q10, #4
	 veor		q3, q3, q11
	 vshl.u64	q11, q11, #4
	veor		q4, q4, q10
	 veor		q6, q6, q11
	vshr.u64	q10, q1, #4
	 vshr.u64	q11, q0, #4
	veor		q10, q10, q7
	 veor		q11, q11, q2
	vand		q10, q10, q8
	 vand		q11, q11, q8
	veor		q7, q7, q10
	vshl.u64	q10, q10, #4
	 veor		q2, q2, q11
	 vshl.u64	q11, q11, #4
	veor		q1, q1, q10
	 veor		q0, q0, q11
	vldmia	r4, {q8}			@ last round key
	veor	q6, q6, q8
	veor	q4, q4, q8
	veor	q2, q2, q8
	veor	q7, q7, q8
	veor	q3, q3, q8
	veor	q5, q5, q8
	veor	q0, q0, q8
	veor	q1, q1, q8
	RET
.size	_bsaes_decrypt8,.-_bsaes_decrypt8
544*0a6a1f1dSLionel Sambuc
@ Constant pool: byte-index tables consumed by vtbl.8 to realize the
@ (Inv)ShiftRows permutations, plus the bitslice input orderings.
@ Must stay within adr/add reach of the code that addresses it
@ (_bsaes_decrypt8/_bsaes_encrypt8 compute offsets from .LM0ISR/.LM0SR).
.type	_bsaes_const,%object
.align	6
_bsaes_const:
.LM0ISR:	@ InvShiftRows constants
	.quad	0x0a0e0206070b0f03, 0x0004080c0d010509
.LISR:
	.quad	0x0504070602010003, 0x0f0e0d0c080b0a09
.LISRM0:
	.quad	0x01040b0e0205080f, 0x0306090c00070a0d
.LM0SR:		@ ShiftRows constants
	.quad	0x0a0e02060f03070b, 0x0004080c05090d01
.LSR:
	.quad	0x0504070600030201, 0x0f0e0d0c0a09080b
.LSRM0:
	.quad	0x0304090e00050a0f, 0x01060b0c0207080d
.LM0:
	.quad	0x02060a0e03070b0f, 0x0004080c0105090d
.LREVM0SR:
	.quad	0x090d01050c000408, 0x03070b0f060a0e02
.asciz	"Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>"
.align	6
.size	_bsaes_const,.-_bsaes_const
567*0a6a1f1dSLionel Sambuc
568*0a6a1f1dSLionel Sambuc.type	_bsaes_encrypt8,%function
569*0a6a1f1dSLionel Sambuc.align	4
570*0a6a1f1dSLionel Sambuc_bsaes_encrypt8:
571*0a6a1f1dSLionel Sambuc	adr	r6,_bsaes_encrypt8
572*0a6a1f1dSLionel Sambuc	vldmia	r4!, {q9}		@ round 0 key
573*0a6a1f1dSLionel Sambuc	sub	r6,r6,#_bsaes_encrypt8-.LM0SR
574*0a6a1f1dSLionel Sambuc
575*0a6a1f1dSLionel Sambuc	vldmia	r6!, {q8}		@ .LM0SR
576*0a6a1f1dSLionel Sambuc_bsaes_encrypt8_alt:
577*0a6a1f1dSLionel Sambuc	veor	q10, q0, q9	@ xor with round0 key
578*0a6a1f1dSLionel Sambuc	veor	q11, q1, q9
579*0a6a1f1dSLionel Sambuc	 vtbl.8	d0, {q10}, d16
580*0a6a1f1dSLionel Sambuc	 vtbl.8	d1, {q10}, d17
581*0a6a1f1dSLionel Sambuc	veor	q12, q2, q9
582*0a6a1f1dSLionel Sambuc	 vtbl.8	d2, {q11}, d16
583*0a6a1f1dSLionel Sambuc	 vtbl.8	d3, {q11}, d17
584*0a6a1f1dSLionel Sambuc	veor	q13, q3, q9
585*0a6a1f1dSLionel Sambuc	 vtbl.8	d4, {q12}, d16
586*0a6a1f1dSLionel Sambuc	 vtbl.8	d5, {q12}, d17
587*0a6a1f1dSLionel Sambuc	veor	q14, q4, q9
588*0a6a1f1dSLionel Sambuc	 vtbl.8	d6, {q13}, d16
589*0a6a1f1dSLionel Sambuc	 vtbl.8	d7, {q13}, d17
590*0a6a1f1dSLionel Sambuc	veor	q15, q5, q9
591*0a6a1f1dSLionel Sambuc	 vtbl.8	d8, {q14}, d16
592*0a6a1f1dSLionel Sambuc	 vtbl.8	d9, {q14}, d17
593*0a6a1f1dSLionel Sambuc	veor	q10, q6, q9
594*0a6a1f1dSLionel Sambuc	 vtbl.8	d10, {q15}, d16
595*0a6a1f1dSLionel Sambuc	 vtbl.8	d11, {q15}, d17
596*0a6a1f1dSLionel Sambuc	veor	q11, q7, q9
597*0a6a1f1dSLionel Sambuc	 vtbl.8	d12, {q10}, d16
598*0a6a1f1dSLionel Sambuc	 vtbl.8	d13, {q10}, d17
599*0a6a1f1dSLionel Sambuc	 vtbl.8	d14, {q11}, d16
600*0a6a1f1dSLionel Sambuc	 vtbl.8	d15, {q11}, d17
601*0a6a1f1dSLionel Sambuc_bsaes_encrypt8_bitslice:
602*0a6a1f1dSLionel Sambuc	vmov.i8	q8,#0x55			@ compose .LBS0
603*0a6a1f1dSLionel Sambuc	vmov.i8	q9,#0x33			@ compose .LBS1
604*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q6, #1
605*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q4, #1
606*0a6a1f1dSLionel Sambuc	veor		q10, q10, q7
607*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q5
608*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
609*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
610*0a6a1f1dSLionel Sambuc	veor		q7, q7, q10
611*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #1
612*0a6a1f1dSLionel Sambuc	 veor		q5, q5, q11
613*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #1
614*0a6a1f1dSLionel Sambuc	veor		q6, q6, q10
615*0a6a1f1dSLionel Sambuc	 veor		q4, q4, q11
616*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q2, #1
617*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q0, #1
618*0a6a1f1dSLionel Sambuc	veor		q10, q10, q3
619*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q1
620*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
621*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
622*0a6a1f1dSLionel Sambuc	veor		q3, q3, q10
623*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #1
624*0a6a1f1dSLionel Sambuc	 veor		q1, q1, q11
625*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #1
626*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
627*0a6a1f1dSLionel Sambuc	 veor		q0, q0, q11
628*0a6a1f1dSLionel Sambuc	vmov.i8	q8,#0x0f			@ compose .LBS2
629*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q5, #2
630*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q4, #2
631*0a6a1f1dSLionel Sambuc	veor		q10, q10, q7
632*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q6
633*0a6a1f1dSLionel Sambuc	vand		q10, q10, q9
634*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q9
635*0a6a1f1dSLionel Sambuc	veor		q7, q7, q10
636*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #2
637*0a6a1f1dSLionel Sambuc	 veor		q6, q6, q11
638*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #2
639*0a6a1f1dSLionel Sambuc	veor		q5, q5, q10
640*0a6a1f1dSLionel Sambuc	 veor		q4, q4, q11
641*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q1, #2
642*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q0, #2
643*0a6a1f1dSLionel Sambuc	veor		q10, q10, q3
644*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q2
645*0a6a1f1dSLionel Sambuc	vand		q10, q10, q9
646*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q9
647*0a6a1f1dSLionel Sambuc	veor		q3, q3, q10
648*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #2
649*0a6a1f1dSLionel Sambuc	 veor		q2, q2, q11
650*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #2
651*0a6a1f1dSLionel Sambuc	veor		q1, q1, q10
652*0a6a1f1dSLionel Sambuc	 veor		q0, q0, q11
653*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q3, #4
654*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q2, #4
655*0a6a1f1dSLionel Sambuc	veor		q10, q10, q7
656*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q6
657*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
658*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
659*0a6a1f1dSLionel Sambuc	veor		q7, q7, q10
660*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #4
661*0a6a1f1dSLionel Sambuc	 veor		q6, q6, q11
662*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #4
663*0a6a1f1dSLionel Sambuc	veor		q3, q3, q10
664*0a6a1f1dSLionel Sambuc	 veor		q2, q2, q11
665*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q1, #4
666*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q0, #4
667*0a6a1f1dSLionel Sambuc	veor		q10, q10, q5
668*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q4
669*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
670*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
671*0a6a1f1dSLionel Sambuc	veor		q5, q5, q10
672*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #4
673*0a6a1f1dSLionel Sambuc	 veor		q4, q4, q11
674*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #4
675*0a6a1f1dSLionel Sambuc	veor		q1, q1, q10
676*0a6a1f1dSLionel Sambuc	 veor		q0, q0, q11
677*0a6a1f1dSLionel Sambuc	sub	r5,r5,#1
678*0a6a1f1dSLionel Sambuc	b	.Lenc_sbox
679*0a6a1f1dSLionel Sambuc.align	4
680*0a6a1f1dSLionel Sambuc.Lenc_loop:
681*0a6a1f1dSLionel Sambuc	vldmia	r4!, {q8-q11}
682*0a6a1f1dSLionel Sambuc	veor	q8, q8, q0
683*0a6a1f1dSLionel Sambuc	veor	q9, q9, q1
684*0a6a1f1dSLionel Sambuc	vtbl.8	d0, {q8}, d24
685*0a6a1f1dSLionel Sambuc	vtbl.8	d1, {q8}, d25
686*0a6a1f1dSLionel Sambuc	vldmia	r4!, {q8}
687*0a6a1f1dSLionel Sambuc	veor	q10, q10, q2
688*0a6a1f1dSLionel Sambuc	vtbl.8	d2, {q9}, d24
689*0a6a1f1dSLionel Sambuc	vtbl.8	d3, {q9}, d25
690*0a6a1f1dSLionel Sambuc	vldmia	r4!, {q9}
691*0a6a1f1dSLionel Sambuc	veor	q11, q11, q3
692*0a6a1f1dSLionel Sambuc	vtbl.8	d4, {q10}, d24
693*0a6a1f1dSLionel Sambuc	vtbl.8	d5, {q10}, d25
694*0a6a1f1dSLionel Sambuc	vldmia	r4!, {q10}
695*0a6a1f1dSLionel Sambuc	vtbl.8	d6, {q11}, d24
696*0a6a1f1dSLionel Sambuc	vtbl.8	d7, {q11}, d25
697*0a6a1f1dSLionel Sambuc	vldmia	r4!, {q11}
698*0a6a1f1dSLionel Sambuc	veor	q8, q8, q4
699*0a6a1f1dSLionel Sambuc	veor	q9, q9, q5
700*0a6a1f1dSLionel Sambuc	vtbl.8	d8, {q8}, d24
701*0a6a1f1dSLionel Sambuc	vtbl.8	d9, {q8}, d25
702*0a6a1f1dSLionel Sambuc	veor	q10, q10, q6
703*0a6a1f1dSLionel Sambuc	vtbl.8	d10, {q9}, d24
704*0a6a1f1dSLionel Sambuc	vtbl.8	d11, {q9}, d25
705*0a6a1f1dSLionel Sambuc	veor	q11, q11, q7
706*0a6a1f1dSLionel Sambuc	vtbl.8	d12, {q10}, d24
707*0a6a1f1dSLionel Sambuc	vtbl.8	d13, {q10}, d25
708*0a6a1f1dSLionel Sambuc	vtbl.8	d14, {q11}, d24
709*0a6a1f1dSLionel Sambuc	vtbl.8	d15, {q11}, d25
710*0a6a1f1dSLionel Sambuc.Lenc_sbox:
711*0a6a1f1dSLionel Sambuc	veor	q2, q2, q1
712*0a6a1f1dSLionel Sambuc	veor	q5, q5, q6
713*0a6a1f1dSLionel Sambuc	veor	q3, q3, q0
714*0a6a1f1dSLionel Sambuc	veor	q6, q6, q2
715*0a6a1f1dSLionel Sambuc	veor	q5, q5, q0
716*0a6a1f1dSLionel Sambuc
717*0a6a1f1dSLionel Sambuc	veor	q6, q6, q3
718*0a6a1f1dSLionel Sambuc	veor	q3, q3, q7
719*0a6a1f1dSLionel Sambuc	veor	q7, q7, q5
720*0a6a1f1dSLionel Sambuc	veor	q3, q3, q4
721*0a6a1f1dSLionel Sambuc	veor	q4, q4, q5
722*0a6a1f1dSLionel Sambuc
723*0a6a1f1dSLionel Sambuc	veor	q2, q2, q7
724*0a6a1f1dSLionel Sambuc	veor	q3, q3, q1
725*0a6a1f1dSLionel Sambuc	veor	q1, q1, q5
726*0a6a1f1dSLionel Sambuc	veor	q11, q7, q4
727*0a6a1f1dSLionel Sambuc	veor	q10, q1, q2
728*0a6a1f1dSLionel Sambuc	veor	q9, q5, q3
729*0a6a1f1dSLionel Sambuc	veor	q13, q2, q4
730*0a6a1f1dSLionel Sambuc	 vmov	q8, q10
731*0a6a1f1dSLionel Sambuc	veor	q12, q6, q0
732*0a6a1f1dSLionel Sambuc
733*0a6a1f1dSLionel Sambuc	vorr	q10, q10, q9
734*0a6a1f1dSLionel Sambuc	veor	q15, q11, q8
735*0a6a1f1dSLionel Sambuc	vand	q14, q11, q12
736*0a6a1f1dSLionel Sambuc	vorr	q11, q11, q12
737*0a6a1f1dSLionel Sambuc	veor	q12, q12, q9
738*0a6a1f1dSLionel Sambuc	vand	q8, q8, q9
739*0a6a1f1dSLionel Sambuc	veor	q9, q3, q0
740*0a6a1f1dSLionel Sambuc	vand	q15, q15, q12
741*0a6a1f1dSLionel Sambuc	vand	q13, q13, q9
742*0a6a1f1dSLionel Sambuc	veor	q9, q7, q1
743*0a6a1f1dSLionel Sambuc	veor	q12, q5, q6
744*0a6a1f1dSLionel Sambuc	veor	q11, q11, q13
745*0a6a1f1dSLionel Sambuc	veor	q10, q10, q13
746*0a6a1f1dSLionel Sambuc	vand	q13, q9, q12
747*0a6a1f1dSLionel Sambuc	vorr	q9, q9, q12
748*0a6a1f1dSLionel Sambuc	veor	q11, q11, q15
749*0a6a1f1dSLionel Sambuc	veor	q8, q8, q13
750*0a6a1f1dSLionel Sambuc	veor	q10, q10, q14
751*0a6a1f1dSLionel Sambuc	veor	q9, q9, q15
752*0a6a1f1dSLionel Sambuc	veor	q8, q8, q14
753*0a6a1f1dSLionel Sambuc	vand	q12, q2, q3
754*0a6a1f1dSLionel Sambuc	veor	q9, q9, q14
755*0a6a1f1dSLionel Sambuc	vand	q13, q4, q0
756*0a6a1f1dSLionel Sambuc	vand	q14, q1, q5
757*0a6a1f1dSLionel Sambuc	vorr	q15, q7, q6
758*0a6a1f1dSLionel Sambuc	veor	q11, q11, q12
759*0a6a1f1dSLionel Sambuc	veor	q9, q9, q14
760*0a6a1f1dSLionel Sambuc	veor	q8, q8, q15
761*0a6a1f1dSLionel Sambuc	veor	q10, q10, q13
762*0a6a1f1dSLionel Sambuc
763*0a6a1f1dSLionel Sambuc	@ Inv_GF16 	0, 	1, 	2, 	3, s0, s1, s2, s3
764*0a6a1f1dSLionel Sambuc
765*0a6a1f1dSLionel Sambuc	@ new smaller inversion
766*0a6a1f1dSLionel Sambuc
767*0a6a1f1dSLionel Sambuc	vand	q14, q11, q9
768*0a6a1f1dSLionel Sambuc	vmov	q12, q8
769*0a6a1f1dSLionel Sambuc
770*0a6a1f1dSLionel Sambuc	veor	q13, q10, q14
771*0a6a1f1dSLionel Sambuc	veor	q15, q8, q14
772*0a6a1f1dSLionel Sambuc	veor	q14, q8, q14	@ q14=q15
773*0a6a1f1dSLionel Sambuc
774*0a6a1f1dSLionel Sambuc	vbsl	q13, q9, q8
775*0a6a1f1dSLionel Sambuc	vbsl	q15, q11, q10
776*0a6a1f1dSLionel Sambuc	veor	q11, q11, q10
777*0a6a1f1dSLionel Sambuc
778*0a6a1f1dSLionel Sambuc	vbsl	q12, q13, q14
779*0a6a1f1dSLionel Sambuc	vbsl	q8, q14, q13
780*0a6a1f1dSLionel Sambuc
781*0a6a1f1dSLionel Sambuc	vand	q14, q12, q15
782*0a6a1f1dSLionel Sambuc	veor	q9, q9, q8
783*0a6a1f1dSLionel Sambuc
784*0a6a1f1dSLionel Sambuc	veor	q14, q14, q11
785*0a6a1f1dSLionel Sambuc	veor	q12, q6, q0
786*0a6a1f1dSLionel Sambuc	veor	q8, q5, q3
787*0a6a1f1dSLionel Sambuc	veor 	q10, q15, q14
788*0a6a1f1dSLionel Sambuc	vand	q10, q10, q6
789*0a6a1f1dSLionel Sambuc	veor	q6, q6, q5
790*0a6a1f1dSLionel Sambuc	vand	q11, q5, q15
791*0a6a1f1dSLionel Sambuc	vand	q6, q6, q14
792*0a6a1f1dSLionel Sambuc	veor	q5, q11, q10
793*0a6a1f1dSLionel Sambuc	veor	q6, q6, q11
794*0a6a1f1dSLionel Sambuc	veor	q15, q15, q13
795*0a6a1f1dSLionel Sambuc	veor	q14, q14, q9
796*0a6a1f1dSLionel Sambuc	veor	q11, q15, q14
797*0a6a1f1dSLionel Sambuc	 veor 	q10, q13, q9
798*0a6a1f1dSLionel Sambuc	vand	q11, q11, q12
799*0a6a1f1dSLionel Sambuc	 vand	q10, q10, q0
800*0a6a1f1dSLionel Sambuc	veor	q12, q12, q8
801*0a6a1f1dSLionel Sambuc	 veor	q0, q0, q3
802*0a6a1f1dSLionel Sambuc	vand	q8, q8, q15
803*0a6a1f1dSLionel Sambuc	 vand	q3, q3, q13
804*0a6a1f1dSLionel Sambuc	vand	q12, q12, q14
805*0a6a1f1dSLionel Sambuc	 vand	q0, q0, q9
806*0a6a1f1dSLionel Sambuc	veor	q8, q8, q12
807*0a6a1f1dSLionel Sambuc	 veor	q0, q0, q3
808*0a6a1f1dSLionel Sambuc	veor	q12, q12, q11
809*0a6a1f1dSLionel Sambuc	 veor	q3, q3, q10
810*0a6a1f1dSLionel Sambuc	veor	q6, q6, q12
811*0a6a1f1dSLionel Sambuc	veor	q0, q0, q12
812*0a6a1f1dSLionel Sambuc	veor	q5, q5, q8
813*0a6a1f1dSLionel Sambuc	veor	q3, q3, q8
814*0a6a1f1dSLionel Sambuc
815*0a6a1f1dSLionel Sambuc	veor	q12, q7, q4
816*0a6a1f1dSLionel Sambuc	veor	q8, q1, q2
817*0a6a1f1dSLionel Sambuc	veor	q11, q15, q14
818*0a6a1f1dSLionel Sambuc	 veor 	q10, q13, q9
819*0a6a1f1dSLionel Sambuc	vand	q11, q11, q12
820*0a6a1f1dSLionel Sambuc	 vand	q10, q10, q4
821*0a6a1f1dSLionel Sambuc	veor	q12, q12, q8
822*0a6a1f1dSLionel Sambuc	 veor	q4, q4, q2
823*0a6a1f1dSLionel Sambuc	vand	q8, q8, q15
824*0a6a1f1dSLionel Sambuc	 vand	q2, q2, q13
825*0a6a1f1dSLionel Sambuc	vand	q12, q12, q14
826*0a6a1f1dSLionel Sambuc	 vand	q4, q4, q9
827*0a6a1f1dSLionel Sambuc	veor	q8, q8, q12
828*0a6a1f1dSLionel Sambuc	 veor	q4, q4, q2
829*0a6a1f1dSLionel Sambuc	veor	q12, q12, q11
830*0a6a1f1dSLionel Sambuc	 veor	q2, q2, q10
831*0a6a1f1dSLionel Sambuc	veor	q15, q15, q13
832*0a6a1f1dSLionel Sambuc	veor	q14, q14, q9
833*0a6a1f1dSLionel Sambuc	veor 	q10, q15, q14
834*0a6a1f1dSLionel Sambuc	vand	q10, q10, q7
835*0a6a1f1dSLionel Sambuc	veor	q7, q7, q1
836*0a6a1f1dSLionel Sambuc	vand	q11, q1, q15
837*0a6a1f1dSLionel Sambuc	vand	q7, q7, q14
838*0a6a1f1dSLionel Sambuc	veor	q1, q11, q10
839*0a6a1f1dSLionel Sambuc	veor	q7, q7, q11
840*0a6a1f1dSLionel Sambuc	veor	q7, q7, q12
841*0a6a1f1dSLionel Sambuc	veor	q4, q4, q12
842*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
843*0a6a1f1dSLionel Sambuc	veor	q2, q2, q8
844*0a6a1f1dSLionel Sambuc	veor	q7, q7, q0
845*0a6a1f1dSLionel Sambuc	veor	q1, q1, q6
846*0a6a1f1dSLionel Sambuc	veor	q6, q6, q0
847*0a6a1f1dSLionel Sambuc	veor	q4, q4, q7
848*0a6a1f1dSLionel Sambuc	veor	q0, q0, q1
849*0a6a1f1dSLionel Sambuc
850*0a6a1f1dSLionel Sambuc	veor	q1, q1, q5
851*0a6a1f1dSLionel Sambuc	veor	q5, q5, q2
852*0a6a1f1dSLionel Sambuc	veor	q2, q2, q3
853*0a6a1f1dSLionel Sambuc	veor	q3, q3, q5
854*0a6a1f1dSLionel Sambuc	veor	q4, q4, q5
855*0a6a1f1dSLionel Sambuc
856*0a6a1f1dSLionel Sambuc	veor	q6, q6, q3
857*0a6a1f1dSLionel Sambuc	subs	r5,r5,#1
858*0a6a1f1dSLionel Sambuc	bcc	.Lenc_done
859*0a6a1f1dSLionel Sambuc	vext.8	q8, q0, q0, #12	@ x0 <<< 32
860*0a6a1f1dSLionel Sambuc	vext.8	q9, q1, q1, #12
861*0a6a1f1dSLionel Sambuc	 veor	q0, q0, q8		@ x0 ^ (x0 <<< 32)
862*0a6a1f1dSLionel Sambuc	vext.8	q10, q4, q4, #12
863*0a6a1f1dSLionel Sambuc	 veor	q1, q1, q9
864*0a6a1f1dSLionel Sambuc	vext.8	q11, q6, q6, #12
865*0a6a1f1dSLionel Sambuc	 veor	q4, q4, q10
866*0a6a1f1dSLionel Sambuc	vext.8	q12, q3, q3, #12
867*0a6a1f1dSLionel Sambuc	 veor	q6, q6, q11
868*0a6a1f1dSLionel Sambuc	vext.8	q13, q7, q7, #12
869*0a6a1f1dSLionel Sambuc	 veor	q3, q3, q12
870*0a6a1f1dSLionel Sambuc	vext.8	q14, q2, q2, #12
871*0a6a1f1dSLionel Sambuc	 veor	q7, q7, q13
872*0a6a1f1dSLionel Sambuc	vext.8	q15, q5, q5, #12
873*0a6a1f1dSLionel Sambuc	 veor	q2, q2, q14
874*0a6a1f1dSLionel Sambuc
875*0a6a1f1dSLionel Sambuc	veor	q9, q9, q0
876*0a6a1f1dSLionel Sambuc	 veor	q5, q5, q15
877*0a6a1f1dSLionel Sambuc	 vext.8	q0, q0, q0, #8		@ (x0 ^ (x0 <<< 32)) <<< 64)
878*0a6a1f1dSLionel Sambuc	veor	q10, q10, q1
879*0a6a1f1dSLionel Sambuc	veor	q8, q8, q5
880*0a6a1f1dSLionel Sambuc	veor	q9, q9, q5
881*0a6a1f1dSLionel Sambuc	 vext.8	q1, q1, q1, #8
882*0a6a1f1dSLionel Sambuc	veor	q13, q13, q3
883*0a6a1f1dSLionel Sambuc	 veor	q0, q0, q8
884*0a6a1f1dSLionel Sambuc	veor	q14, q14, q7
885*0a6a1f1dSLionel Sambuc	 veor	q1, q1, q9
886*0a6a1f1dSLionel Sambuc	 vext.8	q8, q3, q3, #8
887*0a6a1f1dSLionel Sambuc	veor	q12, q12, q6
888*0a6a1f1dSLionel Sambuc	 vext.8	q9, q7, q7, #8
889*0a6a1f1dSLionel Sambuc	veor	q15, q15, q2
890*0a6a1f1dSLionel Sambuc	 vext.8	q3, q6, q6, #8
891*0a6a1f1dSLionel Sambuc	veor	q11, q11, q4
892*0a6a1f1dSLionel Sambuc	 vext.8	q7, q5, q5, #8
893*0a6a1f1dSLionel Sambuc	veor	q12, q12, q5
894*0a6a1f1dSLionel Sambuc	 vext.8	q6, q2, q2, #8
895*0a6a1f1dSLionel Sambuc	veor	q11, q11, q5
896*0a6a1f1dSLionel Sambuc	 vext.8	q2, q4, q4, #8
897*0a6a1f1dSLionel Sambuc	veor	q5, q9, q13
898*0a6a1f1dSLionel Sambuc	veor	q4, q8, q12
899*0a6a1f1dSLionel Sambuc	veor	q3, q3, q11
900*0a6a1f1dSLionel Sambuc	veor	q7, q7, q15
901*0a6a1f1dSLionel Sambuc	veor	q6, q6, q14
902*0a6a1f1dSLionel Sambuc	 @ vmov	q4, q8
903*0a6a1f1dSLionel Sambuc	veor	q2, q2, q10
904*0a6a1f1dSLionel Sambuc	 @ vmov	q5, q9
905*0a6a1f1dSLionel Sambuc	vldmia	r6, {q12}		@ .LSR
906*0a6a1f1dSLionel Sambuc	ite	eq				@ Thumb2 thing, samity check in ARM
907*0a6a1f1dSLionel Sambuc	addeq	r6,r6,#0x10
908*0a6a1f1dSLionel Sambuc	bne	.Lenc_loop
909*0a6a1f1dSLionel Sambuc	vldmia	r6, {q12}		@ .LSRM0
910*0a6a1f1dSLionel Sambuc	b	.Lenc_loop
911*0a6a1f1dSLionel Sambuc.align	4
912*0a6a1f1dSLionel Sambuc.Lenc_done:
913*0a6a1f1dSLionel Sambuc	vmov.i8	q8,#0x55			@ compose .LBS0
914*0a6a1f1dSLionel Sambuc	vmov.i8	q9,#0x33			@ compose .LBS1
915*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q2, #1
916*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q3, #1
917*0a6a1f1dSLionel Sambuc	veor		q10, q10, q5
918*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q7
919*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
920*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
921*0a6a1f1dSLionel Sambuc	veor		q5, q5, q10
922*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #1
923*0a6a1f1dSLionel Sambuc	 veor		q7, q7, q11
924*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #1
925*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
926*0a6a1f1dSLionel Sambuc	 veor		q3, q3, q11
927*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q4, #1
928*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q0, #1
929*0a6a1f1dSLionel Sambuc	veor		q10, q10, q6
930*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q1
931*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
932*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
933*0a6a1f1dSLionel Sambuc	veor		q6, q6, q10
934*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #1
935*0a6a1f1dSLionel Sambuc	 veor		q1, q1, q11
936*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #1
937*0a6a1f1dSLionel Sambuc	veor		q4, q4, q10
938*0a6a1f1dSLionel Sambuc	 veor		q0, q0, q11
939*0a6a1f1dSLionel Sambuc	vmov.i8	q8,#0x0f			@ compose .LBS2
940*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q7, #2
941*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q3, #2
942*0a6a1f1dSLionel Sambuc	veor		q10, q10, q5
943*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q2
944*0a6a1f1dSLionel Sambuc	vand		q10, q10, q9
945*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q9
946*0a6a1f1dSLionel Sambuc	veor		q5, q5, q10
947*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #2
948*0a6a1f1dSLionel Sambuc	 veor		q2, q2, q11
949*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #2
950*0a6a1f1dSLionel Sambuc	veor		q7, q7, q10
951*0a6a1f1dSLionel Sambuc	 veor		q3, q3, q11
952*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q1, #2
953*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q0, #2
954*0a6a1f1dSLionel Sambuc	veor		q10, q10, q6
955*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q4
956*0a6a1f1dSLionel Sambuc	vand		q10, q10, q9
957*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q9
958*0a6a1f1dSLionel Sambuc	veor		q6, q6, q10
959*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #2
960*0a6a1f1dSLionel Sambuc	 veor		q4, q4, q11
961*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #2
962*0a6a1f1dSLionel Sambuc	veor		q1, q1, q10
963*0a6a1f1dSLionel Sambuc	 veor		q0, q0, q11
964*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q6, #4
965*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q4, #4
966*0a6a1f1dSLionel Sambuc	veor		q10, q10, q5
967*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q2
968*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
969*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
970*0a6a1f1dSLionel Sambuc	veor		q5, q5, q10
971*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #4
972*0a6a1f1dSLionel Sambuc	 veor		q2, q2, q11
973*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #4
974*0a6a1f1dSLionel Sambuc	veor		q6, q6, q10
975*0a6a1f1dSLionel Sambuc	 veor		q4, q4, q11
976*0a6a1f1dSLionel Sambuc	vshr.u64	q10, q1, #4
977*0a6a1f1dSLionel Sambuc	 vshr.u64	q11, q0, #4
978*0a6a1f1dSLionel Sambuc	veor		q10, q10, q7
979*0a6a1f1dSLionel Sambuc	 veor		q11, q11, q3
980*0a6a1f1dSLionel Sambuc	vand		q10, q10, q8
981*0a6a1f1dSLionel Sambuc	 vand		q11, q11, q8
982*0a6a1f1dSLionel Sambuc	veor		q7, q7, q10
983*0a6a1f1dSLionel Sambuc	vshl.u64	q10, q10, #4
984*0a6a1f1dSLionel Sambuc	 veor		q3, q3, q11
985*0a6a1f1dSLionel Sambuc	 vshl.u64	q11, q11, #4
986*0a6a1f1dSLionel Sambuc	veor		q1, q1, q10
987*0a6a1f1dSLionel Sambuc	 veor		q0, q0, q11
988*0a6a1f1dSLionel Sambuc	vldmia	r4, {q8}			@ last round key
989*0a6a1f1dSLionel Sambuc	veor	q4, q4, q8
990*0a6a1f1dSLionel Sambuc	veor	q6, q6, q8
991*0a6a1f1dSLionel Sambuc	veor	q3, q3, q8
992*0a6a1f1dSLionel Sambuc	veor	q7, q7, q8
993*0a6a1f1dSLionel Sambuc	veor	q2, q2, q8
994*0a6a1f1dSLionel Sambuc	veor	q5, q5, q8
995*0a6a1f1dSLionel Sambuc	veor	q0, q0, q8
996*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
997*0a6a1f1dSLionel Sambuc	RET
998*0a6a1f1dSLionel Sambuc.size	_bsaes_encrypt8,.-_bsaes_encrypt8
999*0a6a1f1dSLionel Sambuc.type	_bsaes_key_convert,%function
1000*0a6a1f1dSLionel Sambuc.align	4
1001*0a6a1f1dSLionel Sambuc_bsaes_key_convert:
1002*0a6a1f1dSLionel Sambuc	adr	r6,_bsaes_key_convert
1003*0a6a1f1dSLionel Sambuc	vld1.8	{q7},  [r4]!		@ load round 0 key
1004*0a6a1f1dSLionel Sambuc	sub	r6,r6,#_bsaes_key_convert-.LM0
1005*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r4]!		@ load round 1 key
1006*0a6a1f1dSLionel Sambuc
1007*0a6a1f1dSLionel Sambuc	vmov.i8	q8,  #0x01			@ bit masks
1008*0a6a1f1dSLionel Sambuc	vmov.i8	q9,  #0x02
1009*0a6a1f1dSLionel Sambuc	vmov.i8	q10, #0x04
1010*0a6a1f1dSLionel Sambuc	vmov.i8	q11, #0x08
1011*0a6a1f1dSLionel Sambuc	vmov.i8	q12, #0x10
1012*0a6a1f1dSLionel Sambuc	vmov.i8	q13, #0x20
1013*0a6a1f1dSLionel Sambuc	vldmia	r6, {q14}		@ .LM0
1014*0a6a1f1dSLionel Sambuc
1015*0a6a1f1dSLionel Sambuc#ifdef __ARMEL__
1016*0a6a1f1dSLionel Sambuc	vrev32.8	q7,  q7
1017*0a6a1f1dSLionel Sambuc	vrev32.8	q15, q15
1018*0a6a1f1dSLionel Sambuc#endif
1019*0a6a1f1dSLionel Sambuc	sub	r5,r5,#1
1020*0a6a1f1dSLionel Sambuc	vstmia	r12!, {q7}		@ save round 0 key
1021*0a6a1f1dSLionel Sambuc	b	.Lkey_loop
1022*0a6a1f1dSLionel Sambuc
1023*0a6a1f1dSLionel Sambuc.align	4
1024*0a6a1f1dSLionel Sambuc.Lkey_loop:
1025*0a6a1f1dSLionel Sambuc	vtbl.8	d14,{q15},d28
1026*0a6a1f1dSLionel Sambuc	vtbl.8	d15,{q15},d29
1027*0a6a1f1dSLionel Sambuc	vmov.i8	q6,  #0x40
1028*0a6a1f1dSLionel Sambuc	vmov.i8	q15, #0x80
1029*0a6a1f1dSLionel Sambuc
1030*0a6a1f1dSLionel Sambuc	vtst.8	q0, q7, q8
1031*0a6a1f1dSLionel Sambuc	vtst.8	q1, q7, q9
1032*0a6a1f1dSLionel Sambuc	vtst.8	q2, q7, q10
1033*0a6a1f1dSLionel Sambuc	vtst.8	q3, q7, q11
1034*0a6a1f1dSLionel Sambuc	vtst.8	q4, q7, q12
1035*0a6a1f1dSLionel Sambuc	vtst.8	q5, q7, q13
1036*0a6a1f1dSLionel Sambuc	vtst.8	q6, q7, q6
1037*0a6a1f1dSLionel Sambuc	vtst.8	q7, q7, q15
1038*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r4]!		@ load next round key
1039*0a6a1f1dSLionel Sambuc	vmvn	q0, q0		@ "pnot"
1040*0a6a1f1dSLionel Sambuc	vmvn	q1, q1
1041*0a6a1f1dSLionel Sambuc	vmvn	q5, q5
1042*0a6a1f1dSLionel Sambuc	vmvn	q6, q6
1043*0a6a1f1dSLionel Sambuc#ifdef __ARMEL__
1044*0a6a1f1dSLionel Sambuc	vrev32.8	q15, q15
1045*0a6a1f1dSLionel Sambuc#endif
1046*0a6a1f1dSLionel Sambuc	subs	r5,r5,#1
1047*0a6a1f1dSLionel Sambuc	vstmia	r12!,{q0-q7}		@ write bit-sliced round key
1048*0a6a1f1dSLionel Sambuc	bne	.Lkey_loop
1049*0a6a1f1dSLionel Sambuc
1050*0a6a1f1dSLionel Sambuc	vmov.i8	q7,#0x63			@ compose .L63
1051*0a6a1f1dSLionel Sambuc	@ don't save last round key
1052*0a6a1f1dSLionel Sambuc	RET
1053*0a6a1f1dSLionel Sambuc.size	_bsaes_key_convert,.-_bsaes_key_convert
1054*0a6a1f1dSLionel Sambuc.extern AES_cbc_encrypt
1055*0a6a1f1dSLionel Sambuc.extern AES_decrypt
1056*0a6a1f1dSLionel Sambuc
1057*0a6a1f1dSLionel Sambuc.global	bsaes_cbc_encrypt
1058*0a6a1f1dSLionel Sambuc.type	bsaes_cbc_encrypt,%function
1059*0a6a1f1dSLionel Sambuc.align	5
1060*0a6a1f1dSLionel Sambucbsaes_cbc_encrypt:
1061*0a6a1f1dSLionel Sambuc#ifndef	__KERNEL__
1062*0a6a1f1dSLionel Sambuc	cmp	r2, #128
1063*0a6a1f1dSLionel Sambuc#ifndef	__thumb__
1064*0a6a1f1dSLionel Sambuc	blo	AES_cbc_encrypt
1065*0a6a1f1dSLionel Sambuc#else
1066*0a6a1f1dSLionel Sambuc	bhs	1f
1067*0a6a1f1dSLionel Sambuc	b	AES_cbc_encrypt
1068*0a6a1f1dSLionel Sambuc1:
1069*0a6a1f1dSLionel Sambuc#endif
1070*0a6a1f1dSLionel Sambuc#endif
1071*0a6a1f1dSLionel Sambuc
1072*0a6a1f1dSLionel Sambuc	@ it is up to the caller to make sure we are called with enc == 0
1073*0a6a1f1dSLionel Sambuc
1074*0a6a1f1dSLionel Sambuc	mov	ip, sp
1075*0a6a1f1dSLionel Sambuc	stmdb	sp!, {r4-r10, lr}
1076*0a6a1f1dSLionel Sambuc	VFP_ABI_PUSH
1077*0a6a1f1dSLionel Sambuc	ldr	r8, [ip]			@ IV is 1st arg on the stack
1078*0a6a1f1dSLionel Sambuc	mov	r2, r2, lsr#4		@ len in 16 byte blocks
1079*0a6a1f1dSLionel Sambuc	sub	sp, #0x10			@ scratch space to carry over the IV
1080*0a6a1f1dSLionel Sambuc	mov	r9, sp				@ save sp
1081*0a6a1f1dSLionel Sambuc
1082*0a6a1f1dSLionel Sambuc	ldr	r10, [r3, #240]		@ get # of rounds
1083*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1084*0a6a1f1dSLionel Sambuc	@ allocate the key schedule on the stack
1085*0a6a1f1dSLionel Sambuc	sub	r12, sp, r10, lsl#7		@ 128 bytes per inner round key
1086*0a6a1f1dSLionel Sambuc	add	r12, #96			@ sifze of bit-slices key schedule
1087*0a6a1f1dSLionel Sambuc
1088*0a6a1f1dSLionel Sambuc	@ populate the key schedule
1089*0a6a1f1dSLionel Sambuc	mov	r4, r3			@ pass key
1090*0a6a1f1dSLionel Sambuc	mov	r5, r10			@ pass # of rounds
1091*0a6a1f1dSLionel Sambuc	mov	sp, r12				@ sp is sp
1092*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
1093*0a6a1f1dSLionel Sambuc	vldmia	sp, {q6}
1094*0a6a1f1dSLionel Sambuc	vstmia	r12,  {q15}		@ save last round key
1095*0a6a1f1dSLionel Sambuc	veor	q7, q7, q6	@ fix up round 0 key
1096*0a6a1f1dSLionel Sambuc	vstmia	sp, {q7}
1097*0a6a1f1dSLionel Sambuc#else
1098*0a6a1f1dSLionel Sambuc	ldr	r12, [r3, #244]
1099*0a6a1f1dSLionel Sambuc	eors	r12, #1
1100*0a6a1f1dSLionel Sambuc	beq	0f
1101*0a6a1f1dSLionel Sambuc
1102*0a6a1f1dSLionel Sambuc	@ populate the key schedule
1103*0a6a1f1dSLionel Sambuc	str	r12, [r3, #244]
1104*0a6a1f1dSLionel Sambuc	mov	r4, r3			@ pass key
1105*0a6a1f1dSLionel Sambuc	mov	r5, r10			@ pass # of rounds
1106*0a6a1f1dSLionel Sambuc	add	r12, r3, #248			@ pass key schedule
1107*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
1108*0a6a1f1dSLionel Sambuc	add	r4, r3, #248
1109*0a6a1f1dSLionel Sambuc	vldmia	r4, {q6}
1110*0a6a1f1dSLionel Sambuc	vstmia	r12, {q15}			@ save last round key
1111*0a6a1f1dSLionel Sambuc	veor	q7, q7, q6	@ fix up round 0 key
1112*0a6a1f1dSLionel Sambuc	vstmia	r4, {q7}
1113*0a6a1f1dSLionel Sambuc
1114*0a6a1f1dSLionel Sambuc.align	2
1115*0a6a1f1dSLionel Sambuc0:
1116*0a6a1f1dSLionel Sambuc#endif
1117*0a6a1f1dSLionel Sambuc
1118*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r8]		@ load IV
1119*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_loop
1120*0a6a1f1dSLionel Sambuc
1121*0a6a1f1dSLionel Sambuc.align	4
1122*0a6a1f1dSLionel Sambuc.Lcbc_dec_loop:
1123*0a6a1f1dSLionel Sambuc	subs	r2, r2, #0x8
1124*0a6a1f1dSLionel Sambuc	bmi	.Lcbc_dec_loop_finish
1125*0a6a1f1dSLionel Sambuc
1126*0a6a1f1dSLionel Sambuc	vld1.8	{q0-q1}, [r0]!	@ load input
1127*0a6a1f1dSLionel Sambuc	vld1.8	{q2-q3}, [r0]!
1128*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1129*0a6a1f1dSLionel Sambuc	mov	r4, sp			@ pass the key
1130*0a6a1f1dSLionel Sambuc#else
1131*0a6a1f1dSLionel Sambuc	add	r4, r3, #248
1132*0a6a1f1dSLionel Sambuc#endif
1133*0a6a1f1dSLionel Sambuc	vld1.8	{q4-q5}, [r0]!
1134*0a6a1f1dSLionel Sambuc	mov	r5, r10
1135*0a6a1f1dSLionel Sambuc	vld1.8	{q6-q7}, [r0]
1136*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x60
1137*0a6a1f1dSLionel Sambuc	vstmia	r9, {q15}			@ put aside IV
1138*0a6a1f1dSLionel Sambuc
1139*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1140*0a6a1f1dSLionel Sambuc
1141*0a6a1f1dSLionel Sambuc	vldmia	r9, {q14}			@ reload IV
1142*0a6a1f1dSLionel Sambuc	vld1.8	{q8-q9}, [r0]!	@ reload input
1143*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1144*0a6a1f1dSLionel Sambuc	vld1.8	{q10-q11}, [r0]!
1145*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1146*0a6a1f1dSLionel Sambuc	veor	q6, q6, q9
1147*0a6a1f1dSLionel Sambuc	vld1.8	{q12-q13}, [r0]!
1148*0a6a1f1dSLionel Sambuc	veor	q4, q4, q10
1149*0a6a1f1dSLionel Sambuc	veor	q2, q2, q11
1150*0a6a1f1dSLionel Sambuc	vld1.8	{q14-q15}, [r0]!
1151*0a6a1f1dSLionel Sambuc	veor	q7, q7, q12
1152*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1153*0a6a1f1dSLionel Sambuc	veor	q3, q3, q13
1154*0a6a1f1dSLionel Sambuc	vst1.8	{q6}, [r1]!
1155*0a6a1f1dSLionel Sambuc	veor	q5, q5, q14
1156*0a6a1f1dSLionel Sambuc	vst1.8	{q4}, [r1]!
1157*0a6a1f1dSLionel Sambuc	vst1.8	{q2}, [r1]!
1158*0a6a1f1dSLionel Sambuc	vst1.8	{q7}, [r1]!
1159*0a6a1f1dSLionel Sambuc	vst1.8	{q3}, [r1]!
1160*0a6a1f1dSLionel Sambuc	vst1.8	{q5}, [r1]!
1161*0a6a1f1dSLionel Sambuc
1162*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_loop
1163*0a6a1f1dSLionel Sambuc
1164*0a6a1f1dSLionel Sambuc.Lcbc_dec_loop_finish:
1165*0a6a1f1dSLionel Sambuc	adds	r2, r2, #8
1166*0a6a1f1dSLionel Sambuc	beq	.Lcbc_dec_done
1167*0a6a1f1dSLionel Sambuc
1168*0a6a1f1dSLionel Sambuc	vld1.8	{q0}, [r0]!		@ load input
1169*0a6a1f1dSLionel Sambuc	cmp	r2, #2
1170*0a6a1f1dSLionel Sambuc	blo	.Lcbc_dec_one
1171*0a6a1f1dSLionel Sambuc	vld1.8	{q1}, [r0]!
1172*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1173*0a6a1f1dSLionel Sambuc	mov	r4, sp			@ pass the key
1174*0a6a1f1dSLionel Sambuc#else
1175*0a6a1f1dSLionel Sambuc	add	r4, r3, #248
1176*0a6a1f1dSLionel Sambuc#endif
1177*0a6a1f1dSLionel Sambuc	mov	r5, r10
1178*0a6a1f1dSLionel Sambuc	vstmia	r9, {q15}			@ put aside IV
1179*0a6a1f1dSLionel Sambuc	beq	.Lcbc_dec_two
1180*0a6a1f1dSLionel Sambuc	vld1.8	{q2}, [r0]!
1181*0a6a1f1dSLionel Sambuc	cmp	r2, #4
1182*0a6a1f1dSLionel Sambuc	blo	.Lcbc_dec_three
1183*0a6a1f1dSLionel Sambuc	vld1.8	{q3}, [r0]!
1184*0a6a1f1dSLionel Sambuc	beq	.Lcbc_dec_four
1185*0a6a1f1dSLionel Sambuc	vld1.8	{q4}, [r0]!
1186*0a6a1f1dSLionel Sambuc	cmp	r2, #6
1187*0a6a1f1dSLionel Sambuc	blo	.Lcbc_dec_five
1188*0a6a1f1dSLionel Sambuc	vld1.8	{q5}, [r0]!
1189*0a6a1f1dSLionel Sambuc	beq	.Lcbc_dec_six
1190*0a6a1f1dSLionel Sambuc	vld1.8	{q6}, [r0]!
1191*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x70
1192*0a6a1f1dSLionel Sambuc
1193*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1194*0a6a1f1dSLionel Sambuc
1195*0a6a1f1dSLionel Sambuc	vldmia	r9, {q14}			@ reload IV
1196*0a6a1f1dSLionel Sambuc	vld1.8	{q8-q9}, [r0]!	@ reload input
1197*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1198*0a6a1f1dSLionel Sambuc	vld1.8	{q10-q11}, [r0]!
1199*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1200*0a6a1f1dSLionel Sambuc	veor	q6, q6, q9
1201*0a6a1f1dSLionel Sambuc	vld1.8	{q12-q13}, [r0]!
1202*0a6a1f1dSLionel Sambuc	veor	q4, q4, q10
1203*0a6a1f1dSLionel Sambuc	veor	q2, q2, q11
1204*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r0]!
1205*0a6a1f1dSLionel Sambuc	veor	q7, q7, q12
1206*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1207*0a6a1f1dSLionel Sambuc	veor	q3, q3, q13
1208*0a6a1f1dSLionel Sambuc	vst1.8	{q6}, [r1]!
1209*0a6a1f1dSLionel Sambuc	vst1.8	{q4}, [r1]!
1210*0a6a1f1dSLionel Sambuc	vst1.8	{q2}, [r1]!
1211*0a6a1f1dSLionel Sambuc	vst1.8	{q7}, [r1]!
1212*0a6a1f1dSLionel Sambuc	vst1.8	{q3}, [r1]!
1213*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_done
1214*0a6a1f1dSLionel Sambuc.align	4
1215*0a6a1f1dSLionel Sambuc.Lcbc_dec_six:
1216*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x60
1217*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1218*0a6a1f1dSLionel Sambuc	vldmia	r9,{q14}			@ reload IV
1219*0a6a1f1dSLionel Sambuc	vld1.8	{q8-q9}, [r0]!	@ reload input
1220*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1221*0a6a1f1dSLionel Sambuc	vld1.8	{q10-q11}, [r0]!
1222*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1223*0a6a1f1dSLionel Sambuc	veor	q6, q6, q9
1224*0a6a1f1dSLionel Sambuc	vld1.8	{q12}, [r0]!
1225*0a6a1f1dSLionel Sambuc	veor	q4, q4, q10
1226*0a6a1f1dSLionel Sambuc	veor	q2, q2, q11
1227*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r0]!
1228*0a6a1f1dSLionel Sambuc	veor	q7, q7, q12
1229*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1230*0a6a1f1dSLionel Sambuc	vst1.8	{q6}, [r1]!
1231*0a6a1f1dSLionel Sambuc	vst1.8	{q4}, [r1]!
1232*0a6a1f1dSLionel Sambuc	vst1.8	{q2}, [r1]!
1233*0a6a1f1dSLionel Sambuc	vst1.8	{q7}, [r1]!
1234*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_done
1235*0a6a1f1dSLionel Sambuc.align	4
1236*0a6a1f1dSLionel Sambuc.Lcbc_dec_five:
1237*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x50
1238*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1239*0a6a1f1dSLionel Sambuc	vldmia	r9, {q14}			@ reload IV
1240*0a6a1f1dSLionel Sambuc	vld1.8	{q8-q9}, [r0]!	@ reload input
1241*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1242*0a6a1f1dSLionel Sambuc	vld1.8	{q10-q11}, [r0]!
1243*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1244*0a6a1f1dSLionel Sambuc	veor	q6, q6, q9
1245*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r0]!
1246*0a6a1f1dSLionel Sambuc	veor	q4, q4, q10
1247*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1248*0a6a1f1dSLionel Sambuc	veor	q2, q2, q11
1249*0a6a1f1dSLionel Sambuc	vst1.8	{q6}, [r1]!
1250*0a6a1f1dSLionel Sambuc	vst1.8	{q4}, [r1]!
1251*0a6a1f1dSLionel Sambuc	vst1.8	{q2}, [r1]!
1252*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_done
1253*0a6a1f1dSLionel Sambuc.align	4
1254*0a6a1f1dSLionel Sambuc.Lcbc_dec_four:
1255*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x40
1256*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1257*0a6a1f1dSLionel Sambuc	vldmia	r9, {q14}			@ reload IV
1258*0a6a1f1dSLionel Sambuc	vld1.8	{q8-q9}, [r0]!	@ reload input
1259*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1260*0a6a1f1dSLionel Sambuc	vld1.8	{q10}, [r0]!
1261*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1262*0a6a1f1dSLionel Sambuc	veor	q6, q6, q9
1263*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r0]!
1264*0a6a1f1dSLionel Sambuc	veor	q4, q4, q10
1265*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1266*0a6a1f1dSLionel Sambuc	vst1.8	{q6}, [r1]!
1267*0a6a1f1dSLionel Sambuc	vst1.8	{q4}, [r1]!
1268*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_done
1269*0a6a1f1dSLionel Sambuc.align	4
1270*0a6a1f1dSLionel Sambuc.Lcbc_dec_three:
1271*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x30
1272*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1273*0a6a1f1dSLionel Sambuc	vldmia	r9, {q14}			@ reload IV
1274*0a6a1f1dSLionel Sambuc	vld1.8	{q8-q9}, [r0]!	@ reload input
1275*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1276*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r0]!
1277*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1278*0a6a1f1dSLionel Sambuc	veor	q6, q6, q9
1279*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1280*0a6a1f1dSLionel Sambuc	vst1.8	{q6}, [r1]!
1281*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_done
1282*0a6a1f1dSLionel Sambuc.align	4
1283*0a6a1f1dSLionel Sambuc.Lcbc_dec_two:
1284*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x20
1285*0a6a1f1dSLionel Sambuc	bl	_bsaes_decrypt8
1286*0a6a1f1dSLionel Sambuc	vldmia	r9, {q14}			@ reload IV
1287*0a6a1f1dSLionel Sambuc	vld1.8	{q8}, [r0]!		@ reload input
1288*0a6a1f1dSLionel Sambuc	veor	q0, q0, q14	@ ^= IV
1289*0a6a1f1dSLionel Sambuc	vld1.8	{q15}, [r0]!		@ reload input
1290*0a6a1f1dSLionel Sambuc	veor	q1, q1, q8
1291*0a6a1f1dSLionel Sambuc	vst1.8	{q0-q1}, [r1]!	@ write output
1292*0a6a1f1dSLionel Sambuc	b	.Lcbc_dec_done
1293*0a6a1f1dSLionel Sambuc.align	4
1294*0a6a1f1dSLionel Sambuc.Lcbc_dec_one:
1295*0a6a1f1dSLionel Sambuc	sub	r0, r0, #0x10
1296*0a6a1f1dSLionel Sambuc	mov	r10, r1			@ save original out pointer
1297*0a6a1f1dSLionel Sambuc	mov	r1, r9			@ use the iv scratch space as out buffer
1298*0a6a1f1dSLionel Sambuc	mov	r2, r3
1299*0a6a1f1dSLionel Sambuc	vmov	q4,q15		@ just in case ensure that IV
1300*0a6a1f1dSLionel Sambuc	vmov	q5,q0			@ and input are preserved
1301*0a6a1f1dSLionel Sambuc	bl	AES_decrypt
1302*0a6a1f1dSLionel Sambuc	vld1.8	{q0}, [r9,:64]		@ load result
1303*0a6a1f1dSLionel Sambuc	veor	q0, q0, q4	@ ^= IV
1304*0a6a1f1dSLionel Sambuc	vmov	q15, q5		@ q5 holds input
1305*0a6a1f1dSLionel Sambuc	vst1.8	{q0}, [r10]		@ write output
1306*0a6a1f1dSLionel Sambuc
1307*0a6a1f1dSLionel Sambuc.Lcbc_dec_done:
1308*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1309*0a6a1f1dSLionel Sambuc	vmov.i32	q0, #0
1310*0a6a1f1dSLionel Sambuc	vmov.i32	q1, #0
1311*0a6a1f1dSLionel Sambuc.Lcbc_dec_bzero:				@ wipe key schedule [if any]
1312*0a6a1f1dSLionel Sambuc	vstmia		sp!, {q0-q1}
1313*0a6a1f1dSLionel Sambuc	cmp		sp, r9
1314*0a6a1f1dSLionel Sambuc	bne		.Lcbc_dec_bzero
1315*0a6a1f1dSLionel Sambuc#endif
1316*0a6a1f1dSLionel Sambuc
1317*0a6a1f1dSLionel Sambuc	mov	sp, r9
1318*0a6a1f1dSLionel Sambuc	add	sp, #0x10			@ add sp,r9,#0x10 is no good for thumb
1319*0a6a1f1dSLionel Sambuc	vst1.8	{q15}, [r8]		@ return IV
1320*0a6a1f1dSLionel Sambuc	VFP_ABI_POP
1321*0a6a1f1dSLionel Sambuc	ldmia	sp!, {r4-r10, pc}
1322*0a6a1f1dSLionel Sambuc.size	bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
1323*0a6a1f1dSLionel Sambuc.extern	AES_encrypt
@ -----------------------------------------------------------------------
@ bsaes_ctr32_encrypt_blocks
@
@ AES in CTR mode with a 32-bit counter.  Arguments (AAPCS):
@   r0    input pointer
@   r1    output pointer
@   r2    number of 16-byte blocks to process
@   r3    AES key schedule (round count read from [r3, #240])
@   [sp]  pointer to the 16-byte counter block (5th argument, on stack)
@
@ Fewer than 8 blocks fall back to one-block-at-a-time AES_encrypt
@ (.Lctr_enc_short); otherwise 8 blocks are processed per iteration
@ with the bit-sliced _bsaes_encrypt8_alt.  Only the least-significant
@ 32 bits of the (big-endian) counter are incremented — vadd.u32 on a
@ 1<<96 constant on the wide path, "add r8, #1" on the short path —
@ i.e. CTR32 semantics.  Without BSAES_ASM_EXTENDED_KEY the bit-sliced
@ key schedule lives on the stack and is wiped before returning
@ (.Lctr_enc_bzero).  NOTE(review): on the wide path the updated
@ counter is not written back through the caller's pointer; presumably
@ the C glue tracks it — confirm against the caller.
@ -----------------------------------------------------------------------
1324*0a6a1f1dSLionel Sambuc.global	bsaes_ctr32_encrypt_blocks
1325*0a6a1f1dSLionel Sambuc.type	bsaes_ctr32_encrypt_blocks,%function
1326*0a6a1f1dSLionel Sambuc.align	5
1327*0a6a1f1dSLionel Sambucbsaes_ctr32_encrypt_blocks:
1328*0a6a1f1dSLionel Sambuc	cmp	r2, #8			@ use plain AES for
1329*0a6a1f1dSLionel Sambuc	blo	.Lctr_enc_short			@ small sizes
1330*0a6a1f1dSLionel Sambuc
1331*0a6a1f1dSLionel Sambuc	mov	ip, sp
1332*0a6a1f1dSLionel Sambuc	stmdb	sp!, {r4-r10, lr}
1333*0a6a1f1dSLionel Sambuc	VFP_ABI_PUSH
1334*0a6a1f1dSLionel Sambuc	ldr	r8, [ip]			@ ctr is 1st arg on the stack
1335*0a6a1f1dSLionel Sambuc	sub	sp, sp, #0x10			@ scratch space to carry over the ctr
1336*0a6a1f1dSLionel Sambuc	mov	r9, sp				@ save sp
1337*0a6a1f1dSLionel Sambuc
1338*0a6a1f1dSLionel Sambuc	ldr	r10, [r3, #240]		@ get # of rounds
1339*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1340*0a6a1f1dSLionel Sambuc	@ allocate the key schedule on the stack
1341*0a6a1f1dSLionel Sambuc	sub	r12, sp, r10, lsl#7		@ 128 bytes per inner round key
1342*0a6a1f1dSLionel Sambuc	add	r12, #96			@ size of bit-sliced key schedule
1343*0a6a1f1dSLionel Sambuc
1344*0a6a1f1dSLionel Sambuc	@ populate the key schedule
1345*0a6a1f1dSLionel Sambuc	mov	r4, r3			@ pass key
1346*0a6a1f1dSLionel Sambuc	mov	r5, r10			@ pass # of rounds
1347*0a6a1f1dSLionel Sambuc	mov	sp, r12				@ sp is sp
1348*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
1349*0a6a1f1dSLionel Sambuc	veor	q7,q7,q15	@ fix up last round key
1350*0a6a1f1dSLionel Sambuc	vstmia	r12, {q7}			@ save last round key
1351*0a6a1f1dSLionel Sambuc
1352*0a6a1f1dSLionel Sambuc	vld1.8	{q0}, [r8]		@ load counter
1353*0a6a1f1dSLionel Sambuc	add	r8, r6, #.LREVM0SR-.LM0	@ borrow r8
1354*0a6a1f1dSLionel Sambuc	vldmia	sp, {q4}		@ load round0 key
1355*0a6a1f1dSLionel Sambuc#else
1356*0a6a1f1dSLionel Sambuc	ldr	r12, [r3, #244]
1357*0a6a1f1dSLionel Sambuc	eors	r12, #1
1358*0a6a1f1dSLionel Sambuc	beq	0f
1359*0a6a1f1dSLionel Sambuc
1360*0a6a1f1dSLionel Sambuc	@ populate the key schedule
1361*0a6a1f1dSLionel Sambuc	str	r12, [r3, #244]
1362*0a6a1f1dSLionel Sambuc	mov	r4, r3			@ pass key
1363*0a6a1f1dSLionel Sambuc	mov	r5, r10			@ pass # of rounds
1364*0a6a1f1dSLionel Sambuc	add	r12, r3, #248			@ pass key schedule
1365*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
1366*0a6a1f1dSLionel Sambuc	veor	q7,q7,q15	@ fix up last round key
1367*0a6a1f1dSLionel Sambuc	vstmia	r12, {q7}			@ save last round key
1368*0a6a1f1dSLionel Sambuc
1369*0a6a1f1dSLionel Sambuc.align	2
1370*0a6a1f1dSLionel Sambuc0:	add	r12, r3, #248
1371*0a6a1f1dSLionel Sambuc	vld1.8	{q0}, [r8]		@ load counter
1372*0a6a1f1dSLionel Sambuc	adrl	r8, .LREVM0SR			@ borrow r8
1373*0a6a1f1dSLionel Sambuc	vldmia	r12, {q4}			@ load round0 key
1374*0a6a1f1dSLionel Sambuc	sub	sp, #0x10			@ place for adjusted round0 key
1375*0a6a1f1dSLionel Sambuc#endif
1376*0a6a1f1dSLionel Sambuc
1377*0a6a1f1dSLionel Sambuc	vmov.i32	q8,#1		@ compose 1<<96
1378*0a6a1f1dSLionel Sambuc	veor		q9,q9,q9
1379*0a6a1f1dSLionel Sambuc	vrev32.8	q0,q0
1380*0a6a1f1dSLionel Sambuc	vext.8		q8,q9,q8,#4
1381*0a6a1f1dSLionel Sambuc	vrev32.8	q4,q4
1382*0a6a1f1dSLionel Sambuc	vadd.u32	q9,q8,q8	@ compose 2<<96
1383*0a6a1f1dSLionel Sambuc	vstmia	sp, {q4}		@ save adjusted round0 key
1384*0a6a1f1dSLionel Sambuc	b	.Lctr_enc_loop
1385*0a6a1f1dSLionel Sambuc
1386*0a6a1f1dSLionel Sambuc.align	4
@ Main loop: build counter+0..+7 in q0..q7 (increments applied to the
@ top 32-bit lane, i.e. the counter LSW after vrev32.8), stash the next
@ base counter at [r9], then encrypt all 8 with _bsaes_encrypt8_alt and
@ XOR the keystream with the input.
1387*0a6a1f1dSLionel Sambuc.Lctr_enc_loop:
1388*0a6a1f1dSLionel Sambuc	vadd.u32	q10, q8, q9	@ compose 3<<96
1389*0a6a1f1dSLionel Sambuc	vadd.u32	q1, q0, q8	@ +1
1390*0a6a1f1dSLionel Sambuc	vadd.u32	q2, q0, q9	@ +2
1391*0a6a1f1dSLionel Sambuc	vadd.u32	q3, q0, q10	@ +3
1392*0a6a1f1dSLionel Sambuc	vadd.u32	q4, q1, q10
1393*0a6a1f1dSLionel Sambuc	vadd.u32	q5, q2, q10
1394*0a6a1f1dSLionel Sambuc	vadd.u32	q6, q3, q10
1395*0a6a1f1dSLionel Sambuc	vadd.u32	q7, q4, q10
1396*0a6a1f1dSLionel Sambuc	vadd.u32	q10, q5, q10	@ next counter
1397*0a6a1f1dSLionel Sambuc
1398*0a6a1f1dSLionel Sambuc	@ Borrow prologue from _bsaes_encrypt8 to use the opportunity
1399*0a6a1f1dSLionel Sambuc	@ to flip byte order in 32-bit counter
1400*0a6a1f1dSLionel Sambuc
1401*0a6a1f1dSLionel Sambuc	vldmia		sp, {q9}		@ load round0 key
1402*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1403*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x10		@ pass next round key
1404*0a6a1f1dSLionel Sambuc#else
1405*0a6a1f1dSLionel Sambuc	add		r4, r3, #264
1406*0a6a1f1dSLionel Sambuc#endif
1407*0a6a1f1dSLionel Sambuc	vldmia		r8, {q8}			@ .LREVM0SR
1408*0a6a1f1dSLionel Sambuc	mov		r5, r10			@ pass rounds
1409*0a6a1f1dSLionel Sambuc	vstmia		r9, {q10}			@ save next counter
1410*0a6a1f1dSLionel Sambuc	sub		r6, r8, #.LREVM0SR-.LSR	@ pass constants
1411*0a6a1f1dSLionel Sambuc
1412*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8_alt
1413*0a6a1f1dSLionel Sambuc
1414*0a6a1f1dSLionel Sambuc	subs		r2, r2, #8
1415*0a6a1f1dSLionel Sambuc	blo		.Lctr_enc_loop_done
1416*0a6a1f1dSLionel Sambuc
@ XOR keystream with input; note the bit-sliced output order
@ q0,q1,q4,q6,q3,q7,q2,q5, and that counter constants for the next
@ iteration are recomposed in the store shadow.
1417*0a6a1f1dSLionel Sambuc	vld1.8		{q8-q9}, [r0]!	@ load input
1418*0a6a1f1dSLionel Sambuc	vld1.8		{q10-q11}, [r0]!
1419*0a6a1f1dSLionel Sambuc	veor		q0, q8
1420*0a6a1f1dSLionel Sambuc	veor		q1, q9
1421*0a6a1f1dSLionel Sambuc	vld1.8		{q12-q13}, [r0]!
1422*0a6a1f1dSLionel Sambuc	veor		q4, q10
1423*0a6a1f1dSLionel Sambuc	veor		q6, q11
1424*0a6a1f1dSLionel Sambuc	vld1.8		{q14-q15}, [r0]!
1425*0a6a1f1dSLionel Sambuc	veor		q3, q12
1426*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r1]!	@ write output
1427*0a6a1f1dSLionel Sambuc	veor		q7, q13
1428*0a6a1f1dSLionel Sambuc	veor		q2, q14
1429*0a6a1f1dSLionel Sambuc	vst1.8		{q4}, [r1]!
1430*0a6a1f1dSLionel Sambuc	veor		q5, q15
1431*0a6a1f1dSLionel Sambuc	vst1.8		{q6}, [r1]!
1432*0a6a1f1dSLionel Sambuc	vmov.i32	q8, #1			@ compose 1<<96
1433*0a6a1f1dSLionel Sambuc	vst1.8		{q3}, [r1]!
1434*0a6a1f1dSLionel Sambuc	veor		q9, q9, q9
1435*0a6a1f1dSLionel Sambuc	vst1.8		{q7}, [r1]!
1436*0a6a1f1dSLionel Sambuc	vext.8		q8, q9, q8, #4
1437*0a6a1f1dSLionel Sambuc	vst1.8		{q2}, [r1]!
1438*0a6a1f1dSLionel Sambuc	vadd.u32	q9,q8,q8		@ compose 2<<96
1439*0a6a1f1dSLionel Sambuc	vst1.8		{q5}, [r1]!
1440*0a6a1f1dSLionel Sambuc	vldmia		r9, {q0}			@ load counter
1441*0a6a1f1dSLionel Sambuc
1442*0a6a1f1dSLionel Sambuc	bne		.Lctr_enc_loop
1443*0a6a1f1dSLionel Sambuc	b		.Lctr_enc_done
1444*0a6a1f1dSLionel Sambuc
1445*0a6a1f1dSLionel Sambuc.align	4
@ Tail: 1..7 blocks remained (r2 restored below); q0..q7 already hold
@ the encrypted counters in bit-sliced output order, so consume them
@ in the sequence q0,q1,q4,q6,q3,q7,q2.
1446*0a6a1f1dSLionel Sambuc.Lctr_enc_loop_done:
1447*0a6a1f1dSLionel Sambuc	add		r2, r2, #8
1448*0a6a1f1dSLionel Sambuc	vld1.8		{q8}, [r0]!	@ load input
1449*0a6a1f1dSLionel Sambuc	veor		q0, q8
1450*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [r1]!	@ write output
1451*0a6a1f1dSLionel Sambuc	cmp		r2, #2
1452*0a6a1f1dSLionel Sambuc	blo		.Lctr_enc_done
1453*0a6a1f1dSLionel Sambuc	vld1.8		{q9}, [r0]!
1454*0a6a1f1dSLionel Sambuc	veor		q1, q9
1455*0a6a1f1dSLionel Sambuc	vst1.8		{q1}, [r1]!
1456*0a6a1f1dSLionel Sambuc	beq		.Lctr_enc_done
1457*0a6a1f1dSLionel Sambuc	vld1.8		{q10}, [r0]!
1458*0a6a1f1dSLionel Sambuc	veor		q4, q10
1459*0a6a1f1dSLionel Sambuc	vst1.8		{q4}, [r1]!
1460*0a6a1f1dSLionel Sambuc	cmp		r2, #4
1461*0a6a1f1dSLionel Sambuc	blo		.Lctr_enc_done
1462*0a6a1f1dSLionel Sambuc	vld1.8		{q11}, [r0]!
1463*0a6a1f1dSLionel Sambuc	veor		q6, q11
1464*0a6a1f1dSLionel Sambuc	vst1.8		{q6}, [r1]!
1465*0a6a1f1dSLionel Sambuc	beq		.Lctr_enc_done
1466*0a6a1f1dSLionel Sambuc	vld1.8		{q12}, [r0]!
1467*0a6a1f1dSLionel Sambuc	veor		q3, q12
1468*0a6a1f1dSLionel Sambuc	vst1.8		{q3}, [r1]!
1469*0a6a1f1dSLionel Sambuc	cmp		r2, #6
1470*0a6a1f1dSLionel Sambuc	blo		.Lctr_enc_done
1471*0a6a1f1dSLionel Sambuc	vld1.8		{q13}, [r0]!
1472*0a6a1f1dSLionel Sambuc	veor		q7, q13
1473*0a6a1f1dSLionel Sambuc	vst1.8		{q7}, [r1]!
1474*0a6a1f1dSLionel Sambuc	beq		.Lctr_enc_done
1475*0a6a1f1dSLionel Sambuc	vld1.8		{q14}, [r0]
1476*0a6a1f1dSLionel Sambuc	veor		q2, q14
1477*0a6a1f1dSLionel Sambuc	vst1.8		{q2}, [r1]!
1478*0a6a1f1dSLionel Sambuc
@ Epilogue: zero the stack key schedule (security hygiene), restore sp
@ past the 16-byte counter scratch, and return.
1479*0a6a1f1dSLionel Sambuc.Lctr_enc_done:
1480*0a6a1f1dSLionel Sambuc	vmov.i32	q0, #0
1481*0a6a1f1dSLionel Sambuc	vmov.i32	q1, #0
1482*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1483*0a6a1f1dSLionel Sambuc.Lctr_enc_bzero:			@ wipe key schedule [if any]
1484*0a6a1f1dSLionel Sambuc	vstmia		sp!, {q0-q1}
1485*0a6a1f1dSLionel Sambuc	cmp		sp, r9
1486*0a6a1f1dSLionel Sambuc	bne		.Lctr_enc_bzero
1487*0a6a1f1dSLionel Sambuc#else
1488*0a6a1f1dSLionel Sambuc	vstmia		sp, {q0-q1}
1489*0a6a1f1dSLionel Sambuc#endif
1490*0a6a1f1dSLionel Sambuc
1491*0a6a1f1dSLionel Sambuc	mov	sp, r9
1492*0a6a1f1dSLionel Sambuc	add	sp, #0x10		@ add sp,r9,#0x10 is no good for thumb
1493*0a6a1f1dSLionel Sambuc	VFP_ABI_POP
1494*0a6a1f1dSLionel Sambuc	ldmia	sp!, {r4-r10, pc}	@ return
1495*0a6a1f1dSLionel Sambuc
1496*0a6a1f1dSLionel Sambuc.align	4
@ Short path (<8 blocks): encrypt the counter block with plain
@ AES_encrypt one block at a time and XOR with the input.  The 32-bit
@ counter LSW is kept host-endian in r8 and stored back big-endian at
@ [sp, #0x1c] (word 3 of the counter copy) before each iteration.
1497*0a6a1f1dSLionel Sambuc.Lctr_enc_short:
1498*0a6a1f1dSLionel Sambuc	ldr	ip, [sp]		@ ctr pointer is passed on stack
1499*0a6a1f1dSLionel Sambuc	stmdb	sp!, {r4-r8, lr}
1500*0a6a1f1dSLionel Sambuc
1501*0a6a1f1dSLionel Sambuc	mov	r4, r0		@ copy arguments
1502*0a6a1f1dSLionel Sambuc	mov	r5, r1
1503*0a6a1f1dSLionel Sambuc	mov	r6, r2
1504*0a6a1f1dSLionel Sambuc	mov	r7, r3
1505*0a6a1f1dSLionel Sambuc	ldr	r8, [ip, #12]		@ load counter LSW
1506*0a6a1f1dSLionel Sambuc	vld1.8	{q1}, [ip]		@ load whole counter value
1507*0a6a1f1dSLionel Sambuc#ifdef __ARMEL__
1508*0a6a1f1dSLionel Sambuc	rev	r8, r8
1509*0a6a1f1dSLionel Sambuc#endif
1510*0a6a1f1dSLionel Sambuc	sub	sp, sp, #0x10
1511*0a6a1f1dSLionel Sambuc	vst1.8	{q1}, [sp,:64]	@ copy counter value
1512*0a6a1f1dSLionel Sambuc	sub	sp, sp, #0x10
1513*0a6a1f1dSLionel Sambuc
1514*0a6a1f1dSLionel Sambuc.Lctr_enc_short_loop:
1515*0a6a1f1dSLionel Sambuc	add	r0, sp, #0x10		@ input counter value
1516*0a6a1f1dSLionel Sambuc	mov	r1, sp			@ output on the stack
1517*0a6a1f1dSLionel Sambuc	mov	r2, r7			@ key
1518*0a6a1f1dSLionel Sambuc
1519*0a6a1f1dSLionel Sambuc	bl	AES_encrypt
1520*0a6a1f1dSLionel Sambuc
1521*0a6a1f1dSLionel Sambuc	vld1.8	{q0}, [r4]!	@ load input
1522*0a6a1f1dSLionel Sambuc	vld1.8	{q1}, [sp,:64]	@ load encrypted counter
1523*0a6a1f1dSLionel Sambuc	add	r8, r8, #1
1524*0a6a1f1dSLionel Sambuc#ifdef __ARMEL__
1525*0a6a1f1dSLionel Sambuc	rev	r0, r8
1526*0a6a1f1dSLionel Sambuc	str	r0, [sp, #0x1c]		@ next counter value
1527*0a6a1f1dSLionel Sambuc#else
1528*0a6a1f1dSLionel Sambuc	str	r8, [sp, #0x1c]		@ next counter value
1529*0a6a1f1dSLionel Sambuc#endif
1530*0a6a1f1dSLionel Sambuc	veor	q0,q0,q1
1531*0a6a1f1dSLionel Sambuc	vst1.8	{q0}, [r5]!	@ store output
1532*0a6a1f1dSLionel Sambuc	subs	r6, r6, #1
1533*0a6a1f1dSLionel Sambuc	bne	.Lctr_enc_short_loop
1534*0a6a1f1dSLionel Sambuc
@ Wipe the 32-byte counter/keystream scratch before returning.
1535*0a6a1f1dSLionel Sambuc	vmov.i32	q0, #0
1536*0a6a1f1dSLionel Sambuc	vmov.i32	q1, #0
1537*0a6a1f1dSLionel Sambuc	vstmia		sp!, {q0-q1}
1538*0a6a1f1dSLionel Sambuc
1539*0a6a1f1dSLionel Sambuc	ldmia	sp!, {r4-r8, pc}
1540*0a6a1f1dSLionel Sambuc.size	bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
@ -----------------------------------------------------------------------
@ bsaes_xts_encrypt
@
@ AES-XTS encryption.  Arguments (AAPCS):
@   r0        input pointer
@   r1        output pointer
@   r2        length in bytes
@   r3        first AES key schedule (rounds read from [r3, #240])
@   [ip]      second AES key, used to encrypt the IV into the initial
@             tweak via AES_encrypt  (with XTS_CHAIN_TWEAK defined,
@             [ip] is instead a pointer to the input tweak, written
@             back with the chained tweak on return)
@   [ip, #4]  16-byte IV (absent under XTS_CHAIN_TWEAK)
@
@ Tweaks advance by doubling in GF(2^128): multiply by x with
@ conditional XOR of the reduction constant 0x87 (.Lxts_magic),
@ realized on 64-bit halves as vshr.s64 #63 / vand / vadd.u64 /
@ vswp / veor.  The main loop (.Lxts_enc_loop) consumes 0x80 bytes
@ (8 blocks) per iteration through _bsaes_encrypt8, keeping the 8
@ tweaks in the stack scratch area; .Lxts_enc_short dispatches
@ 1..7 remaining whole blocks (.Lxts_enc_1 .. _6), and
@ .Lxts_enc_steal implements ciphertext stealing for a trailing
@ partial block.  The stack key-schedule/tweak area is zeroed at
@ .Lxts_enc_bzero before returning.
@ -----------------------------------------------------------------------
1541*0a6a1f1dSLionel Sambuc.globl	bsaes_xts_encrypt
1542*0a6a1f1dSLionel Sambuc.type	bsaes_xts_encrypt,%function
1543*0a6a1f1dSLionel Sambuc.align	4
1544*0a6a1f1dSLionel Sambucbsaes_xts_encrypt:
1545*0a6a1f1dSLionel Sambuc	mov	ip, sp
1546*0a6a1f1dSLionel Sambuc	stmdb	sp!, {r4-r10, lr}		@ 0x20
1547*0a6a1f1dSLionel Sambuc	VFP_ABI_PUSH
1548*0a6a1f1dSLionel Sambuc	mov	r6, sp				@ future r3
1549*0a6a1f1dSLionel Sambuc
@ Stash the four register arguments in callee-saved registers; r7..r10
@ hold in/out/len/key1 for the rest of the function.
1550*0a6a1f1dSLionel Sambuc	mov	r7, r0
1551*0a6a1f1dSLionel Sambuc	mov	r8, r1
1552*0a6a1f1dSLionel Sambuc	mov	r9, r2
1553*0a6a1f1dSLionel Sambuc	mov	r10, r3
1554*0a6a1f1dSLionel Sambuc
1555*0a6a1f1dSLionel Sambuc	sub	r0, sp, #0x10			@ 0x10
1556*0a6a1f1dSLionel Sambuc	bic	r0, #0xf			@ align at 16 bytes
1557*0a6a1f1dSLionel Sambuc	mov	sp, r0
1558*0a6a1f1dSLionel Sambuc
1559*0a6a1f1dSLionel Sambuc#ifdef	XTS_CHAIN_TWEAK
1560*0a6a1f1dSLionel Sambuc	ldr	r0, [ip]			@ pointer to input tweak
1561*0a6a1f1dSLionel Sambuc#else
1562*0a6a1f1dSLionel Sambuc	@ generate initial tweak
1563*0a6a1f1dSLionel Sambuc	ldr	r0, [ip, #4]			@ iv[]
1564*0a6a1f1dSLionel Sambuc	mov	r1, sp
1565*0a6a1f1dSLionel Sambuc	ldr	r2, [ip, #0]			@ key2
1566*0a6a1f1dSLionel Sambuc	bl	AES_encrypt
1567*0a6a1f1dSLionel Sambuc	mov	r0,sp				@ pointer to initial tweak
1568*0a6a1f1dSLionel Sambuc#endif
1569*0a6a1f1dSLionel Sambuc
1570*0a6a1f1dSLionel Sambuc	ldr	r1, [r10, #240]		@ get # of rounds
1571*0a6a1f1dSLionel Sambuc	mov	r3, r6
1572*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1573*0a6a1f1dSLionel Sambuc	@ allocate the key schedule on the stack
1574*0a6a1f1dSLionel Sambuc	sub	r12, sp, r1, lsl#7		@ 128 bytes per inner round key
1575*0a6a1f1dSLionel Sambuc	@ add	r12, #96			@ size of bit-sliced key schedule
1576*0a6a1f1dSLionel Sambuc	sub	r12, #48			@ place for tweak[9]
1577*0a6a1f1dSLionel Sambuc
1578*0a6a1f1dSLionel Sambuc	@ populate the key schedule
1579*0a6a1f1dSLionel Sambuc	mov	r4, r10			@ pass key
1580*0a6a1f1dSLionel Sambuc	mov	r5, r1			@ pass # of rounds
1581*0a6a1f1dSLionel Sambuc	mov	sp, r12
1582*0a6a1f1dSLionel Sambuc	add	r12, #0x90			@ pass key schedule
1583*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
1584*0a6a1f1dSLionel Sambuc	veor	q7, q7, q15	@ fix up last round key
1585*0a6a1f1dSLionel Sambuc	vstmia	r12, {q7}			@ save last round key
1586*0a6a1f1dSLionel Sambuc#else
1587*0a6a1f1dSLionel Sambuc	ldr	r12, [r10, #244]
1588*0a6a1f1dSLionel Sambuc	eors	r12, #1
1589*0a6a1f1dSLionel Sambuc	beq	0f
1590*0a6a1f1dSLionel Sambuc
1591*0a6a1f1dSLionel Sambuc	str	r12, [r10, #244]
1592*0a6a1f1dSLionel Sambuc	mov	r4, r10			@ pass key
1593*0a6a1f1dSLionel Sambuc	mov	r5, r1			@ pass # of rounds
1594*0a6a1f1dSLionel Sambuc	add	r12, r10, #248			@ pass key schedule
1595*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
1596*0a6a1f1dSLionel Sambuc	veor	q7, q7, q15	@ fix up last round key
1597*0a6a1f1dSLionel Sambuc	vstmia	r12, {q7}
1598*0a6a1f1dSLionel Sambuc
1599*0a6a1f1dSLionel Sambuc.align	2
1600*0a6a1f1dSLionel Sambuc0:	sub	sp, #0x90			@ place for tweak[9]
1601*0a6a1f1dSLionel Sambuc#endif
1602*0a6a1f1dSLionel Sambuc
1603*0a6a1f1dSLionel Sambuc	vld1.8	{q8}, [r0]			@ initial tweak
1604*0a6a1f1dSLionel Sambuc	adr	r2, .Lxts_magic
1605*0a6a1f1dSLionel Sambuc
1606*0a6a1f1dSLionel Sambuc	subs	r9, #0x80
1607*0a6a1f1dSLionel Sambuc	blo	.Lxts_enc_short
1608*0a6a1f1dSLionel Sambuc	b	.Lxts_enc_loop
1609*0a6a1f1dSLionel Sambuc
1610*0a6a1f1dSLionel Sambuc.align	4
@ 8-block loop: q8 holds the running tweak.  Each step writes the
@ current tweak to the stack scratch ([r0]), doubles it in GF(2^128)
@ (vshr.s64 #63 extracts the carry, vand masks it with {1,0x87},
@ vswp swaps halves, veor folds it in), and interleaves plaintext
@ loads/tweak XORs to hide latency.
1611*0a6a1f1dSLionel Sambuc.Lxts_enc_loop:
1612*0a6a1f1dSLionel Sambuc	vldmia		r2, {q5}	@ load XTS magic
1613*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q8, #63
1614*0a6a1f1dSLionel Sambuc	mov		r0, sp
1615*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1616*0a6a1f1dSLionel Sambuc	vadd.u64	q9, q8, q8
1617*0a6a1f1dSLionel Sambuc	vst1.64		{q8}, [r0,:128]!
1618*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1619*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q9, #63
1620*0a6a1f1dSLionel Sambuc	veor		q9, q9, q6
1621*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1622*0a6a1f1dSLionel Sambuc	vadd.u64	q10, q9, q9
1623*0a6a1f1dSLionel Sambuc	vst1.64		{q9}, [r0,:128]!
1624*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1625*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q10, #63
1626*0a6a1f1dSLionel Sambuc	veor		q10, q10, q7
1627*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1628*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r7]!
1629*0a6a1f1dSLionel Sambuc	vadd.u64	q11, q10, q10
1630*0a6a1f1dSLionel Sambuc	vst1.64		{q10}, [r0,:128]!
1631*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1632*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q11, #63
1633*0a6a1f1dSLionel Sambuc	veor		q11, q11, q6
1634*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1635*0a6a1f1dSLionel Sambuc	vld1.8		{q1}, [r7]!
1636*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1637*0a6a1f1dSLionel Sambuc	vadd.u64	q12, q11, q11
1638*0a6a1f1dSLionel Sambuc	vst1.64		{q11}, [r0,:128]!
1639*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1640*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q12, #63
1641*0a6a1f1dSLionel Sambuc	veor		q12, q12, q7
1642*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1643*0a6a1f1dSLionel Sambuc	vld1.8		{q2}, [r7]!
1644*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1645*0a6a1f1dSLionel Sambuc	vadd.u64	q13, q12, q12
1646*0a6a1f1dSLionel Sambuc	vst1.64		{q12}, [r0,:128]!
1647*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1648*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q13, #63
1649*0a6a1f1dSLionel Sambuc	veor		q13, q13, q6
1650*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1651*0a6a1f1dSLionel Sambuc	vld1.8		{q3}, [r7]!
1652*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
1653*0a6a1f1dSLionel Sambuc	vadd.u64	q14, q13, q13
1654*0a6a1f1dSLionel Sambuc	vst1.64		{q13}, [r0,:128]!
1655*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1656*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q14, #63
1657*0a6a1f1dSLionel Sambuc	veor		q14, q14, q7
1658*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1659*0a6a1f1dSLionel Sambuc	vld1.8		{q4}, [r7]!
1660*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
1661*0a6a1f1dSLionel Sambuc	vadd.u64	q15, q14, q14
1662*0a6a1f1dSLionel Sambuc	vst1.64		{q14}, [r0,:128]!
1663*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1664*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q15, #63
1665*0a6a1f1dSLionel Sambuc	veor		q15, q15, q6
1666*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1667*0a6a1f1dSLionel Sambuc	vld1.8		{q5}, [r7]!
1668*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
1669*0a6a1f1dSLionel Sambuc	vadd.u64	q8, q15, q15
1670*0a6a1f1dSLionel Sambuc	vst1.64		{q15}, [r0,:128]!
1671*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1672*0a6a1f1dSLionel Sambuc	veor		q8, q8, q7
1673*0a6a1f1dSLionel Sambuc	vst1.64		{q8}, [r0,:128]		@ next round tweak
1674*0a6a1f1dSLionel Sambuc
1675*0a6a1f1dSLionel Sambuc	vld1.8		{q6-q7}, [r7]!
1676*0a6a1f1dSLionel Sambuc	veor		q5, q5, q13
1677*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1678*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1679*0a6a1f1dSLionel Sambuc#else
1680*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1681*0a6a1f1dSLionel Sambuc#endif
1682*0a6a1f1dSLionel Sambuc	veor		q6, q6, q14
1683*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1684*0a6a1f1dSLionel Sambuc	veor		q7, q7, q15
1685*0a6a1f1dSLionel Sambuc	mov		r0, sp
1686*0a6a1f1dSLionel Sambuc
1687*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1688*0a6a1f1dSLionel Sambuc
@ Reload the 8 saved tweaks and XOR them with the ciphertext blocks;
@ output registers come back in bit-sliced order q0,q1,q4,q6,q3,q7,q2,q5.
1689*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1690*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
1691*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1692*0a6a1f1dSLionel Sambuc	vld1.64		{q12-q13}, [r0,:128]!
1693*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1694*0a6a1f1dSLionel Sambuc	veor		q8, q4, q10
1695*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1696*0a6a1f1dSLionel Sambuc	veor		q9, q6, q11
1697*0a6a1f1dSLionel Sambuc	vld1.64		{q14-q15}, [r0,:128]!
1698*0a6a1f1dSLionel Sambuc	veor		q10, q3, q12
1699*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
1700*0a6a1f1dSLionel Sambuc	veor		q11, q7, q13
1701*0a6a1f1dSLionel Sambuc	veor		q12, q2, q14
1702*0a6a1f1dSLionel Sambuc	vst1.8		{q10-q11}, [r8]!
1703*0a6a1f1dSLionel Sambuc	veor		q13, q5, q15
1704*0a6a1f1dSLionel Sambuc	vst1.8		{q12-q13}, [r8]!
1705*0a6a1f1dSLionel Sambuc
1706*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1707*0a6a1f1dSLionel Sambuc
1708*0a6a1f1dSLionel Sambuc	subs		r9, #0x80
1709*0a6a1f1dSLionel Sambuc	bpl		.Lxts_enc_loop
1710*0a6a1f1dSLionel Sambuc
@ Short path: r9 = remaining-0x80; bias by 0x70 so each "subs #0x10"
@ peels one block, branching to .Lxts_enc_N once fewer than N whole
@ blocks remain.  Tweak doubling proceeds exactly as in the main loop.
1711*0a6a1f1dSLionel Sambuc.Lxts_enc_short:
1712*0a6a1f1dSLionel Sambuc	adds		r9, #0x70
1713*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_done
1714*0a6a1f1dSLionel Sambuc
1715*0a6a1f1dSLionel Sambuc	vldmia		r2, {q5}	@ load XTS magic
1716*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q8, #63
1717*0a6a1f1dSLionel Sambuc	mov		r0, sp
1718*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1719*0a6a1f1dSLionel Sambuc	vadd.u64	q9, q8, q8
1720*0a6a1f1dSLionel Sambuc	vst1.64		{q8}, [r0,:128]!
1721*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1722*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q9, #63
1723*0a6a1f1dSLionel Sambuc	veor		q9, q9, q7
1724*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1725*0a6a1f1dSLionel Sambuc	vadd.u64	q10, q9, q9
1726*0a6a1f1dSLionel Sambuc	vst1.64		{q9}, [r0,:128]!
1727*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1728*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q10, #63
1729*0a6a1f1dSLionel Sambuc	veor		q10, q10, q6
1730*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1731*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r7]!
1732*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
1733*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_1
1734*0a6a1f1dSLionel Sambuc	vadd.u64	q11, q10, q10
1735*0a6a1f1dSLionel Sambuc	vst1.64		{q10}, [r0,:128]!
1736*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1737*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q11, #63
1738*0a6a1f1dSLionel Sambuc	veor		q11, q11, q7
1739*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1740*0a6a1f1dSLionel Sambuc	vld1.8		{q1}, [r7]!
1741*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
1742*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_2
1743*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1744*0a6a1f1dSLionel Sambuc	vadd.u64	q12, q11, q11
1745*0a6a1f1dSLionel Sambuc	vst1.64		{q11}, [r0,:128]!
1746*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1747*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q12, #63
1748*0a6a1f1dSLionel Sambuc	veor		q12, q12, q6
1749*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1750*0a6a1f1dSLionel Sambuc	vld1.8		{q2}, [r7]!
1751*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
1752*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_3
1753*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1754*0a6a1f1dSLionel Sambuc	vadd.u64	q13, q12, q12
1755*0a6a1f1dSLionel Sambuc	vst1.64		{q12}, [r0,:128]!
1756*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1757*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q13, #63
1758*0a6a1f1dSLionel Sambuc	veor		q13, q13, q7
1759*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1760*0a6a1f1dSLionel Sambuc	vld1.8		{q3}, [r7]!
1761*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
1762*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_4
1763*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
1764*0a6a1f1dSLionel Sambuc	vadd.u64	q14, q13, q13
1765*0a6a1f1dSLionel Sambuc	vst1.64		{q13}, [r0,:128]!
1766*0a6a1f1dSLionel Sambuc	vswp		d13,d12
1767*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q14, #63
1768*0a6a1f1dSLionel Sambuc	veor		q14, q14, q6
1769*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
1770*0a6a1f1dSLionel Sambuc	vld1.8		{q4}, [r7]!
1771*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
1772*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_5
1773*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
1774*0a6a1f1dSLionel Sambuc	vadd.u64	q15, q14, q14
1775*0a6a1f1dSLionel Sambuc	vst1.64		{q14}, [r0,:128]!
1776*0a6a1f1dSLionel Sambuc	vswp		d15,d14
1777*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q15, #63
1778*0a6a1f1dSLionel Sambuc	veor		q15, q15, q7
1779*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
1780*0a6a1f1dSLionel Sambuc	vld1.8		{q5}, [r7]!
1781*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
1782*0a6a1f1dSLionel Sambuc	bmi		.Lxts_enc_6
@ 7 blocks: q0..q6 in flight; encrypt 8 (q7 is a don't-care lane) and
@ store only the 7 real ciphertext blocks.
1783*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
1784*0a6a1f1dSLionel Sambuc	sub		r9, #0x10
1785*0a6a1f1dSLionel Sambuc	vst1.64		{q15}, [r0,:128]		@ next round tweak
1786*0a6a1f1dSLionel Sambuc
1787*0a6a1f1dSLionel Sambuc	vld1.8		{q6}, [r7]!
1788*0a6a1f1dSLionel Sambuc	veor		q5, q5, q13
1789*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1790*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1791*0a6a1f1dSLionel Sambuc#else
1792*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1793*0a6a1f1dSLionel Sambuc#endif
1794*0a6a1f1dSLionel Sambuc	veor		q6, q6, q14
1795*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1796*0a6a1f1dSLionel Sambuc	mov		r0, sp
1797*0a6a1f1dSLionel Sambuc
1798*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1799*0a6a1f1dSLionel Sambuc
1800*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1801*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
1802*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1803*0a6a1f1dSLionel Sambuc	vld1.64		{q12-q13}, [r0,:128]!
1804*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1805*0a6a1f1dSLionel Sambuc	veor		q8, q4, q10
1806*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1807*0a6a1f1dSLionel Sambuc	veor		q9, q6, q11
1808*0a6a1f1dSLionel Sambuc	vld1.64		{q14}, [r0,:128]!
1809*0a6a1f1dSLionel Sambuc	veor		q10, q3, q12
1810*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
1811*0a6a1f1dSLionel Sambuc	veor		q11, q7, q13
1812*0a6a1f1dSLionel Sambuc	veor		q12, q2, q14
1813*0a6a1f1dSLionel Sambuc	vst1.8		{q10-q11}, [r8]!
1814*0a6a1f1dSLionel Sambuc	vst1.8		{q12}, [r8]!
1815*0a6a1f1dSLionel Sambuc
1816*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1817*0a6a1f1dSLionel Sambuc	b		.Lxts_enc_done
1818*0a6a1f1dSLionel Sambuc.align	4
@ 6 blocks remaining.
1819*0a6a1f1dSLionel Sambuc.Lxts_enc_6:
1820*0a6a1f1dSLionel Sambuc	vst1.64		{q14}, [r0,:128]		@ next round tweak
1821*0a6a1f1dSLionel Sambuc
1822*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
1823*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1824*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1825*0a6a1f1dSLionel Sambuc#else
1826*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1827*0a6a1f1dSLionel Sambuc#endif
1828*0a6a1f1dSLionel Sambuc	veor		q5, q5, q13
1829*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1830*0a6a1f1dSLionel Sambuc	mov		r0, sp
1831*0a6a1f1dSLionel Sambuc
1832*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1833*0a6a1f1dSLionel Sambuc
1834*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1835*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
1836*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1837*0a6a1f1dSLionel Sambuc	vld1.64		{q12-q13}, [r0,:128]!
1838*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1839*0a6a1f1dSLionel Sambuc	veor		q8, q4, q10
1840*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1841*0a6a1f1dSLionel Sambuc	veor		q9, q6, q11
1842*0a6a1f1dSLionel Sambuc	veor		q10, q3, q12
1843*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
1844*0a6a1f1dSLionel Sambuc	veor		q11, q7, q13
1845*0a6a1f1dSLionel Sambuc	vst1.8		{q10-q11}, [r8]!
1846*0a6a1f1dSLionel Sambuc
1847*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1848*0a6a1f1dSLionel Sambuc	b		.Lxts_enc_done
1849*0a6a1f1dSLionel Sambuc
1850*0a6a1f1dSLionel Sambuc@ put this in range for both ARM and Thumb mode adr instructions
1851*0a6a1f1dSLionel Sambuc.align	5
@ GF(2^128) doubling constants: low 64-bit lane = 1 (carry mask for the
@ high half), high lane = 0x87 (the x^128 reduction polynomial tail).
1852*0a6a1f1dSLionel Sambuc.Lxts_magic:
1853*0a6a1f1dSLionel Sambuc	.quad	1, 0x87
1854*0a6a1f1dSLionel Sambuc
1855*0a6a1f1dSLionel Sambuc.align	5
@ 5 blocks remaining.
1856*0a6a1f1dSLionel Sambuc.Lxts_enc_5:
1857*0a6a1f1dSLionel Sambuc	vst1.64		{q13}, [r0,:128]		@ next round tweak
1858*0a6a1f1dSLionel Sambuc
1859*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
1860*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1861*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1862*0a6a1f1dSLionel Sambuc#else
1863*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1864*0a6a1f1dSLionel Sambuc#endif
1865*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
1866*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1867*0a6a1f1dSLionel Sambuc	mov		r0, sp
1868*0a6a1f1dSLionel Sambuc
1869*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1870*0a6a1f1dSLionel Sambuc
1871*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1872*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
1873*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1874*0a6a1f1dSLionel Sambuc	vld1.64		{q12}, [r0,:128]!
1875*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1876*0a6a1f1dSLionel Sambuc	veor		q8, q4, q10
1877*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1878*0a6a1f1dSLionel Sambuc	veor		q9, q6, q11
1879*0a6a1f1dSLionel Sambuc	veor		q10, q3, q12
1880*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
1881*0a6a1f1dSLionel Sambuc	vst1.8		{q10}, [r8]!
1882*0a6a1f1dSLionel Sambuc
1883*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1884*0a6a1f1dSLionel Sambuc	b		.Lxts_enc_done
1885*0a6a1f1dSLionel Sambuc.align	4
@ 4 blocks remaining.
1886*0a6a1f1dSLionel Sambuc.Lxts_enc_4:
1887*0a6a1f1dSLionel Sambuc	vst1.64		{q12}, [r0,:128]		@ next round tweak
1888*0a6a1f1dSLionel Sambuc
1889*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
1890*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1891*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1892*0a6a1f1dSLionel Sambuc#else
1893*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1894*0a6a1f1dSLionel Sambuc#endif
1895*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
1896*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1897*0a6a1f1dSLionel Sambuc	mov		r0, sp
1898*0a6a1f1dSLionel Sambuc
1899*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1900*0a6a1f1dSLionel Sambuc
1901*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1902*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
1903*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1904*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1905*0a6a1f1dSLionel Sambuc	veor		q8, q4, q10
1906*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1907*0a6a1f1dSLionel Sambuc	veor		q9, q6, q11
1908*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
1909*0a6a1f1dSLionel Sambuc
1910*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1911*0a6a1f1dSLionel Sambuc	b		.Lxts_enc_done
1912*0a6a1f1dSLionel Sambuc.align	4
@ 3 blocks remaining.
1913*0a6a1f1dSLionel Sambuc.Lxts_enc_3:
1914*0a6a1f1dSLionel Sambuc	vst1.64		{q11}, [r0,:128]		@ next round tweak
1915*0a6a1f1dSLionel Sambuc
1916*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1917*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1918*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1919*0a6a1f1dSLionel Sambuc#else
1920*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1921*0a6a1f1dSLionel Sambuc#endif
1922*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
1923*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1924*0a6a1f1dSLionel Sambuc	mov		r0, sp
1925*0a6a1f1dSLionel Sambuc
1926*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1927*0a6a1f1dSLionel Sambuc
1928*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1929*0a6a1f1dSLionel Sambuc	vld1.64		{q10}, [r0,:128]!
1930*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1931*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1932*0a6a1f1dSLionel Sambuc	veor		q8, q4, q10
1933*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1934*0a6a1f1dSLionel Sambuc	vst1.8		{q8}, [r8]!
1935*0a6a1f1dSLionel Sambuc
1936*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1937*0a6a1f1dSLionel Sambuc	b		.Lxts_enc_done
1938*0a6a1f1dSLionel Sambuc.align	4
@ 2 blocks remaining.
1939*0a6a1f1dSLionel Sambuc.Lxts_enc_2:
1940*0a6a1f1dSLionel Sambuc	vst1.64		{q10}, [r0,:128]		@ next round tweak
1941*0a6a1f1dSLionel Sambuc
1942*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1943*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
1944*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
1945*0a6a1f1dSLionel Sambuc#else
1946*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
1947*0a6a1f1dSLionel Sambuc#endif
1948*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1949*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
1950*0a6a1f1dSLionel Sambuc	mov		r0, sp
1951*0a6a1f1dSLionel Sambuc
1952*0a6a1f1dSLionel Sambuc	bl		_bsaes_encrypt8
1953*0a6a1f1dSLionel Sambuc
1954*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
1955*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1956*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
1957*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
1958*0a6a1f1dSLionel Sambuc
1959*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
1960*0a6a1f1dSLionel Sambuc	b		.Lxts_enc_done
1961*0a6a1f1dSLionel Sambuc.align	4
@ Single block: not worth the bit-sliced path, so XOR with the tweak,
@ run plain AES_encrypt on the stack copy, XOR with the tweak again.
1962*0a6a1f1dSLionel Sambuc.Lxts_enc_1:
1963*0a6a1f1dSLionel Sambuc	mov		r0, sp
1964*0a6a1f1dSLionel Sambuc	veor		q0, q8
1965*0a6a1f1dSLionel Sambuc	mov		r1, sp
1966*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [sp,:128]
1967*0a6a1f1dSLionel Sambuc	mov		r2, r10
1968*0a6a1f1dSLionel Sambuc	mov		r4, r3				@ preserve fp
1969*0a6a1f1dSLionel Sambuc
1970*0a6a1f1dSLionel Sambuc	bl		AES_encrypt
1971*0a6a1f1dSLionel Sambuc
1972*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [sp,:128]
1973*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1974*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [r8]!
1975*0a6a1f1dSLionel Sambuc	mov		r3, r4
1976*0a6a1f1dSLionel Sambuc
1977*0a6a1f1dSLionel Sambuc	vmov		q8, q9		@ next round tweak
1978*0a6a1f1dSLionel Sambuc
1979*0a6a1f1dSLionel Sambuc.Lxts_enc_done:
1980*0a6a1f1dSLionel Sambuc#ifndef	XTS_CHAIN_TWEAK
@ Ciphertext stealing: swap the trailing partial block's plaintext with
@ the head of the last full ciphertext block, then re-encrypt that
@ block in place with the current tweak.
1981*0a6a1f1dSLionel Sambuc	adds		r9, #0x10
1982*0a6a1f1dSLionel Sambuc	beq		.Lxts_enc_ret
1983*0a6a1f1dSLionel Sambuc	sub		r6, r8, #0x10
1984*0a6a1f1dSLionel Sambuc
1985*0a6a1f1dSLionel Sambuc.Lxts_enc_steal:
1986*0a6a1f1dSLionel Sambuc	ldrb		r0, [r7], #1
1987*0a6a1f1dSLionel Sambuc	ldrb		r1, [r8, #-0x10]
1988*0a6a1f1dSLionel Sambuc	strb		r0, [r8, #-0x10]
1989*0a6a1f1dSLionel Sambuc	strb		r1, [r8], #1
1990*0a6a1f1dSLionel Sambuc
1991*0a6a1f1dSLionel Sambuc	subs		r9, #1
1992*0a6a1f1dSLionel Sambuc	bhi		.Lxts_enc_steal
1993*0a6a1f1dSLionel Sambuc
1994*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r6]
1995*0a6a1f1dSLionel Sambuc	mov		r0, sp
1996*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
1997*0a6a1f1dSLionel Sambuc	mov		r1, sp
1998*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [sp,:128]
1999*0a6a1f1dSLionel Sambuc	mov		r2, r10
2000*0a6a1f1dSLionel Sambuc	mov		r4, r3			@ preserve fp
2001*0a6a1f1dSLionel Sambuc
2002*0a6a1f1dSLionel Sambuc	bl		AES_encrypt
2003*0a6a1f1dSLionel Sambuc
2004*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [sp,:128]
2005*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2006*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [r6]
2007*0a6a1f1dSLionel Sambuc	mov		r3, r4
2008*0a6a1f1dSLionel Sambuc#endif
2009*0a6a1f1dSLionel Sambuc
@ Common exit: zero the stack scratch (key schedule + tweaks), restore
@ sp/frame, optionally write the chained tweak back, and return.
2010*0a6a1f1dSLionel Sambuc.Lxts_enc_ret:
2011*0a6a1f1dSLionel Sambuc	bic		r0, r3, #0xf
2012*0a6a1f1dSLionel Sambuc	vmov.i32	q0, #0
2013*0a6a1f1dSLionel Sambuc	vmov.i32	q1, #0
2014*0a6a1f1dSLionel Sambuc#ifdef	XTS_CHAIN_TWEAK
2015*0a6a1f1dSLionel Sambuc	ldr		r1, [r3, #0x20+VFP_ABI_FRAME]	@ chain tweak
2016*0a6a1f1dSLionel Sambuc#endif
2017*0a6a1f1dSLionel Sambuc.Lxts_enc_bzero:				@ wipe key schedule [if any]
2018*0a6a1f1dSLionel Sambuc	vstmia		sp!, {q0-q1}
2019*0a6a1f1dSLionel Sambuc	cmp		sp, r0
2020*0a6a1f1dSLionel Sambuc	bne		.Lxts_enc_bzero
2021*0a6a1f1dSLionel Sambuc
2022*0a6a1f1dSLionel Sambuc	mov		sp, r3
2023*0a6a1f1dSLionel Sambuc#ifdef	XTS_CHAIN_TWEAK
2024*0a6a1f1dSLionel Sambuc	vst1.8		{q8}, [r1]
2025*0a6a1f1dSLionel Sambuc#endif
2026*0a6a1f1dSLionel Sambuc	VFP_ABI_POP
2027*0a6a1f1dSLionel Sambuc	ldmia		sp!, {r4-r10, pc}	@ return
2028*0a6a1f1dSLionel Sambuc
2029*0a6a1f1dSLionel Sambuc.size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
2030*0a6a1f1dSLionel Sambuc
2031*0a6a1f1dSLionel Sambuc.globl	bsaes_xts_decrypt
2032*0a6a1f1dSLionel Sambuc.type	bsaes_xts_decrypt,%function
2033*0a6a1f1dSLionel Sambuc.align	4
2034*0a6a1f1dSLionel Sambucbsaes_xts_decrypt:
2035*0a6a1f1dSLionel Sambuc	mov	ip, sp
2036*0a6a1f1dSLionel Sambuc	stmdb	sp!, {r4-r10, lr}		@ 0x20
2037*0a6a1f1dSLionel Sambuc	VFP_ABI_PUSH
2038*0a6a1f1dSLionel Sambuc	mov	r6, sp				@ future r3
2039*0a6a1f1dSLionel Sambuc
2040*0a6a1f1dSLionel Sambuc	mov	r7, r0
2041*0a6a1f1dSLionel Sambuc	mov	r8, r1
2042*0a6a1f1dSLionel Sambuc	mov	r9, r2
2043*0a6a1f1dSLionel Sambuc	mov	r10, r3
2044*0a6a1f1dSLionel Sambuc
2045*0a6a1f1dSLionel Sambuc	sub	r0, sp, #0x10			@ 0x10
2046*0a6a1f1dSLionel Sambuc	bic	r0, #0xf			@ align at 16 bytes
2047*0a6a1f1dSLionel Sambuc	mov	sp, r0
2048*0a6a1f1dSLionel Sambuc
2049*0a6a1f1dSLionel Sambuc#ifdef	XTS_CHAIN_TWEAK
2050*0a6a1f1dSLionel Sambuc	ldr	r0, [ip]			@ pointer to input tweak
2051*0a6a1f1dSLionel Sambuc#else
2052*0a6a1f1dSLionel Sambuc	@ generate initial tweak
2053*0a6a1f1dSLionel Sambuc	ldr	r0, [ip, #4]			@ iv[]
2054*0a6a1f1dSLionel Sambuc	mov	r1, sp
2055*0a6a1f1dSLionel Sambuc	ldr	r2, [ip, #0]			@ key2
2056*0a6a1f1dSLionel Sambuc	bl	AES_encrypt
2057*0a6a1f1dSLionel Sambuc	mov	r0, sp				@ pointer to initial tweak
2058*0a6a1f1dSLionel Sambuc#endif
2059*0a6a1f1dSLionel Sambuc
2060*0a6a1f1dSLionel Sambuc	ldr	r1, [r10, #240]		@ get # of rounds
2061*0a6a1f1dSLionel Sambuc	mov	r3, r6
2062*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2063*0a6a1f1dSLionel Sambuc	@ allocate the key schedule on the stack
2064*0a6a1f1dSLionel Sambuc	sub	r12, sp, r1, lsl#7		@ 128 bytes per inner round key
2065*0a6a1f1dSLionel Sambuc	@ add	r12, #96			@ size of bit-sliced key schedule
2066*0a6a1f1dSLionel Sambuc	sub	r12, #48			@ place for tweak[9]
2067*0a6a1f1dSLionel Sambuc
2068*0a6a1f1dSLionel Sambuc	@ populate the key schedule
2069*0a6a1f1dSLionel Sambuc	mov	r4, r10			@ pass key
2070*0a6a1f1dSLionel Sambuc	mov	r5, r1			@ pass # of rounds
2071*0a6a1f1dSLionel Sambuc	mov	sp, r12
2072*0a6a1f1dSLionel Sambuc	add	r12, #0x90			@ pass key schedule
2073*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
2074*0a6a1f1dSLionel Sambuc	add	r4, sp, #0x90
2075*0a6a1f1dSLionel Sambuc	vldmia	r4, {q6}
2076*0a6a1f1dSLionel Sambuc	vstmia	r12,  {q15}		@ save last round key
2077*0a6a1f1dSLionel Sambuc	veor	q7, q7, q6	@ fix up round 0 key
2078*0a6a1f1dSLionel Sambuc	vstmia	r4, {q7}
2079*0a6a1f1dSLionel Sambuc#else
2080*0a6a1f1dSLionel Sambuc	ldr	r12, [r10, #244]
2081*0a6a1f1dSLionel Sambuc	eors	r12, #1
2082*0a6a1f1dSLionel Sambuc	beq	0f
2083*0a6a1f1dSLionel Sambuc
2084*0a6a1f1dSLionel Sambuc	str	r12, [r10, #244]
2085*0a6a1f1dSLionel Sambuc	mov	r4, r10			@ pass key
2086*0a6a1f1dSLionel Sambuc	mov	r5, r1			@ pass # of rounds
2087*0a6a1f1dSLionel Sambuc	add	r12, r10, #248			@ pass key schedule
2088*0a6a1f1dSLionel Sambuc	bl	_bsaes_key_convert
2089*0a6a1f1dSLionel Sambuc	add	r4, r10, #248
2090*0a6a1f1dSLionel Sambuc	vldmia	r4, {q6}
2091*0a6a1f1dSLionel Sambuc	vstmia	r12,  {q15}		@ save last round key
2092*0a6a1f1dSLionel Sambuc	veor	q7, q7, q6	@ fix up round 0 key
2093*0a6a1f1dSLionel Sambuc	vstmia	r4, {q7}
2094*0a6a1f1dSLionel Sambuc
2095*0a6a1f1dSLionel Sambuc.align	2
2096*0a6a1f1dSLionel Sambuc0:	sub	sp, #0x90			@ place for tweak[9]
2097*0a6a1f1dSLionel Sambuc#endif
2098*0a6a1f1dSLionel Sambuc	vld1.8	{q8}, [r0]			@ initial tweak
2099*0a6a1f1dSLionel Sambuc	adr	r2, .Lxts_magic
2100*0a6a1f1dSLionel Sambuc
2101*0a6a1f1dSLionel Sambuc	tst	r9, #0xf			@ if not multiple of 16
2102*0a6a1f1dSLionel Sambuc	it	ne				@ Thumb2 thing, sanity check in ARM
2103*0a6a1f1dSLionel Sambuc	subne	r9, #0x10			@ subtract another 16 bytes
2104*0a6a1f1dSLionel Sambuc	subs	r9, #0x80
2105*0a6a1f1dSLionel Sambuc
2106*0a6a1f1dSLionel Sambuc	blo	.Lxts_dec_short
2107*0a6a1f1dSLionel Sambuc	b	.Lxts_dec_loop
2108*0a6a1f1dSLionel Sambuc
2109*0a6a1f1dSLionel Sambuc.align	4
2110*0a6a1f1dSLionel Sambuc.Lxts_dec_loop:
2111*0a6a1f1dSLionel Sambuc	vldmia		r2, {q5}	@ load XTS magic
2112*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q8, #63
2113*0a6a1f1dSLionel Sambuc	mov		r0, sp
2114*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2115*0a6a1f1dSLionel Sambuc	vadd.u64	q9, q8, q8
2116*0a6a1f1dSLionel Sambuc	vst1.64		{q8}, [r0,:128]!
2117*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2118*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q9, #63
2119*0a6a1f1dSLionel Sambuc	veor		q9, q9, q6
2120*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2121*0a6a1f1dSLionel Sambuc	vadd.u64	q10, q9, q9
2122*0a6a1f1dSLionel Sambuc	vst1.64		{q9}, [r0,:128]!
2123*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2124*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q10, #63
2125*0a6a1f1dSLionel Sambuc	veor		q10, q10, q7
2126*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2127*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r7]!
2128*0a6a1f1dSLionel Sambuc	vadd.u64	q11, q10, q10
2129*0a6a1f1dSLionel Sambuc	vst1.64		{q10}, [r0,:128]!
2130*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2131*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q11, #63
2132*0a6a1f1dSLionel Sambuc	veor		q11, q11, q6
2133*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2134*0a6a1f1dSLionel Sambuc	vld1.8		{q1}, [r7]!
2135*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2136*0a6a1f1dSLionel Sambuc	vadd.u64	q12, q11, q11
2137*0a6a1f1dSLionel Sambuc	vst1.64		{q11}, [r0,:128]!
2138*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2139*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q12, #63
2140*0a6a1f1dSLionel Sambuc	veor		q12, q12, q7
2141*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2142*0a6a1f1dSLionel Sambuc	vld1.8		{q2}, [r7]!
2143*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2144*0a6a1f1dSLionel Sambuc	vadd.u64	q13, q12, q12
2145*0a6a1f1dSLionel Sambuc	vst1.64		{q12}, [r0,:128]!
2146*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2147*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q13, #63
2148*0a6a1f1dSLionel Sambuc	veor		q13, q13, q6
2149*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2150*0a6a1f1dSLionel Sambuc	vld1.8		{q3}, [r7]!
2151*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
2152*0a6a1f1dSLionel Sambuc	vadd.u64	q14, q13, q13
2153*0a6a1f1dSLionel Sambuc	vst1.64		{q13}, [r0,:128]!
2154*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2155*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q14, #63
2156*0a6a1f1dSLionel Sambuc	veor		q14, q14, q7
2157*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2158*0a6a1f1dSLionel Sambuc	vld1.8		{q4}, [r7]!
2159*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
2160*0a6a1f1dSLionel Sambuc	vadd.u64	q15, q14, q14
2161*0a6a1f1dSLionel Sambuc	vst1.64		{q14}, [r0,:128]!
2162*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2163*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q15, #63
2164*0a6a1f1dSLionel Sambuc	veor		q15, q15, q6
2165*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2166*0a6a1f1dSLionel Sambuc	vld1.8		{q5}, [r7]!
2167*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
2168*0a6a1f1dSLionel Sambuc	vadd.u64	q8, q15, q15
2169*0a6a1f1dSLionel Sambuc	vst1.64		{q15}, [r0,:128]!
2170*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2171*0a6a1f1dSLionel Sambuc	veor		q8, q8, q7
2172*0a6a1f1dSLionel Sambuc	vst1.64		{q8}, [r0,:128]		@ next round tweak
2173*0a6a1f1dSLionel Sambuc
2174*0a6a1f1dSLionel Sambuc	vld1.8		{q6-q7}, [r7]!
2175*0a6a1f1dSLionel Sambuc	veor		q5, q5, q13
2176*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2177*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2178*0a6a1f1dSLionel Sambuc#else
2179*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2180*0a6a1f1dSLionel Sambuc#endif
2181*0a6a1f1dSLionel Sambuc	veor		q6, q6, q14
2182*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2183*0a6a1f1dSLionel Sambuc	veor		q7, q7, q15
2184*0a6a1f1dSLionel Sambuc	mov		r0, sp
2185*0a6a1f1dSLionel Sambuc
2186*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2187*0a6a1f1dSLionel Sambuc
2188*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2189*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
2190*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2191*0a6a1f1dSLionel Sambuc	vld1.64		{q12-q13}, [r0,:128]!
2192*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2193*0a6a1f1dSLionel Sambuc	veor		q8, q6, q10
2194*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2195*0a6a1f1dSLionel Sambuc	veor		q9, q4, q11
2196*0a6a1f1dSLionel Sambuc	vld1.64		{q14-q15}, [r0,:128]!
2197*0a6a1f1dSLionel Sambuc	veor		q10, q2, q12
2198*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
2199*0a6a1f1dSLionel Sambuc	veor		q11, q7, q13
2200*0a6a1f1dSLionel Sambuc	veor		q12, q3, q14
2201*0a6a1f1dSLionel Sambuc	vst1.8		{q10-q11}, [r8]!
2202*0a6a1f1dSLionel Sambuc	veor		q13, q5, q15
2203*0a6a1f1dSLionel Sambuc	vst1.8		{q12-q13}, [r8]!
2204*0a6a1f1dSLionel Sambuc
2205*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2206*0a6a1f1dSLionel Sambuc
2207*0a6a1f1dSLionel Sambuc	subs		r9, #0x80
2208*0a6a1f1dSLionel Sambuc	bpl		.Lxts_dec_loop
2209*0a6a1f1dSLionel Sambuc
2210*0a6a1f1dSLionel Sambuc.Lxts_dec_short:
2211*0a6a1f1dSLionel Sambuc	adds		r9, #0x70
2212*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_done
2213*0a6a1f1dSLionel Sambuc
2214*0a6a1f1dSLionel Sambuc	vldmia		r2, {q5}	@ load XTS magic
2215*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q8, #63
2216*0a6a1f1dSLionel Sambuc	mov		r0, sp
2217*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2218*0a6a1f1dSLionel Sambuc	vadd.u64	q9, q8, q8
2219*0a6a1f1dSLionel Sambuc	vst1.64		{q8}, [r0,:128]!
2220*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2221*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q9, #63
2222*0a6a1f1dSLionel Sambuc	veor		q9, q9, q7
2223*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2224*0a6a1f1dSLionel Sambuc	vadd.u64	q10, q9, q9
2225*0a6a1f1dSLionel Sambuc	vst1.64		{q9}, [r0,:128]!
2226*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2227*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q10, #63
2228*0a6a1f1dSLionel Sambuc	veor		q10, q10, q6
2229*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2230*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r7]!
2231*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
2232*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_1
2233*0a6a1f1dSLionel Sambuc	vadd.u64	q11, q10, q10
2234*0a6a1f1dSLionel Sambuc	vst1.64		{q10}, [r0,:128]!
2235*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2236*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q11, #63
2237*0a6a1f1dSLionel Sambuc	veor		q11, q11, q7
2238*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2239*0a6a1f1dSLionel Sambuc	vld1.8		{q1}, [r7]!
2240*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
2241*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_2
2242*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2243*0a6a1f1dSLionel Sambuc	vadd.u64	q12, q11, q11
2244*0a6a1f1dSLionel Sambuc	vst1.64		{q11}, [r0,:128]!
2245*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2246*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q12, #63
2247*0a6a1f1dSLionel Sambuc	veor		q12, q12, q6
2248*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2249*0a6a1f1dSLionel Sambuc	vld1.8		{q2}, [r7]!
2250*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
2251*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_3
2252*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2253*0a6a1f1dSLionel Sambuc	vadd.u64	q13, q12, q12
2254*0a6a1f1dSLionel Sambuc	vst1.64		{q12}, [r0,:128]!
2255*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2256*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q13, #63
2257*0a6a1f1dSLionel Sambuc	veor		q13, q13, q7
2258*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2259*0a6a1f1dSLionel Sambuc	vld1.8		{q3}, [r7]!
2260*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
2261*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_4
2262*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
2263*0a6a1f1dSLionel Sambuc	vadd.u64	q14, q13, q13
2264*0a6a1f1dSLionel Sambuc	vst1.64		{q13}, [r0,:128]!
2265*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2266*0a6a1f1dSLionel Sambuc	vshr.s64	q7, q14, #63
2267*0a6a1f1dSLionel Sambuc	veor		q14, q14, q6
2268*0a6a1f1dSLionel Sambuc	vand		q7, q7, q5
2269*0a6a1f1dSLionel Sambuc	vld1.8		{q4}, [r7]!
2270*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
2271*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_5
2272*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
2273*0a6a1f1dSLionel Sambuc	vadd.u64	q15, q14, q14
2274*0a6a1f1dSLionel Sambuc	vst1.64		{q14}, [r0,:128]!
2275*0a6a1f1dSLionel Sambuc	vswp		d15,d14
2276*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q15, #63
2277*0a6a1f1dSLionel Sambuc	veor		q15, q15, q7
2278*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2279*0a6a1f1dSLionel Sambuc	vld1.8		{q5}, [r7]!
2280*0a6a1f1dSLionel Sambuc	subs		r9, #0x10
2281*0a6a1f1dSLionel Sambuc	bmi		.Lxts_dec_6
2282*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
2283*0a6a1f1dSLionel Sambuc	sub		r9, #0x10
2284*0a6a1f1dSLionel Sambuc	vst1.64		{q15}, [r0,:128]		@ next round tweak
2285*0a6a1f1dSLionel Sambuc
2286*0a6a1f1dSLionel Sambuc	vld1.8		{q6}, [r7]!
2287*0a6a1f1dSLionel Sambuc	veor		q5, q5, q13
2288*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2289*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2290*0a6a1f1dSLionel Sambuc#else
2291*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2292*0a6a1f1dSLionel Sambuc#endif
2293*0a6a1f1dSLionel Sambuc	veor		q6, q6, q14
2294*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2295*0a6a1f1dSLionel Sambuc	mov		r0, sp
2296*0a6a1f1dSLionel Sambuc
2297*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2298*0a6a1f1dSLionel Sambuc
2299*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2300*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
2301*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2302*0a6a1f1dSLionel Sambuc	vld1.64		{q12-q13}, [r0,:128]!
2303*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2304*0a6a1f1dSLionel Sambuc	veor		q8, q6, q10
2305*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2306*0a6a1f1dSLionel Sambuc	veor		q9, q4, q11
2307*0a6a1f1dSLionel Sambuc	vld1.64		{q14}, [r0,:128]!
2308*0a6a1f1dSLionel Sambuc	veor		q10, q2, q12
2309*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
2310*0a6a1f1dSLionel Sambuc	veor		q11, q7, q13
2311*0a6a1f1dSLionel Sambuc	veor		q12, q3, q14
2312*0a6a1f1dSLionel Sambuc	vst1.8		{q10-q11}, [r8]!
2313*0a6a1f1dSLionel Sambuc	vst1.8		{q12}, [r8]!
2314*0a6a1f1dSLionel Sambuc
2315*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2316*0a6a1f1dSLionel Sambuc	b		.Lxts_dec_done
2317*0a6a1f1dSLionel Sambuc.align	4
2318*0a6a1f1dSLionel Sambuc.Lxts_dec_6:
2319*0a6a1f1dSLionel Sambuc	vst1.64		{q14}, [r0,:128]		@ next round tweak
2320*0a6a1f1dSLionel Sambuc
2321*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
2322*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2323*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2324*0a6a1f1dSLionel Sambuc#else
2325*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2326*0a6a1f1dSLionel Sambuc#endif
2327*0a6a1f1dSLionel Sambuc	veor		q5, q5, q13
2328*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2329*0a6a1f1dSLionel Sambuc	mov		r0, sp
2330*0a6a1f1dSLionel Sambuc
2331*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2332*0a6a1f1dSLionel Sambuc
2333*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2334*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
2335*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2336*0a6a1f1dSLionel Sambuc	vld1.64		{q12-q13}, [r0,:128]!
2337*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2338*0a6a1f1dSLionel Sambuc	veor		q8, q6, q10
2339*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2340*0a6a1f1dSLionel Sambuc	veor		q9, q4, q11
2341*0a6a1f1dSLionel Sambuc	veor		q10, q2, q12
2342*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
2343*0a6a1f1dSLionel Sambuc	veor		q11, q7, q13
2344*0a6a1f1dSLionel Sambuc	vst1.8		{q10-q11}, [r8]!
2345*0a6a1f1dSLionel Sambuc
2346*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2347*0a6a1f1dSLionel Sambuc	b		.Lxts_dec_done
2348*0a6a1f1dSLionel Sambuc.align	4
2349*0a6a1f1dSLionel Sambuc.Lxts_dec_5:
2350*0a6a1f1dSLionel Sambuc	vst1.64		{q13}, [r0,:128]		@ next round tweak
2351*0a6a1f1dSLionel Sambuc
2352*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
2353*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2354*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2355*0a6a1f1dSLionel Sambuc#else
2356*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2357*0a6a1f1dSLionel Sambuc#endif
2358*0a6a1f1dSLionel Sambuc	veor		q4, q4, q12
2359*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2360*0a6a1f1dSLionel Sambuc	mov		r0, sp
2361*0a6a1f1dSLionel Sambuc
2362*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2363*0a6a1f1dSLionel Sambuc
2364*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2365*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
2366*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2367*0a6a1f1dSLionel Sambuc	vld1.64		{q12}, [r0,:128]!
2368*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2369*0a6a1f1dSLionel Sambuc	veor		q8, q6, q10
2370*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2371*0a6a1f1dSLionel Sambuc	veor		q9, q4, q11
2372*0a6a1f1dSLionel Sambuc	veor		q10, q2, q12
2373*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
2374*0a6a1f1dSLionel Sambuc	vst1.8		{q10}, [r8]!
2375*0a6a1f1dSLionel Sambuc
2376*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2377*0a6a1f1dSLionel Sambuc	b		.Lxts_dec_done
2378*0a6a1f1dSLionel Sambuc.align	4
2379*0a6a1f1dSLionel Sambuc.Lxts_dec_4:
2380*0a6a1f1dSLionel Sambuc	vst1.64		{q12}, [r0,:128]		@ next round tweak
2381*0a6a1f1dSLionel Sambuc
2382*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
2383*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2384*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2385*0a6a1f1dSLionel Sambuc#else
2386*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2387*0a6a1f1dSLionel Sambuc#endif
2388*0a6a1f1dSLionel Sambuc	veor		q3, q3, q11
2389*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2390*0a6a1f1dSLionel Sambuc	mov		r0, sp
2391*0a6a1f1dSLionel Sambuc
2392*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2393*0a6a1f1dSLionel Sambuc
2394*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2395*0a6a1f1dSLionel Sambuc	vld1.64		{q10-q11}, [r0,:128]!
2396*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2397*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2398*0a6a1f1dSLionel Sambuc	veor		q8, q6, q10
2399*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2400*0a6a1f1dSLionel Sambuc	veor		q9, q4, q11
2401*0a6a1f1dSLionel Sambuc	vst1.8		{q8-q9}, [r8]!
2402*0a6a1f1dSLionel Sambuc
2403*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2404*0a6a1f1dSLionel Sambuc	b		.Lxts_dec_done
2405*0a6a1f1dSLionel Sambuc.align	4
2406*0a6a1f1dSLionel Sambuc.Lxts_dec_3:
2407*0a6a1f1dSLionel Sambuc	vst1.64		{q11}, [r0,:128]		@ next round tweak
2408*0a6a1f1dSLionel Sambuc
2409*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2410*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2411*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2412*0a6a1f1dSLionel Sambuc#else
2413*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2414*0a6a1f1dSLionel Sambuc#endif
2415*0a6a1f1dSLionel Sambuc	veor		q2, q2, q10
2416*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2417*0a6a1f1dSLionel Sambuc	mov		r0, sp
2418*0a6a1f1dSLionel Sambuc
2419*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2420*0a6a1f1dSLionel Sambuc
2421*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2422*0a6a1f1dSLionel Sambuc	vld1.64		{q10}, [r0,:128]!
2423*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2424*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2425*0a6a1f1dSLionel Sambuc	veor		q8, q6, q10
2426*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2427*0a6a1f1dSLionel Sambuc	vst1.8		{q8}, [r8]!
2428*0a6a1f1dSLionel Sambuc
2429*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2430*0a6a1f1dSLionel Sambuc	b		.Lxts_dec_done
2431*0a6a1f1dSLionel Sambuc.align	4
2432*0a6a1f1dSLionel Sambuc.Lxts_dec_2:
2433*0a6a1f1dSLionel Sambuc	vst1.64		{q10}, [r0,:128]		@ next round tweak
2434*0a6a1f1dSLionel Sambuc
2435*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2436*0a6a1f1dSLionel Sambuc#ifndef	BSAES_ASM_EXTENDED_KEY
2437*0a6a1f1dSLionel Sambuc	add		r4, sp, #0x90			@ pass key schedule
2438*0a6a1f1dSLionel Sambuc#else
2439*0a6a1f1dSLionel Sambuc	add		r4, r10, #248			@ pass key schedule
2440*0a6a1f1dSLionel Sambuc#endif
2441*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2442*0a6a1f1dSLionel Sambuc	mov		r5, r1			@ pass rounds
2443*0a6a1f1dSLionel Sambuc	mov		r0, sp
2444*0a6a1f1dSLionel Sambuc
2445*0a6a1f1dSLionel Sambuc	bl		_bsaes_decrypt8
2446*0a6a1f1dSLionel Sambuc
2447*0a6a1f1dSLionel Sambuc	vld1.64		{q8-q9}, [r0,:128]!
2448*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2449*0a6a1f1dSLionel Sambuc	veor		q1, q1, q9
2450*0a6a1f1dSLionel Sambuc	vst1.8		{q0-q1}, [r8]!
2451*0a6a1f1dSLionel Sambuc
2452*0a6a1f1dSLionel Sambuc	vld1.64		{q8}, [r0,:128]		@ next round tweak
2453*0a6a1f1dSLionel Sambuc	b		.Lxts_dec_done
2454*0a6a1f1dSLionel Sambuc.align	4
2455*0a6a1f1dSLionel Sambuc.Lxts_dec_1:
2456*0a6a1f1dSLionel Sambuc	mov		r0, sp
2457*0a6a1f1dSLionel Sambuc	veor		q0, q8
2458*0a6a1f1dSLionel Sambuc	mov		r1, sp
2459*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [sp,:128]
2460*0a6a1f1dSLionel Sambuc	mov		r2, r10
2461*0a6a1f1dSLionel Sambuc	mov		r4, r3				@ preserve fp
2462*0a6a1f1dSLionel Sambuc	mov		r5, r2			@ preserve magic
2463*0a6a1f1dSLionel Sambuc
2464*0a6a1f1dSLionel Sambuc	bl		AES_decrypt
2465*0a6a1f1dSLionel Sambuc
2466*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [sp,:128]
2467*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2468*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [r8]!
2469*0a6a1f1dSLionel Sambuc	mov		r3, r4
2470*0a6a1f1dSLionel Sambuc	mov		r2, r5
2471*0a6a1f1dSLionel Sambuc
2472*0a6a1f1dSLionel Sambuc	vmov		q8, q9		@ next round tweak
2473*0a6a1f1dSLionel Sambuc
2474*0a6a1f1dSLionel Sambuc.Lxts_dec_done:
2475*0a6a1f1dSLionel Sambuc#ifndef	XTS_CHAIN_TWEAK
2476*0a6a1f1dSLionel Sambuc	adds		r9, #0x10
2477*0a6a1f1dSLionel Sambuc	beq		.Lxts_dec_ret
2478*0a6a1f1dSLionel Sambuc
2479*0a6a1f1dSLionel Sambuc	@ calculate one round of extra tweak for the stolen ciphertext
2480*0a6a1f1dSLionel Sambuc	vldmia		r2, {q5}
2481*0a6a1f1dSLionel Sambuc	vshr.s64	q6, q8, #63
2482*0a6a1f1dSLionel Sambuc	vand		q6, q6, q5
2483*0a6a1f1dSLionel Sambuc	vadd.u64	q9, q8, q8
2484*0a6a1f1dSLionel Sambuc	vswp		d13,d12
2485*0a6a1f1dSLionel Sambuc	veor		q9, q9, q6
2486*0a6a1f1dSLionel Sambuc
2487*0a6a1f1dSLionel Sambuc	@ perform the final decryption with the last tweak value
2488*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r7]!
2489*0a6a1f1dSLionel Sambuc	mov		r0, sp
2490*0a6a1f1dSLionel Sambuc	veor		q0, q0, q9
2491*0a6a1f1dSLionel Sambuc	mov		r1, sp
2492*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [sp,:128]
2493*0a6a1f1dSLionel Sambuc	mov		r2, r10
2494*0a6a1f1dSLionel Sambuc	mov		r4, r3			@ preserve fp
2495*0a6a1f1dSLionel Sambuc
2496*0a6a1f1dSLionel Sambuc	bl		AES_decrypt
2497*0a6a1f1dSLionel Sambuc
2498*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [sp,:128]
2499*0a6a1f1dSLionel Sambuc	veor		q0, q0, q9
2500*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [r8]
2501*0a6a1f1dSLionel Sambuc
2502*0a6a1f1dSLionel Sambuc	mov		r6, r8
2503*0a6a1f1dSLionel Sambuc.Lxts_dec_steal:
2504*0a6a1f1dSLionel Sambuc	ldrb		r1, [r8]
2505*0a6a1f1dSLionel Sambuc	ldrb		r0, [r7], #1
2506*0a6a1f1dSLionel Sambuc	strb		r1, [r8, #0x10]
2507*0a6a1f1dSLionel Sambuc	strb		r0, [r8], #1
2508*0a6a1f1dSLionel Sambuc
2509*0a6a1f1dSLionel Sambuc	subs		r9, #1
2510*0a6a1f1dSLionel Sambuc	bhi		.Lxts_dec_steal
2511*0a6a1f1dSLionel Sambuc
2512*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [r6]
2513*0a6a1f1dSLionel Sambuc	mov		r0, sp
2514*0a6a1f1dSLionel Sambuc	veor		q0, q8
2515*0a6a1f1dSLionel Sambuc	mov		r1, sp
2516*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [sp,:128]
2517*0a6a1f1dSLionel Sambuc	mov		r2, r10
2518*0a6a1f1dSLionel Sambuc
2519*0a6a1f1dSLionel Sambuc	bl		AES_decrypt
2520*0a6a1f1dSLionel Sambuc
2521*0a6a1f1dSLionel Sambuc	vld1.8		{q0}, [sp,:128]
2522*0a6a1f1dSLionel Sambuc	veor		q0, q0, q8
2523*0a6a1f1dSLionel Sambuc	vst1.8		{q0}, [r6]
2524*0a6a1f1dSLionel Sambuc	mov		r3, r4
2525*0a6a1f1dSLionel Sambuc#endif
2526*0a6a1f1dSLionel Sambuc
2527*0a6a1f1dSLionel Sambuc.Lxts_dec_ret:
2528*0a6a1f1dSLionel Sambuc	bic		r0, r3, #0xf
2529*0a6a1f1dSLionel Sambuc	vmov.i32	q0, #0
2530*0a6a1f1dSLionel Sambuc	vmov.i32	q1, #0
2531*0a6a1f1dSLionel Sambuc#ifdef	XTS_CHAIN_TWEAK
2532*0a6a1f1dSLionel Sambuc	ldr		r1, [r3, #0x20+VFP_ABI_FRAME]	@ chain tweak
2533*0a6a1f1dSLionel Sambuc#endif
2534*0a6a1f1dSLionel Sambuc.Lxts_dec_bzero:				@ wipe key schedule [if any]
2535*0a6a1f1dSLionel Sambuc	vstmia		sp!, {q0-q1}
2536*0a6a1f1dSLionel Sambuc	cmp		sp, r0
2537*0a6a1f1dSLionel Sambuc	bne		.Lxts_dec_bzero
2538*0a6a1f1dSLionel Sambuc
2539*0a6a1f1dSLionel Sambuc	mov		sp, r3
2540*0a6a1f1dSLionel Sambuc#ifdef	XTS_CHAIN_TWEAK
2541*0a6a1f1dSLionel Sambuc	vst1.8		{q8}, [r1]
2542*0a6a1f1dSLionel Sambuc#endif
2543*0a6a1f1dSLionel Sambuc	VFP_ABI_POP
2544*0a6a1f1dSLionel Sambuc	ldmia		sp!, {r4-r10, pc}	@ return
2545*0a6a1f1dSLionel Sambuc
2546*0a6a1f1dSLionel Sambuc.size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
2547*0a6a1f1dSLionel Sambuc#endif
2548