/* xref: /minix3/crypto/external/bsd/openssl/lib/libcrypto/arch/arm/sha512-armv4.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7) */
#include "arm_arch.h"
#include "arm_asm.h"
/*
 * SHA-512 state words are 64-bit, but this is 32-bit ARM code that keeps
 * each word as a lo/hi 32-bit pair.  LO/HI give the byte offset of each
 * half within a stored 64-bit word, and WORD64 emits a pair of 64-bit
 * constants with the halves in the order the target's endianness expects.
 */
#ifdef __ARMEL__
# define LO 0
# define HI 4
# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
#else
# define HI 0
# define LO 4
# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
#endif
12*0a6a1f1dSLionel Sambuc
.text
.code	32

@ K512: the 80 64-bit SHA-512 round constants (FIPS 180-4), stored via
@ WORD64 so each constant's lo/hi halves land in target byte order.
.type	K512,%object
.align	5
K512:
WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size	K512,.-K512
#if __ARM_MAX_ARCH__>=7
@ PC-relative offset from sha512_block_data_order to OPENSSL_armcap_P;
@ the function adds this word to its own address to locate the
@ capability flags without an absolute relocation.
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha512_block_data_order
.skip	32-4
#else
.skip	32
#endif
66*0a6a1f1dSLionel Sambuc
@-----------------------------------------------------------------------
@ sha512_block_data_order
@ In:    r0 = hash context: eight 64-bit state words at offsets 0..56
@        r1 = input data pointer (byte-aligned; bytes are assembled
@             individually on pre-v7 cores)
@        r2 = number of 128-byte blocks (shifted left by 7 to form the
@             end-of-input pointer)
@ Notes: r14 walks the K512 table; bit 0 of r14 is borrowed as a round
@        marker (set via orreq when the low byte of K[i] matches the
@        sentinel values 148 / 23, i.e. end of the 16-round / 80-round
@        phases) and cleared with bic before use as a pointer.
@        The frame is 9*8 bytes plus one 8-byte W[i] slot pushed per
@        round (80 rounds = 640 bytes, released after each block).
@        Dispatches to .LNEON when OPENSSL_armcap_P bit 0 is set.
@-----------------------------------------------------------------------
.global	sha512_block_data_order
.type	sha512_block_data_order,%function
sha512_block_data_order:
	sub	r3,pc,#8		@ sha512_block_data_order
	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
#if __ARM_MAX_ARCH__>=7
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#1
	bne	.LNEON
#endif
	stmdb	sp!,{r4-r12,lr}
	sub	r14,r3,#672		@ K512
	sub	sp,sp,#9*8

	ldr	r7,[r0,#32+LO]
	ldr	r8,[r0,#32+HI]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
.Loop:
	str	r9, [sp,#48+0]
	str	r10, [sp,#48+4]
	str	r11, [sp,#56+0]
	str	r12, [sp,#56+4]
	ldr	r5,[r0,#0+LO]
	ldr	r6,[r0,#0+HI]
	ldr	r3,[r0,#8+LO]
	ldr	r4,[r0,#8+HI]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	str	r3,[sp,#8+0]
	str	r4,[sp,#8+4]
	str	r9, [sp,#16+0]
	str	r10, [sp,#16+4]
	str	r11, [sp,#24+0]
	str	r12, [sp,#24+4]
	ldr	r3,[r0,#40+LO]
	ldr	r4,[r0,#40+HI]
	str	r3,[sp,#40+0]
	str	r4,[sp,#40+4]

.L00_15:
#if __ARM_ARCH__<7
	@ no unaligned word loads before v7: build W[i] a byte at a time
	ldrb	r3,[r1,#7]
	ldrb	r9, [r1,#6]
	ldrb	r10, [r1,#5]
	ldrb	r11, [r1,#4]
	ldrb	r4,[r1,#3]
	ldrb	r12, [r1,#2]
	orr	r3,r3,r9,lsl#8
	ldrb	r9, [r1,#1]
	orr	r3,r3,r10,lsl#16
	ldrb	r10, [r1],#8
	orr	r3,r3,r11,lsl#24
	orr	r4,r4,r12,lsl#8
	orr	r4,r4,r9,lsl#16
	orr	r4,r4,r10,lsl#24
#else
	ldr	r3,[r1,#4]
	ldr	r4,[r1],#8
#ifdef __ARMEL__
	rev	r3,r3
	rev	r4,r4
#endif
#endif
	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0]	@ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4]	@ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0]	@ f.lo
	adc	r4,r4,r10		@ T += Sigma1(e)
	ldr	r10,[sp,#40+4]	@ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0]	@ g.lo
	adc	r4,r4,r12		@ T += h
	ldr	r12,[sp,#48+4]	@ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO]	@ K[i].lo
	eor	r10,r10,r12		@ Ch(e,f,g)
	ldr	r12,[r14,#HI]	@ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0]	@ d.lo
	adc	r4,r4,r10		@ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4]	@ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12		@ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0]	@ b.lo
	adc	r8,r8,r4		@ d += T
	teq	r9,#148		@ low byte of K[15] marks end of rounds 0..15

	ldr	r12,[sp,#16+0]	@ c.lo
	orreq	r14,r14,#1		@ flag "leave .L00_15" in bit 0 of Ktbl ptr
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10		@ T += Sigma0(a)

	ldr	r10,[sp,#8+4]	@ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4]	@ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9		@ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12		@ Maj(a,b,c).hi
	sub	sp,sp,#8		@ push W[i] slot for message schedule
	adc	r6,r6,r4		@ h += T
	tst	r14,#1
	add	r14,r14,#8
	tst	r14,#1
	beq	.L00_15
	ldr	r9,[sp,#184+0]
	ldr	r10,[sp,#184+4]
	bic	r14,r14,#1		@ restore Ktbl to a clean pointer
.L16_79:
	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
	mov	r3,r9,lsr#1
	ldr	r11,[sp,#80+0]
	mov	r4,r10,lsr#1
	ldr	r12,[sp,#80+4]
	eor	r3,r3,r10,lsl#31
	eor	r4,r4,r9,lsl#31
	eor	r3,r3,r9,lsr#8
	eor	r4,r4,r10,lsr#8
	eor	r3,r3,r10,lsl#24
	eor	r4,r4,r9,lsl#24
	eor	r3,r3,r9,lsr#7
	eor	r4,r4,r10,lsr#7
	eor	r3,r3,r10,lsl#25

	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
	mov	r9,r11,lsr#19
	mov	r10,r12,lsr#19
	eor	r9,r9,r12,lsl#13
	eor	r10,r10,r11,lsl#13
	eor	r9,r9,r12,lsr#29
	eor	r10,r10,r11,lsr#29
	eor	r9,r9,r11,lsl#3
	eor	r10,r10,r12,lsl#3
	eor	r9,r9,r11,lsr#6
	eor	r10,r10,r12,lsr#6
	ldr	r11,[sp,#120+0]
	eor	r9,r9,r12,lsl#26

	ldr	r12,[sp,#120+4]
	adds	r3,r3,r9
	ldr	r9,[sp,#192+0]
	adc	r4,r4,r10

	ldr	r10,[sp,#192+4]
	adds	r3,r3,r11
	adc	r4,r4,r12
	adds	r3,r3,r9
	adc	r4,r4,r10
	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0]	@ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4]	@ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0]	@ f.lo
	adc	r4,r4,r10		@ T += Sigma1(e)
	ldr	r10,[sp,#40+4]	@ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0]	@ g.lo
	adc	r4,r4,r12		@ T += h
	ldr	r12,[sp,#48+4]	@ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO]	@ K[i].lo
	eor	r10,r10,r12		@ Ch(e,f,g)
	ldr	r12,[r14,#HI]	@ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0]	@ d.lo
	adc	r4,r4,r10		@ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4]	@ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12		@ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0]	@ b.lo
	adc	r8,r8,r4		@ d += T
	teq	r9,#23		@ low byte of K[79] marks end of rounds 16..79

	ldr	r12,[sp,#16+0]	@ c.lo
	orreq	r14,r14,#1		@ flag "leave .L16_79" in bit 0 of Ktbl ptr
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10		@ T += Sigma0(a)

	ldr	r10,[sp,#8+4]	@ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4]	@ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9		@ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12		@ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4		@ h += T
	tst	r14,#1
	add	r14,r14,#8
	ldreq	r9,[sp,#184+0]
	ldreq	r10,[sp,#184+4]
	beq	.L16_79
	bic	r14,r14,#1

	@ add the working variables back into the context (ctx[i] += work[i])
	ldr	r3,[sp,#8+0]
	ldr	r4,[sp,#8+4]
	ldr	r9, [r0,#0+LO]
	ldr	r10, [r0,#0+HI]
	ldr	r11, [r0,#8+LO]
	ldr	r12, [r0,#8+HI]
	adds	r9,r5,r9
	str	r9, [r0,#0+LO]
	adc	r10,r6,r10
	str	r10, [r0,#0+HI]
	adds	r11,r3,r11
	str	r11, [r0,#8+LO]
	adc	r12,r4,r12
	str	r12, [r0,#8+HI]

	ldr	r5,[sp,#16+0]
	ldr	r6,[sp,#16+4]
	ldr	r3,[sp,#24+0]
	ldr	r4,[sp,#24+4]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	adds	r9,r5,r9
	str	r9, [r0,#16+LO]
	adc	r10,r6,r10
	str	r10, [r0,#16+HI]
	adds	r11,r3,r11
	str	r11, [r0,#24+LO]
	adc	r12,r4,r12
	str	r12, [r0,#24+HI]

	ldr	r3,[sp,#40+0]
	ldr	r4,[sp,#40+4]
	ldr	r9, [r0,#32+LO]
	ldr	r10, [r0,#32+HI]
	ldr	r11, [r0,#40+LO]
	ldr	r12, [r0,#40+HI]
	adds	r7,r7,r9
	str	r7,[r0,#32+LO]
	adc	r8,r8,r10
	str	r8,[r0,#32+HI]
	adds	r11,r3,r11
	str	r11, [r0,#40+LO]
	adc	r12,r4,r12
	str	r12, [r0,#40+HI]

	ldr	r5,[sp,#48+0]
	ldr	r6,[sp,#48+4]
	ldr	r3,[sp,#56+0]
	ldr	r4,[sp,#56+4]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
	adds	r9,r5,r9
	str	r9, [r0,#48+LO]
	adc	r10,r6,r10
	str	r10, [r0,#48+HI]
	adds	r11,r3,r11
	str	r11, [r0,#56+LO]
	adc	r12,r4,r12
	str	r12, [r0,#56+HI]

	add	sp,sp,#640		@ pop the 80 W[i] slots pushed this block
	sub	r14,r14,#640		@ rewind Ktbl to K512

	teq	r1,r2		@ more input blocks?
	bne	.Loop

	add	sp,sp,#8*9		@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
445*0a6a1f1dSLionel Sambuc#if __ARM_MAX_ARCH__>=7
446*0a6a1f1dSLionel Sambuc.arch	armv7-a
447*0a6a1f1dSLionel Sambuc.fpu	neon
448*0a6a1f1dSLionel Sambuc
449*0a6a1f1dSLionel Sambuc.align	4
450*0a6a1f1dSLionel Sambuc.LNEON:
451*0a6a1f1dSLionel Sambuc	dmb				@ errata #451034 on early Cortex A8
452*0a6a1f1dSLionel Sambuc	vstmdb	sp!,{d8-d15}		@ ABI specification says so
453*0a6a1f1dSLionel Sambuc	sub	r3,r3,#672		@ K512
454*0a6a1f1dSLionel Sambuc	vldmia	r0,{d16-d23}		@ load context
455*0a6a1f1dSLionel Sambuc.Loop_neon:
456*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#14	@ 0
457*0a6a1f1dSLionel Sambuc#if 0<16
458*0a6a1f1dSLionel Sambuc	vld1.64		{d0},[r1]!	@ handles unaligned
459*0a6a1f1dSLionel Sambuc#endif
460*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#18
461*0a6a1f1dSLionel Sambuc#if 0>0
462*0a6a1f1dSLionel Sambuc	 vadd.i64	d16,d30			@ h+=Maj from the past
463*0a6a1f1dSLionel Sambuc#endif
464*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#41
465*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
466*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#50
467*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#46
468*0a6a1f1dSLionel Sambuc	vmov		d29,d20
469*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#23
470*0a6a1f1dSLionel Sambuc#if 0<16 && defined(__ARMEL__)
471*0a6a1f1dSLionel Sambuc	vrev64.8	d0,d0
472*0a6a1f1dSLionel Sambuc#endif
473*0a6a1f1dSLionel Sambuc	veor		d25,d24
474*0a6a1f1dSLionel Sambuc	vbsl		d29,d21,d22		@ Ch(e,f,g)
475*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#28
476*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
477*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d23
478*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#34
479*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#36
480*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
481*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#39
482*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d0
483*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#30
484*0a6a1f1dSLionel Sambuc	veor		d30,d16,d17
485*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#25
486*0a6a1f1dSLionel Sambuc	veor		d23,d24,d25
487*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
488*0a6a1f1dSLionel Sambuc	vbsl		d30,d18,d17		@ Maj(a,b,c)
489*0a6a1f1dSLionel Sambuc	veor		d23,d26			@ Sigma0(a)
490*0a6a1f1dSLionel Sambuc	vadd.i64	d19,d27
491*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
492*0a6a1f1dSLionel Sambuc	@ vadd.i64	d23,d30
493*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#14	@ 1
494*0a6a1f1dSLionel Sambuc#if 1<16
495*0a6a1f1dSLionel Sambuc	vld1.64		{d1},[r1]!	@ handles unaligned
496*0a6a1f1dSLionel Sambuc#endif
497*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#18
498*0a6a1f1dSLionel Sambuc#if 1>0
499*0a6a1f1dSLionel Sambuc	 vadd.i64	d23,d30			@ h+=Maj from the past
500*0a6a1f1dSLionel Sambuc#endif
501*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#41
502*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
503*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#50
504*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#46
505*0a6a1f1dSLionel Sambuc	vmov		d29,d19
506*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#23
507*0a6a1f1dSLionel Sambuc#if 1<16 && defined(__ARMEL__)
508*0a6a1f1dSLionel Sambuc	vrev64.8	d1,d1
509*0a6a1f1dSLionel Sambuc#endif
510*0a6a1f1dSLionel Sambuc	veor		d25,d24
511*0a6a1f1dSLionel Sambuc	vbsl		d29,d20,d21		@ Ch(e,f,g)
512*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#28
513*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
514*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d22
515*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#34
516*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#36
517*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
518*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#39
519*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d1
520*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#30
521*0a6a1f1dSLionel Sambuc	veor		d30,d23,d16
522*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#25
523*0a6a1f1dSLionel Sambuc	veor		d22,d24,d25
524*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
525*0a6a1f1dSLionel Sambuc	vbsl		d30,d17,d16		@ Maj(a,b,c)
526*0a6a1f1dSLionel Sambuc	veor		d22,d26			@ Sigma0(a)
527*0a6a1f1dSLionel Sambuc	vadd.i64	d18,d27
528*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
529*0a6a1f1dSLionel Sambuc	@ vadd.i64	d22,d30
530*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#14	@ 2
531*0a6a1f1dSLionel Sambuc#if 2<16
532*0a6a1f1dSLionel Sambuc	vld1.64		{d2},[r1]!	@ handles unaligned
533*0a6a1f1dSLionel Sambuc#endif
534*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#18
535*0a6a1f1dSLionel Sambuc#if 2>0
536*0a6a1f1dSLionel Sambuc	 vadd.i64	d22,d30			@ h+=Maj from the past
537*0a6a1f1dSLionel Sambuc#endif
538*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#41
539*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
540*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#50
541*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#46
542*0a6a1f1dSLionel Sambuc	vmov		d29,d18
543*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#23
544*0a6a1f1dSLionel Sambuc#if 2<16 && defined(__ARMEL__)
545*0a6a1f1dSLionel Sambuc	vrev64.8	d2,d2
546*0a6a1f1dSLionel Sambuc#endif
547*0a6a1f1dSLionel Sambuc	veor		d25,d24
548*0a6a1f1dSLionel Sambuc	vbsl		d29,d19,d20		@ Ch(e,f,g)
549*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#28
550*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
551*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d21
552*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#34
553*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#36
554*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
555*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#39
556*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d2
557*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#30
558*0a6a1f1dSLionel Sambuc	veor		d30,d22,d23
559*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#25
560*0a6a1f1dSLionel Sambuc	veor		d21,d24,d25
561*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
562*0a6a1f1dSLionel Sambuc	vbsl		d30,d16,d23		@ Maj(a,b,c)
563*0a6a1f1dSLionel Sambuc	veor		d21,d26			@ Sigma0(a)
564*0a6a1f1dSLionel Sambuc	vadd.i64	d17,d27
565*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
566*0a6a1f1dSLionel Sambuc	@ vadd.i64	d21,d30
567*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#14	@ 3
568*0a6a1f1dSLionel Sambuc#if 3<16
569*0a6a1f1dSLionel Sambuc	vld1.64		{d3},[r1]!	@ handles unaligned
570*0a6a1f1dSLionel Sambuc#endif
571*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#18
572*0a6a1f1dSLionel Sambuc#if 3>0
573*0a6a1f1dSLionel Sambuc	 vadd.i64	d21,d30			@ h+=Maj from the past
574*0a6a1f1dSLionel Sambuc#endif
575*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#41
576*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
577*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#50
578*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#46
579*0a6a1f1dSLionel Sambuc	vmov		d29,d17
580*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#23
581*0a6a1f1dSLionel Sambuc#if 3<16 && defined(__ARMEL__)
582*0a6a1f1dSLionel Sambuc	vrev64.8	d3,d3
583*0a6a1f1dSLionel Sambuc#endif
584*0a6a1f1dSLionel Sambuc	veor		d25,d24
585*0a6a1f1dSLionel Sambuc	vbsl		d29,d18,d19		@ Ch(e,f,g)
586*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#28
587*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
588*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d20
589*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#34
590*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#36
591*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
592*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#39
593*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d3
594*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#30
595*0a6a1f1dSLionel Sambuc	veor		d30,d21,d22
596*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#25
597*0a6a1f1dSLionel Sambuc	veor		d20,d24,d25
598*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
599*0a6a1f1dSLionel Sambuc	vbsl		d30,d23,d22		@ Maj(a,b,c)
600*0a6a1f1dSLionel Sambuc	veor		d20,d26			@ Sigma0(a)
601*0a6a1f1dSLionel Sambuc	vadd.i64	d16,d27
602*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
603*0a6a1f1dSLionel Sambuc	@ vadd.i64	d20,d30
604*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#14	@ 4
605*0a6a1f1dSLionel Sambuc#if 4<16
606*0a6a1f1dSLionel Sambuc	vld1.64		{d4},[r1]!	@ handles unaligned
607*0a6a1f1dSLionel Sambuc#endif
608*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#18
609*0a6a1f1dSLionel Sambuc#if 4>0
610*0a6a1f1dSLionel Sambuc	 vadd.i64	d20,d30			@ h+=Maj from the past
611*0a6a1f1dSLionel Sambuc#endif
612*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#41
613*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
614*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#50
615*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#46
616*0a6a1f1dSLionel Sambuc	vmov		d29,d16
617*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#23
618*0a6a1f1dSLionel Sambuc#if 4<16 && defined(__ARMEL__)
619*0a6a1f1dSLionel Sambuc	vrev64.8	d4,d4
620*0a6a1f1dSLionel Sambuc#endif
621*0a6a1f1dSLionel Sambuc	veor		d25,d24
622*0a6a1f1dSLionel Sambuc	vbsl		d29,d17,d18		@ Ch(e,f,g)
623*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#28
624*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
625*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d19
626*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#34
627*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#36
628*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
629*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#39
630*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d4
631*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#30
632*0a6a1f1dSLionel Sambuc	veor		d30,d20,d21
633*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#25
634*0a6a1f1dSLionel Sambuc	veor		d19,d24,d25
635*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
636*0a6a1f1dSLionel Sambuc	vbsl		d30,d22,d21		@ Maj(a,b,c)
637*0a6a1f1dSLionel Sambuc	veor		d19,d26			@ Sigma0(a)
638*0a6a1f1dSLionel Sambuc	vadd.i64	d23,d27
639*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
640*0a6a1f1dSLionel Sambuc	@ vadd.i64	d19,d30
641*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#14	@ 5
642*0a6a1f1dSLionel Sambuc#if 5<16
643*0a6a1f1dSLionel Sambuc	vld1.64		{d5},[r1]!	@ handles unaligned
644*0a6a1f1dSLionel Sambuc#endif
645*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#18
646*0a6a1f1dSLionel Sambuc#if 5>0
647*0a6a1f1dSLionel Sambuc	 vadd.i64	d19,d30			@ h+=Maj from the past
648*0a6a1f1dSLionel Sambuc#endif
649*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#41
650*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
651*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#50
652*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#46
653*0a6a1f1dSLionel Sambuc	vmov		d29,d23
654*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#23
655*0a6a1f1dSLionel Sambuc#if 5<16 && defined(__ARMEL__)
656*0a6a1f1dSLionel Sambuc	vrev64.8	d5,d5
657*0a6a1f1dSLionel Sambuc#endif
658*0a6a1f1dSLionel Sambuc	veor		d25,d24
659*0a6a1f1dSLionel Sambuc	vbsl		d29,d16,d17		@ Ch(e,f,g)
660*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#28
661*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
662*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d18
663*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#34
664*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#36
665*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
666*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#39
667*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d5
668*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#30
669*0a6a1f1dSLionel Sambuc	veor		d30,d19,d20
670*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#25
671*0a6a1f1dSLionel Sambuc	veor		d18,d24,d25
672*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
673*0a6a1f1dSLionel Sambuc	vbsl		d30,d21,d20		@ Maj(a,b,c)
674*0a6a1f1dSLionel Sambuc	veor		d18,d26			@ Sigma0(a)
675*0a6a1f1dSLionel Sambuc	vadd.i64	d22,d27
676*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
677*0a6a1f1dSLionel Sambuc	@ vadd.i64	d18,d30
678*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#14	@ 6
679*0a6a1f1dSLionel Sambuc#if 6<16
680*0a6a1f1dSLionel Sambuc	vld1.64		{d6},[r1]!	@ handles unaligned
681*0a6a1f1dSLionel Sambuc#endif
682*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#18
683*0a6a1f1dSLionel Sambuc#if 6>0
684*0a6a1f1dSLionel Sambuc	 vadd.i64	d18,d30			@ h+=Maj from the past
685*0a6a1f1dSLionel Sambuc#endif
686*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#41
687*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
688*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#50
689*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#46
690*0a6a1f1dSLionel Sambuc	vmov		d29,d22
691*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#23
692*0a6a1f1dSLionel Sambuc#if 6<16 && defined(__ARMEL__)
693*0a6a1f1dSLionel Sambuc	vrev64.8	d6,d6
694*0a6a1f1dSLionel Sambuc#endif
695*0a6a1f1dSLionel Sambuc	veor		d25,d24
696*0a6a1f1dSLionel Sambuc	vbsl		d29,d23,d16		@ Ch(e,f,g)
697*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#28
698*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
699*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d17
700*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#34
701*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#36
702*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
703*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#39
704*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d6
705*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#30
706*0a6a1f1dSLionel Sambuc	veor		d30,d18,d19
707*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#25
708*0a6a1f1dSLionel Sambuc	veor		d17,d24,d25
709*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
710*0a6a1f1dSLionel Sambuc	vbsl		d30,d20,d19		@ Maj(a,b,c)
711*0a6a1f1dSLionel Sambuc	veor		d17,d26			@ Sigma0(a)
712*0a6a1f1dSLionel Sambuc	vadd.i64	d21,d27
713*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
714*0a6a1f1dSLionel Sambuc	@ vadd.i64	d17,d30
715*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#14	@ 7
716*0a6a1f1dSLionel Sambuc#if 7<16
717*0a6a1f1dSLionel Sambuc	vld1.64		{d7},[r1]!	@ handles unaligned
718*0a6a1f1dSLionel Sambuc#endif
719*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#18
720*0a6a1f1dSLionel Sambuc#if 7>0
721*0a6a1f1dSLionel Sambuc	 vadd.i64	d17,d30			@ h+=Maj from the past
722*0a6a1f1dSLionel Sambuc#endif
723*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#41
724*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
725*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#50
726*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#46
727*0a6a1f1dSLionel Sambuc	vmov		d29,d21
728*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#23
729*0a6a1f1dSLionel Sambuc#if 7<16 && defined(__ARMEL__)
730*0a6a1f1dSLionel Sambuc	vrev64.8	d7,d7
731*0a6a1f1dSLionel Sambuc#endif
732*0a6a1f1dSLionel Sambuc	veor		d25,d24
733*0a6a1f1dSLionel Sambuc	vbsl		d29,d22,d23		@ Ch(e,f,g)
734*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#28
735*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
736*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d16
737*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#34
738*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#36
739*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
740*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#39
741*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d7
742*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#30
743*0a6a1f1dSLionel Sambuc	veor		d30,d17,d18
744*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#25
745*0a6a1f1dSLionel Sambuc	veor		d16,d24,d25
746*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
747*0a6a1f1dSLionel Sambuc	vbsl		d30,d19,d18		@ Maj(a,b,c)
748*0a6a1f1dSLionel Sambuc	veor		d16,d26			@ Sigma0(a)
749*0a6a1f1dSLionel Sambuc	vadd.i64	d20,d27
750*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
751*0a6a1f1dSLionel Sambuc	@ vadd.i64	d16,d30
752*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#14	@ 8
753*0a6a1f1dSLionel Sambuc#if 8<16
754*0a6a1f1dSLionel Sambuc	vld1.64		{d8},[r1]!	@ handles unaligned
755*0a6a1f1dSLionel Sambuc#endif
756*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#18
757*0a6a1f1dSLionel Sambuc#if 8>0
758*0a6a1f1dSLionel Sambuc	 vadd.i64	d16,d30			@ h+=Maj from the past
759*0a6a1f1dSLionel Sambuc#endif
760*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#41
761*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
762*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#50
763*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#46
764*0a6a1f1dSLionel Sambuc	vmov		d29,d20
765*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#23
766*0a6a1f1dSLionel Sambuc#if 8<16 && defined(__ARMEL__)
767*0a6a1f1dSLionel Sambuc	vrev64.8	d8,d8
768*0a6a1f1dSLionel Sambuc#endif
769*0a6a1f1dSLionel Sambuc	veor		d25,d24
770*0a6a1f1dSLionel Sambuc	vbsl		d29,d21,d22		@ Ch(e,f,g)
771*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#28
772*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
773*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d23
774*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#34
775*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#36
776*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
777*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#39
778*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d8
779*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#30
780*0a6a1f1dSLionel Sambuc	veor		d30,d16,d17
781*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#25
782*0a6a1f1dSLionel Sambuc	veor		d23,d24,d25
783*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
784*0a6a1f1dSLionel Sambuc	vbsl		d30,d18,d17		@ Maj(a,b,c)
785*0a6a1f1dSLionel Sambuc	veor		d23,d26			@ Sigma0(a)
786*0a6a1f1dSLionel Sambuc	vadd.i64	d19,d27
787*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
788*0a6a1f1dSLionel Sambuc	@ vadd.i64	d23,d30
789*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#14	@ 9
790*0a6a1f1dSLionel Sambuc#if 9<16
791*0a6a1f1dSLionel Sambuc	vld1.64		{d9},[r1]!	@ handles unaligned
792*0a6a1f1dSLionel Sambuc#endif
793*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#18
794*0a6a1f1dSLionel Sambuc#if 9>0
795*0a6a1f1dSLionel Sambuc	 vadd.i64	d23,d30			@ h+=Maj from the past
796*0a6a1f1dSLionel Sambuc#endif
797*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#41
798*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
799*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#50
800*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#46
801*0a6a1f1dSLionel Sambuc	vmov		d29,d19
802*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#23
803*0a6a1f1dSLionel Sambuc#if 9<16 && defined(__ARMEL__)
804*0a6a1f1dSLionel Sambuc	vrev64.8	d9,d9
805*0a6a1f1dSLionel Sambuc#endif
806*0a6a1f1dSLionel Sambuc	veor		d25,d24
807*0a6a1f1dSLionel Sambuc	vbsl		d29,d20,d21		@ Ch(e,f,g)
808*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#28
809*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
810*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d22
811*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#34
812*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#36
813*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
814*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#39
815*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d9
816*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#30
817*0a6a1f1dSLionel Sambuc	veor		d30,d23,d16
818*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#25
819*0a6a1f1dSLionel Sambuc	veor		d22,d24,d25
820*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
821*0a6a1f1dSLionel Sambuc	vbsl		d30,d17,d16		@ Maj(a,b,c)
822*0a6a1f1dSLionel Sambuc	veor		d22,d26			@ Sigma0(a)
823*0a6a1f1dSLionel Sambuc	vadd.i64	d18,d27
824*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
825*0a6a1f1dSLionel Sambuc	@ vadd.i64	d22,d30
826*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#14	@ 10
827*0a6a1f1dSLionel Sambuc#if 10<16
828*0a6a1f1dSLionel Sambuc	vld1.64		{d10},[r1]!	@ handles unaligned
829*0a6a1f1dSLionel Sambuc#endif
830*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#18
831*0a6a1f1dSLionel Sambuc#if 10>0
832*0a6a1f1dSLionel Sambuc	 vadd.i64	d22,d30			@ h+=Maj from the past
833*0a6a1f1dSLionel Sambuc#endif
834*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#41
835*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
836*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#50
837*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#46
838*0a6a1f1dSLionel Sambuc	vmov		d29,d18
839*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#23
840*0a6a1f1dSLionel Sambuc#if 10<16 && defined(__ARMEL__)
841*0a6a1f1dSLionel Sambuc	vrev64.8	d10,d10
842*0a6a1f1dSLionel Sambuc#endif
843*0a6a1f1dSLionel Sambuc	veor		d25,d24
844*0a6a1f1dSLionel Sambuc	vbsl		d29,d19,d20		@ Ch(e,f,g)
845*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#28
846*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
847*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d21
848*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#34
849*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#36
850*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
851*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#39
852*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d10
853*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#30
854*0a6a1f1dSLionel Sambuc	veor		d30,d22,d23
855*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#25
856*0a6a1f1dSLionel Sambuc	veor		d21,d24,d25
857*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
858*0a6a1f1dSLionel Sambuc	vbsl		d30,d16,d23		@ Maj(a,b,c)
859*0a6a1f1dSLionel Sambuc	veor		d21,d26			@ Sigma0(a)
860*0a6a1f1dSLionel Sambuc	vadd.i64	d17,d27
861*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
862*0a6a1f1dSLionel Sambuc	@ vadd.i64	d21,d30
863*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#14	@ 11
864*0a6a1f1dSLionel Sambuc#if 11<16
865*0a6a1f1dSLionel Sambuc	vld1.64		{d11},[r1]!	@ handles unaligned
866*0a6a1f1dSLionel Sambuc#endif
867*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#18
868*0a6a1f1dSLionel Sambuc#if 11>0
869*0a6a1f1dSLionel Sambuc	 vadd.i64	d21,d30			@ h+=Maj from the past
870*0a6a1f1dSLionel Sambuc#endif
871*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#41
872*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
873*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#50
874*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#46
875*0a6a1f1dSLionel Sambuc	vmov		d29,d17
876*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#23
877*0a6a1f1dSLionel Sambuc#if 11<16 && defined(__ARMEL__)
878*0a6a1f1dSLionel Sambuc	vrev64.8	d11,d11
879*0a6a1f1dSLionel Sambuc#endif
880*0a6a1f1dSLionel Sambuc	veor		d25,d24
881*0a6a1f1dSLionel Sambuc	vbsl		d29,d18,d19		@ Ch(e,f,g)
882*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#28
883*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
884*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d20
885*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#34
886*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#36
887*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
888*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#39
889*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d11
890*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#30
891*0a6a1f1dSLionel Sambuc	veor		d30,d21,d22
892*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#25
893*0a6a1f1dSLionel Sambuc	veor		d20,d24,d25
894*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
895*0a6a1f1dSLionel Sambuc	vbsl		d30,d23,d22		@ Maj(a,b,c)
896*0a6a1f1dSLionel Sambuc	veor		d20,d26			@ Sigma0(a)
897*0a6a1f1dSLionel Sambuc	vadd.i64	d16,d27
898*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
899*0a6a1f1dSLionel Sambuc	@ vadd.i64	d20,d30
900*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#14	@ 12
901*0a6a1f1dSLionel Sambuc#if 12<16
902*0a6a1f1dSLionel Sambuc	vld1.64		{d12},[r1]!	@ handles unaligned
903*0a6a1f1dSLionel Sambuc#endif
904*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#18
905*0a6a1f1dSLionel Sambuc#if 12>0
906*0a6a1f1dSLionel Sambuc	 vadd.i64	d20,d30			@ h+=Maj from the past
907*0a6a1f1dSLionel Sambuc#endif
908*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#41
909*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
910*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#50
911*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#46
912*0a6a1f1dSLionel Sambuc	vmov		d29,d16
913*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#23
914*0a6a1f1dSLionel Sambuc#if 12<16 && defined(__ARMEL__)
915*0a6a1f1dSLionel Sambuc	vrev64.8	d12,d12
916*0a6a1f1dSLionel Sambuc#endif
917*0a6a1f1dSLionel Sambuc	veor		d25,d24
918*0a6a1f1dSLionel Sambuc	vbsl		d29,d17,d18		@ Ch(e,f,g)
919*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#28
920*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
921*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d19
922*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#34
923*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#36
924*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
925*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#39
926*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d12
927*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#30
928*0a6a1f1dSLionel Sambuc	veor		d30,d20,d21
929*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#25
930*0a6a1f1dSLionel Sambuc	veor		d19,d24,d25
931*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
932*0a6a1f1dSLionel Sambuc	vbsl		d30,d22,d21		@ Maj(a,b,c)
933*0a6a1f1dSLionel Sambuc	veor		d19,d26			@ Sigma0(a)
934*0a6a1f1dSLionel Sambuc	vadd.i64	d23,d27
935*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
936*0a6a1f1dSLionel Sambuc	@ vadd.i64	d19,d30
937*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#14	@ 13
938*0a6a1f1dSLionel Sambuc#if 13<16
939*0a6a1f1dSLionel Sambuc	vld1.64		{d13},[r1]!	@ handles unaligned
940*0a6a1f1dSLionel Sambuc#endif
941*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#18
942*0a6a1f1dSLionel Sambuc#if 13>0
943*0a6a1f1dSLionel Sambuc	 vadd.i64	d19,d30			@ h+=Maj from the past
944*0a6a1f1dSLionel Sambuc#endif
945*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#41
946*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
947*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#50
948*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#46
949*0a6a1f1dSLionel Sambuc	vmov		d29,d23
950*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#23
951*0a6a1f1dSLionel Sambuc#if 13<16 && defined(__ARMEL__)
952*0a6a1f1dSLionel Sambuc	vrev64.8	d13,d13
953*0a6a1f1dSLionel Sambuc#endif
954*0a6a1f1dSLionel Sambuc	veor		d25,d24
955*0a6a1f1dSLionel Sambuc	vbsl		d29,d16,d17		@ Ch(e,f,g)
956*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#28
957*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
958*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d18
959*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#34
960*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#36
961*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
962*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#39
963*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d13
964*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#30
965*0a6a1f1dSLionel Sambuc	veor		d30,d19,d20
966*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#25
967*0a6a1f1dSLionel Sambuc	veor		d18,d24,d25
968*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
969*0a6a1f1dSLionel Sambuc	vbsl		d30,d21,d20		@ Maj(a,b,c)
970*0a6a1f1dSLionel Sambuc	veor		d18,d26			@ Sigma0(a)
971*0a6a1f1dSLionel Sambuc	vadd.i64	d22,d27
972*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
973*0a6a1f1dSLionel Sambuc	@ vadd.i64	d18,d30
974*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#14	@ 14
975*0a6a1f1dSLionel Sambuc#if 14<16
976*0a6a1f1dSLionel Sambuc	vld1.64		{d14},[r1]!	@ handles unaligned
977*0a6a1f1dSLionel Sambuc#endif
978*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#18
979*0a6a1f1dSLionel Sambuc#if 14>0
980*0a6a1f1dSLionel Sambuc	 vadd.i64	d18,d30			@ h+=Maj from the past
981*0a6a1f1dSLionel Sambuc#endif
982*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#41
983*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
984*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#50
985*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#46
986*0a6a1f1dSLionel Sambuc	vmov		d29,d22
987*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#23
988*0a6a1f1dSLionel Sambuc#if 14<16 && defined(__ARMEL__)
989*0a6a1f1dSLionel Sambuc	vrev64.8	d14,d14
990*0a6a1f1dSLionel Sambuc#endif
991*0a6a1f1dSLionel Sambuc	veor		d25,d24
992*0a6a1f1dSLionel Sambuc	vbsl		d29,d23,d16		@ Ch(e,f,g)
993*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#28
994*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
995*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d17
996*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#34
997*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#36
998*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
999*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#39
1000*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d14
1001*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#30
1002*0a6a1f1dSLionel Sambuc	veor		d30,d18,d19
1003*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#25
1004*0a6a1f1dSLionel Sambuc	veor		d17,d24,d25
1005*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1006*0a6a1f1dSLionel Sambuc	vbsl		d30,d20,d19		@ Maj(a,b,c)
1007*0a6a1f1dSLionel Sambuc	veor		d17,d26			@ Sigma0(a)
1008*0a6a1f1dSLionel Sambuc	vadd.i64	d21,d27
1009*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1010*0a6a1f1dSLionel Sambuc	@ vadd.i64	d17,d30
1011*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#14	@ 15
1012*0a6a1f1dSLionel Sambuc#if 15<16
1013*0a6a1f1dSLionel Sambuc	vld1.64		{d15},[r1]!	@ handles unaligned
1014*0a6a1f1dSLionel Sambuc#endif
1015*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#18
1016*0a6a1f1dSLionel Sambuc#if 15>0
1017*0a6a1f1dSLionel Sambuc	 vadd.i64	d17,d30			@ h+=Maj from the past
1018*0a6a1f1dSLionel Sambuc#endif
1019*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#41
1020*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1021*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#50
1022*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#46
1023*0a6a1f1dSLionel Sambuc	vmov		d29,d21
1024*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#23
1025*0a6a1f1dSLionel Sambuc#if 15<16 && defined(__ARMEL__)
1026*0a6a1f1dSLionel Sambuc	vrev64.8	d15,d15
1027*0a6a1f1dSLionel Sambuc#endif
1028*0a6a1f1dSLionel Sambuc	veor		d25,d24
1029*0a6a1f1dSLionel Sambuc	vbsl		d29,d22,d23		@ Ch(e,f,g)
1030*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#28
1031*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1032*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d16
1033*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#34
1034*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#36
1035*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1036*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#39
1037*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d15
1038*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#30
1039*0a6a1f1dSLionel Sambuc	veor		d30,d17,d18
1040*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#25
1041*0a6a1f1dSLionel Sambuc	veor		d16,d24,d25
1042*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1043*0a6a1f1dSLionel Sambuc	vbsl		d30,d19,d18		@ Maj(a,b,c)
1044*0a6a1f1dSLionel Sambuc	veor		d16,d26			@ Sigma0(a)
1045*0a6a1f1dSLionel Sambuc	vadd.i64	d20,d27
1046*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1047*0a6a1f1dSLionel Sambuc	@ vadd.i64	d16,d30
1048*0a6a1f1dSLionel Sambuc	mov		r12,#4
1049*0a6a1f1dSLionel Sambuc.L16_79_neon:
1050*0a6a1f1dSLionel Sambuc	subs		r12,#1
1051*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q7,#19
1052*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q7,#61
1053*0a6a1f1dSLionel Sambuc	 vadd.i64	d16,d30			@ h+=Maj from the past
1054*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q7,#6
1055*0a6a1f1dSLionel Sambuc	vsli.64		q12,q7,#45
1056*0a6a1f1dSLionel Sambuc	vext.8		q14,q0,q1,#8	@ X[i+1]
1057*0a6a1f1dSLionel Sambuc	vsli.64		q13,q7,#3
1058*0a6a1f1dSLionel Sambuc	veor		q15,q12
1059*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1060*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1061*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1062*0a6a1f1dSLionel Sambuc	vadd.i64	q0,q15
1063*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1064*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1065*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1066*0a6a1f1dSLionel Sambuc	vext.8		q14,q4,q5,#8	@ X[i+9]
1067*0a6a1f1dSLionel Sambuc	veor		q15,q12
1068*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#14		@ from NEON_00_15
1069*0a6a1f1dSLionel Sambuc	vadd.i64	q0,q14
1070*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#18		@ from NEON_00_15
1071*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1072*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#41		@ from NEON_00_15
1073*0a6a1f1dSLionel Sambuc	vadd.i64	q0,q15
1074*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1075*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#50
1076*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#46
1077*0a6a1f1dSLionel Sambuc	vmov		d29,d20
1078*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#23
1079*0a6a1f1dSLionel Sambuc#if 16<16 && defined(__ARMEL__)
1080*0a6a1f1dSLionel Sambuc	vrev64.8	,
1081*0a6a1f1dSLionel Sambuc#endif
1082*0a6a1f1dSLionel Sambuc	veor		d25,d24
1083*0a6a1f1dSLionel Sambuc	vbsl		d29,d21,d22		@ Ch(e,f,g)
1084*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#28
1085*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1086*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d23
1087*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#34
1088*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#36
1089*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1090*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#39
1091*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d0
1092*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#30
1093*0a6a1f1dSLionel Sambuc	veor		d30,d16,d17
1094*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#25
1095*0a6a1f1dSLionel Sambuc	veor		d23,d24,d25
1096*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1097*0a6a1f1dSLionel Sambuc	vbsl		d30,d18,d17		@ Maj(a,b,c)
1098*0a6a1f1dSLionel Sambuc	veor		d23,d26			@ Sigma0(a)
1099*0a6a1f1dSLionel Sambuc	vadd.i64	d19,d27
1100*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1101*0a6a1f1dSLionel Sambuc	@ vadd.i64	d23,d30
1102*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#14	@ 17
1103*0a6a1f1dSLionel Sambuc#if 17<16
1104*0a6a1f1dSLionel Sambuc	vld1.64		{d1},[r1]!	@ handles unaligned
1105*0a6a1f1dSLionel Sambuc#endif
1106*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#18
1107*0a6a1f1dSLionel Sambuc#if 17>0
1108*0a6a1f1dSLionel Sambuc	 vadd.i64	d23,d30			@ h+=Maj from the past
1109*0a6a1f1dSLionel Sambuc#endif
1110*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#41
1111*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1112*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#50
1113*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#46
1114*0a6a1f1dSLionel Sambuc	vmov		d29,d19
1115*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#23
1116*0a6a1f1dSLionel Sambuc#if 17<16 && defined(__ARMEL__)
1117*0a6a1f1dSLionel Sambuc	vrev64.8	,
1118*0a6a1f1dSLionel Sambuc#endif
1119*0a6a1f1dSLionel Sambuc	veor		d25,d24
1120*0a6a1f1dSLionel Sambuc	vbsl		d29,d20,d21		@ Ch(e,f,g)
1121*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#28
1122*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1123*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d22
1124*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#34
1125*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#36
1126*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1127*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#39
1128*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d1
1129*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#30
1130*0a6a1f1dSLionel Sambuc	veor		d30,d23,d16
1131*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#25
1132*0a6a1f1dSLionel Sambuc	veor		d22,d24,d25
1133*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1134*0a6a1f1dSLionel Sambuc	vbsl		d30,d17,d16		@ Maj(a,b,c)
1135*0a6a1f1dSLionel Sambuc	veor		d22,d26			@ Sigma0(a)
1136*0a6a1f1dSLionel Sambuc	vadd.i64	d18,d27
1137*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1138*0a6a1f1dSLionel Sambuc	@ vadd.i64	d22,d30
1139*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q0,#19
1140*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q0,#61
1141*0a6a1f1dSLionel Sambuc	 vadd.i64	d22,d30			@ h+=Maj from the past
1142*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q0,#6
1143*0a6a1f1dSLionel Sambuc	vsli.64		q12,q0,#45
1144*0a6a1f1dSLionel Sambuc	vext.8		q14,q1,q2,#8	@ X[i+1]
1145*0a6a1f1dSLionel Sambuc	vsli.64		q13,q0,#3
1146*0a6a1f1dSLionel Sambuc	veor		q15,q12
1147*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1148*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1149*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1150*0a6a1f1dSLionel Sambuc	vadd.i64	q1,q15
1151*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1152*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1153*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1154*0a6a1f1dSLionel Sambuc	vext.8		q14,q5,q6,#8	@ X[i+9]
1155*0a6a1f1dSLionel Sambuc	veor		q15,q12
1156*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#14		@ from NEON_00_15
1157*0a6a1f1dSLionel Sambuc	vadd.i64	q1,q14
1158*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#18		@ from NEON_00_15
1159*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1160*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#41		@ from NEON_00_15
1161*0a6a1f1dSLionel Sambuc	vadd.i64	q1,q15
1162*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1163*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#50
1164*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#46
1165*0a6a1f1dSLionel Sambuc	vmov		d29,d18
1166*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#23
1167*0a6a1f1dSLionel Sambuc#if 18<16 && defined(__ARMEL__)
1168*0a6a1f1dSLionel Sambuc	vrev64.8	,
1169*0a6a1f1dSLionel Sambuc#endif
1170*0a6a1f1dSLionel Sambuc	veor		d25,d24
1171*0a6a1f1dSLionel Sambuc	vbsl		d29,d19,d20		@ Ch(e,f,g)
1172*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#28
1173*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1174*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d21
1175*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#34
1176*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#36
1177*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1178*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#39
1179*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d2
1180*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#30
1181*0a6a1f1dSLionel Sambuc	veor		d30,d22,d23
1182*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#25
1183*0a6a1f1dSLionel Sambuc	veor		d21,d24,d25
1184*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1185*0a6a1f1dSLionel Sambuc	vbsl		d30,d16,d23		@ Maj(a,b,c)
1186*0a6a1f1dSLionel Sambuc	veor		d21,d26			@ Sigma0(a)
1187*0a6a1f1dSLionel Sambuc	vadd.i64	d17,d27
1188*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1189*0a6a1f1dSLionel Sambuc	@ vadd.i64	d21,d30
1190*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#14	@ 19
1191*0a6a1f1dSLionel Sambuc#if 19<16
1192*0a6a1f1dSLionel Sambuc	vld1.64		{d3},[r1]!	@ handles unaligned
1193*0a6a1f1dSLionel Sambuc#endif
1194*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#18
1195*0a6a1f1dSLionel Sambuc#if 19>0
1196*0a6a1f1dSLionel Sambuc	 vadd.i64	d21,d30			@ h+=Maj from the past
1197*0a6a1f1dSLionel Sambuc#endif
1198*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#41
1199*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1200*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#50
1201*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#46
1202*0a6a1f1dSLionel Sambuc	vmov		d29,d17
1203*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#23
1204*0a6a1f1dSLionel Sambuc#if 19<16 && defined(__ARMEL__)
1205*0a6a1f1dSLionel Sambuc	vrev64.8	,
1206*0a6a1f1dSLionel Sambuc#endif
1207*0a6a1f1dSLionel Sambuc	veor		d25,d24
1208*0a6a1f1dSLionel Sambuc	vbsl		d29,d18,d19		@ Ch(e,f,g)
1209*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#28
1210*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1211*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d20
1212*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#34
1213*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#36
1214*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1215*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#39
1216*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d3
1217*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#30
1218*0a6a1f1dSLionel Sambuc	veor		d30,d21,d22
1219*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#25
1220*0a6a1f1dSLionel Sambuc	veor		d20,d24,d25
1221*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1222*0a6a1f1dSLionel Sambuc	vbsl		d30,d23,d22		@ Maj(a,b,c)
1223*0a6a1f1dSLionel Sambuc	veor		d20,d26			@ Sigma0(a)
1224*0a6a1f1dSLionel Sambuc	vadd.i64	d16,d27
1225*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1226*0a6a1f1dSLionel Sambuc	@ vadd.i64	d20,d30
1227*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q1,#19
1228*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q1,#61
1229*0a6a1f1dSLionel Sambuc	 vadd.i64	d20,d30			@ h+=Maj from the past
1230*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q1,#6
1231*0a6a1f1dSLionel Sambuc	vsli.64		q12,q1,#45
1232*0a6a1f1dSLionel Sambuc	vext.8		q14,q2,q3,#8	@ X[i+1]
1233*0a6a1f1dSLionel Sambuc	vsli.64		q13,q1,#3
1234*0a6a1f1dSLionel Sambuc	veor		q15,q12
1235*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1236*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1237*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1238*0a6a1f1dSLionel Sambuc	vadd.i64	q2,q15
1239*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1240*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1241*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1242*0a6a1f1dSLionel Sambuc	vext.8		q14,q6,q7,#8	@ X[i+9]
1243*0a6a1f1dSLionel Sambuc	veor		q15,q12
1244*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#14		@ from NEON_00_15
1245*0a6a1f1dSLionel Sambuc	vadd.i64	q2,q14
1246*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#18		@ from NEON_00_15
1247*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1248*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#41		@ from NEON_00_15
1249*0a6a1f1dSLionel Sambuc	vadd.i64	q2,q15
1250*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1251*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#50
1252*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#46
1253*0a6a1f1dSLionel Sambuc	vmov		d29,d16
1254*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#23
1255*0a6a1f1dSLionel Sambuc#if 20<16 && defined(__ARMEL__)
1256*0a6a1f1dSLionel Sambuc	vrev64.8	,
1257*0a6a1f1dSLionel Sambuc#endif
1258*0a6a1f1dSLionel Sambuc	veor		d25,d24
1259*0a6a1f1dSLionel Sambuc	vbsl		d29,d17,d18		@ Ch(e,f,g)
1260*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#28
1261*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1262*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d19
1263*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#34
1264*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#36
1265*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1266*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#39
1267*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d4
1268*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#30
1269*0a6a1f1dSLionel Sambuc	veor		d30,d20,d21
1270*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#25
1271*0a6a1f1dSLionel Sambuc	veor		d19,d24,d25
1272*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1273*0a6a1f1dSLionel Sambuc	vbsl		d30,d22,d21		@ Maj(a,b,c)
1274*0a6a1f1dSLionel Sambuc	veor		d19,d26			@ Sigma0(a)
1275*0a6a1f1dSLionel Sambuc	vadd.i64	d23,d27
1276*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1277*0a6a1f1dSLionel Sambuc	@ vadd.i64	d19,d30
1278*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#14	@ 21
1279*0a6a1f1dSLionel Sambuc#if 21<16
1280*0a6a1f1dSLionel Sambuc	vld1.64		{d5},[r1]!	@ handles unaligned
1281*0a6a1f1dSLionel Sambuc#endif
1282*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#18
1283*0a6a1f1dSLionel Sambuc#if 21>0
1284*0a6a1f1dSLionel Sambuc	 vadd.i64	d19,d30			@ h+=Maj from the past
1285*0a6a1f1dSLionel Sambuc#endif
1286*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#41
1287*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1288*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#50
1289*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#46
1290*0a6a1f1dSLionel Sambuc	vmov		d29,d23
1291*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#23
1292*0a6a1f1dSLionel Sambuc#if 21<16 && defined(__ARMEL__)
1293*0a6a1f1dSLionel Sambuc	vrev64.8	,
1294*0a6a1f1dSLionel Sambuc#endif
1295*0a6a1f1dSLionel Sambuc	veor		d25,d24
1296*0a6a1f1dSLionel Sambuc	vbsl		d29,d16,d17		@ Ch(e,f,g)
1297*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#28
1298*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1299*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d18
1300*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#34
1301*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#36
1302*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1303*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#39
1304*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d5
1305*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#30
1306*0a6a1f1dSLionel Sambuc	veor		d30,d19,d20
1307*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#25
1308*0a6a1f1dSLionel Sambuc	veor		d18,d24,d25
1309*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1310*0a6a1f1dSLionel Sambuc	vbsl		d30,d21,d20		@ Maj(a,b,c)
1311*0a6a1f1dSLionel Sambuc	veor		d18,d26			@ Sigma0(a)
1312*0a6a1f1dSLionel Sambuc	vadd.i64	d22,d27
1313*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1314*0a6a1f1dSLionel Sambuc	@ vadd.i64	d18,d30
@ Message schedule for the q3 word pair, interleaved with round 22
@ (NEON_16_79 pattern): sigma1(X[i+14]) from q2, sigma0(X[i+1]) from the
@ vext-combined q14, X[i+9] folded in -- two schedule words per q update.
1315*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q2,#19
1316*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q2,#61
1317*0a6a1f1dSLionel Sambuc	 vadd.i64	d18,d30			@ h+=Maj from the past
1318*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q2,#6
1319*0a6a1f1dSLionel Sambuc	vsli.64		q12,q2,#45
1320*0a6a1f1dSLionel Sambuc	vext.8		q14,q3,q4,#8	@ X[i+1]
1321*0a6a1f1dSLionel Sambuc	vsli.64		q13,q2,#3
1322*0a6a1f1dSLionel Sambuc	veor		q15,q12
1323*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1324*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1325*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1326*0a6a1f1dSLionel Sambuc	vadd.i64	q3,q15
1327*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1328*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1329*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1330*0a6a1f1dSLionel Sambuc	vext.8		q14,q7,q0,#8	@ X[i+9]
1331*0a6a1f1dSLionel Sambuc	veor		q15,q12
@ Round 22 compression is interleaved with the schedule from here on.
1332*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#14		@ from NEON_00_15
1333*0a6a1f1dSLionel Sambuc	vadd.i64	q3,q14
1334*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#18		@ from NEON_00_15
1335*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1336*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#41		@ from NEON_00_15
1337*0a6a1f1dSLionel Sambuc	vadd.i64	q3,q15
1338*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1339*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#50
1340*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#46
1341*0a6a1f1dSLionel Sambuc	vmov		d29,d22
1342*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#23
@ Dead: 22<16 is false; the malformed vrev64.8 is never assembled.
1343*0a6a1f1dSLionel Sambuc#if 22<16 && defined(__ARMEL__)
1344*0a6a1f1dSLionel Sambuc	vrev64.8	,
1345*0a6a1f1dSLionel Sambuc#endif
1346*0a6a1f1dSLionel Sambuc	veor		d25,d24
1347*0a6a1f1dSLionel Sambuc	vbsl		d29,d23,d16		@ Ch(e,f,g)
1348*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#28
1349*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1350*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d17
1351*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#34
1352*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#36
1353*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1354*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#39
1355*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d6
1356*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#30
1357*0a6a1f1dSLionel Sambuc	veor		d30,d18,d19
1358*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#25
1359*0a6a1f1dSLionel Sambuc	veor		d17,d24,d25
1360*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1361*0a6a1f1dSLionel Sambuc	vbsl		d30,d20,d19		@ Maj(a,b,c)
1362*0a6a1f1dSLionel Sambuc	veor		d17,d26			@ Sigma0(a)
1363*0a6a1f1dSLionel Sambuc	vadd.i64	d21,d27
1364*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1365*0a6a1f1dSLionel Sambuc	@ vadd.i64	d17,d30
@ Round 23 (NEON_00_15 pattern): e=d21, message word X[23] in d7.
1366*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#14	@ 23
1367*0a6a1f1dSLionel Sambuc#if 23<16
1368*0a6a1f1dSLionel Sambuc	vld1.64		{d7},[r1]!	@ handles unaligned
1369*0a6a1f1dSLionel Sambuc#endif
1370*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#18
1371*0a6a1f1dSLionel Sambuc#if 23>0
1372*0a6a1f1dSLionel Sambuc	 vadd.i64	d17,d30			@ h+=Maj from the past
1373*0a6a1f1dSLionel Sambuc#endif
1374*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#41
1375*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1376*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#50
1377*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#46
1378*0a6a1f1dSLionel Sambuc	vmov		d29,d21
1379*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#23
1380*0a6a1f1dSLionel Sambuc#if 23<16 && defined(__ARMEL__)
1381*0a6a1f1dSLionel Sambuc	vrev64.8	,
1382*0a6a1f1dSLionel Sambuc#endif
1383*0a6a1f1dSLionel Sambuc	veor		d25,d24
1384*0a6a1f1dSLionel Sambuc	vbsl		d29,d22,d23		@ Ch(e,f,g)
1385*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#28
1386*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1387*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d16
1388*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#34
1389*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#36
1390*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1391*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#39
1392*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d7
1393*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#30
1394*0a6a1f1dSLionel Sambuc	veor		d30,d17,d18
1395*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#25
1396*0a6a1f1dSLionel Sambuc	veor		d16,d24,d25
1397*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1398*0a6a1f1dSLionel Sambuc	vbsl		d30,d19,d18		@ Maj(a,b,c)
1399*0a6a1f1dSLionel Sambuc	veor		d16,d26			@ Sigma0(a)
1400*0a6a1f1dSLionel Sambuc	vadd.i64	d20,d27
1401*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1402*0a6a1f1dSLionel Sambuc	@ vadd.i64	d16,d30
@ Message schedule for the q4 word pair, interleaved with round 24,
@ followed by round 25.  Same NEON_16_79 / NEON_00_15 patterns as the
@ previous pair; only the state-register roles and X registers rotate.
1403*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q3,#19
1404*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q3,#61
1405*0a6a1f1dSLionel Sambuc	 vadd.i64	d16,d30			@ h+=Maj from the past
1406*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q3,#6
1407*0a6a1f1dSLionel Sambuc	vsli.64		q12,q3,#45
1408*0a6a1f1dSLionel Sambuc	vext.8		q14,q4,q5,#8	@ X[i+1]
1409*0a6a1f1dSLionel Sambuc	vsli.64		q13,q3,#3
1410*0a6a1f1dSLionel Sambuc	veor		q15,q12
1411*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1412*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1413*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1414*0a6a1f1dSLionel Sambuc	vadd.i64	q4,q15
1415*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1416*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1417*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1418*0a6a1f1dSLionel Sambuc	vext.8		q14,q0,q1,#8	@ X[i+9]
1419*0a6a1f1dSLionel Sambuc	veor		q15,q12
1420*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#14		@ from NEON_00_15
1421*0a6a1f1dSLionel Sambuc	vadd.i64	q4,q14
1422*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#18		@ from NEON_00_15
1423*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1424*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#41		@ from NEON_00_15
1425*0a6a1f1dSLionel Sambuc	vadd.i64	q4,q15
1426*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1427*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#50
1428*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#46
1429*0a6a1f1dSLionel Sambuc	vmov		d29,d20
1430*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#23
@ Dead: 24<16 is false; never assembled (generator artifact).
1431*0a6a1f1dSLionel Sambuc#if 24<16 && defined(__ARMEL__)
1432*0a6a1f1dSLionel Sambuc	vrev64.8	,
1433*0a6a1f1dSLionel Sambuc#endif
1434*0a6a1f1dSLionel Sambuc	veor		d25,d24
1435*0a6a1f1dSLionel Sambuc	vbsl		d29,d21,d22		@ Ch(e,f,g)
1436*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#28
1437*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1438*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d23
1439*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#34
1440*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#36
1441*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1442*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#39
1443*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d8
1444*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#30
1445*0a6a1f1dSLionel Sambuc	veor		d30,d16,d17
1446*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#25
1447*0a6a1f1dSLionel Sambuc	veor		d23,d24,d25
1448*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1449*0a6a1f1dSLionel Sambuc	vbsl		d30,d18,d17		@ Maj(a,b,c)
1450*0a6a1f1dSLionel Sambuc	veor		d23,d26			@ Sigma0(a)
1451*0a6a1f1dSLionel Sambuc	vadd.i64	d19,d27
1452*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1453*0a6a1f1dSLionel Sambuc	@ vadd.i64	d23,d30
@ Round 25 (NEON_00_15 pattern): e=d19, message word X[25] in d9.
1454*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#14	@ 25
1455*0a6a1f1dSLionel Sambuc#if 25<16
1456*0a6a1f1dSLionel Sambuc	vld1.64		{d9},[r1]!	@ handles unaligned
1457*0a6a1f1dSLionel Sambuc#endif
1458*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#18
1459*0a6a1f1dSLionel Sambuc#if 25>0
1460*0a6a1f1dSLionel Sambuc	 vadd.i64	d23,d30			@ h+=Maj from the past
1461*0a6a1f1dSLionel Sambuc#endif
1462*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#41
1463*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1464*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#50
1465*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#46
1466*0a6a1f1dSLionel Sambuc	vmov		d29,d19
1467*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#23
1468*0a6a1f1dSLionel Sambuc#if 25<16 && defined(__ARMEL__)
1469*0a6a1f1dSLionel Sambuc	vrev64.8	,
1470*0a6a1f1dSLionel Sambuc#endif
1471*0a6a1f1dSLionel Sambuc	veor		d25,d24
1472*0a6a1f1dSLionel Sambuc	vbsl		d29,d20,d21		@ Ch(e,f,g)
1473*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#28
1474*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1475*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d22
1476*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#34
1477*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#36
1478*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1479*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#39
1480*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d9
1481*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#30
1482*0a6a1f1dSLionel Sambuc	veor		d30,d23,d16
1483*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#25
1484*0a6a1f1dSLionel Sambuc	veor		d22,d24,d25
1485*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1486*0a6a1f1dSLionel Sambuc	vbsl		d30,d17,d16		@ Maj(a,b,c)
1487*0a6a1f1dSLionel Sambuc	veor		d22,d26			@ Sigma0(a)
1488*0a6a1f1dSLionel Sambuc	vadd.i64	d18,d27
1489*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1490*0a6a1f1dSLionel Sambuc	@ vadd.i64	d22,d30
@ Message schedule for the q5 word pair, interleaved with round 26,
@ followed by round 27.  Same two patterns as the previous pairs.
1491*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q4,#19
1492*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q4,#61
1493*0a6a1f1dSLionel Sambuc	 vadd.i64	d22,d30			@ h+=Maj from the past
1494*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q4,#6
1495*0a6a1f1dSLionel Sambuc	vsli.64		q12,q4,#45
1496*0a6a1f1dSLionel Sambuc	vext.8		q14,q5,q6,#8	@ X[i+1]
1497*0a6a1f1dSLionel Sambuc	vsli.64		q13,q4,#3
1498*0a6a1f1dSLionel Sambuc	veor		q15,q12
1499*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1500*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1501*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1502*0a6a1f1dSLionel Sambuc	vadd.i64	q5,q15
1503*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1504*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1505*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1506*0a6a1f1dSLionel Sambuc	vext.8		q14,q1,q2,#8	@ X[i+9]
1507*0a6a1f1dSLionel Sambuc	veor		q15,q12
1508*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#14		@ from NEON_00_15
1509*0a6a1f1dSLionel Sambuc	vadd.i64	q5,q14
1510*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#18		@ from NEON_00_15
1511*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1512*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#41		@ from NEON_00_15
1513*0a6a1f1dSLionel Sambuc	vadd.i64	q5,q15
1514*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1515*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#50
1516*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#46
1517*0a6a1f1dSLionel Sambuc	vmov		d29,d18
1518*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#23
@ Dead: 26<16 is false; never assembled (generator artifact).
1519*0a6a1f1dSLionel Sambuc#if 26<16 && defined(__ARMEL__)
1520*0a6a1f1dSLionel Sambuc	vrev64.8	,
1521*0a6a1f1dSLionel Sambuc#endif
1522*0a6a1f1dSLionel Sambuc	veor		d25,d24
1523*0a6a1f1dSLionel Sambuc	vbsl		d29,d19,d20		@ Ch(e,f,g)
1524*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#28
1525*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1526*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d21
1527*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#34
1528*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#36
1529*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1530*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#39
1531*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d10
1532*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#30
1533*0a6a1f1dSLionel Sambuc	veor		d30,d22,d23
1534*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#25
1535*0a6a1f1dSLionel Sambuc	veor		d21,d24,d25
1536*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1537*0a6a1f1dSLionel Sambuc	vbsl		d30,d16,d23		@ Maj(a,b,c)
1538*0a6a1f1dSLionel Sambuc	veor		d21,d26			@ Sigma0(a)
1539*0a6a1f1dSLionel Sambuc	vadd.i64	d17,d27
1540*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1541*0a6a1f1dSLionel Sambuc	@ vadd.i64	d21,d30
@ Round 27 (NEON_00_15 pattern): e=d17, message word X[27] in d11.
1542*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#14	@ 27
1543*0a6a1f1dSLionel Sambuc#if 27<16
1544*0a6a1f1dSLionel Sambuc	vld1.64		{d11},[r1]!	@ handles unaligned
1545*0a6a1f1dSLionel Sambuc#endif
1546*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#18
1547*0a6a1f1dSLionel Sambuc#if 27>0
1548*0a6a1f1dSLionel Sambuc	 vadd.i64	d21,d30			@ h+=Maj from the past
1549*0a6a1f1dSLionel Sambuc#endif
1550*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#41
1551*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1552*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#50
1553*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#46
1554*0a6a1f1dSLionel Sambuc	vmov		d29,d17
1555*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#23
1556*0a6a1f1dSLionel Sambuc#if 27<16 && defined(__ARMEL__)
1557*0a6a1f1dSLionel Sambuc	vrev64.8	,
1558*0a6a1f1dSLionel Sambuc#endif
1559*0a6a1f1dSLionel Sambuc	veor		d25,d24
1560*0a6a1f1dSLionel Sambuc	vbsl		d29,d18,d19		@ Ch(e,f,g)
1561*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#28
1562*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1563*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d20
1564*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#34
1565*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#36
1566*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1567*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#39
1568*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d11
1569*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#30
1570*0a6a1f1dSLionel Sambuc	veor		d30,d21,d22
1571*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#25
1572*0a6a1f1dSLionel Sambuc	veor		d20,d24,d25
1573*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1574*0a6a1f1dSLionel Sambuc	vbsl		d30,d23,d22		@ Maj(a,b,c)
1575*0a6a1f1dSLionel Sambuc	veor		d20,d26			@ Sigma0(a)
1576*0a6a1f1dSLionel Sambuc	vadd.i64	d16,d27
1577*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1578*0a6a1f1dSLionel Sambuc	@ vadd.i64	d20,d30
@ Message schedule for the q6 word pair, interleaved with round 28,
@ followed by round 29.  Same two patterns as the previous pairs.
1579*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q5,#19
1580*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q5,#61
1581*0a6a1f1dSLionel Sambuc	 vadd.i64	d20,d30			@ h+=Maj from the past
1582*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q5,#6
1583*0a6a1f1dSLionel Sambuc	vsli.64		q12,q5,#45
1584*0a6a1f1dSLionel Sambuc	vext.8		q14,q6,q7,#8	@ X[i+1]
1585*0a6a1f1dSLionel Sambuc	vsli.64		q13,q5,#3
1586*0a6a1f1dSLionel Sambuc	veor		q15,q12
1587*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1588*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1589*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1590*0a6a1f1dSLionel Sambuc	vadd.i64	q6,q15
1591*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1592*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1593*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1594*0a6a1f1dSLionel Sambuc	vext.8		q14,q2,q3,#8	@ X[i+9]
1595*0a6a1f1dSLionel Sambuc	veor		q15,q12
1596*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d16,#14		@ from NEON_00_15
1597*0a6a1f1dSLionel Sambuc	vadd.i64	q6,q14
1598*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d16,#18		@ from NEON_00_15
1599*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1600*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d16,#41		@ from NEON_00_15
1601*0a6a1f1dSLionel Sambuc	vadd.i64	q6,q15
1602*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1603*0a6a1f1dSLionel Sambuc	vsli.64		d24,d16,#50
1604*0a6a1f1dSLionel Sambuc	vsli.64		d25,d16,#46
1605*0a6a1f1dSLionel Sambuc	vmov		d29,d16
1606*0a6a1f1dSLionel Sambuc	vsli.64		d26,d16,#23
@ Dead: 28<16 is false; never assembled (generator artifact).
1607*0a6a1f1dSLionel Sambuc#if 28<16 && defined(__ARMEL__)
1608*0a6a1f1dSLionel Sambuc	vrev64.8	,
1609*0a6a1f1dSLionel Sambuc#endif
1610*0a6a1f1dSLionel Sambuc	veor		d25,d24
1611*0a6a1f1dSLionel Sambuc	vbsl		d29,d17,d18		@ Ch(e,f,g)
1612*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d20,#28
1613*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1614*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d19
1615*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d20,#34
1616*0a6a1f1dSLionel Sambuc	vsli.64		d24,d20,#36
1617*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1618*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d20,#39
1619*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d12
1620*0a6a1f1dSLionel Sambuc	vsli.64		d25,d20,#30
1621*0a6a1f1dSLionel Sambuc	veor		d30,d20,d21
1622*0a6a1f1dSLionel Sambuc	vsli.64		d26,d20,#25
1623*0a6a1f1dSLionel Sambuc	veor		d19,d24,d25
1624*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1625*0a6a1f1dSLionel Sambuc	vbsl		d30,d22,d21		@ Maj(a,b,c)
1626*0a6a1f1dSLionel Sambuc	veor		d19,d26			@ Sigma0(a)
1627*0a6a1f1dSLionel Sambuc	vadd.i64	d23,d27
1628*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1629*0a6a1f1dSLionel Sambuc	@ vadd.i64	d19,d30
@ Round 29 (NEON_00_15 pattern): e=d23, message word X[29] in d13.
1630*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d23,#14	@ 29
1631*0a6a1f1dSLionel Sambuc#if 29<16
1632*0a6a1f1dSLionel Sambuc	vld1.64		{d13},[r1]!	@ handles unaligned
1633*0a6a1f1dSLionel Sambuc#endif
1634*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d23,#18
1635*0a6a1f1dSLionel Sambuc#if 29>0
1636*0a6a1f1dSLionel Sambuc	 vadd.i64	d19,d30			@ h+=Maj from the past
1637*0a6a1f1dSLionel Sambuc#endif
1638*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d23,#41
1639*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1640*0a6a1f1dSLionel Sambuc	vsli.64		d24,d23,#50
1641*0a6a1f1dSLionel Sambuc	vsli.64		d25,d23,#46
1642*0a6a1f1dSLionel Sambuc	vmov		d29,d23
1643*0a6a1f1dSLionel Sambuc	vsli.64		d26,d23,#23
1644*0a6a1f1dSLionel Sambuc#if 29<16 && defined(__ARMEL__)
1645*0a6a1f1dSLionel Sambuc	vrev64.8	,
1646*0a6a1f1dSLionel Sambuc#endif
1647*0a6a1f1dSLionel Sambuc	veor		d25,d24
1648*0a6a1f1dSLionel Sambuc	vbsl		d29,d16,d17		@ Ch(e,f,g)
1649*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d19,#28
1650*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1651*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d18
1652*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d19,#34
1653*0a6a1f1dSLionel Sambuc	vsli.64		d24,d19,#36
1654*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1655*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d19,#39
1656*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d13
1657*0a6a1f1dSLionel Sambuc	vsli.64		d25,d19,#30
1658*0a6a1f1dSLionel Sambuc	veor		d30,d19,d20
1659*0a6a1f1dSLionel Sambuc	vsli.64		d26,d19,#25
1660*0a6a1f1dSLionel Sambuc	veor		d18,d24,d25
1661*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1662*0a6a1f1dSLionel Sambuc	vbsl		d30,d21,d20		@ Maj(a,b,c)
1663*0a6a1f1dSLionel Sambuc	veor		d18,d26			@ Sigma0(a)
1664*0a6a1f1dSLionel Sambuc	vadd.i64	d22,d27
1665*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1666*0a6a1f1dSLionel Sambuc	@ vadd.i64	d18,d30
@ Message schedule for the q7 word pair (last of this iteration),
@ interleaved with round 30, followed by round 31 -- the final pair
@ before the loop-back test in the 16..79 NEON round loop.
1667*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q6,#19
1668*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q6,#61
1669*0a6a1f1dSLionel Sambuc	 vadd.i64	d18,d30			@ h+=Maj from the past
1670*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q6,#6
1671*0a6a1f1dSLionel Sambuc	vsli.64		q12,q6,#45
1672*0a6a1f1dSLionel Sambuc	vext.8		q14,q7,q0,#8	@ X[i+1]
1673*0a6a1f1dSLionel Sambuc	vsli.64		q13,q6,#3
1674*0a6a1f1dSLionel Sambuc	veor		q15,q12
1675*0a6a1f1dSLionel Sambuc	vshr.u64	q12,q14,#1
1676*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma1(X[i+14])
1677*0a6a1f1dSLionel Sambuc	vshr.u64	q13,q14,#8
1678*0a6a1f1dSLionel Sambuc	vadd.i64	q7,q15
1679*0a6a1f1dSLionel Sambuc	vshr.u64	q15,q14,#7
1680*0a6a1f1dSLionel Sambuc	vsli.64		q12,q14,#63
1681*0a6a1f1dSLionel Sambuc	vsli.64		q13,q14,#56
1682*0a6a1f1dSLionel Sambuc	vext.8		q14,q3,q4,#8	@ X[i+9]
1683*0a6a1f1dSLionel Sambuc	veor		q15,q12
1684*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d22,#14		@ from NEON_00_15
1685*0a6a1f1dSLionel Sambuc	vadd.i64	q7,q14
1686*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d22,#18		@ from NEON_00_15
1687*0a6a1f1dSLionel Sambuc	veor		q15,q13				@ sigma0(X[i+1])
1688*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d22,#41		@ from NEON_00_15
1689*0a6a1f1dSLionel Sambuc	vadd.i64	q7,q15
1690*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1691*0a6a1f1dSLionel Sambuc	vsli.64		d24,d22,#50
1692*0a6a1f1dSLionel Sambuc	vsli.64		d25,d22,#46
1693*0a6a1f1dSLionel Sambuc	vmov		d29,d22
1694*0a6a1f1dSLionel Sambuc	vsli.64		d26,d22,#23
@ Dead: 30<16 is false; never assembled (generator artifact).
1695*0a6a1f1dSLionel Sambuc#if 30<16 && defined(__ARMEL__)
1696*0a6a1f1dSLionel Sambuc	vrev64.8	,
1697*0a6a1f1dSLionel Sambuc#endif
1698*0a6a1f1dSLionel Sambuc	veor		d25,d24
1699*0a6a1f1dSLionel Sambuc	vbsl		d29,d23,d16		@ Ch(e,f,g)
1700*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d18,#28
1701*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1702*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d17
1703*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d18,#34
1704*0a6a1f1dSLionel Sambuc	vsli.64		d24,d18,#36
1705*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1706*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d18,#39
1707*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d14
1708*0a6a1f1dSLionel Sambuc	vsli.64		d25,d18,#30
1709*0a6a1f1dSLionel Sambuc	veor		d30,d18,d19
1710*0a6a1f1dSLionel Sambuc	vsli.64		d26,d18,#25
1711*0a6a1f1dSLionel Sambuc	veor		d17,d24,d25
1712*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1713*0a6a1f1dSLionel Sambuc	vbsl		d30,d20,d19		@ Maj(a,b,c)
1714*0a6a1f1dSLionel Sambuc	veor		d17,d26			@ Sigma0(a)
1715*0a6a1f1dSLionel Sambuc	vadd.i64	d21,d27
1716*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1717*0a6a1f1dSLionel Sambuc	@ vadd.i64	d17,d30
@ Round 31 (NEON_00_15 pattern): e=d21, message word X[31] in d15.
1718*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d21,#14	@ 31
1719*0a6a1f1dSLionel Sambuc#if 31<16
1720*0a6a1f1dSLionel Sambuc	vld1.64		{d15},[r1]!	@ handles unaligned
1721*0a6a1f1dSLionel Sambuc#endif
1722*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d21,#18
1723*0a6a1f1dSLionel Sambuc#if 31>0
1724*0a6a1f1dSLionel Sambuc	 vadd.i64	d17,d30			@ h+=Maj from the past
1725*0a6a1f1dSLionel Sambuc#endif
1726*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d21,#41
1727*0a6a1f1dSLionel Sambuc	vld1.64		{d28},[r3,:64]!	@ K[i++]
1728*0a6a1f1dSLionel Sambuc	vsli.64		d24,d21,#50
1729*0a6a1f1dSLionel Sambuc	vsli.64		d25,d21,#46
1730*0a6a1f1dSLionel Sambuc	vmov		d29,d21
1731*0a6a1f1dSLionel Sambuc	vsli.64		d26,d21,#23
1732*0a6a1f1dSLionel Sambuc#if 31<16 && defined(__ARMEL__)
1733*0a6a1f1dSLionel Sambuc	vrev64.8	,
1734*0a6a1f1dSLionel Sambuc#endif
1735*0a6a1f1dSLionel Sambuc	veor		d25,d24
1736*0a6a1f1dSLionel Sambuc	vbsl		d29,d22,d23		@ Ch(e,f,g)
1737*0a6a1f1dSLionel Sambuc	vshr.u64	d24,d17,#28
1738*0a6a1f1dSLionel Sambuc	veor		d26,d25			@ Sigma1(e)
1739*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d29,d16
1740*0a6a1f1dSLionel Sambuc	vshr.u64	d25,d17,#34
1741*0a6a1f1dSLionel Sambuc	vsli.64		d24,d17,#36
1742*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d26
1743*0a6a1f1dSLionel Sambuc	vshr.u64	d26,d17,#39
1744*0a6a1f1dSLionel Sambuc	vadd.i64	d28,d15
1745*0a6a1f1dSLionel Sambuc	vsli.64		d25,d17,#30
1746*0a6a1f1dSLionel Sambuc	veor		d30,d17,d18
1747*0a6a1f1dSLionel Sambuc	vsli.64		d26,d17,#25
1748*0a6a1f1dSLionel Sambuc	veor		d16,d24,d25
1749*0a6a1f1dSLionel Sambuc	vadd.i64	d27,d28
1750*0a6a1f1dSLionel Sambuc	vbsl		d30,d19,d18		@ Maj(a,b,c)
1751*0a6a1f1dSLionel Sambuc	veor		d16,d26			@ Sigma0(a)
1752*0a6a1f1dSLionel Sambuc	vadd.i64	d20,d27
1753*0a6a1f1dSLionel Sambuc	vadd.i64	d30,d27
1754*0a6a1f1dSLionel Sambuc	@ vadd.i64	d16,d30
@ Close of the 16..79 round loop and the per-block epilogue.
@ Register roles visible here: r0 = context (H[0..7]), r1 = input
@ pointer, r2 = input end (compared via teq), r3 = K512 table pointer.
1755*0a6a1f1dSLionel Sambuc	bne		.L16_79_neon
1756*0a6a1f1dSLionel Sambuc
@ Settle the last deferred "h += Maj", then add this block's working
@ state (d16-d23 = q8-q11) into the saved context using 128-bit adds.
1757*0a6a1f1dSLionel Sambuc	 vadd.i64	d16,d30		@ h+=Maj from the past
1758*0a6a1f1dSLionel Sambuc	vldmia		r0,{d24-d31}	@ load context to temp
1759*0a6a1f1dSLionel Sambuc	vadd.i64	q8,q12		@ vectorized accumulate
1760*0a6a1f1dSLionel Sambuc	vadd.i64	q9,q13
1761*0a6a1f1dSLionel Sambuc	vadd.i64	q10,q14
1762*0a6a1f1dSLionel Sambuc	vadd.i64	q11,q15
1763*0a6a1f1dSLionel Sambuc	vstmia		r0,{d16-d23}	@ save context
1764*0a6a1f1dSLionel Sambuc	teq		r1,r2
@ 640 = 80 round constants x 8 bytes, advancing r3 back to K512[0].
1765*0a6a1f1dSLionel Sambuc	sub		r3,#640	@ rewind K512
1766*0a6a1f1dSLionel Sambuc	bne		.Loop_neon
1767*0a6a1f1dSLionel Sambuc
@ Restore AAPCS callee-saved NEON registers d8-d15 and return.
1768*0a6a1f1dSLionel Sambuc	vldmia	sp!,{d8-d15}		@ epilogue
1769*0a6a1f1dSLionel Sambuc	RET				@ .word	0xe12fff1e
1770*0a6a1f1dSLionel Sambuc#endif
1771*0a6a1f1dSLionel Sambuc.size	sha512_block_data_order,.-sha512_block_data_order
1772*0a6a1f1dSLionel Sambuc.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
1773*0a6a1f1dSLionel Sambuc.align	2
@ Capability word probed at runtime to select the NEON code path.
1774*0a6a1f1dSLionel Sambuc#if __ARM_MAX_ARCH__>=7
1775*0a6a1f1dSLionel Sambuc.comm	OPENSSL_armcap_P,4,4
1776*0a6a1f1dSLionel Sambuc#endif
1777