xref: /minix3/crypto/external/bsd/openssl/lib/libcrypto/arch/arm/sha256-armv4.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1#include "arm_arch.h"
2#include "arm_asm.h"
3
4.text
5.code	32
6
7.type	K256,%object
8.align	5
@ K256: the 64 32-bit SHA-256 round constants K[0..63] as specified in
@ FIPS 180-4 (first word 0x428a2f98, last word 0xc67178f2).  The round
@ code walks this table with a post-incremented pointer in r14
@ ("ldr r12,[r14],#4  @ *K256++" below).
9K256:
10.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
11.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
12.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
13.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
14.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
15.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
16.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
17.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
18.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
19.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
20.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
21.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
22.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
23.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
24.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
25.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
26.size	K256,.-K256
@ Zero word placed immediately after the table.  NOTE(review): the round
@ loop below tests "and r12,r12,#0xff / cmp r12,#0xf2" (low byte of the
@ final constant 0xc67178f2) as its end-of-schedule check; this extra
@ word presumably serves as a sentinel read past the table end — the
@ code consuming it is outside this chunk, confirm against the full file.
27.word	0				@ terminator
28#if __ARM_MAX_ARCH__>=7
@ PC-relative link to the runtime CPU-capability word: stores the offset
@ OPENSSL_armcap_P - sha256_block_data_order.  The function entry puts
@ its own address in r3 ("sub r3,pc,#8") and then does
@ "ldr r12,[r3,r12]" to fetch OPENSSL_armcap_P, so the code stays
@ position-independent (no absolute relocation needed).  The loaded bits
@ select the ARMv8-crypto (.LARMv8) or NEON (.LNEON) fast paths.
29.LOPENSSL_armcap:
30.word	OPENSSL_armcap_P-sha256_block_data_order
31#endif
32.align	5
33
34.global	sha256_block_data_order
35.type	sha256_block_data_order,%function
36sha256_block_data_order:
37	sub	r3,pc,#8		@ sha256_block_data_order
38	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
39#if __ARM_MAX_ARCH__>=7
40	ldr	r12,.LOPENSSL_armcap
41	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
42	tst	r12,#ARMV8_SHA256
43	bne	.LARMv8
44	tst	r12,#ARMV7_NEON
45	bne	.LNEON
46#endif
47	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
48	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
49	sub	r14,r3,#256+32	@ K256
50	sub	sp,sp,#16*4		@ alloca(X[16])
51.Loop:
52# if __ARM_ARCH__>=7
53	ldr	r2,[r1],#4
54# else
55	ldrb	r2,[r1,#3]
56# endif
57	eor	r3,r5,r6		@ magic
58	eor	r12,r12,r12
59#if __ARM_ARCH__>=7
60	@ ldr	r2,[r1],#4			@ 0
61# if 0==15
62	str	r1,[sp,#17*4]			@ make room for r1
63# endif
64	eor	r0,r8,r8,ror#5
65	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
66	eor	r0,r0,r8,ror#19	@ Sigma1(e)
67	rev	r2,r2
68#else
69	@ ldrb	r2,[r1,#3]			@ 0
70	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
71	ldrb	r12,[r1,#2]
72	ldrb	r0,[r1,#1]
73	orr	r2,r2,r12,lsl#8
74	ldrb	r12,[r1],#4
75	orr	r2,r2,r0,lsl#16
76# if 0==15
77	str	r1,[sp,#17*4]			@ make room for r1
78# endif
79	eor	r0,r8,r8,ror#5
80	orr	r2,r2,r12,lsl#24
81	eor	r0,r0,r8,ror#19	@ Sigma1(e)
82#endif
83	ldr	r12,[r14],#4			@ *K256++
84	add	r11,r11,r2			@ h+=X[i]
85	str	r2,[sp,#0*4]
86	eor	r2,r9,r10
87	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
88	and	r2,r2,r8
89	add	r11,r11,r12			@ h+=K256[i]
90	eor	r2,r2,r10			@ Ch(e,f,g)
91	eor	r0,r4,r4,ror#11
92	add	r11,r11,r2			@ h+=Ch(e,f,g)
93#if 0==31
94	and	r12,r12,#0xff
95	cmp	r12,#0xf2			@ done?
96#endif
97#if 0<15
98# if __ARM_ARCH__>=7
99	ldr	r2,[r1],#4			@ prefetch
100# else
101	ldrb	r2,[r1,#3]
102# endif
103	eor	r12,r4,r5			@ a^b, b^c in next round
104#else
105	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
106	eor	r12,r4,r5			@ a^b, b^c in next round
107	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
108#endif
109	eor	r0,r0,r4,ror#20	@ Sigma0(a)
110	and	r3,r3,r12			@ (b^c)&=(a^b)
111	add	r7,r7,r11			@ d+=h
112	eor	r3,r3,r5			@ Maj(a,b,c)
113	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
114	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
115#if __ARM_ARCH__>=7
116	@ ldr	r2,[r1],#4			@ 1
117# if 1==15
118	str	r1,[sp,#17*4]			@ make room for r1
119# endif
120	eor	r0,r7,r7,ror#5
121	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
122	eor	r0,r0,r7,ror#19	@ Sigma1(e)
123	rev	r2,r2
124#else
125	@ ldrb	r2,[r1,#3]			@ 1
126	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
127	ldrb	r3,[r1,#2]
128	ldrb	r0,[r1,#1]
129	orr	r2,r2,r3,lsl#8
130	ldrb	r3,[r1],#4
131	orr	r2,r2,r0,lsl#16
132# if 1==15
133	str	r1,[sp,#17*4]			@ make room for r1
134# endif
135	eor	r0,r7,r7,ror#5
136	orr	r2,r2,r3,lsl#24
137	eor	r0,r0,r7,ror#19	@ Sigma1(e)
138#endif
139	ldr	r3,[r14],#4			@ *K256++
140	add	r10,r10,r2			@ h+=X[i]
141	str	r2,[sp,#1*4]
142	eor	r2,r8,r9
143	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
144	and	r2,r2,r7
145	add	r10,r10,r3			@ h+=K256[i]
146	eor	r2,r2,r9			@ Ch(e,f,g)
147	eor	r0,r11,r11,ror#11
148	add	r10,r10,r2			@ h+=Ch(e,f,g)
149#if 1==31
150	and	r3,r3,#0xff
151	cmp	r3,#0xf2			@ done?
152#endif
153#if 1<15
154# if __ARM_ARCH__>=7
155	ldr	r2,[r1],#4			@ prefetch
156# else
157	ldrb	r2,[r1,#3]
158# endif
159	eor	r3,r11,r4			@ a^b, b^c in next round
160#else
161	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
162	eor	r3,r11,r4			@ a^b, b^c in next round
163	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
164#endif
165	eor	r0,r0,r11,ror#20	@ Sigma0(a)
166	and	r12,r12,r3			@ (b^c)&=(a^b)
167	add	r6,r6,r10			@ d+=h
168	eor	r12,r12,r4			@ Maj(a,b,c)
169	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
170	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
171#if __ARM_ARCH__>=7
172	@ ldr	r2,[r1],#4			@ 2
173# if 2==15
174	str	r1,[sp,#17*4]			@ make room for r1
175# endif
176	eor	r0,r6,r6,ror#5
177	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
178	eor	r0,r0,r6,ror#19	@ Sigma1(e)
179	rev	r2,r2
180#else
181	@ ldrb	r2,[r1,#3]			@ 2
182	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
183	ldrb	r12,[r1,#2]
184	ldrb	r0,[r1,#1]
185	orr	r2,r2,r12,lsl#8
186	ldrb	r12,[r1],#4
187	orr	r2,r2,r0,lsl#16
188# if 2==15
189	str	r1,[sp,#17*4]			@ make room for r1
190# endif
191	eor	r0,r6,r6,ror#5
192	orr	r2,r2,r12,lsl#24
193	eor	r0,r0,r6,ror#19	@ Sigma1(e)
194#endif
195	ldr	r12,[r14],#4			@ *K256++
196	add	r9,r9,r2			@ h+=X[i]
197	str	r2,[sp,#2*4]
198	eor	r2,r7,r8
199	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
200	and	r2,r2,r6
201	add	r9,r9,r12			@ h+=K256[i]
202	eor	r2,r2,r8			@ Ch(e,f,g)
203	eor	r0,r10,r10,ror#11
204	add	r9,r9,r2			@ h+=Ch(e,f,g)
205#if 2==31
206	and	r12,r12,#0xff
207	cmp	r12,#0xf2			@ done?
208#endif
209#if 2<15
210# if __ARM_ARCH__>=7
211	ldr	r2,[r1],#4			@ prefetch
212# else
213	ldrb	r2,[r1,#3]
214# endif
215	eor	r12,r10,r11			@ a^b, b^c in next round
216#else
217	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
218	eor	r12,r10,r11			@ a^b, b^c in next round
219	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
220#endif
221	eor	r0,r0,r10,ror#20	@ Sigma0(a)
222	and	r3,r3,r12			@ (b^c)&=(a^b)
223	add	r5,r5,r9			@ d+=h
224	eor	r3,r3,r11			@ Maj(a,b,c)
225	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
226	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
227#if __ARM_ARCH__>=7
228	@ ldr	r2,[r1],#4			@ 3
229# if 3==15
230	str	r1,[sp,#17*4]			@ make room for r1
231# endif
232	eor	r0,r5,r5,ror#5
233	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
234	eor	r0,r0,r5,ror#19	@ Sigma1(e)
235	rev	r2,r2
236#else
237	@ ldrb	r2,[r1,#3]			@ 3
238	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
239	ldrb	r3,[r1,#2]
240	ldrb	r0,[r1,#1]
241	orr	r2,r2,r3,lsl#8
242	ldrb	r3,[r1],#4
243	orr	r2,r2,r0,lsl#16
244# if 3==15
245	str	r1,[sp,#17*4]			@ make room for r1
246# endif
247	eor	r0,r5,r5,ror#5
248	orr	r2,r2,r3,lsl#24
249	eor	r0,r0,r5,ror#19	@ Sigma1(e)
250#endif
251	ldr	r3,[r14],#4			@ *K256++
252	add	r8,r8,r2			@ h+=X[i]
253	str	r2,[sp,#3*4]
254	eor	r2,r6,r7
255	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
256	and	r2,r2,r5
257	add	r8,r8,r3			@ h+=K256[i]
258	eor	r2,r2,r7			@ Ch(e,f,g)
259	eor	r0,r9,r9,ror#11
260	add	r8,r8,r2			@ h+=Ch(e,f,g)
261#if 3==31
262	and	r3,r3,#0xff
263	cmp	r3,#0xf2			@ done?
264#endif
265#if 3<15
266# if __ARM_ARCH__>=7
267	ldr	r2,[r1],#4			@ prefetch
268# else
269	ldrb	r2,[r1,#3]
270# endif
271	eor	r3,r9,r10			@ a^b, b^c in next round
272#else
273	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
274	eor	r3,r9,r10			@ a^b, b^c in next round
275	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
276#endif
277	eor	r0,r0,r9,ror#20	@ Sigma0(a)
278	and	r12,r12,r3			@ (b^c)&=(a^b)
279	add	r4,r4,r8			@ d+=h
280	eor	r12,r12,r10			@ Maj(a,b,c)
281	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
282	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
283#if __ARM_ARCH__>=7
284	@ ldr	r2,[r1],#4			@ 4
285# if 4==15
286	str	r1,[sp,#17*4]			@ make room for r1
287# endif
288	eor	r0,r4,r4,ror#5
289	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
290	eor	r0,r0,r4,ror#19	@ Sigma1(e)
291	rev	r2,r2
292#else
293	@ ldrb	r2,[r1,#3]			@ 4
294	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
295	ldrb	r12,[r1,#2]
296	ldrb	r0,[r1,#1]
297	orr	r2,r2,r12,lsl#8
298	ldrb	r12,[r1],#4
299	orr	r2,r2,r0,lsl#16
300# if 4==15
301	str	r1,[sp,#17*4]			@ make room for r1
302# endif
303	eor	r0,r4,r4,ror#5
304	orr	r2,r2,r12,lsl#24
305	eor	r0,r0,r4,ror#19	@ Sigma1(e)
306#endif
307	ldr	r12,[r14],#4			@ *K256++
308	add	r7,r7,r2			@ h+=X[i]
309	str	r2,[sp,#4*4]
310	eor	r2,r5,r6
311	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
312	and	r2,r2,r4
313	add	r7,r7,r12			@ h+=K256[i]
314	eor	r2,r2,r6			@ Ch(e,f,g)
315	eor	r0,r8,r8,ror#11
316	add	r7,r7,r2			@ h+=Ch(e,f,g)
317#if 4==31
318	and	r12,r12,#0xff
319	cmp	r12,#0xf2			@ done?
320#endif
321#if 4<15
322# if __ARM_ARCH__>=7
323	ldr	r2,[r1],#4			@ prefetch
324# else
325	ldrb	r2,[r1,#3]
326# endif
327	eor	r12,r8,r9			@ a^b, b^c in next round
328#else
329	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
330	eor	r12,r8,r9			@ a^b, b^c in next round
331	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
332#endif
333	eor	r0,r0,r8,ror#20	@ Sigma0(a)
334	and	r3,r3,r12			@ (b^c)&=(a^b)
335	add	r11,r11,r7			@ d+=h
336	eor	r3,r3,r9			@ Maj(a,b,c)
337	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
338	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
339#if __ARM_ARCH__>=7
340	@ ldr	r2,[r1],#4			@ 5
341# if 5==15
342	str	r1,[sp,#17*4]			@ make room for r1
343# endif
344	eor	r0,r11,r11,ror#5
345	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
346	eor	r0,r0,r11,ror#19	@ Sigma1(e)
347	rev	r2,r2
348#else
349	@ ldrb	r2,[r1,#3]			@ 5
350	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
351	ldrb	r3,[r1,#2]
352	ldrb	r0,[r1,#1]
353	orr	r2,r2,r3,lsl#8
354	ldrb	r3,[r1],#4
355	orr	r2,r2,r0,lsl#16
356# if 5==15
357	str	r1,[sp,#17*4]			@ make room for r1
358# endif
359	eor	r0,r11,r11,ror#5
360	orr	r2,r2,r3,lsl#24
361	eor	r0,r0,r11,ror#19	@ Sigma1(e)
362#endif
363	ldr	r3,[r14],#4			@ *K256++
364	add	r6,r6,r2			@ h+=X[i]
365	str	r2,[sp,#5*4]
366	eor	r2,r4,r5
367	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
368	and	r2,r2,r11
369	add	r6,r6,r3			@ h+=K256[i]
370	eor	r2,r2,r5			@ Ch(e,f,g)
371	eor	r0,r7,r7,ror#11
372	add	r6,r6,r2			@ h+=Ch(e,f,g)
373#if 5==31
374	and	r3,r3,#0xff
375	cmp	r3,#0xf2			@ done?
376#endif
377#if 5<15
378# if __ARM_ARCH__>=7
379	ldr	r2,[r1],#4			@ prefetch
380# else
381	ldrb	r2,[r1,#3]
382# endif
383	eor	r3,r7,r8			@ a^b, b^c in next round
384#else
385	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
386	eor	r3,r7,r8			@ a^b, b^c in next round
387	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
388#endif
389	eor	r0,r0,r7,ror#20	@ Sigma0(a)
390	and	r12,r12,r3			@ (b^c)&=(a^b)
391	add	r10,r10,r6			@ d+=h
392	eor	r12,r12,r8			@ Maj(a,b,c)
393	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
394	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
395#if __ARM_ARCH__>=7
396	@ ldr	r2,[r1],#4			@ 6
397# if 6==15
398	str	r1,[sp,#17*4]			@ make room for r1
399# endif
400	eor	r0,r10,r10,ror#5
401	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
402	eor	r0,r0,r10,ror#19	@ Sigma1(e)
403	rev	r2,r2
404#else
405	@ ldrb	r2,[r1,#3]			@ 6
406	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
407	ldrb	r12,[r1,#2]
408	ldrb	r0,[r1,#1]
409	orr	r2,r2,r12,lsl#8
410	ldrb	r12,[r1],#4
411	orr	r2,r2,r0,lsl#16
412# if 6==15
413	str	r1,[sp,#17*4]			@ make room for r1
414# endif
415	eor	r0,r10,r10,ror#5
416	orr	r2,r2,r12,lsl#24
417	eor	r0,r0,r10,ror#19	@ Sigma1(e)
418#endif
419	ldr	r12,[r14],#4			@ *K256++
420	add	r5,r5,r2			@ h+=X[i]
421	str	r2,[sp,#6*4]
422	eor	r2,r11,r4
423	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
424	and	r2,r2,r10
425	add	r5,r5,r12			@ h+=K256[i]
426	eor	r2,r2,r4			@ Ch(e,f,g)
427	eor	r0,r6,r6,ror#11
428	add	r5,r5,r2			@ h+=Ch(e,f,g)
429#if 6==31
430	and	r12,r12,#0xff
431	cmp	r12,#0xf2			@ done?
432#endif
433#if 6<15
434# if __ARM_ARCH__>=7
435	ldr	r2,[r1],#4			@ prefetch
436# else
437	ldrb	r2,[r1,#3]
438# endif
439	eor	r12,r6,r7			@ a^b, b^c in next round
440#else
441	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
442	eor	r12,r6,r7			@ a^b, b^c in next round
443	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
444#endif
445	eor	r0,r0,r6,ror#20	@ Sigma0(a)
446	and	r3,r3,r12			@ (b^c)&=(a^b)
447	add	r9,r9,r5			@ d+=h
448	eor	r3,r3,r7			@ Maj(a,b,c)
449	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
450	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
451#if __ARM_ARCH__>=7
452	@ ldr	r2,[r1],#4			@ 7
453# if 7==15
454	str	r1,[sp,#17*4]			@ make room for r1
455# endif
456	eor	r0,r9,r9,ror#5
457	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
458	eor	r0,r0,r9,ror#19	@ Sigma1(e)
459	rev	r2,r2
460#else
461	@ ldrb	r2,[r1,#3]			@ 7
462	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
463	ldrb	r3,[r1,#2]
464	ldrb	r0,[r1,#1]
465	orr	r2,r2,r3,lsl#8
466	ldrb	r3,[r1],#4
467	orr	r2,r2,r0,lsl#16
468# if 7==15
469	str	r1,[sp,#17*4]			@ make room for r1
470# endif
471	eor	r0,r9,r9,ror#5
472	orr	r2,r2,r3,lsl#24
473	eor	r0,r0,r9,ror#19	@ Sigma1(e)
474#endif
475	ldr	r3,[r14],#4			@ *K256++
476	add	r4,r4,r2			@ h+=X[i]
477	str	r2,[sp,#7*4]
478	eor	r2,r10,r11
479	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
480	and	r2,r2,r9
481	add	r4,r4,r3			@ h+=K256[i]
482	eor	r2,r2,r11			@ Ch(e,f,g)
483	eor	r0,r5,r5,ror#11
484	add	r4,r4,r2			@ h+=Ch(e,f,g)
485#if 7==31
486	and	r3,r3,#0xff
487	cmp	r3,#0xf2			@ done?
488#endif
489#if 7<15
490# if __ARM_ARCH__>=7
491	ldr	r2,[r1],#4			@ prefetch
492# else
493	ldrb	r2,[r1,#3]
494# endif
495	eor	r3,r5,r6			@ a^b, b^c in next round
496#else
497	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
498	eor	r3,r5,r6			@ a^b, b^c in next round
499	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
500#endif
501	eor	r0,r0,r5,ror#20	@ Sigma0(a)
502	and	r12,r12,r3			@ (b^c)&=(a^b)
503	add	r8,r8,r4			@ d+=h
504	eor	r12,r12,r6			@ Maj(a,b,c)
505	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
506	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
507#if __ARM_ARCH__>=7
508	@ ldr	r2,[r1],#4			@ 8
509# if 8==15
510	str	r1,[sp,#17*4]			@ make room for r1
511# endif
512	eor	r0,r8,r8,ror#5
513	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
514	eor	r0,r0,r8,ror#19	@ Sigma1(e)
515	rev	r2,r2
516#else
517	@ ldrb	r2,[r1,#3]			@ 8
518	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
519	ldrb	r12,[r1,#2]
520	ldrb	r0,[r1,#1]
521	orr	r2,r2,r12,lsl#8
522	ldrb	r12,[r1],#4
523	orr	r2,r2,r0,lsl#16
524# if 8==15
525	str	r1,[sp,#17*4]			@ make room for r1
526# endif
527	eor	r0,r8,r8,ror#5
528	orr	r2,r2,r12,lsl#24
529	eor	r0,r0,r8,ror#19	@ Sigma1(e)
530#endif
531	ldr	r12,[r14],#4			@ *K256++
532	add	r11,r11,r2			@ h+=X[i]
533	str	r2,[sp,#8*4]
534	eor	r2,r9,r10
535	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
536	and	r2,r2,r8
537	add	r11,r11,r12			@ h+=K256[i]
538	eor	r2,r2,r10			@ Ch(e,f,g)
539	eor	r0,r4,r4,ror#11
540	add	r11,r11,r2			@ h+=Ch(e,f,g)
541#if 8==31
542	and	r12,r12,#0xff
543	cmp	r12,#0xf2			@ done?
544#endif
545#if 8<15
546# if __ARM_ARCH__>=7
547	ldr	r2,[r1],#4			@ prefetch
548# else
549	ldrb	r2,[r1,#3]
550# endif
551	eor	r12,r4,r5			@ a^b, b^c in next round
552#else
553	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
554	eor	r12,r4,r5			@ a^b, b^c in next round
555	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
556#endif
557	eor	r0,r0,r4,ror#20	@ Sigma0(a)
558	and	r3,r3,r12			@ (b^c)&=(a^b)
559	add	r7,r7,r11			@ d+=h
560	eor	r3,r3,r5			@ Maj(a,b,c)
561	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
562	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
563#if __ARM_ARCH__>=7
564	@ ldr	r2,[r1],#4			@ 9
565# if 9==15
566	str	r1,[sp,#17*4]			@ make room for r1
567# endif
568	eor	r0,r7,r7,ror#5
569	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
570	eor	r0,r0,r7,ror#19	@ Sigma1(e)
571	rev	r2,r2
572#else
573	@ ldrb	r2,[r1,#3]			@ 9
574	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
575	ldrb	r3,[r1,#2]
576	ldrb	r0,[r1,#1]
577	orr	r2,r2,r3,lsl#8
578	ldrb	r3,[r1],#4
579	orr	r2,r2,r0,lsl#16
580# if 9==15
581	str	r1,[sp,#17*4]			@ make room for r1
582# endif
583	eor	r0,r7,r7,ror#5
584	orr	r2,r2,r3,lsl#24
585	eor	r0,r0,r7,ror#19	@ Sigma1(e)
586#endif
587	ldr	r3,[r14],#4			@ *K256++
588	add	r10,r10,r2			@ h+=X[i]
589	str	r2,[sp,#9*4]
590	eor	r2,r8,r9
591	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
592	and	r2,r2,r7
593	add	r10,r10,r3			@ h+=K256[i]
594	eor	r2,r2,r9			@ Ch(e,f,g)
595	eor	r0,r11,r11,ror#11
596	add	r10,r10,r2			@ h+=Ch(e,f,g)
597#if 9==31
598	and	r3,r3,#0xff
599	cmp	r3,#0xf2			@ done?
600#endif
601#if 9<15
602# if __ARM_ARCH__>=7
603	ldr	r2,[r1],#4			@ prefetch
604# else
605	ldrb	r2,[r1,#3]
606# endif
607	eor	r3,r11,r4			@ a^b, b^c in next round
608#else
609	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
610	eor	r3,r11,r4			@ a^b, b^c in next round
611	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
612#endif
613	eor	r0,r0,r11,ror#20	@ Sigma0(a)
614	and	r12,r12,r3			@ (b^c)&=(a^b)
615	add	r6,r6,r10			@ d+=h
616	eor	r12,r12,r4			@ Maj(a,b,c)
617	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
618	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
619#if __ARM_ARCH__>=7
620	@ ldr	r2,[r1],#4			@ 10
621# if 10==15
622	str	r1,[sp,#17*4]			@ make room for r1
623# endif
624	eor	r0,r6,r6,ror#5
625	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
626	eor	r0,r0,r6,ror#19	@ Sigma1(e)
627	rev	r2,r2
628#else
629	@ ldrb	r2,[r1,#3]			@ 10
630	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
631	ldrb	r12,[r1,#2]
632	ldrb	r0,[r1,#1]
633	orr	r2,r2,r12,lsl#8
634	ldrb	r12,[r1],#4
635	orr	r2,r2,r0,lsl#16
636# if 10==15
637	str	r1,[sp,#17*4]			@ make room for r1
638# endif
639	eor	r0,r6,r6,ror#5
640	orr	r2,r2,r12,lsl#24
641	eor	r0,r0,r6,ror#19	@ Sigma1(e)
642#endif
643	ldr	r12,[r14],#4			@ *K256++
644	add	r9,r9,r2			@ h+=X[i]
645	str	r2,[sp,#10*4]
646	eor	r2,r7,r8
647	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
648	and	r2,r2,r6
649	add	r9,r9,r12			@ h+=K256[i]
650	eor	r2,r2,r8			@ Ch(e,f,g)
651	eor	r0,r10,r10,ror#11
652	add	r9,r9,r2			@ h+=Ch(e,f,g)
653#if 10==31
654	and	r12,r12,#0xff
655	cmp	r12,#0xf2			@ done?
656#endif
657#if 10<15
658# if __ARM_ARCH__>=7
659	ldr	r2,[r1],#4			@ prefetch
660# else
661	ldrb	r2,[r1,#3]
662# endif
663	eor	r12,r10,r11			@ a^b, b^c in next round
664#else
665	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
666	eor	r12,r10,r11			@ a^b, b^c in next round
667	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
668#endif
669	eor	r0,r0,r10,ror#20	@ Sigma0(a)
670	and	r3,r3,r12			@ (b^c)&=(a^b)
671	add	r5,r5,r9			@ d+=h
672	eor	r3,r3,r11			@ Maj(a,b,c)
673	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
674	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
675#if __ARM_ARCH__>=7
676	@ ldr	r2,[r1],#4			@ 11
677# if 11==15
678	str	r1,[sp,#17*4]			@ make room for r1
679# endif
680	eor	r0,r5,r5,ror#5
681	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
682	eor	r0,r0,r5,ror#19	@ Sigma1(e)
683	rev	r2,r2
684#else
685	@ ldrb	r2,[r1,#3]			@ 11
686	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
687	ldrb	r3,[r1,#2]
688	ldrb	r0,[r1,#1]
689	orr	r2,r2,r3,lsl#8
690	ldrb	r3,[r1],#4
691	orr	r2,r2,r0,lsl#16
692# if 11==15
693	str	r1,[sp,#17*4]			@ make room for r1
694# endif
695	eor	r0,r5,r5,ror#5
696	orr	r2,r2,r3,lsl#24
697	eor	r0,r0,r5,ror#19	@ Sigma1(e)
698#endif
699	ldr	r3,[r14],#4			@ *K256++
700	add	r8,r8,r2			@ h+=X[i]
701	str	r2,[sp,#11*4]
702	eor	r2,r6,r7
703	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
704	and	r2,r2,r5
705	add	r8,r8,r3			@ h+=K256[i]
706	eor	r2,r2,r7			@ Ch(e,f,g)
707	eor	r0,r9,r9,ror#11
708	add	r8,r8,r2			@ h+=Ch(e,f,g)
709#if 11==31
710	and	r3,r3,#0xff
711	cmp	r3,#0xf2			@ done?
712#endif
713#if 11<15
714# if __ARM_ARCH__>=7
715	ldr	r2,[r1],#4			@ prefetch
716# else
717	ldrb	r2,[r1,#3]
718# endif
719	eor	r3,r9,r10			@ a^b, b^c in next round
720#else
721	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
722	eor	r3,r9,r10			@ a^b, b^c in next round
723	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
724#endif
725	eor	r0,r0,r9,ror#20	@ Sigma0(a)
726	and	r12,r12,r3			@ (b^c)&=(a^b)
727	add	r4,r4,r8			@ d+=h
728	eor	r12,r12,r10			@ Maj(a,b,c)
729	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
730	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
731#if __ARM_ARCH__>=7
732	@ ldr	r2,[r1],#4			@ 12
733# if 12==15
734	str	r1,[sp,#17*4]			@ make room for r1
735# endif
736	eor	r0,r4,r4,ror#5
737	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
738	eor	r0,r0,r4,ror#19	@ Sigma1(e)
739	rev	r2,r2
740#else
741	@ ldrb	r2,[r1,#3]			@ 12
742	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
743	ldrb	r12,[r1,#2]
744	ldrb	r0,[r1,#1]
745	orr	r2,r2,r12,lsl#8
746	ldrb	r12,[r1],#4
747	orr	r2,r2,r0,lsl#16
748# if 12==15
749	str	r1,[sp,#17*4]			@ make room for r1
750# endif
751	eor	r0,r4,r4,ror#5
752	orr	r2,r2,r12,lsl#24
753	eor	r0,r0,r4,ror#19	@ Sigma1(e)
754#endif
755	ldr	r12,[r14],#4			@ *K256++
756	add	r7,r7,r2			@ h+=X[i]
757	str	r2,[sp,#12*4]
758	eor	r2,r5,r6
759	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
760	and	r2,r2,r4
761	add	r7,r7,r12			@ h+=K256[i]
762	eor	r2,r2,r6			@ Ch(e,f,g)
763	eor	r0,r8,r8,ror#11
764	add	r7,r7,r2			@ h+=Ch(e,f,g)
765#if 12==31
766	and	r12,r12,#0xff
767	cmp	r12,#0xf2			@ done?
768#endif
769#if 12<15
770# if __ARM_ARCH__>=7
771	ldr	r2,[r1],#4			@ prefetch
772# else
773	ldrb	r2,[r1,#3]
774# endif
775	eor	r12,r8,r9			@ a^b, b^c in next round
776#else
777	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
778	eor	r12,r8,r9			@ a^b, b^c in next round
779	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
780#endif
781	eor	r0,r0,r8,ror#20	@ Sigma0(a)
782	and	r3,r3,r12			@ (b^c)&=(a^b)
783	add	r11,r11,r7			@ d+=h
784	eor	r3,r3,r9			@ Maj(a,b,c)
785	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
786	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
787#if __ARM_ARCH__>=7
788	@ ldr	r2,[r1],#4			@ 13
789# if 13==15
790	str	r1,[sp,#17*4]			@ make room for r1
791# endif
792	eor	r0,r11,r11,ror#5
793	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
794	eor	r0,r0,r11,ror#19	@ Sigma1(e)
795	rev	r2,r2
796#else
797	@ ldrb	r2,[r1,#3]			@ 13
798	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
799	ldrb	r3,[r1,#2]
800	ldrb	r0,[r1,#1]
801	orr	r2,r2,r3,lsl#8
802	ldrb	r3,[r1],#4
803	orr	r2,r2,r0,lsl#16
804# if 13==15
805	str	r1,[sp,#17*4]			@ make room for r1
806# endif
807	eor	r0,r11,r11,ror#5
808	orr	r2,r2,r3,lsl#24
809	eor	r0,r0,r11,ror#19	@ Sigma1(e)
810#endif
811	ldr	r3,[r14],#4			@ *K256++
812	add	r6,r6,r2			@ h+=X[i]
813	str	r2,[sp,#13*4]
814	eor	r2,r4,r5
815	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
816	and	r2,r2,r11
817	add	r6,r6,r3			@ h+=K256[i]
818	eor	r2,r2,r5			@ Ch(e,f,g)
819	eor	r0,r7,r7,ror#11
820	add	r6,r6,r2			@ h+=Ch(e,f,g)
821#if 13==31
822	and	r3,r3,#0xff
823	cmp	r3,#0xf2			@ done?
824#endif
825#if 13<15
826# if __ARM_ARCH__>=7
827	ldr	r2,[r1],#4			@ prefetch
828# else
829	ldrb	r2,[r1,#3]
830# endif
831	eor	r3,r7,r8			@ a^b, b^c in next round
832#else
833	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
834	eor	r3,r7,r8			@ a^b, b^c in next round
835	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
836#endif
837	eor	r0,r0,r7,ror#20	@ Sigma0(a)
838	and	r12,r12,r3			@ (b^c)&=(a^b)
839	add	r10,r10,r6			@ d+=h
840	eor	r12,r12,r8			@ Maj(a,b,c)
841	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
842	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
843#if __ARM_ARCH__>=7
844	@ ldr	r2,[r1],#4			@ 14
845# if 14==15
846	str	r1,[sp,#17*4]			@ make room for r1
847# endif
848	eor	r0,r10,r10,ror#5
849	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
850	eor	r0,r0,r10,ror#19	@ Sigma1(e)
851	rev	r2,r2
852#else
853	@ ldrb	r2,[r1,#3]			@ 14
854	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
855	ldrb	r12,[r1,#2]
856	ldrb	r0,[r1,#1]
857	orr	r2,r2,r12,lsl#8
858	ldrb	r12,[r1],#4
859	orr	r2,r2,r0,lsl#16
860# if 14==15
861	str	r1,[sp,#17*4]			@ make room for r1
862# endif
863	eor	r0,r10,r10,ror#5
864	orr	r2,r2,r12,lsl#24
865	eor	r0,r0,r10,ror#19	@ Sigma1(e)
866#endif
867	ldr	r12,[r14],#4			@ *K256++
868	add	r5,r5,r2			@ h+=X[i]
869	str	r2,[sp,#14*4]
870	eor	r2,r11,r4
871	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
872	and	r2,r2,r10
873	add	r5,r5,r12			@ h+=K256[i]
874	eor	r2,r2,r4			@ Ch(e,f,g)
875	eor	r0,r6,r6,ror#11
876	add	r5,r5,r2			@ h+=Ch(e,f,g)
877#if 14==31
878	and	r12,r12,#0xff
879	cmp	r12,#0xf2			@ done?
880#endif
881#if 14<15
882# if __ARM_ARCH__>=7
883	ldr	r2,[r1],#4			@ prefetch
884# else
885	ldrb	r2,[r1,#3]
886# endif
887	eor	r12,r6,r7			@ a^b, b^c in next round
888#else
889	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
890	eor	r12,r6,r7			@ a^b, b^c in next round
891	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
892#endif
893	eor	r0,r0,r6,ror#20	@ Sigma0(a)
894	and	r3,r3,r12			@ (b^c)&=(a^b)
895	add	r9,r9,r5			@ d+=h
896	eor	r3,r3,r7			@ Maj(a,b,c)
897	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
898	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
899#if __ARM_ARCH__>=7
900	@ ldr	r2,[r1],#4			@ 15
901# if 15==15
902	str	r1,[sp,#17*4]			@ make room for r1
903# endif
904	eor	r0,r9,r9,ror#5
905	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
906	eor	r0,r0,r9,ror#19	@ Sigma1(e)
907	rev	r2,r2
908#else
909	@ ldrb	r2,[r1,#3]			@ 15
910	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
911	ldrb	r3,[r1,#2]
912	ldrb	r0,[r1,#1]
913	orr	r2,r2,r3,lsl#8
914	ldrb	r3,[r1],#4
915	orr	r2,r2,r0,lsl#16
916# if 15==15
917	str	r1,[sp,#17*4]			@ make room for r1
918# endif
919	eor	r0,r9,r9,ror#5
920	orr	r2,r2,r3,lsl#24
921	eor	r0,r0,r9,ror#19	@ Sigma1(e)
922#endif
923	ldr	r3,[r14],#4			@ *K256++
924	add	r4,r4,r2			@ h+=X[i]
925	str	r2,[sp,#15*4]
926	eor	r2,r10,r11
927	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
928	and	r2,r2,r9
929	add	r4,r4,r3			@ h+=K256[i]
930	eor	r2,r2,r11			@ Ch(e,f,g)
931	eor	r0,r5,r5,ror#11
932	add	r4,r4,r2			@ h+=Ch(e,f,g)
933#if 15==31
934	and	r3,r3,#0xff
935	cmp	r3,#0xf2			@ done?
936#endif
937#if 15<15
938# if __ARM_ARCH__>=7
939	ldr	r2,[r1],#4			@ prefetch
940# else
941	ldrb	r2,[r1,#3]
942# endif
943	eor	r3,r5,r6			@ a^b, b^c in next round
944#else
945	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
946	eor	r3,r5,r6			@ a^b, b^c in next round
947	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
948#endif
949	eor	r0,r0,r5,ror#20	@ Sigma0(a)
950	and	r12,r12,r3			@ (b^c)&=(a^b)
951	add	r8,r8,r4			@ d+=h
952	eor	r12,r12,r6			@ Maj(a,b,c)
953	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
954	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
955.Lrounds_16_xx:
956	@ ldr	r2,[sp,#1*4]		@ 16
957	@ ldr	r1,[sp,#14*4]
958	mov	r0,r2,ror#7
959	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
960	mov	r12,r1,ror#17
961	eor	r0,r0,r2,ror#18
962	eor	r12,r12,r1,ror#19
963	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
964	ldr	r2,[sp,#0*4]
965	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
966	ldr	r1,[sp,#9*4]
967
968	add	r12,r12,r0
969	eor	r0,r8,r8,ror#5	@ from BODY_00_15
970	add	r2,r2,r12
971	eor	r0,r0,r8,ror#19	@ Sigma1(e)
972	add	r2,r2,r1			@ X[i]
973	ldr	r12,[r14],#4			@ *K256++
974	add	r11,r11,r2			@ h+=X[i]
975	str	r2,[sp,#0*4]
976	eor	r2,r9,r10
977	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
978	and	r2,r2,r8
979	add	r11,r11,r12			@ h+=K256[i]
980	eor	r2,r2,r10			@ Ch(e,f,g)
981	eor	r0,r4,r4,ror#11
982	add	r11,r11,r2			@ h+=Ch(e,f,g)
983#if 16==31
984	and	r12,r12,#0xff
985	cmp	r12,#0xf2			@ done?
986#endif
987#if 16<15
988# if __ARM_ARCH__>=7
989	ldr	r2,[r1],#4			@ prefetch
990# else
991	ldrb	r2,[r1,#3]
992# endif
993	eor	r12,r4,r5			@ a^b, b^c in next round
994#else
995	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
996	eor	r12,r4,r5			@ a^b, b^c in next round
997	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
998#endif
999	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1000	and	r3,r3,r12			@ (b^c)&=(a^b)
1001	add	r7,r7,r11			@ d+=h
1002	eor	r3,r3,r5			@ Maj(a,b,c)
1003	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1004	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1005	@ ldr	r2,[sp,#2*4]		@ 17
1006	@ ldr	r1,[sp,#15*4]
1007	mov	r0,r2,ror#7
1008	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1009	mov	r3,r1,ror#17
1010	eor	r0,r0,r2,ror#18
1011	eor	r3,r3,r1,ror#19
1012	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1013	ldr	r2,[sp,#1*4]
1014	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1015	ldr	r1,[sp,#10*4]
1016
1017	add	r3,r3,r0
1018	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1019	add	r2,r2,r3
1020	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1021	add	r2,r2,r1			@ X[i]
1022	ldr	r3,[r14],#4			@ *K256++
1023	add	r10,r10,r2			@ h+=X[i]
1024	str	r2,[sp,#1*4]
1025	eor	r2,r8,r9
1026	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1027	and	r2,r2,r7
1028	add	r10,r10,r3			@ h+=K256[i]
1029	eor	r2,r2,r9			@ Ch(e,f,g)
1030	eor	r0,r11,r11,ror#11
1031	add	r10,r10,r2			@ h+=Ch(e,f,g)
1032#if 17==31
1033	and	r3,r3,#0xff
1034	cmp	r3,#0xf2			@ done?
1035#endif
1036#if 17<15
1037# if __ARM_ARCH__>=7
1038	ldr	r2,[r1],#4			@ prefetch
1039# else
1040	ldrb	r2,[r1,#3]
1041# endif
1042	eor	r3,r11,r4			@ a^b, b^c in next round
1043#else
1044	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1045	eor	r3,r11,r4			@ a^b, b^c in next round
1046	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1047#endif
1048	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1049	and	r12,r12,r3			@ (b^c)&=(a^b)
1050	add	r6,r6,r10			@ d+=h
1051	eor	r12,r12,r4			@ Maj(a,b,c)
1052	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1053	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1054	@ ldr	r2,[sp,#3*4]		@ 18
1055	@ ldr	r1,[sp,#0*4]
1056	mov	r0,r2,ror#7
1057	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1058	mov	r12,r1,ror#17
1059	eor	r0,r0,r2,ror#18
1060	eor	r12,r12,r1,ror#19
1061	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1062	ldr	r2,[sp,#2*4]
1063	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1064	ldr	r1,[sp,#11*4]
1065
1066	add	r12,r12,r0
1067	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1068	add	r2,r2,r12
1069	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1070	add	r2,r2,r1			@ X[i]
1071	ldr	r12,[r14],#4			@ *K256++
1072	add	r9,r9,r2			@ h+=X[i]
1073	str	r2,[sp,#2*4]
1074	eor	r2,r7,r8
1075	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1076	and	r2,r2,r6
1077	add	r9,r9,r12			@ h+=K256[i]
1078	eor	r2,r2,r8			@ Ch(e,f,g)
1079	eor	r0,r10,r10,ror#11
1080	add	r9,r9,r2			@ h+=Ch(e,f,g)
1081#if 18==31
1082	and	r12,r12,#0xff
1083	cmp	r12,#0xf2			@ done?
1084#endif
1085#if 18<15
1086# if __ARM_ARCH__>=7
1087	ldr	r2,[r1],#4			@ prefetch
1088# else
1089	ldrb	r2,[r1,#3]
1090# endif
1091	eor	r12,r10,r11			@ a^b, b^c in next round
1092#else
1093	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1094	eor	r12,r10,r11			@ a^b, b^c in next round
1095	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1096#endif
1097	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1098	and	r3,r3,r12			@ (b^c)&=(a^b)
1099	add	r5,r5,r9			@ d+=h
1100	eor	r3,r3,r11			@ Maj(a,b,c)
1101	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1102	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1103	@ ldr	r2,[sp,#4*4]		@ 19
1104	@ ldr	r1,[sp,#1*4]
1105	mov	r0,r2,ror#7
1106	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1107	mov	r3,r1,ror#17
1108	eor	r0,r0,r2,ror#18
1109	eor	r3,r3,r1,ror#19
1110	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1111	ldr	r2,[sp,#3*4]
1112	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1113	ldr	r1,[sp,#12*4]
1114
1115	add	r3,r3,r0
1116	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1117	add	r2,r2,r3
1118	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1119	add	r2,r2,r1			@ X[i]
1120	ldr	r3,[r14],#4			@ *K256++
1121	add	r8,r8,r2			@ h+=X[i]
1122	str	r2,[sp,#3*4]
1123	eor	r2,r6,r7
1124	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1125	and	r2,r2,r5
1126	add	r8,r8,r3			@ h+=K256[i]
1127	eor	r2,r2,r7			@ Ch(e,f,g)
1128	eor	r0,r9,r9,ror#11
1129	add	r8,r8,r2			@ h+=Ch(e,f,g)
1130#if 19==31
1131	and	r3,r3,#0xff
1132	cmp	r3,#0xf2			@ done?
1133#endif
1134#if 19<15
1135# if __ARM_ARCH__>=7
1136	ldr	r2,[r1],#4			@ prefetch
1137# else
1138	ldrb	r2,[r1,#3]
1139# endif
1140	eor	r3,r9,r10			@ a^b, b^c in next round
1141#else
1142	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1143	eor	r3,r9,r10			@ a^b, b^c in next round
1144	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1145#endif
1146	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1147	and	r12,r12,r3			@ (b^c)&=(a^b)
1148	add	r4,r4,r8			@ d+=h
1149	eor	r12,r12,r10			@ Maj(a,b,c)
1150	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1151	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1152	@ ldr	r2,[sp,#5*4]		@ 20
1153	@ ldr	r1,[sp,#2*4]
1154	mov	r0,r2,ror#7
1155	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1156	mov	r12,r1,ror#17
1157	eor	r0,r0,r2,ror#18
1158	eor	r12,r12,r1,ror#19
1159	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1160	ldr	r2,[sp,#4*4]
1161	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1162	ldr	r1,[sp,#13*4]
1163
1164	add	r12,r12,r0
1165	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1166	add	r2,r2,r12
1167	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1168	add	r2,r2,r1			@ X[i]
1169	ldr	r12,[r14],#4			@ *K256++
1170	add	r7,r7,r2			@ h+=X[i]
1171	str	r2,[sp,#4*4]
1172	eor	r2,r5,r6
1173	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1174	and	r2,r2,r4
1175	add	r7,r7,r12			@ h+=K256[i]
1176	eor	r2,r2,r6			@ Ch(e,f,g)
1177	eor	r0,r8,r8,ror#11
1178	add	r7,r7,r2			@ h+=Ch(e,f,g)
1179#if 20==31
1180	and	r12,r12,#0xff
1181	cmp	r12,#0xf2			@ done?
1182#endif
1183#if 20<15
1184# if __ARM_ARCH__>=7
1185	ldr	r2,[r1],#4			@ prefetch
1186# else
1187	ldrb	r2,[r1,#3]
1188# endif
1189	eor	r12,r8,r9			@ a^b, b^c in next round
1190#else
1191	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1192	eor	r12,r8,r9			@ a^b, b^c in next round
1193	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1194#endif
1195	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1196	and	r3,r3,r12			@ (b^c)&=(a^b)
1197	add	r11,r11,r7			@ d+=h
1198	eor	r3,r3,r9			@ Maj(a,b,c)
1199	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1200	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1201	@ ldr	r2,[sp,#6*4]		@ 21
1202	@ ldr	r1,[sp,#3*4]
1203	mov	r0,r2,ror#7
1204	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1205	mov	r3,r1,ror#17
1206	eor	r0,r0,r2,ror#18
1207	eor	r3,r3,r1,ror#19
1208	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1209	ldr	r2,[sp,#5*4]
1210	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1211	ldr	r1,[sp,#14*4]
1212
1213	add	r3,r3,r0
1214	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1215	add	r2,r2,r3
1216	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1217	add	r2,r2,r1			@ X[i]
1218	ldr	r3,[r14],#4			@ *K256++
1219	add	r6,r6,r2			@ h+=X[i]
1220	str	r2,[sp,#5*4]
1221	eor	r2,r4,r5
1222	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1223	and	r2,r2,r11
1224	add	r6,r6,r3			@ h+=K256[i]
1225	eor	r2,r2,r5			@ Ch(e,f,g)
1226	eor	r0,r7,r7,ror#11
1227	add	r6,r6,r2			@ h+=Ch(e,f,g)
1228#if 21==31
1229	and	r3,r3,#0xff
1230	cmp	r3,#0xf2			@ done?
1231#endif
1232#if 21<15
1233# if __ARM_ARCH__>=7
1234	ldr	r2,[r1],#4			@ prefetch
1235# else
1236	ldrb	r2,[r1,#3]
1237# endif
1238	eor	r3,r7,r8			@ a^b, b^c in next round
1239#else
1240	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1241	eor	r3,r7,r8			@ a^b, b^c in next round
1242	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1243#endif
1244	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1245	and	r12,r12,r3			@ (b^c)&=(a^b)
1246	add	r10,r10,r6			@ d+=h
1247	eor	r12,r12,r8			@ Maj(a,b,c)
1248	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1249	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1250	@ ldr	r2,[sp,#7*4]		@ 22
1251	@ ldr	r1,[sp,#4*4]
1252	mov	r0,r2,ror#7
1253	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1254	mov	r12,r1,ror#17
1255	eor	r0,r0,r2,ror#18
1256	eor	r12,r12,r1,ror#19
1257	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1258	ldr	r2,[sp,#6*4]
1259	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1260	ldr	r1,[sp,#15*4]
1261
1262	add	r12,r12,r0
1263	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1264	add	r2,r2,r12
1265	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1266	add	r2,r2,r1			@ X[i]
1267	ldr	r12,[r14],#4			@ *K256++
1268	add	r5,r5,r2			@ h+=X[i]
1269	str	r2,[sp,#6*4]
1270	eor	r2,r11,r4
1271	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1272	and	r2,r2,r10
1273	add	r5,r5,r12			@ h+=K256[i]
1274	eor	r2,r2,r4			@ Ch(e,f,g)
1275	eor	r0,r6,r6,ror#11
1276	add	r5,r5,r2			@ h+=Ch(e,f,g)
1277#if 22==31
1278	and	r12,r12,#0xff
1279	cmp	r12,#0xf2			@ done?
1280#endif
1281#if 22<15
1282# if __ARM_ARCH__>=7
1283	ldr	r2,[r1],#4			@ prefetch
1284# else
1285	ldrb	r2,[r1,#3]
1286# endif
1287	eor	r12,r6,r7			@ a^b, b^c in next round
1288#else
1289	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1290	eor	r12,r6,r7			@ a^b, b^c in next round
1291	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1292#endif
1293	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1294	and	r3,r3,r12			@ (b^c)&=(a^b)
1295	add	r9,r9,r5			@ d+=h
1296	eor	r3,r3,r7			@ Maj(a,b,c)
1297	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1298	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1299	@ ldr	r2,[sp,#8*4]		@ 23
1300	@ ldr	r1,[sp,#5*4]
1301	mov	r0,r2,ror#7
1302	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1303	mov	r3,r1,ror#17
1304	eor	r0,r0,r2,ror#18
1305	eor	r3,r3,r1,ror#19
1306	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1307	ldr	r2,[sp,#7*4]
1308	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1309	ldr	r1,[sp,#0*4]
1310
1311	add	r3,r3,r0
1312	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1313	add	r2,r2,r3
1314	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1315	add	r2,r2,r1			@ X[i]
1316	ldr	r3,[r14],#4			@ *K256++
1317	add	r4,r4,r2			@ h+=X[i]
1318	str	r2,[sp,#7*4]
1319	eor	r2,r10,r11
1320	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1321	and	r2,r2,r9
1322	add	r4,r4,r3			@ h+=K256[i]
1323	eor	r2,r2,r11			@ Ch(e,f,g)
1324	eor	r0,r5,r5,ror#11
1325	add	r4,r4,r2			@ h+=Ch(e,f,g)
1326#if 23==31
1327	and	r3,r3,#0xff
1328	cmp	r3,#0xf2			@ done?
1329#endif
1330#if 23<15
1331# if __ARM_ARCH__>=7
1332	ldr	r2,[r1],#4			@ prefetch
1333# else
1334	ldrb	r2,[r1,#3]
1335# endif
1336	eor	r3,r5,r6			@ a^b, b^c in next round
1337#else
1338	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1339	eor	r3,r5,r6			@ a^b, b^c in next round
1340	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1341#endif
1342	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1343	and	r12,r12,r3			@ (b^c)&=(a^b)
1344	add	r8,r8,r4			@ d+=h
1345	eor	r12,r12,r6			@ Maj(a,b,c)
1346	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1347	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1348	@ ldr	r2,[sp,#9*4]		@ 24
1349	@ ldr	r1,[sp,#6*4]
1350	mov	r0,r2,ror#7
1351	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1352	mov	r12,r1,ror#17
1353	eor	r0,r0,r2,ror#18
1354	eor	r12,r12,r1,ror#19
1355	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1356	ldr	r2,[sp,#8*4]
1357	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1358	ldr	r1,[sp,#1*4]
1359
1360	add	r12,r12,r0
1361	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1362	add	r2,r2,r12
1363	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1364	add	r2,r2,r1			@ X[i]
1365	ldr	r12,[r14],#4			@ *K256++
1366	add	r11,r11,r2			@ h+=X[i]
1367	str	r2,[sp,#8*4]
1368	eor	r2,r9,r10
1369	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1370	and	r2,r2,r8
1371	add	r11,r11,r12			@ h+=K256[i]
1372	eor	r2,r2,r10			@ Ch(e,f,g)
1373	eor	r0,r4,r4,ror#11
1374	add	r11,r11,r2			@ h+=Ch(e,f,g)
1375#if 24==31
1376	and	r12,r12,#0xff
1377	cmp	r12,#0xf2			@ done?
1378#endif
1379#if 24<15
1380# if __ARM_ARCH__>=7
1381	ldr	r2,[r1],#4			@ prefetch
1382# else
1383	ldrb	r2,[r1,#3]
1384# endif
1385	eor	r12,r4,r5			@ a^b, b^c in next round
1386#else
1387	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1388	eor	r12,r4,r5			@ a^b, b^c in next round
1389	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1390#endif
1391	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1392	and	r3,r3,r12			@ (b^c)&=(a^b)
1393	add	r7,r7,r11			@ d+=h
1394	eor	r3,r3,r5			@ Maj(a,b,c)
1395	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1396	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1397	@ ldr	r2,[sp,#10*4]		@ 25
1398	@ ldr	r1,[sp,#7*4]
1399	mov	r0,r2,ror#7
1400	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1401	mov	r3,r1,ror#17
1402	eor	r0,r0,r2,ror#18
1403	eor	r3,r3,r1,ror#19
1404	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1405	ldr	r2,[sp,#9*4]
1406	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1407	ldr	r1,[sp,#2*4]
1408
1409	add	r3,r3,r0
1410	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1411	add	r2,r2,r3
1412	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1413	add	r2,r2,r1			@ X[i]
1414	ldr	r3,[r14],#4			@ *K256++
1415	add	r10,r10,r2			@ h+=X[i]
1416	str	r2,[sp,#9*4]
1417	eor	r2,r8,r9
1418	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1419	and	r2,r2,r7
1420	add	r10,r10,r3			@ h+=K256[i]
1421	eor	r2,r2,r9			@ Ch(e,f,g)
1422	eor	r0,r11,r11,ror#11
1423	add	r10,r10,r2			@ h+=Ch(e,f,g)
1424#if 25==31
1425	and	r3,r3,#0xff
1426	cmp	r3,#0xf2			@ done?
1427#endif
1428#if 25<15
1429# if __ARM_ARCH__>=7
1430	ldr	r2,[r1],#4			@ prefetch
1431# else
1432	ldrb	r2,[r1,#3]
1433# endif
1434	eor	r3,r11,r4			@ a^b, b^c in next round
1435#else
1436	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1437	eor	r3,r11,r4			@ a^b, b^c in next round
1438	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1439#endif
1440	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1441	and	r12,r12,r3			@ (b^c)&=(a^b)
1442	add	r6,r6,r10			@ d+=h
1443	eor	r12,r12,r4			@ Maj(a,b,c)
1444	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1445	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1446	@ ldr	r2,[sp,#11*4]		@ 26
1447	@ ldr	r1,[sp,#8*4]
1448	mov	r0,r2,ror#7
1449	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1450	mov	r12,r1,ror#17
1451	eor	r0,r0,r2,ror#18
1452	eor	r12,r12,r1,ror#19
1453	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1454	ldr	r2,[sp,#10*4]
1455	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1456	ldr	r1,[sp,#3*4]
1457
1458	add	r12,r12,r0
1459	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1460	add	r2,r2,r12
1461	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1462	add	r2,r2,r1			@ X[i]
1463	ldr	r12,[r14],#4			@ *K256++
1464	add	r9,r9,r2			@ h+=X[i]
1465	str	r2,[sp,#10*4]
1466	eor	r2,r7,r8
1467	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1468	and	r2,r2,r6
1469	add	r9,r9,r12			@ h+=K256[i]
1470	eor	r2,r2,r8			@ Ch(e,f,g)
1471	eor	r0,r10,r10,ror#11
1472	add	r9,r9,r2			@ h+=Ch(e,f,g)
1473#if 26==31
1474	and	r12,r12,#0xff
1475	cmp	r12,#0xf2			@ done?
1476#endif
1477#if 26<15
1478# if __ARM_ARCH__>=7
1479	ldr	r2,[r1],#4			@ prefetch
1480# else
1481	ldrb	r2,[r1,#3]
1482# endif
1483	eor	r12,r10,r11			@ a^b, b^c in next round
1484#else
1485	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1486	eor	r12,r10,r11			@ a^b, b^c in next round
1487	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1488#endif
1489	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1490	and	r3,r3,r12			@ (b^c)&=(a^b)
1491	add	r5,r5,r9			@ d+=h
1492	eor	r3,r3,r11			@ Maj(a,b,c)
1493	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1494	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1495	@ ldr	r2,[sp,#12*4]		@ 27
1496	@ ldr	r1,[sp,#9*4]
1497	mov	r0,r2,ror#7
1498	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1499	mov	r3,r1,ror#17
1500	eor	r0,r0,r2,ror#18
1501	eor	r3,r3,r1,ror#19
1502	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1503	ldr	r2,[sp,#11*4]
1504	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1505	ldr	r1,[sp,#4*4]
1506
1507	add	r3,r3,r0
1508	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1509	add	r2,r2,r3
1510	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1511	add	r2,r2,r1			@ X[i]
1512	ldr	r3,[r14],#4			@ *K256++
1513	add	r8,r8,r2			@ h+=X[i]
1514	str	r2,[sp,#11*4]
1515	eor	r2,r6,r7
1516	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1517	and	r2,r2,r5
1518	add	r8,r8,r3			@ h+=K256[i]
1519	eor	r2,r2,r7			@ Ch(e,f,g)
1520	eor	r0,r9,r9,ror#11
1521	add	r8,r8,r2			@ h+=Ch(e,f,g)
1522#if 27==31
1523	and	r3,r3,#0xff
1524	cmp	r3,#0xf2			@ done?
1525#endif
1526#if 27<15
1527# if __ARM_ARCH__>=7
1528	ldr	r2,[r1],#4			@ prefetch
1529# else
1530	ldrb	r2,[r1,#3]
1531# endif
1532	eor	r3,r9,r10			@ a^b, b^c in next round
1533#else
1534	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1535	eor	r3,r9,r10			@ a^b, b^c in next round
1536	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1537#endif
1538	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1539	and	r12,r12,r3			@ (b^c)&=(a^b)
1540	add	r4,r4,r8			@ d+=h
1541	eor	r12,r12,r10			@ Maj(a,b,c)
1542	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1543	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1544	@ ldr	r2,[sp,#13*4]		@ 28
1545	@ ldr	r1,[sp,#10*4]
1546	mov	r0,r2,ror#7
1547	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1548	mov	r12,r1,ror#17
1549	eor	r0,r0,r2,ror#18
1550	eor	r12,r12,r1,ror#19
1551	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1552	ldr	r2,[sp,#12*4]
1553	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1554	ldr	r1,[sp,#5*4]
1555
1556	add	r12,r12,r0
1557	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1558	add	r2,r2,r12
1559	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1560	add	r2,r2,r1			@ X[i]
1561	ldr	r12,[r14],#4			@ *K256++
1562	add	r7,r7,r2			@ h+=X[i]
1563	str	r2,[sp,#12*4]
1564	eor	r2,r5,r6
1565	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1566	and	r2,r2,r4
1567	add	r7,r7,r12			@ h+=K256[i]
1568	eor	r2,r2,r6			@ Ch(e,f,g)
1569	eor	r0,r8,r8,ror#11
1570	add	r7,r7,r2			@ h+=Ch(e,f,g)
1571#if 28==31
1572	and	r12,r12,#0xff
1573	cmp	r12,#0xf2			@ done?
1574#endif
1575#if 28<15
1576# if __ARM_ARCH__>=7
1577	ldr	r2,[r1],#4			@ prefetch
1578# else
1579	ldrb	r2,[r1,#3]
1580# endif
1581	eor	r12,r8,r9			@ a^b, b^c in next round
1582#else
1583	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1584	eor	r12,r8,r9			@ a^b, b^c in next round
1585	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1586#endif
1587	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1588	and	r3,r3,r12			@ (b^c)&=(a^b)
1589	add	r11,r11,r7			@ d+=h
1590	eor	r3,r3,r9			@ Maj(a,b,c)
1591	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1592	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1593	@ ldr	r2,[sp,#14*4]		@ 29
1594	@ ldr	r1,[sp,#11*4]
1595	mov	r0,r2,ror#7
1596	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1597	mov	r3,r1,ror#17
1598	eor	r0,r0,r2,ror#18
1599	eor	r3,r3,r1,ror#19
1600	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1601	ldr	r2,[sp,#13*4]
1602	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1603	ldr	r1,[sp,#6*4]
1604
1605	add	r3,r3,r0
1606	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1607	add	r2,r2,r3
1608	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1609	add	r2,r2,r1			@ X[i]
1610	ldr	r3,[r14],#4			@ *K256++
1611	add	r6,r6,r2			@ h+=X[i]
1612	str	r2,[sp,#13*4]
1613	eor	r2,r4,r5
1614	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1615	and	r2,r2,r11
1616	add	r6,r6,r3			@ h+=K256[i]
1617	eor	r2,r2,r5			@ Ch(e,f,g)
1618	eor	r0,r7,r7,ror#11
1619	add	r6,r6,r2			@ h+=Ch(e,f,g)
1620#if 29==31
1621	and	r3,r3,#0xff
1622	cmp	r3,#0xf2			@ done?
1623#endif
1624#if 29<15
1625# if __ARM_ARCH__>=7
1626	ldr	r2,[r1],#4			@ prefetch
1627# else
1628	ldrb	r2,[r1,#3]
1629# endif
1630	eor	r3,r7,r8			@ a^b, b^c in next round
1631#else
1632	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1633	eor	r3,r7,r8			@ a^b, b^c in next round
1634	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1635#endif
1636	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1637	and	r12,r12,r3			@ (b^c)&=(a^b)
1638	add	r10,r10,r6			@ d+=h
1639	eor	r12,r12,r8			@ Maj(a,b,c)
1640	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1641	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1642	@ ldr	r2,[sp,#15*4]		@ 30
1643	@ ldr	r1,[sp,#12*4]
1644	mov	r0,r2,ror#7
1645	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1646	mov	r12,r1,ror#17
1647	eor	r0,r0,r2,ror#18
1648	eor	r12,r12,r1,ror#19
1649	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1650	ldr	r2,[sp,#14*4]
1651	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1652	ldr	r1,[sp,#7*4]
1653
1654	add	r12,r12,r0
1655	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1656	add	r2,r2,r12
1657	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1658	add	r2,r2,r1			@ X[i]
1659	ldr	r12,[r14],#4			@ *K256++
1660	add	r5,r5,r2			@ h+=X[i]
1661	str	r2,[sp,#14*4]
1662	eor	r2,r11,r4
1663	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1664	and	r2,r2,r10
1665	add	r5,r5,r12			@ h+=K256[i]
1666	eor	r2,r2,r4			@ Ch(e,f,g)
1667	eor	r0,r6,r6,ror#11
1668	add	r5,r5,r2			@ h+=Ch(e,f,g)
1669#if 30==31
1670	and	r12,r12,#0xff
1671	cmp	r12,#0xf2			@ done?
1672#endif
1673#if 30<15
1674# if __ARM_ARCH__>=7
1675	ldr	r2,[r1],#4			@ prefetch
1676# else
1677	ldrb	r2,[r1,#3]
1678# endif
1679	eor	r12,r6,r7			@ a^b, b^c in next round
1680#else
1681	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1682	eor	r12,r6,r7			@ a^b, b^c in next round
1683	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1684#endif
1685	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1686	and	r3,r3,r12			@ (b^c)&=(a^b)
1687	add	r9,r9,r5			@ d+=h
1688	eor	r3,r3,r7			@ Maj(a,b,c)
1689	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1690	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1691	@ ldr	r2,[sp,#0*4]		@ 31
1692	@ ldr	r1,[sp,#13*4]
1693	mov	r0,r2,ror#7
1694	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1695	mov	r3,r1,ror#17
1696	eor	r0,r0,r2,ror#18
1697	eor	r3,r3,r1,ror#19
1698	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1699	ldr	r2,[sp,#15*4]
1700	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1701	ldr	r1,[sp,#8*4]
1702
1703	add	r3,r3,r0
1704	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1705	add	r2,r2,r3
1706	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1707	add	r2,r2,r1			@ X[i]
1708	ldr	r3,[r14],#4			@ *K256++
1709	add	r4,r4,r2			@ h+=X[i]
1710	str	r2,[sp,#15*4]
1711	eor	r2,r10,r11
1712	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1713	and	r2,r2,r9
1714	add	r4,r4,r3			@ h+=K256[i]
1715	eor	r2,r2,r11			@ Ch(e,f,g)
1716	eor	r0,r5,r5,ror#11
1717	add	r4,r4,r2			@ h+=Ch(e,f,g)
1718#if 31==31
1719	and	r3,r3,#0xff
1720	cmp	r3,#0xf2			@ done?
1721#endif
1722#if 31<15
1723# if __ARM_ARCH__>=7
1724	ldr	r2,[r1],#4			@ prefetch
1725# else
1726	ldrb	r2,[r1,#3]
1727# endif
1728	eor	r3,r5,r6			@ a^b, b^c in next round
1729#else
1730	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1731	eor	r3,r5,r6			@ a^b, b^c in next round
1732	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1733#endif
1734	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1735	and	r12,r12,r3			@ (b^c)&=(a^b)
1736	add	r8,r8,r4			@ d+=h
1737	eor	r12,r12,r6			@ Maj(a,b,c)
1738	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1739	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1740	ldreq	r3,[sp,#16*4]		@ pull ctx
1741	bne	.Lrounds_16_xx
1742
1743	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1744	ldr	r0,[r3,#0]
1745	ldr	r2,[r3,#4]
1746	ldr	r12,[r3,#8]
1747	add	r4,r4,r0
1748	ldr	r0,[r3,#12]
1749	add	r5,r5,r2
1750	ldr	r2,[r3,#16]
1751	add	r6,r6,r12
1752	ldr	r12,[r3,#20]
1753	add	r7,r7,r0
1754	ldr	r0,[r3,#24]
1755	add	r8,r8,r2
1756	ldr	r2,[r3,#28]
1757	add	r9,r9,r12
1758	ldr	r1,[sp,#17*4]		@ pull inp
1759	ldr	r12,[sp,#18*4]		@ pull inp+len
1760	add	r10,r10,r0
1761	add	r11,r11,r2
1762	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1763	cmp	r1,r12
1764	sub	r14,r14,#256	@ rewind Ktbl
1765	bne	.Loop
1766
1767	add	sp,sp,#19*4	@ destroy frame
1768#if __ARM_ARCH__>=5
1769	ldmia	sp!,{r4-r11,pc}
1770#else
1771	ldmia	sp!,{r4-r11,lr}
1772	tst	lr,#1
1773	moveq	pc,lr			@ be binary compatible with V4, yet
1774	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1775#endif
1776.size	sha256_block_data_order,.-sha256_block_data_order
1777#if __ARM_MAX_ARCH__>=7
1778.arch	armv7-a
1779.fpu	neon
1780
@ ------------------------------------------------------------------
@ sha256_block_data_order_neon -- NEON code path, entered either at
@ its own label or via .LNEON from sha256_block_data_order.
@ On entry (see dispatcher in this file):
@   r0 = ctx (8 x 32-bit SHA-256 state words a..h)
@   r1 = input pointer
@   r2 = end-of-input (caller already did r2 = r1 + num*64)
@   r3 = address of sha256_block_data_order (used to locate K256)
@ Stack frame after the alloca/bic below (sp is 16-byte aligned):
@   [sp,#0..63]  X[16]: message schedule words, each pre-added with
@                the matching K256[i] via NEON (vadd.i32 + vst1.32)
@   [sp,#64] ctx   [sp,#68] inp   [sp,#72] inp end   [sp,#76] saved sp
@ Scalar rounds keep a..h in r4..r11; r0 is scratch for Sigma terms;
@ r2 carries the incoming X[i]+K[i] word; r3 and r12 alternate as the
@ Maj(a,b,c) accumulator, which is added to h one round late
@ ("from the past") to shorten the critical path.
@ NOTE(review): NEON data ops do not touch the APSR flags, so the
@ scalar teq results survive the interleaved vector instructions.
@ ------------------------------------------------------------------
1781.type	sha256_block_data_order_neon,%function
1782.align	4
1783sha256_block_data_order_neon:
1784.LNEON:
1785	stmdb	sp!,{r4-r12,lr}
1786
@ r12 temporarily holds the caller's sp so it can be saved at [sp,#76]
@ after the frame is carved out and re-aligned.
1787	mov	r12,sp
1788	sub	sp,sp,#16*4+16		@ alloca
1789	sub	r14,r3,#256+32	@ K256
1790	bic	sp,sp,#15		@ align for 128-bit stores
1791
@ Load the first 64-byte block (q0..q3 = X[0..15]), the first 16
@ round constants (q8..q11 = K256[0..15]), byte-swap the input to
@ big-endian word order, and stash X[i]+K[i] into the stack frame.
1792	vld1.8		{q0},[r1]!
1793	vld1.8		{q1},[r1]!
1794	vld1.8		{q2},[r1]!
1795	vld1.8		{q3},[r1]!
1796	vld1.32		{q8},[r14,:128]!
1797	vld1.32		{q9},[r14,:128]!
1798	vld1.32		{q10},[r14,:128]!
1799	vld1.32		{q11},[r14,:128]!
1800	vrev32.8	q0,q0		@ yes, even on
1801	str		r0,[sp,#64]
1802	vrev32.8	q1,q1		@ big-endian
1803	str		r1,[sp,#68]
1804	mov		r1,sp
1805	vrev32.8	q2,q2
1806	str		r2,[sp,#72]
1807	vrev32.8	q3,q3
1808	str		r12,[sp,#76]		@ save original sp
1809	vadd.i32	q8,q8,q0
1810	vadd.i32	q9,q9,q1
1811	vst1.32		{q8},[r1,:128]!
1812	vadd.i32	q10,q10,q2
1813	vst1.32		{q9},[r1,:128]!
1814	vadd.i32	q11,q11,q3
1815	vst1.32		{q10},[r1,:128]!
1816	vst1.32		{q11},[r1,:128]!
1817
@ Load state a..h into r4..r11, rewind r1 to the start of the X[]
@ frame, fetch X[0]+K[0], and clear/seed the deferred-Maj registers.
1818	ldmia		r0,{r4-r11}
1819	sub		r1,r1,#64
1820	ldr		r2,[sp,#0]
1821	eor		r12,r12,r12
1822	eor		r3,r5,r6
1823	b		.L_00_48
1824
@ Rounds 0..47: each iteration performs 16 scalar rounds while
@ NEON expands the next 16 schedule words in place (q0..q3) using
@ sigma0 = ror7^ror18^shr3 (vshr/vsli pairs build the rotates) and
@ sigma1 = ror17^ror19^shr10, then stores X[i]+K256[i] back through
@ r1 for the scalar rounds of the NEXT iteration to consume.
1825.align	4
1826.L_00_48:
1827	vext.8	q8,q0,q1,#4
1828	add	r11,r11,r2
1829	eor	r2,r9,r10
1830	eor	r0,r8,r8,ror#5
1831	vext.8	q9,q2,q3,#4
1832	add	r4,r4,r12
1833	and	r2,r2,r8
1834	eor	r12,r0,r8,ror#19
1835	vshr.u32	q10,q8,#7
1836	eor	r0,r4,r4,ror#11
1837	eor	r2,r2,r10
1838	vadd.i32	q0,q0,q9
1839	add	r11,r11,r12,ror#6
1840	eor	r12,r4,r5
1841	vshr.u32	q9,q8,#3
1842	eor	r0,r0,r4,ror#20
1843	add	r11,r11,r2
1844	vsli.32	q10,q8,#25
1845	ldr	r2,[sp,#4]
1846	and	r3,r3,r12
1847	vshr.u32	q11,q8,#18
1848	add	r7,r7,r11
1849	add	r11,r11,r0,ror#2
1850	eor	r3,r3,r5
1851	veor	q9,q9,q10
1852	add	r10,r10,r2
1853	vsli.32	q11,q8,#14
1854	eor	r2,r8,r9
1855	eor	r0,r7,r7,ror#5
1856	vshr.u32	d24,d7,#17
1857	add	r11,r11,r3
1858	and	r2,r2,r7
1859	veor	q9,q9,q11
1860	eor	r3,r0,r7,ror#19
1861	eor	r0,r11,r11,ror#11
1862	vsli.32	d24,d7,#15
1863	eor	r2,r2,r9
1864	add	r10,r10,r3,ror#6
1865	vshr.u32	d25,d7,#10
1866	eor	r3,r11,r4
1867	eor	r0,r0,r11,ror#20
1868	vadd.i32	q0,q0,q9
1869	add	r10,r10,r2
1870	ldr	r2,[sp,#8]
1871	veor	d25,d25,d24
1872	and	r12,r12,r3
1873	add	r6,r6,r10
1874	vshr.u32	d24,d7,#19
1875	add	r10,r10,r0,ror#2
1876	eor	r12,r12,r4
1877	vsli.32	d24,d7,#13
1878	add	r9,r9,r2
1879	eor	r2,r7,r8
1880	veor	d25,d25,d24
1881	eor	r0,r6,r6,ror#5
1882	add	r10,r10,r12
1883	vadd.i32	d0,d0,d25
1884	and	r2,r2,r6
1885	eor	r12,r0,r6,ror#19
1886	vshr.u32	d24,d0,#17
1887	eor	r0,r10,r10,ror#11
1888	eor	r2,r2,r8
1889	vsli.32	d24,d0,#15
1890	add	r9,r9,r12,ror#6
1891	eor	r12,r10,r11
1892	vshr.u32	d25,d0,#10
1893	eor	r0,r0,r10,ror#20
1894	add	r9,r9,r2
1895	veor	d25,d25,d24
1896	ldr	r2,[sp,#12]
1897	and	r3,r3,r12
1898	vshr.u32	d24,d0,#19
1899	add	r5,r5,r9
1900	add	r9,r9,r0,ror#2
1901	eor	r3,r3,r11
@ Next 16 round constants stream in from r14 (K256 walk).
1902	vld1.32	{q8},[r14,:128]!
1903	add	r8,r8,r2
1904	vsli.32	d24,d0,#13
1905	eor	r2,r6,r7
1906	eor	r0,r5,r5,ror#5
1907	veor	d25,d25,d24
1908	add	r9,r9,r3
1909	and	r2,r2,r5
1910	vadd.i32	d1,d1,d25
1911	eor	r3,r0,r5,ror#19
1912	eor	r0,r9,r9,ror#11
1913	vadd.i32	q8,q8,q0
1914	eor	r2,r2,r7
1915	add	r8,r8,r3,ror#6
1916	eor	r3,r9,r10
1917	eor	r0,r0,r9,ror#20
1918	add	r8,r8,r2
1919	ldr	r2,[sp,#16]
1920	and	r12,r12,r3
1921	add	r4,r4,r8
@ Store expanded X[16..19]+K for the following scalar rounds.
1922	vst1.32	{q8},[r1,:128]!
1923	add	r8,r8,r0,ror#2
1924	eor	r12,r12,r10
1925	vext.8	q8,q1,q2,#4
1926	add	r7,r7,r2
1927	eor	r2,r5,r6
1928	eor	r0,r4,r4,ror#5
1929	vext.8	q9,q3,q0,#4
1930	add	r8,r8,r12
1931	and	r2,r2,r4
1932	eor	r12,r0,r4,ror#19
1933	vshr.u32	q10,q8,#7
1934	eor	r0,r8,r8,ror#11
1935	eor	r2,r2,r6
1936	vadd.i32	q1,q1,q9
1937	add	r7,r7,r12,ror#6
1938	eor	r12,r8,r9
1939	vshr.u32	q9,q8,#3
1940	eor	r0,r0,r8,ror#20
1941	add	r7,r7,r2
1942	vsli.32	q10,q8,#25
1943	ldr	r2,[sp,#20]
1944	and	r3,r3,r12
1945	vshr.u32	q11,q8,#18
1946	add	r11,r11,r7
1947	add	r7,r7,r0,ror#2
1948	eor	r3,r3,r9
1949	veor	q9,q9,q10
1950	add	r6,r6,r2
1951	vsli.32	q11,q8,#14
1952	eor	r2,r4,r5
1953	eor	r0,r11,r11,ror#5
1954	vshr.u32	d24,d1,#17
1955	add	r7,r7,r3
1956	and	r2,r2,r11
1957	veor	q9,q9,q11
1958	eor	r3,r0,r11,ror#19
1959	eor	r0,r7,r7,ror#11
1960	vsli.32	d24,d1,#15
1961	eor	r2,r2,r5
1962	add	r6,r6,r3,ror#6
1963	vshr.u32	d25,d1,#10
1964	eor	r3,r7,r8
1965	eor	r0,r0,r7,ror#20
1966	vadd.i32	q1,q1,q9
1967	add	r6,r6,r2
1968	ldr	r2,[sp,#24]
1969	veor	d25,d25,d24
1970	and	r12,r12,r3
1971	add	r10,r10,r6
1972	vshr.u32	d24,d1,#19
1973	add	r6,r6,r0,ror#2
1974	eor	r12,r12,r8
1975	vsli.32	d24,d1,#13
1976	add	r5,r5,r2
1977	eor	r2,r11,r4
1978	veor	d25,d25,d24
1979	eor	r0,r10,r10,ror#5
1980	add	r6,r6,r12
1981	vadd.i32	d2,d2,d25
1982	and	r2,r2,r10
1983	eor	r12,r0,r10,ror#19
1984	vshr.u32	d24,d2,#17
1985	eor	r0,r6,r6,ror#11
1986	eor	r2,r2,r4
1987	vsli.32	d24,d2,#15
1988	add	r5,r5,r12,ror#6
1989	eor	r12,r6,r7
1990	vshr.u32	d25,d2,#10
1991	eor	r0,r0,r6,ror#20
1992	add	r5,r5,r2
1993	veor	d25,d25,d24
1994	ldr	r2,[sp,#28]
1995	and	r3,r3,r12
1996	vshr.u32	d24,d2,#19
1997	add	r9,r9,r5
1998	add	r5,r5,r0,ror#2
1999	eor	r3,r3,r7
2000	vld1.32	{q8},[r14,:128]!
2001	add	r4,r4,r2
2002	vsli.32	d24,d2,#13
2003	eor	r2,r10,r11
2004	eor	r0,r9,r9,ror#5
2005	veor	d25,d25,d24
2006	add	r5,r5,r3
2007	and	r2,r2,r9
2008	vadd.i32	d3,d3,d25
2009	eor	r3,r0,r9,ror#19
2010	eor	r0,r5,r5,ror#11
2011	vadd.i32	q8,q8,q1
2012	eor	r2,r2,r11
2013	add	r4,r4,r3,ror#6
2014	eor	r3,r5,r6
2015	eor	r0,r0,r5,ror#20
2016	add	r4,r4,r2
2017	ldr	r2,[sp,#32]
2018	and	r12,r12,r3
2019	add	r8,r8,r4
2020	vst1.32	{q8},[r1,:128]!
2021	add	r4,r4,r0,ror#2
2022	eor	r12,r12,r6
2023	vext.8	q8,q2,q3,#4
2024	add	r11,r11,r2
2025	eor	r2,r9,r10
2026	eor	r0,r8,r8,ror#5
2027	vext.8	q9,q0,q1,#4
2028	add	r4,r4,r12
2029	and	r2,r2,r8
2030	eor	r12,r0,r8,ror#19
2031	vshr.u32	q10,q8,#7
2032	eor	r0,r4,r4,ror#11
2033	eor	r2,r2,r10
2034	vadd.i32	q2,q2,q9
2035	add	r11,r11,r12,ror#6
2036	eor	r12,r4,r5
2037	vshr.u32	q9,q8,#3
2038	eor	r0,r0,r4,ror#20
2039	add	r11,r11,r2
2040	vsli.32	q10,q8,#25
2041	ldr	r2,[sp,#36]
2042	and	r3,r3,r12
2043	vshr.u32	q11,q8,#18
2044	add	r7,r7,r11
2045	add	r11,r11,r0,ror#2
2046	eor	r3,r3,r5
2047	veor	q9,q9,q10
2048	add	r10,r10,r2
2049	vsli.32	q11,q8,#14
2050	eor	r2,r8,r9
2051	eor	r0,r7,r7,ror#5
2052	vshr.u32	d24,d3,#17
2053	add	r11,r11,r3
2054	and	r2,r2,r7
2055	veor	q9,q9,q11
2056	eor	r3,r0,r7,ror#19
2057	eor	r0,r11,r11,ror#11
2058	vsli.32	d24,d3,#15
2059	eor	r2,r2,r9
2060	add	r10,r10,r3,ror#6
2061	vshr.u32	d25,d3,#10
2062	eor	r3,r11,r4
2063	eor	r0,r0,r11,ror#20
2064	vadd.i32	q2,q2,q9
2065	add	r10,r10,r2
2066	ldr	r2,[sp,#40]
2067	veor	d25,d25,d24
2068	and	r12,r12,r3
2069	add	r6,r6,r10
2070	vshr.u32	d24,d3,#19
2071	add	r10,r10,r0,ror#2
2072	eor	r12,r12,r4
2073	vsli.32	d24,d3,#13
2074	add	r9,r9,r2
2075	eor	r2,r7,r8
2076	veor	d25,d25,d24
2077	eor	r0,r6,r6,ror#5
2078	add	r10,r10,r12
2079	vadd.i32	d4,d4,d25
2080	and	r2,r2,r6
2081	eor	r12,r0,r6,ror#19
2082	vshr.u32	d24,d4,#17
2083	eor	r0,r10,r10,ror#11
2084	eor	r2,r2,r8
2085	vsli.32	d24,d4,#15
2086	add	r9,r9,r12,ror#6
2087	eor	r12,r10,r11
2088	vshr.u32	d25,d4,#10
2089	eor	r0,r0,r10,ror#20
2090	add	r9,r9,r2
2091	veor	d25,d25,d24
2092	ldr	r2,[sp,#44]
2093	and	r3,r3,r12
2094	vshr.u32	d24,d4,#19
2095	add	r5,r5,r9
2096	add	r9,r9,r0,ror#2
2097	eor	r3,r3,r11
2098	vld1.32	{q8},[r14,:128]!
2099	add	r8,r8,r2
2100	vsli.32	d24,d4,#13
2101	eor	r2,r6,r7
2102	eor	r0,r5,r5,ror#5
2103	veor	d25,d25,d24
2104	add	r9,r9,r3
2105	and	r2,r2,r5
2106	vadd.i32	d5,d5,d25
2107	eor	r3,r0,r5,ror#19
2108	eor	r0,r9,r9,ror#11
2109	vadd.i32	q8,q8,q2
2110	eor	r2,r2,r7
2111	add	r8,r8,r3,ror#6
2112	eor	r3,r9,r10
2113	eor	r0,r0,r9,ror#20
2114	add	r8,r8,r2
2115	ldr	r2,[sp,#48]
2116	and	r12,r12,r3
2117	add	r4,r4,r8
2118	vst1.32	{q8},[r1,:128]!
2119	add	r8,r8,r0,ror#2
2120	eor	r12,r12,r10
2121	vext.8	q8,q3,q0,#4
2122	add	r7,r7,r2
2123	eor	r2,r5,r6
2124	eor	r0,r4,r4,ror#5
2125	vext.8	q9,q1,q2,#4
2126	add	r8,r8,r12
2127	and	r2,r2,r4
2128	eor	r12,r0,r4,ror#19
2129	vshr.u32	q10,q8,#7
2130	eor	r0,r8,r8,ror#11
2131	eor	r2,r2,r6
2132	vadd.i32	q3,q3,q9
2133	add	r7,r7,r12,ror#6
2134	eor	r12,r8,r9
2135	vshr.u32	q9,q8,#3
2136	eor	r0,r0,r8,ror#20
2137	add	r7,r7,r2
2138	vsli.32	q10,q8,#25
2139	ldr	r2,[sp,#52]
2140	and	r3,r3,r12
2141	vshr.u32	q11,q8,#18
2142	add	r11,r11,r7
2143	add	r7,r7,r0,ror#2
2144	eor	r3,r3,r9
2145	veor	q9,q9,q10
2146	add	r6,r6,r2
2147	vsli.32	q11,q8,#14
2148	eor	r2,r4,r5
2149	eor	r0,r11,r11,ror#5
2150	vshr.u32	d24,d5,#17
2151	add	r7,r7,r3
2152	and	r2,r2,r11
2153	veor	q9,q9,q11
2154	eor	r3,r0,r11,ror#19
2155	eor	r0,r7,r7,ror#11
2156	vsli.32	d24,d5,#15
2157	eor	r2,r2,r5
2158	add	r6,r6,r3,ror#6
2159	vshr.u32	d25,d5,#10
2160	eor	r3,r7,r8
2161	eor	r0,r0,r7,ror#20
2162	vadd.i32	q3,q3,q9
2163	add	r6,r6,r2
2164	ldr	r2,[sp,#56]
2165	veor	d25,d25,d24
2166	and	r12,r12,r3
2167	add	r10,r10,r6
2168	vshr.u32	d24,d5,#19
2169	add	r6,r6,r0,ror#2
2170	eor	r12,r12,r8
2171	vsli.32	d24,d5,#13
2172	add	r5,r5,r2
2173	eor	r2,r11,r4
2174	veor	d25,d25,d24
2175	eor	r0,r10,r10,ror#5
2176	add	r6,r6,r12
2177	vadd.i32	d6,d6,d25
2178	and	r2,r2,r10
2179	eor	r12,r0,r10,ror#19
2180	vshr.u32	d24,d6,#17
2181	eor	r0,r6,r6,ror#11
2182	eor	r2,r2,r4
2183	vsli.32	d24,d6,#15
2184	add	r5,r5,r12,ror#6
2185	eor	r12,r6,r7
2186	vshr.u32	d25,d6,#10
2187	eor	r0,r0,r6,ror#20
2188	add	r5,r5,r2
2189	veor	d25,d25,d24
2190	ldr	r2,[sp,#60]
2191	and	r3,r3,r12
2192	vshr.u32	d24,d6,#19
2193	add	r9,r9,r5
2194	add	r5,r5,r0,ror#2
2195	eor	r3,r3,r7
2196	vld1.32	{q8},[r14,:128]!
2197	add	r4,r4,r2
2198	vsli.32	d24,d6,#13
2199	eor	r2,r10,r11
2200	eor	r0,r9,r9,ror#5
2201	veor	d25,d25,d24
2202	add	r5,r5,r3
2203	and	r2,r2,r9
2204	vadd.i32	d7,d7,d25
2205	eor	r3,r0,r9,ror#19
2206	eor	r0,r5,r5,ror#11
2207	vadd.i32	q8,q8,q3
2208	eor	r2,r2,r11
2209	add	r4,r4,r3,ror#6
2210	eor	r3,r5,r6
2211	eor	r0,r0,r5,ror#20
2212	add	r4,r4,r2
@ Peek at the next K256 word: the table is followed by a zero
@ terminator word (see K256 in this file), so Z stays clear until
@ all 48 schedule rounds have been consumed.
2213	ldr	r2,[r14]
2214	and	r12,r12,r3
2215	add	r8,r8,r4
2216	vst1.32	{q8},[r1,:128]!
2217	add	r4,r4,r0,ror#2
2218	eor	r12,r12,r6
2219	teq	r2,#0				@ check for K256 terminator
2220	ldr	r2,[sp,#0]
2221	sub	r1,r1,#64
2222	bne	.L_00_48
2223
@ Schedule exhausted: prepare the NEXT 64-byte block while the last
@ 16 scalar rounds of the current block run below. If we just
@ consumed the final block (r1 == end), back r1 up by 64 so the
@ speculative loads below stay inside the buffer (the results are
@ then unused). teq sets Z for all the strne/movne/eorne/ldreq
@ predicates through the end of the function.
2224	ldr		r1,[sp,#68]
2225	ldr		r0,[sp,#72]
2226	sub		r14,r14,#256	@ rewind r14
2227	teq		r1,r0
2228	subeq		r1,r1,#64		@ avoid SEGV
2229	vld1.8		{q0},[r1]!		@ load next input block
2230	vld1.8		{q1},[r1]!
2231	vld1.8		{q2},[r1]!
2232	vld1.8		{q3},[r1]!
2233	strne		r1,[sp,#68]
2234	mov		r1,sp
@ Rounds 48..63: plain scalar rounds on the already-expanded X[]
@ words, interleaved with byte-swapping and K-adding the freshly
@ loaded next block (vrev32.8 / vadd.i32 / vst1.32 into the frame).
2235	add	r11,r11,r2
2236	eor	r2,r9,r10
2237	eor	r0,r8,r8,ror#5
2238	add	r4,r4,r12
2239	vld1.32	{q8},[r14,:128]!
2240	and	r2,r2,r8
2241	eor	r12,r0,r8,ror#19
2242	eor	r0,r4,r4,ror#11
2243	eor	r2,r2,r10
2244	vrev32.8	q0,q0
2245	add	r11,r11,r12,ror#6
2246	eor	r12,r4,r5
2247	eor	r0,r0,r4,ror#20
2248	add	r11,r11,r2
2249	vadd.i32	q8,q8,q0
2250	ldr	r2,[sp,#4]
2251	and	r3,r3,r12
2252	add	r7,r7,r11
2253	add	r11,r11,r0,ror#2
2254	eor	r3,r3,r5
2255	add	r10,r10,r2
2256	eor	r2,r8,r9
2257	eor	r0,r7,r7,ror#5
2258	add	r11,r11,r3
2259	and	r2,r2,r7
2260	eor	r3,r0,r7,ror#19
2261	eor	r0,r11,r11,ror#11
2262	eor	r2,r2,r9
2263	add	r10,r10,r3,ror#6
2264	eor	r3,r11,r4
2265	eor	r0,r0,r11,ror#20
2266	add	r10,r10,r2
2267	ldr	r2,[sp,#8]
2268	and	r12,r12,r3
2269	add	r6,r6,r10
2270	add	r10,r10,r0,ror#2
2271	eor	r12,r12,r4
2272	add	r9,r9,r2
2273	eor	r2,r7,r8
2274	eor	r0,r6,r6,ror#5
2275	add	r10,r10,r12
2276	and	r2,r2,r6
2277	eor	r12,r0,r6,ror#19
2278	eor	r0,r10,r10,ror#11
2279	eor	r2,r2,r8
2280	add	r9,r9,r12,ror#6
2281	eor	r12,r10,r11
2282	eor	r0,r0,r10,ror#20
2283	add	r9,r9,r2
2284	ldr	r2,[sp,#12]
2285	and	r3,r3,r12
2286	add	r5,r5,r9
2287	add	r9,r9,r0,ror#2
2288	eor	r3,r3,r11
2289	add	r8,r8,r2
2290	eor	r2,r6,r7
2291	eor	r0,r5,r5,ror#5
2292	add	r9,r9,r3
2293	and	r2,r2,r5
2294	eor	r3,r0,r5,ror#19
2295	eor	r0,r9,r9,ror#11
2296	eor	r2,r2,r7
2297	add	r8,r8,r3,ror#6
2298	eor	r3,r9,r10
2299	eor	r0,r0,r9,ror#20
2300	add	r8,r8,r2
2301	ldr	r2,[sp,#16]
2302	and	r12,r12,r3
2303	add	r4,r4,r8
2304	add	r8,r8,r0,ror#2
2305	eor	r12,r12,r10
2306	vst1.32	{q8},[r1,:128]!
2307	add	r7,r7,r2
2308	eor	r2,r5,r6
2309	eor	r0,r4,r4,ror#5
2310	add	r8,r8,r12
2311	vld1.32	{q8},[r14,:128]!
2312	and	r2,r2,r4
2313	eor	r12,r0,r4,ror#19
2314	eor	r0,r8,r8,ror#11
2315	eor	r2,r2,r6
2316	vrev32.8	q1,q1
2317	add	r7,r7,r12,ror#6
2318	eor	r12,r8,r9
2319	eor	r0,r0,r8,ror#20
2320	add	r7,r7,r2
2321	vadd.i32	q8,q8,q1
2322	ldr	r2,[sp,#20]
2323	and	r3,r3,r12
2324	add	r11,r11,r7
2325	add	r7,r7,r0,ror#2
2326	eor	r3,r3,r9
2327	add	r6,r6,r2
2328	eor	r2,r4,r5
2329	eor	r0,r11,r11,ror#5
2330	add	r7,r7,r3
2331	and	r2,r2,r11
2332	eor	r3,r0,r11,ror#19
2333	eor	r0,r7,r7,ror#11
2334	eor	r2,r2,r5
2335	add	r6,r6,r3,ror#6
2336	eor	r3,r7,r8
2337	eor	r0,r0,r7,ror#20
2338	add	r6,r6,r2
2339	ldr	r2,[sp,#24]
2340	and	r12,r12,r3
2341	add	r10,r10,r6
2342	add	r6,r6,r0,ror#2
2343	eor	r12,r12,r8
2344	add	r5,r5,r2
2345	eor	r2,r11,r4
2346	eor	r0,r10,r10,ror#5
2347	add	r6,r6,r12
2348	and	r2,r2,r10
2349	eor	r12,r0,r10,ror#19
2350	eor	r0,r6,r6,ror#11
2351	eor	r2,r2,r4
2352	add	r5,r5,r12,ror#6
2353	eor	r12,r6,r7
2354	eor	r0,r0,r6,ror#20
2355	add	r5,r5,r2
2356	ldr	r2,[sp,#28]
2357	and	r3,r3,r12
2358	add	r9,r9,r5
2359	add	r5,r5,r0,ror#2
2360	eor	r3,r3,r7
2361	add	r4,r4,r2
2362	eor	r2,r10,r11
2363	eor	r0,r9,r9,ror#5
2364	add	r5,r5,r3
2365	and	r2,r2,r9
2366	eor	r3,r0,r9,ror#19
2367	eor	r0,r5,r5,ror#11
2368	eor	r2,r2,r11
2369	add	r4,r4,r3,ror#6
2370	eor	r3,r5,r6
2371	eor	r0,r0,r5,ror#20
2372	add	r4,r4,r2
2373	ldr	r2,[sp,#32]
2374	and	r12,r12,r3
2375	add	r8,r8,r4
2376	add	r4,r4,r0,ror#2
2377	eor	r12,r12,r6
2378	vst1.32	{q8},[r1,:128]!
2379	add	r11,r11,r2
2380	eor	r2,r9,r10
2381	eor	r0,r8,r8,ror#5
2382	add	r4,r4,r12
2383	vld1.32	{q8},[r14,:128]!
2384	and	r2,r2,r8
2385	eor	r12,r0,r8,ror#19
2386	eor	r0,r4,r4,ror#11
2387	eor	r2,r2,r10
2388	vrev32.8	q2,q2
2389	add	r11,r11,r12,ror#6
2390	eor	r12,r4,r5
2391	eor	r0,r0,r4,ror#20
2392	add	r11,r11,r2
2393	vadd.i32	q8,q8,q2
2394	ldr	r2,[sp,#36]
2395	and	r3,r3,r12
2396	add	r7,r7,r11
2397	add	r11,r11,r0,ror#2
2398	eor	r3,r3,r5
2399	add	r10,r10,r2
2400	eor	r2,r8,r9
2401	eor	r0,r7,r7,ror#5
2402	add	r11,r11,r3
2403	and	r2,r2,r7
2404	eor	r3,r0,r7,ror#19
2405	eor	r0,r11,r11,ror#11
2406	eor	r2,r2,r9
2407	add	r10,r10,r3,ror#6
2408	eor	r3,r11,r4
2409	eor	r0,r0,r11,ror#20
2410	add	r10,r10,r2
2411	ldr	r2,[sp,#40]
2412	and	r12,r12,r3
2413	add	r6,r6,r10
2414	add	r10,r10,r0,ror#2
2415	eor	r12,r12,r4
2416	add	r9,r9,r2
2417	eor	r2,r7,r8
2418	eor	r0,r6,r6,ror#5
2419	add	r10,r10,r12
2420	and	r2,r2,r6
2421	eor	r12,r0,r6,ror#19
2422	eor	r0,r10,r10,ror#11
2423	eor	r2,r2,r8
2424	add	r9,r9,r12,ror#6
2425	eor	r12,r10,r11
2426	eor	r0,r0,r10,ror#20
2427	add	r9,r9,r2
2428	ldr	r2,[sp,#44]
2429	and	r3,r3,r12
2430	add	r5,r5,r9
2431	add	r9,r9,r0,ror#2
2432	eor	r3,r3,r11
2433	add	r8,r8,r2
2434	eor	r2,r6,r7
2435	eor	r0,r5,r5,ror#5
2436	add	r9,r9,r3
2437	and	r2,r2,r5
2438	eor	r3,r0,r5,ror#19
2439	eor	r0,r9,r9,ror#11
2440	eor	r2,r2,r7
2441	add	r8,r8,r3,ror#6
2442	eor	r3,r9,r10
2443	eor	r0,r0,r9,ror#20
2444	add	r8,r8,r2
2445	ldr	r2,[sp,#48]
2446	and	r12,r12,r3
2447	add	r4,r4,r8
2448	add	r8,r8,r0,ror#2
2449	eor	r12,r12,r10
2450	vst1.32	{q8},[r1,:128]!
2451	add	r7,r7,r2
2452	eor	r2,r5,r6
2453	eor	r0,r4,r4,ror#5
2454	add	r8,r8,r12
2455	vld1.32	{q8},[r14,:128]!
2456	and	r2,r2,r4
2457	eor	r12,r0,r4,ror#19
2458	eor	r0,r8,r8,ror#11
2459	eor	r2,r2,r6
2460	vrev32.8	q3,q3
2461	add	r7,r7,r12,ror#6
2462	eor	r12,r8,r9
2463	eor	r0,r0,r8,ror#20
2464	add	r7,r7,r2
2465	vadd.i32	q8,q8,q3
2466	ldr	r2,[sp,#52]
2467	and	r3,r3,r12
2468	add	r11,r11,r7
2469	add	r7,r7,r0,ror#2
2470	eor	r3,r3,r9
2471	add	r6,r6,r2
2472	eor	r2,r4,r5
2473	eor	r0,r11,r11,ror#5
2474	add	r7,r7,r3
2475	and	r2,r2,r11
2476	eor	r3,r0,r11,ror#19
2477	eor	r0,r7,r7,ror#11
2478	eor	r2,r2,r5
2479	add	r6,r6,r3,ror#6
2480	eor	r3,r7,r8
2481	eor	r0,r0,r7,ror#20
2482	add	r6,r6,r2
2483	ldr	r2,[sp,#56]
2484	and	r12,r12,r3
2485	add	r10,r10,r6
2486	add	r6,r6,r0,ror#2
2487	eor	r12,r12,r8
2488	add	r5,r5,r2
2489	eor	r2,r11,r4
2490	eor	r0,r10,r10,ror#5
2491	add	r6,r6,r12
2492	and	r2,r2,r10
2493	eor	r12,r0,r10,ror#19
2494	eor	r0,r6,r6,ror#11
2495	eor	r2,r2,r4
2496	add	r5,r5,r12,ror#6
2497	eor	r12,r6,r7
2498	eor	r0,r0,r6,ror#20
2499	add	r5,r5,r2
2500	ldr	r2,[sp,#60]
2501	and	r3,r3,r12
2502	add	r9,r9,r5
2503	add	r5,r5,r0,ror#2
2504	eor	r3,r3,r7
2505	add	r4,r4,r2
2506	eor	r2,r10,r11
2507	eor	r0,r9,r9,ror#5
2508	add	r5,r5,r3
2509	and	r2,r2,r9
2510	eor	r3,r0,r9,ror#19
2511	eor	r0,r5,r5,ror#11
2512	eor	r2,r2,r11
2513	add	r4,r4,r3,ror#6
2514	eor	r3,r5,r6
2515	eor	r0,r0,r5,ror#20
2516	add	r4,r4,r2
@ r2 = ctx (from [sp,#64]); add the working variables back into the
@ saved state and store it, walking r2 through the eight words.
2517	ldr	r2,[sp,#64]
2518	and	r12,r12,r3
2519	add	r8,r8,r4
2520	add	r4,r4,r0,ror#2
2521	eor	r12,r12,r6
2522	vst1.32	{q8},[r1,:128]!
2523	ldr	r0,[r2,#0]
2524	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2525	ldr	r12,[r2,#4]
2526	ldr	r3,[r2,#8]
2527	ldr	r1,[r2,#12]
2528	add	r4,r4,r0			@ accumulate
2529	ldr	r0,[r2,#16]
2530	add	r5,r5,r12
2531	ldr	r12,[r2,#20]
2532	add	r6,r6,r3
2533	ldr	r3,[r2,#24]
2534	add	r7,r7,r1
2535	ldr	r1,[r2,#28]
2536	add	r8,r8,r0
2537	str	r4,[r2],#4
2538	add	r9,r9,r12
2539	str	r5,[r2],#4
2540	add	r10,r10,r3
2541	str	r6,[r2],#4
2542	add	r11,r11,r1
2543	str	r7,[r2],#4
2544	stmia	r2,{r8-r11}
2545
@ Z flag still reflects the earlier "more input?" teq: if another
@ block was preloaded, reseed r1/r2/r12/r3 and loop; otherwise
@ restore the caller's sp and return.
2546	movne	r1,sp
2547	ldrne	r2,[sp,#0]
2548	eorne	r12,r12,r12
2549	ldreq	sp,[sp,#76]			@ restore original sp
2550	eorne	r3,r5,r6
2551	bne	.L_00_48
2552
2553	ldmia	sp!,{r4-r12,pc}
2554.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2555#endif
2556#if __ARM_MAX_ARCH__>=7
@-----------------------------------------------------------------------
@ sha256_block_data_order_armv8
@
@ SHA-256 block transform using the ARMv8 Cryptography Extension
@ instructions (sha256h / sha256h2 / sha256su0 / sha256su1).  Reached
@ via the "bne .LARMv8" dispatch at the top of sha256_block_data_order
@ when OPENSSL_armcap_P has the ARMV8_SHA256 bit set.
@
@ On entry (set up by the common sha256_block_data_order prologue):
@   r0 = pointer to the 8-word SHA-256 state (H0..H7)
@   r1 = pointer to the input data
@   r2 = r1 + 64*num, i.e. one past the last input byte
@   r3 = address of sha256_block_data_order (rebased to K256 below)
@
@ NOTE(review): the SHA-256 instructions are emitted as raw .inst
@ words, presumably so the file still assembles with toolchains that
@ lack the ARMv8 crypto mnemonics -- confirm before replacing them
@ with mnemonic forms.
@-----------------------------------------------------------------------
2557.type	sha256_block_data_order_armv8,%function
2558.align	5
2559sha256_block_data_order_armv8:
2560.LARMv8:
2561	vld1.32	{q0,q1},[r0]			@ load the 8 state words into q0,q1
2562	sub	r3,r3,#sha256_block_data_order-K256	@ r3 = K256 constant table
2563
@ Main loop: one iteration consumes one 64-byte input block.
2564.Loop_v8:
2565	vld1.8		{q8-q9},[r1]!		@ load 64 bytes of input ...
2566	vld1.8		{q10-q11},[r1]!		@ ... into q8-q11 (message schedule W0..W15)
2567	vld1.32		{q12},[r3]!		@ first 4 round constants
2568	vrev32.8	q8,q8			@ byte-swap input words (SHA-256 is big-endian)
2569	vrev32.8	q9,q9
2570	vrev32.8	q10,q10
2571	vrev32.8	q11,q11
2572	vmov		q14,q0	@ offload
2573	vmov		q15,q1
2574	teq		r1,r2			@ Z set when input exhausted; flags consumed by bne below
@ Quad-rounds 0..11: each group adds K to a schedule word, runs
@ sha256h/sha256h2, and extends the message schedule (sha256su0/su1).
2575	vld1.32		{q13},[r3]!
2576	vadd.i32	q12,q12,q8
2577	.inst	0xf3fa03e2	@ sha256su0 q8,q9
2578	vmov		q2,q0
2579	.inst	0xf3020c68	@ sha256h q0,q1,q12
2580	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2581	.inst	0xf3640ce6	@ sha256su1 q8,q10,q11
2582	vld1.32		{q12},[r3]!
2583	vadd.i32	q13,q13,q9
2584	.inst	0xf3fa23e4	@ sha256su0 q9,q10
2585	vmov		q2,q0
2586	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2587	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2588	.inst	0xf3662ce0	@ sha256su1 q9,q11,q8
2589	vld1.32		{q13},[r3]!
2590	vadd.i32	q12,q12,q10
2591	.inst	0xf3fa43e6	@ sha256su0 q10,q11
2592	vmov		q2,q0
2593	.inst	0xf3020c68	@ sha256h q0,q1,q12
2594	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2595	.inst	0xf3604ce2	@ sha256su1 q10,q8,q9
2596	vld1.32		{q12},[r3]!
2597	vadd.i32	q13,q13,q11
2598	.inst	0xf3fa63e0	@ sha256su0 q11,q8
2599	vmov		q2,q0
2600	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2601	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2602	.inst	0xf3626ce4	@ sha256su1 q11,q9,q10
2603	vld1.32	{q13},[r3]!
2604	vadd.i32	q12,q12,q8
2605	.inst	0xf3fa03e2	@ sha256su0 q8,q9
2606	vmov		q2,q0
2607	.inst	0xf3020c68	@ sha256h q0,q1,q12
2608	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2609	.inst	0xf3640ce6	@ sha256su1 q8,q10,q11
2610	vld1.32	{q12},[r3]!
2611	vadd.i32	q13,q13,q9
2612	.inst	0xf3fa23e4	@ sha256su0 q9,q10
2613	vmov		q2,q0
2614	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2615	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2616	.inst	0xf3662ce0	@ sha256su1 q9,q11,q8
2617	vld1.32	{q13},[r3]!
2618	vadd.i32	q12,q12,q10
2619	.inst	0xf3fa43e6	@ sha256su0 q10,q11
2620	vmov		q2,q0
2621	.inst	0xf3020c68	@ sha256h q0,q1,q12
2622	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2623	.inst	0xf3604ce2	@ sha256su1 q10,q8,q9
2624	vld1.32	{q12},[r3]!
2625	vadd.i32	q13,q13,q11
2626	.inst	0xf3fa63e0	@ sha256su0 q11,q8
2627	vmov		q2,q0
2628	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2629	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2630	.inst	0xf3626ce4	@ sha256su1 q11,q9,q10
2631	vld1.32	{q13},[r3]!
2632	vadd.i32	q12,q12,q8
2633	.inst	0xf3fa03e2	@ sha256su0 q8,q9
2634	vmov		q2,q0
2635	.inst	0xf3020c68	@ sha256h q0,q1,q12
2636	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2637	.inst	0xf3640ce6	@ sha256su1 q8,q10,q11
2638	vld1.32	{q12},[r3]!
2639	vadd.i32	q13,q13,q9
2640	.inst	0xf3fa23e4	@ sha256su0 q9,q10
2641	vmov		q2,q0
2642	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2643	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2644	.inst	0xf3662ce0	@ sha256su1 q9,q11,q8
2645	vld1.32	{q13},[r3]!
2646	vadd.i32	q12,q12,q10
2647	.inst	0xf3fa43e6	@ sha256su0 q10,q11
2648	vmov		q2,q0
2649	.inst	0xf3020c68	@ sha256h q0,q1,q12
2650	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2651	.inst	0xf3604ce2	@ sha256su1 q10,q8,q9
2652	vld1.32	{q12},[r3]!
2653	vadd.i32	q13,q13,q11
2654	.inst	0xf3fa63e0	@ sha256su0 q11,q8
2655	vmov		q2,q0
2656	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2657	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2658	.inst	0xf3626ce4	@ sha256su1 q11,q9,q10
@ Quad-rounds 12..15: message schedule is complete, so no more su0/su1.
2659	vld1.32	{q13},[r3]!
2660	vadd.i32	q12,q12,q8
2661	vmov		q2,q0
2662	.inst	0xf3020c68	@ sha256h q0,q1,q12
2663	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2664
2665	vld1.32		{q12},[r3]!
2666	vadd.i32	q13,q13,q9
2667	vmov		q2,q0
2668	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2669	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2670
2671	vld1.32		{q13},[r3]		@ last constants: no post-increment, hence the -16 in the rewind
2672	vadd.i32	q12,q12,q10
2673	sub		r3,r3,#256-16	@ rewind
2674	vmov		q2,q0
2675	.inst	0xf3020c68	@ sha256h q0,q1,q12
2676	.inst	0xf3142c68	@ sha256h2 q1,q2,q12
2677
2678	vadd.i32	q13,q13,q11
2679	vmov		q2,q0
2680	.inst	0xf3020c6a	@ sha256h q0,q1,q13
2681	.inst	0xf3142c6a	@ sha256h2 q1,q2,q13
2682
2683	vadd.i32	q0,q0,q14		@ accumulate: add state saved in q14,q15 before the rounds
2684	vadd.i32	q1,q1,q15
2685	bne		.Loop_v8		@ loop while r1 != r2 (flags from teq; NEON ops leave CPSR intact)
2686
2687	vst1.32		{q0,q1},[r0]		@ store updated state back to the context
2688
2689	RET		@ bx lr
2690.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2691#endif
2692.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2693.align	2
2694#if __ARM_MAX_ARCH__>=7
2695.comm   OPENSSL_armcap_P,4,4
2696#endif
2697