xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/sha512-mips.S (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
/* Build-time configuration for the SHA-256 block routine that follows.
 *
 * FIPS canister builds include openssl/fipssyms.h before any symbol is
 * declared (presumably to rename exported symbols; confirm against that
 * header, which is not visible here). */
1#ifdef OPENSSL_FIPSCANISTER
2# include <openssl/fipssyms.h>
3#endif
4
/* A SmartMIPS target is treated as MIPS32R2: defining the macro selects
 * the wsbh/rotr code paths guarded by _MIPS_ARCH_MIPS32R2 further down
 * instead of the shift/or fallbacks. */
5#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
6#define _MIPS_ARCH_MIPS32R2
7#endif
8
9.text
/* noat: the assembler must not use $1 ($at) as its own scratch register;
 * the code below uses $1 explicitly as a working register. */
10.set	noat
/* Request PIC assembly everywhere except on EABI targets and on VxWorks
 * builds that are not compiled as PIC. */
11#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
12.option	pic2
13#endif
14
15.align	5
16.globl	sha256_block_data_order
17.ent	sha256_block_data_order
18sha256_block_data_order:
19	.frame	$29,128,$31
20	.mask	0xc0ff0000,-4
21	.set	noreorder
22	.cpload	$25
23	sub $29,128
24	sw	$31,128-1*4($29)
25	sw	$30,128-2*4($29)
26	sw	$23,128-3*4($29)
27	sw	$22,128-4*4($29)
28	sw	$21,128-5*4($29)
29	sw	$20,128-6*4($29)
30	sw	$19,128-7*4($29)
31	sw	$18,128-8*4($29)
32	sw	$17,128-9*4($29)
33	sw	$16,128-10*4($29)
34	sll $23,$6,6
35	.set	reorder
36	la	$6,K256		# PIC-ified 'load address'
37
38	lw	$1,0*4($4)		# load context
39	lw	$2,1*4($4)
40	lw	$3,2*4($4)
41	lw	$7,3*4($4)
42	lw	$24,4*4($4)
43	lw	$25,5*4($4)
44	lw	$30,6*4($4)
45	lw	$31,7*4($4)
46
47	add $23,$5		# pointer to the end of input
48	sw	$23,16*4($29)
49	b	.Loop
50
51.align	5
52.Loop:
53	lwl	$8,3($5)
54	lwr	$8,0($5)
55	lwl	$9,7($5)
56	lwr	$9,4($5)
57#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
58	wsbh	$8,$8		# byte swap(0)
59	rotr	$8,$8,16
60#else
61	srl	$13,$8,24		# byte swap(0)
62	srl	$14,$8,8
63	andi	$15,$8,0xFF00
64	sll	$8,$8,24
65	andi	$14,0xFF00
66	sll	$15,$15,8
67	or	$8,$13
68	or	$14,$15
69	or	$8,$14
70#endif
71#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
72	xor	$15,$25,$30			# 0
73	rotr	$13,$24,6
74	addu	$12,$8,$31
75	rotr	$14,$24,11
76	and	$15,$24
77	rotr	$31,$24,25
78	xor	$13,$14
79	rotr	$14,$1,2
80	xor	$15,$30			# Ch(e,f,g)
81	xor	$13,$31			# Sigma1(e)
82
83	rotr	$31,$1,13
84	addu	$12,$15
85	lw	$15,0($6)		# K[0]
86	xor	$31,$14
87	rotr	$14,$1,22
88	addu	$12,$13
89	and	$13,$2,$3
90	xor	$31,$14			# Sigma0(a)
91	xor	$14,$2,$3
92#else
93	addu	$12,$8,$31			# 0
94	srl	$31,$24,6
95	xor	$15,$25,$30
96	sll	$14,$24,7
97	and	$15,$24
98	srl	$13,$24,11
99	xor	$31,$14
100	sll	$14,$24,21
101	xor	$31,$13
102	srl	$13,$24,25
103	xor	$31,$14
104	sll	$14,$24,26
105	xor	$31,$13
106	xor	$15,$30			# Ch(e,f,g)
107	xor	$13,$14,$31			# Sigma1(e)
108
109	srl	$31,$1,2
110	addu	$12,$15
111	lw	$15,0($6)		# K[0]
112	sll	$14,$1,10
113	addu	$12,$13
114	srl	$13,$1,13
115	xor	$31,$14
116	sll	$14,$1,19
117	xor	$31,$13
118	srl	$13,$1,22
119	xor	$31,$14
120	sll	$14,$1,30
121	xor	$31,$13
122	and	$13,$2,$3
123	xor	$31,$14			# Sigma0(a)
124	xor	$14,$2,$3
125#endif
126	sw	$8,0($29)	# offload to ring buffer
127	addu	$31,$13
128	and	$14,$1
129	addu	$12,$15			# +=K[0]
130	addu	$31,$14			# +=Maj(a,b,c)
131	addu	$7,$12
132	addu	$31,$12
133	lwl	$10,11($5)
134	lwr	$10,8($5)
135#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
136	wsbh	$9,$9		# byte swap(1)
137	rotr	$9,$9,16
138#else
139	srl	$14,$9,24		# byte swap(1)
140	srl	$15,$9,8
141	andi	$16,$9,0xFF00
142	sll	$9,$9,24
143	andi	$15,0xFF00
144	sll	$16,$16,8
145	or	$9,$14
146	or	$15,$16
147	or	$9,$15
148#endif
149#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
150	xor	$16,$24,$25			# 1
151	rotr	$14,$7,6
152	addu	$13,$9,$30
153	rotr	$15,$7,11
154	and	$16,$7
155	rotr	$30,$7,25
156	xor	$14,$15
157	rotr	$15,$31,2
158	xor	$16,$25			# Ch(e,f,g)
159	xor	$14,$30			# Sigma1(e)
160
161	rotr	$30,$31,13
162	addu	$13,$16
163	lw	$16,4($6)		# K[1]
164	xor	$30,$15
165	rotr	$15,$31,22
166	addu	$13,$14
167	and	$14,$1,$2
168	xor	$30,$15			# Sigma0(a)
169	xor	$15,$1,$2
170#else
171	addu	$13,$9,$30			# 1
172	srl	$30,$7,6
173	xor	$16,$24,$25
174	sll	$15,$7,7
175	and	$16,$7
176	srl	$14,$7,11
177	xor	$30,$15
178	sll	$15,$7,21
179	xor	$30,$14
180	srl	$14,$7,25
181	xor	$30,$15
182	sll	$15,$7,26
183	xor	$30,$14
184	xor	$16,$25			# Ch(e,f,g)
185	xor	$14,$15,$30			# Sigma1(e)
186
187	srl	$30,$31,2
188	addu	$13,$16
189	lw	$16,4($6)		# K[1]
190	sll	$15,$31,10
191	addu	$13,$14
192	srl	$14,$31,13
193	xor	$30,$15
194	sll	$15,$31,19
195	xor	$30,$14
196	srl	$14,$31,22
197	xor	$30,$15
198	sll	$15,$31,30
199	xor	$30,$14
200	and	$14,$1,$2
201	xor	$30,$15			# Sigma0(a)
202	xor	$15,$1,$2
203#endif
204	sw	$9,4($29)	# offload to ring buffer
205	addu	$30,$14
206	and	$15,$31
207	addu	$13,$16			# +=K[1]
208	addu	$30,$15			# +=Maj(a,b,c)
209	addu	$3,$13
210	addu	$30,$13
211	lwl	$11,15($5)
212	lwr	$11,12($5)
213#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
214	wsbh	$10,$10		# byte swap(2)
215	rotr	$10,$10,16
216#else
217	srl	$15,$10,24		# byte swap(2)
218	srl	$16,$10,8
219	andi	$17,$10,0xFF00
220	sll	$10,$10,24
221	andi	$16,0xFF00
222	sll	$17,$17,8
223	or	$10,$15
224	or	$16,$17
225	or	$10,$16
226#endif
227#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
228	xor	$17,$7,$24			# 2
229	rotr	$15,$3,6
230	addu	$14,$10,$25
231	rotr	$16,$3,11
232	and	$17,$3
233	rotr	$25,$3,25
234	xor	$15,$16
235	rotr	$16,$30,2
236	xor	$17,$24			# Ch(e,f,g)
237	xor	$15,$25			# Sigma1(e)
238
239	rotr	$25,$30,13
240	addu	$14,$17
241	lw	$17,8($6)		# K[2]
242	xor	$25,$16
243	rotr	$16,$30,22
244	addu	$14,$15
245	and	$15,$31,$1
246	xor	$25,$16			# Sigma0(a)
247	xor	$16,$31,$1
248#else
249	addu	$14,$10,$25			# 2
250	srl	$25,$3,6
251	xor	$17,$7,$24
252	sll	$16,$3,7
253	and	$17,$3
254	srl	$15,$3,11
255	xor	$25,$16
256	sll	$16,$3,21
257	xor	$25,$15
258	srl	$15,$3,25
259	xor	$25,$16
260	sll	$16,$3,26
261	xor	$25,$15
262	xor	$17,$24			# Ch(e,f,g)
263	xor	$15,$16,$25			# Sigma1(e)
264
265	srl	$25,$30,2
266	addu	$14,$17
267	lw	$17,8($6)		# K[2]
268	sll	$16,$30,10
269	addu	$14,$15
270	srl	$15,$30,13
271	xor	$25,$16
272	sll	$16,$30,19
273	xor	$25,$15
274	srl	$15,$30,22
275	xor	$25,$16
276	sll	$16,$30,30
277	xor	$25,$15
278	and	$15,$31,$1
279	xor	$25,$16			# Sigma0(a)
280	xor	$16,$31,$1
281#endif
282	sw	$10,8($29)	# offload to ring buffer
283	addu	$25,$15
284	and	$16,$30
285	addu	$14,$17			# +=K[2]
286	addu	$25,$16			# +=Maj(a,b,c)
287	addu	$2,$14
288	addu	$25,$14
289	lwl	$12,19($5)
290	lwr	$12,16($5)
291#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
292	wsbh	$11,$11		# byte swap(3)
293	rotr	$11,$11,16
294#else
295	srl	$16,$11,24		# byte swap(3)
296	srl	$17,$11,8
297	andi	$18,$11,0xFF00
298	sll	$11,$11,24
299	andi	$17,0xFF00
300	sll	$18,$18,8
301	or	$11,$16
302	or	$17,$18
303	or	$11,$17
304#endif
305#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
306	xor	$18,$3,$7			# 3
307	rotr	$16,$2,6
308	addu	$15,$11,$24
309	rotr	$17,$2,11
310	and	$18,$2
311	rotr	$24,$2,25
312	xor	$16,$17
313	rotr	$17,$25,2
314	xor	$18,$7			# Ch(e,f,g)
315	xor	$16,$24			# Sigma1(e)
316
317	rotr	$24,$25,13
318	addu	$15,$18
319	lw	$18,12($6)		# K[3]
320	xor	$24,$17
321	rotr	$17,$25,22
322	addu	$15,$16
323	and	$16,$30,$31
324	xor	$24,$17			# Sigma0(a)
325	xor	$17,$30,$31
326#else
327	addu	$15,$11,$24			# 3
328	srl	$24,$2,6
329	xor	$18,$3,$7
330	sll	$17,$2,7
331	and	$18,$2
332	srl	$16,$2,11
333	xor	$24,$17
334	sll	$17,$2,21
335	xor	$24,$16
336	srl	$16,$2,25
337	xor	$24,$17
338	sll	$17,$2,26
339	xor	$24,$16
340	xor	$18,$7			# Ch(e,f,g)
341	xor	$16,$17,$24			# Sigma1(e)
342
343	srl	$24,$25,2
344	addu	$15,$18
345	lw	$18,12($6)		# K[3]
346	sll	$17,$25,10
347	addu	$15,$16
348	srl	$16,$25,13
349	xor	$24,$17
350	sll	$17,$25,19
351	xor	$24,$16
352	srl	$16,$25,22
353	xor	$24,$17
354	sll	$17,$25,30
355	xor	$24,$16
356	and	$16,$30,$31
357	xor	$24,$17			# Sigma0(a)
358	xor	$17,$30,$31
359#endif
360	sw	$11,12($29)	# offload to ring buffer
361	addu	$24,$16
362	and	$17,$25
363	addu	$15,$18			# +=K[3]
364	addu	$24,$17			# +=Maj(a,b,c)
365	addu	$1,$15
366	addu	$24,$15
367	lwl	$13,23($5)
368	lwr	$13,20($5)
369#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
370	wsbh	$12,$12		# byte swap(4)
371	rotr	$12,$12,16
372#else
373	srl	$17,$12,24		# byte swap(4)
374	srl	$18,$12,8
375	andi	$19,$12,0xFF00
376	sll	$12,$12,24
377	andi	$18,0xFF00
378	sll	$19,$19,8
379	or	$12,$17
380	or	$18,$19
381	or	$12,$18
382#endif
383#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
384	xor	$19,$2,$3			# 4
385	rotr	$17,$1,6
386	addu	$16,$12,$7
387	rotr	$18,$1,11
388	and	$19,$1
389	rotr	$7,$1,25
390	xor	$17,$18
391	rotr	$18,$24,2
392	xor	$19,$3			# Ch(e,f,g)
393	xor	$17,$7			# Sigma1(e)
394
395	rotr	$7,$24,13
396	addu	$16,$19
397	lw	$19,16($6)		# K[4]
398	xor	$7,$18
399	rotr	$18,$24,22
400	addu	$16,$17
401	and	$17,$25,$30
402	xor	$7,$18			# Sigma0(a)
403	xor	$18,$25,$30
404#else
405	addu	$16,$12,$7			# 4
406	srl	$7,$1,6
407	xor	$19,$2,$3
408	sll	$18,$1,7
409	and	$19,$1
410	srl	$17,$1,11
411	xor	$7,$18
412	sll	$18,$1,21
413	xor	$7,$17
414	srl	$17,$1,25
415	xor	$7,$18
416	sll	$18,$1,26
417	xor	$7,$17
418	xor	$19,$3			# Ch(e,f,g)
419	xor	$17,$18,$7			# Sigma1(e)
420
421	srl	$7,$24,2
422	addu	$16,$19
423	lw	$19,16($6)		# K[4]
424	sll	$18,$24,10
425	addu	$16,$17
426	srl	$17,$24,13
427	xor	$7,$18
428	sll	$18,$24,19
429	xor	$7,$17
430	srl	$17,$24,22
431	xor	$7,$18
432	sll	$18,$24,30
433	xor	$7,$17
434	and	$17,$25,$30
435	xor	$7,$18			# Sigma0(a)
436	xor	$18,$25,$30
437#endif
438	sw	$12,16($29)	# offload to ring buffer
439	addu	$7,$17
440	and	$18,$24
441	addu	$16,$19			# +=K[4]
442	addu	$7,$18			# +=Maj(a,b,c)
443	addu	$31,$16
444	addu	$7,$16
445	lwl	$14,27($5)
446	lwr	$14,24($5)
447#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
448	wsbh	$13,$13		# byte swap(5)
449	rotr	$13,$13,16
450#else
451	srl	$18,$13,24		# byte swap(5)
452	srl	$19,$13,8
453	andi	$20,$13,0xFF00
454	sll	$13,$13,24
455	andi	$19,0xFF00
456	sll	$20,$20,8
457	or	$13,$18
458	or	$19,$20
459	or	$13,$19
460#endif
461#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
462	xor	$20,$1,$2			# 5
463	rotr	$18,$31,6
464	addu	$17,$13,$3
465	rotr	$19,$31,11
466	and	$20,$31
467	rotr	$3,$31,25
468	xor	$18,$19
469	rotr	$19,$7,2
470	xor	$20,$2			# Ch(e,f,g)
471	xor	$18,$3			# Sigma1(e)
472
473	rotr	$3,$7,13
474	addu	$17,$20
475	lw	$20,20($6)		# K[5]
476	xor	$3,$19
477	rotr	$19,$7,22
478	addu	$17,$18
479	and	$18,$24,$25
480	xor	$3,$19			# Sigma0(a)
481	xor	$19,$24,$25
482#else
483	addu	$17,$13,$3			# 5
484	srl	$3,$31,6
485	xor	$20,$1,$2
486	sll	$19,$31,7
487	and	$20,$31
488	srl	$18,$31,11
489	xor	$3,$19
490	sll	$19,$31,21
491	xor	$3,$18
492	srl	$18,$31,25
493	xor	$3,$19
494	sll	$19,$31,26
495	xor	$3,$18
496	xor	$20,$2			# Ch(e,f,g)
497	xor	$18,$19,$3			# Sigma1(e)
498
499	srl	$3,$7,2
500	addu	$17,$20
501	lw	$20,20($6)		# K[5]
502	sll	$19,$7,10
503	addu	$17,$18
504	srl	$18,$7,13
505	xor	$3,$19
506	sll	$19,$7,19
507	xor	$3,$18
508	srl	$18,$7,22
509	xor	$3,$19
510	sll	$19,$7,30
511	xor	$3,$18
512	and	$18,$24,$25
513	xor	$3,$19			# Sigma0(a)
514	xor	$19,$24,$25
515#endif
516	sw	$13,20($29)	# offload to ring buffer
517	addu	$3,$18
518	and	$19,$7
519	addu	$17,$20			# +=K[5]
520	addu	$3,$19			# +=Maj(a,b,c)
521	addu	$30,$17
522	addu	$3,$17
523	lwl	$15,31($5)
524	lwr	$15,28($5)
525#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
526	wsbh	$14,$14		# byte swap(6)
527	rotr	$14,$14,16
528#else
529	srl	$19,$14,24		# byte swap(6)
530	srl	$20,$14,8
531	andi	$21,$14,0xFF00
532	sll	$14,$14,24
533	andi	$20,0xFF00
534	sll	$21,$21,8
535	or	$14,$19
536	or	$20,$21
537	or	$14,$20
538#endif
539#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
540	xor	$21,$31,$1			# 6
541	rotr	$19,$30,6
542	addu	$18,$14,$2
543	rotr	$20,$30,11
544	and	$21,$30
545	rotr	$2,$30,25
546	xor	$19,$20
547	rotr	$20,$3,2
548	xor	$21,$1			# Ch(e,f,g)
549	xor	$19,$2			# Sigma1(e)
550
551	rotr	$2,$3,13
552	addu	$18,$21
553	lw	$21,24($6)		# K[6]
554	xor	$2,$20
555	rotr	$20,$3,22
556	addu	$18,$19
557	and	$19,$7,$24
558	xor	$2,$20			# Sigma0(a)
559	xor	$20,$7,$24
560#else
561	addu	$18,$14,$2			# 6
562	srl	$2,$30,6
563	xor	$21,$31,$1
564	sll	$20,$30,7
565	and	$21,$30
566	srl	$19,$30,11
567	xor	$2,$20
568	sll	$20,$30,21
569	xor	$2,$19
570	srl	$19,$30,25
571	xor	$2,$20
572	sll	$20,$30,26
573	xor	$2,$19
574	xor	$21,$1			# Ch(e,f,g)
575	xor	$19,$20,$2			# Sigma1(e)
576
577	srl	$2,$3,2
578	addu	$18,$21
579	lw	$21,24($6)		# K[6]
580	sll	$20,$3,10
581	addu	$18,$19
582	srl	$19,$3,13
583	xor	$2,$20
584	sll	$20,$3,19
585	xor	$2,$19
586	srl	$19,$3,22
587	xor	$2,$20
588	sll	$20,$3,30
589	xor	$2,$19
590	and	$19,$7,$24
591	xor	$2,$20			# Sigma0(a)
592	xor	$20,$7,$24
593#endif
594	sw	$14,24($29)	# offload to ring buffer
595	addu	$2,$19
596	and	$20,$3
597	addu	$18,$21			# +=K[6]
598	addu	$2,$20			# +=Maj(a,b,c)
599	addu	$25,$18
600	addu	$2,$18
601	lwl	$16,35($5)
602	lwr	$16,32($5)
603#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
604	wsbh	$15,$15		# byte swap(7)
605	rotr	$15,$15,16
606#else
607	srl	$20,$15,24		# byte swap(7)
608	srl	$21,$15,8
609	andi	$22,$15,0xFF00
610	sll	$15,$15,24
611	andi	$21,0xFF00
612	sll	$22,$22,8
613	or	$15,$20
614	or	$21,$22
615	or	$15,$21
616#endif
617#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
618	xor	$22,$30,$31			# 7
619	rotr	$20,$25,6
620	addu	$19,$15,$1
621	rotr	$21,$25,11
622	and	$22,$25
623	rotr	$1,$25,25
624	xor	$20,$21
625	rotr	$21,$2,2
626	xor	$22,$31			# Ch(e,f,g)
627	xor	$20,$1			# Sigma1(e)
628
629	rotr	$1,$2,13
630	addu	$19,$22
631	lw	$22,28($6)		# K[7]
632	xor	$1,$21
633	rotr	$21,$2,22
634	addu	$19,$20
635	and	$20,$3,$7
636	xor	$1,$21			# Sigma0(a)
637	xor	$21,$3,$7
638#else
639	addu	$19,$15,$1			# 7
640	srl	$1,$25,6
641	xor	$22,$30,$31
642	sll	$21,$25,7
643	and	$22,$25
644	srl	$20,$25,11
645	xor	$1,$21
646	sll	$21,$25,21
647	xor	$1,$20
648	srl	$20,$25,25
649	xor	$1,$21
650	sll	$21,$25,26
651	xor	$1,$20
652	xor	$22,$31			# Ch(e,f,g)
653	xor	$20,$21,$1			# Sigma1(e)
654
655	srl	$1,$2,2
656	addu	$19,$22
657	lw	$22,28($6)		# K[7]
658	sll	$21,$2,10
659	addu	$19,$20
660	srl	$20,$2,13
661	xor	$1,$21
662	sll	$21,$2,19
663	xor	$1,$20
664	srl	$20,$2,22
665	xor	$1,$21
666	sll	$21,$2,30
667	xor	$1,$20
668	and	$20,$3,$7
669	xor	$1,$21			# Sigma0(a)
670	xor	$21,$3,$7
671#endif
672	sw	$15,28($29)	# offload to ring buffer
673	addu	$1,$20
674	and	$21,$2
675	addu	$19,$22			# +=K[7]
676	addu	$1,$21			# +=Maj(a,b,c)
677	addu	$24,$19
678	addu	$1,$19
679	lwl	$17,39($5)
680	lwr	$17,36($5)
681#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
682	wsbh	$16,$16		# byte swap(8)
683	rotr	$16,$16,16
684#else
685	srl	$21,$16,24		# byte swap(8)
686	srl	$22,$16,8
687	andi	$23,$16,0xFF00
688	sll	$16,$16,24
689	andi	$22,0xFF00
690	sll	$23,$23,8
691	or	$16,$21
692	or	$22,$23
693	or	$16,$22
694#endif
695#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
696	xor	$23,$25,$30			# 8
697	rotr	$21,$24,6
698	addu	$20,$16,$31
699	rotr	$22,$24,11
700	and	$23,$24
701	rotr	$31,$24,25
702	xor	$21,$22
703	rotr	$22,$1,2
704	xor	$23,$30			# Ch(e,f,g)
705	xor	$21,$31			# Sigma1(e)
706
707	rotr	$31,$1,13
708	addu	$20,$23
709	lw	$23,32($6)		# K[8]
710	xor	$31,$22
711	rotr	$22,$1,22
712	addu	$20,$21
713	and	$21,$2,$3
714	xor	$31,$22			# Sigma0(a)
715	xor	$22,$2,$3
716#else
717	addu	$20,$16,$31			# 8
718	srl	$31,$24,6
719	xor	$23,$25,$30
720	sll	$22,$24,7
721	and	$23,$24
722	srl	$21,$24,11
723	xor	$31,$22
724	sll	$22,$24,21
725	xor	$31,$21
726	srl	$21,$24,25
727	xor	$31,$22
728	sll	$22,$24,26
729	xor	$31,$21
730	xor	$23,$30			# Ch(e,f,g)
731	xor	$21,$22,$31			# Sigma1(e)
732
733	srl	$31,$1,2
734	addu	$20,$23
735	lw	$23,32($6)		# K[8]
736	sll	$22,$1,10
737	addu	$20,$21
738	srl	$21,$1,13
739	xor	$31,$22
740	sll	$22,$1,19
741	xor	$31,$21
742	srl	$21,$1,22
743	xor	$31,$22
744	sll	$22,$1,30
745	xor	$31,$21
746	and	$21,$2,$3
747	xor	$31,$22			# Sigma0(a)
748	xor	$22,$2,$3
749#endif
750	sw	$16,32($29)	# offload to ring buffer
751	addu	$31,$21
752	and	$22,$1
753	addu	$20,$23			# +=K[8]
754	addu	$31,$22			# +=Maj(a,b,c)
755	addu	$7,$20
756	addu	$31,$20
757	lwl	$18,43($5)
758	lwr	$18,40($5)
759#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
760	wsbh	$17,$17		# byte swap(9)
761	rotr	$17,$17,16
762#else
763	srl	$22,$17,24		# byte swap(9)
764	srl	$23,$17,8
765	andi	$8,$17,0xFF00
766	sll	$17,$17,24
767	andi	$23,0xFF00
768	sll	$8,$8,8
769	or	$17,$22
770	or	$23,$8
771	or	$17,$23
772#endif
773#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
774	xor	$8,$24,$25			# 9
775	rotr	$22,$7,6
776	addu	$21,$17,$30
777	rotr	$23,$7,11
778	and	$8,$7
779	rotr	$30,$7,25
780	xor	$22,$23
781	rotr	$23,$31,2
782	xor	$8,$25			# Ch(e,f,g)
783	xor	$22,$30			# Sigma1(e)
784
785	rotr	$30,$31,13
786	addu	$21,$8
787	lw	$8,36($6)		# K[9]
788	xor	$30,$23
789	rotr	$23,$31,22
790	addu	$21,$22
791	and	$22,$1,$2
792	xor	$30,$23			# Sigma0(a)
793	xor	$23,$1,$2
794#else
795	addu	$21,$17,$30			# 9
796	srl	$30,$7,6
797	xor	$8,$24,$25
798	sll	$23,$7,7
799	and	$8,$7
800	srl	$22,$7,11
801	xor	$30,$23
802	sll	$23,$7,21
803	xor	$30,$22
804	srl	$22,$7,25
805	xor	$30,$23
806	sll	$23,$7,26
807	xor	$30,$22
808	xor	$8,$25			# Ch(e,f,g)
809	xor	$22,$23,$30			# Sigma1(e)
810
811	srl	$30,$31,2
812	addu	$21,$8
813	lw	$8,36($6)		# K[9]
814	sll	$23,$31,10
815	addu	$21,$22
816	srl	$22,$31,13
817	xor	$30,$23
818	sll	$23,$31,19
819	xor	$30,$22
820	srl	$22,$31,22
821	xor	$30,$23
822	sll	$23,$31,30
823	xor	$30,$22
824	and	$22,$1,$2
825	xor	$30,$23			# Sigma0(a)
826	xor	$23,$1,$2
827#endif
828	sw	$17,36($29)	# offload to ring buffer
829	addu	$30,$22
830	and	$23,$31
831	addu	$21,$8			# +=K[9]
832	addu	$30,$23			# +=Maj(a,b,c)
833	addu	$3,$21
834	addu	$30,$21
835	lwl	$19,47($5)
836	lwr	$19,44($5)
837#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
838	wsbh	$18,$18		# byte swap(10)
839	rotr	$18,$18,16
840#else
841	srl	$23,$18,24		# byte swap(10)
842	srl	$8,$18,8
843	andi	$9,$18,0xFF00
844	sll	$18,$18,24
845	andi	$8,0xFF00
846	sll	$9,$9,8
847	or	$18,$23
848	or	$8,$9
849	or	$18,$8
850#endif
851#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
852	xor	$9,$7,$24			# 10
853	rotr	$23,$3,6
854	addu	$22,$18,$25
855	rotr	$8,$3,11
856	and	$9,$3
857	rotr	$25,$3,25
858	xor	$23,$8
859	rotr	$8,$30,2
860	xor	$9,$24			# Ch(e,f,g)
861	xor	$23,$25			# Sigma1(e)
862
863	rotr	$25,$30,13
864	addu	$22,$9
865	lw	$9,40($6)		# K[10]
866	xor	$25,$8
867	rotr	$8,$30,22
868	addu	$22,$23
869	and	$23,$31,$1
870	xor	$25,$8			# Sigma0(a)
871	xor	$8,$31,$1
872#else
873	addu	$22,$18,$25			# 10
874	srl	$25,$3,6
875	xor	$9,$7,$24
876	sll	$8,$3,7
877	and	$9,$3
878	srl	$23,$3,11
879	xor	$25,$8
880	sll	$8,$3,21
881	xor	$25,$23
882	srl	$23,$3,25
883	xor	$25,$8
884	sll	$8,$3,26
885	xor	$25,$23
886	xor	$9,$24			# Ch(e,f,g)
887	xor	$23,$8,$25			# Sigma1(e)
888
889	srl	$25,$30,2
890	addu	$22,$9
891	lw	$9,40($6)		# K[10]
892	sll	$8,$30,10
893	addu	$22,$23
894	srl	$23,$30,13
895	xor	$25,$8
896	sll	$8,$30,19
897	xor	$25,$23
898	srl	$23,$30,22
899	xor	$25,$8
900	sll	$8,$30,30
901	xor	$25,$23
902	and	$23,$31,$1
903	xor	$25,$8			# Sigma0(a)
904	xor	$8,$31,$1
905#endif
906	sw	$18,40($29)	# offload to ring buffer
907	addu	$25,$23
908	and	$8,$30
909	addu	$22,$9			# +=K[10]
910	addu	$25,$8			# +=Maj(a,b,c)
911	addu	$2,$22
912	addu	$25,$22
913	lwl	$20,51($5)
914	lwr	$20,48($5)
915#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
916	wsbh	$19,$19		# byte swap(11)
917	rotr	$19,$19,16
918#else
919	srl	$8,$19,24		# byte swap(11)
920	srl	$9,$19,8
921	andi	$10,$19,0xFF00
922	sll	$19,$19,24
923	andi	$9,0xFF00
924	sll	$10,$10,8
925	or	$19,$8
926	or	$9,$10
927	or	$19,$9
928#endif
929#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
930	xor	$10,$3,$7			# 11
931	rotr	$8,$2,6
932	addu	$23,$19,$24
933	rotr	$9,$2,11
934	and	$10,$2
935	rotr	$24,$2,25
936	xor	$8,$9
937	rotr	$9,$25,2
938	xor	$10,$7			# Ch(e,f,g)
939	xor	$8,$24			# Sigma1(e)
940
941	rotr	$24,$25,13
942	addu	$23,$10
943	lw	$10,44($6)		# K[11]
944	xor	$24,$9
945	rotr	$9,$25,22
946	addu	$23,$8
947	and	$8,$30,$31
948	xor	$24,$9			# Sigma0(a)
949	xor	$9,$30,$31
950#else
951	addu	$23,$19,$24			# 11
952	srl	$24,$2,6
953	xor	$10,$3,$7
954	sll	$9,$2,7
955	and	$10,$2
956	srl	$8,$2,11
957	xor	$24,$9
958	sll	$9,$2,21
959	xor	$24,$8
960	srl	$8,$2,25
961	xor	$24,$9
962	sll	$9,$2,26
963	xor	$24,$8
964	xor	$10,$7			# Ch(e,f,g)
965	xor	$8,$9,$24			# Sigma1(e)
966
967	srl	$24,$25,2
968	addu	$23,$10
969	lw	$10,44($6)		# K[11]
970	sll	$9,$25,10
971	addu	$23,$8
972	srl	$8,$25,13
973	xor	$24,$9
974	sll	$9,$25,19
975	xor	$24,$8
976	srl	$8,$25,22
977	xor	$24,$9
978	sll	$9,$25,30
979	xor	$24,$8
980	and	$8,$30,$31
981	xor	$24,$9			# Sigma0(a)
982	xor	$9,$30,$31
983#endif
984	sw	$19,44($29)	# offload to ring buffer
985	addu	$24,$8
986	and	$9,$25
987	addu	$23,$10			# +=K[11]
988	addu	$24,$9			# +=Maj(a,b,c)
989	addu	$1,$23
990	addu	$24,$23
991	lwl	$21,55($5)
992	lwr	$21,52($5)
993#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
994	wsbh	$20,$20		# byte swap(12)
995	rotr	$20,$20,16
996#else
997	srl	$9,$20,24		# byte swap(12)
998	srl	$10,$20,8
999	andi	$11,$20,0xFF00
1000	sll	$20,$20,24
1001	andi	$10,0xFF00
1002	sll	$11,$11,8
1003	or	$20,$9
1004	or	$10,$11
1005	or	$20,$10
1006#endif
1007#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1008	xor	$11,$2,$3			# 12
1009	rotr	$9,$1,6
1010	addu	$8,$20,$7
1011	rotr	$10,$1,11
1012	and	$11,$1
1013	rotr	$7,$1,25
1014	xor	$9,$10
1015	rotr	$10,$24,2
1016	xor	$11,$3			# Ch(e,f,g)
1017	xor	$9,$7			# Sigma1(e)
1018
1019	rotr	$7,$24,13
1020	addu	$8,$11
1021	lw	$11,48($6)		# K[12]
1022	xor	$7,$10
1023	rotr	$10,$24,22
1024	addu	$8,$9
1025	and	$9,$25,$30
1026	xor	$7,$10			# Sigma0(a)
1027	xor	$10,$25,$30
1028#else
1029	addu	$8,$20,$7			# 12
1030	srl	$7,$1,6
1031	xor	$11,$2,$3
1032	sll	$10,$1,7
1033	and	$11,$1
1034	srl	$9,$1,11
1035	xor	$7,$10
1036	sll	$10,$1,21
1037	xor	$7,$9
1038	srl	$9,$1,25
1039	xor	$7,$10
1040	sll	$10,$1,26
1041	xor	$7,$9
1042	xor	$11,$3			# Ch(e,f,g)
1043	xor	$9,$10,$7			# Sigma1(e)
1044
1045	srl	$7,$24,2
1046	addu	$8,$11
1047	lw	$11,48($6)		# K[12]
1048	sll	$10,$24,10
1049	addu	$8,$9
1050	srl	$9,$24,13
1051	xor	$7,$10
1052	sll	$10,$24,19
1053	xor	$7,$9
1054	srl	$9,$24,22
1055	xor	$7,$10
1056	sll	$10,$24,30
1057	xor	$7,$9
1058	and	$9,$25,$30
1059	xor	$7,$10			# Sigma0(a)
1060	xor	$10,$25,$30
1061#endif
1062	sw	$20,48($29)	# offload to ring buffer
1063	addu	$7,$9
1064	and	$10,$24
1065	addu	$8,$11			# +=K[12]
1066	addu	$7,$10			# +=Maj(a,b,c)
1067	addu	$31,$8
1068	addu	$7,$8
1069	lwl	$22,59($5)
1070	lwr	$22,56($5)
1071#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1072	wsbh	$21,$21		# byte swap(13)
1073	rotr	$21,$21,16
1074#else
1075	srl	$10,$21,24		# byte swap(13)
1076	srl	$11,$21,8
1077	andi	$12,$21,0xFF00
1078	sll	$21,$21,24
1079	andi	$11,0xFF00
1080	sll	$12,$12,8
1081	or	$21,$10
1082	or	$11,$12
1083	or	$21,$11
1084#endif
1085#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1086	xor	$12,$1,$2			# 13
1087	rotr	$10,$31,6
1088	addu	$9,$21,$3
1089	rotr	$11,$31,11
1090	and	$12,$31
1091	rotr	$3,$31,25
1092	xor	$10,$11
1093	rotr	$11,$7,2
1094	xor	$12,$2			# Ch(e,f,g)
1095	xor	$10,$3			# Sigma1(e)
1096
1097	rotr	$3,$7,13
1098	addu	$9,$12
1099	lw	$12,52($6)		# K[13]
1100	xor	$3,$11
1101	rotr	$11,$7,22
1102	addu	$9,$10
1103	and	$10,$24,$25
1104	xor	$3,$11			# Sigma0(a)
1105	xor	$11,$24,$25
1106#else
1107	addu	$9,$21,$3			# 13
1108	srl	$3,$31,6
1109	xor	$12,$1,$2
1110	sll	$11,$31,7
1111	and	$12,$31
1112	srl	$10,$31,11
1113	xor	$3,$11
1114	sll	$11,$31,21
1115	xor	$3,$10
1116	srl	$10,$31,25
1117	xor	$3,$11
1118	sll	$11,$31,26
1119	xor	$3,$10
1120	xor	$12,$2			# Ch(e,f,g)
1121	xor	$10,$11,$3			# Sigma1(e)
1122
1123	srl	$3,$7,2
1124	addu	$9,$12
1125	lw	$12,52($6)		# K[13]
1126	sll	$11,$7,10
1127	addu	$9,$10
1128	srl	$10,$7,13
1129	xor	$3,$11
1130	sll	$11,$7,19
1131	xor	$3,$10
1132	srl	$10,$7,22
1133	xor	$3,$11
1134	sll	$11,$7,30
1135	xor	$3,$10
1136	and	$10,$24,$25
1137	xor	$3,$11			# Sigma0(a)
1138	xor	$11,$24,$25
1139#endif
1140	sw	$21,52($29)	# offload to ring buffer
1141	addu	$3,$10
1142	and	$11,$7
1143	addu	$9,$12			# +=K[13]
1144	addu	$3,$11			# +=Maj(a,b,c)
1145	addu	$30,$9
1146	addu	$3,$9
1147	lw	$8,0($29)	# prefetch from ring buffer
1148	lwl	$23,63($5)
1149	lwr	$23,60($5)
1150#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1151	wsbh	$22,$22		# byte swap(14)
1152	rotr	$22,$22,16
1153#else
1154	srl	$11,$22,24		# byte swap(14)
1155	srl	$12,$22,8
1156	andi	$13,$22,0xFF00
1157	sll	$22,$22,24
1158	andi	$12,0xFF00
1159	sll	$13,$13,8
1160	or	$22,$11
1161	or	$12,$13
1162	or	$22,$12
1163#endif
1164#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1165	xor	$13,$31,$1			# 14
1166	rotr	$11,$30,6
1167	addu	$10,$22,$2
1168	rotr	$12,$30,11
1169	and	$13,$30
1170	rotr	$2,$30,25
1171	xor	$11,$12
1172	rotr	$12,$3,2
1173	xor	$13,$1			# Ch(e,f,g)
1174	xor	$11,$2			# Sigma1(e)
1175
1176	rotr	$2,$3,13
1177	addu	$10,$13
1178	lw	$13,56($6)		# K[14]
1179	xor	$2,$12
1180	rotr	$12,$3,22
1181	addu	$10,$11
1182	and	$11,$7,$24
1183	xor	$2,$12			# Sigma0(a)
1184	xor	$12,$7,$24
1185#else
1186	addu	$10,$22,$2			# 14
1187	srl	$2,$30,6
1188	xor	$13,$31,$1
1189	sll	$12,$30,7
1190	and	$13,$30
1191	srl	$11,$30,11
1192	xor	$2,$12
1193	sll	$12,$30,21
1194	xor	$2,$11
1195	srl	$11,$30,25
1196	xor	$2,$12
1197	sll	$12,$30,26
1198	xor	$2,$11
1199	xor	$13,$1			# Ch(e,f,g)
1200	xor	$11,$12,$2			# Sigma1(e)
1201
1202	srl	$2,$3,2
1203	addu	$10,$13
1204	lw	$13,56($6)		# K[14]
1205	sll	$12,$3,10
1206	addu	$10,$11
1207	srl	$11,$3,13
1208	xor	$2,$12
1209	sll	$12,$3,19
1210	xor	$2,$11
1211	srl	$11,$3,22
1212	xor	$2,$12
1213	sll	$12,$3,30
1214	xor	$2,$11
1215	and	$11,$7,$24
1216	xor	$2,$12			# Sigma0(a)
1217	xor	$12,$7,$24
1218#endif
1219	sw	$22,56($29)	# offload to ring buffer
1220	addu	$2,$11
1221	and	$12,$3
1222	addu	$10,$13			# +=K[14]
1223	addu	$2,$12			# +=Maj(a,b,c)
1224	addu	$25,$10
1225	addu	$2,$10
1226	lw	$9,4($29)	# prefetch from ring buffer
1227#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1228	wsbh	$23,$23		# byte swap(15)
1229	rotr	$23,$23,16
1230#else
1231	srl	$12,$23,24		# byte swap(15)
1232	srl	$13,$23,8
1233	andi	$14,$23,0xFF00
1234	sll	$23,$23,24
1235	andi	$13,0xFF00
1236	sll	$14,$14,8
1237	or	$23,$12
1238	or	$13,$14
1239	or	$23,$13
1240#endif
1241#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1242	xor	$14,$30,$31			# 15
1243	rotr	$12,$25,6
1244	addu	$11,$23,$1
1245	rotr	$13,$25,11
1246	and	$14,$25
1247	rotr	$1,$25,25
1248	xor	$12,$13
1249	rotr	$13,$2,2
1250	xor	$14,$31			# Ch(e,f,g)
1251	xor	$12,$1			# Sigma1(e)
1252
1253	rotr	$1,$2,13
1254	addu	$11,$14
1255	lw	$14,60($6)		# K[15]
1256	xor	$1,$13
1257	rotr	$13,$2,22
1258	addu	$11,$12
1259	and	$12,$3,$7
1260	xor	$1,$13			# Sigma0(a)
1261	xor	$13,$3,$7
1262#else
1263	addu	$11,$23,$1			# 15
1264	srl	$1,$25,6
1265	xor	$14,$30,$31
1266	sll	$13,$25,7
1267	and	$14,$25
1268	srl	$12,$25,11
1269	xor	$1,$13
1270	sll	$13,$25,21
1271	xor	$1,$12
1272	srl	$12,$25,25
1273	xor	$1,$13
1274	sll	$13,$25,26
1275	xor	$1,$12
1276	xor	$14,$31			# Ch(e,f,g)
1277	xor	$12,$13,$1			# Sigma1(e)
1278
1279	srl	$1,$2,2
1280	addu	$11,$14
1281	lw	$14,60($6)		# K[15]
1282	sll	$13,$2,10
1283	addu	$11,$12
1284	srl	$12,$2,13
1285	xor	$1,$13
1286	sll	$13,$2,19
1287	xor	$1,$12
1288	srl	$12,$2,22
1289	xor	$1,$13
1290	sll	$13,$2,30
1291	xor	$1,$12
1292	and	$12,$3,$7
1293	xor	$1,$13			# Sigma0(a)
1294	xor	$13,$3,$7
1295#endif
1296	sw	$23,60($29)	# offload to ring buffer
1297	addu	$1,$12
1298	and	$13,$2
1299	addu	$11,$14			# +=K[15]
1300	addu	$1,$13			# +=Maj(a,b,c)
1301	addu	$24,$11
1302	addu	$1,$11
1303	lw	$10,8($29)	# prefetch from ring buffer
1304	b	.L16_xx
1305.align	4
1306.L16_xx:
1307#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1308	srl	$14,$9,3		# Xupdate(16)
1309	rotr	$12,$9,7
1310	addu	$8,$17			# +=X[i+9]
1311	xor	$14,$12
1312	rotr	$12,$9,18
1313
1314	srl	$15,$22,10
1315	rotr	$13,$22,17
1316	xor	$14,$12			# sigma0(X[i+1])
1317	rotr	$12,$22,19
1318	xor	$15,$13
1319	addu	$8,$14
1320#else
1321	srl	$14,$9,3		# Xupdate(16)
1322	addu	$8,$17			# +=X[i+9]
1323	sll	$13,$9,14
1324	srl	$12,$9,7
1325	xor	$14,$13
1326	sll	$13,11
1327	xor	$14,$12
1328	srl	$12,$9,18
1329	xor	$14,$13
1330
1331	srl	$15,$22,10
1332	xor	$14,$12			# sigma0(X[i+1])
1333	sll	$13,$22,13
1334	addu	$8,$14
1335	srl	$12,$22,17
1336	xor	$15,$13
1337	sll	$13,2
1338	xor	$15,$12
1339	srl	$12,$22,19
1340	xor	$15,$13
1341#endif
1342	xor	$15,$12			# sigma1(X[i+14])
1343	addu	$8,$15
1344#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1345	xor	$15,$25,$30			# 16
1346	rotr	$13,$24,6
1347	addu	$12,$8,$31
1348	rotr	$14,$24,11
1349	and	$15,$24
1350	rotr	$31,$24,25
1351	xor	$13,$14
1352	rotr	$14,$1,2
1353	xor	$15,$30			# Ch(e,f,g)
1354	xor	$13,$31			# Sigma1(e)
1355
1356	rotr	$31,$1,13
1357	addu	$12,$15
1358	lw	$15,64($6)		# K[16]
1359	xor	$31,$14
1360	rotr	$14,$1,22
1361	addu	$12,$13
1362	and	$13,$2,$3
1363	xor	$31,$14			# Sigma0(a)
1364	xor	$14,$2,$3
1365#else
1366	addu	$12,$8,$31			# 16
1367	srl	$31,$24,6
1368	xor	$15,$25,$30
1369	sll	$14,$24,7
1370	and	$15,$24
1371	srl	$13,$24,11
1372	xor	$31,$14
1373	sll	$14,$24,21
1374	xor	$31,$13
1375	srl	$13,$24,25
1376	xor	$31,$14
1377	sll	$14,$24,26
1378	xor	$31,$13
1379	xor	$15,$30			# Ch(e,f,g)
1380	xor	$13,$14,$31			# Sigma1(e)
1381
1382	srl	$31,$1,2
1383	addu	$12,$15
1384	lw	$15,64($6)		# K[16]
1385	sll	$14,$1,10
1386	addu	$12,$13
1387	srl	$13,$1,13
1388	xor	$31,$14
1389	sll	$14,$1,19
1390	xor	$31,$13
1391	srl	$13,$1,22
1392	xor	$31,$14
1393	sll	$14,$1,30
1394	xor	$31,$13
1395	and	$13,$2,$3
1396	xor	$31,$14			# Sigma0(a)
1397	xor	$14,$2,$3
1398#endif
1399	sw	$8,0($29)	# offload to ring buffer
1400	addu	$31,$13
1401	and	$14,$1
1402	addu	$12,$15			# +=K[16]
1403	addu	$31,$14			# +=Maj(a,b,c)
1404	addu	$7,$12
1405	addu	$31,$12
1406	lw	$11,12($29)	# prefetch from ring buffer
1407#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1408	srl	$15,$10,3		# Xupdate(17)
1409	rotr	$13,$10,7
1410	addu	$9,$18			# +=X[i+9]
1411	xor	$15,$13
1412	rotr	$13,$10,18
1413
1414	srl	$16,$23,10
1415	rotr	$14,$23,17
1416	xor	$15,$13			# sigma0(X[i+1])
1417	rotr	$13,$23,19
1418	xor	$16,$14
1419	addu	$9,$15
1420#else
1421	srl	$15,$10,3		# Xupdate(17)
1422	addu	$9,$18			# +=X[i+9]
1423	sll	$14,$10,14
1424	srl	$13,$10,7
1425	xor	$15,$14
1426	sll	$14,11
1427	xor	$15,$13
1428	srl	$13,$10,18
1429	xor	$15,$14
1430
1431	srl	$16,$23,10
1432	xor	$15,$13			# sigma0(X[i+1])
1433	sll	$14,$23,13
1434	addu	$9,$15
1435	srl	$13,$23,17
1436	xor	$16,$14
1437	sll	$14,2
1438	xor	$16,$13
1439	srl	$13,$23,19
1440	xor	$16,$14
1441#endif
1442	xor	$16,$13			# sigma1(X[i+14])
1443	addu	$9,$16
1444#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1445	xor	$16,$24,$25			# 17
1446	rotr	$14,$7,6
1447	addu	$13,$9,$30
1448	rotr	$15,$7,11
1449	and	$16,$7
1450	rotr	$30,$7,25
1451	xor	$14,$15
1452	rotr	$15,$31,2
1453	xor	$16,$25			# Ch(e,f,g)
1454	xor	$14,$30			# Sigma1(e)
1455
1456	rotr	$30,$31,13
1457	addu	$13,$16
1458	lw	$16,68($6)		# K[17]
1459	xor	$30,$15
1460	rotr	$15,$31,22
1461	addu	$13,$14
1462	and	$14,$1,$2
1463	xor	$30,$15			# Sigma0(a)
1464	xor	$15,$1,$2
1465#else
1466	addu	$13,$9,$30			# 17
1467	srl	$30,$7,6
1468	xor	$16,$24,$25
1469	sll	$15,$7,7
1470	and	$16,$7
1471	srl	$14,$7,11
1472	xor	$30,$15
1473	sll	$15,$7,21
1474	xor	$30,$14
1475	srl	$14,$7,25
1476	xor	$30,$15
1477	sll	$15,$7,26
1478	xor	$30,$14
1479	xor	$16,$25			# Ch(e,f,g)
1480	xor	$14,$15,$30			# Sigma1(e)
1481
1482	srl	$30,$31,2
1483	addu	$13,$16
1484	lw	$16,68($6)		# K[17]
1485	sll	$15,$31,10
1486	addu	$13,$14
1487	srl	$14,$31,13
1488	xor	$30,$15
1489	sll	$15,$31,19
1490	xor	$30,$14
1491	srl	$14,$31,22
1492	xor	$30,$15
1493	sll	$15,$31,30
1494	xor	$30,$14
1495	and	$14,$1,$2
1496	xor	$30,$15			# Sigma0(a)
1497	xor	$15,$1,$2
1498#endif
1499	sw	$9,4($29)	# offload to ring buffer
1500	addu	$30,$14
1501	and	$15,$31
1502	addu	$13,$16			# +=K[17]
1503	addu	$30,$15			# +=Maj(a,b,c)
1504	addu	$3,$13
1505	addu	$30,$13
1506	lw	$12,16($29)	# prefetch from ring buffer
1507#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1508	srl	$16,$11,3		# Xupdate(18)
1509	rotr	$14,$11,7
1510	addu	$10,$19			# +=X[i+9]
1511	xor	$16,$14
1512	rotr	$14,$11,18
1513
1514	srl	$17,$8,10
1515	rotr	$15,$8,17
1516	xor	$16,$14			# sigma0(X[i+1])
1517	rotr	$14,$8,19
1518	xor	$17,$15
1519	addu	$10,$16
1520#else
1521	srl	$16,$11,3		# Xupdate(18)
1522	addu	$10,$19			# +=X[i+9]
1523	sll	$15,$11,14
1524	srl	$14,$11,7
1525	xor	$16,$15
1526	sll	$15,11
1527	xor	$16,$14
1528	srl	$14,$11,18
1529	xor	$16,$15
1530
1531	srl	$17,$8,10
1532	xor	$16,$14			# sigma0(X[i+1])
1533	sll	$15,$8,13
1534	addu	$10,$16
1535	srl	$14,$8,17
1536	xor	$17,$15
1537	sll	$15,2
1538	xor	$17,$14
1539	srl	$14,$8,19
1540	xor	$17,$15
1541#endif
1542	xor	$17,$14			# sigma1(X[i+14])
1543	addu	$10,$17
1544#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1545	xor	$17,$7,$24			# 18
1546	rotr	$15,$3,6
1547	addu	$14,$10,$25
1548	rotr	$16,$3,11
1549	and	$17,$3
1550	rotr	$25,$3,25
1551	xor	$15,$16
1552	rotr	$16,$30,2
1553	xor	$17,$24			# Ch(e,f,g)
1554	xor	$15,$25			# Sigma1(e)
1555
1556	rotr	$25,$30,13
1557	addu	$14,$17
1558	lw	$17,72($6)		# K[18]
1559	xor	$25,$16
1560	rotr	$16,$30,22
1561	addu	$14,$15
1562	and	$15,$31,$1
1563	xor	$25,$16			# Sigma0(a)
1564	xor	$16,$31,$1
1565#else
1566	addu	$14,$10,$25			# 18
1567	srl	$25,$3,6
1568	xor	$17,$7,$24
1569	sll	$16,$3,7
1570	and	$17,$3
1571	srl	$15,$3,11
1572	xor	$25,$16
1573	sll	$16,$3,21
1574	xor	$25,$15
1575	srl	$15,$3,25
1576	xor	$25,$16
1577	sll	$16,$3,26
1578	xor	$25,$15
1579	xor	$17,$24			# Ch(e,f,g)
1580	xor	$15,$16,$25			# Sigma1(e)
1581
1582	srl	$25,$30,2
1583	addu	$14,$17
1584	lw	$17,72($6)		# K[18]
1585	sll	$16,$30,10
1586	addu	$14,$15
1587	srl	$15,$30,13
1588	xor	$25,$16
1589	sll	$16,$30,19
1590	xor	$25,$15
1591	srl	$15,$30,22
1592	xor	$25,$16
1593	sll	$16,$30,30
1594	xor	$25,$15
1595	and	$15,$31,$1
1596	xor	$25,$16			# Sigma0(a)
1597	xor	$16,$31,$1
1598#endif
1599	sw	$10,8($29)	# offload to ring buffer
1600	addu	$25,$15
1601	and	$16,$30
1602	addu	$14,$17			# +=K[18]
1603	addu	$25,$16			# +=Maj(a,b,c)
1604	addu	$2,$14
1605	addu	$25,$14
1606	lw	$13,20($29)	# prefetch from ring buffer
1607#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1608	srl	$17,$12,3		# Xupdate(19)
1609	rotr	$15,$12,7
1610	addu	$11,$20			# +=X[i+9]
1611	xor	$17,$15
1612	rotr	$15,$12,18
1613
1614	srl	$18,$9,10
1615	rotr	$16,$9,17
1616	xor	$17,$15			# sigma0(X[i+1])
1617	rotr	$15,$9,19
1618	xor	$18,$16
1619	addu	$11,$17
1620#else
1621	srl	$17,$12,3		# Xupdate(19)
1622	addu	$11,$20			# +=X[i+9]
1623	sll	$16,$12,14
1624	srl	$15,$12,7
1625	xor	$17,$16
1626	sll	$16,11
1627	xor	$17,$15
1628	srl	$15,$12,18
1629	xor	$17,$16
1630
1631	srl	$18,$9,10
1632	xor	$17,$15			# sigma0(X[i+1])
1633	sll	$16,$9,13
1634	addu	$11,$17
1635	srl	$15,$9,17
1636	xor	$18,$16
1637	sll	$16,2
1638	xor	$18,$15
1639	srl	$15,$9,19
1640	xor	$18,$16
1641#endif
1642	xor	$18,$15			# sigma1(X[i+14])
1643	addu	$11,$18
1644#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1645	xor	$18,$3,$7			# 19
1646	rotr	$16,$2,6
1647	addu	$15,$11,$24
1648	rotr	$17,$2,11
1649	and	$18,$2
1650	rotr	$24,$2,25
1651	xor	$16,$17
1652	rotr	$17,$25,2
1653	xor	$18,$7			# Ch(e,f,g)
1654	xor	$16,$24			# Sigma1(e)
1655
1656	rotr	$24,$25,13
1657	addu	$15,$18
1658	lw	$18,76($6)		# K[19]
1659	xor	$24,$17
1660	rotr	$17,$25,22
1661	addu	$15,$16
1662	and	$16,$30,$31
1663	xor	$24,$17			# Sigma0(a)
1664	xor	$17,$30,$31
1665#else
1666	addu	$15,$11,$24			# 19
1667	srl	$24,$2,6
1668	xor	$18,$3,$7
1669	sll	$17,$2,7
1670	and	$18,$2
1671	srl	$16,$2,11
1672	xor	$24,$17
1673	sll	$17,$2,21
1674	xor	$24,$16
1675	srl	$16,$2,25
1676	xor	$24,$17
1677	sll	$17,$2,26
1678	xor	$24,$16
1679	xor	$18,$7			# Ch(e,f,g)
1680	xor	$16,$17,$24			# Sigma1(e)
1681
1682	srl	$24,$25,2
1683	addu	$15,$18
1684	lw	$18,76($6)		# K[19]
1685	sll	$17,$25,10
1686	addu	$15,$16
1687	srl	$16,$25,13
1688	xor	$24,$17
1689	sll	$17,$25,19
1690	xor	$24,$16
1691	srl	$16,$25,22
1692	xor	$24,$17
1693	sll	$17,$25,30
1694	xor	$24,$16
1695	and	$16,$30,$31
1696	xor	$24,$17			# Sigma0(a)
1697	xor	$17,$30,$31
1698#endif
1699	sw	$11,12($29)	# offload to ring buffer
1700	addu	$24,$16
1701	and	$17,$25
1702	addu	$15,$18			# +=K[19]
1703	addu	$24,$17			# +=Maj(a,b,c)
1704	addu	$1,$15
1705	addu	$24,$15
1706	lw	$14,24($29)	# prefetch from ring buffer
1707#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1708	srl	$18,$13,3		# Xupdate(20)
1709	rotr	$16,$13,7
1710	addu	$12,$21			# +=X[i+9]
1711	xor	$18,$16
1712	rotr	$16,$13,18
1713
1714	srl	$19,$10,10
1715	rotr	$17,$10,17
1716	xor	$18,$16			# sigma0(X[i+1])
1717	rotr	$16,$10,19
1718	xor	$19,$17
1719	addu	$12,$18
1720#else
1721	srl	$18,$13,3		# Xupdate(20)
1722	addu	$12,$21			# +=X[i+9]
1723	sll	$17,$13,14
1724	srl	$16,$13,7
1725	xor	$18,$17
1726	sll	$17,11
1727	xor	$18,$16
1728	srl	$16,$13,18
1729	xor	$18,$17
1730
1731	srl	$19,$10,10
1732	xor	$18,$16			# sigma0(X[i+1])
1733	sll	$17,$10,13
1734	addu	$12,$18
1735	srl	$16,$10,17
1736	xor	$19,$17
1737	sll	$17,2
1738	xor	$19,$16
1739	srl	$16,$10,19
1740	xor	$19,$17
1741#endif
1742	xor	$19,$16			# sigma1(X[i+14])
1743	addu	$12,$19
1744#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1745	xor	$19,$2,$3			# 20
1746	rotr	$17,$1,6
1747	addu	$16,$12,$7
1748	rotr	$18,$1,11
1749	and	$19,$1
1750	rotr	$7,$1,25
1751	xor	$17,$18
1752	rotr	$18,$24,2
1753	xor	$19,$3			# Ch(e,f,g)
1754	xor	$17,$7			# Sigma1(e)
1755
1756	rotr	$7,$24,13
1757	addu	$16,$19
1758	lw	$19,80($6)		# K[20]
1759	xor	$7,$18
1760	rotr	$18,$24,22
1761	addu	$16,$17
1762	and	$17,$25,$30
1763	xor	$7,$18			# Sigma0(a)
1764	xor	$18,$25,$30
1765#else
1766	addu	$16,$12,$7			# 20
1767	srl	$7,$1,6
1768	xor	$19,$2,$3
1769	sll	$18,$1,7
1770	and	$19,$1
1771	srl	$17,$1,11
1772	xor	$7,$18
1773	sll	$18,$1,21
1774	xor	$7,$17
1775	srl	$17,$1,25
1776	xor	$7,$18
1777	sll	$18,$1,26
1778	xor	$7,$17
1779	xor	$19,$3			# Ch(e,f,g)
1780	xor	$17,$18,$7			# Sigma1(e)
1781
1782	srl	$7,$24,2
1783	addu	$16,$19
1784	lw	$19,80($6)		# K[20]
1785	sll	$18,$24,10
1786	addu	$16,$17
1787	srl	$17,$24,13
1788	xor	$7,$18
1789	sll	$18,$24,19
1790	xor	$7,$17
1791	srl	$17,$24,22
1792	xor	$7,$18
1793	sll	$18,$24,30
1794	xor	$7,$17
1795	and	$17,$25,$30
1796	xor	$7,$18			# Sigma0(a)
1797	xor	$18,$25,$30
1798#endif
1799	sw	$12,16($29)	# offload to ring buffer
1800	addu	$7,$17
1801	and	$18,$24
1802	addu	$16,$19			# +=K[20]
1803	addu	$7,$18			# +=Maj(a,b,c)
1804	addu	$31,$16
1805	addu	$7,$16
1806	lw	$15,28($29)	# prefetch from ring buffer
1807#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1808	srl	$19,$14,3		# Xupdate(21)
1809	rotr	$17,$14,7
1810	addu	$13,$22			# +=X[i+9]
1811	xor	$19,$17
1812	rotr	$17,$14,18
1813
1814	srl	$20,$11,10
1815	rotr	$18,$11,17
1816	xor	$19,$17			# sigma0(X[i+1])
1817	rotr	$17,$11,19
1818	xor	$20,$18
1819	addu	$13,$19
1820#else
1821	srl	$19,$14,3		# Xupdate(21)
1822	addu	$13,$22			# +=X[i+9]
1823	sll	$18,$14,14
1824	srl	$17,$14,7
1825	xor	$19,$18
1826	sll	$18,11
1827	xor	$19,$17
1828	srl	$17,$14,18
1829	xor	$19,$18
1830
1831	srl	$20,$11,10
1832	xor	$19,$17			# sigma0(X[i+1])
1833	sll	$18,$11,13
1834	addu	$13,$19
1835	srl	$17,$11,17
1836	xor	$20,$18
1837	sll	$18,2
1838	xor	$20,$17
1839	srl	$17,$11,19
1840	xor	$20,$18
1841#endif
1842	xor	$20,$17			# sigma1(X[i+14])
1843	addu	$13,$20
1844#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1845	xor	$20,$1,$2			# 21
1846	rotr	$18,$31,6
1847	addu	$17,$13,$3
1848	rotr	$19,$31,11
1849	and	$20,$31
1850	rotr	$3,$31,25
1851	xor	$18,$19
1852	rotr	$19,$7,2
1853	xor	$20,$2			# Ch(e,f,g)
1854	xor	$18,$3			# Sigma1(e)
1855
1856	rotr	$3,$7,13
1857	addu	$17,$20
1858	lw	$20,84($6)		# K[21]
1859	xor	$3,$19
1860	rotr	$19,$7,22
1861	addu	$17,$18
1862	and	$18,$24,$25
1863	xor	$3,$19			# Sigma0(a)
1864	xor	$19,$24,$25
1865#else
1866	addu	$17,$13,$3			# 21
1867	srl	$3,$31,6
1868	xor	$20,$1,$2
1869	sll	$19,$31,7
1870	and	$20,$31
1871	srl	$18,$31,11
1872	xor	$3,$19
1873	sll	$19,$31,21
1874	xor	$3,$18
1875	srl	$18,$31,25
1876	xor	$3,$19
1877	sll	$19,$31,26
1878	xor	$3,$18
1879	xor	$20,$2			# Ch(e,f,g)
1880	xor	$18,$19,$3			# Sigma1(e)
1881
1882	srl	$3,$7,2
1883	addu	$17,$20
1884	lw	$20,84($6)		# K[21]
1885	sll	$19,$7,10
1886	addu	$17,$18
1887	srl	$18,$7,13
1888	xor	$3,$19
1889	sll	$19,$7,19
1890	xor	$3,$18
1891	srl	$18,$7,22
1892	xor	$3,$19
1893	sll	$19,$7,30
1894	xor	$3,$18
1895	and	$18,$24,$25
1896	xor	$3,$19			# Sigma0(a)
1897	xor	$19,$24,$25
1898#endif
1899	sw	$13,20($29)	# offload to ring buffer
1900	addu	$3,$18
1901	and	$19,$7
1902	addu	$17,$20			# +=K[21]
1903	addu	$3,$19			# +=Maj(a,b,c)
1904	addu	$30,$17
1905	addu	$3,$17
1906	lw	$16,32($29)	# prefetch from ring buffer
1907#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1908	srl	$20,$15,3		# Xupdate(22)
1909	rotr	$18,$15,7
1910	addu	$14,$23			# +=X[i+9]
1911	xor	$20,$18
1912	rotr	$18,$15,18
1913
1914	srl	$21,$12,10
1915	rotr	$19,$12,17
1916	xor	$20,$18			# sigma0(X[i+1])
1917	rotr	$18,$12,19
1918	xor	$21,$19
1919	addu	$14,$20
1920#else
1921	srl	$20,$15,3		# Xupdate(22)
1922	addu	$14,$23			# +=X[i+9]
1923	sll	$19,$15,14
1924	srl	$18,$15,7
1925	xor	$20,$19
1926	sll	$19,11
1927	xor	$20,$18
1928	srl	$18,$15,18
1929	xor	$20,$19
1930
1931	srl	$21,$12,10
1932	xor	$20,$18			# sigma0(X[i+1])
1933	sll	$19,$12,13
1934	addu	$14,$20
1935	srl	$18,$12,17
1936	xor	$21,$19
1937	sll	$19,2
1938	xor	$21,$18
1939	srl	$18,$12,19
1940	xor	$21,$19
1941#endif
1942	xor	$21,$18			# sigma1(X[i+14])
1943	addu	$14,$21
1944#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1945	xor	$21,$31,$1			# 22
1946	rotr	$19,$30,6
1947	addu	$18,$14,$2
1948	rotr	$20,$30,11
1949	and	$21,$30
1950	rotr	$2,$30,25
1951	xor	$19,$20
1952	rotr	$20,$3,2
1953	xor	$21,$1			# Ch(e,f,g)
1954	xor	$19,$2			# Sigma1(e)
1955
1956	rotr	$2,$3,13
1957	addu	$18,$21
1958	lw	$21,88($6)		# K[22]
1959	xor	$2,$20
1960	rotr	$20,$3,22
1961	addu	$18,$19
1962	and	$19,$7,$24
1963	xor	$2,$20			# Sigma0(a)
1964	xor	$20,$7,$24
1965#else
1966	addu	$18,$14,$2			# 22
1967	srl	$2,$30,6
1968	xor	$21,$31,$1
1969	sll	$20,$30,7
1970	and	$21,$30
1971	srl	$19,$30,11
1972	xor	$2,$20
1973	sll	$20,$30,21
1974	xor	$2,$19
1975	srl	$19,$30,25
1976	xor	$2,$20
1977	sll	$20,$30,26
1978	xor	$2,$19
1979	xor	$21,$1			# Ch(e,f,g)
1980	xor	$19,$20,$2			# Sigma1(e)
1981
1982	srl	$2,$3,2
1983	addu	$18,$21
1984	lw	$21,88($6)		# K[22]
1985	sll	$20,$3,10
1986	addu	$18,$19
1987	srl	$19,$3,13
1988	xor	$2,$20
1989	sll	$20,$3,19
1990	xor	$2,$19
1991	srl	$19,$3,22
1992	xor	$2,$20
1993	sll	$20,$3,30
1994	xor	$2,$19
1995	and	$19,$7,$24
1996	xor	$2,$20			# Sigma0(a)
1997	xor	$20,$7,$24
1998#endif
1999	sw	$14,24($29)	# offload to ring buffer
2000	addu	$2,$19
2001	and	$20,$3
2002	addu	$18,$21			# +=K[22]
2003	addu	$2,$20			# +=Maj(a,b,c)
2004	addu	$25,$18
2005	addu	$2,$18
2006	lw	$17,36($29)	# prefetch from ring buffer
2007#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2008	srl	$21,$16,3		# Xupdate(23)
2009	rotr	$19,$16,7
2010	addu	$15,$8			# +=X[i+9]
2011	xor	$21,$19
2012	rotr	$19,$16,18
2013
2014	srl	$22,$13,10
2015	rotr	$20,$13,17
2016	xor	$21,$19			# sigma0(X[i+1])
2017	rotr	$19,$13,19
2018	xor	$22,$20
2019	addu	$15,$21
2020#else
2021	srl	$21,$16,3		# Xupdate(23)
2022	addu	$15,$8			# +=X[i+9]
2023	sll	$20,$16,14
2024	srl	$19,$16,7
2025	xor	$21,$20
2026	sll	$20,11
2027	xor	$21,$19
2028	srl	$19,$16,18
2029	xor	$21,$20
2030
2031	srl	$22,$13,10
2032	xor	$21,$19			# sigma0(X[i+1])
2033	sll	$20,$13,13
2034	addu	$15,$21
2035	srl	$19,$13,17
2036	xor	$22,$20
2037	sll	$20,2
2038	xor	$22,$19
2039	srl	$19,$13,19
2040	xor	$22,$20
2041#endif
2042	xor	$22,$19			# sigma1(X[i+14])
2043	addu	$15,$22
2044#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2045	xor	$22,$30,$31			# 23
2046	rotr	$20,$25,6
2047	addu	$19,$15,$1
2048	rotr	$21,$25,11
2049	and	$22,$25
2050	rotr	$1,$25,25
2051	xor	$20,$21
2052	rotr	$21,$2,2
2053	xor	$22,$31			# Ch(e,f,g)
2054	xor	$20,$1			# Sigma1(e)
2055
2056	rotr	$1,$2,13
2057	addu	$19,$22
2058	lw	$22,92($6)		# K[23]
2059	xor	$1,$21
2060	rotr	$21,$2,22
2061	addu	$19,$20
2062	and	$20,$3,$7
2063	xor	$1,$21			# Sigma0(a)
2064	xor	$21,$3,$7
2065#else
2066	addu	$19,$15,$1			# 23
2067	srl	$1,$25,6
2068	xor	$22,$30,$31
2069	sll	$21,$25,7
2070	and	$22,$25
2071	srl	$20,$25,11
2072	xor	$1,$21
2073	sll	$21,$25,21
2074	xor	$1,$20
2075	srl	$20,$25,25
2076	xor	$1,$21
2077	sll	$21,$25,26
2078	xor	$1,$20
2079	xor	$22,$31			# Ch(e,f,g)
2080	xor	$20,$21,$1			# Sigma1(e)
2081
2082	srl	$1,$2,2
2083	addu	$19,$22
2084	lw	$22,92($6)		# K[23]
2085	sll	$21,$2,10
2086	addu	$19,$20
2087	srl	$20,$2,13
2088	xor	$1,$21
2089	sll	$21,$2,19
2090	xor	$1,$20
2091	srl	$20,$2,22
2092	xor	$1,$21
2093	sll	$21,$2,30
2094	xor	$1,$20
2095	and	$20,$3,$7
2096	xor	$1,$21			# Sigma0(a)
2097	xor	$21,$3,$7
2098#endif
2099	sw	$15,28($29)	# offload to ring buffer
2100	addu	$1,$20
2101	and	$21,$2
2102	addu	$19,$22			# +=K[23]
2103	addu	$1,$21			# +=Maj(a,b,c)
2104	addu	$24,$19
2105	addu	$1,$19
2106	lw	$18,40($29)	# prefetch from ring buffer
2107#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2108	srl	$22,$17,3		# Xupdate(24)
2109	rotr	$20,$17,7
2110	addu	$16,$9			# +=X[i+9]
2111	xor	$22,$20
2112	rotr	$20,$17,18
2113
2114	srl	$23,$14,10
2115	rotr	$21,$14,17
2116	xor	$22,$20			# sigma0(X[i+1])
2117	rotr	$20,$14,19
2118	xor	$23,$21
2119	addu	$16,$22
2120#else
2121	srl	$22,$17,3		# Xupdate(24)
2122	addu	$16,$9			# +=X[i+9]
2123	sll	$21,$17,14
2124	srl	$20,$17,7
2125	xor	$22,$21
2126	sll	$21,11
2127	xor	$22,$20
2128	srl	$20,$17,18
2129	xor	$22,$21
2130
2131	srl	$23,$14,10
2132	xor	$22,$20			# sigma0(X[i+1])
2133	sll	$21,$14,13
2134	addu	$16,$22
2135	srl	$20,$14,17
2136	xor	$23,$21
2137	sll	$21,2
2138	xor	$23,$20
2139	srl	$20,$14,19
2140	xor	$23,$21
2141#endif
2142	xor	$23,$20			# sigma1(X[i+14])
2143	addu	$16,$23
2144#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2145	xor	$23,$25,$30			# 24
2146	rotr	$21,$24,6
2147	addu	$20,$16,$31
2148	rotr	$22,$24,11
2149	and	$23,$24
2150	rotr	$31,$24,25
2151	xor	$21,$22
2152	rotr	$22,$1,2
2153	xor	$23,$30			# Ch(e,f,g)
2154	xor	$21,$31			# Sigma1(e)
2155
2156	rotr	$31,$1,13
2157	addu	$20,$23
2158	lw	$23,96($6)		# K[24]
2159	xor	$31,$22
2160	rotr	$22,$1,22
2161	addu	$20,$21
2162	and	$21,$2,$3
2163	xor	$31,$22			# Sigma0(a)
2164	xor	$22,$2,$3
2165#else
2166	addu	$20,$16,$31			# 24
2167	srl	$31,$24,6
2168	xor	$23,$25,$30
2169	sll	$22,$24,7
2170	and	$23,$24
2171	srl	$21,$24,11
2172	xor	$31,$22
2173	sll	$22,$24,21
2174	xor	$31,$21
2175	srl	$21,$24,25
2176	xor	$31,$22
2177	sll	$22,$24,26
2178	xor	$31,$21
2179	xor	$23,$30			# Ch(e,f,g)
2180	xor	$21,$22,$31			# Sigma1(e)
2181
2182	srl	$31,$1,2
2183	addu	$20,$23
2184	lw	$23,96($6)		# K[24]
2185	sll	$22,$1,10
2186	addu	$20,$21
2187	srl	$21,$1,13
2188	xor	$31,$22
2189	sll	$22,$1,19
2190	xor	$31,$21
2191	srl	$21,$1,22
2192	xor	$31,$22
2193	sll	$22,$1,30
2194	xor	$31,$21
2195	and	$21,$2,$3
2196	xor	$31,$22			# Sigma0(a)
2197	xor	$22,$2,$3
2198#endif
2199	sw	$16,32($29)	# offload to ring buffer
2200	addu	$31,$21
2201	and	$22,$1
2202	addu	$20,$23			# +=K[24]
2203	addu	$31,$22			# +=Maj(a,b,c)
2204	addu	$7,$20
2205	addu	$31,$20
2206	lw	$19,44($29)	# prefetch from ring buffer
2207#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2208	srl	$23,$18,3		# Xupdate(25)
2209	rotr	$21,$18,7
2210	addu	$17,$10			# +=X[i+9]
2211	xor	$23,$21
2212	rotr	$21,$18,18
2213
2214	srl	$8,$15,10
2215	rotr	$22,$15,17
2216	xor	$23,$21			# sigma0(X[i+1])
2217	rotr	$21,$15,19
2218	xor	$8,$22
2219	addu	$17,$23
2220#else
2221	srl	$23,$18,3		# Xupdate(25)
2222	addu	$17,$10			# +=X[i+9]
2223	sll	$22,$18,14
2224	srl	$21,$18,7
2225	xor	$23,$22
2226	sll	$22,11
2227	xor	$23,$21
2228	srl	$21,$18,18
2229	xor	$23,$22
2230
2231	srl	$8,$15,10
2232	xor	$23,$21			# sigma0(X[i+1])
2233	sll	$22,$15,13
2234	addu	$17,$23
2235	srl	$21,$15,17
2236	xor	$8,$22
2237	sll	$22,2
2238	xor	$8,$21
2239	srl	$21,$15,19
2240	xor	$8,$22
2241#endif
2242	xor	$8,$21			# sigma1(X[i+14])
2243	addu	$17,$8
2244#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2245	xor	$8,$24,$25			# 25
2246	rotr	$22,$7,6
2247	addu	$21,$17,$30
2248	rotr	$23,$7,11
2249	and	$8,$7
2250	rotr	$30,$7,25
2251	xor	$22,$23
2252	rotr	$23,$31,2
2253	xor	$8,$25			# Ch(e,f,g)
2254	xor	$22,$30			# Sigma1(e)
2255
2256	rotr	$30,$31,13
2257	addu	$21,$8
2258	lw	$8,100($6)		# K[25]
2259	xor	$30,$23
2260	rotr	$23,$31,22
2261	addu	$21,$22
2262	and	$22,$1,$2
2263	xor	$30,$23			# Sigma0(a)
2264	xor	$23,$1,$2
2265#else
2266	addu	$21,$17,$30			# 25
2267	srl	$30,$7,6
2268	xor	$8,$24,$25
2269	sll	$23,$7,7
2270	and	$8,$7
2271	srl	$22,$7,11
2272	xor	$30,$23
2273	sll	$23,$7,21
2274	xor	$30,$22
2275	srl	$22,$7,25
2276	xor	$30,$23
2277	sll	$23,$7,26
2278	xor	$30,$22
2279	xor	$8,$25			# Ch(e,f,g)
2280	xor	$22,$23,$30			# Sigma1(e)
2281
2282	srl	$30,$31,2
2283	addu	$21,$8
2284	lw	$8,100($6)		# K[25]
2285	sll	$23,$31,10
2286	addu	$21,$22
2287	srl	$22,$31,13
2288	xor	$30,$23
2289	sll	$23,$31,19
2290	xor	$30,$22
2291	srl	$22,$31,22
2292	xor	$30,$23
2293	sll	$23,$31,30
2294	xor	$30,$22
2295	and	$22,$1,$2
2296	xor	$30,$23			# Sigma0(a)
2297	xor	$23,$1,$2
2298#endif
2299	sw	$17,36($29)	# offload to ring buffer
2300	addu	$30,$22
2301	and	$23,$31
2302	addu	$21,$8			# +=K[25]
2303	addu	$30,$23			# +=Maj(a,b,c)
2304	addu	$3,$21
2305	addu	$30,$21
2306	lw	$20,48($29)	# prefetch from ring buffer
2307#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2308	srl	$8,$19,3		# Xupdate(26)
2309	rotr	$22,$19,7
2310	addu	$18,$11			# +=X[i+9]
2311	xor	$8,$22
2312	rotr	$22,$19,18
2313
2314	srl	$9,$16,10
2315	rotr	$23,$16,17
2316	xor	$8,$22			# sigma0(X[i+1])
2317	rotr	$22,$16,19
2318	xor	$9,$23
2319	addu	$18,$8
2320#else
2321	srl	$8,$19,3		# Xupdate(26)
2322	addu	$18,$11			# +=X[i+9]
2323	sll	$23,$19,14
2324	srl	$22,$19,7
2325	xor	$8,$23
2326	sll	$23,11
2327	xor	$8,$22
2328	srl	$22,$19,18
2329	xor	$8,$23
2330
2331	srl	$9,$16,10
2332	xor	$8,$22			# sigma0(X[i+1])
2333	sll	$23,$16,13
2334	addu	$18,$8
2335	srl	$22,$16,17
2336	xor	$9,$23
2337	sll	$23,2
2338	xor	$9,$22
2339	srl	$22,$16,19
2340	xor	$9,$23
2341#endif
2342	xor	$9,$22			# sigma1(X[i+14])
2343	addu	$18,$9
2344#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2345	xor	$9,$7,$24			# 26
2346	rotr	$23,$3,6
2347	addu	$22,$18,$25
2348	rotr	$8,$3,11
2349	and	$9,$3
2350	rotr	$25,$3,25
2351	xor	$23,$8
2352	rotr	$8,$30,2
2353	xor	$9,$24			# Ch(e,f,g)
2354	xor	$23,$25			# Sigma1(e)
2355
2356	rotr	$25,$30,13
2357	addu	$22,$9
2358	lw	$9,104($6)		# K[26]
2359	xor	$25,$8
2360	rotr	$8,$30,22
2361	addu	$22,$23
2362	and	$23,$31,$1
2363	xor	$25,$8			# Sigma0(a)
2364	xor	$8,$31,$1
2365#else
2366	addu	$22,$18,$25			# 26
2367	srl	$25,$3,6
2368	xor	$9,$7,$24
2369	sll	$8,$3,7
2370	and	$9,$3
2371	srl	$23,$3,11
2372	xor	$25,$8
2373	sll	$8,$3,21
2374	xor	$25,$23
2375	srl	$23,$3,25
2376	xor	$25,$8
2377	sll	$8,$3,26
2378	xor	$25,$23
2379	xor	$9,$24			# Ch(e,f,g)
2380	xor	$23,$8,$25			# Sigma1(e)
2381
2382	srl	$25,$30,2
2383	addu	$22,$9
2384	lw	$9,104($6)		# K[26]
2385	sll	$8,$30,10
2386	addu	$22,$23
2387	srl	$23,$30,13
2388	xor	$25,$8
2389	sll	$8,$30,19
2390	xor	$25,$23
2391	srl	$23,$30,22
2392	xor	$25,$8
2393	sll	$8,$30,30
2394	xor	$25,$23
2395	and	$23,$31,$1
2396	xor	$25,$8			# Sigma0(a)
2397	xor	$8,$31,$1
2398#endif
2399	sw	$18,40($29)	# offload to ring buffer
2400	addu	$25,$23
2401	and	$8,$30
2402	addu	$22,$9			# +=K[26]
2403	addu	$25,$8			# +=Maj(a,b,c)
2404	addu	$2,$22
2405	addu	$25,$22
2406	lw	$21,52($29)	# prefetch from ring buffer
2407#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2408	srl	$9,$20,3		# Xupdate(27)
2409	rotr	$23,$20,7
2410	addu	$19,$12			# +=X[i+9]
2411	xor	$9,$23
2412	rotr	$23,$20,18
2413
2414	srl	$10,$17,10
2415	rotr	$8,$17,17
2416	xor	$9,$23			# sigma0(X[i+1])
2417	rotr	$23,$17,19
2418	xor	$10,$8
2419	addu	$19,$9
2420#else
2421	srl	$9,$20,3		# Xupdate(27)
2422	addu	$19,$12			# +=X[i+9]
2423	sll	$8,$20,14
2424	srl	$23,$20,7
2425	xor	$9,$8
2426	sll	$8,11
2427	xor	$9,$23
2428	srl	$23,$20,18
2429	xor	$9,$8
2430
2431	srl	$10,$17,10
2432	xor	$9,$23			# sigma0(X[i+1])
2433	sll	$8,$17,13
2434	addu	$19,$9
2435	srl	$23,$17,17
2436	xor	$10,$8
2437	sll	$8,2
2438	xor	$10,$23
2439	srl	$23,$17,19
2440	xor	$10,$8
2441#endif
2442	xor	$10,$23			# sigma1(X[i+14])
2443	addu	$19,$10
2444#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2445	xor	$10,$3,$7			# 27
2446	rotr	$8,$2,6
2447	addu	$23,$19,$24
2448	rotr	$9,$2,11
2449	and	$10,$2
2450	rotr	$24,$2,25
2451	xor	$8,$9
2452	rotr	$9,$25,2
2453	xor	$10,$7			# Ch(e,f,g)
2454	xor	$8,$24			# Sigma1(e)
2455
2456	rotr	$24,$25,13
2457	addu	$23,$10
2458	lw	$10,108($6)		# K[27]
2459	xor	$24,$9
2460	rotr	$9,$25,22
2461	addu	$23,$8
2462	and	$8,$30,$31
2463	xor	$24,$9			# Sigma0(a)
2464	xor	$9,$30,$31
2465#else
2466	addu	$23,$19,$24			# 27
2467	srl	$24,$2,6
2468	xor	$10,$3,$7
2469	sll	$9,$2,7
2470	and	$10,$2
2471	srl	$8,$2,11
2472	xor	$24,$9
2473	sll	$9,$2,21
2474	xor	$24,$8
2475	srl	$8,$2,25
2476	xor	$24,$9
2477	sll	$9,$2,26
2478	xor	$24,$8
2479	xor	$10,$7			# Ch(e,f,g)
2480	xor	$8,$9,$24			# Sigma1(e)
2481
2482	srl	$24,$25,2
2483	addu	$23,$10
2484	lw	$10,108($6)		# K[27]
2485	sll	$9,$25,10
2486	addu	$23,$8
2487	srl	$8,$25,13
2488	xor	$24,$9
2489	sll	$9,$25,19
2490	xor	$24,$8
2491	srl	$8,$25,22
2492	xor	$24,$9
2493	sll	$9,$25,30
2494	xor	$24,$8
2495	and	$8,$30,$31
2496	xor	$24,$9			# Sigma0(a)
2497	xor	$9,$30,$31
2498#endif
2499	sw	$19,44($29)	# offload to ring buffer
2500	addu	$24,$8
2501	and	$9,$25
2502	addu	$23,$10			# +=K[27]
2503	addu	$24,$9			# +=Maj(a,b,c)
2504	addu	$1,$23
2505	addu	$24,$23
2506	lw	$22,56($29)	# prefetch from ring buffer
2507#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2508	srl	$10,$21,3		# Xupdate(28)
2509	rotr	$8,$21,7
2510	addu	$20,$13			# +=X[i+9]
2511	xor	$10,$8
2512	rotr	$8,$21,18
2513
2514	srl	$11,$18,10
2515	rotr	$9,$18,17
2516	xor	$10,$8			# sigma0(X[i+1])
2517	rotr	$8,$18,19
2518	xor	$11,$9
2519	addu	$20,$10
2520#else
2521	srl	$10,$21,3		# Xupdate(28)
2522	addu	$20,$13			# +=X[i+9]
2523	sll	$9,$21,14
2524	srl	$8,$21,7
2525	xor	$10,$9
2526	sll	$9,11
2527	xor	$10,$8
2528	srl	$8,$21,18
2529	xor	$10,$9
2530
2531	srl	$11,$18,10
2532	xor	$10,$8			# sigma0(X[i+1])
2533	sll	$9,$18,13
2534	addu	$20,$10
2535	srl	$8,$18,17
2536	xor	$11,$9
2537	sll	$9,2
2538	xor	$11,$8
2539	srl	$8,$18,19
2540	xor	$11,$9
2541#endif
2542	xor	$11,$8			# sigma1(X[i+14])
2543	addu	$20,$11
2544#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2545	xor	$11,$2,$3			# 28
2546	rotr	$9,$1,6
2547	addu	$8,$20,$7
2548	rotr	$10,$1,11
2549	and	$11,$1
2550	rotr	$7,$1,25
2551	xor	$9,$10
2552	rotr	$10,$24,2
2553	xor	$11,$3			# Ch(e,f,g)
2554	xor	$9,$7			# Sigma1(e)
2555
2556	rotr	$7,$24,13
2557	addu	$8,$11
2558	lw	$11,112($6)		# K[28]
2559	xor	$7,$10
2560	rotr	$10,$24,22
2561	addu	$8,$9
2562	and	$9,$25,$30
2563	xor	$7,$10			# Sigma0(a)
2564	xor	$10,$25,$30
2565#else
2566	addu	$8,$20,$7			# 28
2567	srl	$7,$1,6
2568	xor	$11,$2,$3
2569	sll	$10,$1,7
2570	and	$11,$1
2571	srl	$9,$1,11
2572	xor	$7,$10
2573	sll	$10,$1,21
2574	xor	$7,$9
2575	srl	$9,$1,25
2576	xor	$7,$10
2577	sll	$10,$1,26
2578	xor	$7,$9
2579	xor	$11,$3			# Ch(e,f,g)
2580	xor	$9,$10,$7			# Sigma1(e)
2581
2582	srl	$7,$24,2
2583	addu	$8,$11
2584	lw	$11,112($6)		# K[28]
2585	sll	$10,$24,10
2586	addu	$8,$9
2587	srl	$9,$24,13
2588	xor	$7,$10
2589	sll	$10,$24,19
2590	xor	$7,$9
2591	srl	$9,$24,22
2592	xor	$7,$10
2593	sll	$10,$24,30
2594	xor	$7,$9
2595	and	$9,$25,$30
2596	xor	$7,$10			# Sigma0(a)
2597	xor	$10,$25,$30
2598#endif
2599	sw	$20,48($29)	# offload to ring buffer
2600	addu	$7,$9
2601	and	$10,$24
2602	addu	$8,$11			# +=K[28]
2603	addu	$7,$10			# +=Maj(a,b,c)
2604	addu	$31,$8
2605	addu	$7,$8
2606	lw	$23,60($29)	# prefetch from ring buffer
2607#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2608	srl	$11,$22,3		# Xupdate(29)
2609	rotr	$9,$22,7
2610	addu	$21,$14			# +=X[i+9]
2611	xor	$11,$9
2612	rotr	$9,$22,18
2613
2614	srl	$12,$19,10
2615	rotr	$10,$19,17
2616	xor	$11,$9			# sigma0(X[i+1])
2617	rotr	$9,$19,19
2618	xor	$12,$10
2619	addu	$21,$11
2620#else
2621	srl	$11,$22,3		# Xupdate(29)
2622	addu	$21,$14			# +=X[i+9]
2623	sll	$10,$22,14
2624	srl	$9,$22,7
2625	xor	$11,$10
2626	sll	$10,11
2627	xor	$11,$9
2628	srl	$9,$22,18
2629	xor	$11,$10
2630
2631	srl	$12,$19,10
2632	xor	$11,$9			# sigma0(X[i+1])
2633	sll	$10,$19,13
2634	addu	$21,$11
2635	srl	$9,$19,17
2636	xor	$12,$10
2637	sll	$10,2
2638	xor	$12,$9
2639	srl	$9,$19,19
2640	xor	$12,$10
2641#endif
2642	xor	$12,$9			# sigma1(X[i+14])
2643	addu	$21,$12
2644#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2645	xor	$12,$1,$2			# 29
2646	rotr	$10,$31,6
2647	addu	$9,$21,$3
2648	rotr	$11,$31,11
2649	and	$12,$31
2650	rotr	$3,$31,25
2651	xor	$10,$11
2652	rotr	$11,$7,2
2653	xor	$12,$2			# Ch(e,f,g)
2654	xor	$10,$3			# Sigma1(e)
2655
2656	rotr	$3,$7,13
2657	addu	$9,$12
2658	lw	$12,116($6)		# K[29]
2659	xor	$3,$11
2660	rotr	$11,$7,22
2661	addu	$9,$10
2662	and	$10,$24,$25
2663	xor	$3,$11			# Sigma0(a)
2664	xor	$11,$24,$25
2665#else
2666	addu	$9,$21,$3			# 29
2667	srl	$3,$31,6
2668	xor	$12,$1,$2
2669	sll	$11,$31,7
2670	and	$12,$31
2671	srl	$10,$31,11
2672	xor	$3,$11
2673	sll	$11,$31,21
2674	xor	$3,$10
2675	srl	$10,$31,25
2676	xor	$3,$11
2677	sll	$11,$31,26
2678	xor	$3,$10
2679	xor	$12,$2			# Ch(e,f,g)
2680	xor	$10,$11,$3			# Sigma1(e)
2681
2682	srl	$3,$7,2
2683	addu	$9,$12
2684	lw	$12,116($6)		# K[29]
2685	sll	$11,$7,10
2686	addu	$9,$10
2687	srl	$10,$7,13
2688	xor	$3,$11
2689	sll	$11,$7,19
2690	xor	$3,$10
2691	srl	$10,$7,22
2692	xor	$3,$11
2693	sll	$11,$7,30
2694	xor	$3,$10
2695	and	$10,$24,$25
2696	xor	$3,$11			# Sigma0(a)
2697	xor	$11,$24,$25
2698#endif
2699	sw	$21,52($29)	# offload to ring buffer
2700	addu	$3,$10
2701	and	$11,$7
2702	addu	$9,$12			# +=K[29]
2703	addu	$3,$11			# +=Maj(a,b,c)
2704	addu	$30,$9
2705	addu	$3,$9
2706	lw	$8,0($29)	# prefetch from ring buffer
2707#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2708	srl	$12,$23,3		# Xupdate(30)
2709	rotr	$10,$23,7
2710	addu	$22,$15			# +=X[i+9]
2711	xor	$12,$10
2712	rotr	$10,$23,18
2713
2714	srl	$13,$20,10
2715	rotr	$11,$20,17
2716	xor	$12,$10			# sigma0(X[i+1])
2717	rotr	$10,$20,19
2718	xor	$13,$11
2719	addu	$22,$12
2720#else
2721	srl	$12,$23,3		# Xupdate(30)
2722	addu	$22,$15			# +=X[i+9]
2723	sll	$11,$23,14
2724	srl	$10,$23,7
2725	xor	$12,$11
2726	sll	$11,11
2727	xor	$12,$10
2728	srl	$10,$23,18
2729	xor	$12,$11
2730
2731	srl	$13,$20,10
2732	xor	$12,$10			# sigma0(X[i+1])
2733	sll	$11,$20,13
2734	addu	$22,$12
2735	srl	$10,$20,17
2736	xor	$13,$11
2737	sll	$11,2
2738	xor	$13,$10
2739	srl	$10,$20,19
2740	xor	$13,$11
2741#endif
2742	xor	$13,$10			# sigma1(X[i+14])
2743	addu	$22,$13
2744#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2745	xor	$13,$31,$1			# 30
2746	rotr	$11,$30,6
2747	addu	$10,$22,$2
2748	rotr	$12,$30,11
2749	and	$13,$30
2750	rotr	$2,$30,25
2751	xor	$11,$12
2752	rotr	$12,$3,2
2753	xor	$13,$1			# Ch(e,f,g)
2754	xor	$11,$2			# Sigma1(e)
2755
2756	rotr	$2,$3,13
2757	addu	$10,$13
2758	lw	$13,120($6)		# K[30]
2759	xor	$2,$12
2760	rotr	$12,$3,22
2761	addu	$10,$11
2762	and	$11,$7,$24
2763	xor	$2,$12			# Sigma0(a)
2764	xor	$12,$7,$24
2765#else
2766	addu	$10,$22,$2			# 30
2767	srl	$2,$30,6
2768	xor	$13,$31,$1
2769	sll	$12,$30,7
2770	and	$13,$30
2771	srl	$11,$30,11
2772	xor	$2,$12
2773	sll	$12,$30,21
2774	xor	$2,$11
2775	srl	$11,$30,25
2776	xor	$2,$12
2777	sll	$12,$30,26
2778	xor	$2,$11
2779	xor	$13,$1			# Ch(e,f,g)
2780	xor	$11,$12,$2			# Sigma1(e)
2781
2782	srl	$2,$3,2
2783	addu	$10,$13
2784	lw	$13,120($6)		# K[30]
2785	sll	$12,$3,10
2786	addu	$10,$11
2787	srl	$11,$3,13
2788	xor	$2,$12
2789	sll	$12,$3,19
2790	xor	$2,$11
2791	srl	$11,$3,22
2792	xor	$2,$12
2793	sll	$12,$3,30
2794	xor	$2,$11
2795	and	$11,$7,$24
2796	xor	$2,$12			# Sigma0(a)
2797	xor	$12,$7,$24
2798#endif
2799	sw	$22,56($29)	# offload to ring buffer
2800	addu	$2,$11
2801	and	$12,$3
2802	addu	$10,$13			# +=K[30]
2803	addu	$2,$12			# +=Maj(a,b,c)
2804	addu	$25,$10
2805	addu	$2,$10
2806	lw	$9,4($29)	# prefetch from ring buffer
2807#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2808	srl	$13,$8,3		# Xupdate(31)
2809	rotr	$11,$8,7
2810	addu	$23,$16			# +=X[i+9]
2811	xor	$13,$11
2812	rotr	$11,$8,18
2813
2814	srl	$14,$21,10
2815	rotr	$12,$21,17
2816	xor	$13,$11			# sigma0(X[i+1])
2817	rotr	$11,$21,19
2818	xor	$14,$12
2819	addu	$23,$13
2820#else
2821	srl	$13,$8,3		# Xupdate(31)
2822	addu	$23,$16			# +=X[i+9]
2823	sll	$12,$8,14
2824	srl	$11,$8,7
2825	xor	$13,$12
2826	sll	$12,11
2827	xor	$13,$11
2828	srl	$11,$8,18
2829	xor	$13,$12
2830
2831	srl	$14,$21,10
2832	xor	$13,$11			# sigma0(X[i+1])
2833	sll	$12,$21,13
2834	addu	$23,$13
2835	srl	$11,$21,17
2836	xor	$14,$12
2837	sll	$12,2
2838	xor	$14,$11
2839	srl	$11,$21,19
2840	xor	$14,$12
2841#endif
2842	xor	$14,$11			# sigma1(X[i+14])
2843	addu	$23,$14
2844#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2845	xor	$14,$30,$31			# 31
2846	rotr	$12,$25,6
2847	addu	$11,$23,$1
2848	rotr	$13,$25,11
2849	and	$14,$25
2850	rotr	$1,$25,25
2851	xor	$12,$13
2852	rotr	$13,$2,2
2853	xor	$14,$31			# Ch(e,f,g)
2854	xor	$12,$1			# Sigma1(e)
2855
2856	rotr	$1,$2,13
2857	addu	$11,$14
2858	lw	$14,124($6)		# K[31]
2859	xor	$1,$13
2860	rotr	$13,$2,22
2861	addu	$11,$12
2862	and	$12,$3,$7
2863	xor	$1,$13			# Sigma0(a)
2864	xor	$13,$3,$7
2865#else
2866	addu	$11,$23,$1			# 31
2867	srl	$1,$25,6
2868	xor	$14,$30,$31
2869	sll	$13,$25,7
2870	and	$14,$25
2871	srl	$12,$25,11
2872	xor	$1,$13
2873	sll	$13,$25,21
2874	xor	$1,$12
2875	srl	$12,$25,25
2876	xor	$1,$13
2877	sll	$13,$25,26
2878	xor	$1,$12
2879	xor	$14,$31			# Ch(e,f,g)
2880	xor	$12,$13,$1			# Sigma1(e)
2881
2882	srl	$1,$2,2
2883	addu	$11,$14
2884	lw	$14,124($6)		# K[31]
2885	sll	$13,$2,10
2886	addu	$11,$12
2887	srl	$12,$2,13
2888	xor	$1,$13
2889	sll	$13,$2,19
2890	xor	$1,$12
2891	srl	$12,$2,22
2892	xor	$1,$13
2893	sll	$13,$2,30
2894	xor	$1,$12
2895	and	$12,$3,$7
2896	xor	$1,$13			# Sigma0(a)
2897	xor	$13,$3,$7
2898#endif
2899	sw	$23,60($29)	# offload to ring buffer
2900	addu	$1,$12
2901	and	$13,$2
2902	addu	$11,$14			# +=K[31]
2903	addu	$1,$13			# +=Maj(a,b,c)
2904	addu	$24,$11
2905	addu	$1,$11
2906	lw	$10,8($29)	# prefetch from ring buffer
2907	and	$14,0xfff
2908	li	$15,2290
2909	.set	noreorder
2910	bne	$14,$15,.L16_xx
2911	add $6,16*4		# Ktbl+=16
2912
2913	lw	$23,16*4($29)	# restore pointer to the end of input
2914	lw	$8,0*4($4)
2915	lw	$9,1*4($4)
2916	lw	$10,2*4($4)
2917	add $5,16*4
2918	lw	$11,3*4($4)
2919	addu	$1,$8
2920	lw	$12,4*4($4)
2921	addu	$2,$9
2922	lw	$13,5*4($4)
2923	addu	$3,$10
2924	lw	$14,6*4($4)
2925	addu	$7,$11
2926	lw	$15,7*4($4)
2927	addu	$24,$12
2928	sw	$1,0*4($4)
2929	addu	$25,$13
2930	sw	$2,1*4($4)
2931	addu	$30,$14
2932	sw	$3,2*4($4)
2933	addu	$31,$15
2934	sw	$7,3*4($4)
2935	sw	$24,4*4($4)
2936	sw	$25,5*4($4)
2937	sw	$30,6*4($4)
2938	sw	$31,7*4($4)
2939
2940	bne	$5,$23,.Loop
2941	sub $6,192	# rewind $6
2942
2943	lw	$31,128-1*4($29)
2944	lw	$30,128-2*4($29)
2945	lw	$23,128-3*4($29)
2946	lw	$22,128-4*4($29)
2947	lw	$21,128-5*4($29)
2948	lw	$20,128-6*4($29)
2949	lw	$19,128-7*4($29)
2950	lw	$18,128-8*4($29)
2951	lw	$17,128-9*4($29)
2952	lw	$16,128-10*4($29)
2953	jr	$31
2954	add $29,128
2955.end	sha256_block_data_order
2956
2957.rdata		# constants below are emitted into the read-only data section
2958.align	5		# start the table on a 2^5 = 32-byte boundary
2959K256:		# SHA-256 round constants K[0..63], 4 bytes each; the routine does la $6,K256 and reads lw off($6)
2960	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	# K[0..3]
2961	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	# K[4..7]
2962	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	# K[8..11]
2963	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	# K[12..15]
2964	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	# K[16..19]
2965	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	# K[20..23]
2966	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	# K[24..27]
2967	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	# K[28..31]
2968	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	# K[32..35]
2969	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	# K[36..39]
2970	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	# K[40..43]
2971	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	# K[44..47]
2972	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	# K[48..51]
2973	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	# K[52..55]
2974	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	# K[56..59]
2975	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	# K[60..63]; K[63] & 0xfff == 0x8f2 == 2290, the value the round loop compares against to stop
2976.asciiz	"SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"	# NUL-terminated ident string; NOTE(review): no reference to it is visible in this chunk
2977.align	5		# pad past the string to the next 32-byte boundary
2978
2979