xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/sha512-mips64.S (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
# Optional FIPS build hook: when OPENSSL_FIPSCANISTER is defined,
# <openssl/fipssyms.h> is pulled in before any symbol below is declared.
1#ifdef OPENSSL_FIPSCANISTER
2# include <openssl/fipssyms.h>
3#endif
4
# SmartMIPS targets are treated like MIPS32R2: defining _MIPS_ARCH_MIPS32R2
# here selects the wsbh/rotr code paths that are guarded by that macro in
# the round code further down.
5#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
6#define _MIPS_ARCH_MIPS32R2
7#endif
8
9.text
10.set	noat		# $1 is named explicitly by the code below, so the assembler must not use it as its own temporary
# PIC output is requested everywhere except on EABI targets and on VxWorks
# builds that were not compiled with __pic__.
11#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
12.option	pic2
13#endif
14
15.align	5
16.globl	sha256_block_data_order
17.ent	sha256_block_data_order
18sha256_block_data_order:
19	.frame	$29,192,$31
20	.mask	0xc0ff0000,-8
21	.set	noreorder
22	dsub $29,192
23	sd	$31,192-1*8($29)
24	sd	$30,192-2*8($29)
25	sd	$23,192-3*8($29)
26	sd	$22,192-4*8($29)
27	sd	$21,192-5*8($29)
28	sd	$20,192-6*8($29)
29	sd	$19,192-7*8($29)
30	sd	$18,192-8*8($29)
31	sd	$17,192-9*8($29)
32	sd	$16,192-10*8($29)
33	dsll $23,$6,6
34	.cplocal	$6
35	.cpsetup	$25,$0,sha256_block_data_order
36	.set	reorder
37	dla	$6,K256		# PIC-ified 'load address'
38
39	lw	$1,0*4($4)		# load context
40	lw	$2,1*4($4)
41	lw	$3,2*4($4)
42	lw	$7,3*4($4)
43	lw	$24,4*4($4)
44	lw	$25,5*4($4)
45	lw	$30,6*4($4)
46	lw	$31,7*4($4)
47
48	dadd $23,$5		# pointer to the end of input
49	sd	$23,16*4($29)
50	b	.Loop
51
52.align	5
53.Loop:
54	lwl	$8,3($5)
55	lwr	$8,0($5)
56	lwl	$9,7($5)
57	lwr	$9,4($5)
58#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
59	wsbh	$8,$8		# byte swap(0)
60	rotr	$8,$8,16
61#else
62	srl	$13,$8,24		# byte swap(0)
63	srl	$14,$8,8
64	andi	$15,$8,0xFF00
65	sll	$8,$8,24
66	andi	$14,0xFF00
67	sll	$15,$15,8
68	or	$8,$13
69	or	$14,$15
70	or	$8,$14
71#endif
72#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
73	xor	$15,$25,$30			# 0
74	rotr	$13,$24,6
75	addu	$12,$8,$31
76	rotr	$14,$24,11
77	and	$15,$24
78	rotr	$31,$24,25
79	xor	$13,$14
80	rotr	$14,$1,2
81	xor	$15,$30			# Ch(e,f,g)
82	xor	$13,$31			# Sigma1(e)
83
84	rotr	$31,$1,13
85	addu	$12,$15
86	lw	$15,0($6)		# K[0]
87	xor	$31,$14
88	rotr	$14,$1,22
89	addu	$12,$13
90	and	$13,$2,$3
91	xor	$31,$14			# Sigma0(a)
92	xor	$14,$2,$3
93#else
94	addu	$12,$8,$31			# 0
95	srl	$31,$24,6
96	xor	$15,$25,$30
97	sll	$14,$24,7
98	and	$15,$24
99	srl	$13,$24,11
100	xor	$31,$14
101	sll	$14,$24,21
102	xor	$31,$13
103	srl	$13,$24,25
104	xor	$31,$14
105	sll	$14,$24,26
106	xor	$31,$13
107	xor	$15,$30			# Ch(e,f,g)
108	xor	$13,$14,$31			# Sigma1(e)
109
110	srl	$31,$1,2
111	addu	$12,$15
112	lw	$15,0($6)		# K[0]
113	sll	$14,$1,10
114	addu	$12,$13
115	srl	$13,$1,13
116	xor	$31,$14
117	sll	$14,$1,19
118	xor	$31,$13
119	srl	$13,$1,22
120	xor	$31,$14
121	sll	$14,$1,30
122	xor	$31,$13
123	and	$13,$2,$3
124	xor	$31,$14			# Sigma0(a)
125	xor	$14,$2,$3
126#endif
127	sw	$8,0($29)	# offload to ring buffer
128	addu	$31,$13
129	and	$14,$1
130	addu	$12,$15			# +=K[0]
131	addu	$31,$14			# +=Maj(a,b,c)
132	addu	$7,$12
133	addu	$31,$12
134	lwl	$10,11($5)
135	lwr	$10,8($5)
136#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
137	wsbh	$9,$9		# byte swap(1)
138	rotr	$9,$9,16
139#else
140	srl	$14,$9,24		# byte swap(1)
141	srl	$15,$9,8
142	andi	$16,$9,0xFF00
143	sll	$9,$9,24
144	andi	$15,0xFF00
145	sll	$16,$16,8
146	or	$9,$14
147	or	$15,$16
148	or	$9,$15
149#endif
150#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
151	xor	$16,$24,$25			# 1
152	rotr	$14,$7,6
153	addu	$13,$9,$30
154	rotr	$15,$7,11
155	and	$16,$7
156	rotr	$30,$7,25
157	xor	$14,$15
158	rotr	$15,$31,2
159	xor	$16,$25			# Ch(e,f,g)
160	xor	$14,$30			# Sigma1(e)
161
162	rotr	$30,$31,13
163	addu	$13,$16
164	lw	$16,4($6)		# K[1]
165	xor	$30,$15
166	rotr	$15,$31,22
167	addu	$13,$14
168	and	$14,$1,$2
169	xor	$30,$15			# Sigma0(a)
170	xor	$15,$1,$2
171#else
172	addu	$13,$9,$30			# 1
173	srl	$30,$7,6
174	xor	$16,$24,$25
175	sll	$15,$7,7
176	and	$16,$7
177	srl	$14,$7,11
178	xor	$30,$15
179	sll	$15,$7,21
180	xor	$30,$14
181	srl	$14,$7,25
182	xor	$30,$15
183	sll	$15,$7,26
184	xor	$30,$14
185	xor	$16,$25			# Ch(e,f,g)
186	xor	$14,$15,$30			# Sigma1(e)
187
188	srl	$30,$31,2
189	addu	$13,$16
190	lw	$16,4($6)		# K[1]
191	sll	$15,$31,10
192	addu	$13,$14
193	srl	$14,$31,13
194	xor	$30,$15
195	sll	$15,$31,19
196	xor	$30,$14
197	srl	$14,$31,22
198	xor	$30,$15
199	sll	$15,$31,30
200	xor	$30,$14
201	and	$14,$1,$2
202	xor	$30,$15			# Sigma0(a)
203	xor	$15,$1,$2
204#endif
205	sw	$9,4($29)	# offload to ring buffer
206	addu	$30,$14
207	and	$15,$31
208	addu	$13,$16			# +=K[1]
209	addu	$30,$15			# +=Maj(a,b,c)
210	addu	$3,$13
211	addu	$30,$13
212	lwl	$11,15($5)
213	lwr	$11,12($5)
214#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
215	wsbh	$10,$10		# byte swap(2)
216	rotr	$10,$10,16
217#else
218	srl	$15,$10,24		# byte swap(2)
219	srl	$16,$10,8
220	andi	$17,$10,0xFF00
221	sll	$10,$10,24
222	andi	$16,0xFF00
223	sll	$17,$17,8
224	or	$10,$15
225	or	$16,$17
226	or	$10,$16
227#endif
228#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
229	xor	$17,$7,$24			# 2
230	rotr	$15,$3,6
231	addu	$14,$10,$25
232	rotr	$16,$3,11
233	and	$17,$3
234	rotr	$25,$3,25
235	xor	$15,$16
236	rotr	$16,$30,2
237	xor	$17,$24			# Ch(e,f,g)
238	xor	$15,$25			# Sigma1(e)
239
240	rotr	$25,$30,13
241	addu	$14,$17
242	lw	$17,8($6)		# K[2]
243	xor	$25,$16
244	rotr	$16,$30,22
245	addu	$14,$15
246	and	$15,$31,$1
247	xor	$25,$16			# Sigma0(a)
248	xor	$16,$31,$1
249#else
250	addu	$14,$10,$25			# 2
251	srl	$25,$3,6
252	xor	$17,$7,$24
253	sll	$16,$3,7
254	and	$17,$3
255	srl	$15,$3,11
256	xor	$25,$16
257	sll	$16,$3,21
258	xor	$25,$15
259	srl	$15,$3,25
260	xor	$25,$16
261	sll	$16,$3,26
262	xor	$25,$15
263	xor	$17,$24			# Ch(e,f,g)
264	xor	$15,$16,$25			# Sigma1(e)
265
266	srl	$25,$30,2
267	addu	$14,$17
268	lw	$17,8($6)		# K[2]
269	sll	$16,$30,10
270	addu	$14,$15
271	srl	$15,$30,13
272	xor	$25,$16
273	sll	$16,$30,19
274	xor	$25,$15
275	srl	$15,$30,22
276	xor	$25,$16
277	sll	$16,$30,30
278	xor	$25,$15
279	and	$15,$31,$1
280	xor	$25,$16			# Sigma0(a)
281	xor	$16,$31,$1
282#endif
283	sw	$10,8($29)	# offload to ring buffer
284	addu	$25,$15
285	and	$16,$30
286	addu	$14,$17			# +=K[2]
287	addu	$25,$16			# +=Maj(a,b,c)
288	addu	$2,$14
289	addu	$25,$14
290	lwl	$12,19($5)
291	lwr	$12,16($5)
292#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
293	wsbh	$11,$11		# byte swap(3)
294	rotr	$11,$11,16
295#else
296	srl	$16,$11,24		# byte swap(3)
297	srl	$17,$11,8
298	andi	$18,$11,0xFF00
299	sll	$11,$11,24
300	andi	$17,0xFF00
301	sll	$18,$18,8
302	or	$11,$16
303	or	$17,$18
304	or	$11,$17
305#endif
306#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
307	xor	$18,$3,$7			# 3
308	rotr	$16,$2,6
309	addu	$15,$11,$24
310	rotr	$17,$2,11
311	and	$18,$2
312	rotr	$24,$2,25
313	xor	$16,$17
314	rotr	$17,$25,2
315	xor	$18,$7			# Ch(e,f,g)
316	xor	$16,$24			# Sigma1(e)
317
318	rotr	$24,$25,13
319	addu	$15,$18
320	lw	$18,12($6)		# K[3]
321	xor	$24,$17
322	rotr	$17,$25,22
323	addu	$15,$16
324	and	$16,$30,$31
325	xor	$24,$17			# Sigma0(a)
326	xor	$17,$30,$31
327#else
328	addu	$15,$11,$24			# 3
329	srl	$24,$2,6
330	xor	$18,$3,$7
331	sll	$17,$2,7
332	and	$18,$2
333	srl	$16,$2,11
334	xor	$24,$17
335	sll	$17,$2,21
336	xor	$24,$16
337	srl	$16,$2,25
338	xor	$24,$17
339	sll	$17,$2,26
340	xor	$24,$16
341	xor	$18,$7			# Ch(e,f,g)
342	xor	$16,$17,$24			# Sigma1(e)
343
344	srl	$24,$25,2
345	addu	$15,$18
346	lw	$18,12($6)		# K[3]
347	sll	$17,$25,10
348	addu	$15,$16
349	srl	$16,$25,13
350	xor	$24,$17
351	sll	$17,$25,19
352	xor	$24,$16
353	srl	$16,$25,22
354	xor	$24,$17
355	sll	$17,$25,30
356	xor	$24,$16
357	and	$16,$30,$31
358	xor	$24,$17			# Sigma0(a)
359	xor	$17,$30,$31
360#endif
361	sw	$11,12($29)	# offload to ring buffer
362	addu	$24,$16
363	and	$17,$25
364	addu	$15,$18			# +=K[3]
365	addu	$24,$17			# +=Maj(a,b,c)
366	addu	$1,$15
367	addu	$24,$15
368	lwl	$13,23($5)
369	lwr	$13,20($5)
370#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
371	wsbh	$12,$12		# byte swap(4)
372	rotr	$12,$12,16
373#else
374	srl	$17,$12,24		# byte swap(4)
375	srl	$18,$12,8
376	andi	$19,$12,0xFF00
377	sll	$12,$12,24
378	andi	$18,0xFF00
379	sll	$19,$19,8
380	or	$12,$17
381	or	$18,$19
382	or	$12,$18
383#endif
384#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
385	xor	$19,$2,$3			# 4
386	rotr	$17,$1,6
387	addu	$16,$12,$7
388	rotr	$18,$1,11
389	and	$19,$1
390	rotr	$7,$1,25
391	xor	$17,$18
392	rotr	$18,$24,2
393	xor	$19,$3			# Ch(e,f,g)
394	xor	$17,$7			# Sigma1(e)
395
396	rotr	$7,$24,13
397	addu	$16,$19
398	lw	$19,16($6)		# K[4]
399	xor	$7,$18
400	rotr	$18,$24,22
401	addu	$16,$17
402	and	$17,$25,$30
403	xor	$7,$18			# Sigma0(a)
404	xor	$18,$25,$30
405#else
406	addu	$16,$12,$7			# 4
407	srl	$7,$1,6
408	xor	$19,$2,$3
409	sll	$18,$1,7
410	and	$19,$1
411	srl	$17,$1,11
412	xor	$7,$18
413	sll	$18,$1,21
414	xor	$7,$17
415	srl	$17,$1,25
416	xor	$7,$18
417	sll	$18,$1,26
418	xor	$7,$17
419	xor	$19,$3			# Ch(e,f,g)
420	xor	$17,$18,$7			# Sigma1(e)
421
422	srl	$7,$24,2
423	addu	$16,$19
424	lw	$19,16($6)		# K[4]
425	sll	$18,$24,10
426	addu	$16,$17
427	srl	$17,$24,13
428	xor	$7,$18
429	sll	$18,$24,19
430	xor	$7,$17
431	srl	$17,$24,22
432	xor	$7,$18
433	sll	$18,$24,30
434	xor	$7,$17
435	and	$17,$25,$30
436	xor	$7,$18			# Sigma0(a)
437	xor	$18,$25,$30
438#endif
439	sw	$12,16($29)	# offload to ring buffer
440	addu	$7,$17
441	and	$18,$24
442	addu	$16,$19			# +=K[4]
443	addu	$7,$18			# +=Maj(a,b,c)
444	addu	$31,$16
445	addu	$7,$16
446	lwl	$14,27($5)
447	lwr	$14,24($5)
448#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
449	wsbh	$13,$13		# byte swap(5)
450	rotr	$13,$13,16
451#else
452	srl	$18,$13,24		# byte swap(5)
453	srl	$19,$13,8
454	andi	$20,$13,0xFF00
455	sll	$13,$13,24
456	andi	$19,0xFF00
457	sll	$20,$20,8
458	or	$13,$18
459	or	$19,$20
460	or	$13,$19
461#endif
462#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
463	xor	$20,$1,$2			# 5
464	rotr	$18,$31,6
465	addu	$17,$13,$3
466	rotr	$19,$31,11
467	and	$20,$31
468	rotr	$3,$31,25
469	xor	$18,$19
470	rotr	$19,$7,2
471	xor	$20,$2			# Ch(e,f,g)
472	xor	$18,$3			# Sigma1(e)
473
474	rotr	$3,$7,13
475	addu	$17,$20
476	lw	$20,20($6)		# K[5]
477	xor	$3,$19
478	rotr	$19,$7,22
479	addu	$17,$18
480	and	$18,$24,$25
481	xor	$3,$19			# Sigma0(a)
482	xor	$19,$24,$25
483#else
484	addu	$17,$13,$3			# 5
485	srl	$3,$31,6
486	xor	$20,$1,$2
487	sll	$19,$31,7
488	and	$20,$31
489	srl	$18,$31,11
490	xor	$3,$19
491	sll	$19,$31,21
492	xor	$3,$18
493	srl	$18,$31,25
494	xor	$3,$19
495	sll	$19,$31,26
496	xor	$3,$18
497	xor	$20,$2			# Ch(e,f,g)
498	xor	$18,$19,$3			# Sigma1(e)
499
500	srl	$3,$7,2
501	addu	$17,$20
502	lw	$20,20($6)		# K[5]
503	sll	$19,$7,10
504	addu	$17,$18
505	srl	$18,$7,13
506	xor	$3,$19
507	sll	$19,$7,19
508	xor	$3,$18
509	srl	$18,$7,22
510	xor	$3,$19
511	sll	$19,$7,30
512	xor	$3,$18
513	and	$18,$24,$25
514	xor	$3,$19			# Sigma0(a)
515	xor	$19,$24,$25
516#endif
517	sw	$13,20($29)	# offload to ring buffer
518	addu	$3,$18
519	and	$19,$7
520	addu	$17,$20			# +=K[5]
521	addu	$3,$19			# +=Maj(a,b,c)
522	addu	$30,$17
523	addu	$3,$17
524	lwl	$15,31($5)
525	lwr	$15,28($5)
526#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
527	wsbh	$14,$14		# byte swap(6)
528	rotr	$14,$14,16
529#else
530	srl	$19,$14,24		# byte swap(6)
531	srl	$20,$14,8
532	andi	$21,$14,0xFF00
533	sll	$14,$14,24
534	andi	$20,0xFF00
535	sll	$21,$21,8
536	or	$14,$19
537	or	$20,$21
538	or	$14,$20
539#endif
540#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
541	xor	$21,$31,$1			# 6
542	rotr	$19,$30,6
543	addu	$18,$14,$2
544	rotr	$20,$30,11
545	and	$21,$30
546	rotr	$2,$30,25
547	xor	$19,$20
548	rotr	$20,$3,2
549	xor	$21,$1			# Ch(e,f,g)
550	xor	$19,$2			# Sigma1(e)
551
552	rotr	$2,$3,13
553	addu	$18,$21
554	lw	$21,24($6)		# K[6]
555	xor	$2,$20
556	rotr	$20,$3,22
557	addu	$18,$19
558	and	$19,$7,$24
559	xor	$2,$20			# Sigma0(a)
560	xor	$20,$7,$24
561#else
562	addu	$18,$14,$2			# 6
563	srl	$2,$30,6
564	xor	$21,$31,$1
565	sll	$20,$30,7
566	and	$21,$30
567	srl	$19,$30,11
568	xor	$2,$20
569	sll	$20,$30,21
570	xor	$2,$19
571	srl	$19,$30,25
572	xor	$2,$20
573	sll	$20,$30,26
574	xor	$2,$19
575	xor	$21,$1			# Ch(e,f,g)
576	xor	$19,$20,$2			# Sigma1(e)
577
578	srl	$2,$3,2
579	addu	$18,$21
580	lw	$21,24($6)		# K[6]
581	sll	$20,$3,10
582	addu	$18,$19
583	srl	$19,$3,13
584	xor	$2,$20
585	sll	$20,$3,19
586	xor	$2,$19
587	srl	$19,$3,22
588	xor	$2,$20
589	sll	$20,$3,30
590	xor	$2,$19
591	and	$19,$7,$24
592	xor	$2,$20			# Sigma0(a)
593	xor	$20,$7,$24
594#endif
595	sw	$14,24($29)	# offload to ring buffer
596	addu	$2,$19
597	and	$20,$3
598	addu	$18,$21			# +=K[6]
599	addu	$2,$20			# +=Maj(a,b,c)
600	addu	$25,$18
601	addu	$2,$18
602	lwl	$16,35($5)
603	lwr	$16,32($5)
604#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
605	wsbh	$15,$15		# byte swap(7)
606	rotr	$15,$15,16
607#else
608	srl	$20,$15,24		# byte swap(7)
609	srl	$21,$15,8
610	andi	$22,$15,0xFF00
611	sll	$15,$15,24
612	andi	$21,0xFF00
613	sll	$22,$22,8
614	or	$15,$20
615	or	$21,$22
616	or	$15,$21
617#endif
618#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
619	xor	$22,$30,$31			# 7
620	rotr	$20,$25,6
621	addu	$19,$15,$1
622	rotr	$21,$25,11
623	and	$22,$25
624	rotr	$1,$25,25
625	xor	$20,$21
626	rotr	$21,$2,2
627	xor	$22,$31			# Ch(e,f,g)
628	xor	$20,$1			# Sigma1(e)
629
630	rotr	$1,$2,13
631	addu	$19,$22
632	lw	$22,28($6)		# K[7]
633	xor	$1,$21
634	rotr	$21,$2,22
635	addu	$19,$20
636	and	$20,$3,$7
637	xor	$1,$21			# Sigma0(a)
638	xor	$21,$3,$7
639#else
640	addu	$19,$15,$1			# 7
641	srl	$1,$25,6
642	xor	$22,$30,$31
643	sll	$21,$25,7
644	and	$22,$25
645	srl	$20,$25,11
646	xor	$1,$21
647	sll	$21,$25,21
648	xor	$1,$20
649	srl	$20,$25,25
650	xor	$1,$21
651	sll	$21,$25,26
652	xor	$1,$20
653	xor	$22,$31			# Ch(e,f,g)
654	xor	$20,$21,$1			# Sigma1(e)
655
656	srl	$1,$2,2
657	addu	$19,$22
658	lw	$22,28($6)		# K[7]
659	sll	$21,$2,10
660	addu	$19,$20
661	srl	$20,$2,13
662	xor	$1,$21
663	sll	$21,$2,19
664	xor	$1,$20
665	srl	$20,$2,22
666	xor	$1,$21
667	sll	$21,$2,30
668	xor	$1,$20
669	and	$20,$3,$7
670	xor	$1,$21			# Sigma0(a)
671	xor	$21,$3,$7
672#endif
673	sw	$15,28($29)	# offload to ring buffer
674	addu	$1,$20
675	and	$21,$2
676	addu	$19,$22			# +=K[7]
677	addu	$1,$21			# +=Maj(a,b,c)
678	addu	$24,$19
679	addu	$1,$19
680	lwl	$17,39($5)
681	lwr	$17,36($5)
682#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
683	wsbh	$16,$16		# byte swap(8)
684	rotr	$16,$16,16
685#else
686	srl	$21,$16,24		# byte swap(8)
687	srl	$22,$16,8
688	andi	$23,$16,0xFF00
689	sll	$16,$16,24
690	andi	$22,0xFF00
691	sll	$23,$23,8
692	or	$16,$21
693	or	$22,$23
694	or	$16,$22
695#endif
696#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
697	xor	$23,$25,$30			# 8
698	rotr	$21,$24,6
699	addu	$20,$16,$31
700	rotr	$22,$24,11
701	and	$23,$24
702	rotr	$31,$24,25
703	xor	$21,$22
704	rotr	$22,$1,2
705	xor	$23,$30			# Ch(e,f,g)
706	xor	$21,$31			# Sigma1(e)
707
708	rotr	$31,$1,13
709	addu	$20,$23
710	lw	$23,32($6)		# K[8]
711	xor	$31,$22
712	rotr	$22,$1,22
713	addu	$20,$21
714	and	$21,$2,$3
715	xor	$31,$22			# Sigma0(a)
716	xor	$22,$2,$3
717#else
718	addu	$20,$16,$31			# 8
719	srl	$31,$24,6
720	xor	$23,$25,$30
721	sll	$22,$24,7
722	and	$23,$24
723	srl	$21,$24,11
724	xor	$31,$22
725	sll	$22,$24,21
726	xor	$31,$21
727	srl	$21,$24,25
728	xor	$31,$22
729	sll	$22,$24,26
730	xor	$31,$21
731	xor	$23,$30			# Ch(e,f,g)
732	xor	$21,$22,$31			# Sigma1(e)
733
734	srl	$31,$1,2
735	addu	$20,$23
736	lw	$23,32($6)		# K[8]
737	sll	$22,$1,10
738	addu	$20,$21
739	srl	$21,$1,13
740	xor	$31,$22
741	sll	$22,$1,19
742	xor	$31,$21
743	srl	$21,$1,22
744	xor	$31,$22
745	sll	$22,$1,30
746	xor	$31,$21
747	and	$21,$2,$3
748	xor	$31,$22			# Sigma0(a)
749	xor	$22,$2,$3
750#endif
751	sw	$16,32($29)	# offload to ring buffer
752	addu	$31,$21
753	and	$22,$1
754	addu	$20,$23			# +=K[8]
755	addu	$31,$22			# +=Maj(a,b,c)
756	addu	$7,$20
757	addu	$31,$20
758	lwl	$18,43($5)
759	lwr	$18,40($5)
760#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
761	wsbh	$17,$17		# byte swap(9)
762	rotr	$17,$17,16
763#else
764	srl	$22,$17,24		# byte swap(9)
765	srl	$23,$17,8
766	andi	$8,$17,0xFF00
767	sll	$17,$17,24
768	andi	$23,0xFF00
769	sll	$8,$8,8
770	or	$17,$22
771	or	$23,$8
772	or	$17,$23
773#endif
774#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
775	xor	$8,$24,$25			# 9
776	rotr	$22,$7,6
777	addu	$21,$17,$30
778	rotr	$23,$7,11
779	and	$8,$7
780	rotr	$30,$7,25
781	xor	$22,$23
782	rotr	$23,$31,2
783	xor	$8,$25			# Ch(e,f,g)
784	xor	$22,$30			# Sigma1(e)
785
786	rotr	$30,$31,13
787	addu	$21,$8
788	lw	$8,36($6)		# K[9]
789	xor	$30,$23
790	rotr	$23,$31,22
791	addu	$21,$22
792	and	$22,$1,$2
793	xor	$30,$23			# Sigma0(a)
794	xor	$23,$1,$2
795#else
796	addu	$21,$17,$30			# 9
797	srl	$30,$7,6
798	xor	$8,$24,$25
799	sll	$23,$7,7
800	and	$8,$7
801	srl	$22,$7,11
802	xor	$30,$23
803	sll	$23,$7,21
804	xor	$30,$22
805	srl	$22,$7,25
806	xor	$30,$23
807	sll	$23,$7,26
808	xor	$30,$22
809	xor	$8,$25			# Ch(e,f,g)
810	xor	$22,$23,$30			# Sigma1(e)
811
812	srl	$30,$31,2
813	addu	$21,$8
814	lw	$8,36($6)		# K[9]
815	sll	$23,$31,10
816	addu	$21,$22
817	srl	$22,$31,13
818	xor	$30,$23
819	sll	$23,$31,19
820	xor	$30,$22
821	srl	$22,$31,22
822	xor	$30,$23
823	sll	$23,$31,30
824	xor	$30,$22
825	and	$22,$1,$2
826	xor	$30,$23			# Sigma0(a)
827	xor	$23,$1,$2
828#endif
829	sw	$17,36($29)	# offload to ring buffer
830	addu	$30,$22
831	and	$23,$31
832	addu	$21,$8			# +=K[9]
833	addu	$30,$23			# +=Maj(a,b,c)
834	addu	$3,$21
835	addu	$30,$21
836	lwl	$19,47($5)
837	lwr	$19,44($5)
838#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
839	wsbh	$18,$18		# byte swap(10)
840	rotr	$18,$18,16
841#else
842	srl	$23,$18,24		# byte swap(10)
843	srl	$8,$18,8
844	andi	$9,$18,0xFF00
845	sll	$18,$18,24
846	andi	$8,0xFF00
847	sll	$9,$9,8
848	or	$18,$23
849	or	$8,$9
850	or	$18,$8
851#endif
852#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
853	xor	$9,$7,$24			# 10
854	rotr	$23,$3,6
855	addu	$22,$18,$25
856	rotr	$8,$3,11
857	and	$9,$3
858	rotr	$25,$3,25
859	xor	$23,$8
860	rotr	$8,$30,2
861	xor	$9,$24			# Ch(e,f,g)
862	xor	$23,$25			# Sigma1(e)
863
864	rotr	$25,$30,13
865	addu	$22,$9
866	lw	$9,40($6)		# K[10]
867	xor	$25,$8
868	rotr	$8,$30,22
869	addu	$22,$23
870	and	$23,$31,$1
871	xor	$25,$8			# Sigma0(a)
872	xor	$8,$31,$1
873#else
874	addu	$22,$18,$25			# 10
875	srl	$25,$3,6
876	xor	$9,$7,$24
877	sll	$8,$3,7
878	and	$9,$3
879	srl	$23,$3,11
880	xor	$25,$8
881	sll	$8,$3,21
882	xor	$25,$23
883	srl	$23,$3,25
884	xor	$25,$8
885	sll	$8,$3,26
886	xor	$25,$23
887	xor	$9,$24			# Ch(e,f,g)
888	xor	$23,$8,$25			# Sigma1(e)
889
890	srl	$25,$30,2
891	addu	$22,$9
892	lw	$9,40($6)		# K[10]
893	sll	$8,$30,10
894	addu	$22,$23
895	srl	$23,$30,13
896	xor	$25,$8
897	sll	$8,$30,19
898	xor	$25,$23
899	srl	$23,$30,22
900	xor	$25,$8
901	sll	$8,$30,30
902	xor	$25,$23
903	and	$23,$31,$1
904	xor	$25,$8			# Sigma0(a)
905	xor	$8,$31,$1
906#endif
907	sw	$18,40($29)	# offload to ring buffer
908	addu	$25,$23
909	and	$8,$30
910	addu	$22,$9			# +=K[10]
911	addu	$25,$8			# +=Maj(a,b,c)
912	addu	$2,$22
913	addu	$25,$22
914	lwl	$20,51($5)
915	lwr	$20,48($5)
916#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
917	wsbh	$19,$19		# byte swap(11)
918	rotr	$19,$19,16
919#else
920	srl	$8,$19,24		# byte swap(11)
921	srl	$9,$19,8
922	andi	$10,$19,0xFF00
923	sll	$19,$19,24
924	andi	$9,0xFF00
925	sll	$10,$10,8
926	or	$19,$8
927	or	$9,$10
928	or	$19,$9
929#endif
930#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
931	xor	$10,$3,$7			# 11
932	rotr	$8,$2,6
933	addu	$23,$19,$24
934	rotr	$9,$2,11
935	and	$10,$2
936	rotr	$24,$2,25
937	xor	$8,$9
938	rotr	$9,$25,2
939	xor	$10,$7			# Ch(e,f,g)
940	xor	$8,$24			# Sigma1(e)
941
942	rotr	$24,$25,13
943	addu	$23,$10
944	lw	$10,44($6)		# K[11]
945	xor	$24,$9
946	rotr	$9,$25,22
947	addu	$23,$8
948	and	$8,$30,$31
949	xor	$24,$9			# Sigma0(a)
950	xor	$9,$30,$31
951#else
952	addu	$23,$19,$24			# 11
953	srl	$24,$2,6
954	xor	$10,$3,$7
955	sll	$9,$2,7
956	and	$10,$2
957	srl	$8,$2,11
958	xor	$24,$9
959	sll	$9,$2,21
960	xor	$24,$8
961	srl	$8,$2,25
962	xor	$24,$9
963	sll	$9,$2,26
964	xor	$24,$8
965	xor	$10,$7			# Ch(e,f,g)
966	xor	$8,$9,$24			# Sigma1(e)
967
968	srl	$24,$25,2
969	addu	$23,$10
970	lw	$10,44($6)		# K[11]
971	sll	$9,$25,10
972	addu	$23,$8
973	srl	$8,$25,13
974	xor	$24,$9
975	sll	$9,$25,19
976	xor	$24,$8
977	srl	$8,$25,22
978	xor	$24,$9
979	sll	$9,$25,30
980	xor	$24,$8
981	and	$8,$30,$31
982	xor	$24,$9			# Sigma0(a)
983	xor	$9,$30,$31
984#endif
985	sw	$19,44($29)	# offload to ring buffer
986	addu	$24,$8
987	and	$9,$25
988	addu	$23,$10			# +=K[11]
989	addu	$24,$9			# +=Maj(a,b,c)
990	addu	$1,$23
991	addu	$24,$23
992	lwl	$21,55($5)
993	lwr	$21,52($5)
994#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
995	wsbh	$20,$20		# byte swap(12)
996	rotr	$20,$20,16
997#else
998	srl	$9,$20,24		# byte swap(12)
999	srl	$10,$20,8
1000	andi	$11,$20,0xFF00
1001	sll	$20,$20,24
1002	andi	$10,0xFF00
1003	sll	$11,$11,8
1004	or	$20,$9
1005	or	$10,$11
1006	or	$20,$10
1007#endif
1008#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1009	xor	$11,$2,$3			# 12
1010	rotr	$9,$1,6
1011	addu	$8,$20,$7
1012	rotr	$10,$1,11
1013	and	$11,$1
1014	rotr	$7,$1,25
1015	xor	$9,$10
1016	rotr	$10,$24,2
1017	xor	$11,$3			# Ch(e,f,g)
1018	xor	$9,$7			# Sigma1(e)
1019
1020	rotr	$7,$24,13
1021	addu	$8,$11
1022	lw	$11,48($6)		# K[12]
1023	xor	$7,$10
1024	rotr	$10,$24,22
1025	addu	$8,$9
1026	and	$9,$25,$30
1027	xor	$7,$10			# Sigma0(a)
1028	xor	$10,$25,$30
1029#else
1030	addu	$8,$20,$7			# 12
1031	srl	$7,$1,6
1032	xor	$11,$2,$3
1033	sll	$10,$1,7
1034	and	$11,$1
1035	srl	$9,$1,11
1036	xor	$7,$10
1037	sll	$10,$1,21
1038	xor	$7,$9
1039	srl	$9,$1,25
1040	xor	$7,$10
1041	sll	$10,$1,26
1042	xor	$7,$9
1043	xor	$11,$3			# Ch(e,f,g)
1044	xor	$9,$10,$7			# Sigma1(e)
1045
1046	srl	$7,$24,2
1047	addu	$8,$11
1048	lw	$11,48($6)		# K[12]
1049	sll	$10,$24,10
1050	addu	$8,$9
1051	srl	$9,$24,13
1052	xor	$7,$10
1053	sll	$10,$24,19
1054	xor	$7,$9
1055	srl	$9,$24,22
1056	xor	$7,$10
1057	sll	$10,$24,30
1058	xor	$7,$9
1059	and	$9,$25,$30
1060	xor	$7,$10			# Sigma0(a)
1061	xor	$10,$25,$30
1062#endif
1063	sw	$20,48($29)	# offload to ring buffer
1064	addu	$7,$9
1065	and	$10,$24
1066	addu	$8,$11			# +=K[12]
1067	addu	$7,$10			# +=Maj(a,b,c)
1068	addu	$31,$8
1069	addu	$7,$8
1070	lwl	$22,59($5)
1071	lwr	$22,56($5)
1072#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1073	wsbh	$21,$21		# byte swap(13)
1074	rotr	$21,$21,16
1075#else
1076	srl	$10,$21,24		# byte swap(13)
1077	srl	$11,$21,8
1078	andi	$12,$21,0xFF00
1079	sll	$21,$21,24
1080	andi	$11,0xFF00
1081	sll	$12,$12,8
1082	or	$21,$10
1083	or	$11,$12
1084	or	$21,$11
1085#endif
1086#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1087	xor	$12,$1,$2			# 13
1088	rotr	$10,$31,6
1089	addu	$9,$21,$3
1090	rotr	$11,$31,11
1091	and	$12,$31
1092	rotr	$3,$31,25
1093	xor	$10,$11
1094	rotr	$11,$7,2
1095	xor	$12,$2			# Ch(e,f,g)
1096	xor	$10,$3			# Sigma1(e)
1097
1098	rotr	$3,$7,13
1099	addu	$9,$12
1100	lw	$12,52($6)		# K[13]
1101	xor	$3,$11
1102	rotr	$11,$7,22
1103	addu	$9,$10
1104	and	$10,$24,$25
1105	xor	$3,$11			# Sigma0(a)
1106	xor	$11,$24,$25
1107#else
1108	addu	$9,$21,$3			# 13
1109	srl	$3,$31,6
1110	xor	$12,$1,$2
1111	sll	$11,$31,7
1112	and	$12,$31
1113	srl	$10,$31,11
1114	xor	$3,$11
1115	sll	$11,$31,21
1116	xor	$3,$10
1117	srl	$10,$31,25
1118	xor	$3,$11
1119	sll	$11,$31,26
1120	xor	$3,$10
1121	xor	$12,$2			# Ch(e,f,g)
1122	xor	$10,$11,$3			# Sigma1(e)
1123
1124	srl	$3,$7,2
1125	addu	$9,$12
1126	lw	$12,52($6)		# K[13]
1127	sll	$11,$7,10
1128	addu	$9,$10
1129	srl	$10,$7,13
1130	xor	$3,$11
1131	sll	$11,$7,19
1132	xor	$3,$10
1133	srl	$10,$7,22
1134	xor	$3,$11
1135	sll	$11,$7,30
1136	xor	$3,$10
1137	and	$10,$24,$25
1138	xor	$3,$11			# Sigma0(a)
1139	xor	$11,$24,$25
1140#endif
1141	sw	$21,52($29)	# offload to ring buffer
1142	addu	$3,$10
1143	and	$11,$7
1144	addu	$9,$12			# +=K[13]
1145	addu	$3,$11			# +=Maj(a,b,c)
1146	addu	$30,$9
1147	addu	$3,$9
1148	lw	$8,0($29)	# prefetch from ring buffer
1149	lwl	$23,63($5)
1150	lwr	$23,60($5)
1151#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1152	wsbh	$22,$22		# byte swap(14)
1153	rotr	$22,$22,16
1154#else
1155	srl	$11,$22,24		# byte swap(14)
1156	srl	$12,$22,8
1157	andi	$13,$22,0xFF00
1158	sll	$22,$22,24
1159	andi	$12,0xFF00
1160	sll	$13,$13,8
1161	or	$22,$11
1162	or	$12,$13
1163	or	$22,$12
1164#endif
1165#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1166	xor	$13,$31,$1			# 14
1167	rotr	$11,$30,6
1168	addu	$10,$22,$2
1169	rotr	$12,$30,11
1170	and	$13,$30
1171	rotr	$2,$30,25
1172	xor	$11,$12
1173	rotr	$12,$3,2
1174	xor	$13,$1			# Ch(e,f,g)
1175	xor	$11,$2			# Sigma1(e)
1176
1177	rotr	$2,$3,13
1178	addu	$10,$13
1179	lw	$13,56($6)		# K[14]
1180	xor	$2,$12
1181	rotr	$12,$3,22
1182	addu	$10,$11
1183	and	$11,$7,$24
1184	xor	$2,$12			# Sigma0(a)
1185	xor	$12,$7,$24
1186#else
1187	addu	$10,$22,$2			# 14
1188	srl	$2,$30,6
1189	xor	$13,$31,$1
1190	sll	$12,$30,7
1191	and	$13,$30
1192	srl	$11,$30,11
1193	xor	$2,$12
1194	sll	$12,$30,21
1195	xor	$2,$11
1196	srl	$11,$30,25
1197	xor	$2,$12
1198	sll	$12,$30,26
1199	xor	$2,$11
1200	xor	$13,$1			# Ch(e,f,g)
1201	xor	$11,$12,$2			# Sigma1(e)
1202
1203	srl	$2,$3,2
1204	addu	$10,$13
1205	lw	$13,56($6)		# K[14]
1206	sll	$12,$3,10
1207	addu	$10,$11
1208	srl	$11,$3,13
1209	xor	$2,$12
1210	sll	$12,$3,19
1211	xor	$2,$11
1212	srl	$11,$3,22
1213	xor	$2,$12
1214	sll	$12,$3,30
1215	xor	$2,$11
1216	and	$11,$7,$24
1217	xor	$2,$12			# Sigma0(a)
1218	xor	$12,$7,$24
1219#endif
1220	sw	$22,56($29)	# offload to ring buffer
1221	addu	$2,$11
1222	and	$12,$3
1223	addu	$10,$13			# +=K[14]
1224	addu	$2,$12			# +=Maj(a,b,c)
1225	addu	$25,$10
1226	addu	$2,$10
1227	lw	$9,4($29)	# prefetch from ring buffer
1228#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1229	wsbh	$23,$23		# byte swap(15)
1230	rotr	$23,$23,16
1231#else
1232	srl	$12,$23,24		# byte swap(15)
1233	srl	$13,$23,8
1234	andi	$14,$23,0xFF00
1235	sll	$23,$23,24
1236	andi	$13,0xFF00
1237	sll	$14,$14,8
1238	or	$23,$12
1239	or	$13,$14
1240	or	$23,$13
1241#endif
1242#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1243	xor	$14,$30,$31			# 15
1244	rotr	$12,$25,6
1245	addu	$11,$23,$1
1246	rotr	$13,$25,11
1247	and	$14,$25
1248	rotr	$1,$25,25
1249	xor	$12,$13
1250	rotr	$13,$2,2
1251	xor	$14,$31			# Ch(e,f,g)
1252	xor	$12,$1			# Sigma1(e)
1253
1254	rotr	$1,$2,13
1255	addu	$11,$14
1256	lw	$14,60($6)		# K[15]
1257	xor	$1,$13
1258	rotr	$13,$2,22
1259	addu	$11,$12
1260	and	$12,$3,$7
1261	xor	$1,$13			# Sigma0(a)
1262	xor	$13,$3,$7
1263#else
1264	addu	$11,$23,$1			# 15
1265	srl	$1,$25,6
1266	xor	$14,$30,$31
1267	sll	$13,$25,7
1268	and	$14,$25
1269	srl	$12,$25,11
1270	xor	$1,$13
1271	sll	$13,$25,21
1272	xor	$1,$12
1273	srl	$12,$25,25
1274	xor	$1,$13
1275	sll	$13,$25,26
1276	xor	$1,$12
1277	xor	$14,$31			# Ch(e,f,g)
1278	xor	$12,$13,$1			# Sigma1(e)
1279
1280	srl	$1,$2,2
1281	addu	$11,$14
1282	lw	$14,60($6)		# K[15]
1283	sll	$13,$2,10
1284	addu	$11,$12
1285	srl	$12,$2,13
1286	xor	$1,$13
1287	sll	$13,$2,19
1288	xor	$1,$12
1289	srl	$12,$2,22
1290	xor	$1,$13
1291	sll	$13,$2,30
1292	xor	$1,$12
1293	and	$12,$3,$7
1294	xor	$1,$13			# Sigma0(a)
1295	xor	$13,$3,$7
1296#endif
1297	sw	$23,60($29)	# offload to ring buffer
1298	addu	$1,$12
1299	and	$13,$2
1300	addu	$11,$14			# +=K[15]
1301	addu	$1,$13			# +=Maj(a,b,c)
1302	addu	$24,$11
1303	addu	$1,$11
1304	lw	$10,8($29)	# prefetch from ring buffer
1305	b	.L16_xx
1306.align	4
1307.L16_xx:
1308#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1309	srl	$14,$9,3		# Xupdate(16)
1310	rotr	$12,$9,7
1311	addu	$8,$17			# +=X[i+9]
1312	xor	$14,$12
1313	rotr	$12,$9,18
1314
1315	srl	$15,$22,10
1316	rotr	$13,$22,17
1317	xor	$14,$12			# sigma0(X[i+1])
1318	rotr	$12,$22,19
1319	xor	$15,$13
1320	addu	$8,$14
1321#else
1322	srl	$14,$9,3		# Xupdate(16)
1323	addu	$8,$17			# +=X[i+9]
1324	sll	$13,$9,14
1325	srl	$12,$9,7
1326	xor	$14,$13
1327	sll	$13,11
1328	xor	$14,$12
1329	srl	$12,$9,18
1330	xor	$14,$13
1331
1332	srl	$15,$22,10
1333	xor	$14,$12			# sigma0(X[i+1])
1334	sll	$13,$22,13
1335	addu	$8,$14
1336	srl	$12,$22,17
1337	xor	$15,$13
1338	sll	$13,2
1339	xor	$15,$12
1340	srl	$12,$22,19
1341	xor	$15,$13
1342#endif
1343	xor	$15,$12			# sigma1(X[i+14])
1344	addu	$8,$15
1345#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1346	xor	$15,$25,$30			# 16
1347	rotr	$13,$24,6
1348	addu	$12,$8,$31
1349	rotr	$14,$24,11
1350	and	$15,$24
1351	rotr	$31,$24,25
1352	xor	$13,$14
1353	rotr	$14,$1,2
1354	xor	$15,$30			# Ch(e,f,g)
1355	xor	$13,$31			# Sigma1(e)
1356
1357	rotr	$31,$1,13
1358	addu	$12,$15
1359	lw	$15,64($6)		# K[16]
1360	xor	$31,$14
1361	rotr	$14,$1,22
1362	addu	$12,$13
1363	and	$13,$2,$3
1364	xor	$31,$14			# Sigma0(a)
1365	xor	$14,$2,$3
1366#else
1367	addu	$12,$8,$31			# 16
1368	srl	$31,$24,6
1369	xor	$15,$25,$30
1370	sll	$14,$24,7
1371	and	$15,$24
1372	srl	$13,$24,11
1373	xor	$31,$14
1374	sll	$14,$24,21
1375	xor	$31,$13
1376	srl	$13,$24,25
1377	xor	$31,$14
1378	sll	$14,$24,26
1379	xor	$31,$13
1380	xor	$15,$30			# Ch(e,f,g)
1381	xor	$13,$14,$31			# Sigma1(e)
1382
1383	srl	$31,$1,2
1384	addu	$12,$15
1385	lw	$15,64($6)		# K[16]
1386	sll	$14,$1,10
1387	addu	$12,$13
1388	srl	$13,$1,13
1389	xor	$31,$14
1390	sll	$14,$1,19
1391	xor	$31,$13
1392	srl	$13,$1,22
1393	xor	$31,$14
1394	sll	$14,$1,30
1395	xor	$31,$13
1396	and	$13,$2,$3
1397	xor	$31,$14			# Sigma0(a)
1398	xor	$14,$2,$3
1399#endif
1400	sw	$8,0($29)	# offload to ring buffer
1401	addu	$31,$13
1402	and	$14,$1
1403	addu	$12,$15			# +=K[16]
1404	addu	$31,$14			# +=Maj(a,b,c)
1405	addu	$7,$12
1406	addu	$31,$12
1407	lw	$11,12($29)	# prefetch from ring buffer
1408#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1409	srl	$15,$10,3		# Xupdate(17)
1410	rotr	$13,$10,7
1411	addu	$9,$18			# +=X[i+9]
1412	xor	$15,$13
1413	rotr	$13,$10,18
1414
1415	srl	$16,$23,10
1416	rotr	$14,$23,17
1417	xor	$15,$13			# sigma0(X[i+1])
1418	rotr	$13,$23,19
1419	xor	$16,$14
1420	addu	$9,$15
1421#else
1422	srl	$15,$10,3		# Xupdate(17)
1423	addu	$9,$18			# +=X[i+9]
1424	sll	$14,$10,14
1425	srl	$13,$10,7
1426	xor	$15,$14
1427	sll	$14,11
1428	xor	$15,$13
1429	srl	$13,$10,18
1430	xor	$15,$14
1431
1432	srl	$16,$23,10
1433	xor	$15,$13			# sigma0(X[i+1])
1434	sll	$14,$23,13
1435	addu	$9,$15
1436	srl	$13,$23,17
1437	xor	$16,$14
1438	sll	$14,2
1439	xor	$16,$13
1440	srl	$13,$23,19
1441	xor	$16,$14
1442#endif
1443	xor	$16,$13			# sigma1(X[i+14])
1444	addu	$9,$16
1445#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1446	xor	$16,$24,$25			# 17
1447	rotr	$14,$7,6
1448	addu	$13,$9,$30
1449	rotr	$15,$7,11
1450	and	$16,$7
1451	rotr	$30,$7,25
1452	xor	$14,$15
1453	rotr	$15,$31,2
1454	xor	$16,$25			# Ch(e,f,g)
1455	xor	$14,$30			# Sigma1(e)
1456
1457	rotr	$30,$31,13
1458	addu	$13,$16
1459	lw	$16,68($6)		# K[17]
1460	xor	$30,$15
1461	rotr	$15,$31,22
1462	addu	$13,$14
1463	and	$14,$1,$2
1464	xor	$30,$15			# Sigma0(a)
1465	xor	$15,$1,$2
1466#else
1467	addu	$13,$9,$30			# 17
1468	srl	$30,$7,6
1469	xor	$16,$24,$25
1470	sll	$15,$7,7
1471	and	$16,$7
1472	srl	$14,$7,11
1473	xor	$30,$15
1474	sll	$15,$7,21
1475	xor	$30,$14
1476	srl	$14,$7,25
1477	xor	$30,$15
1478	sll	$15,$7,26
1479	xor	$30,$14
1480	xor	$16,$25			# Ch(e,f,g)
1481	xor	$14,$15,$30			# Sigma1(e)
1482
1483	srl	$30,$31,2
1484	addu	$13,$16
1485	lw	$16,68($6)		# K[17]
1486	sll	$15,$31,10
1487	addu	$13,$14
1488	srl	$14,$31,13
1489	xor	$30,$15
1490	sll	$15,$31,19
1491	xor	$30,$14
1492	srl	$14,$31,22
1493	xor	$30,$15
1494	sll	$15,$31,30
1495	xor	$30,$14
1496	and	$14,$1,$2
1497	xor	$30,$15			# Sigma0(a)
1498	xor	$15,$1,$2
1499#endif
1500	sw	$9,4($29)	# offload to ring buffer
1501	addu	$30,$14
1502	and	$15,$31
1503	addu	$13,$16			# +=K[17]
1504	addu	$30,$15			# +=Maj(a,b,c)
1505	addu	$3,$13
1506	addu	$30,$13
1507	lw	$12,16($29)	# prefetch from ring buffer
1508#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1509	srl	$16,$11,3		# Xupdate(18)
1510	rotr	$14,$11,7
1511	addu	$10,$19			# +=X[i+9]
1512	xor	$16,$14
1513	rotr	$14,$11,18
1514
1515	srl	$17,$8,10
1516	rotr	$15,$8,17
1517	xor	$16,$14			# sigma0(X[i+1])
1518	rotr	$14,$8,19
1519	xor	$17,$15
1520	addu	$10,$16
1521#else
1522	srl	$16,$11,3		# Xupdate(18)
1523	addu	$10,$19			# +=X[i+9]
1524	sll	$15,$11,14
1525	srl	$14,$11,7
1526	xor	$16,$15
1527	sll	$15,11
1528	xor	$16,$14
1529	srl	$14,$11,18
1530	xor	$16,$15
1531
1532	srl	$17,$8,10
1533	xor	$16,$14			# sigma0(X[i+1])
1534	sll	$15,$8,13
1535	addu	$10,$16
1536	srl	$14,$8,17
1537	xor	$17,$15
1538	sll	$15,2
1539	xor	$17,$14
1540	srl	$14,$8,19
1541	xor	$17,$15
1542#endif
1543	xor	$17,$14			# sigma1(X[i+14])
1544	addu	$10,$17
1545#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1546	xor	$17,$7,$24			# 18
1547	rotr	$15,$3,6
1548	addu	$14,$10,$25
1549	rotr	$16,$3,11
1550	and	$17,$3
1551	rotr	$25,$3,25
1552	xor	$15,$16
1553	rotr	$16,$30,2
1554	xor	$17,$24			# Ch(e,f,g)
1555	xor	$15,$25			# Sigma1(e)
1556
1557	rotr	$25,$30,13
1558	addu	$14,$17
1559	lw	$17,72($6)		# K[18]
1560	xor	$25,$16
1561	rotr	$16,$30,22
1562	addu	$14,$15
1563	and	$15,$31,$1
1564	xor	$25,$16			# Sigma0(a)
1565	xor	$16,$31,$1
1566#else
1567	addu	$14,$10,$25			# 18
1568	srl	$25,$3,6
1569	xor	$17,$7,$24
1570	sll	$16,$3,7
1571	and	$17,$3
1572	srl	$15,$3,11
1573	xor	$25,$16
1574	sll	$16,$3,21
1575	xor	$25,$15
1576	srl	$15,$3,25
1577	xor	$25,$16
1578	sll	$16,$3,26
1579	xor	$25,$15
1580	xor	$17,$24			# Ch(e,f,g)
1581	xor	$15,$16,$25			# Sigma1(e)
1582
1583	srl	$25,$30,2
1584	addu	$14,$17
1585	lw	$17,72($6)		# K[18]
1586	sll	$16,$30,10
1587	addu	$14,$15
1588	srl	$15,$30,13
1589	xor	$25,$16
1590	sll	$16,$30,19
1591	xor	$25,$15
1592	srl	$15,$30,22
1593	xor	$25,$16
1594	sll	$16,$30,30
1595	xor	$25,$15
1596	and	$15,$31,$1
1597	xor	$25,$16			# Sigma0(a)
1598	xor	$16,$31,$1
1599#endif
1600	sw	$10,8($29)	# offload to ring buffer
1601	addu	$25,$15
1602	and	$16,$30
1603	addu	$14,$17			# +=K[18]
1604	addu	$25,$16			# +=Maj(a,b,c)
1605	addu	$2,$14
1606	addu	$25,$14
1607	lw	$13,20($29)	# prefetch from ring buffer
1608#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1609	srl	$17,$12,3		# Xupdate(19)
1610	rotr	$15,$12,7
1611	addu	$11,$20			# +=X[i+9]
1612	xor	$17,$15
1613	rotr	$15,$12,18
1614
1615	srl	$18,$9,10
1616	rotr	$16,$9,17
1617	xor	$17,$15			# sigma0(X[i+1])
1618	rotr	$15,$9,19
1619	xor	$18,$16
1620	addu	$11,$17
1621#else
1622	srl	$17,$12,3		# Xupdate(19)
1623	addu	$11,$20			# +=X[i+9]
1624	sll	$16,$12,14
1625	srl	$15,$12,7
1626	xor	$17,$16
1627	sll	$16,11
1628	xor	$17,$15
1629	srl	$15,$12,18
1630	xor	$17,$16
1631
1632	srl	$18,$9,10
1633	xor	$17,$15			# sigma0(X[i+1])
1634	sll	$16,$9,13
1635	addu	$11,$17
1636	srl	$15,$9,17
1637	xor	$18,$16
1638	sll	$16,2
1639	xor	$18,$15
1640	srl	$15,$9,19
1641	xor	$18,$16
1642#endif
1643	xor	$18,$15			# sigma1(X[i+14])
1644	addu	$11,$18
1645#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1646	xor	$18,$3,$7			# 19
1647	rotr	$16,$2,6
1648	addu	$15,$11,$24
1649	rotr	$17,$2,11
1650	and	$18,$2
1651	rotr	$24,$2,25
1652	xor	$16,$17
1653	rotr	$17,$25,2
1654	xor	$18,$7			# Ch(e,f,g)
1655	xor	$16,$24			# Sigma1(e)
1656
1657	rotr	$24,$25,13
1658	addu	$15,$18
1659	lw	$18,76($6)		# K[19]
1660	xor	$24,$17
1661	rotr	$17,$25,22
1662	addu	$15,$16
1663	and	$16,$30,$31
1664	xor	$24,$17			# Sigma0(a)
1665	xor	$17,$30,$31
1666#else
1667	addu	$15,$11,$24			# 19
1668	srl	$24,$2,6
1669	xor	$18,$3,$7
1670	sll	$17,$2,7
1671	and	$18,$2
1672	srl	$16,$2,11
1673	xor	$24,$17
1674	sll	$17,$2,21
1675	xor	$24,$16
1676	srl	$16,$2,25
1677	xor	$24,$17
1678	sll	$17,$2,26
1679	xor	$24,$16
1680	xor	$18,$7			# Ch(e,f,g)
1681	xor	$16,$17,$24			# Sigma1(e)
1682
1683	srl	$24,$25,2
1684	addu	$15,$18
1685	lw	$18,76($6)		# K[19]
1686	sll	$17,$25,10
1687	addu	$15,$16
1688	srl	$16,$25,13
1689	xor	$24,$17
1690	sll	$17,$25,19
1691	xor	$24,$16
1692	srl	$16,$25,22
1693	xor	$24,$17
1694	sll	$17,$25,30
1695	xor	$24,$16
1696	and	$16,$30,$31
1697	xor	$24,$17			# Sigma0(a)
1698	xor	$17,$30,$31
1699#endif
1700	sw	$11,12($29)	# offload to ring buffer
1701	addu	$24,$16
1702	and	$17,$25
1703	addu	$15,$18			# +=K[19]
1704	addu	$24,$17			# +=Maj(a,b,c)
1705	addu	$1,$15
1706	addu	$24,$15
1707	lw	$14,24($29)	# prefetch from ring buffer
1708#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1709	srl	$18,$13,3		# Xupdate(20)
1710	rotr	$16,$13,7
1711	addu	$12,$21			# +=X[i+9]
1712	xor	$18,$16
1713	rotr	$16,$13,18
1714
1715	srl	$19,$10,10
1716	rotr	$17,$10,17
1717	xor	$18,$16			# sigma0(X[i+1])
1718	rotr	$16,$10,19
1719	xor	$19,$17
1720	addu	$12,$18
1721#else
1722	srl	$18,$13,3		# Xupdate(20)
1723	addu	$12,$21			# +=X[i+9]
1724	sll	$17,$13,14
1725	srl	$16,$13,7
1726	xor	$18,$17
1727	sll	$17,11
1728	xor	$18,$16
1729	srl	$16,$13,18
1730	xor	$18,$17
1731
1732	srl	$19,$10,10
1733	xor	$18,$16			# sigma0(X[i+1])
1734	sll	$17,$10,13
1735	addu	$12,$18
1736	srl	$16,$10,17
1737	xor	$19,$17
1738	sll	$17,2
1739	xor	$19,$16
1740	srl	$16,$10,19
1741	xor	$19,$17
1742#endif
1743	xor	$19,$16			# sigma1(X[i+14])
1744	addu	$12,$19
1745#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1746	xor	$19,$2,$3			# 20
1747	rotr	$17,$1,6
1748	addu	$16,$12,$7
1749	rotr	$18,$1,11
1750	and	$19,$1
1751	rotr	$7,$1,25
1752	xor	$17,$18
1753	rotr	$18,$24,2
1754	xor	$19,$3			# Ch(e,f,g)
1755	xor	$17,$7			# Sigma1(e)
1756
1757	rotr	$7,$24,13
1758	addu	$16,$19
1759	lw	$19,80($6)		# K[20]
1760	xor	$7,$18
1761	rotr	$18,$24,22
1762	addu	$16,$17
1763	and	$17,$25,$30
1764	xor	$7,$18			# Sigma0(a)
1765	xor	$18,$25,$30
1766#else
1767	addu	$16,$12,$7			# 20
1768	srl	$7,$1,6
1769	xor	$19,$2,$3
1770	sll	$18,$1,7
1771	and	$19,$1
1772	srl	$17,$1,11
1773	xor	$7,$18
1774	sll	$18,$1,21
1775	xor	$7,$17
1776	srl	$17,$1,25
1777	xor	$7,$18
1778	sll	$18,$1,26
1779	xor	$7,$17
1780	xor	$19,$3			# Ch(e,f,g)
1781	xor	$17,$18,$7			# Sigma1(e)
1782
1783	srl	$7,$24,2
1784	addu	$16,$19
1785	lw	$19,80($6)		# K[20]
1786	sll	$18,$24,10
1787	addu	$16,$17
1788	srl	$17,$24,13
1789	xor	$7,$18
1790	sll	$18,$24,19
1791	xor	$7,$17
1792	srl	$17,$24,22
1793	xor	$7,$18
1794	sll	$18,$24,30
1795	xor	$7,$17
1796	and	$17,$25,$30
1797	xor	$7,$18			# Sigma0(a)
1798	xor	$18,$25,$30
1799#endif
1800	sw	$12,16($29)	# offload to ring buffer
1801	addu	$7,$17
1802	and	$18,$24
1803	addu	$16,$19			# +=K[20]
1804	addu	$7,$18			# +=Maj(a,b,c)
1805	addu	$31,$16
1806	addu	$7,$16
1807	lw	$15,28($29)	# prefetch from ring buffer
1808#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1809	srl	$19,$14,3		# Xupdate(21)
1810	rotr	$17,$14,7
1811	addu	$13,$22			# +=X[i+9]
1812	xor	$19,$17
1813	rotr	$17,$14,18
1814
1815	srl	$20,$11,10
1816	rotr	$18,$11,17
1817	xor	$19,$17			# sigma0(X[i+1])
1818	rotr	$17,$11,19
1819	xor	$20,$18
1820	addu	$13,$19
1821#else
1822	srl	$19,$14,3		# Xupdate(21)
1823	addu	$13,$22			# +=X[i+9]
1824	sll	$18,$14,14
1825	srl	$17,$14,7
1826	xor	$19,$18
1827	sll	$18,11
1828	xor	$19,$17
1829	srl	$17,$14,18
1830	xor	$19,$18
1831
1832	srl	$20,$11,10
1833	xor	$19,$17			# sigma0(X[i+1])
1834	sll	$18,$11,13
1835	addu	$13,$19
1836	srl	$17,$11,17
1837	xor	$20,$18
1838	sll	$18,2
1839	xor	$20,$17
1840	srl	$17,$11,19
1841	xor	$20,$18
1842#endif
1843	xor	$20,$17			# sigma1(X[i+14])
1844	addu	$13,$20
1845#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1846	xor	$20,$1,$2			# 21
1847	rotr	$18,$31,6
1848	addu	$17,$13,$3
1849	rotr	$19,$31,11
1850	and	$20,$31
1851	rotr	$3,$31,25
1852	xor	$18,$19
1853	rotr	$19,$7,2
1854	xor	$20,$2			# Ch(e,f,g)
1855	xor	$18,$3			# Sigma1(e)
1856
1857	rotr	$3,$7,13
1858	addu	$17,$20
1859	lw	$20,84($6)		# K[21]
1860	xor	$3,$19
1861	rotr	$19,$7,22
1862	addu	$17,$18
1863	and	$18,$24,$25
1864	xor	$3,$19			# Sigma0(a)
1865	xor	$19,$24,$25
1866#else
1867	addu	$17,$13,$3			# 21
1868	srl	$3,$31,6
1869	xor	$20,$1,$2
1870	sll	$19,$31,7
1871	and	$20,$31
1872	srl	$18,$31,11
1873	xor	$3,$19
1874	sll	$19,$31,21
1875	xor	$3,$18
1876	srl	$18,$31,25
1877	xor	$3,$19
1878	sll	$19,$31,26
1879	xor	$3,$18
1880	xor	$20,$2			# Ch(e,f,g)
1881	xor	$18,$19,$3			# Sigma1(e)
1882
1883	srl	$3,$7,2
1884	addu	$17,$20
1885	lw	$20,84($6)		# K[21]
1886	sll	$19,$7,10
1887	addu	$17,$18
1888	srl	$18,$7,13
1889	xor	$3,$19
1890	sll	$19,$7,19
1891	xor	$3,$18
1892	srl	$18,$7,22
1893	xor	$3,$19
1894	sll	$19,$7,30
1895	xor	$3,$18
1896	and	$18,$24,$25
1897	xor	$3,$19			# Sigma0(a)
1898	xor	$19,$24,$25
1899#endif
1900	sw	$13,20($29)	# offload to ring buffer
1901	addu	$3,$18
1902	and	$19,$7
1903	addu	$17,$20			# +=K[21]
1904	addu	$3,$19			# +=Maj(a,b,c)
1905	addu	$30,$17
1906	addu	$3,$17
1907	lw	$16,32($29)	# prefetch from ring buffer
1908#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1909	srl	$20,$15,3		# Xupdate(22)
1910	rotr	$18,$15,7
1911	addu	$14,$23			# +=X[i+9]
1912	xor	$20,$18
1913	rotr	$18,$15,18
1914
1915	srl	$21,$12,10
1916	rotr	$19,$12,17
1917	xor	$20,$18			# sigma0(X[i+1])
1918	rotr	$18,$12,19
1919	xor	$21,$19
1920	addu	$14,$20
1921#else
1922	srl	$20,$15,3		# Xupdate(22)
1923	addu	$14,$23			# +=X[i+9]
1924	sll	$19,$15,14
1925	srl	$18,$15,7
1926	xor	$20,$19
1927	sll	$19,11
1928	xor	$20,$18
1929	srl	$18,$15,18
1930	xor	$20,$19
1931
1932	srl	$21,$12,10
1933	xor	$20,$18			# sigma0(X[i+1])
1934	sll	$19,$12,13
1935	addu	$14,$20
1936	srl	$18,$12,17
1937	xor	$21,$19
1938	sll	$19,2
1939	xor	$21,$18
1940	srl	$18,$12,19
1941	xor	$21,$19
1942#endif
1943	xor	$21,$18			# sigma1(X[i+14])
1944	addu	$14,$21
1945#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1946	xor	$21,$31,$1			# 22
1947	rotr	$19,$30,6
1948	addu	$18,$14,$2
1949	rotr	$20,$30,11
1950	and	$21,$30
1951	rotr	$2,$30,25
1952	xor	$19,$20
1953	rotr	$20,$3,2
1954	xor	$21,$1			# Ch(e,f,g)
1955	xor	$19,$2			# Sigma1(e)
1956
1957	rotr	$2,$3,13
1958	addu	$18,$21
1959	lw	$21,88($6)		# K[22]
1960	xor	$2,$20
1961	rotr	$20,$3,22
1962	addu	$18,$19
1963	and	$19,$7,$24
1964	xor	$2,$20			# Sigma0(a)
1965	xor	$20,$7,$24
1966#else
1967	addu	$18,$14,$2			# 22
1968	srl	$2,$30,6
1969	xor	$21,$31,$1
1970	sll	$20,$30,7
1971	and	$21,$30
1972	srl	$19,$30,11
1973	xor	$2,$20
1974	sll	$20,$30,21
1975	xor	$2,$19
1976	srl	$19,$30,25
1977	xor	$2,$20
1978	sll	$20,$30,26
1979	xor	$2,$19
1980	xor	$21,$1			# Ch(e,f,g)
1981	xor	$19,$20,$2			# Sigma1(e)
1982
1983	srl	$2,$3,2
1984	addu	$18,$21
1985	lw	$21,88($6)		# K[22]
1986	sll	$20,$3,10
1987	addu	$18,$19
1988	srl	$19,$3,13
1989	xor	$2,$20
1990	sll	$20,$3,19
1991	xor	$2,$19
1992	srl	$19,$3,22
1993	xor	$2,$20
1994	sll	$20,$3,30
1995	xor	$2,$19
1996	and	$19,$7,$24
1997	xor	$2,$20			# Sigma0(a)
1998	xor	$20,$7,$24
1999#endif
2000	sw	$14,24($29)	# offload to ring buffer
2001	addu	$2,$19
2002	and	$20,$3
2003	addu	$18,$21			# +=K[22]
2004	addu	$2,$20			# +=Maj(a,b,c)
2005	addu	$25,$18
2006	addu	$2,$18
2007	lw	$17,36($29)	# prefetch from ring buffer
2008#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2009	srl	$21,$16,3		# Xupdate(23)
2010	rotr	$19,$16,7
2011	addu	$15,$8			# +=X[i+9]
2012	xor	$21,$19
2013	rotr	$19,$16,18
2014
2015	srl	$22,$13,10
2016	rotr	$20,$13,17
2017	xor	$21,$19			# sigma0(X[i+1])
2018	rotr	$19,$13,19
2019	xor	$22,$20
2020	addu	$15,$21
2021#else
2022	srl	$21,$16,3		# Xupdate(23)
2023	addu	$15,$8			# +=X[i+9]
2024	sll	$20,$16,14
2025	srl	$19,$16,7
2026	xor	$21,$20
2027	sll	$20,11
2028	xor	$21,$19
2029	srl	$19,$16,18
2030	xor	$21,$20
2031
2032	srl	$22,$13,10
2033	xor	$21,$19			# sigma0(X[i+1])
2034	sll	$20,$13,13
2035	addu	$15,$21
2036	srl	$19,$13,17
2037	xor	$22,$20
2038	sll	$20,2
2039	xor	$22,$19
2040	srl	$19,$13,19
2041	xor	$22,$20
2042#endif
2043	xor	$22,$19			# sigma1(X[i+14])
2044	addu	$15,$22
2045#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2046	xor	$22,$30,$31			# 23
2047	rotr	$20,$25,6
2048	addu	$19,$15,$1
2049	rotr	$21,$25,11
2050	and	$22,$25
2051	rotr	$1,$25,25
2052	xor	$20,$21
2053	rotr	$21,$2,2
2054	xor	$22,$31			# Ch(e,f,g)
2055	xor	$20,$1			# Sigma1(e)
2056
2057	rotr	$1,$2,13
2058	addu	$19,$22
2059	lw	$22,92($6)		# K[23]
2060	xor	$1,$21
2061	rotr	$21,$2,22
2062	addu	$19,$20
2063	and	$20,$3,$7
2064	xor	$1,$21			# Sigma0(a)
2065	xor	$21,$3,$7
2066#else
2067	addu	$19,$15,$1			# 23
2068	srl	$1,$25,6
2069	xor	$22,$30,$31
2070	sll	$21,$25,7
2071	and	$22,$25
2072	srl	$20,$25,11
2073	xor	$1,$21
2074	sll	$21,$25,21
2075	xor	$1,$20
2076	srl	$20,$25,25
2077	xor	$1,$21
2078	sll	$21,$25,26
2079	xor	$1,$20
2080	xor	$22,$31			# Ch(e,f,g)
2081	xor	$20,$21,$1			# Sigma1(e)
2082
2083	srl	$1,$2,2
2084	addu	$19,$22
2085	lw	$22,92($6)		# K[23]
2086	sll	$21,$2,10
2087	addu	$19,$20
2088	srl	$20,$2,13
2089	xor	$1,$21
2090	sll	$21,$2,19
2091	xor	$1,$20
2092	srl	$20,$2,22
2093	xor	$1,$21
2094	sll	$21,$2,30
2095	xor	$1,$20
2096	and	$20,$3,$7
2097	xor	$1,$21			# Sigma0(a)
2098	xor	$21,$3,$7
2099#endif
2100	sw	$15,28($29)	# offload to ring buffer
2101	addu	$1,$20
2102	and	$21,$2
2103	addu	$19,$22			# +=K[23]
2104	addu	$1,$21			# +=Maj(a,b,c)
2105	addu	$24,$19
2106	addu	$1,$19
2107	lw	$18,40($29)	# prefetch from ring buffer
2108#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2109	srl	$22,$17,3		# Xupdate(24)
2110	rotr	$20,$17,7
2111	addu	$16,$9			# +=X[i+9]
2112	xor	$22,$20
2113	rotr	$20,$17,18
2114
2115	srl	$23,$14,10
2116	rotr	$21,$14,17
2117	xor	$22,$20			# sigma0(X[i+1])
2118	rotr	$20,$14,19
2119	xor	$23,$21
2120	addu	$16,$22
2121#else
2122	srl	$22,$17,3		# Xupdate(24)
2123	addu	$16,$9			# +=X[i+9]
2124	sll	$21,$17,14
2125	srl	$20,$17,7
2126	xor	$22,$21
2127	sll	$21,11
2128	xor	$22,$20
2129	srl	$20,$17,18
2130	xor	$22,$21
2131
2132	srl	$23,$14,10
2133	xor	$22,$20			# sigma0(X[i+1])
2134	sll	$21,$14,13
2135	addu	$16,$22
2136	srl	$20,$14,17
2137	xor	$23,$21
2138	sll	$21,2
2139	xor	$23,$20
2140	srl	$20,$14,19
2141	xor	$23,$21
2142#endif
2143	xor	$23,$20			# sigma1(X[i+14])
2144	addu	$16,$23
2145#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2146	xor	$23,$25,$30			# 24
2147	rotr	$21,$24,6
2148	addu	$20,$16,$31
2149	rotr	$22,$24,11
2150	and	$23,$24
2151	rotr	$31,$24,25
2152	xor	$21,$22
2153	rotr	$22,$1,2
2154	xor	$23,$30			# Ch(e,f,g)
2155	xor	$21,$31			# Sigma1(e)
2156
2157	rotr	$31,$1,13
2158	addu	$20,$23
2159	lw	$23,96($6)		# K[24]
2160	xor	$31,$22
2161	rotr	$22,$1,22
2162	addu	$20,$21
2163	and	$21,$2,$3
2164	xor	$31,$22			# Sigma0(a)
2165	xor	$22,$2,$3
2166#else
2167	addu	$20,$16,$31			# 24
2168	srl	$31,$24,6
2169	xor	$23,$25,$30
2170	sll	$22,$24,7
2171	and	$23,$24
2172	srl	$21,$24,11
2173	xor	$31,$22
2174	sll	$22,$24,21
2175	xor	$31,$21
2176	srl	$21,$24,25
2177	xor	$31,$22
2178	sll	$22,$24,26
2179	xor	$31,$21
2180	xor	$23,$30			# Ch(e,f,g)
2181	xor	$21,$22,$31			# Sigma1(e)
2182
2183	srl	$31,$1,2
2184	addu	$20,$23
2185	lw	$23,96($6)		# K[24]
2186	sll	$22,$1,10
2187	addu	$20,$21
2188	srl	$21,$1,13
2189	xor	$31,$22
2190	sll	$22,$1,19
2191	xor	$31,$21
2192	srl	$21,$1,22
2193	xor	$31,$22
2194	sll	$22,$1,30
2195	xor	$31,$21
2196	and	$21,$2,$3
2197	xor	$31,$22			# Sigma0(a)
2198	xor	$22,$2,$3
2199#endif
2200	sw	$16,32($29)	# offload to ring buffer
2201	addu	$31,$21
2202	and	$22,$1
2203	addu	$20,$23			# +=K[24]
2204	addu	$31,$22			# +=Maj(a,b,c)
2205	addu	$7,$20
2206	addu	$31,$20
2207	lw	$19,44($29)	# prefetch from ring buffer
2208#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2209	srl	$23,$18,3		# Xupdate(25)
2210	rotr	$21,$18,7
2211	addu	$17,$10			# +=X[i+9]
2212	xor	$23,$21
2213	rotr	$21,$18,18
2214
2215	srl	$8,$15,10
2216	rotr	$22,$15,17
2217	xor	$23,$21			# sigma0(X[i+1])
2218	rotr	$21,$15,19
2219	xor	$8,$22
2220	addu	$17,$23
2221#else
2222	srl	$23,$18,3		# Xupdate(25)
2223	addu	$17,$10			# +=X[i+9]
2224	sll	$22,$18,14
2225	srl	$21,$18,7
2226	xor	$23,$22
2227	sll	$22,11
2228	xor	$23,$21
2229	srl	$21,$18,18
2230	xor	$23,$22
2231
2232	srl	$8,$15,10
2233	xor	$23,$21			# sigma0(X[i+1])
2234	sll	$22,$15,13
2235	addu	$17,$23
2236	srl	$21,$15,17
2237	xor	$8,$22
2238	sll	$22,2
2239	xor	$8,$21
2240	srl	$21,$15,19
2241	xor	$8,$22
2242#endif
2243	xor	$8,$21			# sigma1(X[i+14])
2244	addu	$17,$8
2245#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2246	xor	$8,$24,$25			# 25
2247	rotr	$22,$7,6
2248	addu	$21,$17,$30
2249	rotr	$23,$7,11
2250	and	$8,$7
2251	rotr	$30,$7,25
2252	xor	$22,$23
2253	rotr	$23,$31,2
2254	xor	$8,$25			# Ch(e,f,g)
2255	xor	$22,$30			# Sigma1(e)
2256
2257	rotr	$30,$31,13
2258	addu	$21,$8
2259	lw	$8,100($6)		# K[25]
2260	xor	$30,$23
2261	rotr	$23,$31,22
2262	addu	$21,$22
2263	and	$22,$1,$2
2264	xor	$30,$23			# Sigma0(a)
2265	xor	$23,$1,$2
2266#else
2267	addu	$21,$17,$30			# 25
2268	srl	$30,$7,6
2269	xor	$8,$24,$25
2270	sll	$23,$7,7
2271	and	$8,$7
2272	srl	$22,$7,11
2273	xor	$30,$23
2274	sll	$23,$7,21
2275	xor	$30,$22
2276	srl	$22,$7,25
2277	xor	$30,$23
2278	sll	$23,$7,26
2279	xor	$30,$22
2280	xor	$8,$25			# Ch(e,f,g)
2281	xor	$22,$23,$30			# Sigma1(e)
2282
2283	srl	$30,$31,2
2284	addu	$21,$8
2285	lw	$8,100($6)		# K[25]
2286	sll	$23,$31,10
2287	addu	$21,$22
2288	srl	$22,$31,13
2289	xor	$30,$23
2290	sll	$23,$31,19
2291	xor	$30,$22
2292	srl	$22,$31,22
2293	xor	$30,$23
2294	sll	$23,$31,30
2295	xor	$30,$22
2296	and	$22,$1,$2
2297	xor	$30,$23			# Sigma0(a)
2298	xor	$23,$1,$2
2299#endif
2300	sw	$17,36($29)	# offload to ring buffer
2301	addu	$30,$22
2302	and	$23,$31
2303	addu	$21,$8			# +=K[25]
2304	addu	$30,$23			# +=Maj(a,b,c)
2305	addu	$3,$21
2306	addu	$30,$21
2307	lw	$20,48($29)	# prefetch from ring buffer
2308#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2309	srl	$8,$19,3		# Xupdate(26)
2310	rotr	$22,$19,7
2311	addu	$18,$11			# +=X[i+9]
2312	xor	$8,$22
2313	rotr	$22,$19,18
2314
2315	srl	$9,$16,10
2316	rotr	$23,$16,17
2317	xor	$8,$22			# sigma0(X[i+1])
2318	rotr	$22,$16,19
2319	xor	$9,$23
2320	addu	$18,$8
2321#else
2322	srl	$8,$19,3		# Xupdate(26)
2323	addu	$18,$11			# +=X[i+9]
2324	sll	$23,$19,14
2325	srl	$22,$19,7
2326	xor	$8,$23
2327	sll	$23,11
2328	xor	$8,$22
2329	srl	$22,$19,18
2330	xor	$8,$23
2331
2332	srl	$9,$16,10
2333	xor	$8,$22			# sigma0(X[i+1])
2334	sll	$23,$16,13
2335	addu	$18,$8
2336	srl	$22,$16,17
2337	xor	$9,$23
2338	sll	$23,2
2339	xor	$9,$22
2340	srl	$22,$16,19
2341	xor	$9,$23
2342#endif
2343	xor	$9,$22			# sigma1(X[i+14])
2344	addu	$18,$9
2345#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2346	xor	$9,$7,$24			# 26
2347	rotr	$23,$3,6
2348	addu	$22,$18,$25
2349	rotr	$8,$3,11
2350	and	$9,$3
2351	rotr	$25,$3,25
2352	xor	$23,$8
2353	rotr	$8,$30,2
2354	xor	$9,$24			# Ch(e,f,g)
2355	xor	$23,$25			# Sigma1(e)
2356
2357	rotr	$25,$30,13
2358	addu	$22,$9
2359	lw	$9,104($6)		# K[26]
2360	xor	$25,$8
2361	rotr	$8,$30,22
2362	addu	$22,$23
2363	and	$23,$31,$1
2364	xor	$25,$8			# Sigma0(a)
2365	xor	$8,$31,$1
2366#else
2367	addu	$22,$18,$25			# 26
2368	srl	$25,$3,6
2369	xor	$9,$7,$24
2370	sll	$8,$3,7
2371	and	$9,$3
2372	srl	$23,$3,11
2373	xor	$25,$8
2374	sll	$8,$3,21
2375	xor	$25,$23
2376	srl	$23,$3,25
2377	xor	$25,$8
2378	sll	$8,$3,26
2379	xor	$25,$23
2380	xor	$9,$24			# Ch(e,f,g)
2381	xor	$23,$8,$25			# Sigma1(e)
2382
2383	srl	$25,$30,2
2384	addu	$22,$9
2385	lw	$9,104($6)		# K[26]
2386	sll	$8,$30,10
2387	addu	$22,$23
2388	srl	$23,$30,13
2389	xor	$25,$8
2390	sll	$8,$30,19
2391	xor	$25,$23
2392	srl	$23,$30,22
2393	xor	$25,$8
2394	sll	$8,$30,30
2395	xor	$25,$23
2396	and	$23,$31,$1
2397	xor	$25,$8			# Sigma0(a)
2398	xor	$8,$31,$1
2399#endif
2400	sw	$18,40($29)	# offload to ring buffer
2401	addu	$25,$23
2402	and	$8,$30
2403	addu	$22,$9			# +=K[26]
2404	addu	$25,$8			# +=Maj(a,b,c)
2405	addu	$2,$22
2406	addu	$25,$22
2407	lw	$21,52($29)	# prefetch from ring buffer
2408#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2409	srl	$9,$20,3		# Xupdate(27)
2410	rotr	$23,$20,7
2411	addu	$19,$12			# +=X[i+9]
2412	xor	$9,$23
2413	rotr	$23,$20,18
2414
2415	srl	$10,$17,10
2416	rotr	$8,$17,17
2417	xor	$9,$23			# sigma0(X[i+1])
2418	rotr	$23,$17,19
2419	xor	$10,$8
2420	addu	$19,$9
2421#else
2422	srl	$9,$20,3		# Xupdate(27)
2423	addu	$19,$12			# +=X[i+9]
2424	sll	$8,$20,14
2425	srl	$23,$20,7
2426	xor	$9,$8
2427	sll	$8,11
2428	xor	$9,$23
2429	srl	$23,$20,18
2430	xor	$9,$8
2431
2432	srl	$10,$17,10
2433	xor	$9,$23			# sigma0(X[i+1])
2434	sll	$8,$17,13
2435	addu	$19,$9
2436	srl	$23,$17,17
2437	xor	$10,$8
2438	sll	$8,2
2439	xor	$10,$23
2440	srl	$23,$17,19
2441	xor	$10,$8
2442#endif
2443	xor	$10,$23			# sigma1(X[i+14])
2444	addu	$19,$10
2445#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2446	xor	$10,$3,$7			# 27
2447	rotr	$8,$2,6
2448	addu	$23,$19,$24
2449	rotr	$9,$2,11
2450	and	$10,$2
2451	rotr	$24,$2,25
2452	xor	$8,$9
2453	rotr	$9,$25,2
2454	xor	$10,$7			# Ch(e,f,g)
2455	xor	$8,$24			# Sigma1(e)
2456
2457	rotr	$24,$25,13
2458	addu	$23,$10
2459	lw	$10,108($6)		# K[27]
2460	xor	$24,$9
2461	rotr	$9,$25,22
2462	addu	$23,$8
2463	and	$8,$30,$31
2464	xor	$24,$9			# Sigma0(a)
2465	xor	$9,$30,$31
2466#else
2467	addu	$23,$19,$24			# 27
2468	srl	$24,$2,6
2469	xor	$10,$3,$7
2470	sll	$9,$2,7
2471	and	$10,$2
2472	srl	$8,$2,11
2473	xor	$24,$9
2474	sll	$9,$2,21
2475	xor	$24,$8
2476	srl	$8,$2,25
2477	xor	$24,$9
2478	sll	$9,$2,26
2479	xor	$24,$8
2480	xor	$10,$7			# Ch(e,f,g)
2481	xor	$8,$9,$24			# Sigma1(e)
2482
2483	srl	$24,$25,2
2484	addu	$23,$10
2485	lw	$10,108($6)		# K[27]
2486	sll	$9,$25,10
2487	addu	$23,$8
2488	srl	$8,$25,13
2489	xor	$24,$9
2490	sll	$9,$25,19
2491	xor	$24,$8
2492	srl	$8,$25,22
2493	xor	$24,$9
2494	sll	$9,$25,30
2495	xor	$24,$8
2496	and	$8,$30,$31
2497	xor	$24,$9			# Sigma0(a)
2498	xor	$9,$30,$31
2499#endif
2500	sw	$19,44($29)	# offload to ring buffer
2501	addu	$24,$8
2502	and	$9,$25
2503	addu	$23,$10			# +=K[27]
2504	addu	$24,$9			# +=Maj(a,b,c)
2505	addu	$1,$23
2506	addu	$24,$23
2507	lw	$22,56($29)	# prefetch from ring buffer
2508#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2509	srl	$10,$21,3		# Xupdate(28)
2510	rotr	$8,$21,7
2511	addu	$20,$13			# +=X[i+9]
2512	xor	$10,$8
2513	rotr	$8,$21,18
2514
2515	srl	$11,$18,10
2516	rotr	$9,$18,17
2517	xor	$10,$8			# sigma0(X[i+1])
2518	rotr	$8,$18,19
2519	xor	$11,$9
2520	addu	$20,$10
2521#else
2522	srl	$10,$21,3		# Xupdate(28)
2523	addu	$20,$13			# +=X[i+9]
2524	sll	$9,$21,14
2525	srl	$8,$21,7
2526	xor	$10,$9
2527	sll	$9,11
2528	xor	$10,$8
2529	srl	$8,$21,18
2530	xor	$10,$9
2531
2532	srl	$11,$18,10
2533	xor	$10,$8			# sigma0(X[i+1])
2534	sll	$9,$18,13
2535	addu	$20,$10
2536	srl	$8,$18,17
2537	xor	$11,$9
2538	sll	$9,2
2539	xor	$11,$8
2540	srl	$8,$18,19
2541	xor	$11,$9
2542#endif
2543	xor	$11,$8			# sigma1(X[i+14])
2544	addu	$20,$11
2545#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2546	xor	$11,$2,$3			# 28
2547	rotr	$9,$1,6
2548	addu	$8,$20,$7
2549	rotr	$10,$1,11
2550	and	$11,$1
2551	rotr	$7,$1,25
2552	xor	$9,$10
2553	rotr	$10,$24,2
2554	xor	$11,$3			# Ch(e,f,g)
2555	xor	$9,$7			# Sigma1(e)
2556
2557	rotr	$7,$24,13
2558	addu	$8,$11
2559	lw	$11,112($6)		# K[28]
2560	xor	$7,$10
2561	rotr	$10,$24,22
2562	addu	$8,$9
2563	and	$9,$25,$30
2564	xor	$7,$10			# Sigma0(a)
2565	xor	$10,$25,$30
2566#else
2567	addu	$8,$20,$7			# 28
2568	srl	$7,$1,6
2569	xor	$11,$2,$3
2570	sll	$10,$1,7
2571	and	$11,$1
2572	srl	$9,$1,11
2573	xor	$7,$10
2574	sll	$10,$1,21
2575	xor	$7,$9
2576	srl	$9,$1,25
2577	xor	$7,$10
2578	sll	$10,$1,26
2579	xor	$7,$9
2580	xor	$11,$3			# Ch(e,f,g)
2581	xor	$9,$10,$7			# Sigma1(e)
2582
2583	srl	$7,$24,2
2584	addu	$8,$11
2585	lw	$11,112($6)		# K[28]
2586	sll	$10,$24,10
2587	addu	$8,$9
2588	srl	$9,$24,13
2589	xor	$7,$10
2590	sll	$10,$24,19
2591	xor	$7,$9
2592	srl	$9,$24,22
2593	xor	$7,$10
2594	sll	$10,$24,30
2595	xor	$7,$9
2596	and	$9,$25,$30
2597	xor	$7,$10			# Sigma0(a)
2598	xor	$10,$25,$30
2599#endif
2600	sw	$20,48($29)	# offload to ring buffer
2601	addu	$7,$9
2602	and	$10,$24
2603	addu	$8,$11			# +=K[28]
2604	addu	$7,$10			# +=Maj(a,b,c)
2605	addu	$31,$8
2606	addu	$7,$8
2607	lw	$23,60($29)	# prefetch from ring buffer
2608#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2609	srl	$11,$22,3		# Xupdate(29)
2610	rotr	$9,$22,7
2611	addu	$21,$14			# +=X[i+9]
2612	xor	$11,$9
2613	rotr	$9,$22,18
2614
2615	srl	$12,$19,10
2616	rotr	$10,$19,17
2617	xor	$11,$9			# sigma0(X[i+1])
2618	rotr	$9,$19,19
2619	xor	$12,$10
2620	addu	$21,$11
2621#else
2622	srl	$11,$22,3		# Xupdate(29)
2623	addu	$21,$14			# +=X[i+9]
2624	sll	$10,$22,14
2625	srl	$9,$22,7
2626	xor	$11,$10
2627	sll	$10,11
2628	xor	$11,$9
2629	srl	$9,$22,18
2630	xor	$11,$10
2631
2632	srl	$12,$19,10
2633	xor	$11,$9			# sigma0(X[i+1])
2634	sll	$10,$19,13
2635	addu	$21,$11
2636	srl	$9,$19,17
2637	xor	$12,$10
2638	sll	$10,2
2639	xor	$12,$9
2640	srl	$9,$19,19
2641	xor	$12,$10
2642#endif
2643	xor	$12,$9			# sigma1(X[i+14])
2644	addu	$21,$12
2645#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2646	xor	$12,$1,$2			# 29
2647	rotr	$10,$31,6
2648	addu	$9,$21,$3
2649	rotr	$11,$31,11
2650	and	$12,$31
2651	rotr	$3,$31,25
2652	xor	$10,$11
2653	rotr	$11,$7,2
2654	xor	$12,$2			# Ch(e,f,g)
2655	xor	$10,$3			# Sigma1(e)
2656
2657	rotr	$3,$7,13
2658	addu	$9,$12
2659	lw	$12,116($6)		# K[29]
2660	xor	$3,$11
2661	rotr	$11,$7,22
2662	addu	$9,$10
2663	and	$10,$24,$25
2664	xor	$3,$11			# Sigma0(a)
2665	xor	$11,$24,$25
2666#else
2667	addu	$9,$21,$3			# 29
2668	srl	$3,$31,6
2669	xor	$12,$1,$2
2670	sll	$11,$31,7
2671	and	$12,$31
2672	srl	$10,$31,11
2673	xor	$3,$11
2674	sll	$11,$31,21
2675	xor	$3,$10
2676	srl	$10,$31,25
2677	xor	$3,$11
2678	sll	$11,$31,26
2679	xor	$3,$10
2680	xor	$12,$2			# Ch(e,f,g)
2681	xor	$10,$11,$3			# Sigma1(e)
2682
2683	srl	$3,$7,2
2684	addu	$9,$12
2685	lw	$12,116($6)		# K[29]
2686	sll	$11,$7,10
2687	addu	$9,$10
2688	srl	$10,$7,13
2689	xor	$3,$11
2690	sll	$11,$7,19
2691	xor	$3,$10
2692	srl	$10,$7,22
2693	xor	$3,$11
2694	sll	$11,$7,30
2695	xor	$3,$10
2696	and	$10,$24,$25
2697	xor	$3,$11			# Sigma0(a)
2698	xor	$11,$24,$25
2699#endif
2700	sw	$21,52($29)	# offload to ring buffer
2701	addu	$3,$10
2702	and	$11,$7
2703	addu	$9,$12			# +=K[29]
2704	addu	$3,$11			# +=Maj(a,b,c)
2705	addu	$30,$9
2706	addu	$3,$9
2707	lw	$8,0($29)	# prefetch from ring buffer
2708#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2709	srl	$12,$23,3		# Xupdate(30)
2710	rotr	$10,$23,7
2711	addu	$22,$15			# +=X[i+9]
2712	xor	$12,$10
2713	rotr	$10,$23,18
2714
2715	srl	$13,$20,10
2716	rotr	$11,$20,17
2717	xor	$12,$10			# sigma0(X[i+1])
2718	rotr	$10,$20,19
2719	xor	$13,$11
2720	addu	$22,$12
2721#else
2722	srl	$12,$23,3		# Xupdate(30)
2723	addu	$22,$15			# +=X[i+9]
2724	sll	$11,$23,14
2725	srl	$10,$23,7
2726	xor	$12,$11
2727	sll	$11,11
2728	xor	$12,$10
2729	srl	$10,$23,18
2730	xor	$12,$11
2731
2732	srl	$13,$20,10
2733	xor	$12,$10			# sigma0(X[i+1])
2734	sll	$11,$20,13
2735	addu	$22,$12
2736	srl	$10,$20,17
2737	xor	$13,$11
2738	sll	$11,2
2739	xor	$13,$10
2740	srl	$10,$20,19
2741	xor	$13,$11
2742#endif
2743	xor	$13,$10			# sigma1(X[i+14])
2744	addu	$22,$13
2745#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2746	xor	$13,$31,$1			# 30
2747	rotr	$11,$30,6
2748	addu	$10,$22,$2
2749	rotr	$12,$30,11
2750	and	$13,$30
2751	rotr	$2,$30,25
2752	xor	$11,$12
2753	rotr	$12,$3,2
2754	xor	$13,$1			# Ch(e,f,g)
2755	xor	$11,$2			# Sigma1(e)
2756
2757	rotr	$2,$3,13
2758	addu	$10,$13
2759	lw	$13,120($6)		# K[30]
2760	xor	$2,$12
2761	rotr	$12,$3,22
2762	addu	$10,$11
2763	and	$11,$7,$24
2764	xor	$2,$12			# Sigma0(a)
2765	xor	$12,$7,$24
2766#else
2767	addu	$10,$22,$2			# 30
2768	srl	$2,$30,6
2769	xor	$13,$31,$1
2770	sll	$12,$30,7
2771	and	$13,$30
2772	srl	$11,$30,11
2773	xor	$2,$12
2774	sll	$12,$30,21
2775	xor	$2,$11
2776	srl	$11,$30,25
2777	xor	$2,$12
2778	sll	$12,$30,26
2779	xor	$2,$11
2780	xor	$13,$1			# Ch(e,f,g)
2781	xor	$11,$12,$2			# Sigma1(e)
2782
2783	srl	$2,$3,2
2784	addu	$10,$13
2785	lw	$13,120($6)		# K[30]
2786	sll	$12,$3,10
2787	addu	$10,$11
2788	srl	$11,$3,13
2789	xor	$2,$12
2790	sll	$12,$3,19
2791	xor	$2,$11
2792	srl	$11,$3,22
2793	xor	$2,$12
2794	sll	$12,$3,30
2795	xor	$2,$11
2796	and	$11,$7,$24
2797	xor	$2,$12			# Sigma0(a)
2798	xor	$12,$7,$24
2799#endif
2800	sw	$22,56($29)	# offload to ring buffer
2801	addu	$2,$11
2802	and	$12,$3
2803	addu	$10,$13			# +=K[30]
2804	addu	$2,$12			# +=Maj(a,b,c)
2805	addu	$25,$10
2806	addu	$2,$10
2807	lw	$9,4($29)	# prefetch from ring buffer
2808#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2809	srl	$13,$8,3		# Xupdate(31)
2810	rotr	$11,$8,7
2811	addu	$23,$16			# +=X[i+9]
2812	xor	$13,$11
2813	rotr	$11,$8,18
2814
2815	srl	$14,$21,10
2816	rotr	$12,$21,17
2817	xor	$13,$11			# sigma0(X[i+1])
2818	rotr	$11,$21,19
2819	xor	$14,$12
2820	addu	$23,$13
2821#else
2822	srl	$13,$8,3		# Xupdate(31)
2823	addu	$23,$16			# +=X[i+9]
2824	sll	$12,$8,14
2825	srl	$11,$8,7
2826	xor	$13,$12
2827	sll	$12,11
2828	xor	$13,$11
2829	srl	$11,$8,18
2830	xor	$13,$12
2831
2832	srl	$14,$21,10
2833	xor	$13,$11			# sigma0(X[i+1])
2834	sll	$12,$21,13
2835	addu	$23,$13
2836	srl	$11,$21,17
2837	xor	$14,$12
2838	sll	$12,2
2839	xor	$14,$11
2840	srl	$11,$21,19
2841	xor	$14,$12
2842#endif
2843	xor	$14,$11			# sigma1(X[i+14])
2844	addu	$23,$14
2845#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
2846	xor	$14,$30,$31			# 31
2847	rotr	$12,$25,6
2848	addu	$11,$23,$1
2849	rotr	$13,$25,11
2850	and	$14,$25
2851	rotr	$1,$25,25
2852	xor	$12,$13
2853	rotr	$13,$2,2
2854	xor	$14,$31			# Ch(e,f,g)
2855	xor	$12,$1			# Sigma1(e)
2856
2857	rotr	$1,$2,13
2858	addu	$11,$14
2859	lw	$14,124($6)		# K[31]
2860	xor	$1,$13
2861	rotr	$13,$2,22
2862	addu	$11,$12
2863	and	$12,$3,$7
2864	xor	$1,$13			# Sigma0(a)
2865	xor	$13,$3,$7
2866#else
2867	addu	$11,$23,$1			# 31
2868	srl	$1,$25,6
2869	xor	$14,$30,$31
2870	sll	$13,$25,7
2871	and	$14,$25
2872	srl	$12,$25,11
2873	xor	$1,$13
2874	sll	$13,$25,21
2875	xor	$1,$12
2876	srl	$12,$25,25
2877	xor	$1,$13
2878	sll	$13,$25,26
2879	xor	$1,$12
2880	xor	$14,$31			# Ch(e,f,g)
2881	xor	$12,$13,$1			# Sigma1(e)
2882
2883	srl	$1,$2,2
2884	addu	$11,$14
2885	lw	$14,124($6)		# K[31]
2886	sll	$13,$2,10
2887	addu	$11,$12
2888	srl	$12,$2,13
2889	xor	$1,$13
2890	sll	$13,$2,19
2891	xor	$1,$12
2892	srl	$12,$2,22
2893	xor	$1,$13
2894	sll	$13,$2,30
2895	xor	$1,$12
2896	and	$12,$3,$7
2897	xor	$1,$13			# Sigma0(a)
2898	xor	$13,$3,$7
2899#endif
2900	sw	$23,60($29)	# offload to ring buffer
2901	addu	$1,$12
2902	and	$13,$2
2903	addu	$11,$14			# +=K[31]
2904	addu	$1,$13			# +=Maj(a,b,c)
2905	addu	$24,$11
2906	addu	$1,$11
2907	lw	$10,8($29)	# prefetch from ring buffer
2908	and	$14,0xfff
2909	li	$15,2290
2910	.set	noreorder
2911	bne	$14,$15,.L16_xx
2912	dadd $6,16*4		# Ktbl+=16
2913
2914	ld	$23,16*4($29)	# restore pointer to the end of input
2915	lw	$8,0*4($4)
2916	lw	$9,1*4($4)
2917	lw	$10,2*4($4)
2918	dadd $5,16*4
2919	lw	$11,3*4($4)
2920	addu	$1,$8
2921	lw	$12,4*4($4)
2922	addu	$2,$9
2923	lw	$13,5*4($4)
2924	addu	$3,$10
2925	lw	$14,6*4($4)
2926	addu	$7,$11
2927	lw	$15,7*4($4)
2928	addu	$24,$12
2929	sw	$1,0*4($4)
2930	addu	$25,$13
2931	sw	$2,1*4($4)
2932	addu	$30,$14
2933	sw	$3,2*4($4)
2934	addu	$31,$15
2935	sw	$7,3*4($4)
2936	sw	$24,4*4($4)
2937	sw	$25,5*4($4)
2938	sw	$30,6*4($4)
2939	sw	$31,7*4($4)
2940
2941	bne	$5,$23,.Loop
2942	dsub $6,192	# rewind $6
2943
2944	ld	$31,192-1*8($29)
2945	ld	$30,192-2*8($29)
2946	ld	$23,192-3*8($29)
2947	ld	$22,192-4*8($29)
2948	ld	$21,192-5*8($29)
2949	ld	$20,192-6*8($29)
2950	ld	$19,192-7*8($29)
2951	ld	$18,192-8*8($29)
2952	ld	$17,192-9*8($29)
2953	ld	$16,192-10*8($29)
2954	jr	$31
2955	dadd $29,192
2956.end	sha256_block_data_order
2957
2958.rdata		# constants below go into the read-only data section
2959.align	5		# start the table on a 2^5 = 32-byte boundary
2960K256:		# SHA-256 round constants K[0..63]: 64 32-bit words, read by the round code as lw N($6)
2961	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	# K[0..3]
2962	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	# K[4..7]
2963	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	# K[8..11]
2964	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	# K[12..15]
2965	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	# K[16..19]
2966	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	# K[20..23]
2967	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	# K[24..27]
2968	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	# K[28..31]
2969	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	# K[32..35]
2970	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	# K[36..39]
2971	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	# K[40..43]
2972	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	# K[44..47]
2973	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	# K[48..51]
2974	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	# K[52..55]
2975	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	# K[56..59]
2976	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	# K[60..63]; low 12 bits of K[63] are 0x8f2 = 2290, the value the round code tests (and 0xfff / li 2290) before leaving the .L16_xx loop
2977.asciiz	"SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"	# NUL-terminated identification string stored after the table
2978.align	5		# pad the end of the data to a 32-byte boundary
2979
2980