xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips.S (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1#if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))
2.set     mips2
3#endif
4#include "mips_arch.h"
5
# Divide helpers, selected by the architecture macros tested below.
#  * Either R6 branch: the divide wrapper expands to nothing, and the
#    mfqt / mfrm wrappers expand to the three-operand divide / modulo
#    instruction (ddivu, dmodu or divu, modu) that names its own rd.
#  * Fallback branch: the divide wrapper issues divu with $0 as the
#    destination, and mfqt / mfrm fetch the result with mflo / mfhi.
6#if defined(_MIPS_ARCH_MIPS64R6)
7# define ddivu(rs,rt)
8# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
9# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
10#elif defined(_MIPS_ARCH_MIPS32R6)
11# define divu(rs,rt)
12# define mfqt(rd,rs,rt)	divu	rd,rs,rt
13# define mfrm(rd,rs,rt)	modu	rd,rs,rt
14#else
15# define divu(rs,rt)	divu	$0,rs,rt
16# define mfqt(rd,rs,rt)	mflo	rd
17# define mfrm(rd,rs,rt)	mfhi	rd
18#endif
19
20.rdata
21.asciiz	"mips3.s, Version 1.2"
22.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
23
24.text
25.set	noat
26
# bn_mul_add_words -- public entry point.
# Registers as used in this file: $4 = word array that is read, updated and
# written back, $5 = word array that is only read, $6 = word count,
# $7 = multiplier word, $2 = carry word returned to the caller.
# presumably BN_ULONG bn_mul_add_words(rp, ap, num, w) -- TODO confirm.
# A count that is not positive returns 0 at once; otherwise control passes
# to bn_mul_add_words_internal with the carry already cleared.
27.align	5
28.globl	bn_mul_add_words
29.ent	bn_mul_add_words
30bn_mul_add_words:
31	.set	noreorder
32	bgtz	$6,bn_mul_add_words_internal	# count > 0: do the work
33	move	$2,$0	# delay slot, executed on both paths: carry = 0
34	jr	$31
35	move	$4,$2	# delay slot: result is mirrored into $4
36.end	bn_mul_add_words
37
# bn_mul_add_words_internal -- worker for bn_mul_add_words.
# Reached from the bgtz in bn_mul_add_words, so $6 > 0 and $2 = 0 on entry.
# Per word: the 64-bit product of the $5 word and $7 is added to the $4
# word plus the running carry; the low 32 bits go back to the $4 array
# and the high 32 bits plus the carry bits become the next carry ($2).
# Words are handled four per loop pass, then a tail covers the last 1-3.
# $1 serves as scratch here, which the .set noat near the top of the file
# allows.  The final carry is returned in $2 and copied to $4.
38.align	5
39.ent	bn_mul_add_words_internal
40bn_mul_add_words_internal:
41	.set	reorder
42	li	$3,-4	# mask that clears the two low bits of a count
43	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
44	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words: tail only
45
46.L_bn_mul_add_words_loop:
47	lw	$12,0($5)
48	multu	($12,$7)
49	lw	$13,0($4)
50	lw	$14,4($5)
51	lw	$15,4($4)
52	lw	$8,2*4($5)
53	lw	$9,2*4($4)
54	addu	$13,$2	# result word += carry in
55	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
56				# values", but it seems to work fine
57				# even on 64-bit registers.
58	mflo	($1,$12,$7)
59	mfhi	($12,$12,$7)
60	addu	$13,$1	# result word += low half of the product
61	addu	$2,$12	# carry = first carry bit + high half
62	 multu	($14,$7)
63	sltu	$1,$13,$1	# carry bit of the low-half add
64	sw	$13,0($4)
65	addu	$2,$1
66
67	lw	$10,3*4($5)
68	lw	$11,3*4($4)
69	addu	$15,$2
70	sltu	$2,$15,$2
71	mflo	($1,$14,$7)
72	mfhi	($14,$14,$7)
73	addu	$15,$1
74	addu	$2,$14
75	 multu	($8,$7)
76	sltu	$1,$15,$1
77	sw	$15,4($4)
78	addu	$2,$1
79
80	subu	$6,4	# four words are consumed per pass
81	addu $4,4*4	# both pointers move on by four words here, so the
82	addu $5,4*4	# remaining two stores use negative offsets
83	addu	$9,$2
84	sltu	$2,$9,$2
85	mflo	($1,$8,$7)
86	mfhi	($8,$8,$7)
87	addu	$9,$1
88	addu	$2,$8
89	 multu	($10,$7)
90	sltu	$1,$9,$1
91	sw	$9,-2*4($4)
92	addu	$2,$1
93
94
95	and	$8,$6,$3	# loop condition: any full group of four left
96	addu	$11,$2
97	sltu	$2,$11,$2
98	mflo	($1,$10,$7)
99	mfhi	($10,$10,$7)
100	addu	$11,$1
101	addu	$2,$10
102	sltu	$1,$11,$1
103	sw	$11,-4($4)
104	.set	noreorder
105	bgtz	$8,.L_bn_mul_add_words_loop
106	addu	$2,$1	# delay slot: finish the carry of the fourth word
107
108	beqz	$6,.L_bn_mul_add_words_return	# count was a multiple of 4
109	nop
110
111.L_bn_mul_add_words_tail:
112	.set	reorder
113	lw	$12,0($5)
114	multu	($12,$7)
115	lw	$13,0($4)
116	subu	$6,1	# first tail word
117	addu	$13,$2
118	sltu	$2,$13,$2
119	mflo	($1,$12,$7)
120	mfhi	($12,$12,$7)
121	addu	$13,$1
122	addu	$2,$12
123	sltu	$1,$13,$1
124	sw	$13,0($4)
125	addu	$2,$1
126	beqz	$6,.L_bn_mul_add_words_return
127
128	lw	$12,4($5)
129	multu	($12,$7)
130	lw	$13,4($4)
131	subu	$6,1	# second tail word
132	addu	$13,$2
133	sltu	$2,$13,$2
134	mflo	($1,$12,$7)
135	mfhi	($12,$12,$7)
136	addu	$13,$1
137	addu	$2,$12
138	sltu	$1,$13,$1
139	sw	$13,4($4)
140	addu	$2,$1
141	beqz	$6,.L_bn_mul_add_words_return
142
143	lw	$12,2*4($5)	# third and last possible tail word
144	multu	($12,$7)
145	lw	$13,2*4($4)
146	addu	$13,$2
147	sltu	$2,$13,$2
148	mflo	($1,$12,$7)
149	mfhi	($12,$12,$7)
150	addu	$13,$1
151	addu	$2,$12
152	sltu	$1,$13,$1
153	sw	$13,2*4($4)
154	addu	$2,$1
155
156.L_bn_mul_add_words_return:
157	.set	noreorder
158	jr	$31
159	move	$4,$2	# delay slot: carry is mirrored into $4
160.end	bn_mul_add_words_internal
161
# bn_mul_words -- public entry point.
# Registers as used in this file: $4 = word array that is written,
# $5 = word array that is read, $6 = word count, $7 = multiplier word,
# $2 = carry word returned to the caller.
# presumably BN_ULONG bn_mul_words(rp, ap, num, w) -- TODO confirm.
# A count that is not positive returns 0 at once; otherwise control passes
# to bn_mul_words_internal with the carry already cleared.
162.align	5
163.globl	bn_mul_words
164.ent	bn_mul_words
165bn_mul_words:
166	.set	noreorder
167	bgtz	$6,bn_mul_words_internal	# count > 0: do the work
168	move	$2,$0	# delay slot, executed on both paths: carry = 0
169	jr	$31
170	move	$4,$2	# delay slot: result is mirrored into $4
171.end	bn_mul_words
172
# bn_mul_words_internal -- worker for bn_mul_words.
# Reached from the bgtz in bn_mul_words, so $6 > 0 and $2 = 0 on entry.
# Per word: the low half of the product of the $5 word and $7 is added to
# the running carry and stored to the $4 array; the high half plus the
# carry bit of that add becomes the next carry ($2).
# Words are handled four per loop pass, then a tail covers the last 1-3.
# The final carry is returned in $2 and copied to $4.
173.align	5
174.ent	bn_mul_words_internal
175bn_mul_words_internal:
176	.set	reorder
177	li	$3,-4	# mask that clears the two low bits of a count
178	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
179	beqz	$8,.L_bn_mul_words_tail	# fewer than 4 words: tail only
180
181.L_bn_mul_words_loop:
182	lw	$12,0($5)
183	multu	($12,$7)
184	lw	$14,4($5)
185	lw	$8,2*4($5)
186	lw	$10,3*4($5)
187	mflo	($1,$12,$7)
188	mfhi	($12,$12,$7)
189	addu	$2,$1	# carry + low half = word to store
190	sltu	$13,$2,$1	# carry bit of that add
191	 multu	($14,$7)
192	sw	$2,0($4)
193	addu	$2,$13,$12	# next carry = carry bit + high half
194
195	subu	$6,4	# four words are consumed per pass
196	addu $4,4*4	# both pointers move on by four words here, so the
197	addu $5,4*4	# remaining three stores use negative offsets
198	mflo	($1,$14,$7)
199	mfhi	($14,$14,$7)
200	addu	$2,$1
201	sltu	$15,$2,$1
202	 multu	($8,$7)
203	sw	$2,-3*4($4)
204	addu	$2,$15,$14
205
206	mflo	($1,$8,$7)
207	mfhi	($8,$8,$7)
208	addu	$2,$1
209	sltu	$9,$2,$1
210	 multu	($10,$7)
211	sw	$2,-2*4($4)
212	addu	$2,$9,$8
213
214	and	$8,$6,$3	# loop condition: any full group of four left
215	mflo	($1,$10,$7)
216	mfhi	($10,$10,$7)
217	addu	$2,$1
218	sltu	$11,$2,$1
219	sw	$2,-4($4)
220	.set	noreorder
221	bgtz	$8,.L_bn_mul_words_loop
222	addu	$2,$11,$10	# delay slot: carry out of the fourth word
223
224	beqz	$6,.L_bn_mul_words_return	# count was a multiple of 4
225	nop
226
227.L_bn_mul_words_tail:
228	.set	reorder
229	lw	$12,0($5)
230	multu	($12,$7)
231	subu	$6,1	# first tail word
232	mflo	($1,$12,$7)
233	mfhi	($12,$12,$7)
234	addu	$2,$1
235	sltu	$13,$2,$1
236	sw	$2,0($4)
237	addu	$2,$13,$12
238	beqz	$6,.L_bn_mul_words_return
239
240	lw	$12,4($5)
241	multu	($12,$7)
242	subu	$6,1	# second tail word
243	mflo	($1,$12,$7)
244	mfhi	($12,$12,$7)
245	addu	$2,$1
246	sltu	$13,$2,$1
247	sw	$2,4($4)
248	addu	$2,$13,$12
249	beqz	$6,.L_bn_mul_words_return
250
251	lw	$12,2*4($5)	# third and last possible tail word
252	multu	($12,$7)
253	mflo	($1,$12,$7)
254	mfhi	($12,$12,$7)
255	addu	$2,$1
256	sltu	$13,$2,$1
257	sw	$2,2*4($4)
258	addu	$2,$13,$12
259
260.L_bn_mul_words_return:
261	.set	noreorder
262	jr	$31
263	move	$4,$2	# delay slot: carry is mirrored into $4
264.end	bn_mul_words_internal
265
# bn_sqr_words -- public entry point.
# Registers as used in this file: $4 = word array that is written (two
# words per input word), $5 = word array that is read, $6 = word count.
# presumably void bn_sqr_words(rp, ap, num) -- TODO confirm.
# A count that is not positive returns at once; otherwise control passes
# to bn_sqr_words_internal.  $2 is cleared on both paths.
266.align	5
267.globl	bn_sqr_words
268.ent	bn_sqr_words
269bn_sqr_words:
270	.set	noreorder
271	bgtz	$6,bn_sqr_words_internal	# count > 0: do the work
272	move	$2,$0	# delay slot, executed on both paths: $2 = 0
273	jr	$31
274	move	$4,$2	# delay slot: $2 is mirrored into $4
275.end	bn_sqr_words
276
# bn_sqr_words_internal -- worker for bn_sqr_words.
# Reached from the bgtz in bn_sqr_words, so $6 > 0 on entry.
# Each word read from the $5 array is multiplied by itself; the low half
# of the product is stored to the $4 array followed by the high half, so
# the output pointer moves twice as fast as the input pointer.
# Words are handled four per loop pass, then a tail covers the last 1-3.
# $2 is never written here, so the value returned is the 0 that
# bn_sqr_words left in it.
277.align	5
278.ent	bn_sqr_words_internal
279bn_sqr_words_internal:
280	.set	reorder
281	li	$3,-4	# mask that clears the two low bits of a count
282	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
283	beqz	$8,.L_bn_sqr_words_tail	# fewer than 4 words: tail only
284
285.L_bn_sqr_words_loop:
286	lw	$12,0($5)
287	multu	($12,$12)
288	lw	$14,4($5)
289	lw	$8,2*4($5)
290	lw	$10,3*4($5)
291	mflo	($13,$12,$12)
292	mfhi	($12,$12,$12)
293	sw	$13,0($4)	# low half of the first square
294	sw	$12,4($4)	# high half of the first square
295
296	multu	($14,$14)
297	subu	$6,4	# four input words are consumed per pass
298	addu $4,8*4	# output moves on by eight words ...
299	addu $5,4*4	# ... input by four; later stores use negative offsets
300	mflo	($15,$14,$14)
301	mfhi	($14,$14,$14)
302	sw	$15,-6*4($4)
303	sw	$14,-5*4($4)
304
305	multu	($8,$8)
306	mflo	($9,$8,$8)
307	mfhi	($8,$8,$8)
308	sw	$9,-4*4($4)
309	sw	$8,-3*4($4)
310
311
312	multu	($10,$10)
313	and	$8,$6,$3	# loop condition: any full group of four left
314	mflo	($11,$10,$10)
315	mfhi	($10,$10,$10)
316	sw	$11,-2*4($4)
317
318	.set	noreorder
319	sw	$10,-4($4)
320	bgtz	$8,.L_bn_sqr_words_loop
321	nop
322
323	beqz	$6,.L_bn_sqr_words_return	# count was a multiple of 4
324	nop
325
326.L_bn_sqr_words_tail:
327	.set	reorder
328	lw	$12,0($5)
329	multu	($12,$12)
330	subu	$6,1	# first tail word
331	mflo	($13,$12,$12)
332	mfhi	($12,$12,$12)
333	sw	$13,0($4)
334	sw	$12,4($4)
335	beqz	$6,.L_bn_sqr_words_return
336
337	lw	$12,4($5)
338	multu	($12,$12)
339	subu	$6,1	# second tail word
340	mflo	($13,$12,$12)
341	mfhi	($12,$12,$12)
342	sw	$13,2*4($4)
343	sw	$12,3*4($4)
344	beqz	$6,.L_bn_sqr_words_return
345
346	lw	$12,2*4($5)	# third and last possible tail word
347	multu	($12,$12)
348	mflo	($13,$12,$12)
349	mfhi	($12,$12,$12)
350	sw	$13,4*4($4)
351	sw	$12,5*4($4)
352
353.L_bn_sqr_words_return:
354	.set	noreorder
355	jr	$31
356	move	$4,$2	# delay slot: $2 is mirrored into $4
357
358.end	bn_sqr_words_internal
359
# bn_add_words -- public entry point.
# Registers as used in this file: $4 = word array that is written,
# $5 and $6 = the two word arrays that are read, $7 = word count,
# $2 = carry (0 or 1) returned to the caller.
# presumably BN_ULONG bn_add_words(rp, ap, bp, num) -- TODO confirm.
# A count that is not positive returns 0 at once; otherwise control passes
# to bn_add_words_internal with the carry already cleared.
360.align	5
361.globl	bn_add_words
362.ent	bn_add_words
363bn_add_words:
364	.set	noreorder
365	bgtz	$7,bn_add_words_internal	# count > 0: do the work
366	move	$2,$0	# delay slot, executed on both paths: carry = 0
367	jr	$31
368	move	$4,$2	# delay slot: result is mirrored into $4
369.end	bn_add_words
370
# bn_add_words_internal -- worker for bn_add_words.
# Reached from the bgtz in bn_add_words, so $7 > 0 and $2 = 0 on entry.
# Per word: the $5 word and the $6 word are added, then the running carry
# is added; the sum is stored to the $4 array and the two carry bits are
# combined into the next carry ($2).
# Words are handled four per loop pass, then a tail covers the last 1-3.
# $1 holds the loop condition, which the .set noat near the top of the
# file allows.  The final carry is returned in $2 and copied to $4.
371.align	5
372.ent	bn_add_words_internal
373bn_add_words_internal:
374	.set	reorder
375	li	$3,-4	# mask that clears the two low bits of a count
376	and	$1,$7,$3	# $1 = count rounded down to a multiple of 4
377	beqz	$1,.L_bn_add_words_tail	# fewer than 4 words: tail only
378
379.L_bn_add_words_loop:
380	lw	$12,0($5)
381	lw	$8,0($6)
382	subu	$7,4	# four words are consumed per pass
383	lw	$13,4($5)
384	and	$1,$7,$3	# loop condition: any full group of four left
385	lw	$14,2*4($5)
386	addu $6,4*4	# pointers are advanced between the loads, so the
387	lw	$15,3*4($5)
388	addu $4,4*4	# remaining accesses use negative offsets
389	lw	$9,-3*4($6)
390	addu $5,4*4
391	lw	$10,-2*4($6)
392	lw	$11,-4($6)
393	addu	$8,$12	# partial sum of the two input words
394	sltu	$24,$8,$12	# first carry bit
395	addu	$12,$8,$2	# add the carry in
396	sltu	$2,$12,$8	# second carry bit
397	sw	$12,-4*4($4)
398	addu	$2,$24	# carry out = sum of both bits
399
400	addu	$9,$13
401	sltu	$25,$9,$13
402	addu	$13,$9,$2
403	sltu	$2,$13,$9
404	sw	$13,-3*4($4)
405	addu	$2,$25
406
407	addu	$10,$14
408	sltu	$24,$10,$14
409	addu	$14,$10,$2
410	sltu	$2,$14,$10
411	sw	$14,-2*4($4)
412	addu	$2,$24
413
414	addu	$11,$15
415	sltu	$25,$11,$15
416	addu	$15,$11,$2
417	sltu	$2,$15,$11
418	sw	$15,-4($4)
419
420	.set	noreorder
421	bgtz	$1,.L_bn_add_words_loop
422	addu	$2,$25	# delay slot: finish the carry of the fourth word
423
424	beqz	$7,.L_bn_add_words_return	# count was a multiple of 4
425	nop
426
427.L_bn_add_words_tail:
428	.set	reorder
429	lw	$12,0($5)
430	lw	$8,0($6)
431	addu	$8,$12
432	subu	$7,1	# first tail word
433	sltu	$24,$8,$12
434	addu	$12,$8,$2
435	sltu	$2,$12,$8
436	sw	$12,0($4)
437	addu	$2,$24
438	beqz	$7,.L_bn_add_words_return
439
440	lw	$13,4($5)
441	lw	$9,4($6)
442	addu	$9,$13
443	subu	$7,1	# second tail word
444	sltu	$25,$9,$13
445	addu	$13,$9,$2
446	sltu	$2,$13,$9
447	sw	$13,4($4)
448	addu	$2,$25
449	beqz	$7,.L_bn_add_words_return
450
451	lw	$14,2*4($5)	# third and last possible tail word
452	lw	$10,2*4($6)
453	addu	$10,$14
454	sltu	$24,$10,$14
455	addu	$14,$10,$2
456	sltu	$2,$14,$10
457	sw	$14,2*4($4)
458	addu	$2,$24
459
460.L_bn_add_words_return:
461	.set	noreorder
462	jr	$31
463	move	$4,$2	# delay slot: carry is mirrored into $4
464
465.end	bn_add_words_internal
466
# bn_sub_words -- public entry point.
# Registers as used in this file: $4 = word array that is written,
# $5 = minuend array, $6 = subtrahend array, $7 = word count,
# $2 = borrow (0 or 1) returned to the caller.
# presumably BN_ULONG bn_sub_words(rp, ap, bp, num) -- TODO confirm.
# A count that is not positive returns 0 at once; otherwise control passes
# to bn_sub_words_internal with the borrow already cleared.
467.align	5
468.globl	bn_sub_words
469.ent	bn_sub_words
470bn_sub_words:
471	.set	noreorder
472	bgtz	$7,bn_sub_words_internal	# count > 0: do the work
473	move	$2,$0	# delay slot, executed on both paths: borrow = 0
474	jr	$31
475	move	$4,$0	# delay slot: $4 = 0, the same value $2 holds here
476.end	bn_sub_words
477
# bn_sub_words_internal -- worker for bn_sub_words.
# Reached from the bgtz in bn_sub_words, so $7 > 0 and $2 = 0 on entry.
# Per word: the $6 word is subtracted from the $5 word, then the running
# borrow is subtracted; the difference is stored to the $4 array and the
# two borrow bits are combined into the next borrow ($2).
# Words are handled four per loop pass, then a tail covers the last 1-3.
# $1 holds the loop condition, which the .set noat near the top of the
# file allows.  The final borrow is returned in $2 and copied to $4.
478.align	5
479.ent	bn_sub_words_internal
480bn_sub_words_internal:
481	.set	reorder
482	li	$3,-4	# mask that clears the two low bits of a count
483	and	$1,$7,$3	# $1 = count rounded down to a multiple of 4
484	beqz	$1,.L_bn_sub_words_tail	# fewer than 4 words: tail only
485
486.L_bn_sub_words_loop:
487	lw	$12,0($5)
488	lw	$8,0($6)
489	subu	$7,4	# four words are consumed per pass
490	lw	$13,4($5)
491	and	$1,$7,$3	# loop condition: any full group of four left
492	lw	$14,2*4($5)
493	addu $6,4*4	# pointers are advanced between the loads, so the
494	lw	$15,3*4($5)
495	addu $4,4*4	# remaining accesses use negative offsets
496	lw	$9,-3*4($6)
497	addu $5,4*4
498	lw	$10,-2*4($6)
499	lw	$11,-4($6)
500	sltu	$24,$12,$8	# first borrow bit: minuend < subtrahend
501	subu	$8,$12,$8	# partial difference
502	subu	$12,$8,$2	# take off the borrow in
503	sgtu	$2,$12,$8	# second borrow bit: result wrapped around
504	sw	$12,-4*4($4)
505	addu	$2,$24	# borrow out = sum of both bits
506
507	sltu	$25,$13,$9
508	subu	$9,$13,$9
509	subu	$13,$9,$2
510	sgtu	$2,$13,$9
511	sw	$13,-3*4($4)
512	addu	$2,$25
513
514
515	sltu	$24,$14,$10
516	subu	$10,$14,$10
517	subu	$14,$10,$2
518	sgtu	$2,$14,$10
519	sw	$14,-2*4($4)
520	addu	$2,$24
521
522	sltu	$25,$15,$11
523	subu	$11,$15,$11
524	subu	$15,$11,$2
525	sgtu	$2,$15,$11
526	sw	$15,-4($4)
527
528	.set	noreorder
529	bgtz	$1,.L_bn_sub_words_loop
530	addu	$2,$25	# delay slot: finish the borrow of the fourth word
531
532	beqz	$7,.L_bn_sub_words_return	# count was a multiple of 4
533	nop
534
535.L_bn_sub_words_tail:
536	.set	reorder
537	lw	$12,0($5)
538	lw	$8,0($6)
539	subu	$7,1	# first tail word
540	sltu	$24,$12,$8
541	subu	$8,$12,$8
542	subu	$12,$8,$2
543	sgtu	$2,$12,$8
544	sw	$12,0($4)
545	addu	$2,$24
546	beqz	$7,.L_bn_sub_words_return
547
548	lw	$13,4($5)
549	subu	$7,1	# second tail word
550	lw	$9,4($6)
551	sltu	$25,$13,$9
552	subu	$9,$13,$9
553	subu	$13,$9,$2
554	sgtu	$2,$13,$9
555	sw	$13,4($4)
556	addu	$2,$25
557	beqz	$7,.L_bn_sub_words_return
558
559	lw	$14,2*4($5)	# third and last possible tail word
560	lw	$10,2*4($6)
561	sltu	$24,$14,$10
562	subu	$10,$14,$10
563	subu	$14,$10,$2
564	sgtu	$2,$14,$10
565	sw	$14,2*4($4)
566	addu	$2,$24
567
568.L_bn_sub_words_return:
569	.set	noreorder
570	jr	$31
571	move	$4,$2	# delay slot: borrow is mirrored into $4
572.end	bn_sub_words_internal
573
574#if 0
575/*
576 * The bn_div_3_words entry point is re-used for constant-time interface.
577 * Implementation is retained as historical reference.
578 */
579.align 5
580.globl	bn_div_3_words
581.ent	bn_div_3_words
582bn_div_3_words:
583	.set	noreorder
584	move	$7,$4		# we know that bn_div_words does not
585				# touch $7, $10, $11 and preserves $6
586				# so that we can save two arguments
587				# and return address in registers
588				# instead of stack:-)
589
590	lw	$4,($7)	# $4 = word at 0($7)
591	move	$10,$5	# second argument kept in $10
592	lw	$5,-4($7)	# $5 = word at -4($7)
593	bne	$4,$6,bn_div_3_words_internal	# top word differs from $6: divide
594	 nop
595	li	$2,-1	# top word equals $6: return all ones
596	jr	$31
597	move	$4,$2
598.end	bn_div_3_words
599
600.align	5
601.ent	bn_div_3_words_internal
602bn_div_3_words_internal:
603	.set	reorder
604	move	$11,$31	# return address kept in $11 across the call
605	bal	bn_div_words_internal	# $2 = first quotient estimate
606	move	$31,$11	# return address restored
607	multu	($10,$2)
608	lw	$14,-2*4($7)
609	move	$8,$0
610	mfhi	($13,$10,$2)
611	mflo	($12,$10,$2)
612	sltu	$24,$13,$5
613.L_bn_div_3_words_inner_loop:
614	bnez	$24,.L_bn_div_3_words_inner_loop_done
615	sgeu	$1,$14,$12
616	seq	$25,$13,$5
617	and	$1,$25
618	sltu	$15,$12,$10
619	addu	$5,$6
620	subu	$13,$15
621	subu	$12,$10
622	sltu	$24,$13,$5
623	sltu	$8,$5,$6
624	or	$24,$8
625	.set	noreorder
626	beqz	$1,.L_bn_div_3_words_inner_loop
627	subu	$2,1	# delay slot: estimate is lowered on every pass ...
628	addu	$2,1	# ... and raised again once the loop falls through
629	.set	reorder
630.L_bn_div_3_words_inner_loop_done:
631	.set	noreorder
632	jr	$31
633	move	$4,$2
634.end	bn_div_3_words_internal
635#endif
636
# bn_div_words -- public entry point.
# Registers as used in this file: $4 = high word of the dividend,
# $5 = low word of the dividend, $6 = divisor, $2 = quotient returned.
# presumably BN_ULONG bn_div_words(h, l, d) -- TODO confirm.
# A zero divisor returns -1; any other divisor is handed to
# bn_div_words_internal.
637.align	5
638.globl	bn_div_words
639.ent	bn_div_words
640bn_div_words:
641	.set	noreorder
642	bnez	$6,bn_div_words_internal
643	li	$2,-1		# I would rather signal div-by-zero
644				# which can be done with 'break 7'
645	jr	$31
646	move	$4,$2	# delay slot: result is mirrored into $4
647.end	bn_div_words
648
# bn_div_words_internal -- worker for bn_div_words, entered with $6 != 0.
# Still assembled under the .set noreorder left active by bn_div_words,
# so the instruction after each branch up to the first .set reorder is
# a delay slot.
# Steps, as coded below:
#   - shift the divisor $6 left until its top bit is set, counting the
#     shift in $25; break 6 is raised when the same shift would push
#     set bits out of the high word $4; then $4:$5 is shifted to match.
#   - subtract $6 from $4 once when $4 >= $6.
#   - produce the quotient in two 16-bit halves: each half is estimated
#     by dividing $4 by the top 16 bits of $6 (or taken as 0xffff when
#     the top halves are equal) and then decremented in the inner loop
#     while estimate * $6 exceeds the value formed from $4 and the top
#     half of $5.
# Result: $2 = quotient (copied to $4), $3 = remainder shifted back down
# (copied to $5), $6 restored by shifting it right again.
649.align	5
650.ent	bn_div_words_internal
651bn_div_words_internal:
652	move	$3,$0
653	bltz	$6,.L_bn_div_words_body	# top bit already set: no shifting
654	move	$25,$3	# delay slot: shift count = 0
655	sll	$6,1
656	bgtz	$6,.-4	# back to the sll until the top bit is set
657	addu	$25,1	# delay slot: one more bit shifted
658
659	.set	reorder
660	negu	$13,$25	# right-shift amount that complements $25
661	li	$14,-1
662	sll	$14,$13	# mask of the top $25 bits
663	and	$14,$4	# bits of $4 the left shift would drop
664	srl	$1,$5,$13	# bits of $5 that move up into $4
665	.set	noreorder
666	beqz	$14,.+12	# nothing lost: skip the break
667	nop
668	break	6		# signal overflow
669	.set	reorder
670	sll	$4,$25
671	sll	$5,$25
672	or	$4,$1
673.L_bn_div_words_body:
674	srl	$3,$6,4*4	# bits
675	sgeu	$1,$4,$6
676	.set	noreorder
677	beqz	$1,.+12	# $4 < $6: skip the subtraction
678	nop
679	subu	$4,$6
680	.set	reorder
681
682	li	$8,-1
683	srl	$9,$4,4*4	# bits
684	srl	$8,4*4	# q=0xffff (all ones in the low half-word)
685	beq	$3,$9,.L_bn_div_words_skip_div1
686	divu	($4,$3)
687	mfqt	($8,$4,$3)
688.L_bn_div_words_skip_div1:
689	multu	($6,$8)
690	sll	$15,$4,4*4	# bits
691	srl	$1,$5,4*4	# bits
692	or	$15,$1
693	mflo	($12,$6,$8)
694	mfhi	($13,$6,$8)
695.L_bn_div_words_inner_loop1:
696	sltu	$14,$15,$12
697	seq	$24,$9,$13
698	sltu	$1,$9,$13
699	and	$14,$24
700	sltu	$2,$12,$6	# borrow of the low-word subtraction below
701	or	$1,$14	# nonzero while $13:$12 is larger than $9:$15
702	.set	noreorder
703	beqz	$1,.L_bn_div_words_inner_loop1_done
704	subu	$13,$2	# delay slot, executed on both paths
705	subu	$12,$6
706	b	.L_bn_div_words_inner_loop1
707	subu	$8,1	# delay slot: estimate was one too large
708	.set	reorder
709.L_bn_div_words_inner_loop1_done:
710
711	sll	$5,4*4	# bits
712	subu	$4,$15,$12	# remainder after the first half
713	sll	$2,$8,4*4	# bits
714
715	li	$8,-1
716	srl	$9,$4,4*4	# bits
717	srl	$8,4*4	# q=0xffff (all ones in the low half-word)
718	beq	$3,$9,.L_bn_div_words_skip_div2
719	divu	($4,$3)
720	mfqt	($8,$4,$3)
721.L_bn_div_words_skip_div2:
722	multu	($6,$8)
723	sll	$15,$4,4*4	# bits
724	srl	$1,$5,4*4	# bits
725	or	$15,$1
726	mflo	($12,$6,$8)
727	mfhi	($13,$6,$8)
728.L_bn_div_words_inner_loop2:
729	sltu	$14,$15,$12
730	seq	$24,$9,$13
731	sltu	$1,$9,$13
732	and	$14,$24
733	sltu	$3,$12,$6	# borrow; $3 is reused as scratch from here on
734	or	$1,$14
735	.set	noreorder
736	beqz	$1,.L_bn_div_words_inner_loop2_done
737	subu	$13,$3	# delay slot, executed on both paths
738	subu	$12,$6
739	b	.L_bn_div_words_inner_loop2
740	subu	$8,1	# delay slot: estimate was one too large
741	.set	reorder
742.L_bn_div_words_inner_loop2_done:
743
744	subu	$4,$15,$12
745	or	$2,$8	# quotient = high half | low half
746	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
747	srl	$6,$25		# restore $6
748
749	.set	noreorder
750	move	$5,$3
751	jr	$31
752	move	$4,$2	# delay slot: quotient is mirrored into $4
753.end	bn_div_words_internal
754
# bn_mul_comba8 -- 8x8 word multiplication, written out column by column.
# Going by the per-step comments: $4 = r (16 result words are stored),
# $5 = a (8 words), $6 = b (8 words).
# presumably void bn_mul_comba8(r, a, b) -- TODO confirm.
# Register plan taken from the loads below:
#   a[0..3] in $12-$15, a[4] $16, a[5] $18, a[6] $20, a[7] $5 (pointer reused)
#   b[0..3] in $8-$11,  b[4] $17, b[5] $19, b[6] $21, b[7] $6 (pointer reused)
#   c1/c2/c3 rotate through $2, $3, $7; $24/$25 hold the low/high product
#   halves and $1 the carry bit of the latest add.
# $16-$21 are saved in a 24-byte stack frame on entry and reloaded on exit.
# The multiply for the next step is issued before the current step has
# finished accumulating; in most places a one-space indent marks the
# first multiply of the next result column.
755.align	5
756.globl	bn_mul_comba8
757.ent	bn_mul_comba8
758bn_mul_comba8:
759	.set	noreorder
760	.frame	$29,6*4,$31
761	.mask	0x003f0000,-4
762	subu $29,6*4
763	sw	$21,5*4($29)
764	sw	$20,4*4($29)
765	sw	$19,3*4($29)
766	sw	$18,2*4($29)
767	sw	$17,1*4($29)
768	sw	$16,0*4($29)
769
770	.set	reorder
# NOTE(review): the spelling 1ine in the comment below may be deliberate,
# since a row starting with the correctly spelled word could be read by
# cpp as its line-number directive -- TODO confirm before correcting it.
771	lw	$12,0($5)	# If compiled with -mips3 option on
772				# R5000 box assembler barks on this
773				# 1ine with "should not have mult/div
774				# as last instruction in bb (R10K
775				# bug)" warning. If anybody out there
776				# has a clue about how to circumvent
777				# this do send me a note.
778				#		<appro@fy.chalmers.se>
779
780	lw	$8,0($6)
781	lw	$13,4($5)
782	lw	$14,2*4($5)
783	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
784	lw	$15,3*4($5)
785	lw	$9,4($6)
786	lw	$10,2*4($6)
787	lw	$11,3*4($6)
788	mflo	($2,$12,$8)
789	mfhi	($3,$12,$8)
790
791	lw	$16,4*4($5)
792	lw	$18,5*4($5)
793	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
794	lw	$20,6*4($5)
795	lw	$5,7*4($5)
796	lw	$17,4*4($6)
797	lw	$19,5*4($6)
798	mflo	($24,$12,$9)
799	mfhi	($25,$12,$9)
800	addu	$3,$24
801	sltu	$1,$3,$24
802	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
803	addu	$7,$25,$1
804	lw	$21,6*4($6)
805	lw	$6,7*4($6)
806	sw	$2,0($4)	# r[0]=c1;
807	mflo	($24,$13,$8)
808	mfhi	($25,$13,$8)
809	addu	$3,$24
810	sltu	$1,$3,$24
811	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
812	addu	$25,$1
813	addu	$7,$25
814	sltu	$2,$7,$25
815	sw	$3,4($4)	# r[1]=c2;
816
817	mflo	($24,$14,$8)
818	mfhi	($25,$14,$8)
819	addu	$7,$24
820	sltu	$1,$7,$24
821	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
822	addu	$25,$1
823	addu	$2,$25
824	mflo	($24,$13,$9)
825	mfhi	($25,$13,$9)
826	addu	$7,$24
827	sltu	$1,$7,$24
828	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
829	addu	$25,$1
830	addu	$2,$25
831	sltu	$3,$2,$25
832	mflo	($24,$12,$10)
833	mfhi	($25,$12,$10)
834	addu	$7,$24
835	sltu	$1,$7,$24
836	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
837	addu	$25,$1
838	addu	$2,$25
839	sltu	$1,$2,$25
840	addu	$3,$1
841	sw	$7,2*4($4)	# r[2]=c3;
842
843	mflo	($24,$12,$11)
844	mfhi	($25,$12,$11)
845	addu	$2,$24
846	sltu	$1,$2,$24
847	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
848	addu	$25,$1
849	addu	$3,$25
850	sltu	$7,$3,$25
851	mflo	($24,$13,$10)
852	mfhi	($25,$13,$10)
853	addu	$2,$24
854	sltu	$1,$2,$24
855	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
856	addu	$25,$1
857	addu	$3,$25
858	sltu	$1,$3,$25
859	addu	$7,$1
860	mflo	($24,$14,$9)
861	mfhi	($25,$14,$9)
862	addu	$2,$24
863	sltu	$1,$2,$24
864	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
865	addu	$25,$1
866	addu	$3,$25
867	sltu	$1,$3,$25
868	addu	$7,$1
869	mflo	($24,$15,$8)
870	mfhi	($25,$15,$8)
871	addu	$2,$24
872	sltu	$1,$2,$24
873	 multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
874	addu	$25,$1
875	addu	$3,$25
876	sltu	$1,$3,$25
877	addu	$7,$1
878	sw	$2,3*4($4)	# r[3]=c1;
879
880	mflo	($24,$16,$8)
881	mfhi	($25,$16,$8)
882	addu	$3,$24
883	sltu	$1,$3,$24
884	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
885	addu	$25,$1
886	addu	$7,$25
887	sltu	$2,$7,$25
888	mflo	($24,$15,$9)
889	mfhi	($25,$15,$9)
890	addu	$3,$24
891	sltu	$1,$3,$24
892	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
893	addu	$25,$1
894	addu	$7,$25
895	sltu	$1,$7,$25
896	addu	$2,$1
897	mflo	($24,$14,$10)
898	mfhi	($25,$14,$10)
899	addu	$3,$24
900	sltu	$1,$3,$24
901	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
902	addu	$25,$1
903	addu	$7,$25
904	sltu	$1,$7,$25
905	addu	$2,$1
906	mflo	($24,$13,$11)
907	mfhi	($25,$13,$11)
908	addu	$3,$24
909	sltu	$1,$3,$24
910	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
911	addu	$25,$1
912	addu	$7,$25
913	sltu	$1,$7,$25
914	addu	$2,$1
915	mflo	($24,$12,$17)
916	mfhi	($25,$12,$17)
917	addu	$3,$24
918	sltu	$1,$3,$24
919	 multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
920	addu	$25,$1
921	addu	$7,$25
922	sltu	$1,$7,$25
923	addu	$2,$1
924	sw	$3,4*4($4)	# r[4]=c2;
925
926	mflo	($24,$12,$19)
927	mfhi	($25,$12,$19)
928	addu	$7,$24
929	sltu	$1,$7,$24
930	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
931	addu	$25,$1
932	addu	$2,$25
933	sltu	$3,$2,$25
934	mflo	($24,$13,$17)
935	mfhi	($25,$13,$17)
936	addu	$7,$24
937	sltu	$1,$7,$24
938	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
939	addu	$25,$1
940	addu	$2,$25
941	sltu	$1,$2,$25
942	addu	$3,$1
943	mflo	($24,$14,$11)
944	mfhi	($25,$14,$11)
945	addu	$7,$24
946	sltu	$1,$7,$24
947	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
948	addu	$25,$1
949	addu	$2,$25
950	sltu	$1,$2,$25
951	addu	$3,$1
952	mflo	($24,$15,$10)
953	mfhi	($25,$15,$10)
954	addu	$7,$24
955	sltu	$1,$7,$24
956	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
957	addu	$25,$1
958	addu	$2,$25
959	sltu	$1,$2,$25
960	addu	$3,$1
961	mflo	($24,$16,$9)
962	mfhi	($25,$16,$9)
963	addu	$7,$24
964	sltu	$1,$7,$24
965	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
966	addu	$25,$1
967	addu	$2,$25
968	sltu	$1,$2,$25
969	addu	$3,$1
970	mflo	($24,$18,$8)
971	mfhi	($25,$18,$8)
972	addu	$7,$24
973	sltu	$1,$7,$24
974	 multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
975	addu	$25,$1
976	addu	$2,$25
977	sltu	$1,$2,$25
978	addu	$3,$1
979	sw	$7,5*4($4)	# r[5]=c3;
980
981	mflo	($24,$20,$8)
982	mfhi	($25,$20,$8)
983	addu	$2,$24
984	sltu	$1,$2,$24
985	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
986	addu	$25,$1
987	addu	$3,$25
988	sltu	$7,$3,$25
989	mflo	($24,$18,$9)
990	mfhi	($25,$18,$9)
991	addu	$2,$24
992	sltu	$1,$2,$24
993	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
994	addu	$25,$1
995	addu	$3,$25
996	sltu	$1,$3,$25
997	addu	$7,$1
998	mflo	($24,$16,$10)
999	mfhi	($25,$16,$10)
1000	addu	$2,$24
1001	sltu	$1,$2,$24
1002	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1003	addu	$25,$1
1004	addu	$3,$25
1005	sltu	$1,$3,$25
1006	addu	$7,$1
1007	mflo	($24,$15,$11)
1008	mfhi	($25,$15,$11)
1009	addu	$2,$24
1010	sltu	$1,$2,$24
1011	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
1012	addu	$25,$1
1013	addu	$3,$25
1014	sltu	$1,$3,$25
1015	addu	$7,$1
1016	mflo	($24,$14,$17)
1017	mfhi	($25,$14,$17)
1018	addu	$2,$24
1019	sltu	$1,$2,$24
1020	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
1021	addu	$25,$1
1022	addu	$3,$25
1023	sltu	$1,$3,$25
1024	addu	$7,$1
1025	mflo	($24,$13,$19)
1026	mfhi	($25,$13,$19)
1027	addu	$2,$24
1028	sltu	$1,$2,$24
1029	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
1030	addu	$25,$1
1031	addu	$3,$25
1032	sltu	$1,$3,$25
1033	addu	$7,$1
1034	mflo	($24,$12,$21)
1035	mfhi	($25,$12,$21)
1036	addu	$2,$24
1037	sltu	$1,$2,$24
1038	 multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
1039	addu	$25,$1
1040	addu	$3,$25
1041	sltu	$1,$3,$25
1042	addu	$7,$1
1043	sw	$2,6*4($4)	# r[6]=c1;
1044
1045	mflo	($24,$12,$6)
1046	mfhi	($25,$12,$6)
1047	addu	$3,$24
1048	sltu	$1,$3,$24
1049	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
1050	addu	$25,$1
1051	addu	$7,$25
1052	sltu	$2,$7,$25
1053	mflo	($24,$13,$21)
1054	mfhi	($25,$13,$21)
1055	addu	$3,$24
1056	sltu	$1,$3,$24
1057	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
1058	addu	$25,$1
1059	addu	$7,$25
1060	sltu	$1,$7,$25
1061	addu	$2,$1
1062	mflo	($24,$14,$19)
1063	mfhi	($25,$14,$19)
1064	addu	$3,$24
1065	sltu	$1,$3,$24
1066	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
1067	addu	$25,$1
1068	addu	$7,$25
1069	sltu	$1,$7,$25
1070	addu	$2,$1
1071	mflo	($24,$15,$17)
1072	mfhi	($25,$15,$17)
1073	addu	$3,$24
1074	sltu	$1,$3,$24
1075	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
1076	addu	$25,$1
1077	addu	$7,$25
1078	sltu	$1,$7,$25
1079	addu	$2,$1
1080	mflo	($24,$16,$11)
1081	mfhi	($25,$16,$11)
1082	addu	$3,$24
1083	sltu	$1,$3,$24
1084	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
1085	addu	$25,$1
1086	addu	$7,$25
1087	sltu	$1,$7,$25
1088	addu	$2,$1
1089	mflo	($24,$18,$10)
1090	mfhi	($25,$18,$10)
1091	addu	$3,$24
1092	sltu	$1,$3,$24
1093	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
1094	addu	$25,$1
1095	addu	$7,$25
1096	sltu	$1,$7,$25
1097	addu	$2,$1
1098	mflo	($24,$20,$9)
1099	mfhi	($25,$20,$9)
1100	addu	$3,$24
1101	sltu	$1,$3,$24
1102	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
1103	addu	$25,$1
1104	addu	$7,$25
1105	sltu	$1,$7,$25
1106	addu	$2,$1
1107	mflo	($24,$5,$8)
1108	mfhi	($25,$5,$8)
1109	addu	$3,$24
1110	sltu	$1,$3,$24
1111	 multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
1112	addu	$25,$1
1113	addu	$7,$25
1114	sltu	$1,$7,$25
1115	addu	$2,$1
1116	sw	$3,7*4($4)	# r[7]=c2;
1117
1118	mflo	($24,$5,$9)
1119	mfhi	($25,$5,$9)
1120	addu	$7,$24
1121	sltu	$1,$7,$24
1122	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
1123	addu	$25,$1
1124	addu	$2,$25
1125	sltu	$3,$2,$25
1126	mflo	($24,$20,$10)
1127	mfhi	($25,$20,$10)
1128	addu	$7,$24
1129	sltu	$1,$7,$24
1130	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
1131	addu	$25,$1
1132	addu	$2,$25
1133	sltu	$1,$2,$25
1134	addu	$3,$1
1135	mflo	($24,$18,$11)
1136	mfhi	($25,$18,$11)
1137	addu	$7,$24
1138	sltu	$1,$7,$24
1139	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
1140	addu	$25,$1
1141	addu	$2,$25
1142	sltu	$1,$2,$25
1143	addu	$3,$1
1144	mflo	($24,$16,$17)
1145	mfhi	($25,$16,$17)
1146	addu	$7,$24
1147	sltu	$1,$7,$24
1148	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
1149	addu	$25,$1
1150	addu	$2,$25
1151	sltu	$1,$2,$25
1152	addu	$3,$1
1153	mflo	($24,$15,$19)
1154	mfhi	($25,$15,$19)
1155	addu	$7,$24
1156	sltu	$1,$7,$24
1157	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
1158	addu	$25,$1
1159	addu	$2,$25
1160	sltu	$1,$2,$25
1161	addu	$3,$1
1162	mflo	($24,$14,$21)
1163	mfhi	($25,$14,$21)
1164	addu	$7,$24
1165	sltu	$1,$7,$24
1166	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
1167	addu	$25,$1
1168	addu	$2,$25
1169	sltu	$1,$2,$25
1170	addu	$3,$1
1171	mflo	($24,$13,$6)
1172	mfhi	($25,$13,$6)
1173	addu	$7,$24
1174	sltu	$1,$7,$24
1175	 multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
1176	addu	$25,$1
1177	addu	$2,$25
1178	sltu	$1,$2,$25
1179	addu	$3,$1
1180	sw	$7,8*4($4)	# r[8]=c3;
1181
1182	mflo	($24,$14,$6)
1183	mfhi	($25,$14,$6)
1184	addu	$2,$24
1185	sltu	$1,$2,$24
1186	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
1187	addu	$25,$1
1188	addu	$3,$25
1189	sltu	$7,$3,$25
1190	mflo	($24,$15,$21)
1191	mfhi	($25,$15,$21)
1192	addu	$2,$24
1193	sltu	$1,$2,$24
1194	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
1195	addu	$25,$1
1196	addu	$3,$25
1197	sltu	$1,$3,$25
1198	addu	$7,$1
1199	mflo	($24,$16,$19)
1200	mfhi	($25,$16,$19)
1201	addu	$2,$24
1202	sltu	$1,$2,$24
1203	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
1204	addu	$25,$1
1205	addu	$3,$25
1206	sltu	$1,$3,$25
1207	addu	$7,$1
1208	mflo	($24,$18,$17)
1209	mfhi	($25,$18,$17)
1210	addu	$2,$24
1211	sltu	$1,$2,$24
1212	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
1213	addu	$25,$1
1214	addu	$3,$25
1215	sltu	$1,$3,$25
1216	addu	$7,$1
1217	mflo	($24,$20,$11)
1218	mfhi	($25,$20,$11)
1219	addu	$2,$24
1220	sltu	$1,$2,$24
1221	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
1222	addu	$25,$1
1223	addu	$3,$25
1224	sltu	$1,$3,$25
1225	addu	$7,$1
1226	mflo	($24,$5,$10)
1227	mfhi	($25,$5,$10)
1228	addu	$2,$24
1229	sltu	$1,$2,$24
1230	 multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
1231	addu	$25,$1
1232	addu	$3,$25
1233	sltu	$1,$3,$25
1234	addu	$7,$1
1235	sw	$2,9*4($4)	# r[9]=c1;
1236
1237	mflo	($24,$5,$11)
1238	mfhi	($25,$5,$11)
1239	addu	$3,$24
1240	sltu	$1,$3,$24
1241	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
1242	addu	$25,$1
1243	addu	$7,$25
1244	sltu	$2,$7,$25
1245	mflo	($24,$20,$17)
1246	mfhi	($25,$20,$17)
1247	addu	$3,$24
1248	sltu	$1,$3,$24
1249	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
1250	addu	$25,$1
1251	addu	$7,$25
1252	sltu	$1,$7,$25
1253	addu	$2,$1
1254	mflo	($24,$18,$19)
1255	mfhi	($25,$18,$19)
1256	addu	$3,$24
1257	sltu	$1,$3,$24
1258	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
1259	addu	$25,$1
1260	addu	$7,$25
1261	sltu	$1,$7,$25
1262	addu	$2,$1
1263	mflo	($24,$16,$21)
1264	mfhi	($25,$16,$21)
1265	addu	$3,$24
1266	sltu	$1,$3,$24
1267	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
1268	addu	$25,$1
1269	addu	$7,$25
1270	sltu	$1,$7,$25
1271	addu	$2,$1
1272	mflo	($24,$15,$6)
1273	mfhi	($25,$15,$6)
1274	addu	$3,$24
1275	sltu	$1,$3,$24
1276	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
1277	addu	$25,$1
1278	addu	$7,$25
1279	sltu	$1,$7,$25
1280	addu	$2,$1
1281	sw	$3,10*4($4)	# r[10]=c2;
1282
1283	mflo	($24,$16,$6)
1284	mfhi	($25,$16,$6)
1285	addu	$7,$24
1286	sltu	$1,$7,$24
1287	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
1288	addu	$25,$1
1289	addu	$2,$25
1290	sltu	$3,$2,$25
1291	mflo	($24,$18,$21)
1292	mfhi	($25,$18,$21)
1293	addu	$7,$24
1294	sltu	$1,$7,$24
1295	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
1296	addu	$25,$1
1297	addu	$2,$25
1298	sltu	$1,$2,$25
1299	addu	$3,$1
1300	mflo	($24,$20,$19)
1301	mfhi	($25,$20,$19)
1302	addu	$7,$24
1303	sltu	$1,$7,$24
1304	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
1305	addu	$25,$1
1306	addu	$2,$25
1307	sltu	$1,$2,$25
1308	addu	$3,$1
1309	mflo	($24,$5,$17)
1310	mfhi	($25,$5,$17)
1311	addu	$7,$24
1312	sltu	$1,$7,$24
1313	 multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
1314	addu	$25,$1
1315	addu	$2,$25
1316	sltu	$1,$2,$25
1317	addu	$3,$1
1318	sw	$7,11*4($4)	# r[11]=c3;
1319
1320	mflo	($24,$5,$19)
1321	mfhi	($25,$5,$19)
1322	addu	$2,$24
1323	sltu	$1,$2,$24
1324	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
1325	addu	$25,$1
1326	addu	$3,$25
1327	sltu	$7,$3,$25
1328	mflo	($24,$20,$21)
1329	mfhi	($25,$20,$21)
1330	addu	$2,$24
1331	sltu	$1,$2,$24
1332	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
1333	addu	$25,$1
1334	addu	$3,$25
1335	sltu	$1,$3,$25
1336	addu	$7,$1
1337	mflo	($24,$18,$6)
1338	mfhi	($25,$18,$6)
1339	addu	$2,$24
1340	sltu	$1,$2,$24
1341	 multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
1342	addu	$25,$1
1343	addu	$3,$25
1344	sltu	$1,$3,$25
1345	addu	$7,$1
1346	sw	$2,12*4($4)	# r[12]=c1;
1347
1348	mflo	($24,$20,$6)
1349	mfhi	($25,$20,$6)
1350	addu	$3,$24
1351	sltu	$1,$3,$24
1352	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
1353	addu	$25,$1
1354	addu	$7,$25
1355	sltu	$2,$7,$25
1356	mflo	($24,$5,$21)
1357	mfhi	($25,$5,$21)
1358	addu	$3,$24
1359	sltu	$1,$3,$24
1360	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
1361	addu	$25,$1
1362	addu	$7,$25
1363	sltu	$1,$7,$25
1364	addu	$2,$1
1365	sw	$3,13*4($4)	# r[13]=c2;
1366
1367	mflo	($24,$5,$6)
1368	mfhi	($25,$5,$6)
1369	addu	$7,$24
1370	sltu	$1,$7,$24
1371	addu	$25,$1
1372	addu	$2,$25
1373	sw	$7,14*4($4)	# r[14]=c3;
1374	sw	$2,15*4($4)	# r[15]=c1;
1375
1376	.set	noreorder
1377	lw	$21,5*4($29)
1378	lw	$20,4*4($29)
1379	lw	$19,3*4($29)
1380	lw	$18,2*4($29)
1381	lw	$17,1*4($29)
1382	lw	$16,0*4($29)
1383	jr	$31
1384	addu $29,6*4	# delay slot: release the 24-byte frame
1385.end	bn_mul_comba8
1386
# bn_mul_comba4 -- 4x4 word multiplication, written out column by column.
# Going by the per-step comments and the loads and stores below:
# $4 = r (8 result words are stored), $5 = a (4 words), $6 = b (4 words).
# presumably void bn_mul_comba4(r, a, b) -- TODO confirm.
# Register plan taken from the loads below:
#   a[0..3] in $12-$15, b[0..3] in $8-$11
#   c1/c2/c3 rotate through $2, $3, $7; $24/$25 hold the low/high product
#   halves and $1 the carry bit of the latest add.
# No stack frame is set up and no register is saved or restored here.
1387.align	5
1388.globl	bn_mul_comba4
1389.ent	bn_mul_comba4
1390bn_mul_comba4:
1391	.set	reorder
1392	lw	$12,0($5)
1393	lw	$8,0($6)
1394	lw	$13,4($5)
1395	lw	$14,2*4($5)
1396	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
1397	lw	$15,3*4($5)
1398	lw	$9,4($6)
1399	lw	$10,2*4($6)
1400	lw	$11,3*4($6)
1401	mflo	($2,$12,$8)
1402	mfhi	($3,$12,$8)
1403	sw	$2,0($4)	# r[0]=c1;
1404
1405	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
1406	mflo	($24,$12,$9)
1407	mfhi	($25,$12,$9)
1408	addu	$3,$24
1409	sltu	$1,$3,$24
1410	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
1411	addu	$7,$25,$1
1412	mflo	($24,$13,$8)
1413	mfhi	($25,$13,$8)
1414	addu	$3,$24
1415	sltu	$1,$3,$24
1416	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
1417	addu	$25,$1
1418	addu	$7,$25
1419	sltu	$2,$7,$25
1420	sw	$3,4($4)	# r[1]=c2;
1421
1422	mflo	($24,$14,$8)
1423	mfhi	($25,$14,$8)
1424	addu	$7,$24
1425	sltu	$1,$7,$24
1426	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
1427	addu	$25,$1
1428	addu	$2,$25
1429	mflo	($24,$13,$9)
1430	mfhi	($25,$13,$9)
1431	addu	$7,$24
1432	sltu	$1,$7,$24
1433	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
1434	addu	$25,$1
1435	addu	$2,$25
1436	sltu	$3,$2,$25
1437	mflo	($24,$12,$10)
1438	mfhi	($25,$12,$10)
1439	addu	$7,$24
1440	sltu	$1,$7,$24
1441	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
1442	addu	$25,$1
1443	addu	$2,$25
1444	sltu	$1,$2,$25
1445	addu	$3,$1
1446	sw	$7,2*4($4)	# r[2]=c3;
1447
1448	mflo	($24,$12,$11)
1449	mfhi	($25,$12,$11)
1450	addu	$2,$24
1451	sltu	$1,$2,$24
1452	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
1453	addu	$25,$1
1454	addu	$3,$25
1455	sltu	$7,$3,$25
1456	mflo	($24,$13,$10)
1457	mfhi	($25,$13,$10)
1458	addu	$2,$24
1459	sltu	$1,$2,$24
1460	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
1461	addu	$25,$1
1462	addu	$3,$25
1463	sltu	$1,$3,$25
1464	addu	$7,$1
1465	mflo	($24,$14,$9)
1466	mfhi	($25,$14,$9)
1467	addu	$2,$24
1468	sltu	$1,$2,$24
1469	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
1470	addu	$25,$1
1471	addu	$3,$25
1472	sltu	$1,$3,$25
1473	addu	$7,$1
1474	mflo	($24,$15,$8)
1475	mfhi	($25,$15,$8)
1476	addu	$2,$24
1477	sltu	$1,$2,$24
1478	 multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
1479	addu	$25,$1
1480	addu	$3,$25
1481	sltu	$1,$3,$25
1482	addu	$7,$1
1483	sw	$2,3*4($4)	# r[3]=c1;
1484
1485	mflo	($24,$15,$9)
1486	mfhi	($25,$15,$9)
1487	addu	$3,$24
1488	sltu	$1,$3,$24
1489	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
1490	addu	$25,$1
1491	addu	$7,$25
1492	sltu	$2,$7,$25
1493	mflo	($24,$14,$10)
1494	mfhi	($25,$14,$10)
1495	addu	$3,$24
1496	sltu	$1,$3,$24
1497	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
1498	addu	$25,$1
1499	addu	$7,$25
1500	sltu	$1,$7,$25
1501	addu	$2,$1
1502	mflo	($24,$13,$11)
1503	mfhi	($25,$13,$11)
1504	addu	$3,$24
1505	sltu	$1,$3,$24
1506	 multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
1507	addu	$25,$1
1508	addu	$7,$25
1509	sltu	$1,$7,$25
1510	addu	$2,$1
1511	sw	$3,4*4($4)	# r[4]=c2;
1512
1513	mflo	($24,$14,$11)
1514	mfhi	($25,$14,$11)
1515	addu	$7,$24
1516	sltu	$1,$7,$24
1517	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
1518	addu	$25,$1
1519	addu	$2,$25
1520	sltu	$3,$2,$25
1521	mflo	($24,$15,$10)
1522	mfhi	($25,$15,$10)
1523	addu	$7,$24
1524	sltu	$1,$7,$24
1525	 multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1526	addu	$25,$1
1527	addu	$2,$25
1528	sltu	$1,$2,$25
1529	addu	$3,$1
1530	sw	$7,5*4($4)	# r[5]=c3;
1531
1532	mflo	($24,$15,$11)
1533	mfhi	($25,$15,$11)
1534	addu	$2,$24
1535	sltu	$1,$2,$24
1536	addu	$25,$1
1537	addu	$3,$25
1538	sw	$2,6*4($4)	# r[6]=c1;
1539	sw	$3,7*4($4)	# r[7]=c2;
1540
1541	.set	noreorder
1542	jr	$31
1543	nop
1544.end	bn_mul_comba4
1545
# bn_sqr_comba8: square the eight-word number read from 0..7*4($5) and
# store the sixteen-word result to 0..15*4($4), one result word at a time.
# NOTE(review): presumably the C prototype is
# void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) - confirm against bn.h.
#
# Register use in this routine:
#   $12,$13,$14,$15   input words a[0]..a[3]
#   $8,$9,$10,$11     input words a[4]..a[7]
#   $2,$3,$7          three-word running sum; roles rotate after each store
#   $24,$25           low and high half of the most recent product
#   $1,$6             scratch for carry bits
# Every product of two different input words is added in twice; every
# product of a word with itself is added once.  The multiply needed by
# the following step is issued in the middle of the current add sequence;
# those lines are tagged as forward multiplication.
1546.align	5
1547.globl	bn_sqr_comba8
1548.ent	bn_sqr_comba8
1549bn_sqr_comba8:
1550	.set	reorder
1551	lw	$12,0($5)		# a[0]
1552	lw	$13,4($5)		# a[1]
1553	lw	$14,2*4($5)		# a[2]
1554	lw	$15,3*4($5)		# a[3]
1555
1556	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
1557	lw	$8,4*4($5)		# a[4]
1558	lw	$9,5*4($5)		# a[5]
1559	lw	$10,6*4($5)		# a[6]
1560	lw	$11,7*4($5)		# a[7]
1561	mflo	($2,$12,$12)
1562	mfhi	($3,$12,$12)
1563	sw	$2,0($4)		# result word 0 done
1564
1565	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
1566	mflo	($24,$12,$13)
1567	mfhi	($25,$12,$13)
1568	slt	$2,$25,$0		# $2 = top bit of high half, the carry out of doubling
1569	sll	$25,1			# double the high half
1570	 multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
1571	slt	$6,$24,$0		# $6 = top bit of low half
1572	addu	$25,$6			# move that bit into the doubled high half
1573	sll	$24,1			# double the low half
1574	addu	$3,$24
1575	sltu	$1,$3,$24		# $1 = carry out of the add above
1576	addu	$7,$25,$1
1577	sw	$3,4($4)		# result word 1 done
1578	mflo	($24,$14,$12)
1579	mfhi	($25,$14,$12)
1580	addu	$7,$24			# add low half, first time
1581	sltu	$1,$7,$24		# $1 = carry out of first add
1582	 multu	($13,$13)		# forward multiplication: a[1]*a[1]
1583	addu	$7,$24			# add low half, second time
1584	addu	$1,$25			# $1 = high half + first carry
1585	sltu	$24,$7,$24		# $24 = carry out of second add
1586	addu	$2,$1			# add high half + first carry
1587	addu	$25,$24			# $25 = high half + second carry
1588	sltu	$3,$2,$1		# top word starts out as this carry
1589	addu	$2,$25			# add high half + second carry
1590	sltu	$25,$2,$25		# carry out of that add
1591	addu	$3,$25			# fold it into the top word
1592	mflo	($24,$13,$13)
1593	mfhi	($25,$13,$13)
1594	addu	$7,$24			# a[1]*a[1] is added once only
1595	sltu	$1,$7,$24
1596	 multu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
1597	addu	$25,$1
1598	addu	$2,$25
1599	sltu	$1,$2,$25
1600	addu	$3,$1
1601	sw	$7,2*4($4)		# result word 2 done
1602	mflo	($24,$12,$15)
1603	mfhi	($25,$12,$15)
1604	addu	$2,$24
1605	sltu	$1,$2,$24
1606	 multu	($13,$14)		# forward multiplication: a[1]*a[2]
1607	addu	$2,$24
1608	addu	$1,$25
1609	sltu	$24,$2,$24
1610	addu	$3,$1
1611	addu	$25,$24
1612	sltu	$7,$3,$1
1613	addu	$3,$25
1614	sltu	$25,$3,$25
1615	addu	$7,$25
1616	mflo	($24,$13,$14)
1617	mfhi	($25,$13,$14)
1618	addu	$2,$24
1619	sltu	$1,$2,$24
1620	 multu	($8,$12)		# forward multiplication: a[4]*a[0]
1621	addu	$2,$24
1622	addu	$1,$25
1623	sltu	$24,$2,$24
1624	addu	$3,$1
1625	addu	$25,$24
1626	sltu	$1,$3,$1
1627	addu	$3,$25
1628	addu	$7,$1
1629	sltu	$25,$3,$25
1630	addu	$7,$25
1631	mflo	($24,$8,$12)
1632	mfhi	($25,$8,$12)
1633	sw	$2,3*4($4)		# result word 3 done
1634	addu	$3,$24
1635	sltu	$1,$3,$24
1636	 multu	($15,$13)		# forward multiplication: a[3]*a[1]
1637	addu	$3,$24
1638	addu	$1,$25
1639	sltu	$24,$3,$24
1640	addu	$7,$1
1641	addu	$25,$24
1642	sltu	$2,$7,$1
1643	addu	$7,$25
1644	sltu	$25,$7,$25
1645	addu	$2,$25
1646	mflo	($24,$15,$13)
1647	mfhi	($25,$15,$13)
1648	addu	$3,$24
1649	sltu	$1,$3,$24
1650	 multu	($14,$14)		# forward multiplication: a[2]*a[2]
1651	addu	$3,$24
1652	addu	$1,$25
1653	sltu	$24,$3,$24
1654	addu	$7,$1
1655	addu	$25,$24
1656	sltu	$1,$7,$1
1657	addu	$7,$25
1658	addu	$2,$1
1659	sltu	$25,$7,$25
1660	addu	$2,$25
1661	mflo	($24,$14,$14)
1662	mfhi	($25,$14,$14)
1663	addu	$3,$24
1664	sltu	$1,$3,$24
1665	 multu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
1666	addu	$25,$1
1667	addu	$7,$25
1668	sltu	$1,$7,$25
1669	addu	$2,$1
1670	sw	$3,4*4($4)		# result word 4 done
1671	mflo	($24,$12,$9)
1672	mfhi	($25,$12,$9)
1673	addu	$7,$24
1674	sltu	$1,$7,$24
1675	 multu	($13,$8)		# forward multiplication: a[1]*a[4]
1676	addu	$7,$24
1677	addu	$1,$25
1678	sltu	$24,$7,$24
1679	addu	$2,$1
1680	addu	$25,$24
1681	sltu	$3,$2,$1
1682	addu	$2,$25
1683	sltu	$25,$2,$25
1684	addu	$3,$25
1685	mflo	($24,$13,$8)
1686	mfhi	($25,$13,$8)
1687	addu	$7,$24
1688	sltu	$1,$7,$24
1689	 multu	($14,$15)		# forward multiplication: a[2]*a[3]
1690	addu	$7,$24
1691	addu	$1,$25
1692	sltu	$24,$7,$24
1693	addu	$2,$1
1694	addu	$25,$24
1695	sltu	$1,$2,$1
1696	addu	$2,$25
1697	addu	$3,$1
1698	sltu	$25,$2,$25
1699	addu	$3,$25
1700	mflo	($24,$14,$15)
1701	mfhi	($25,$14,$15)
1702	addu	$7,$24
1703	sltu	$1,$7,$24
1704	 multu	($10,$12)		# forward multiplication: a[6]*a[0]
1705	addu	$7,$24
1706	addu	$1,$25
1707	sltu	$24,$7,$24
1708	addu	$2,$1
1709	addu	$25,$24
1710	sltu	$1,$2,$1
1711	addu	$2,$25
1712	addu	$3,$1
1713	sltu	$25,$2,$25
1714	addu	$3,$25
1715	mflo	($24,$10,$12)
1716	mfhi	($25,$10,$12)
1717	sw	$7,5*4($4)		# result word 5 done
1718	addu	$2,$24
1719	sltu	$1,$2,$24
1720	 multu	($9,$13)		# forward multiplication: a[5]*a[1]
1721	addu	$2,$24
1722	addu	$1,$25
1723	sltu	$24,$2,$24
1724	addu	$3,$1
1725	addu	$25,$24
1726	sltu	$7,$3,$1
1727	addu	$3,$25
1728	sltu	$25,$3,$25
1729	addu	$7,$25
1730	mflo	($24,$9,$13)
1731	mfhi	($25,$9,$13)
1732	addu	$2,$24
1733	sltu	$1,$2,$24
1734	 multu	($8,$14)		# forward multiplication: a[4]*a[2]
1735	addu	$2,$24
1736	addu	$1,$25
1737	sltu	$24,$2,$24
1738	addu	$3,$1
1739	addu	$25,$24
1740	sltu	$1,$3,$1
1741	addu	$3,$25
1742	addu	$7,$1
1743	sltu	$25,$3,$25
1744	addu	$7,$25
1745	mflo	($24,$8,$14)
1746	mfhi	($25,$8,$14)
1747	addu	$2,$24
1748	sltu	$1,$2,$24
1749	 multu	($15,$15)		# forward multiplication: a[3]*a[3]
1750	addu	$2,$24
1751	addu	$1,$25
1752	sltu	$24,$2,$24
1753	addu	$3,$1
1754	addu	$25,$24
1755	sltu	$1,$3,$1
1756	addu	$3,$25
1757	addu	$7,$1
1758	sltu	$25,$3,$25
1759	addu	$7,$25
1760	mflo	($24,$15,$15)
1761	mfhi	($25,$15,$15)
1762	addu	$2,$24
1763	sltu	$1,$2,$24
1764	 multu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
1765	addu	$25,$1
1766	addu	$3,$25
1767	sltu	$1,$3,$25
1768	addu	$7,$1
1769	sw	$2,6*4($4)		# result word 6 done
1770	mflo	($24,$12,$11)
1771	mfhi	($25,$12,$11)
1772	addu	$3,$24
1773	sltu	$1,$3,$24
1774	 multu	($13,$10)		# forward multiplication: a[1]*a[6]
1775	addu	$3,$24
1776	addu	$1,$25
1777	sltu	$24,$3,$24
1778	addu	$7,$1
1779	addu	$25,$24
1780	sltu	$2,$7,$1
1781	addu	$7,$25
1782	sltu	$25,$7,$25
1783	addu	$2,$25
1784	mflo	($24,$13,$10)
1785	mfhi	($25,$13,$10)
1786	addu	$3,$24
1787	sltu	$1,$3,$24
1788	 multu	($14,$9)		# forward multiplication: a[2]*a[5]
1789	addu	$3,$24
1790	addu	$1,$25
1791	sltu	$24,$3,$24
1792	addu	$7,$1
1793	addu	$25,$24
1794	sltu	$1,$7,$1
1795	addu	$7,$25
1796	addu	$2,$1
1797	sltu	$25,$7,$25
1798	addu	$2,$25
1799	mflo	($24,$14,$9)
1800	mfhi	($25,$14,$9)
1801	addu	$3,$24
1802	sltu	$1,$3,$24
1803	 multu	($15,$8)		# forward multiplication: a[3]*a[4]
1804	addu	$3,$24
1805	addu	$1,$25
1806	sltu	$24,$3,$24
1807	addu	$7,$1
1808	addu	$25,$24
1809	sltu	$1,$7,$1
1810	addu	$7,$25
1811	addu	$2,$1
1812	sltu	$25,$7,$25
1813	addu	$2,$25
1814	mflo	($24,$15,$8)
1815	mfhi	($25,$15,$8)
1816	addu	$3,$24
1817	sltu	$1,$3,$24
1818	 multu	($11,$13)		# forward multiplication: a[7]*a[1]
1819	addu	$3,$24
1820	addu	$1,$25
1821	sltu	$24,$3,$24
1822	addu	$7,$1
1823	addu	$25,$24
1824	sltu	$1,$7,$1
1825	addu	$7,$25
1826	addu	$2,$1
1827	sltu	$25,$7,$25
1828	addu	$2,$25
1829	mflo	($24,$11,$13)
1830	mfhi	($25,$11,$13)
1831	sw	$3,7*4($4)		# result word 7 done
1832	addu	$7,$24
1833	sltu	$1,$7,$24
1834	 multu	($10,$14)		# forward multiplication: a[6]*a[2]
1835	addu	$7,$24
1836	addu	$1,$25
1837	sltu	$24,$7,$24
1838	addu	$2,$1
1839	addu	$25,$24
1840	sltu	$3,$2,$1
1841	addu	$2,$25
1842	sltu	$25,$2,$25
1843	addu	$3,$25
1844	mflo	($24,$10,$14)
1845	mfhi	($25,$10,$14)
1846	addu	$7,$24
1847	sltu	$1,$7,$24
1848	 multu	($9,$15)		# forward multiplication: a[5]*a[3]
1849	addu	$7,$24
1850	addu	$1,$25
1851	sltu	$24,$7,$24
1852	addu	$2,$1
1853	addu	$25,$24
1854	sltu	$1,$2,$1
1855	addu	$2,$25
1856	addu	$3,$1
1857	sltu	$25,$2,$25
1858	addu	$3,$25
1859	mflo	($24,$9,$15)
1860	mfhi	($25,$9,$15)
1861	addu	$7,$24
1862	sltu	$1,$7,$24
1863	 multu	($8,$8)		# forward multiplication: a[4]*a[4]
1864	addu	$7,$24
1865	addu	$1,$25
1866	sltu	$24,$7,$24
1867	addu	$2,$1
1868	addu	$25,$24
1869	sltu	$1,$2,$1
1870	addu	$2,$25
1871	addu	$3,$1
1872	sltu	$25,$2,$25
1873	addu	$3,$25
1874	mflo	($24,$8,$8)
1875	mfhi	($25,$8,$8)
1876	addu	$7,$24
1877	sltu	$1,$7,$24
1878	 multu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
1879	addu	$25,$1
1880	addu	$2,$25
1881	sltu	$1,$2,$25
1882	addu	$3,$1
1883	sw	$7,8*4($4)		# result word 8 done
1884	mflo	($24,$14,$11)
1885	mfhi	($25,$14,$11)
1886	addu	$2,$24
1887	sltu	$1,$2,$24
1888	 multu	($15,$10)		# forward multiplication: a[3]*a[6]
1889	addu	$2,$24
1890	addu	$1,$25
1891	sltu	$24,$2,$24
1892	addu	$3,$1
1893	addu	$25,$24
1894	sltu	$7,$3,$1
1895	addu	$3,$25
1896	sltu	$25,$3,$25
1897	addu	$7,$25
1898	mflo	($24,$15,$10)
1899	mfhi	($25,$15,$10)
1900	addu	$2,$24
1901	sltu	$1,$2,$24
1902	 multu	($8,$9)		# forward multiplication: a[4]*a[5]
1903	addu	$2,$24
1904	addu	$1,$25
1905	sltu	$24,$2,$24
1906	addu	$3,$1
1907	addu	$25,$24
1908	sltu	$1,$3,$1
1909	addu	$3,$25
1910	addu	$7,$1
1911	sltu	$25,$3,$25
1912	addu	$7,$25
1913	mflo	($24,$8,$9)
1914	mfhi	($25,$8,$9)
1915	addu	$2,$24
1916	sltu	$1,$2,$24
1917	 multu	($11,$15)		# forward multiplication: a[7]*a[3]
1918	addu	$2,$24
1919	addu	$1,$25
1920	sltu	$24,$2,$24
1921	addu	$3,$1
1922	addu	$25,$24
1923	sltu	$1,$3,$1
1924	addu	$3,$25
1925	addu	$7,$1
1926	sltu	$25,$3,$25
1927	addu	$7,$25
1928	mflo	($24,$11,$15)
1929	mfhi	($25,$11,$15)
1930	sw	$2,9*4($4)		# result word 9 done
1931	addu	$3,$24
1932	sltu	$1,$3,$24
1933	 multu	($10,$8)		# forward multiplication: a[6]*a[4]
1934	addu	$3,$24
1935	addu	$1,$25
1936	sltu	$24,$3,$24
1937	addu	$7,$1
1938	addu	$25,$24
1939	sltu	$2,$7,$1
1940	addu	$7,$25
1941	sltu	$25,$7,$25
1942	addu	$2,$25
1943	mflo	($24,$10,$8)
1944	mfhi	($25,$10,$8)
1945	addu	$3,$24
1946	sltu	$1,$3,$24
1947	 multu	($9,$9)		# forward multiplication: a[5]*a[5]
1948	addu	$3,$24
1949	addu	$1,$25
1950	sltu	$24,$3,$24
1951	addu	$7,$1
1952	addu	$25,$24
1953	sltu	$1,$7,$1
1954	addu	$7,$25
1955	addu	$2,$1
1956	sltu	$25,$7,$25
1957	addu	$2,$25
1958	mflo	($24,$9,$9)
1959	mfhi	($25,$9,$9)
1960	addu	$3,$24
1961	sltu	$1,$3,$24
1962	 multu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
1963	addu	$25,$1
1964	addu	$7,$25
1965	sltu	$1,$7,$25
1966	addu	$2,$1
1967	sw	$3,10*4($4)		# result word 10 done
1968	mflo	($24,$8,$11)
1969	mfhi	($25,$8,$11)
1970	addu	$7,$24
1971	sltu	$1,$7,$24
1972	 multu	($9,$10)		# forward multiplication: a[5]*a[6]
1973	addu	$7,$24
1974	addu	$1,$25
1975	sltu	$24,$7,$24
1976	addu	$2,$1
1977	addu	$25,$24
1978	sltu	$3,$2,$1
1979	addu	$2,$25
1980	sltu	$25,$2,$25
1981	addu	$3,$25
1982	mflo	($24,$9,$10)
1983	mfhi	($25,$9,$10)
1984	addu	$7,$24
1985	sltu	$1,$7,$24
1986	 multu	($11,$9)		# forward multiplication: a[7]*a[5]
1987	addu	$7,$24
1988	addu	$1,$25
1989	sltu	$24,$7,$24
1990	addu	$2,$1
1991	addu	$25,$24
1992	sltu	$1,$2,$1
1993	addu	$2,$25
1994	addu	$3,$1
1995	sltu	$25,$2,$25
1996	addu	$3,$25
1997	mflo	($24,$11,$9)
1998	mfhi	($25,$11,$9)
1999	sw	$7,11*4($4)		# result word 11 done
2000	addu	$2,$24
2001	sltu	$1,$2,$24
2002	 multu	($10,$10)		# forward multiplication: a[6]*a[6]
2003	addu	$2,$24
2004	addu	$1,$25
2005	sltu	$24,$2,$24
2006	addu	$3,$1
2007	addu	$25,$24
2008	sltu	$7,$3,$1
2009	addu	$3,$25
2010	sltu	$25,$3,$25
2011	addu	$7,$25
2012	mflo	($24,$10,$10)
2013	mfhi	($25,$10,$10)
2014	addu	$2,$24
2015	sltu	$1,$2,$24
2016	 multu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
2017	addu	$25,$1
2018	addu	$3,$25
2019	sltu	$1,$3,$25
2020	addu	$7,$1
2021	sw	$2,12*4($4)		# result word 12 done
2022	mflo	($24,$10,$11)
2023	mfhi	($25,$10,$11)
2024	addu	$3,$24
2025	sltu	$1,$3,$24
2026	 multu	($11,$11)		# forward multiplication: a[7]*a[7]
2027	addu	$3,$24
2028	addu	$1,$25
2029	sltu	$24,$3,$24
2030	addu	$7,$1
2031	addu	$25,$24
2032	sltu	$2,$7,$1
2033	addu	$7,$25
2034	sltu	$25,$7,$25
2035	addu	$2,$25
2036	mflo	($24,$11,$11)
2037	mfhi	($25,$11,$11)
2038	sw	$3,13*4($4)		# result word 13 done
2039
2040	addu	$7,$24			# last product, a[7]*a[7], is added once
2041	sltu	$1,$7,$24
2042	addu	$25,$1
2043	addu	$2,$25
2044	sw	$7,14*4($4)		# result word 14 done
2045	sw	$2,15*4($4)		# result word 15 done
2046
2047	.set	noreorder
2048	jr	$31			# return to caller
2049	nop				# fills the delay slot of the jump above
2050.end	bn_sqr_comba8
2051
# bn_sqr_comba4: square the four-word number read from 0..3*4($5) and
# store the eight-word result to 0..7*4($4), one result word at a time.
# NOTE(review): presumably the C prototype is
# void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) - confirm against bn.h.
#
# Register use in this routine:
#   $12,$13,$14,$15   input words a[0]..a[3]
#   $2,$3,$7          three-word running sum; roles rotate after each store
#   $24,$25           low and high half of the most recent product
#   $1,$6             scratch for carry bits
# Every product of two different input words is added in twice; every
# product of a word with itself is added once.  The multiply needed by
# the following step is issued in the middle of the current add sequence;
# those lines are tagged as forward multiplication.
2052.align	5
2053.globl	bn_sqr_comba4
2054.ent	bn_sqr_comba4
2055bn_sqr_comba4:
2056	.set	reorder
2057	lw	$12,0($5)		# a[0]
2058	lw	$13,4($5)		# a[1]
2059	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
2060	lw	$14,2*4($5)		# a[2]
2061	lw	$15,3*4($5)		# a[3]
2062	mflo	($2,$12,$12)
2063	mfhi	($3,$12,$12)
2064	sw	$2,0($4)		# result word 0 done
2065
2066	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
2067	mflo	($24,$12,$13)
2068	mfhi	($25,$12,$13)
2069	slt	$2,$25,$0		# $2 = top bit of high half, the carry out of doubling
2070	sll	$25,1			# double the high half
2071	 multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
2072	slt	$6,$24,$0		# $6 = top bit of low half
2073	addu	$25,$6			# move that bit into the doubled high half
2074	sll	$24,1			# double the low half
2075	addu	$3,$24
2076	sltu	$1,$3,$24		# $1 = carry out of the add above
2077	addu	$7,$25,$1
2078	sw	$3,4($4)		# result word 1 done
2079	mflo	($24,$14,$12)
2080	mfhi	($25,$14,$12)
2081	addu	$7,$24			# add low half, first time
2082	sltu	$1,$7,$24		# $1 = carry out of first add
2083	 multu	($13,$13)		# forward multiplication: a[1]*a[1]
2084	addu	$7,$24			# add low half, second time
2085	addu	$1,$25			# $1 = high half + first carry
2086	sltu	$24,$7,$24		# $24 = carry out of second add
2087	addu	$2,$1			# add high half + first carry
2088	addu	$25,$24			# $25 = high half + second carry
2089	sltu	$3,$2,$1		# top word starts out as this carry
2090	addu	$2,$25			# add high half + second carry
2091	sltu	$25,$2,$25		# carry out of that add
2092	addu	$3,$25			# fold it into the top word
2093	mflo	($24,$13,$13)
2094	mfhi	($25,$13,$13)
2095	addu	$7,$24			# a[1]*a[1] is added once only
2096	sltu	$1,$7,$24
2097	 multu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
2098	addu	$25,$1
2099	addu	$2,$25
2100	sltu	$1,$2,$25
2101	addu	$3,$1
2102	sw	$7,2*4($4)		# result word 2 done
2103	mflo	($24,$12,$15)
2104	mfhi	($25,$12,$15)
2105	addu	$2,$24
2106	sltu	$1,$2,$24
2107	 multu	($13,$14)		# forward multiplication: a[1]*a[2]
2108	addu	$2,$24
2109	addu	$1,$25
2110	sltu	$24,$2,$24
2111	addu	$3,$1
2112	addu	$25,$24
2113	sltu	$7,$3,$1
2114	addu	$3,$25
2115	sltu	$25,$3,$25
2116	addu	$7,$25
2117	mflo	($24,$13,$14)
2118	mfhi	($25,$13,$14)
2119	addu	$2,$24
2120	sltu	$1,$2,$24
2121	 multu	($15,$13)		# forward multiplication: a[3]*a[1]
2122	addu	$2,$24
2123	addu	$1,$25
2124	sltu	$24,$2,$24
2125	addu	$3,$1
2126	addu	$25,$24
2127	sltu	$1,$3,$1
2128	addu	$3,$25
2129	addu	$7,$1
2130	sltu	$25,$3,$25
2131	addu	$7,$25
2132	mflo	($24,$15,$13)
2133	mfhi	($25,$15,$13)
2134	sw	$2,3*4($4)		# result word 3 done
2135	addu	$3,$24
2136	sltu	$1,$3,$24
2137	 multu	($14,$14)		# forward multiplication: a[2]*a[2]
2138	addu	$3,$24
2139	addu	$1,$25
2140	sltu	$24,$3,$24
2141	addu	$7,$1
2142	addu	$25,$24
2143	sltu	$2,$7,$1
2144	addu	$7,$25
2145	sltu	$25,$7,$25
2146	addu	$2,$25
2147	mflo	($24,$14,$14)
2148	mfhi	($25,$14,$14)
2149	addu	$3,$24
2150	sltu	$1,$3,$24
2151	 multu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
2152	addu	$25,$1
2153	addu	$7,$25
2154	sltu	$1,$7,$25
2155	addu	$2,$1
2156	sw	$3,4*4($4)		# result word 4 done
2157	mflo	($24,$14,$15)
2158	mfhi	($25,$14,$15)
2159	addu	$7,$24
2160	sltu	$1,$7,$24
2161	 multu	($15,$15)		# forward multiplication: a[3]*a[3]
2162	addu	$7,$24
2163	addu	$1,$25
2164	sltu	$24,$7,$24
2165	addu	$2,$1
2166	addu	$25,$24
2167	sltu	$3,$2,$1
2168	addu	$2,$25
2169	sltu	$25,$2,$25
2170	addu	$3,$25
2171	mflo	($24,$15,$15)
2172	mfhi	($25,$15,$15)
2173	sw	$7,5*4($4)		# result word 5 done
2174
2175	addu	$2,$24			# last product, a[3]*a[3], is added once
2176	sltu	$1,$2,$24
2177	addu	$25,$1
2178	addu	$3,$25
2179	sw	$2,6*4($4)		# result word 6 done
2180	sw	$3,7*4($4)		# result word 7 done
2181
2182	.set	noreorder
2183	jr	$31			# return to caller
2184	nop				# fills the delay slot of the jump above
2185.end	bn_sqr_comba4
2186