xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips.S (revision 1b3d6f93806f8821fe459e13ad13e605b37c6d43)
1#if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))
2.set     mips2
3#endif
4#include "mips_arch.h"
5
/*
 * Divide helpers.  A divide site in this file is written as the pair
 *   divu (rs,rt)   followed by   mfqt (rd,rs,rt)    -> rd = quotient
 * and mfrm (rd,rs,rt) is the matching way to fetch the remainder.
 * The three variants below map that pair onto the selected ISA level.
 */
6#if defined(_MIPS_ARCH_MIPS64R6)
/* MIPS64 R6: the divide step expands to nothing; ddivu/dmodu write rd directly. */
7# define ddivu(rs,rt)
8# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
9# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
10#elif defined(_MIPS_ARCH_MIPS32R6)
/* MIPS32 R6: same scheme, using the three-operand divu/modu. */
11# define divu(rs,rt)
12# define mfqt(rd,rs,rt)	divu	rd,rs,rt
13# define mfrm(rd,rs,rt)	modu	rd,rs,rt
14#else
/* Otherwise: divu is issued with $0 as destination and the result is fetched with mflo/mfhi. */
15# define divu(rs,rt)	divu	$0,rs,rt
16# define mfqt(rd,rs,rt)	mflo	rd
17# define mfrm(rd,rs,rt)	mfhi	rd
18#endif
19
/* Identification strings, emitted into the .rdata section. */
20.rdata
21.asciiz	"mips3.s, Version 1.2"
22.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
23
24.text
/* $1 ($at) is used explicitly as a scratch register by the code below. */
25.set	noat
26
/*
 * bn_mul_add_words: exported entry point.
 * Register use (see bn_mul_add_words_internal):
 *   $4 - word array that is read, updated and written back
 *   $5 - word array that is only read
 *   $6 - number of words; a count <= 0 returns immediately
 *   $7 - multiplier word
 * Returns the final carry word in $2 (0 when $6 <= 0); the same value
 * is also copied into $4.
 */
27.align	5
28.globl	bn_mul_add_words
29.ent	bn_mul_add_words
30bn_mul_add_words:
31	.set	noreorder
32	bgtz	$6,bn_mul_add_words_internal	# count > 0: do the work
33	move	$2,$0		# delay slot: carry / return value starts at 0
34	jr	$31
35	move	$4,$2		# delay slot: mirror the result into $4
36.end	bn_mul_add_words
37
/*
 * bn_mul_add_words_internal: for each of the $6 words,
 *   {carry, ($4)[i]} = ($4)[i] + ($5)[i] * $7 + carry
 * with the running carry kept in $2 (bn_mul_add_words enters here with
 * $2 = 0 and $6 > 0).  The main loop handles four words per iteration
 * while ($6 & -4) is non-zero; the tail handles the remaining one to
 * three words.  $1, $3 and $8-$15 are used as scratch.
 * A multu written with one extra leading space starts the product for
 * the next word before the current word has been stored.
 * NOTE(review): multu/mflo/mfhi are written in macro-call form;
 * presumably they are defined in mips_arch.h - confirm there.
 */
38.align	5
39.ent	bn_mul_add_words_internal
40bn_mul_add_words_internal:
41	.set	reorder
42	li	$3,-4		# mask clearing the low two bits of the count
43	and	$8,$6,$3	# $8 = count with the low two bits cleared
44	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words: tail only
45

46.L_bn_mul_add_words_loop:
47	lw	$12,0($5)
48	multu	($12,$7)
49	lw	$13,0($4)
50	lw	$14,4($5)
51	lw	$15,4($4)
52	lw	$8,2*4($5)
53	lw	$9,2*4($4)
54	addu	$13,$2
55	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
56				# values", but it seems to work fine
57				# even on 64-bit registers.
58	mflo	($1,$12,$7)
59	mfhi	($12,$12,$7)
60	addu	$13,$1		# add low half of the product
61	addu	$2,$12		# carry += high half of the product
62	 multu	($14,$7)
63	sltu	$1,$13,$1	# carry out of the low-half addition
64	sw	$13,0($4)
65	addu	$2,$1
66

67	lw	$10,3*4($5)
68	lw	$11,3*4($4)
69	addu	$15,$2
70	sltu	$2,$15,$2
71	mflo	($1,$14,$7)
72	mfhi	($14,$14,$7)
73	addu	$15,$1
74	addu	$2,$14
75	 multu	($8,$7)
76	sltu	$1,$15,$1
77	sw	$15,4($4)
78	addu	$2,$1
79

80	subu	$6,4		# four words consumed
81	addu $4,4*4		# pointers advance here, so the remaining
82	addu $5,4*4		# stores use negative offsets
83	addu	$9,$2
84	sltu	$2,$9,$2
85	mflo	($1,$8,$7)
86	mfhi	($8,$8,$7)
87	addu	$9,$1
88	addu	$2,$8
89	 multu	($10,$7)
90	sltu	$1,$9,$1
91	sw	$9,-2*4($4)
92	addu	$2,$1
93

94

95	and	$8,$6,$3	# any full group of four left?
96	addu	$11,$2
97	sltu	$2,$11,$2
98	mflo	($1,$10,$7)
99	mfhi	($10,$10,$7)
100	addu	$11,$1
101	addu	$2,$10
102	sltu	$1,$11,$1
103	sw	$11,-4($4)
104	.set	noreorder
105	bgtz	$8,.L_bn_mul_add_words_loop
106	addu	$2,$1		# delay slot: finish the carry for word 3
107

108	beqz	$6,.L_bn_mul_add_words_return	# count was a multiple of 4
109	nop
110

111.L_bn_mul_add_words_tail:
112	.set	reorder
113	lw	$12,0($5)	# first leftover word
114	multu	($12,$7)
115	lw	$13,0($4)
116	subu	$6,1
117	addu	$13,$2
118	sltu	$2,$13,$2
119	mflo	($1,$12,$7)
120	mfhi	($12,$12,$7)
121	addu	$13,$1
122	addu	$2,$12
123	sltu	$1,$13,$1
124	sw	$13,0($4)
125	addu	$2,$1
126	beqz	$6,.L_bn_mul_add_words_return
127

128	lw	$12,4($5)	# second leftover word
129	multu	($12,$7)
130	lw	$13,4($4)
131	subu	$6,1
132	addu	$13,$2
133	sltu	$2,$13,$2
134	mflo	($1,$12,$7)
135	mfhi	($12,$12,$7)
136	addu	$13,$1
137	addu	$2,$12
138	sltu	$1,$13,$1
139	sw	$13,4($4)
140	addu	$2,$1
141	beqz	$6,.L_bn_mul_add_words_return
142

143	lw	$12,2*4($5)	# third (last possible) leftover word
144	multu	($12,$7)
145	lw	$13,2*4($4)
146	addu	$13,$2
147	sltu	$2,$13,$2
148	mflo	($1,$12,$7)
149	mfhi	($12,$12,$7)
150	addu	$13,$1
151	addu	$2,$12
152	sltu	$1,$13,$1
153	sw	$13,2*4($4)
154	addu	$2,$1
155

156.L_bn_mul_add_words_return:
157	.set	noreorder
158	jr	$31
159	move	$4,$2		# delay slot: mirror the carry into $4
160.end	bn_mul_add_words_internal
161
/*
 * bn_mul_words: exported entry point.
 * Register use (see bn_mul_words_internal):
 *   $4 - destination word array (written only)
 *   $5 - source word array
 *   $6 - number of words; a count <= 0 returns immediately
 *   $7 - multiplier word
 * Returns the final carry word in $2 (0 when $6 <= 0), also copied
 * into $4.
 */
162.align	5
163.globl	bn_mul_words
164.ent	bn_mul_words
165bn_mul_words:
166	.set	noreorder
167	bgtz	$6,bn_mul_words_internal	# count > 0: do the work
168	move	$2,$0		# delay slot: carry / return value starts at 0
169	jr	$31
170	move	$4,$2		# delay slot: mirror the result into $4
171.end	bn_mul_words
172
/*
 * bn_mul_words_internal: for each of the $6 words,
 *   {carry, ($4)[i]} = ($5)[i] * $7 + carry
 * with the running carry kept in $2 (bn_mul_words enters here with
 * $2 = 0 and $6 > 0).  Four words per main-loop iteration while
 * ($6 & -4) is non-zero, then a tail of one to three words.
 * $1, $3 and $8-$15 are used as scratch.
 */
173.align	5
174.ent	bn_mul_words_internal
175bn_mul_words_internal:
176	.set	reorder
177	li	$3,-4		# mask clearing the low two bits of the count
178	and	$8,$6,$3
179	beqz	$8,.L_bn_mul_words_tail	# fewer than 4 words: tail only
180

181.L_bn_mul_words_loop:
182	lw	$12,0($5)
183	multu	($12,$7)
184	lw	$14,4($5)
185	lw	$8,2*4($5)
186	lw	$10,3*4($5)
187	mflo	($1,$12,$7)
188	mfhi	($12,$12,$7)
189	addu	$2,$1		# low half + incoming carry
190	sltu	$13,$2,$1	# carry out of that addition
191	 multu	($14,$7)
192	sw	$2,0($4)
193	addu	$2,$13,$12	# next carry = high half + carry out
194

195	subu	$6,4		# four words consumed
196	addu $4,4*4		# pointers advance here, so the remaining
197	addu $5,4*4		# stores use negative offsets
198	mflo	($1,$14,$7)
199	mfhi	($14,$14,$7)
200	addu	$2,$1
201	sltu	$15,$2,$1
202	 multu	($8,$7)
203	sw	$2,-3*4($4)
204	addu	$2,$15,$14
205

206	mflo	($1,$8,$7)
207	mfhi	($8,$8,$7)
208	addu	$2,$1
209	sltu	$9,$2,$1
210	 multu	($10,$7)
211	sw	$2,-2*4($4)
212	addu	$2,$9,$8
213

214	and	$8,$6,$3	# any full group of four left?
215	mflo	($1,$10,$7)
216	mfhi	($10,$10,$7)
217	addu	$2,$1
218	sltu	$11,$2,$1
219	sw	$2,-4($4)
220	.set	noreorder
221	bgtz	$8,.L_bn_mul_words_loop
222	addu	$2,$11,$10	# delay slot: carry out of word 3
223

224	beqz	$6,.L_bn_mul_words_return	# count was a multiple of 4
225	nop
226

227.L_bn_mul_words_tail:
228	.set	reorder
229	lw	$12,0($5)	# first leftover word
230	multu	($12,$7)
231	subu	$6,1
232	mflo	($1,$12,$7)
233	mfhi	($12,$12,$7)
234	addu	$2,$1
235	sltu	$13,$2,$1
236	sw	$2,0($4)
237	addu	$2,$13,$12
238	beqz	$6,.L_bn_mul_words_return
239

240	lw	$12,4($5)	# second leftover word
241	multu	($12,$7)
242	subu	$6,1
243	mflo	($1,$12,$7)
244	mfhi	($12,$12,$7)
245	addu	$2,$1
246	sltu	$13,$2,$1
247	sw	$2,4($4)
248	addu	$2,$13,$12
249	beqz	$6,.L_bn_mul_words_return
250

251	lw	$12,2*4($5)	# third (last possible) leftover word
252	multu	($12,$7)
253	mflo	($1,$12,$7)
254	mfhi	($12,$12,$7)
255	addu	$2,$1
256	sltu	$13,$2,$1
257	sw	$2,2*4($4)
258	addu	$2,$13,$12
259

260.L_bn_mul_words_return:
261	.set	noreorder
262	jr	$31
263	move	$4,$2		# delay slot: mirror the carry into $4
264.end	bn_mul_words_internal
265
/*
 * bn_sqr_words: exported entry point.
 * Register use (see bn_sqr_words_internal):
 *   $4 - destination array, two words written per source word
 *   $5 - source word array
 *   $6 - number of source words; a count <= 0 returns immediately
 * $2 is set to 0 here and copied into $4 on return.
 */
266.align	5
267.globl	bn_sqr_words
268.ent	bn_sqr_words
269bn_sqr_words:
270	.set	noreorder
271	bgtz	$6,bn_sqr_words_internal	# count > 0: do the work
272	move	$2,$0		# delay slot: $2 = 0
273	jr	$31
274	move	$4,$2		# delay slot: mirror $2 into $4
275.end	bn_sqr_words
276
/*
 * bn_sqr_words_internal: for each of the $6 source words at $5, store
 * the two-word square at $4 as (low word, high word), i.e. source word
 * i lands in destination words 2*i and 2*i+1.  Four source words per
 * main-loop iteration while ($6 & -4) is non-zero, then a tail of one
 * to three words.  $3 and $8-$15 are used as scratch; $2 is not written
 * here, so the value set by bn_sqr_words is what gets returned.
 */
277.align	5
278.ent	bn_sqr_words_internal
279bn_sqr_words_internal:
280	.set	reorder
281	li	$3,-4		# mask clearing the low two bits of the count
282	and	$8,$6,$3
283	beqz	$8,.L_bn_sqr_words_tail	# fewer than 4 words: tail only
284

285.L_bn_sqr_words_loop:
286	lw	$12,0($5)
287	multu	($12,$12)
288	lw	$14,4($5)
289	lw	$8,2*4($5)
290	lw	$10,3*4($5)
291	mflo	($13,$12,$12)
292	mfhi	($12,$12,$12)
293	sw	$13,0($4)	# low word of the square
294	sw	$12,4($4)	# high word of the square
295

296	multu	($14,$14)
297	subu	$6,4		# four source words consumed
298	addu $4,8*4		# destination advances by eight words,
299	addu $5,4*4		# source by four; later stores use negative offsets
300	mflo	($15,$14,$14)
301	mfhi	($14,$14,$14)
302	sw	$15,-6*4($4)
303	sw	$14,-5*4($4)
304

305	multu	($8,$8)
306	mflo	($9,$8,$8)
307	mfhi	($8,$8,$8)
308	sw	$9,-4*4($4)
309	sw	$8,-3*4($4)
310

311

312	multu	($10,$10)
313	and	$8,$6,$3	# any full group of four left?
314	mflo	($11,$10,$10)
315	mfhi	($10,$10,$10)
316	sw	$11,-2*4($4)
317

318	.set	noreorder
319	bgtz	$8,.L_bn_sqr_words_loop
320	sw	$10,-4($4)	# delay slot: last high word of the group
321

322	beqz	$6,.L_bn_sqr_words_return	# count was a multiple of 4
323	nop
324

325.L_bn_sqr_words_tail:
326	.set	reorder
327	lw	$12,0($5)	# first leftover word
328	multu	($12,$12)
329	subu	$6,1
330	mflo	($13,$12,$12)
331	mfhi	($12,$12,$12)
332	sw	$13,0($4)
333	sw	$12,4($4)
334	beqz	$6,.L_bn_sqr_words_return
335

336	lw	$12,4($5)	# second leftover word
337	multu	($12,$12)
338	subu	$6,1
339	mflo	($13,$12,$12)
340	mfhi	($12,$12,$12)
341	sw	$13,2*4($4)
342	sw	$12,3*4($4)
343	beqz	$6,.L_bn_sqr_words_return
344

345	lw	$12,2*4($5)	# third (last possible) leftover word
346	multu	($12,$12)
347	mflo	($13,$12,$12)
348	mfhi	($12,$12,$12)
349	sw	$13,4*4($4)
350	sw	$12,5*4($4)
351

352.L_bn_sqr_words_return:
353	.set	noreorder
354	jr	$31
355	move	$4,$2		# delay slot: mirror $2 into $4
356

357.end	bn_sqr_words_internal
358
/*
 * bn_add_words: exported entry point.
 * Register use (see bn_add_words_internal):
 *   $4 - destination word array
 *   $5 - first addend array
 *   $6 - second addend array
 *   $7 - number of words; a count <= 0 returns immediately
 * Returns the final carry in $2 (0 when $7 <= 0), also copied into $4.
 */
359.align	5
360.globl	bn_add_words
361.ent	bn_add_words
362bn_add_words:
363	.set	noreorder
364	bgtz	$7,bn_add_words_internal	# count > 0: do the work
365	move	$2,$0		# delay slot: carry / return value starts at 0
366	jr	$31
367	move	$4,$2		# delay slot: mirror the result into $4
368.end	bn_add_words
369
/*
 * bn_add_words_internal: for each of the $7 words,
 *   {carry, ($4)[i]} = ($5)[i] + ($6)[i] + carry
 * with the running carry kept in $2 (bn_add_words enters here with
 * $2 = 0 and $7 > 0).  Each word needs two carry tests: one for the
 * sum of the two operands ($24/$25) and one for adding the incoming
 * carry; the two are then summed into $2.  Four words per main-loop
 * iteration while ($7 & -4) is non-zero, then a tail of one to three
 * words.  $1, $3, $8-$15, $24 and $25 are used as scratch.
 */
370.align	5
371.ent	bn_add_words_internal
372bn_add_words_internal:
373	.set	reorder
374	li	$3,-4		# mask clearing the low two bits of the count
375	and	$1,$7,$3
376	beqz	$1,.L_bn_add_words_tail	# fewer than 4 words: tail only
377

378.L_bn_add_words_loop:
379	lw	$12,0($5)
380	lw	$8,0($6)
381	subu	$7,4		# four words consumed
382	lw	$13,4($5)
383	and	$1,$7,$3	# loop condition, tested at the bottom
384	lw	$14,2*4($5)
385	addu $6,4*4		# all three pointers advance early, so the
386	lw	$15,3*4($5)
387	addu $4,4*4		# remaining accesses use negative offsets
388	lw	$9,-3*4($6)
389	addu $5,4*4
390	lw	$10,-2*4($6)
391	lw	$11,-4($6)
392	addu	$8,$12		# sum of the two operand words
393	sltu	$24,$8,$12	# carry out of that sum
394	addu	$12,$8,$2	# add the incoming carry
395	sltu	$2,$12,$8	# carry out of adding the carry
396	sw	$12,-4*4($4)
397	addu	$2,$24		# combined carry for the next word
398

399	addu	$9,$13
400	sltu	$25,$9,$13
401	addu	$13,$9,$2
402	sltu	$2,$13,$9
403	sw	$13,-3*4($4)
404	addu	$2,$25
405

406	addu	$10,$14
407	sltu	$24,$10,$14
408	addu	$14,$10,$2
409	sltu	$2,$14,$10
410	sw	$14,-2*4($4)
411	addu	$2,$24
412

413	addu	$11,$15
414	sltu	$25,$11,$15
415	addu	$15,$11,$2
416	sltu	$2,$15,$11
417	sw	$15,-4($4)
418

419	.set	noreorder
420	bgtz	$1,.L_bn_add_words_loop
421	addu	$2,$25		# delay slot: combined carry of word 3
422

423	beqz	$7,.L_bn_add_words_return	# count was a multiple of 4
424	nop
425

426.L_bn_add_words_tail:
427	.set	reorder
428	lw	$12,0($5)	# first leftover word
429	lw	$8,0($6)
430	addu	$8,$12
431	subu	$7,1
432	sltu	$24,$8,$12
433	addu	$12,$8,$2
434	sltu	$2,$12,$8
435	sw	$12,0($4)
436	addu	$2,$24
437	beqz	$7,.L_bn_add_words_return
438

439	lw	$13,4($5)	# second leftover word
440	lw	$9,4($6)
441	addu	$9,$13
442	subu	$7,1
443	sltu	$25,$9,$13
444	addu	$13,$9,$2
445	sltu	$2,$13,$9
446	sw	$13,4($4)
447	addu	$2,$25
448	beqz	$7,.L_bn_add_words_return
449

450	lw	$14,2*4($5)	# third (last possible) leftover word
451	lw	$10,2*4($6)
452	addu	$10,$14
453	sltu	$24,$10,$14
454	addu	$14,$10,$2
455	sltu	$2,$14,$10
456	sw	$14,2*4($4)
457	addu	$2,$24
458

459.L_bn_add_words_return:
460	.set	noreorder
461	jr	$31
462	move	$4,$2		# delay slot: mirror the carry into $4
463

464.end	bn_add_words_internal
465
/*
 * bn_sub_words: exported entry point.
 * Register use (see bn_sub_words_internal):
 *   $4 - destination word array
 *   $5 - minuend array
 *   $6 - subtrahend array
 *   $7 - number of words; a count <= 0 returns immediately
 * Returns the final borrow in $2 (0 when $7 <= 0).  On the early-return
 * path $4 is cleared from $0 directly; the value is the same 0 as $2.
 */
466.align	5
467.globl	bn_sub_words
468.ent	bn_sub_words
469bn_sub_words:
470	.set	noreorder
471	bgtz	$7,bn_sub_words_internal	# count > 0: do the work
472	move	$2,$0		# delay slot: borrow / return value starts at 0
473	jr	$31
474	move	$4,$0		# delay slot: $4 = 0 as well
475.end	bn_sub_words
476
/*
 * bn_sub_words_internal: for each of the $7 words,
 *   {borrow, ($4)[i]} = ($5)[i] - ($6)[i] - borrow
 * with the running borrow kept in $2 (bn_sub_words enters here with
 * $2 = 0 and $7 > 0).  Each word needs two borrow tests: one for the
 * operand difference ($24/$25) and one for subtracting the incoming
 * borrow; the two are then summed into $2.  Four words per main-loop
 * iteration while ($7 & -4) is non-zero, then a tail of one to three
 * words.  $1, $3, $8-$15, $24 and $25 are used as scratch.
 */
477.align	5
478.ent	bn_sub_words_internal
479bn_sub_words_internal:
480	.set	reorder
481	li	$3,-4		# mask clearing the low two bits of the count
482	and	$1,$7,$3
483	beqz	$1,.L_bn_sub_words_tail	# fewer than 4 words: tail only
484

485.L_bn_sub_words_loop:
486	lw	$12,0($5)
487	lw	$8,0($6)
488	subu	$7,4		# four words consumed
489	lw	$13,4($5)
490	and	$1,$7,$3	# loop condition, tested at the bottom
491	lw	$14,2*4($5)
492	addu $6,4*4		# all three pointers advance early, so the
493	lw	$15,3*4($5)
494	addu $4,4*4		# remaining accesses use negative offsets
495	lw	$9,-3*4($6)
496	addu $5,4*4
497	lw	$10,-2*4($6)
498	lw	$11,-4($6)
499	sltu	$24,$12,$8	# borrow if minuend < subtrahend
500	subu	$8,$12,$8	# operand difference
501	subu	$12,$8,$2	# subtract the incoming borrow
502	sgtu	$2,$12,$8	# borrow if that wrapped around
503	sw	$12,-4*4($4)
504	addu	$2,$24		# combined borrow for the next word
505

506	sltu	$25,$13,$9
507	subu	$9,$13,$9
508	subu	$13,$9,$2
509	sgtu	$2,$13,$9
510	sw	$13,-3*4($4)
511	addu	$2,$25
512

513

514	sltu	$24,$14,$10
515	subu	$10,$14,$10
516	subu	$14,$10,$2
517	sgtu	$2,$14,$10
518	sw	$14,-2*4($4)
519	addu	$2,$24
520

521	sltu	$25,$15,$11
522	subu	$11,$15,$11
523	subu	$15,$11,$2
524	sgtu	$2,$15,$11
525	sw	$15,-4($4)
526

527	.set	noreorder
528	bgtz	$1,.L_bn_sub_words_loop
529	addu	$2,$25		# delay slot: combined borrow of word 3
530

531	beqz	$7,.L_bn_sub_words_return	# count was a multiple of 4
532	nop
533

534.L_bn_sub_words_tail:
535	.set	reorder
536	lw	$12,0($5)	# first leftover word
537	lw	$8,0($6)
538	subu	$7,1
539	sltu	$24,$12,$8
540	subu	$8,$12,$8
541	subu	$12,$8,$2
542	sgtu	$2,$12,$8
543	sw	$12,0($4)
544	addu	$2,$24
545	beqz	$7,.L_bn_sub_words_return
546

547	lw	$13,4($5)	# second leftover word
548	subu	$7,1
549	lw	$9,4($6)
550	sltu	$25,$13,$9
551	subu	$9,$13,$9
552	subu	$13,$9,$2
553	sgtu	$2,$13,$9
554	sw	$13,4($4)
555	addu	$2,$25
556	beqz	$7,.L_bn_sub_words_return
557

558	lw	$14,2*4($5)	# third (last possible) leftover word
559	lw	$10,2*4($6)
560	sltu	$24,$14,$10
561	subu	$10,$14,$10
562	subu	$14,$10,$2
563	sgtu	$2,$14,$10
564	sw	$14,2*4($4)
565	addu	$2,$24
566

567.L_bn_sub_words_return:
568	.set	noreorder
569	jr	$31
570	move	$4,$2		# delay slot: mirror the borrow into $4
571.end	bn_sub_words_internal
572
573#if 0
574/*
575 * The bn_div_3_words entry point is re-used for constant-time interface.
576 * Implementation is retained as historical reference.
577 */
/*
 * Everything up to the matching #endif is compiled out.
 * As written: $4 points at a word array that is read at offsets 0, -4
 * and -8, $5 and $6 are two further words.  If the word at ($4) equals
 * $6 the routine returns -1; otherwise it calls bn_div_words_internal
 * and then corrects the quotient estimate in $2 in a loop.
 */
578.align 5
579.globl	bn_div_3_words
580.ent	bn_div_3_words
581bn_div_3_words:
582	.set	noreorder
583	move	$7,$4		# we know that bn_div_words does not
584				# touch $7, $10, $11 and preserves $6
585				# so that we can save two arguments
586				# and return address in registers
587				# instead of stack:-)
588

589	lw	$4,($7)
590	move	$10,$5
591	bne	$4,$6,bn_div_3_words_internal
592	lw	$5,-4($7)	# delay slot: second word, loaded on both paths
593	li	$2,-1		# top word equals $6: return all ones
594	jr	$31
595	move	$4,$2		# delay slot: mirror the result into $4
596.end	bn_div_3_words
597

598.align	5
599.ent	bn_div_3_words_internal
600bn_div_3_words_internal:
601	.set	reorder
602	move	$11,$31		# keep the return address in a register
603	bal	bn_div_words_internal
604	move	$31,$11		# restore the return address
605	multu	($10,$2)
606	lw	$14,-2*4($7)
607	move	$8,$0
608	mfhi	($13,$10,$2)
609	mflo	($12,$10,$2)
610	sltu	$24,$13,$5
611.L_bn_div_3_words_inner_loop:
612	bnez	$24,.L_bn_div_3_words_inner_loop_done
613	sgeu	$1,$14,$12
614	seq	$25,$13,$5
615	and	$1,$25
616	sltu	$15,$12,$10
617	addu	$5,$6
618	subu	$13,$15
619	subu	$12,$10
620	sltu	$24,$13,$5
621	sltu	$8,$5,$6
622	or	$24,$8
623	.set	noreorder
624	beqz	$1,.L_bn_div_3_words_inner_loop
625	subu	$2,1		# delay slot: decrement on every pass ...
626	addu	$2,1		# ... and undo it once when the loop falls through
627	.set	reorder
628.L_bn_div_3_words_inner_loop_done:
629	.set	noreorder
630	jr	$31
631	move	$4,$2		# delay slot: mirror the result into $4
632.end	bn_div_3_words_internal
633#endif
634
/*
 * bn_div_words: exported entry point.
 * Register use (see bn_div_words_internal):
 *   $4 - high word of the dividend
 *   $5 - low word of the dividend
 *   $6 - divisor
 * A zero divisor returns -1 in $2 (also copied into $4) without
 * dividing; otherwise control passes to bn_div_words_internal.
 */
635.align	5
636.globl	bn_div_words
637.ent	bn_div_words
638bn_div_words:
639	.set	noreorder
640	bnez	$6,bn_div_words_internal
641	li	$2,-1		# I would rather signal div-by-zero
642				# which can be done with 'break 7'
643	jr	$31
644	move	$4,$2		# delay slot: mirror the result into $4
645.end	bn_div_words
646
/*
 * bn_div_words_internal: divide the two-word value $4:$5 (high:low) by
 * the non-zero word $6.
 *   1. Normalise: shift $6 left until its top bit is set, counting the
 *      shift in $25, and shift $4:$5 left by the same amount.  If set
 *      bits would be shifted out of $4, "break 6" is executed.
 *   2. Produce the quotient as two 16-bit digits.  Each digit ($8) is
 *      estimated by dividing by the upper half of the divisor ($3), or
 *      taken as 0xffff when the upper halves are equal, and is then
 *      decremented while digit * divisor ($13:$12) exceeds the current
 *      partial dividend ($9:$15).
 * Returns the quotient in $2 (copied to $4) and the remainder, shifted
 * back right by $25, in $3 (copied to $5).  $6 is shifted back right
 * by $25 before returning.
 * No .set directive precedes the first instruction, so the code up to
 * the first ".set reorder" is assembled under the ".set noreorder"
 * left in effect by bn_div_words: the instruction after each branch
 * there is its delay slot.
 */
647.align	5
648.ent	bn_div_words_internal
649bn_div_words_internal:
650	move	$3,$0
651	bltz	$6,.L_bn_div_words_body	# top bit already set: no shift needed
652	move	$25,$3		# delay slot: shift count = 0
653	sll	$6,1		# normalisation loop: shift the divisor left ...
654	bgtz	$6,.-4		# ... back to the sll until the top bit is set
655	addu	$25,1		# delay slot: count every shift
656

657	.set	reorder
658	negu	$13,$25		# -shift; used below as a shift amount
659	li	$14,-1
660	sll	$14,$13		# mask of the top $25 bits
661	and	$14,$4		# bits of $4 that the shift would lose
662	srl	$1,$5,$13	# top $25 bits of $5, moved to the bottom
663	.set	noreorder
664	beqz	$14,.+12	# nothing lost: skip the nop and the break
665	nop
666	break	6		# signal overflow
667	.set	reorder
668	sll	$4,$25
669	sll	$5,$25
670	or	$4,$1		# $4:$5 is now shifted left by $25
671.L_bn_div_words_body:
672	srl	$3,$6,4*4	# bits
673	sgeu	$1,$4,$6
674	.set	noreorder
675	beqz	$1,.+12		# $4 < $6: skip the nop and the subtraction
676	nop
677	subu	$4,$6
678	.set	reorder
679

680	li	$8,-1
681	srl	$9,$4,4*4	# bits
682	srl	$8,4*4	# q=0xffff in this 32-bit build (-1 >> 16)
683	beq	$3,$9,.L_bn_div_words_skip_div1
684	divu	($4,$3)
685	mfqt	($8,$4,$3)
686.L_bn_div_words_skip_div1:
687	multu	($6,$8)
688	sll	$15,$4,4*4	# bits
689	srl	$1,$5,4*4	# bits
690	or	$15,$1
691	mflo	($12,$6,$8)
692	mfhi	($13,$6,$8)
693.L_bn_div_words_inner_loop1:
694	sltu	$14,$15,$12
695	seq	$24,$9,$13
696	sltu	$1,$9,$13
697	and	$14,$24
698	sltu	$2,$12,$6	# borrow for the subtraction below
699	or	$1,$14		# $1 = ($13:$12 > $9:$15)
700	.set	noreorder
701	beqz	$1,.L_bn_div_words_inner_loop1_done
702	subu	$13,$2		# delay slot: runs on exit too
703	subu	$12,$6
704	b	.L_bn_div_words_inner_loop1
705	subu	$8,1		# delay slot: estimate was too large, q--
706	.set	reorder
707.L_bn_div_words_inner_loop1_done:
708

709	sll	$5,4*4	# bits
710	subu	$4,$15,$12	# partial remainder after the first digit
711	sll	$2,$8,4*4	# bits
712

713	li	$8,-1
714	srl	$9,$4,4*4	# bits
715	srl	$8,4*4	# q=0xffff in this 32-bit build (-1 >> 16)
716	beq	$3,$9,.L_bn_div_words_skip_div2
717	divu	($4,$3)
718	mfqt	($8,$4,$3)
719.L_bn_div_words_skip_div2:
720	multu	($6,$8)
721	sll	$15,$4,4*4	# bits
722	srl	$1,$5,4*4	# bits
723	or	$15,$1
724	mflo	($12,$6,$8)
725	mfhi	($13,$6,$8)
726.L_bn_div_words_inner_loop2:
727	sltu	$14,$15,$12
728	seq	$24,$9,$13
729	sltu	$1,$9,$13
730	and	$14,$24
731	sltu	$3,$12,$6	# borrow; $3 is free again after the divide
732	or	$1,$14		# $1 = ($13:$12 > $9:$15)
733	.set	noreorder
734	beqz	$1,.L_bn_div_words_inner_loop2_done
735	subu	$13,$3		# delay slot: runs on exit too
736	subu	$12,$6
737	b	.L_bn_div_words_inner_loop2
738	subu	$8,1		# delay slot: estimate was too large, q--
739	.set	reorder
740.L_bn_div_words_inner_loop2_done:
741

742	subu	$4,$15,$12
743	or	$2,$8		# combine the two quotient digits
744	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
745	srl	$6,$25		# restore $6
746

747	.set	noreorder
748	move	$5,$3
749	jr	$31
750	move	$4,$2		# delay slot: mirror the quotient into $4
751.end	bn_div_words_internal
752
/*
 * bn_mul_comba8: 8-word by 8-word multiplication, 16 result words.
 *   $4 - result r[0..15] (written only)
 *   $5 - a[0..7], $6 - b[0..7]
 * Operand registers once loaded:
 *   a[0..7] = $12,$13,$14,$15,$16,$18,$20,$5
 *   b[0..7] = $8,$9,$10,$11,$17,$19,$21,$6
 * ($5 and $6 are overwritten with a[7] and b[7] by their last loads.)
 * Products are accumulated column by column into the rotating
 * three-word accumulator c1/c2/c3 = $2/$3/$7; $24/$25 receive the low
 * and high halves of each product and $1 holds the carry out of each
 * addition.  A multu written with one extra leading space starts the
 * first product of the next column before the current column is
 * stored.  $16-$21 are saved in a 6-word stack frame and restored on
 * exit.
 */
753.align	5
754.globl	bn_mul_comba8
755.ent	bn_mul_comba8
756bn_mul_comba8:
757	.set	noreorder
758	.frame	$29,6*4,$31
759	.mask	0x003f0000,-4
760	subu $29,6*4		# allocate the frame for $16-$21
761	sw	$21,5*4($29)
762	sw	$20,4*4($29)
763	sw	$19,3*4($29)
764	sw	$18,2*4($29)
765	sw	$17,1*4($29)
766	sw	$16,0*4($29)
767

768	.set	reorder
769	lw	$12,0($5)	# If compiled with -mips3 option on
770				# R5000 box assembler barks on this
771				# line with "should not have mult/div
772				# as last instruction in bb (R10K
773				# bug)" warning. If anybody out there
774				# has a clue about how to circumvent
775				# this do send me a note.
776				#		<appro@fy.chalmers.se>
777

778	lw	$8,0($6)
779	lw	$13,4($5)
780	lw	$14,2*4($5)
781	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
782	lw	$15,3*4($5)
783	lw	$9,4($6)
784	lw	$10,2*4($6)
785	lw	$11,3*4($6)
786	mflo	($2,$12,$8)
787	mfhi	($3,$12,$8)
788

789	lw	$16,4*4($5)
790	lw	$18,5*4($5)
791	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
792	lw	$20,6*4($5)
793	lw	$5,7*4($5)
794	lw	$17,4*4($6)
795	lw	$19,5*4($6)
796	mflo	($24,$12,$9)
797	mfhi	($25,$12,$9)
798	addu	$3,$24
799	sltu	$1,$3,$24
800	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
801	addu	$7,$25,$1
802	lw	$21,6*4($6)
803	lw	$6,7*4($6)
804	sw	$2,0($4)	# r[0]=c1;
805	mflo	($24,$13,$8)
806	mfhi	($25,$13,$8)
807	addu	$3,$24
808	sltu	$1,$3,$24
809	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
810	addu	$25,$1
811	addu	$7,$25
812	sltu	$2,$7,$25
813	sw	$3,4($4)	# r[1]=c2;
814

815	mflo	($24,$14,$8)
816	mfhi	($25,$14,$8)
817	addu	$7,$24
818	sltu	$1,$7,$24
819	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
820	addu	$25,$1
821	addu	$2,$25
822	mflo	($24,$13,$9)
823	mfhi	($25,$13,$9)
824	addu	$7,$24
825	sltu	$1,$7,$24
826	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
827	addu	$25,$1
828	addu	$2,$25
829	sltu	$3,$2,$25
830	mflo	($24,$12,$10)
831	mfhi	($25,$12,$10)
832	addu	$7,$24
833	sltu	$1,$7,$24
834	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
835	addu	$25,$1
836	addu	$2,$25
837	sltu	$1,$2,$25
838	addu	$3,$1
839	sw	$7,2*4($4)	# r[2]=c3;
840

841	mflo	($24,$12,$11)
842	mfhi	($25,$12,$11)
843	addu	$2,$24
844	sltu	$1,$2,$24
845	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
846	addu	$25,$1
847	addu	$3,$25
848	sltu	$7,$3,$25
849	mflo	($24,$13,$10)
850	mfhi	($25,$13,$10)
851	addu	$2,$24
852	sltu	$1,$2,$24
853	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
854	addu	$25,$1
855	addu	$3,$25
856	sltu	$1,$3,$25
857	addu	$7,$1
858	mflo	($24,$14,$9)
859	mfhi	($25,$14,$9)
860	addu	$2,$24
861	sltu	$1,$2,$24
862	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
863	addu	$25,$1
864	addu	$3,$25
865	sltu	$1,$3,$25
866	addu	$7,$1
867	mflo	($24,$15,$8)
868	mfhi	($25,$15,$8)
869	addu	$2,$24
870	sltu	$1,$2,$24
871	 multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
872	addu	$25,$1
873	addu	$3,$25
874	sltu	$1,$3,$25
875	addu	$7,$1
876	sw	$2,3*4($4)	# r[3]=c1;
877

878	mflo	($24,$16,$8)
879	mfhi	($25,$16,$8)
880	addu	$3,$24
881	sltu	$1,$3,$24
882	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
883	addu	$25,$1
884	addu	$7,$25
885	sltu	$2,$7,$25
886	mflo	($24,$15,$9)
887	mfhi	($25,$15,$9)
888	addu	$3,$24
889	sltu	$1,$3,$24
890	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
891	addu	$25,$1
892	addu	$7,$25
893	sltu	$1,$7,$25
894	addu	$2,$1
895	mflo	($24,$14,$10)
896	mfhi	($25,$14,$10)
897	addu	$3,$24
898	sltu	$1,$3,$24
899	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
900	addu	$25,$1
901	addu	$7,$25
902	sltu	$1,$7,$25
903	addu	$2,$1
904	mflo	($24,$13,$11)
905	mfhi	($25,$13,$11)
906	addu	$3,$24
907	sltu	$1,$3,$24
908	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
909	addu	$25,$1
910	addu	$7,$25
911	sltu	$1,$7,$25
912	addu	$2,$1
913	mflo	($24,$12,$17)
914	mfhi	($25,$12,$17)
915	addu	$3,$24
916	sltu	$1,$3,$24
917	 multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
918	addu	$25,$1
919	addu	$7,$25
920	sltu	$1,$7,$25
921	addu	$2,$1
922	sw	$3,4*4($4)	# r[4]=c2;
923

924	mflo	($24,$12,$19)
925	mfhi	($25,$12,$19)
926	addu	$7,$24
927	sltu	$1,$7,$24
928	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
929	addu	$25,$1
930	addu	$2,$25
931	sltu	$3,$2,$25
932	mflo	($24,$13,$17)
933	mfhi	($25,$13,$17)
934	addu	$7,$24
935	sltu	$1,$7,$24
936	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
937	addu	$25,$1
938	addu	$2,$25
939	sltu	$1,$2,$25
940	addu	$3,$1
941	mflo	($24,$14,$11)
942	mfhi	($25,$14,$11)
943	addu	$7,$24
944	sltu	$1,$7,$24
945	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
946	addu	$25,$1
947	addu	$2,$25
948	sltu	$1,$2,$25
949	addu	$3,$1
950	mflo	($24,$15,$10)
951	mfhi	($25,$15,$10)
952	addu	$7,$24
953	sltu	$1,$7,$24
954	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
955	addu	$25,$1
956	addu	$2,$25
957	sltu	$1,$2,$25
958	addu	$3,$1
959	mflo	($24,$16,$9)
960	mfhi	($25,$16,$9)
961	addu	$7,$24
962	sltu	$1,$7,$24
963	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
964	addu	$25,$1
965	addu	$2,$25
966	sltu	$1,$2,$25
967	addu	$3,$1
968	mflo	($24,$18,$8)
969	mfhi	($25,$18,$8)
970	addu	$7,$24
971	sltu	$1,$7,$24
972	 multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
973	addu	$25,$1
974	addu	$2,$25
975	sltu	$1,$2,$25
976	addu	$3,$1
977	sw	$7,5*4($4)	# r[5]=c3;
978

979	mflo	($24,$20,$8)
980	mfhi	($25,$20,$8)
981	addu	$2,$24
982	sltu	$1,$2,$24
983	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
984	addu	$25,$1
985	addu	$3,$25
986	sltu	$7,$3,$25
987	mflo	($24,$18,$9)
988	mfhi	($25,$18,$9)
989	addu	$2,$24
990	sltu	$1,$2,$24
991	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
992	addu	$25,$1
993	addu	$3,$25
994	sltu	$1,$3,$25
995	addu	$7,$1
996	mflo	($24,$16,$10)
997	mfhi	($25,$16,$10)
998	addu	$2,$24
999	sltu	$1,$2,$24
1000	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1001	addu	$25,$1
1002	addu	$3,$25
1003	sltu	$1,$3,$25
1004	addu	$7,$1
1005	mflo	($24,$15,$11)
1006	mfhi	($25,$15,$11)
1007	addu	$2,$24
1008	sltu	$1,$2,$24
1009	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
1010	addu	$25,$1
1011	addu	$3,$25
1012	sltu	$1,$3,$25
1013	addu	$7,$1
1014	mflo	($24,$14,$17)
1015	mfhi	($25,$14,$17)
1016	addu	$2,$24
1017	sltu	$1,$2,$24
1018	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
1019	addu	$25,$1
1020	addu	$3,$25
1021	sltu	$1,$3,$25
1022	addu	$7,$1
1023	mflo	($24,$13,$19)
1024	mfhi	($25,$13,$19)
1025	addu	$2,$24
1026	sltu	$1,$2,$24
1027	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
1028	addu	$25,$1
1029	addu	$3,$25
1030	sltu	$1,$3,$25
1031	addu	$7,$1
1032	mflo	($24,$12,$21)
1033	mfhi	($25,$12,$21)
1034	addu	$2,$24
1035	sltu	$1,$2,$24
1036	 multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
1037	addu	$25,$1
1038	addu	$3,$25
1039	sltu	$1,$3,$25
1040	addu	$7,$1
1041	sw	$2,6*4($4)	# r[6]=c1;
1042

1043	mflo	($24,$12,$6)
1044	mfhi	($25,$12,$6)
1045	addu	$3,$24
1046	sltu	$1,$3,$24
1047	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
1048	addu	$25,$1
1049	addu	$7,$25
1050	sltu	$2,$7,$25
1051	mflo	($24,$13,$21)
1052	mfhi	($25,$13,$21)
1053	addu	$3,$24
1054	sltu	$1,$3,$24
1055	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
1056	addu	$25,$1
1057	addu	$7,$25
1058	sltu	$1,$7,$25
1059	addu	$2,$1
1060	mflo	($24,$14,$19)
1061	mfhi	($25,$14,$19)
1062	addu	$3,$24
1063	sltu	$1,$3,$24
1064	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
1065	addu	$25,$1
1066	addu	$7,$25
1067	sltu	$1,$7,$25
1068	addu	$2,$1
1069	mflo	($24,$15,$17)
1070	mfhi	($25,$15,$17)
1071	addu	$3,$24
1072	sltu	$1,$3,$24
1073	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
1074	addu	$25,$1
1075	addu	$7,$25
1076	sltu	$1,$7,$25
1077	addu	$2,$1
1078	mflo	($24,$16,$11)
1079	mfhi	($25,$16,$11)
1080	addu	$3,$24
1081	sltu	$1,$3,$24
1082	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
1083	addu	$25,$1
1084	addu	$7,$25
1085	sltu	$1,$7,$25
1086	addu	$2,$1
1087	mflo	($24,$18,$10)
1088	mfhi	($25,$18,$10)
1089	addu	$3,$24
1090	sltu	$1,$3,$24
1091	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
1092	addu	$25,$1
1093	addu	$7,$25
1094	sltu	$1,$7,$25
1095	addu	$2,$1
1096	mflo	($24,$20,$9)
1097	mfhi	($25,$20,$9)
1098	addu	$3,$24
1099	sltu	$1,$3,$24
1100	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
1101	addu	$25,$1
1102	addu	$7,$25
1103	sltu	$1,$7,$25
1104	addu	$2,$1
1105	mflo	($24,$5,$8)
1106	mfhi	($25,$5,$8)
1107	addu	$3,$24
1108	sltu	$1,$3,$24
1109	 multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
1110	addu	$25,$1
1111	addu	$7,$25
1112	sltu	$1,$7,$25
1113	addu	$2,$1
1114	sw	$3,7*4($4)	# r[7]=c2;
1115

1116	mflo	($24,$5,$9)
1117	mfhi	($25,$5,$9)
1118	addu	$7,$24
1119	sltu	$1,$7,$24
1120	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
1121	addu	$25,$1
1122	addu	$2,$25
1123	sltu	$3,$2,$25
1124	mflo	($24,$20,$10)
1125	mfhi	($25,$20,$10)
1126	addu	$7,$24
1127	sltu	$1,$7,$24
1128	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
1129	addu	$25,$1
1130	addu	$2,$25
1131	sltu	$1,$2,$25
1132	addu	$3,$1
1133	mflo	($24,$18,$11)
1134	mfhi	($25,$18,$11)
1135	addu	$7,$24
1136	sltu	$1,$7,$24
1137	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
1138	addu	$25,$1
1139	addu	$2,$25
1140	sltu	$1,$2,$25
1141	addu	$3,$1
1142	mflo	($24,$16,$17)
1143	mfhi	($25,$16,$17)
1144	addu	$7,$24
1145	sltu	$1,$7,$24
1146	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
1147	addu	$25,$1
1148	addu	$2,$25
1149	sltu	$1,$2,$25
1150	addu	$3,$1
1151	mflo	($24,$15,$19)
1152	mfhi	($25,$15,$19)
1153	addu	$7,$24
1154	sltu	$1,$7,$24
1155	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
1156	addu	$25,$1
1157	addu	$2,$25
1158	sltu	$1,$2,$25
1159	addu	$3,$1
1160	mflo	($24,$14,$21)
1161	mfhi	($25,$14,$21)
1162	addu	$7,$24
1163	sltu	$1,$7,$24
1164	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
1165	addu	$25,$1
1166	addu	$2,$25
1167	sltu	$1,$2,$25
1168	addu	$3,$1
1169	mflo	($24,$13,$6)
1170	mfhi	($25,$13,$6)
1171	addu	$7,$24
1172	sltu	$1,$7,$24
1173	 multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
1174	addu	$25,$1
1175	addu	$2,$25
1176	sltu	$1,$2,$25
1177	addu	$3,$1
1178	sw	$7,8*4($4)	# r[8]=c3;
1179

1180	mflo	($24,$14,$6)
1181	mfhi	($25,$14,$6)
1182	addu	$2,$24
1183	sltu	$1,$2,$24
1184	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
1185	addu	$25,$1
1186	addu	$3,$25
1187	sltu	$7,$3,$25
1188	mflo	($24,$15,$21)
1189	mfhi	($25,$15,$21)
1190	addu	$2,$24
1191	sltu	$1,$2,$24
1192	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
1193	addu	$25,$1
1194	addu	$3,$25
1195	sltu	$1,$3,$25
1196	addu	$7,$1
1197	mflo	($24,$16,$19)
1198	mfhi	($25,$16,$19)
1199	addu	$2,$24
1200	sltu	$1,$2,$24
1201	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
1202	addu	$25,$1
1203	addu	$3,$25
1204	sltu	$1,$3,$25
1205	addu	$7,$1
1206	mflo	($24,$18,$17)
1207	mfhi	($25,$18,$17)
1208	addu	$2,$24
1209	sltu	$1,$2,$24
1210	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
1211	addu	$25,$1
1212	addu	$3,$25
1213	sltu	$1,$3,$25
1214	addu	$7,$1
1215	mflo	($24,$20,$11)
1216	mfhi	($25,$20,$11)
1217	addu	$2,$24
1218	sltu	$1,$2,$24
1219	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
1220	addu	$25,$1
1221	addu	$3,$25
1222	sltu	$1,$3,$25
1223	addu	$7,$1
1224	mflo	($24,$5,$10)
1225	mfhi	($25,$5,$10)
1226	addu	$2,$24
1227	sltu	$1,$2,$24
1228	 multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
1229	addu	$25,$1
1230	addu	$3,$25
1231	sltu	$1,$3,$25
1232	addu	$7,$1
1233	sw	$2,9*4($4)	# r[9]=c1;
1234

1235	mflo	($24,$5,$11)
1236	mfhi	($25,$5,$11)
1237	addu	$3,$24
1238	sltu	$1,$3,$24
1239	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
1240	addu	$25,$1
1241	addu	$7,$25
1242	sltu	$2,$7,$25
1243	mflo	($24,$20,$17)
1244	mfhi	($25,$20,$17)
1245	addu	$3,$24
1246	sltu	$1,$3,$24
1247	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
1248	addu	$25,$1
1249	addu	$7,$25
1250	sltu	$1,$7,$25
1251	addu	$2,$1
1252	mflo	($24,$18,$19)
1253	mfhi	($25,$18,$19)
1254	addu	$3,$24
1255	sltu	$1,$3,$24
1256	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
1257	addu	$25,$1
1258	addu	$7,$25
1259	sltu	$1,$7,$25
1260	addu	$2,$1
1261	mflo	($24,$16,$21)
1262	mfhi	($25,$16,$21)
1263	addu	$3,$24
1264	sltu	$1,$3,$24
1265	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
1266	addu	$25,$1
1267	addu	$7,$25
1268	sltu	$1,$7,$25
1269	addu	$2,$1
1270	mflo	($24,$15,$6)
1271	mfhi	($25,$15,$6)
1272	addu	$3,$24
1273	sltu	$1,$3,$24
1274	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
1275	addu	$25,$1
1276	addu	$7,$25
1277	sltu	$1,$7,$25
1278	addu	$2,$1
1279	sw	$3,10*4($4)	# r[10]=c2;
1280

1281	mflo	($24,$16,$6)
1282	mfhi	($25,$16,$6)
1283	addu	$7,$24
1284	sltu	$1,$7,$24
1285	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
1286	addu	$25,$1
1287	addu	$2,$25
1288	sltu	$3,$2,$25
1289	mflo	($24,$18,$21)
1290	mfhi	($25,$18,$21)
1291	addu	$7,$24
1292	sltu	$1,$7,$24
1293	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
1294	addu	$25,$1
1295	addu	$2,$25
1296	sltu	$1,$2,$25
1297	addu	$3,$1
1298	mflo	($24,$20,$19)
1299	mfhi	($25,$20,$19)
1300	addu	$7,$24
1301	sltu	$1,$7,$24
1302	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
1303	addu	$25,$1
1304	addu	$2,$25
1305	sltu	$1,$2,$25
1306	addu	$3,$1
1307	mflo	($24,$5,$17)
1308	mfhi	($25,$5,$17)
1309	addu	$7,$24
1310	sltu	$1,$7,$24
1311	 multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
1312	addu	$25,$1
1313	addu	$2,$25
1314	sltu	$1,$2,$25
1315	addu	$3,$1
1316	sw	$7,11*4($4)	# r[11]=c3;
1317

1318	mflo	($24,$5,$19)
1319	mfhi	($25,$5,$19)
1320	addu	$2,$24
1321	sltu	$1,$2,$24
1322	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
1323	addu	$25,$1
1324	addu	$3,$25
1325	sltu	$7,$3,$25
1326	mflo	($24,$20,$21)
1327	mfhi	($25,$20,$21)
1328	addu	$2,$24
1329	sltu	$1,$2,$24
1330	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
1331	addu	$25,$1
1332	addu	$3,$25
1333	sltu	$1,$3,$25
1334	addu	$7,$1
1335	mflo	($24,$18,$6)
1336	mfhi	($25,$18,$6)
1337	addu	$2,$24
1338	sltu	$1,$2,$24
1339	 multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
1340	addu	$25,$1
1341	addu	$3,$25
1342	sltu	$1,$3,$25
1343	addu	$7,$1
1344	sw	$2,12*4($4)	# r[12]=c1;
1345

1346	mflo	($24,$20,$6)
1347	mfhi	($25,$20,$6)
1348	addu	$3,$24
1349	sltu	$1,$3,$24
1350	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
1351	addu	$25,$1
1352	addu	$7,$25
1353	sltu	$2,$7,$25
1354	mflo	($24,$5,$21)
1355	mfhi	($25,$5,$21)
1356	addu	$3,$24
1357	sltu	$1,$3,$24
1358	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
1359	addu	$25,$1
1360	addu	$7,$25
1361	sltu	$1,$7,$25
1362	addu	$2,$1
1363	sw	$3,13*4($4)	# r[13]=c2;
1364

1365	mflo	($24,$5,$6)
1366	mfhi	($25,$5,$6)
1367	addu	$7,$24
1368	sltu	$1,$7,$24
1369	addu	$25,$1
1370	addu	$2,$25
1371	sw	$7,14*4($4)	# r[14]=c3;
1372	sw	$2,15*4($4)	# r[15]=c1;
1373

1374	.set	noreorder
1375	lw	$21,5*4($29)
1376	lw	$20,4*4($29)
1377	lw	$19,3*4($29)
1378	lw	$18,2*4($29)
1379	lw	$17,1*4($29)
1380	lw	$16,0*4($29)
1381	jr	$31
1382	addu $29,6*4		# delay slot: release the frame
1383.end	bn_mul_comba8
1384
/*
 * bn_mul_comba4: 4-word by 4-word multiplication, 8 result words.
 *   $4 - result r[0..7] (written only)
 *   $5 - a[0..3], loaded into $12-$15
 *   $6 - b[0..3], loaded into $8-$11
 * Products are accumulated column by column into the rotating
 * three-word accumulator c1/c2/c3 = $2/$3/$7; $24/$25 receive the low
 * and high halves of each product and $1 holds the carry out of each
 * addition.  A multu written with one extra leading space starts the
 * first product of the next column before the current column is
 * stored.  No stack frame is used.
 */
1385.align	5
1386.globl	bn_mul_comba4
1387.ent	bn_mul_comba4
1388bn_mul_comba4:
1389	.set	reorder
1390	lw	$12,0($5)
1391	lw	$8,0($6)
1392	lw	$13,4($5)
1393	lw	$14,2*4($5)
1394	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
1395	lw	$15,3*4($5)
1396	lw	$9,4($6)
1397	lw	$10,2*4($6)
1398	lw	$11,3*4($6)
1399	mflo	($2,$12,$8)
1400	mfhi	($3,$12,$8)
1401	sw	$2,0($4)	# r[0]=c1;
1402

1403	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
1404	mflo	($24,$12,$9)
1405	mfhi	($25,$12,$9)
1406	addu	$3,$24
1407	sltu	$1,$3,$24
1408	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
1409	addu	$7,$25,$1
1410	mflo	($24,$13,$8)
1411	mfhi	($25,$13,$8)
1412	addu	$3,$24
1413	sltu	$1,$3,$24
1414	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
1415	addu	$25,$1
1416	addu	$7,$25
1417	sltu	$2,$7,$25
1418	sw	$3,4($4)	# r[1]=c2;
1419

1420	mflo	($24,$14,$8)
1421	mfhi	($25,$14,$8)
1422	addu	$7,$24
1423	sltu	$1,$7,$24
1424	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
1425	addu	$25,$1
1426	addu	$2,$25
1427	mflo	($24,$13,$9)
1428	mfhi	($25,$13,$9)
1429	addu	$7,$24
1430	sltu	$1,$7,$24
1431	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
1432	addu	$25,$1
1433	addu	$2,$25
1434	sltu	$3,$2,$25
1435	mflo	($24,$12,$10)
1436	mfhi	($25,$12,$10)
1437	addu	$7,$24
1438	sltu	$1,$7,$24
1439	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
1440	addu	$25,$1
1441	addu	$2,$25
1442	sltu	$1,$2,$25
1443	addu	$3,$1
1444	sw	$7,2*4($4)	# r[2]=c3;
1445

1446	mflo	($24,$12,$11)
1447	mfhi	($25,$12,$11)
1448	addu	$2,$24
1449	sltu	$1,$2,$24
1450	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
1451	addu	$25,$1
1452	addu	$3,$25
1453	sltu	$7,$3,$25
1454	mflo	($24,$13,$10)
1455	mfhi	($25,$13,$10)
1456	addu	$2,$24
1457	sltu	$1,$2,$24
1458	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
1459	addu	$25,$1
1460	addu	$3,$25
1461	sltu	$1,$3,$25
1462	addu	$7,$1
1463	mflo	($24,$14,$9)
1464	mfhi	($25,$14,$9)
1465	addu	$2,$24
1466	sltu	$1,$2,$24
1467	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
1468	addu	$25,$1
1469	addu	$3,$25
1470	sltu	$1,$3,$25
1471	addu	$7,$1
1472	mflo	($24,$15,$8)
1473	mfhi	($25,$15,$8)
1474	addu	$2,$24
1475	sltu	$1,$2,$24
1476	 multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
1477	addu	$25,$1
1478	addu	$3,$25
1479	sltu	$1,$3,$25
1480	addu	$7,$1
1481	sw	$2,3*4($4)	# r[3]=c1;
1482

1483	mflo	($24,$15,$9)
1484	mfhi	($25,$15,$9)
1485	addu	$3,$24
1486	sltu	$1,$3,$24
1487	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
1488	addu	$25,$1
1489	addu	$7,$25
1490	sltu	$2,$7,$25
1491	mflo	($24,$14,$10)
1492	mfhi	($25,$14,$10)
1493	addu	$3,$24
1494	sltu	$1,$3,$24
1495	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
1496	addu	$25,$1
1497	addu	$7,$25
1498	sltu	$1,$7,$25
1499	addu	$2,$1
1500	mflo	($24,$13,$11)
1501	mfhi	($25,$13,$11)
1502	addu	$3,$24
1503	sltu	$1,$3,$24
1504	 multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
1505	addu	$25,$1
1506	addu	$7,$25
1507	sltu	$1,$7,$25
1508	addu	$2,$1
1509	sw	$3,4*4($4)	# r[4]=c2;
1510

1511	mflo	($24,$14,$11)
1512	mfhi	($25,$14,$11)
1513	addu	$7,$24
1514	sltu	$1,$7,$24
1515	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
1516	addu	$25,$1
1517	addu	$2,$25
1518	sltu	$3,$2,$25
1519	mflo	($24,$15,$10)
1520	mfhi	($25,$15,$10)
1521	addu	$7,$24
1522	sltu	$1,$7,$24
1523	 multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1524	addu	$25,$1
1525	addu	$2,$25
1526	sltu	$1,$2,$25
1527	addu	$3,$1
1528	sw	$7,5*4($4)	# r[5]=c3;
1529

1530	mflo	($24,$15,$11)
1531	mfhi	($25,$15,$11)
1532	addu	$2,$24
1533	sltu	$1,$2,$24
1534	addu	$25,$1
1535	addu	$3,$25
1536	sw	$2,6*4($4)	# r[6]=c1;
1537	sw	$3,7*4($4)	# r[7]=c2;
1538

1539	.set	noreorder
1540	jr	$31
1541	nop
1542.end	bn_mul_comba4
1543
# bn_sqr_comba8 -- square the eight 32-bit words read from $5 and write the
# sixteen-word result to $4, one result word (column) at a time.
#
# Operands, as used by the code below:
#   $4   base address of the result; words 0..15 are written with sw
#   $5   base address of the input a[]; words 0..7 are read with lw
# NOTE(review): presumably this matches the C prototype
#   void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) -- confirm in the bn headers.
#
# Register roles:
#   $12,$13,$14,$15   a[0],a[1],a[2],a[3]
#   $8,$9,$10,$11     a[4],a[5],a[6],a[7]
#   $2,$3,$7          three-word running sum, named c1,c2,c3 in the comments;
#                     which of them is the low word rotates after every store
#   $24,$25           low and high word of the product fetched last
#   $1                carry scratch (receives sltu results)
#   $6                scratch, used only in the column 1 doubling sequence
# Nothing is saved or restored and the stack is not touched.
#
# Method: result word k is the low word of the sum of every a[i]*a[j] with
# i+j == k plus what was carried over from word k-1.  A product with i != j
# is added twice, a product a[i]*a[i] once.  The b[..] names in the older
# mul_add_c comments denote the same array a[]: every operand comes from $5.
# The multu for the next product is issued while the previous product is
# still being accumulated (the forward multiplication comments), so a column
# marker below may sit a few lines above the store of the previous word.
# NOTE(review): multu/mflo/mfhi with parenthesised operands are presumably
# macros supplied by mips_arch.h, which is not visible here -- confirm.
1544.align	5
1545.globl	bn_sqr_comba8
1546.ent	bn_sqr_comba8
1547bn_sqr_comba8:
1548	.set	reorder
1549	lw	$12,0($5)		# a[0]
1550	lw	$13,4($5)		# a[1]
1551	lw	$14,2*4($5)		# a[2]
1552	lw	$15,3*4($5)		# a[3]
1553
1554	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
1555	lw	$8,4*4($5)		# a[4]
1556	lw	$9,5*4($5)		# a[5]
1557	lw	$10,6*4($5)		# a[6]
1558	lw	$11,7*4($5)		# a[7]
1559	mflo	($2,$12,$12)		# c1 = low word of a[0]*a[0]
1560	mfhi	($3,$12,$12)		# c2 = high word of a[0]*a[0]
1561	sw	$2,0($4)		# store result word 0
1562
	# column 1: 2*a[0]*a[1], doubled by shifting the product left one bit
1563	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
1564	mflo	($24,$12,$13)
1565	mfhi	($25,$12,$13)
1566	slt	$2,$25,$0		# c1 = 1 when the sign bit of the high word is set (bit lost by the shift)
1567	sll	$25,1			# high word <<= 1
1568	 multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
1569	slt	$6,$24,$0		# $6 = 1 when the sign bit of the low word is set
1570	addu	$25,$6			# carry that bit into the shifted high word
1571	sll	$24,1			# low word <<= 1
1572	addu	$3,$24			# c2 += doubled low word
1573	sltu	$1,$3,$24		# $1 = carry out of c2
1574	addu	$7,$25,$1		# c3 = doubled high word + carry
1575	sw	$3,4($4)		# store result word 1
1576	sltu	$1,$7,$25		# $1 = carry out of c3
1577	addu	$2,$1			# c1 += carry
	# column 2: 2*a[2]*a[0] + a[1]*a[1]
1578	mflo	($24,$14,$12)
1579	mfhi	($25,$14,$12)
1580	addu	$7,$24			# c3 += low word (first of two additions)
1581	sltu	$1,$7,$24		# $1 = carry from the first addition
1582	 multu	($13,$13)		# forward multiplication: a[1]*a[1]
1583	addu	$7,$24			# c3 += low word (second addition)
1584	addu	$1,$25			# $1 = high word + first carry
1585	sltu	$24,$7,$24		# $24 = carry from the second addition
1586	addu	$2,$1			# c1 += high word + first carry
1587	addu	$25,$24			# $25 = high word + second carry
1588	sltu	$3,$2,$1		# c2 = carry out of c1 (first write to c2 in this column)
1589	addu	$2,$25			# c1 += high word + second carry
1590	sltu	$25,$2,$25		# $25 = carry out of c1
1591	addu	$3,$25			# c2 += carry
1592	mflo	($24,$13,$13)
1593	mfhi	($25,$13,$13)
1594	addu	$7,$24			# c3 += low word of a[1]*a[1] (added once)
1595	sltu	$1,$7,$24		# $1 = carry out of c3
1596	 multu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
1597	addu	$25,$1			# high word += carry
1598	addu	$2,$25			# c1 += high word
1599	sltu	$1,$2,$25		# $1 = carry out of c1
1600	addu	$3,$1			# c2 += carry
1601	sw	$7,2*4($4)		# store result word 2
	# column 3: 2*a[0]*a[3] + 2*a[1]*a[2]
1602	mflo	($24,$12,$15)
1603	mfhi	($25,$12,$15)
1604	addu	$2,$24
1605	sltu	$1,$2,$24
1606	 multu	($13,$14)		# forward multiplication: a[1]*a[2]
1607	addu	$2,$24
1608	addu	$1,$25
1609	sltu	$24,$2,$24
1610	addu	$3,$1
1611	addu	$25,$24
1612	sltu	$7,$3,$1		# c3 = carry out of c2 (first write to c3 in this column)
1613	addu	$3,$25
1614	sltu	$25,$3,$25
1615	addu	$7,$25
1616	mflo	($24,$13,$14)
1617	mfhi	($25,$13,$14)
1618	addu	$2,$24
1619	sltu	$1,$2,$24
1620	 multu	($8,$12)		# forward multiplication: a[4]*a[0]
1621	addu	$2,$24
1622	addu	$1,$25
1623	sltu	$24,$2,$24
1624	addu	$3,$1
1625	addu	$25,$24
1626	sltu	$1,$3,$1		# $1 = carry out of c2
1627	addu	$3,$25
1628	addu	$7,$1			# c3 already holds a value in this column, so the carry is added to it
1629	sltu	$25,$3,$25
1630	addu	$7,$25
	# column 4: 2*a[4]*a[0] + 2*a[3]*a[1] + a[2]*a[2]
1631	mflo	($24,$8,$12)
1632	mfhi	($25,$8,$12)
1633	sw	$2,3*4($4)		# store result word 3
1634	addu	$3,$24
1635	sltu	$1,$3,$24
1636	 multu	($15,$13)		# forward multiplication: a[3]*a[1]
1637	addu	$3,$24
1638	addu	$1,$25
1639	sltu	$24,$3,$24
1640	addu	$7,$1
1641	addu	$25,$24
1642	sltu	$2,$7,$1
1643	addu	$7,$25
1644	sltu	$25,$7,$25
1645	addu	$2,$25
1646	mflo	($24,$15,$13)
1647	mfhi	($25,$15,$13)
1648	addu	$3,$24
1649	sltu	$1,$3,$24
1650	 multu	($14,$14)		# forward multiplication: a[2]*a[2]
1651	addu	$3,$24
1652	addu	$1,$25
1653	sltu	$24,$3,$24
1654	addu	$7,$1
1655	addu	$25,$24
1656	sltu	$1,$7,$1
1657	addu	$7,$25
1658	addu	$2,$1
1659	sltu	$25,$7,$25
1660	addu	$2,$25
1661	mflo	($24,$14,$14)
1662	mfhi	($25,$14,$14)
1663	addu	$3,$24
1664	sltu	$1,$3,$24
1665	 multu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
1666	addu	$25,$1
1667	addu	$7,$25
1668	sltu	$1,$7,$25
1669	addu	$2,$1
1670	sw	$3,4*4($4)		# store result word 4
	# column 5: 2*a[0]*a[5] + 2*a[1]*a[4] + 2*a[2]*a[3]
1671	mflo	($24,$12,$9)
1672	mfhi	($25,$12,$9)
1673	addu	$7,$24
1674	sltu	$1,$7,$24
1675	 multu	($13,$8)		# forward multiplication: a[1]*a[4]
1676	addu	$7,$24
1677	addu	$1,$25
1678	sltu	$24,$7,$24
1679	addu	$2,$1
1680	addu	$25,$24
1681	sltu	$3,$2,$1
1682	addu	$2,$25
1683	sltu	$25,$2,$25
1684	addu	$3,$25
1685	mflo	($24,$13,$8)
1686	mfhi	($25,$13,$8)
1687	addu	$7,$24
1688	sltu	$1,$7,$24
1689	 multu	($14,$15)		# forward multiplication: a[2]*a[3]
1690	addu	$7,$24
1691	addu	$1,$25
1692	sltu	$24,$7,$24
1693	addu	$2,$1
1694	addu	$25,$24
1695	sltu	$1,$2,$1
1696	addu	$2,$25
1697	addu	$3,$1
1698	sltu	$25,$2,$25
1699	addu	$3,$25
1700	mflo	($24,$14,$15)
1701	mfhi	($25,$14,$15)
1702	addu	$7,$24
1703	sltu	$1,$7,$24
1704	 multu	($10,$12)		# forward multiplication: a[6]*a[0]
1705	addu	$7,$24
1706	addu	$1,$25
1707	sltu	$24,$7,$24
1708	addu	$2,$1
1709	addu	$25,$24
1710	sltu	$1,$2,$1
1711	addu	$2,$25
1712	addu	$3,$1
1713	sltu	$25,$2,$25
1714	addu	$3,$25
	# column 6: 2*a[6]*a[0] + 2*a[5]*a[1] + 2*a[4]*a[2] + a[3]*a[3]
1715	mflo	($24,$10,$12)
1716	mfhi	($25,$10,$12)
1717	sw	$7,5*4($4)		# store result word 5
1718	addu	$2,$24
1719	sltu	$1,$2,$24
1720	 multu	($9,$13)		# forward multiplication: a[5]*a[1]
1721	addu	$2,$24
1722	addu	$1,$25
1723	sltu	$24,$2,$24
1724	addu	$3,$1
1725	addu	$25,$24
1726	sltu	$7,$3,$1
1727	addu	$3,$25
1728	sltu	$25,$3,$25
1729	addu	$7,$25
1730	mflo	($24,$9,$13)
1731	mfhi	($25,$9,$13)
1732	addu	$2,$24
1733	sltu	$1,$2,$24
1734	 multu	($8,$14)		# forward multiplication: a[4]*a[2]
1735	addu	$2,$24
1736	addu	$1,$25
1737	sltu	$24,$2,$24
1738	addu	$3,$1
1739	addu	$25,$24
1740	sltu	$1,$3,$1
1741	addu	$3,$25
1742	addu	$7,$1
1743	sltu	$25,$3,$25
1744	addu	$7,$25
1745	mflo	($24,$8,$14)
1746	mfhi	($25,$8,$14)
1747	addu	$2,$24
1748	sltu	$1,$2,$24
1749	 multu	($15,$15)		# forward multiplication: a[3]*a[3]
1750	addu	$2,$24
1751	addu	$1,$25
1752	sltu	$24,$2,$24
1753	addu	$3,$1
1754	addu	$25,$24
1755	sltu	$1,$3,$1
1756	addu	$3,$25
1757	addu	$7,$1
1758	sltu	$25,$3,$25
1759	addu	$7,$25
1760	mflo	($24,$15,$15)
1761	mfhi	($25,$15,$15)
1762	addu	$2,$24
1763	sltu	$1,$2,$24
1764	 multu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
1765	addu	$25,$1
1766	addu	$3,$25
1767	sltu	$1,$3,$25
1768	addu	$7,$1
1769	sw	$2,6*4($4)		# store result word 6
	# column 7: 2*a[0]*a[7] + 2*a[1]*a[6] + 2*a[2]*a[5] + 2*a[3]*a[4]
1770	mflo	($24,$12,$11)
1771	mfhi	($25,$12,$11)
1772	addu	$3,$24
1773	sltu	$1,$3,$24
1774	 multu	($13,$10)		# forward multiplication: a[1]*a[6]
1775	addu	$3,$24
1776	addu	$1,$25
1777	sltu	$24,$3,$24
1778	addu	$7,$1
1779	addu	$25,$24
1780	sltu	$2,$7,$1
1781	addu	$7,$25
1782	sltu	$25,$7,$25
1783	addu	$2,$25
1784	mflo	($24,$13,$10)
1785	mfhi	($25,$13,$10)
1786	addu	$3,$24
1787	sltu	$1,$3,$24
1788	 multu	($14,$9)		# forward multiplication: a[2]*a[5]
1789	addu	$3,$24
1790	addu	$1,$25
1791	sltu	$24,$3,$24
1792	addu	$7,$1
1793	addu	$25,$24
1794	sltu	$1,$7,$1
1795	addu	$7,$25
1796	addu	$2,$1
1797	sltu	$25,$7,$25
1798	addu	$2,$25
1799	mflo	($24,$14,$9)
1800	mfhi	($25,$14,$9)
1801	addu	$3,$24
1802	sltu	$1,$3,$24
1803	 multu	($15,$8)		# forward multiplication: a[3]*a[4]
1804	addu	$3,$24
1805	addu	$1,$25
1806	sltu	$24,$3,$24
1807	addu	$7,$1
1808	addu	$25,$24
1809	sltu	$1,$7,$1
1810	addu	$7,$25
1811	addu	$2,$1
1812	sltu	$25,$7,$25
1813	addu	$2,$25
1814	mflo	($24,$15,$8)
1815	mfhi	($25,$15,$8)
1816	addu	$3,$24
1817	sltu	$1,$3,$24
1818	 multu	($11,$13)		# forward multiplication: a[7]*a[1]
1819	addu	$3,$24
1820	addu	$1,$25
1821	sltu	$24,$3,$24
1822	addu	$7,$1
1823	addu	$25,$24
1824	sltu	$1,$7,$1
1825	addu	$7,$25
1826	addu	$2,$1
1827	sltu	$25,$7,$25
1828	addu	$2,$25
	# column 8: 2*a[7]*a[1] + 2*a[6]*a[2] + 2*a[5]*a[3] + a[4]*a[4]
1829	mflo	($24,$11,$13)
1830	mfhi	($25,$11,$13)
1831	sw	$3,7*4($4)		# store result word 7
1832	addu	$7,$24
1833	sltu	$1,$7,$24
1834	 multu	($10,$14)		# forward multiplication: a[6]*a[2]
1835	addu	$7,$24
1836	addu	$1,$25
1837	sltu	$24,$7,$24
1838	addu	$2,$1
1839	addu	$25,$24
1840	sltu	$3,$2,$1
1841	addu	$2,$25
1842	sltu	$25,$2,$25
1843	addu	$3,$25
1844	mflo	($24,$10,$14)
1845	mfhi	($25,$10,$14)
1846	addu	$7,$24
1847	sltu	$1,$7,$24
1848	 multu	($9,$15)		# forward multiplication: a[5]*a[3]
1849	addu	$7,$24
1850	addu	$1,$25
1851	sltu	$24,$7,$24
1852	addu	$2,$1
1853	addu	$25,$24
1854	sltu	$1,$2,$1
1855	addu	$2,$25
1856	addu	$3,$1
1857	sltu	$25,$2,$25
1858	addu	$3,$25
1859	mflo	($24,$9,$15)
1860	mfhi	($25,$9,$15)
1861	addu	$7,$24
1862	sltu	$1,$7,$24
1863	 multu	($8,$8)		# forward multiplication: a[4]*a[4]
1864	addu	$7,$24
1865	addu	$1,$25
1866	sltu	$24,$7,$24
1867	addu	$2,$1
1868	addu	$25,$24
1869	sltu	$1,$2,$1
1870	addu	$2,$25
1871	addu	$3,$1
1872	sltu	$25,$2,$25
1873	addu	$3,$25
1874	mflo	($24,$8,$8)
1875	mfhi	($25,$8,$8)
1876	addu	$7,$24
1877	sltu	$1,$7,$24
1878	 multu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
1879	addu	$25,$1
1880	addu	$2,$25
1881	sltu	$1,$2,$25
1882	addu	$3,$1
1883	sw	$7,8*4($4)		# store result word 8
	# column 9: 2*a[2]*a[7] + 2*a[3]*a[6] + 2*a[4]*a[5]
1884	mflo	($24,$14,$11)
1885	mfhi	($25,$14,$11)
1886	addu	$2,$24
1887	sltu	$1,$2,$24
1888	 multu	($15,$10)		# forward multiplication: a[3]*a[6]
1889	addu	$2,$24
1890	addu	$1,$25
1891	sltu	$24,$2,$24
1892	addu	$3,$1
1893	addu	$25,$24
1894	sltu	$7,$3,$1
1895	addu	$3,$25
1896	sltu	$25,$3,$25
1897	addu	$7,$25
1898	mflo	($24,$15,$10)
1899	mfhi	($25,$15,$10)
1900	addu	$2,$24
1901	sltu	$1,$2,$24
1902	 multu	($8,$9)		# forward multiplication: a[4]*a[5]
1903	addu	$2,$24
1904	addu	$1,$25
1905	sltu	$24,$2,$24
1906	addu	$3,$1
1907	addu	$25,$24
1908	sltu	$1,$3,$1
1909	addu	$3,$25
1910	addu	$7,$1
1911	sltu	$25,$3,$25
1912	addu	$7,$25
1913	mflo	($24,$8,$9)
1914	mfhi	($25,$8,$9)
1915	addu	$2,$24
1916	sltu	$1,$2,$24
1917	 multu	($11,$15)		# forward multiplication: a[7]*a[3]
1918	addu	$2,$24
1919	addu	$1,$25
1920	sltu	$24,$2,$24
1921	addu	$3,$1
1922	addu	$25,$24
1923	sltu	$1,$3,$1
1924	addu	$3,$25
1925	addu	$7,$1
1926	sltu	$25,$3,$25
1927	addu	$7,$25
	# column 10: 2*a[7]*a[3] + 2*a[6]*a[4] + a[5]*a[5]
1928	mflo	($24,$11,$15)
1929	mfhi	($25,$11,$15)
1930	sw	$2,9*4($4)		# store result word 9
1931	addu	$3,$24
1932	sltu	$1,$3,$24
1933	 multu	($10,$8)		# forward multiplication: a[6]*a[4]
1934	addu	$3,$24
1935	addu	$1,$25
1936	sltu	$24,$3,$24
1937	addu	$7,$1
1938	addu	$25,$24
1939	sltu	$2,$7,$1
1940	addu	$7,$25
1941	sltu	$25,$7,$25
1942	addu	$2,$25
1943	mflo	($24,$10,$8)
1944	mfhi	($25,$10,$8)
1945	addu	$3,$24
1946	sltu	$1,$3,$24
1947	 multu	($9,$9)		# forward multiplication: a[5]*a[5]
1948	addu	$3,$24
1949	addu	$1,$25
1950	sltu	$24,$3,$24
1951	addu	$7,$1
1952	addu	$25,$24
1953	sltu	$1,$7,$1
1954	addu	$7,$25
1955	addu	$2,$1
1956	sltu	$25,$7,$25
1957	addu	$2,$25
1958	mflo	($24,$9,$9)
1959	mfhi	($25,$9,$9)
1960	addu	$3,$24
1961	sltu	$1,$3,$24
1962	 multu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
1963	addu	$25,$1
1964	addu	$7,$25
1965	sltu	$1,$7,$25
1966	addu	$2,$1
1967	sw	$3,10*4($4)		# store result word 10
	# column 11: 2*a[4]*a[7] + 2*a[5]*a[6]
1968	mflo	($24,$8,$11)
1969	mfhi	($25,$8,$11)
1970	addu	$7,$24
1971	sltu	$1,$7,$24
1972	 multu	($9,$10)		# forward multiplication: a[5]*a[6]
1973	addu	$7,$24
1974	addu	$1,$25
1975	sltu	$24,$7,$24
1976	addu	$2,$1
1977	addu	$25,$24
1978	sltu	$3,$2,$1
1979	addu	$2,$25
1980	sltu	$25,$2,$25
1981	addu	$3,$25
1982	mflo	($24,$9,$10)
1983	mfhi	($25,$9,$10)
1984	addu	$7,$24
1985	sltu	$1,$7,$24
1986	 multu	($11,$9)		# forward multiplication: a[7]*a[5]
1987	addu	$7,$24
1988	addu	$1,$25
1989	sltu	$24,$7,$24
1990	addu	$2,$1
1991	addu	$25,$24
1992	sltu	$1,$2,$1
1993	addu	$2,$25
1994	addu	$3,$1
1995	sltu	$25,$2,$25
1996	addu	$3,$25
	# column 12: 2*a[7]*a[5] + a[6]*a[6]
1997	mflo	($24,$11,$9)
1998	mfhi	($25,$11,$9)
1999	sw	$7,11*4($4)		# store result word 11
2000	addu	$2,$24
2001	sltu	$1,$2,$24
2002	 multu	($10,$10)		# forward multiplication: a[6]*a[6]
2003	addu	$2,$24
2004	addu	$1,$25
2005	sltu	$24,$2,$24
2006	addu	$3,$1
2007	addu	$25,$24
2008	sltu	$7,$3,$1
2009	addu	$3,$25
2010	sltu	$25,$3,$25
2011	addu	$7,$25
2012	mflo	($24,$10,$10)
2013	mfhi	($25,$10,$10)
2014	addu	$2,$24
2015	sltu	$1,$2,$24
2016	 multu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
2017	addu	$25,$1
2018	addu	$3,$25
2019	sltu	$1,$3,$25
2020	addu	$7,$1
2021	sw	$2,12*4($4)		# store result word 12
	# column 13: 2*a[6]*a[7]
2022	mflo	($24,$10,$11)
2023	mfhi	($25,$10,$11)
2024	addu	$3,$24
2025	sltu	$1,$3,$24
2026	 multu	($11,$11)		# forward multiplication: a[7]*a[7]
2027	addu	$3,$24
2028	addu	$1,$25
2029	sltu	$24,$3,$24
2030	addu	$7,$1
2031	addu	$25,$24
2032	sltu	$2,$7,$1
2033	addu	$7,$25
2034	sltu	$25,$7,$25
2035	addu	$2,$25
	# column 14: a[7]*a[7]; the remaining high part becomes result word 15
2036	mflo	($24,$11,$11)
2037	mfhi	($25,$11,$11)
2038	sw	$3,13*4($4)		# store result word 13
2039
2040	addu	$7,$24			# c3 += low word of a[7]*a[7]
2041	sltu	$1,$7,$24		# $1 = carry out of c3
2042	addu	$25,$1			# high word += carry
2043	addu	$2,$25			# c1 += high word; no carry out of c1 is computed here
2044	sw	$7,14*4($4)		# store result word 14
2045	sw	$2,15*4($4)		# store result word 15
2046
2047	.set	noreorder
2048	jr	$31			# return to the address in $31
2049	nop				# fills the slot after jr while noreorder is in effect
2050.end	bn_sqr_comba8
2051
# bn_sqr_comba4 -- square the four 32-bit words read from $5 and write the
# eight-word result to $4, one result word (column) at a time.
#
# Operands, as used by the code below:
#   $4   base address of the result; words 0..7 are written with sw
#   $5   base address of the input a[]; words 0..3 are read with lw
# NOTE(review): presumably this matches the C prototype
#   void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) -- confirm in the bn headers.
#
# Register roles:
#   $12,$13,$14,$15   a[0],a[1],a[2],a[3]
#   $2,$3,$7          three-word running sum, named c1,c2,c3 in the comments;
#                     which of them is the low word rotates after every store
#   $24,$25           low and high word of the product fetched last
#   $1                carry scratch (receives sltu results)
#   $6                scratch, used only in the column 1 doubling sequence
# Nothing is saved or restored and the stack is not touched.
#
# Method: result word k is the low word of the sum of every a[i]*a[j] with
# i+j == k plus what was carried over from word k-1.  A product with i != j
# is added twice, a product a[i]*a[i] once.  The b[..] names in the older
# mul_add_c comments denote the same array a[]: every operand comes from $5.
# The multu for the next product is issued while the previous product is
# still being accumulated (the forward multiplication comments), so a column
# marker below may sit a few lines above the store of the previous word.
# NOTE(review): multu/mflo/mfhi with parenthesised operands are presumably
# macros supplied by mips_arch.h, which is not visible here -- confirm.
2052.align	5
2053.globl	bn_sqr_comba4
2054.ent	bn_sqr_comba4
2055bn_sqr_comba4:
2056	.set	reorder
2057	lw	$12,0($5)		# a[0]
2058	lw	$13,4($5)		# a[1]
2059	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
2060	lw	$14,2*4($5)		# a[2]
2061	lw	$15,3*4($5)		# a[3]
2062	mflo	($2,$12,$12)		# c1 = low word of a[0]*a[0]
2063	mfhi	($3,$12,$12)		# c2 = high word of a[0]*a[0]
2064	sw	$2,0($4)		# store result word 0
2065
	# column 1: 2*a[0]*a[1], doubled by shifting the product left one bit
2066	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
2067	mflo	($24,$12,$13)
2068	mfhi	($25,$12,$13)
2069	slt	$2,$25,$0		# c1 = 1 when the sign bit of the high word is set (bit lost by the shift)
2070	sll	$25,1			# high word <<= 1
2071	 multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
2072	slt	$6,$24,$0		# $6 = 1 when the sign bit of the low word is set
2073	addu	$25,$6			# carry that bit into the shifted high word
2074	sll	$24,1			# low word <<= 1
2075	addu	$3,$24			# c2 += doubled low word
2076	sltu	$1,$3,$24		# $1 = carry out of c2
2077	addu	$7,$25,$1		# c3 = doubled high word + carry
2078	sw	$3,4($4)		# store result word 1
2079	sltu	$1,$7,$25		# $1 = carry out of c3
2080	addu	$2,$1			# c1 += carry
	# column 2: 2*a[2]*a[0] + a[1]*a[1]
2081	mflo	($24,$14,$12)
2082	mfhi	($25,$14,$12)
2083	addu	$7,$24			# c3 += low word (first of two additions)
2084	sltu	$1,$7,$24		# $1 = carry from the first addition
2085	 multu	($13,$13)		# forward multiplication: a[1]*a[1]
2086	addu	$7,$24			# c3 += low word (second addition)
2087	addu	$1,$25			# $1 = high word + first carry
2088	sltu	$24,$7,$24		# $24 = carry from the second addition
2089	addu	$2,$1			# c1 += high word + first carry
2090	addu	$25,$24			# $25 = high word + second carry
2091	sltu	$3,$2,$1		# c2 = carry out of c1 (first write to c2 in this column)
2092	addu	$2,$25			# c1 += high word + second carry
2093	sltu	$25,$2,$25		# $25 = carry out of c1
2094	addu	$3,$25			# c2 += carry
2095	mflo	($24,$13,$13)
2096	mfhi	($25,$13,$13)
2097	addu	$7,$24			# c3 += low word of a[1]*a[1] (added once)
2098	sltu	$1,$7,$24		# $1 = carry out of c3
2099	 multu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
2100	addu	$25,$1			# high word += carry
2101	addu	$2,$25			# c1 += high word
2102	sltu	$1,$2,$25		# $1 = carry out of c1
2103	addu	$3,$1			# c2 += carry
2104	sw	$7,2*4($4)		# store result word 2
	# column 3: 2*a[0]*a[3] + 2*a[1]*a[2]
2105	mflo	($24,$12,$15)
2106	mfhi	($25,$12,$15)
2107	addu	$2,$24
2108	sltu	$1,$2,$24
2109	 multu	($13,$14)		# forward multiplication: a[1]*a[2]
2110	addu	$2,$24
2111	addu	$1,$25
2112	sltu	$24,$2,$24
2113	addu	$3,$1
2114	addu	$25,$24
2115	sltu	$7,$3,$1		# c3 = carry out of c2 (first write to c3 in this column)
2116	addu	$3,$25
2117	sltu	$25,$3,$25
2118	addu	$7,$25
2119	mflo	($24,$13,$14)
2120	mfhi	($25,$13,$14)
2121	addu	$2,$24
2122	sltu	$1,$2,$24
2123	 multu	($15,$13)		# forward multiplication: a[3]*a[1]
2124	addu	$2,$24
2125	addu	$1,$25
2126	sltu	$24,$2,$24
2127	addu	$3,$1
2128	addu	$25,$24
2129	sltu	$1,$3,$1		# $1 = carry out of c2
2130	addu	$3,$25
2131	addu	$7,$1			# c3 already holds a value in this column, so the carry is added to it
2132	sltu	$25,$3,$25
2133	addu	$7,$25
	# column 4: 2*a[3]*a[1] + a[2]*a[2]
2134	mflo	($24,$15,$13)
2135	mfhi	($25,$15,$13)
2136	sw	$2,3*4($4)		# store result word 3
2137	addu	$3,$24
2138	sltu	$1,$3,$24
2139	 multu	($14,$14)		# forward multiplication: a[2]*a[2]
2140	addu	$3,$24
2141	addu	$1,$25
2142	sltu	$24,$3,$24
2143	addu	$7,$1
2144	addu	$25,$24
2145	sltu	$2,$7,$1
2146	addu	$7,$25
2147	sltu	$25,$7,$25
2148	addu	$2,$25
2149	mflo	($24,$14,$14)
2150	mfhi	($25,$14,$14)
2151	addu	$3,$24
2152	sltu	$1,$3,$24
2153	 multu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
2154	addu	$25,$1
2155	addu	$7,$25
2156	sltu	$1,$7,$25
2157	addu	$2,$1
2158	sw	$3,4*4($4)		# store result word 4
	# column 5: 2*a[2]*a[3]
2159	mflo	($24,$14,$15)
2160	mfhi	($25,$14,$15)
2161	addu	$7,$24
2162	sltu	$1,$7,$24
2163	 multu	($15,$15)		# forward multiplication: a[3]*a[3]
2164	addu	$7,$24
2165	addu	$1,$25
2166	sltu	$24,$7,$24
2167	addu	$2,$1
2168	addu	$25,$24
2169	sltu	$3,$2,$1
2170	addu	$2,$25
2171	sltu	$25,$2,$25
2172	addu	$3,$25
	# column 6: a[3]*a[3]; the remaining high part becomes result word 7
2173	mflo	($24,$15,$15)
2174	mfhi	($25,$15,$15)
2175	sw	$7,5*4($4)		# store result word 5
2176
2177	addu	$2,$24			# c1 += low word of a[3]*a[3]
2178	sltu	$1,$2,$24		# $1 = carry out of c1
2179	addu	$25,$1			# high word += carry
2180	addu	$3,$25			# c2 += high word; no carry out of c2 is computed here
2181	sw	$2,6*4($4)		# store result word 6
2182	sw	$3,7*4($4)		# store result word 7
2183
2184	.set	noreorder
2185	jr	$31			# return to the address in $31
2186	nop				# fills the slot after jr while noreorder is in effect
2187.end	bn_sqr_comba4
2188