xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips.S (revision e0ea3921ea68e51b93ffc215f08ae1647c8e1796)
1.set	mips2
2#include "mips_arch.h"
3
4#if defined(_MIPS_ARCH_MIPS64R6)
5# define ddivu(rs,rt)
6# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
7# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
8#elif defined(_MIPS_ARCH_MIPS32R6)
9# define divu(rs,rt)
10# define mfqt(rd,rs,rt)	divu	rd,rs,rt
11# define mfrm(rd,rs,rt)	modu	rd,rs,rt
12#else
13# define divu(rs,rt)	divu	$0,rs,rt
14# define mfqt(rd,rs,rt)	mflo	rd
15# define mfrm(rd,rs,rt)	mfhi	rd
16#endif
17
18.rdata
19.asciiz	"mips3.s, Version 1.2"
20.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
21
22.text
23.set	noat
24
# bn_mul_add_words: exported entry point.
# Takes the branch to bn_mul_add_words_internal when the count in $6 is
# positive, otherwise returns at once.  Under .set noreorder the line
# after each branch or jump is its delay slot, so $2 is cleared on both
# paths and the early return also copies $2 (zero) into $4.
25.align	5
26.globl	bn_mul_add_words
27.ent	bn_mul_add_words
28bn_mul_add_words:
29	.set	noreorder
30	bgtz	$6,bn_mul_add_words_internal
31	move	$2,$0
32	jr	$31
33	move	$4,$2
34.end	bn_mul_add_words
35
# bn_mul_add_words_internal: multiply-accumulate over an array of words.
# Register roles, as read from the code below:
#   $4  pointer to words that are loaded, updated and stored back
#   $5  pointer to words that are only loaded
#   $6  remaining word count (the entry stub only branches here when > 0)
#   $7  multiplier word
#   $2  running carry; cleared by the entry stub, returned to the caller
#   $3  constant -4, masks the count down to a multiple of four
#   $1  scratch (the file is assembled with .set noat)
# Every step forms word($4) + word($5) * $7 + $2, stores the low word
# and leaves the high word plus both carry-outs in $2.
36.align	5
37.ent	bn_mul_add_words_internal
38bn_mul_add_words_internal:
39	.set	reorder
40	li	$3,-4
41	and	$8,$6,$3
# Fewer than four words in total: go straight to the one-at-a-time tail.
42	beqz	$8,.L_bn_mul_add_words_tail
43
# Main loop, four words per pass.  Each multiply is started before the
# previous product has been fully folded into the carry.
44.L_bn_mul_add_words_loop:
45	lw	$12,0($5)
46	multu	($12,$7)
47	lw	$13,0($4)
48	lw	$14,4($5)
49	lw	$15,4($4)
50	lw	$8,2*4($5)
51	lw	$9,2*4($4)
52	addu	$13,$2
53	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
54				# values", but it seems to work fine
55				# even on 64-bit registers.
56	mflo	($1,$12,$7)
57	mfhi	($12,$12,$7)
58	addu	$13,$1
59	addu	$2,$12
60	 multu	($14,$7)
61	sltu	$1,$13,$1
62	sw	$13,0($4)
63	addu	$2,$1
64
65	lw	$10,3*4($5)
66	lw	$11,3*4($4)
67	addu	$15,$2
68	sltu	$2,$15,$2
69	mflo	($1,$14,$7)
70	mfhi	($14,$14,$7)
71	addu	$15,$1
72	addu	$2,$14
73	 multu	($8,$7)
74	sltu	$1,$15,$1
75	sw	$15,4($4)
76	addu	$2,$1
77
# Count and both pointers are advanced here, so the two remaining stores
# of this pass use negative offsets from the updated $4.
78	subu	$6,4
79	addu $4,4*4
80	addu $5,4*4
81	addu	$9,$2
82	sltu	$2,$9,$2
83	mflo	($1,$8,$7)
84	mfhi	($8,$8,$7)
85	addu	$9,$1
86	addu	$2,$8
87	 multu	($10,$7)
88	sltu	$1,$9,$1
89	sw	$9,-2*4($4)
90	addu	$2,$1
91
92
# $8 = count & -4, nonzero while at least four words remain.
93	and	$8,$6,$3
94	addu	$11,$2
95	sltu	$2,$11,$2
96	mflo	($1,$10,$7)
97	mfhi	($10,$10,$7)
98	addu	$11,$1
99	addu	$2,$10
100	sltu	$1,$11,$1
101	sw	$11,-4($4)
102	.set	noreorder
# The last carry addition sits in the branch delay slot and runs whether
# or not the branch is taken.
103	bgtz	$8,.L_bn_mul_add_words_loop
104	addu	$2,$1
105
106	beqz	$6,.L_bn_mul_add_words_return
107	nop
108
# Tail: at most three words are left; handle them one at a time and
# leave as soon as the count reaches zero.
109.L_bn_mul_add_words_tail:
110	.set	reorder
111	lw	$12,0($5)
112	multu	($12,$7)
113	lw	$13,0($4)
114	subu	$6,1
115	addu	$13,$2
116	sltu	$2,$13,$2
117	mflo	($1,$12,$7)
118	mfhi	($12,$12,$7)
119	addu	$13,$1
120	addu	$2,$12
121	sltu	$1,$13,$1
122	sw	$13,0($4)
123	addu	$2,$1
124	beqz	$6,.L_bn_mul_add_words_return
125
126	lw	$12,4($5)
127	multu	($12,$7)
128	lw	$13,4($4)
129	subu	$6,1
130	addu	$13,$2
131	sltu	$2,$13,$2
132	mflo	($1,$12,$7)
133	mfhi	($12,$12,$7)
134	addu	$13,$1
135	addu	$2,$12
136	sltu	$1,$13,$1
137	sw	$13,4($4)
138	addu	$2,$1
139	beqz	$6,.L_bn_mul_add_words_return
140
# Third and last possible tail word; no count update is needed.
141	lw	$12,2*4($5)
142	multu	($12,$7)
143	lw	$13,2*4($4)
144	addu	$13,$2
145	sltu	$2,$13,$2
146	mflo	($1,$12,$7)
147	mfhi	($12,$12,$7)
148	addu	$13,$1
149	addu	$2,$12
150	sltu	$1,$13,$1
151	sw	$13,2*4($4)
152	addu	$2,$1
153
# Return the final carry in $2; the delay slot copies it into $4 too.
154.L_bn_mul_add_words_return:
155	.set	noreorder
156	jr	$31
157	move	$4,$2
158.end	bn_mul_add_words_internal
159
# bn_mul_words: exported entry point.
# Branches to bn_mul_words_internal when the count in $6 is positive,
# otherwise returns at once.  The delay slots clear $2 on both paths and
# copy $2 (zero) into $4 on the early return.
160.align	5
161.globl	bn_mul_words
162.ent	bn_mul_words
163bn_mul_words:
164	.set	noreorder
165	bgtz	$6,bn_mul_words_internal
166	move	$2,$0
167	jr	$31
168	move	$4,$2
169.end	bn_mul_words
170
# bn_mul_words_internal: multiply an array of words by one word.
# Register roles, as read from the code below:
#   $4  pointer to the words that are stored (never loaded here)
#   $5  pointer to the words that are loaded
#   $6  remaining word count (the entry stub only branches here when > 0)
#   $7  multiplier word
#   $2  running carry; cleared by the entry stub, returned to the caller
#   $3  constant -4, masks the count down to a multiple of four
#   $1  scratch (the file is assembled with .set noat)
# Every step forms word($5) * $7 + $2, stores the low word through $4
# and leaves the high word plus the carry-out in $2.
171.align	5
172.ent	bn_mul_words_internal
173bn_mul_words_internal:
174	.set	reorder
175	li	$3,-4
176	and	$8,$6,$3
# Fewer than four words in total: go straight to the tail.
177	beqz	$8,.L_bn_mul_words_tail
178
# Main loop, four words per pass; all four source words are loaded up
# front and each multiply is started before the previous store.
179.L_bn_mul_words_loop:
180	lw	$12,0($5)
181	multu	($12,$7)
182	lw	$14,4($5)
183	lw	$8,2*4($5)
184	lw	$10,3*4($5)
185	mflo	($1,$12,$7)
186	mfhi	($12,$12,$7)
187	addu	$2,$1
188	sltu	$13,$2,$1
189	 multu	($14,$7)
190	sw	$2,0($4)
191	addu	$2,$13,$12
192
# Count and pointers advance here; later stores of this pass use
# negative offsets from the updated $4.
193	subu	$6,4
194	addu $4,4*4
195	addu $5,4*4
196	mflo	($1,$14,$7)
197	mfhi	($14,$14,$7)
198	addu	$2,$1
199	sltu	$15,$2,$1
200	 multu	($8,$7)
201	sw	$2,-3*4($4)
202	addu	$2,$15,$14
203
204	mflo	($1,$8,$7)
205	mfhi	($8,$8,$7)
206	addu	$2,$1
207	sltu	$9,$2,$1
208	 multu	($10,$7)
209	sw	$2,-2*4($4)
210	addu	$2,$9,$8
211
# $8 = count & -4, nonzero while at least four words remain.
212	and	$8,$6,$3
213	mflo	($1,$10,$7)
214	mfhi	($10,$10,$7)
215	addu	$2,$1
216	sltu	$11,$2,$1
217	sw	$2,-4($4)
218	.set	noreorder
# The new carry is formed in the branch delay slot on both paths.
219	bgtz	$8,.L_bn_mul_words_loop
220	addu	$2,$11,$10
221
222	beqz	$6,.L_bn_mul_words_return
223	nop
224
# Tail: at most three words are left, handled one at a time.
225.L_bn_mul_words_tail:
226	.set	reorder
227	lw	$12,0($5)
228	multu	($12,$7)
229	subu	$6,1
230	mflo	($1,$12,$7)
231	mfhi	($12,$12,$7)
232	addu	$2,$1
233	sltu	$13,$2,$1
234	sw	$2,0($4)
235	addu	$2,$13,$12
236	beqz	$6,.L_bn_mul_words_return
237
238	lw	$12,4($5)
239	multu	($12,$7)
240	subu	$6,1
241	mflo	($1,$12,$7)
242	mfhi	($12,$12,$7)
243	addu	$2,$1
244	sltu	$13,$2,$1
245	sw	$2,4($4)
246	addu	$2,$13,$12
247	beqz	$6,.L_bn_mul_words_return
248
249	lw	$12,2*4($5)
250	multu	($12,$7)
251	mflo	($1,$12,$7)
252	mfhi	($12,$12,$7)
253	addu	$2,$1
254	sltu	$13,$2,$1
255	sw	$2,2*4($4)
256	addu	$2,$13,$12
257
# Return the final carry in $2; the delay slot copies it into $4 too.
258.L_bn_mul_words_return:
259	.set	noreorder
260	jr	$31
261	move	$4,$2
262.end	bn_mul_words_internal
263
# bn_sqr_words: exported entry point.
# Branches to bn_sqr_words_internal when the count in $6 is positive,
# otherwise returns at once.  The delay slots clear $2 on both paths and
# copy $2 (zero) into $4 on the early return.
264.align	5
265.globl	bn_sqr_words
266.ent	bn_sqr_words
267bn_sqr_words:
268	.set	noreorder
269	bgtz	$6,bn_sqr_words_internal
270	move	$2,$0
271	jr	$31
272	move	$4,$2
273.end	bn_sqr_words
274
# bn_sqr_words_internal: square every word of an array.
# Register roles, as read from the code below:
#   $4  pointer to the output; two words are stored per input word
#       (low half of the square first, then the high half)
#   $5  pointer to the input words
#   $6  remaining input word count (entry stub only branches here when > 0)
#   $3  constant -4, masks the count down to a multiple of four
# There is no carry between squares.  $2 is never written in this
# routine, so the final move copies the zero left there by the entry stub.
275.align	5
276.ent	bn_sqr_words_internal
277bn_sqr_words_internal:
278	.set	reorder
279	li	$3,-4
280	and	$8,$6,$3
# Fewer than four words in total: go straight to the tail.
281	beqz	$8,.L_bn_sqr_words_tail
282
# Main loop: four input words in, eight output words out per pass.
283.L_bn_sqr_words_loop:
284	lw	$12,0($5)
285	multu	($12,$12)
286	lw	$14,4($5)
287	lw	$8,2*4($5)
288	lw	$10,3*4($5)
289	mflo	($13,$12,$12)
290	mfhi	($12,$12,$12)
291	sw	$13,0($4)
292	sw	$12,4($4)
293
# The output pointer advances by eight words and the input pointer by
# four; the remaining stores use negative offsets from the updated $4.
294	multu	($14,$14)
295	subu	$6,4
296	addu $4,8*4
297	addu $5,4*4
298	mflo	($15,$14,$14)
299	mfhi	($14,$14,$14)
300	sw	$15,-6*4($4)
301	sw	$14,-5*4($4)
302
303	multu	($8,$8)
304	mflo	($9,$8,$8)
305	mfhi	($8,$8,$8)
306	sw	$9,-4*4($4)
307	sw	$8,-3*4($4)
308
309
# $8 is reused here as count & -4 once its square has been stored.
310	multu	($10,$10)
311	and	$8,$6,$3
312	mflo	($11,$10,$10)
313	mfhi	($10,$10,$10)
314	sw	$11,-2*4($4)
315
# The last store is issued before the branch and the delay slot holds
# a nop.
316	.set	noreorder
317	sw	$10,-4($4)
318	bgtz	$8,.L_bn_sqr_words_loop
319	nop
320
321	beqz	$6,.L_bn_sqr_words_return
322	nop
323
# Tail: at most three input words are left, handled one at a time.
324.L_bn_sqr_words_tail:
325	.set	reorder
326	lw	$12,0($5)
327	multu	($12,$12)
328	subu	$6,1
329	mflo	($13,$12,$12)
330	mfhi	($12,$12,$12)
331	sw	$13,0($4)
332	sw	$12,4($4)
333	beqz	$6,.L_bn_sqr_words_return
334
335	lw	$12,4($5)
336	multu	($12,$12)
337	subu	$6,1
338	mflo	($13,$12,$12)
339	mfhi	($12,$12,$12)
340	sw	$13,2*4($4)
341	sw	$12,3*4($4)
342	beqz	$6,.L_bn_sqr_words_return
343
344	lw	$12,2*4($5)
345	multu	($12,$12)
346	mflo	($13,$12,$12)
347	mfhi	($12,$12,$12)
348	sw	$13,4*4($4)
349	sw	$12,5*4($4)
350
# Return; the delay slot copies $2 into $4.
351.L_bn_sqr_words_return:
352	.set	noreorder
353	jr	$31
354	move	$4,$2
355
356.end	bn_sqr_words_internal
357
# bn_add_words: exported entry point.
# Branches to bn_add_words_internal when the count in $7 is positive,
# otherwise returns at once.  The delay slots clear $2 on both paths and
# copy $2 (zero) into $4 on the early return.
358.align	5
359.globl	bn_add_words
360.ent	bn_add_words
361bn_add_words:
362	.set	noreorder
363	bgtz	$7,bn_add_words_internal
364	move	$2,$0
365	jr	$31
366	move	$4,$2
367.end	bn_add_words
368
# bn_add_words_internal: word-wise addition of two arrays with carry.
# Register roles, as read from the code below:
#   $4  pointer to the words that are stored
#   $5  pointer to the first operand words
#   $6  pointer to the second operand words
#   $7  remaining word count (the entry stub only branches here when > 0)
#   $2  carry; cleared by the entry stub, returned to the caller
#   $3  constant -4, masks the count down to a multiple of four
#   $1  count & -4 (the file is assembled with .set noat)
#   $24, $25  carry-out of the operand addition for alternating words
# Every step computes word($5) + word($6) + $2.  The carry-out of the
# operand sum and the carry-out of adding the old carry are detected
# separately with sltu and then added together into $2.
369.align	5
370.ent	bn_add_words_internal
371bn_add_words_internal:
372	.set	reorder
373	li	$3,-4
374	and	$1,$7,$3
# Fewer than four words in total: go straight to the tail.
375	beqz	$1,.L_bn_add_words_tail
376
# Main loop, four words per pass.  Loads are interleaved with the count
# and pointer updates, so loads from $6 after its update and all stores
# use negative offsets.
377.L_bn_add_words_loop:
378	lw	$12,0($5)
379	lw	$8,0($6)
380	subu	$7,4
381	lw	$13,4($5)
382	and	$1,$7,$3
383	lw	$14,2*4($5)
384	addu $6,4*4
385	lw	$15,3*4($5)
386	addu $4,4*4
387	lw	$9,-3*4($6)
388	addu $5,4*4
389	lw	$10,-2*4($6)
390	lw	$11,-4($6)
391	addu	$8,$12
392	sltu	$24,$8,$12
393	addu	$12,$8,$2
394	sltu	$2,$12,$8
395	sw	$12,-4*4($4)
396	addu	$2,$24
397
398	addu	$9,$13
399	sltu	$25,$9,$13
400	addu	$13,$9,$2
401	sltu	$2,$13,$9
402	sw	$13,-3*4($4)
403	addu	$2,$25
404
405	addu	$10,$14
406	sltu	$24,$10,$14
407	addu	$14,$10,$2
408	sltu	$2,$14,$10
409	sw	$14,-2*4($4)
410	addu	$2,$24
411
412	addu	$11,$15
413	sltu	$25,$11,$15
414	addu	$15,$11,$2
415	sltu	$2,$15,$11
416	sw	$15,-4($4)
417
418	.set	noreorder
# The last carry merge sits in the branch delay slot.
419	bgtz	$1,.L_bn_add_words_loop
420	addu	$2,$25
421
422	beqz	$7,.L_bn_add_words_return
423	nop
424
# Tail: at most three words are left, handled one at a time.
425.L_bn_add_words_tail:
426	.set	reorder
427	lw	$12,0($5)
428	lw	$8,0($6)
429	addu	$8,$12
430	subu	$7,1
431	sltu	$24,$8,$12
432	addu	$12,$8,$2
433	sltu	$2,$12,$8
434	sw	$12,0($4)
435	addu	$2,$24
436	beqz	$7,.L_bn_add_words_return
437
438	lw	$13,4($5)
439	lw	$9,4($6)
440	addu	$9,$13
441	subu	$7,1
442	sltu	$25,$9,$13
443	addu	$13,$9,$2
444	sltu	$2,$13,$9
445	sw	$13,4($4)
446	addu	$2,$25
447	beqz	$7,.L_bn_add_words_return
448
449	lw	$14,2*4($5)
450	lw	$10,2*4($6)
451	addu	$10,$14
452	sltu	$24,$10,$14
453	addu	$14,$10,$2
454	sltu	$2,$14,$10
455	sw	$14,2*4($4)
456	addu	$2,$24
457
# Return the final carry in $2; the delay slot copies it into $4 too.
458.L_bn_add_words_return:
459	.set	noreorder
460	jr	$31
461	move	$4,$2
462
463.end	bn_add_words_internal
464
# bn_sub_words: exported entry point.
# Branches to bn_sub_words_internal when the count in $7 is positive,
# otherwise returns at once with $2 = 0.  The early return clears $4
# from $0 directly, where the sibling entry stubs copy $2; the value is
# the same because $2 has just been cleared.
465.align	5
466.globl	bn_sub_words
467.ent	bn_sub_words
468bn_sub_words:
469	.set	noreorder
470	bgtz	$7,bn_sub_words_internal
471	move	$2,$0
472	jr	$31
473	move	$4,$0
474.end	bn_sub_words
475
# bn_sub_words_internal: word-wise subtraction of two arrays with borrow.
# Register roles, as read from the code below:
#   $4  pointer to the words that are stored
#   $5  pointer to the minuend words
#   $6  pointer to the subtrahend words
#   $7  remaining word count (the entry stub only branches here when > 0)
#   $2  borrow; cleared by the entry stub, returned to the caller
#   $3  constant -4, masks the count down to a multiple of four
#   $1  count & -4 (the file is assembled with .set noat)
#   $24, $25  borrow out of the operand subtraction for alternating words
# Every step computes word($5) - word($6) - $2.  The borrow of the
# operand difference (sltu) and the borrow caused by subtracting the old
# borrow (sgtu) are detected separately and then added together into $2.
476.align	5
477.ent	bn_sub_words_internal
478bn_sub_words_internal:
479	.set	reorder
480	li	$3,-4
481	and	$1,$7,$3
# Fewer than four words in total: go straight to the tail.
482	beqz	$1,.L_bn_sub_words_tail
483
# Main loop, four words per pass.  Loads are interleaved with the count
# and pointer updates, so loads from $6 after its update and all stores
# use negative offsets.
484.L_bn_sub_words_loop:
485	lw	$12,0($5)
486	lw	$8,0($6)
487	subu	$7,4
488	lw	$13,4($5)
489	and	$1,$7,$3
490	lw	$14,2*4($5)
491	addu $6,4*4
492	lw	$15,3*4($5)
493	addu $4,4*4
494	lw	$9,-3*4($6)
495	addu $5,4*4
496	lw	$10,-2*4($6)
497	lw	$11,-4($6)
498	sltu	$24,$12,$8
499	subu	$8,$12,$8
500	subu	$12,$8,$2
501	sgtu	$2,$12,$8
502	sw	$12,-4*4($4)
503	addu	$2,$24
504
505	sltu	$25,$13,$9
506	subu	$9,$13,$9
507	subu	$13,$9,$2
508	sgtu	$2,$13,$9
509	sw	$13,-3*4($4)
510	addu	$2,$25
511
512
513	sltu	$24,$14,$10
514	subu	$10,$14,$10
515	subu	$14,$10,$2
516	sgtu	$2,$14,$10
517	sw	$14,-2*4($4)
518	addu	$2,$24
519
520	sltu	$25,$15,$11
521	subu	$11,$15,$11
522	subu	$15,$11,$2
523	sgtu	$2,$15,$11
524	sw	$15,-4($4)
525
526	.set	noreorder
# The last borrow merge sits in the branch delay slot.
527	bgtz	$1,.L_bn_sub_words_loop
528	addu	$2,$25
529
530	beqz	$7,.L_bn_sub_words_return
531	nop
532
# Tail: at most three words are left, handled one at a time.
533.L_bn_sub_words_tail:
534	.set	reorder
535	lw	$12,0($5)
536	lw	$8,0($6)
537	subu	$7,1
538	sltu	$24,$12,$8
539	subu	$8,$12,$8
540	subu	$12,$8,$2
541	sgtu	$2,$12,$8
542	sw	$12,0($4)
543	addu	$2,$24
544	beqz	$7,.L_bn_sub_words_return
545
546	lw	$13,4($5)
547	subu	$7,1
548	lw	$9,4($6)
549	sltu	$25,$13,$9
550	subu	$9,$13,$9
551	subu	$13,$9,$2
552	sgtu	$2,$13,$9
553	sw	$13,4($4)
554	addu	$2,$25
555	beqz	$7,.L_bn_sub_words_return
556
557	lw	$14,2*4($5)
558	lw	$10,2*4($6)
559	sltu	$24,$14,$10
560	subu	$10,$14,$10
561	subu	$14,$10,$2
562	sgtu	$2,$14,$10
563	sw	$14,2*4($4)
564	addu	$2,$24
565
# Return the final borrow in $2; the delay slot copies it into $4 too.
566.L_bn_sub_words_return:
567	.set	noreorder
568	jr	$31
569	move	$4,$2
570.end	bn_sub_words_internal
571
# bn_div_3_words: exported entry point.
# On entry $4 is a pointer, and $5 and $6 are words.  The pointer is
# parked in $7 and $5 in $10, then $4 and $5 are reloaded with the words
# at 0($7) and -4($7) so that they line up as arguments for
# bn_div_words_internal.  When the word at 0($7) equals $6 the routine
# returns -1 (all ones) in $2 and $4 without dividing; otherwise it
# continues in bn_div_3_words_internal.
572.align 5
573.globl	bn_div_3_words
574.ent	bn_div_3_words
575bn_div_3_words:
576	.set	noreorder
577	move	$7,$4		# we know that bn_div_words does not
578				# touch $7, $10, $11 and preserves $6
579				# so that we can save two arguments
580				# and return address in registers
581				# instead of stack:-)
582
583	lw	$4,($7)
584	move	$10,$5
585	lw	$5,-4($7)
586	bne	$4,$6,bn_div_3_words_internal
587	 nop
588	li	$2,-1
589	jr	$31
590	move	$4,$2
591.end	bn_div_3_words
592
# bn_div_3_words_internal: quotient estimate plus correction loop.
# State on entry, as set up by bn_div_3_words:
#   $4, $5  words loaded from 0($7) and -4($7)
#   $6      word passed through unchanged to bn_div_words_internal
#   $7      the original pointer
#   $10     the second original argument word
# The return address is kept in $11 across the nested call because bal
# overwrites $31.  After the call $2 holds the quotient and $5 the
# remainder (bn_div_words_internal copies its $3 into $5 on exit).
593.align	5
594.ent	bn_div_3_words_internal
595bn_div_3_words_internal:
596	.set	reorder
597	move	$11,$31
598	bal	bn_div_words_internal
599	move	$31,$11
# $13:$12 = $10 * quotient, $14 = word at -2*4($7).
600	multu	($10,$2)
601	lw	$14,-2*4($7)
602	move	$8,$0
603	mfhi	($13,$10,$2)
604	mflo	($12,$10,$2)
605	sltu	$24,$13,$5
# Correction loop.  $24 is the exit flag tested at the top: it is set
# when the high product word is below the remainder, or when adding $6
# to the remainder wrapped around.  $1 is set when the high product word
# equals the remainder and the low product word does not exceed $14.
# Each pass adds $6 to the remainder in $5 and subtracts $10 from the
# two-word product with borrow.
606.L_bn_div_3_words_inner_loop:
607	bnez	$24,.L_bn_div_3_words_inner_loop_done
608	sgeu	$1,$14,$12
609	seq	$25,$13,$5
610	and	$1,$25
611	sltu	$15,$12,$10
612	addu	$5,$6
613	subu	$13,$15
614	subu	$12,$10
615	sltu	$24,$13,$5
616	sltu	$8,$5,$6
617	or	$24,$8
618	.set	noreorder
# The decrement is in the delay slot and always runs; on fall-through
# (loop finished because $1 was set) the following addu undoes it.
619	beqz	$1,.L_bn_div_3_words_inner_loop
620	subu	$2,1
621	addu	$2,1
622	.set	reorder
# Return the quotient in $2; the delay slot copies it into $4 too.
623.L_bn_div_3_words_inner_loop_done:
624	.set	noreorder
625	jr	$31
626	move	$4,$2
627.end	bn_div_3_words_internal
628
# bn_div_words: exported entry point.
# Branches to bn_div_words_internal when the divisor in $6 is nonzero.
# The li in the delay slot runs on both paths, so a zero divisor makes
# the routine return -1 (all ones) in $2 and $4 instead of trapping.
629.align	5
630.globl	bn_div_words
631.ent	bn_div_words
632bn_div_words:
633	.set	noreorder
634	bnez	$6,bn_div_words_internal
635	li	$2,-1		# I would rather signal div-by-zero
636				# which can be done with 'break 7'
637	jr	$31
638	move	$4,$2
639.end	bn_div_words
640
# bn_div_words_internal: divide the two-word value $4:$5 by $6.
# Register roles, as read from the code below:
#   $4  high word of the dividend, later the running partial remainder
#   $5  low word of the dividend
#   $6  divisor (nonzero, checked by the entry stub); shifted left for
#       the computation and shifted back before returning
#   $25 number of bit positions the divisor was shifted
#   $3  upper half-word of the shifted divisor, later the remainder
#   $8  current half-word quotient digit
#   $2  quotient, returned to the caller (copied to $4 as well)
# The quotient is produced as two 16-bit digits.  For each digit an
# estimate is taken from a division by the upper half of the divisor
# (or 0xffff when the upper halves are equal) and is then decremented
# until digit * divisor no longer exceeds the current partial dividend.
# The opening instructions carry no .set directive of their own and so
# inherit .set noreorder from the entry stub above: the line after each
# branch is its delay slot.
641.align	5
642.ent	bn_div_words_internal
643bn_div_words_internal:
644	move	$3,$0
# Divisor already has its top bit set: skip normalisation, $25 = 0.
645	bltz	$6,.L_bn_div_words_body
646	move	$25,$3
# Shift the divisor left one bit at a time until its top bit is set;
# the delay slot counts every shift in $25.
647	sll	$6,1
648	bgtz	$6,.-4
649	addu	$25,1
650
# $14 = the top $25 bits of $4.  They would be lost by the shift below,
# so any set bit there raises break 6.  $1 = bits of $5 that move up
# into $4.
651	.set	reorder
652	negu	$13,$25
653	li	$14,-1
654	sll	$14,$13
655	and	$14,$4
656	srl	$1,$5,$13
657	.set	noreorder
658	beqz	$14,.+12
659	nop
660	break	6		# signal overflow
661	.set	reorder
662	sll	$4,$25
663	sll	$5,$25
664	or	$4,$1
665.L_bn_div_words_body:
666	srl	$3,$6,4*4	# bits
# Reduce the high word once when it is not below the divisor; the branch
# skips the subtraction otherwise.
667	sgeu	$1,$4,$6
668	.set	noreorder
669	beqz	$1,.+12
670	nop
671	subu	$4,$6
672	.set	reorder
673
# First quotient digit.  The divu and mfqt macros are chosen by the
# preprocessor block at the top of the file.
674	li	$8,-1
675	srl	$9,$4,4*4	# bits
676	srl	$8,4*4	# q=0xffffffff
677	beq	$3,$9,.L_bn_div_words_skip_div1
678	divu	($4,$3)
679	mfqt	($8,$4,$3)
680.L_bn_div_words_skip_div1:
# $13:$12 = divisor * digit, $15 = next 32 bits of the partial dividend.
681	multu	($6,$8)
682	sll	$15,$4,4*4	# bits
683	srl	$1,$5,4*4	# bits
684	or	$15,$1
685	mflo	($12,$6,$8)
686	mfhi	($13,$6,$8)
# While the product is larger than the partial dividend ($9:$15),
# subtract the divisor from the product and decrement the digit.  The
# borrow adjustment of the high word sits in the delay slot of the exit
# branch and the digit decrement in the delay slot of the back branch.
687.L_bn_div_words_inner_loop1:
688	sltu	$14,$15,$12
689	seq	$24,$9,$13
690	sltu	$1,$9,$13
691	and	$14,$24
692	sltu	$2,$12,$6
693	or	$1,$14
694	.set	noreorder
695	beqz	$1,.L_bn_div_words_inner_loop1_done
696	subu	$13,$2
697	subu	$12,$6
698	b	.L_bn_div_words_inner_loop1
699	subu	$8,1
700	.set	reorder
701.L_bn_div_words_inner_loop1_done:
702
# New partial remainder in $4; the first digit becomes the upper half of
# the quotient in $2.
703	sll	$5,4*4	# bits
704	subu	$4,$15,$12
705	sll	$2,$8,4*4	# bits
706
# Second quotient digit, same procedure as the first.
707	li	$8,-1
708	srl	$9,$4,4*4	# bits
709	srl	$8,4*4	# q=0xffffffff
710	beq	$3,$9,.L_bn_div_words_skip_div2
711	divu	($4,$3)
712	mfqt	($8,$4,$3)
713.L_bn_div_words_skip_div2:
714	multu	($6,$8)
715	sll	$15,$4,4*4	# bits
716	srl	$1,$5,4*4	# bits
717	or	$15,$1
718	mflo	($12,$6,$8)
719	mfhi	($13,$6,$8)
# Same correction loop as above; $3 serves as the borrow scratch here
# because the upper divisor half is no longer needed.
720.L_bn_div_words_inner_loop2:
721	sltu	$14,$15,$12
722	seq	$24,$9,$13
723	sltu	$1,$9,$13
724	and	$14,$24
725	sltu	$3,$12,$6
726	or	$1,$14
727	.set	noreorder
728	beqz	$1,.L_bn_div_words_inner_loop2_done
729	subu	$13,$3
730	subu	$12,$6
731	b	.L_bn_div_words_inner_loop2
732	subu	$8,1
733	.set	reorder
734.L_bn_div_words_inner_loop2_done:
735
# Merge the second digit into the quotient and undo the normalisation
# shift on the remainder and on the divisor.
736	subu	$4,$15,$12
737	or	$2,$8
738	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
739	srl	$6,$25		# restore $6
740
# Return the quotient in $2 and $4, the remainder in $3 and $5.
741	.set	noreorder
742	move	$5,$3
743	jr	$31
744	move	$4,$2
745.end	bn_div_words_internal
746
# bn_mul_comba8: fully unrolled 8 x 8 word multiplication.
# Stores sixteen result words r[0]..r[15] through $4, computed from the
# eight words a[0]..a[7] loaded through $5 and the eight words
# b[0]..b[7] loaded through $6.
# Register map, as read from the loads and the mul_add_c notes below:
#   a[0]..a[7]  $12 $13 $14 $15 $16 $18 $20 $5
#   b[0]..b[7]  $8  $9  $10 $11 $17 $19 $21 $6
#   $2 $3 $7    three-word accumulator; the roles c1, c2, c3 rotate by
#               one register after every stored result word
#   $24 $25     low and high word of the current product
#   $1          carry scratch (the file is assembled with .set noat)
# The pointers in $5 and $6 are overwritten by a[7] and b[7] once the
# other operands have been loaded.  Registers $16 to $21 are saved in a
# six-word stack frame on entry and restored before returning.
# Products are summed column by column.  In each column the first
# product sets the top accumulator word with sltu and later products add
# their carry into it.  Each multiply is issued while the previous
# product is still being accumulated.
# NOTE(review): the spelling 1ine in the author note below looks
# deliberate.  This file is run through the C preprocessor, and a comment
# line starting with the real word could be read as a directive.  Confirm
# before correcting it.
747.align	5
748.globl	bn_mul_comba8
749.ent	bn_mul_comba8
750bn_mul_comba8:
751	.set	noreorder
752	.frame	$29,6*4,$31
753	.mask	0x003f0000,-4
754	subu $29,6*4
755	sw	$21,5*4($29)
756	sw	$20,4*4($29)
757	sw	$19,3*4($29)
758	sw	$18,2*4($29)
759	sw	$17,1*4($29)
760	sw	$16,0*4($29)
761
762	.set	reorder
763	lw	$12,0($5)	# If compiled with -mips3 option on
764				# R5000 box assembler barks on this
765				# 1ine with "should not have mult/div
766				# as last instruction in bb (R10K
767				# bug)" warning. If anybody out there
768				# has a clue about how to circumvent
769				# this do send me a note.
770				#		<appro@fy.chalmers.se>
771
772	lw	$8,0($6)
773	lw	$13,4($5)
774	lw	$14,2*4($5)
775	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
776	lw	$15,3*4($5)
777	lw	$9,4($6)
778	lw	$10,2*4($6)
779	lw	$11,3*4($6)
780	mflo	($2,$12,$8)
781	mfhi	($3,$12,$8)
782
783	lw	$16,4*4($5)
784	lw	$18,5*4($5)
785	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
786	lw	$20,6*4($5)
787	lw	$5,7*4($5)
788	lw	$17,4*4($6)
789	lw	$19,5*4($6)
790	mflo	($24,$12,$9)
791	mfhi	($25,$12,$9)
792	addu	$3,$24
793	sltu	$1,$3,$24
794	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
795	addu	$7,$25,$1
796	lw	$21,6*4($6)
797	lw	$6,7*4($6)
798	sw	$2,0($4)	# r[0]=c1;
799	mflo	($24,$13,$8)
800	mfhi	($25,$13,$8)
801	addu	$3,$24
802	sltu	$1,$3,$24
803	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
804	addu	$25,$1
805	addu	$7,$25
806	sltu	$2,$7,$25
807	sw	$3,4($4)	# r[1]=c2;
808
809	mflo	($24,$14,$8)
810	mfhi	($25,$14,$8)
811	addu	$7,$24
812	sltu	$1,$7,$24
813	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
814	addu	$25,$1
815	addu	$2,$25
816	mflo	($24,$13,$9)
817	mfhi	($25,$13,$9)
818	addu	$7,$24
819	sltu	$1,$7,$24
820	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
821	addu	$25,$1
822	addu	$2,$25
823	sltu	$3,$2,$25
824	mflo	($24,$12,$10)
825	mfhi	($25,$12,$10)
826	addu	$7,$24
827	sltu	$1,$7,$24
828	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
829	addu	$25,$1
830	addu	$2,$25
831	sltu	$1,$2,$25
832	addu	$3,$1
833	sw	$7,2*4($4)	# r[2]=c3;
834
835	mflo	($24,$12,$11)
836	mfhi	($25,$12,$11)
837	addu	$2,$24
838	sltu	$1,$2,$24
839	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
840	addu	$25,$1
841	addu	$3,$25
842	sltu	$7,$3,$25
843	mflo	($24,$13,$10)
844	mfhi	($25,$13,$10)
845	addu	$2,$24
846	sltu	$1,$2,$24
847	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
848	addu	$25,$1
849	addu	$3,$25
850	sltu	$1,$3,$25
851	addu	$7,$1
852	mflo	($24,$14,$9)
853	mfhi	($25,$14,$9)
854	addu	$2,$24
855	sltu	$1,$2,$24
856	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
857	addu	$25,$1
858	addu	$3,$25
859	sltu	$1,$3,$25
860	addu	$7,$1
861	mflo	($24,$15,$8)
862	mfhi	($25,$15,$8)
863	addu	$2,$24
864	sltu	$1,$2,$24
865	 multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
866	addu	$25,$1
867	addu	$3,$25
868	sltu	$1,$3,$25
869	addu	$7,$1
870	sw	$2,3*4($4)	# r[3]=c1;
871
872	mflo	($24,$16,$8)
873	mfhi	($25,$16,$8)
874	addu	$3,$24
875	sltu	$1,$3,$24
876	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
877	addu	$25,$1
878	addu	$7,$25
879	sltu	$2,$7,$25
880	mflo	($24,$15,$9)
881	mfhi	($25,$15,$9)
882	addu	$3,$24
883	sltu	$1,$3,$24
884	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
885	addu	$25,$1
886	addu	$7,$25
887	sltu	$1,$7,$25
888	addu	$2,$1
889	mflo	($24,$14,$10)
890	mfhi	($25,$14,$10)
891	addu	$3,$24
892	sltu	$1,$3,$24
893	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
894	addu	$25,$1
895	addu	$7,$25
896	sltu	$1,$7,$25
897	addu	$2,$1
898	mflo	($24,$13,$11)
899	mfhi	($25,$13,$11)
900	addu	$3,$24
901	sltu	$1,$3,$24
902	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
903	addu	$25,$1
904	addu	$7,$25
905	sltu	$1,$7,$25
906	addu	$2,$1
907	mflo	($24,$12,$17)
908	mfhi	($25,$12,$17)
909	addu	$3,$24
910	sltu	$1,$3,$24
911	 multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
912	addu	$25,$1
913	addu	$7,$25
914	sltu	$1,$7,$25
915	addu	$2,$1
916	sw	$3,4*4($4)	# r[4]=c2;
917
918	mflo	($24,$12,$19)
919	mfhi	($25,$12,$19)
920	addu	$7,$24
921	sltu	$1,$7,$24
922	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
923	addu	$25,$1
924	addu	$2,$25
925	sltu	$3,$2,$25
926	mflo	($24,$13,$17)
927	mfhi	($25,$13,$17)
928	addu	$7,$24
929	sltu	$1,$7,$24
930	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
931	addu	$25,$1
932	addu	$2,$25
933	sltu	$1,$2,$25
934	addu	$3,$1
935	mflo	($24,$14,$11)
936	mfhi	($25,$14,$11)
937	addu	$7,$24
938	sltu	$1,$7,$24
939	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
940	addu	$25,$1
941	addu	$2,$25
942	sltu	$1,$2,$25
943	addu	$3,$1
944	mflo	($24,$15,$10)
945	mfhi	($25,$15,$10)
946	addu	$7,$24
947	sltu	$1,$7,$24
948	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
949	addu	$25,$1
950	addu	$2,$25
951	sltu	$1,$2,$25
952	addu	$3,$1
953	mflo	($24,$16,$9)
954	mfhi	($25,$16,$9)
955	addu	$7,$24
956	sltu	$1,$7,$24
957	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
958	addu	$25,$1
959	addu	$2,$25
960	sltu	$1,$2,$25
961	addu	$3,$1
962	mflo	($24,$18,$8)
963	mfhi	($25,$18,$8)
964	addu	$7,$24
965	sltu	$1,$7,$24
966	 multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
967	addu	$25,$1
968	addu	$2,$25
969	sltu	$1,$2,$25
970	addu	$3,$1
971	sw	$7,5*4($4)	# r[5]=c3;
972
973	mflo	($24,$20,$8)
974	mfhi	($25,$20,$8)
975	addu	$2,$24
976	sltu	$1,$2,$24
977	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
978	addu	$25,$1
979	addu	$3,$25
980	sltu	$7,$3,$25
981	mflo	($24,$18,$9)
982	mfhi	($25,$18,$9)
983	addu	$2,$24
984	sltu	$1,$2,$24
985	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
986	addu	$25,$1
987	addu	$3,$25
988	sltu	$1,$3,$25
989	addu	$7,$1
990	mflo	($24,$16,$10)
991	mfhi	($25,$16,$10)
992	addu	$2,$24
993	sltu	$1,$2,$24
994	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
995	addu	$25,$1
996	addu	$3,$25
997	sltu	$1,$3,$25
998	addu	$7,$1
999	mflo	($24,$15,$11)
1000	mfhi	($25,$15,$11)
1001	addu	$2,$24
1002	sltu	$1,$2,$24
1003	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
1004	addu	$25,$1
1005	addu	$3,$25
1006	sltu	$1,$3,$25
1007	addu	$7,$1
1008	mflo	($24,$14,$17)
1009	mfhi	($25,$14,$17)
1010	addu	$2,$24
1011	sltu	$1,$2,$24
1012	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
1013	addu	$25,$1
1014	addu	$3,$25
1015	sltu	$1,$3,$25
1016	addu	$7,$1
1017	mflo	($24,$13,$19)
1018	mfhi	($25,$13,$19)
1019	addu	$2,$24
1020	sltu	$1,$2,$24
1021	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
1022	addu	$25,$1
1023	addu	$3,$25
1024	sltu	$1,$3,$25
1025	addu	$7,$1
1026	mflo	($24,$12,$21)
1027	mfhi	($25,$12,$21)
1028	addu	$2,$24
1029	sltu	$1,$2,$24
1030	 multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
1031	addu	$25,$1
1032	addu	$3,$25
1033	sltu	$1,$3,$25
1034	addu	$7,$1
1035	sw	$2,6*4($4)	# r[6]=c1;
1036
1037	mflo	($24,$12,$6)
1038	mfhi	($25,$12,$6)
1039	addu	$3,$24
1040	sltu	$1,$3,$24
1041	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
1042	addu	$25,$1
1043	addu	$7,$25
1044	sltu	$2,$7,$25
1045	mflo	($24,$13,$21)
1046	mfhi	($25,$13,$21)
1047	addu	$3,$24
1048	sltu	$1,$3,$24
1049	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
1050	addu	$25,$1
1051	addu	$7,$25
1052	sltu	$1,$7,$25
1053	addu	$2,$1
1054	mflo	($24,$14,$19)
1055	mfhi	($25,$14,$19)
1056	addu	$3,$24
1057	sltu	$1,$3,$24
1058	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
1059	addu	$25,$1
1060	addu	$7,$25
1061	sltu	$1,$7,$25
1062	addu	$2,$1
1063	mflo	($24,$15,$17)
1064	mfhi	($25,$15,$17)
1065	addu	$3,$24
1066	sltu	$1,$3,$24
1067	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
1068	addu	$25,$1
1069	addu	$7,$25
1070	sltu	$1,$7,$25
1071	addu	$2,$1
1072	mflo	($24,$16,$11)
1073	mfhi	($25,$16,$11)
1074	addu	$3,$24
1075	sltu	$1,$3,$24
1076	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
1077	addu	$25,$1
1078	addu	$7,$25
1079	sltu	$1,$7,$25
1080	addu	$2,$1
1081	mflo	($24,$18,$10)
1082	mfhi	($25,$18,$10)
1083	addu	$3,$24
1084	sltu	$1,$3,$24
1085	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
1086	addu	$25,$1
1087	addu	$7,$25
1088	sltu	$1,$7,$25
1089	addu	$2,$1
1090	mflo	($24,$20,$9)
1091	mfhi	($25,$20,$9)
1092	addu	$3,$24
1093	sltu	$1,$3,$24
1094	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
1095	addu	$25,$1
1096	addu	$7,$25
1097	sltu	$1,$7,$25
1098	addu	$2,$1
1099	mflo	($24,$5,$8)
1100	mfhi	($25,$5,$8)
1101	addu	$3,$24
1102	sltu	$1,$3,$24
1103	 multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
1104	addu	$25,$1
1105	addu	$7,$25
1106	sltu	$1,$7,$25
1107	addu	$2,$1
1108	sw	$3,7*4($4)	# r[7]=c2;
1109
1110	mflo	($24,$5,$9)
1111	mfhi	($25,$5,$9)
1112	addu	$7,$24
1113	sltu	$1,$7,$24
1114	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
1115	addu	$25,$1
1116	addu	$2,$25
1117	sltu	$3,$2,$25
1118	mflo	($24,$20,$10)
1119	mfhi	($25,$20,$10)
1120	addu	$7,$24
1121	sltu	$1,$7,$24
1122	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
1123	addu	$25,$1
1124	addu	$2,$25
1125	sltu	$1,$2,$25
1126	addu	$3,$1
1127	mflo	($24,$18,$11)
1128	mfhi	($25,$18,$11)
1129	addu	$7,$24
1130	sltu	$1,$7,$24
1131	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
1132	addu	$25,$1
1133	addu	$2,$25
1134	sltu	$1,$2,$25
1135	addu	$3,$1
1136	mflo	($24,$16,$17)
1137	mfhi	($25,$16,$17)
1138	addu	$7,$24
1139	sltu	$1,$7,$24
1140	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
1141	addu	$25,$1
1142	addu	$2,$25
1143	sltu	$1,$2,$25
1144	addu	$3,$1
1145	mflo	($24,$15,$19)
1146	mfhi	($25,$15,$19)
1147	addu	$7,$24
1148	sltu	$1,$7,$24
1149	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
1150	addu	$25,$1
1151	addu	$2,$25
1152	sltu	$1,$2,$25
1153	addu	$3,$1
1154	mflo	($24,$14,$21)
1155	mfhi	($25,$14,$21)
1156	addu	$7,$24
1157	sltu	$1,$7,$24
1158	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
1159	addu	$25,$1
1160	addu	$2,$25
1161	sltu	$1,$2,$25
1162	addu	$3,$1
1163	mflo	($24,$13,$6)
1164	mfhi	($25,$13,$6)
1165	addu	$7,$24
1166	sltu	$1,$7,$24
1167	 multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
1168	addu	$25,$1
1169	addu	$2,$25
1170	sltu	$1,$2,$25
1171	addu	$3,$1
1172	sw	$7,8*4($4)	# r[8]=c3;
1173
1174	mflo	($24,$14,$6)
1175	mfhi	($25,$14,$6)
1176	addu	$2,$24
1177	sltu	$1,$2,$24
1178	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
1179	addu	$25,$1
1180	addu	$3,$25
1181	sltu	$7,$3,$25
1182	mflo	($24,$15,$21)
1183	mfhi	($25,$15,$21)
1184	addu	$2,$24
1185	sltu	$1,$2,$24
1186	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
1187	addu	$25,$1
1188	addu	$3,$25
1189	sltu	$1,$3,$25
1190	addu	$7,$1
1191	mflo	($24,$16,$19)
1192	mfhi	($25,$16,$19)
1193	addu	$2,$24
1194	sltu	$1,$2,$24
1195	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
1196	addu	$25,$1
1197	addu	$3,$25
1198	sltu	$1,$3,$25
1199	addu	$7,$1
1200	mflo	($24,$18,$17)
1201	mfhi	($25,$18,$17)
1202	addu	$2,$24
1203	sltu	$1,$2,$24
1204	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
1205	addu	$25,$1
1206	addu	$3,$25
1207	sltu	$1,$3,$25
1208	addu	$7,$1
1209	mflo	($24,$20,$11)
1210	mfhi	($25,$20,$11)
1211	addu	$2,$24
1212	sltu	$1,$2,$24
1213	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
1214	addu	$25,$1
1215	addu	$3,$25
1216	sltu	$1,$3,$25
1217	addu	$7,$1
1218	mflo	($24,$5,$10)
1219	mfhi	($25,$5,$10)
1220	addu	$2,$24
1221	sltu	$1,$2,$24
1222	 multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
1223	addu	$25,$1
1224	addu	$3,$25
1225	sltu	$1,$3,$25
1226	addu	$7,$1
1227	sw	$2,9*4($4)	# r[9]=c1;
1228
1229	mflo	($24,$5,$11)
1230	mfhi	($25,$5,$11)
1231	addu	$3,$24
1232	sltu	$1,$3,$24
1233	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
1234	addu	$25,$1
1235	addu	$7,$25
1236	sltu	$2,$7,$25
1237	mflo	($24,$20,$17)
1238	mfhi	($25,$20,$17)
1239	addu	$3,$24
1240	sltu	$1,$3,$24
1241	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
1242	addu	$25,$1
1243	addu	$7,$25
1244	sltu	$1,$7,$25
1245	addu	$2,$1
1246	mflo	($24,$18,$19)
1247	mfhi	($25,$18,$19)
1248	addu	$3,$24
1249	sltu	$1,$3,$24
1250	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
1251	addu	$25,$1
1252	addu	$7,$25
1253	sltu	$1,$7,$25
1254	addu	$2,$1
1255	mflo	($24,$16,$21)
1256	mfhi	($25,$16,$21)
1257	addu	$3,$24
1258	sltu	$1,$3,$24
1259	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
1260	addu	$25,$1
1261	addu	$7,$25
1262	sltu	$1,$7,$25
1263	addu	$2,$1
1264	mflo	($24,$15,$6)
1265	mfhi	($25,$15,$6)
1266	addu	$3,$24
1267	sltu	$1,$3,$24
1268	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
1269	addu	$25,$1
1270	addu	$7,$25
1271	sltu	$1,$7,$25
1272	addu	$2,$1
1273	sw	$3,10*4($4)	# r[10]=c2;
1274
1275	mflo	($24,$16,$6)
1276	mfhi	($25,$16,$6)
1277	addu	$7,$24
1278	sltu	$1,$7,$24
1279	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
1280	addu	$25,$1
1281	addu	$2,$25
1282	sltu	$3,$2,$25
1283	mflo	($24,$18,$21)
1284	mfhi	($25,$18,$21)
1285	addu	$7,$24
1286	sltu	$1,$7,$24
1287	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
1288	addu	$25,$1
1289	addu	$2,$25
1290	sltu	$1,$2,$25
1291	addu	$3,$1
1292	mflo	($24,$20,$19)
1293	mfhi	($25,$20,$19)
1294	addu	$7,$24
1295	sltu	$1,$7,$24
1296	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
1297	addu	$25,$1
1298	addu	$2,$25
1299	sltu	$1,$2,$25
1300	addu	$3,$1
1301	mflo	($24,$5,$17)
1302	mfhi	($25,$5,$17)
1303	addu	$7,$24
1304	sltu	$1,$7,$24
1305	 multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
1306	addu	$25,$1
1307	addu	$2,$25
1308	sltu	$1,$2,$25
1309	addu	$3,$1
1310	sw	$7,11*4($4)	# r[11]=c3;
1311
1312	mflo	($24,$5,$19)
1313	mfhi	($25,$5,$19)
1314	addu	$2,$24
1315	sltu	$1,$2,$24
1316	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
1317	addu	$25,$1
1318	addu	$3,$25
1319	sltu	$7,$3,$25
1320	mflo	($24,$20,$21)
1321	mfhi	($25,$20,$21)
1322	addu	$2,$24
1323	sltu	$1,$2,$24
1324	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
1325	addu	$25,$1
1326	addu	$3,$25
1327	sltu	$1,$3,$25
1328	addu	$7,$1
1329	mflo	($24,$18,$6)
1330	mfhi	($25,$18,$6)
1331	addu	$2,$24
1332	sltu	$1,$2,$24
1333	 multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
1334	addu	$25,$1
1335	addu	$3,$25
1336	sltu	$1,$3,$25
1337	addu	$7,$1
1338	sw	$2,12*4($4)	# r[12]=c1;
1339
1340	mflo	($24,$20,$6)
1341	mfhi	($25,$20,$6)
1342	addu	$3,$24
1343	sltu	$1,$3,$24
1344	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
1345	addu	$25,$1
1346	addu	$7,$25
1347	sltu	$2,$7,$25
1348	mflo	($24,$5,$21)
1349	mfhi	($25,$5,$21)
1350	addu	$3,$24
1351	sltu	$1,$3,$24
1352	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
1353	addu	$25,$1
1354	addu	$7,$25
1355	sltu	$1,$7,$25
1356	addu	$2,$1
1357	sw	$3,13*4($4)	# r[13]=c2;
1358
1359	mflo	($24,$5,$6)
1360	mfhi	($25,$5,$6)
1361	addu	$7,$24
1362	sltu	$1,$7,$24
1363	addu	$25,$1
1364	addu	$2,$25
1365	sw	$7,14*4($4)	# r[14]=c3;
1366	sw	$2,15*4($4)	# r[15]=c1;
1367
# Restore the saved registers; the stack pointer is released in the
# delay slot of the return jump.
1368	.set	noreorder
1369	lw	$21,5*4($29)
1370	lw	$20,4*4($29)
1371	lw	$19,3*4($29)
1372	lw	$18,2*4($29)
1373	lw	$17,1*4($29)
1374	lw	$16,0*4($29)
1375	jr	$31
1376	addu $29,6*4
1377.end	bn_mul_comba8
1378
# bn_mul_comba4: fully unrolled 4 x 4 word multiplication.
# Stores eight result words through $4 (offsets 0 to 7*4), computed from
# the four words a[0]..a[3] loaded through $5 and the four words
# b[0]..b[3] loaded through $6.
# Register map, as read from the loads and the mul_add_c notes below:
#   a[0]..a[3]  $12 $13 $14 $15
#   b[0]..b[3]  $8  $9  $10 $11
#   $2 $3 $7    three-word accumulator; the roles c1, c2, c3 rotate by
#               one register after every stored result word
#   $24 $25     low and high word of the current product
#   $1          carry scratch (the file is assembled with .set noat)
# No stack frame is used and no register outside the list above is
# written.  Products are summed column by column.  In each column the
# first product sets the top accumulator word with sltu and later
# products add their carry into it.
1379.align	5
1380.globl	bn_mul_comba4
1381.ent	bn_mul_comba4
1382bn_mul_comba4:
1383	.set	reorder
1384	lw	$12,0($5)
1385	lw	$8,0($6)
1386	lw	$13,4($5)
1387	lw	$14,2*4($5)
1388	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
1389	lw	$15,3*4($5)
1390	lw	$9,4($6)
1391	lw	$10,2*4($6)
1392	lw	$11,3*4($6)
1393	mflo	($2,$12,$8)
1394	mfhi	($3,$12,$8)
1395	sw	$2,0($4)
1396
1397	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
1398	mflo	($24,$12,$9)
1399	mfhi	($25,$12,$9)
1400	addu	$3,$24
1401	sltu	$1,$3,$24
1402	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
1403	addu	$7,$25,$1
1404	mflo	($24,$13,$8)
1405	mfhi	($25,$13,$8)
1406	addu	$3,$24
1407	sltu	$1,$3,$24
1408	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
1409	addu	$25,$1
1410	addu	$7,$25
1411	sltu	$2,$7,$25
1412	sw	$3,4($4)
1413
1414	mflo	($24,$14,$8)
1415	mfhi	($25,$14,$8)
1416	addu	$7,$24
1417	sltu	$1,$7,$24
1418	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
1419	addu	$25,$1
1420	addu	$2,$25
1421	mflo	($24,$13,$9)
1422	mfhi	($25,$13,$9)
1423	addu	$7,$24
1424	sltu	$1,$7,$24
1425	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
1426	addu	$25,$1
1427	addu	$2,$25
1428	sltu	$3,$2,$25
1429	mflo	($24,$12,$10)
1430	mfhi	($25,$12,$10)
1431	addu	$7,$24
1432	sltu	$1,$7,$24
1433	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
1434	addu	$25,$1
1435	addu	$2,$25
1436	sltu	$1,$2,$25
1437	addu	$3,$1
1438	sw	$7,2*4($4)
1439
1440	mflo	($24,$12,$11)
1441	mfhi	($25,$12,$11)
1442	addu	$2,$24
1443	sltu	$1,$2,$24
1444	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
1445	addu	$25,$1
1446	addu	$3,$25
1447	sltu	$7,$3,$25
1448	mflo	($24,$13,$10)
1449	mfhi	($25,$13,$10)
1450	addu	$2,$24
1451	sltu	$1,$2,$24
1452	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
1453	addu	$25,$1
1454	addu	$3,$25
1455	sltu	$1,$3,$25
1456	addu	$7,$1
1457	mflo	($24,$14,$9)
1458	mfhi	($25,$14,$9)
1459	addu	$2,$24
1460	sltu	$1,$2,$24
1461	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
1462	addu	$25,$1
1463	addu	$3,$25
1464	sltu	$1,$3,$25
1465	addu	$7,$1
1466	mflo	($24,$15,$8)
1467	mfhi	($25,$15,$8)
1468	addu	$2,$24
1469	sltu	$1,$2,$24
1470	 multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
1471	addu	$25,$1
1472	addu	$3,$25
1473	sltu	$1,$3,$25
1474	addu	$7,$1
1475	sw	$2,3*4($4)
1476
1477	mflo	($24,$15,$9)
1478	mfhi	($25,$15,$9)
1479	addu	$3,$24
1480	sltu	$1,$3,$24
1481	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
1482	addu	$25,$1
1483	addu	$7,$25
1484	sltu	$2,$7,$25
1485	mflo	($24,$14,$10)
1486	mfhi	($25,$14,$10)
1487	addu	$3,$24
1488	sltu	$1,$3,$24
1489	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
1490	addu	$25,$1
1491	addu	$7,$25
1492	sltu	$1,$7,$25
1493	addu	$2,$1
1494	mflo	($24,$13,$11)
1495	mfhi	($25,$13,$11)
1496	addu	$3,$24
1497	sltu	$1,$3,$24
1498	 multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
1499	addu	$25,$1
1500	addu	$7,$25
1501	sltu	$1,$7,$25
1502	addu	$2,$1
1503	sw	$3,4*4($4)
1504
1505	mflo	($24,$14,$11)
1506	mfhi	($25,$14,$11)
1507	addu	$7,$24
1508	sltu	$1,$7,$24
1509	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
1510	addu	$25,$1
1511	addu	$2,$25
1512	sltu	$3,$2,$25
1513	mflo	($24,$15,$10)
1514	mfhi	($25,$15,$10)
1515	addu	$7,$24
1516	sltu	$1,$7,$24
1517	 multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1518	addu	$25,$1
1519	addu	$2,$25
1520	sltu	$1,$2,$25
1521	addu	$3,$1
1522	sw	$7,5*4($4)
1523
1524	mflo	($24,$15,$11)
1525	mfhi	($25,$15,$11)
1526	addu	$2,$24
1527	sltu	$1,$2,$24
1528	addu	$25,$1
1529	addu	$3,$25
1530	sw	$2,6*4($4)
1531	sw	$3,7*4($4)
1532
# Plain return; nothing was saved, so the delay slot is a nop.
1533	.set	noreorder
1534	jr	$31
1535	nop
1536.end	bn_mul_comba4
1537
# bn_sqr_comba8 - fully unrolled squaring of an 8-word number.
#
# Reads eight 32-bit words a[0..7] from the buffer at $5 and writes the
# sixteen words r[0..15] of their square to the buffer at $4, least
# significant word first.  Result word r[k] (column k) is the sum of all
# a[i]*a[j] with i+j == k plus the carries from the lower columns; each
# cross product (i != j) is added twice and each a[i]*a[i] once.
# NOTE(review): $4/$5 look like the first two integer argument registers,
# i.e. a C-level (r, a) pair - confirm against the C prototype.
#
# Register use, as visible in the body:
#   $12-$15   a[0..3]
#   $8-$11    a[4..7]
#   $2,$3,$7  three-word column accumulator; the roles (low word, next
#             word, carry word) rotate after every stored result word
#   $24,$25   low / high half of the product just fetched
#   $1        carry scratch (the file is assembled with .set noat)
#   $6        scratch, used only while doubling a[0]*a[1]
# No stack is used and no register is saved or restored.
#
# Shape of the two accumulation steps (c0,c1,c2 = accumulator words in
# their current roles):
#   single:   c0 += lo; hi += carry; c1 += hi; c2 += carry
#   doubling: c0 += lo twice, keeping the first carry in $1 and the second
#             in $24; then $1 += hi and $25 += $24, and both are added to
#             c1 with each carry out of c1 going to c2.  In the first
#             doubling step of a column c2 is written with sltu (its old
#             value has already been stored); later steps add to it.
#
# Every multu is issued in the middle of the preceding step so that its
# result is available at the next mflo/mfhi pair; the tag on each multu
# names the product and the column it will be added to.  multu, mflo and
# mfhi are written in macro-call form; their definitions are not in this
# part of the file.
1538.align	5
1539.globl	bn_sqr_comba8
1540.ent	bn_sqr_comba8
1541bn_sqr_comba8:
1542	.set	reorder
	# Load a[0..3]; a[4..7] are loaded after the first multu is issued.
1543	lw	$12,0($5)
1544	lw	$13,4($5)
1545	lw	$14,2*4($5)
1546	lw	$15,3*4($5)
1547
1548	multu	($12,$12)		# a[0]*a[0] (column 0)
1549	lw	$8,4*4($5)
1550	lw	$9,5*4($5)
1551	lw	$10,6*4($5)
1552	lw	$11,7*4($5)
1553	mflo	($2,$12,$12)
1554	mfhi	($3,$12,$12)
1555	sw	$2,0($4)		# r[0]
1556
	# Column 1 = 2*a[0]*a[1].  The product in $25:$24 is doubled in
	# place: the top bit of each half is captured with slt against $0
	# before that half is shifted left by one.
1557	multu	($12,$13)		# a[0]*a[1], doubled below (column 1)
1558	mflo	($24,$12,$13)
1559	mfhi	($25,$12,$13)
1560	slt	$2,$25,$0		# bit shifted out of the high half
1561	sll	$25,1
1562	 multu	($14,$12)		# a[2]*a[0], added twice (column 2)
1563	slt	$6,$24,$0		# bit shifted out of the low half ...
1564	addu	$25,$6			# ... moves into the high half
1565	sll	$24,1
1566	addu	$3,$24
1567	sltu	$1,$3,$24
1568	addu	$7,$25,$1
1569	sw	$3,4($4)		# r[1]
1570	mflo	($24,$14,$12)
1571	mfhi	($25,$14,$12)
1572	addu	$7,$24
1573	sltu	$1,$7,$24
1574	 multu	($13,$13)		# a[1]*a[1], added once (column 2)
1575	addu	$7,$24
1576	addu	$1,$25
1577	sltu	$24,$7,$24
1578	addu	$2,$1
1579	addu	$25,$24
1580	sltu	$3,$2,$1
1581	addu	$2,$25
1582	sltu	$25,$2,$25
1583	addu	$3,$25
1584	mflo	($24,$13,$13)
1585	mfhi	($25,$13,$13)
1586	addu	$7,$24
1587	sltu	$1,$7,$24
1588	 multu	($12,$15)		# a[0]*a[3], added twice (column 3)
1589	addu	$25,$1
1590	addu	$2,$25
1591	sltu	$1,$2,$25
1592	addu	$3,$1
1593	sw	$7,2*4($4)		# r[2]
1594	mflo	($24,$12,$15)
1595	mfhi	($25,$12,$15)
1596	addu	$2,$24
1597	sltu	$1,$2,$24
1598	 multu	($13,$14)		# a[1]*a[2], added twice (column 3)
1599	addu	$2,$24
1600	addu	$1,$25
1601	sltu	$24,$2,$24
1602	addu	$3,$1
1603	addu	$25,$24
1604	sltu	$7,$3,$1
1605	addu	$3,$25
1606	sltu	$25,$3,$25
1607	addu	$7,$25
1608	mflo	($24,$13,$14)
1609	mfhi	($25,$13,$14)
1610	addu	$2,$24
1611	sltu	$1,$2,$24
1612	 multu	($8,$12)		# a[4]*a[0], added twice (column 4)
1613	addu	$2,$24
1614	addu	$1,$25
1615	sltu	$24,$2,$24
1616	addu	$3,$1
1617	addu	$25,$24
1618	sltu	$1,$3,$1
1619	addu	$3,$25
1620	addu	$7,$1
1621	sltu	$25,$3,$25
1622	addu	$7,$25
1623	mflo	($24,$8,$12)
1624	mfhi	($25,$8,$12)
1625	sw	$2,3*4($4)		# r[3]
1626	addu	$3,$24
1627	sltu	$1,$3,$24
1628	 multu	($15,$13)		# a[3]*a[1], added twice (column 4)
1629	addu	$3,$24
1630	addu	$1,$25
1631	sltu	$24,$3,$24
1632	addu	$7,$1
1633	addu	$25,$24
1634	sltu	$2,$7,$1
1635	addu	$7,$25
1636	sltu	$25,$7,$25
1637	addu	$2,$25
1638	mflo	($24,$15,$13)
1639	mfhi	($25,$15,$13)
1640	addu	$3,$24
1641	sltu	$1,$3,$24
1642	 multu	($14,$14)		# a[2]*a[2], added once (column 4)
1643	addu	$3,$24
1644	addu	$1,$25
1645	sltu	$24,$3,$24
1646	addu	$7,$1
1647	addu	$25,$24
1648	sltu	$1,$7,$1
1649	addu	$7,$25
1650	addu	$2,$1
1651	sltu	$25,$7,$25
1652	addu	$2,$25
1653	mflo	($24,$14,$14)
1654	mfhi	($25,$14,$14)
1655	addu	$3,$24
1656	sltu	$1,$3,$24
1657	 multu	($12,$9)		# a[0]*a[5], added twice (column 5)
1658	addu	$25,$1
1659	addu	$7,$25
1660	sltu	$1,$7,$25
1661	addu	$2,$1
1662	sw	$3,4*4($4)		# r[4]
1663	mflo	($24,$12,$9)
1664	mfhi	($25,$12,$9)
1665	addu	$7,$24
1666	sltu	$1,$7,$24
1667	 multu	($13,$8)		# a[1]*a[4], added twice (column 5)
1668	addu	$7,$24
1669	addu	$1,$25
1670	sltu	$24,$7,$24
1671	addu	$2,$1
1672	addu	$25,$24
1673	sltu	$3,$2,$1
1674	addu	$2,$25
1675	sltu	$25,$2,$25
1676	addu	$3,$25
1677	mflo	($24,$13,$8)
1678	mfhi	($25,$13,$8)
1679	addu	$7,$24
1680	sltu	$1,$7,$24
1681	 multu	($14,$15)		# a[2]*a[3], added twice (column 5)
1682	addu	$7,$24
1683	addu	$1,$25
1684	sltu	$24,$7,$24
1685	addu	$2,$1
1686	addu	$25,$24
1687	sltu	$1,$2,$1
1688	addu	$2,$25
1689	addu	$3,$1
1690	sltu	$25,$2,$25
1691	addu	$3,$25
1692	mflo	($24,$14,$15)
1693	mfhi	($25,$14,$15)
1694	addu	$7,$24
1695	sltu	$1,$7,$24
1696	 multu	($10,$12)		# a[6]*a[0], added twice (column 6)
1697	addu	$7,$24
1698	addu	$1,$25
1699	sltu	$24,$7,$24
1700	addu	$2,$1
1701	addu	$25,$24
1702	sltu	$1,$2,$1
1703	addu	$2,$25
1704	addu	$3,$1
1705	sltu	$25,$2,$25
1706	addu	$3,$25
1707	mflo	($24,$10,$12)
1708	mfhi	($25,$10,$12)
1709	sw	$7,5*4($4)		# r[5]
1710	addu	$2,$24
1711	sltu	$1,$2,$24
1712	 multu	($9,$13)		# a[5]*a[1], added twice (column 6)
1713	addu	$2,$24
1714	addu	$1,$25
1715	sltu	$24,$2,$24
1716	addu	$3,$1
1717	addu	$25,$24
1718	sltu	$7,$3,$1
1719	addu	$3,$25
1720	sltu	$25,$3,$25
1721	addu	$7,$25
1722	mflo	($24,$9,$13)
1723	mfhi	($25,$9,$13)
1724	addu	$2,$24
1725	sltu	$1,$2,$24
1726	 multu	($8,$14)		# a[4]*a[2], added twice (column 6)
1727	addu	$2,$24
1728	addu	$1,$25
1729	sltu	$24,$2,$24
1730	addu	$3,$1
1731	addu	$25,$24
1732	sltu	$1,$3,$1
1733	addu	$3,$25
1734	addu	$7,$1
1735	sltu	$25,$3,$25
1736	addu	$7,$25
1737	mflo	($24,$8,$14)
1738	mfhi	($25,$8,$14)
1739	addu	$2,$24
1740	sltu	$1,$2,$24
1741	 multu	($15,$15)		# a[3]*a[3], added once (column 6)
1742	addu	$2,$24
1743	addu	$1,$25
1744	sltu	$24,$2,$24
1745	addu	$3,$1
1746	addu	$25,$24
1747	sltu	$1,$3,$1
1748	addu	$3,$25
1749	addu	$7,$1
1750	sltu	$25,$3,$25
1751	addu	$7,$25
1752	mflo	($24,$15,$15)
1753	mfhi	($25,$15,$15)
1754	addu	$2,$24
1755	sltu	$1,$2,$24
1756	 multu	($12,$11)		# a[0]*a[7], added twice (column 7)
1757	addu	$25,$1
1758	addu	$3,$25
1759	sltu	$1,$3,$25
1760	addu	$7,$1
1761	sw	$2,6*4($4)		# r[6]
1762	mflo	($24,$12,$11)
1763	mfhi	($25,$12,$11)
1764	addu	$3,$24
1765	sltu	$1,$3,$24
1766	 multu	($13,$10)		# a[1]*a[6], added twice (column 7)
1767	addu	$3,$24
1768	addu	$1,$25
1769	sltu	$24,$3,$24
1770	addu	$7,$1
1771	addu	$25,$24
1772	sltu	$2,$7,$1
1773	addu	$7,$25
1774	sltu	$25,$7,$25
1775	addu	$2,$25
1776	mflo	($24,$13,$10)
1777	mfhi	($25,$13,$10)
1778	addu	$3,$24
1779	sltu	$1,$3,$24
1780	 multu	($14,$9)		# a[2]*a[5], added twice (column 7)
1781	addu	$3,$24
1782	addu	$1,$25
1783	sltu	$24,$3,$24
1784	addu	$7,$1
1785	addu	$25,$24
1786	sltu	$1,$7,$1
1787	addu	$7,$25
1788	addu	$2,$1
1789	sltu	$25,$7,$25
1790	addu	$2,$25
1791	mflo	($24,$14,$9)
1792	mfhi	($25,$14,$9)
1793	addu	$3,$24
1794	sltu	$1,$3,$24
1795	 multu	($15,$8)		# a[3]*a[4], added twice (column 7)
1796	addu	$3,$24
1797	addu	$1,$25
1798	sltu	$24,$3,$24
1799	addu	$7,$1
1800	addu	$25,$24
1801	sltu	$1,$7,$1
1802	addu	$7,$25
1803	addu	$2,$1
1804	sltu	$25,$7,$25
1805	addu	$2,$25
1806	mflo	($24,$15,$8)
1807	mfhi	($25,$15,$8)
1808	addu	$3,$24
1809	sltu	$1,$3,$24
1810	 multu	($11,$13)		# a[7]*a[1], added twice (column 8)
1811	addu	$3,$24
1812	addu	$1,$25
1813	sltu	$24,$3,$24
1814	addu	$7,$1
1815	addu	$25,$24
1816	sltu	$1,$7,$1
1817	addu	$7,$25
1818	addu	$2,$1
1819	sltu	$25,$7,$25
1820	addu	$2,$25
1821	mflo	($24,$11,$13)
1822	mfhi	($25,$11,$13)
1823	sw	$3,7*4($4)		# r[7]
1824	addu	$7,$24
1825	sltu	$1,$7,$24
1826	 multu	($10,$14)		# a[6]*a[2], added twice (column 8)
1827	addu	$7,$24
1828	addu	$1,$25
1829	sltu	$24,$7,$24
1830	addu	$2,$1
1831	addu	$25,$24
1832	sltu	$3,$2,$1
1833	addu	$2,$25
1834	sltu	$25,$2,$25
1835	addu	$3,$25
1836	mflo	($24,$10,$14)
1837	mfhi	($25,$10,$14)
1838	addu	$7,$24
1839	sltu	$1,$7,$24
1840	 multu	($9,$15)		# a[5]*a[3], added twice (column 8)
1841	addu	$7,$24
1842	addu	$1,$25
1843	sltu	$24,$7,$24
1844	addu	$2,$1
1845	addu	$25,$24
1846	sltu	$1,$2,$1
1847	addu	$2,$25
1848	addu	$3,$1
1849	sltu	$25,$2,$25
1850	addu	$3,$25
1851	mflo	($24,$9,$15)
1852	mfhi	($25,$9,$15)
1853	addu	$7,$24
1854	sltu	$1,$7,$24
1855	 multu	($8,$8)		# a[4]*a[4], added once (column 8)
1856	addu	$7,$24
1857	addu	$1,$25
1858	sltu	$24,$7,$24
1859	addu	$2,$1
1860	addu	$25,$24
1861	sltu	$1,$2,$1
1862	addu	$2,$25
1863	addu	$3,$1
1864	sltu	$25,$2,$25
1865	addu	$3,$25
1866	mflo	($24,$8,$8)
1867	mfhi	($25,$8,$8)
1868	addu	$7,$24
1869	sltu	$1,$7,$24
1870	 multu	($14,$11)		# a[2]*a[7], added twice (column 9)
1871	addu	$25,$1
1872	addu	$2,$25
1873	sltu	$1,$2,$25
1874	addu	$3,$1
1875	sw	$7,8*4($4)		# r[8]
1876	mflo	($24,$14,$11)
1877	mfhi	($25,$14,$11)
1878	addu	$2,$24
1879	sltu	$1,$2,$24
1880	 multu	($15,$10)		# a[3]*a[6], added twice (column 9)
1881	addu	$2,$24
1882	addu	$1,$25
1883	sltu	$24,$2,$24
1884	addu	$3,$1
1885	addu	$25,$24
1886	sltu	$7,$3,$1
1887	addu	$3,$25
1888	sltu	$25,$3,$25
1889	addu	$7,$25
1890	mflo	($24,$15,$10)
1891	mfhi	($25,$15,$10)
1892	addu	$2,$24
1893	sltu	$1,$2,$24
1894	 multu	($8,$9)		# a[4]*a[5], added twice (column 9)
1895	addu	$2,$24
1896	addu	$1,$25
1897	sltu	$24,$2,$24
1898	addu	$3,$1
1899	addu	$25,$24
1900	sltu	$1,$3,$1
1901	addu	$3,$25
1902	addu	$7,$1
1903	sltu	$25,$3,$25
1904	addu	$7,$25
1905	mflo	($24,$8,$9)
1906	mfhi	($25,$8,$9)
1907	addu	$2,$24
1908	sltu	$1,$2,$24
1909	 multu	($11,$15)		# a[7]*a[3], added twice (column 10)
1910	addu	$2,$24
1911	addu	$1,$25
1912	sltu	$24,$2,$24
1913	addu	$3,$1
1914	addu	$25,$24
1915	sltu	$1,$3,$1
1916	addu	$3,$25
1917	addu	$7,$1
1918	sltu	$25,$3,$25
1919	addu	$7,$25
1920	mflo	($24,$11,$15)
1921	mfhi	($25,$11,$15)
1922	sw	$2,9*4($4)		# r[9]
1923	addu	$3,$24
1924	sltu	$1,$3,$24
1925	 multu	($10,$8)		# a[6]*a[4], added twice (column 10)
1926	addu	$3,$24
1927	addu	$1,$25
1928	sltu	$24,$3,$24
1929	addu	$7,$1
1930	addu	$25,$24
1931	sltu	$2,$7,$1
1932	addu	$7,$25
1933	sltu	$25,$7,$25
1934	addu	$2,$25
1935	mflo	($24,$10,$8)
1936	mfhi	($25,$10,$8)
1937	addu	$3,$24
1938	sltu	$1,$3,$24
1939	 multu	($9,$9)		# a[5]*a[5], added once (column 10)
1940	addu	$3,$24
1941	addu	$1,$25
1942	sltu	$24,$3,$24
1943	addu	$7,$1
1944	addu	$25,$24
1945	sltu	$1,$7,$1
1946	addu	$7,$25
1947	addu	$2,$1
1948	sltu	$25,$7,$25
1949	addu	$2,$25
1950	mflo	($24,$9,$9)
1951	mfhi	($25,$9,$9)
1952	addu	$3,$24
1953	sltu	$1,$3,$24
1954	 multu	($8,$11)		# a[4]*a[7], added twice (column 11)
1955	addu	$25,$1
1956	addu	$7,$25
1957	sltu	$1,$7,$25
1958	addu	$2,$1
1959	sw	$3,10*4($4)		# r[10]
1960	mflo	($24,$8,$11)
1961	mfhi	($25,$8,$11)
1962	addu	$7,$24
1963	sltu	$1,$7,$24
1964	 multu	($9,$10)		# a[5]*a[6], added twice (column 11)
1965	addu	$7,$24
1966	addu	$1,$25
1967	sltu	$24,$7,$24
1968	addu	$2,$1
1969	addu	$25,$24
1970	sltu	$3,$2,$1
1971	addu	$2,$25
1972	sltu	$25,$2,$25
1973	addu	$3,$25
1974	mflo	($24,$9,$10)
1975	mfhi	($25,$9,$10)
1976	addu	$7,$24
1977	sltu	$1,$7,$24
1978	 multu	($11,$9)		# a[7]*a[5], added twice (column 12)
1979	addu	$7,$24
1980	addu	$1,$25
1981	sltu	$24,$7,$24
1982	addu	$2,$1
1983	addu	$25,$24
1984	sltu	$1,$2,$1
1985	addu	$2,$25
1986	addu	$3,$1
1987	sltu	$25,$2,$25
1988	addu	$3,$25
1989	mflo	($24,$11,$9)
1990	mfhi	($25,$11,$9)
1991	sw	$7,11*4($4)		# r[11]
1992	addu	$2,$24
1993	sltu	$1,$2,$24
1994	 multu	($10,$10)		# a[6]*a[6], added once (column 12)
1995	addu	$2,$24
1996	addu	$1,$25
1997	sltu	$24,$2,$24
1998	addu	$3,$1
1999	addu	$25,$24
2000	sltu	$7,$3,$1
2001	addu	$3,$25
2002	sltu	$25,$3,$25
2003	addu	$7,$25
2004	mflo	($24,$10,$10)
2005	mfhi	($25,$10,$10)
2006	addu	$2,$24
2007	sltu	$1,$2,$24
2008	 multu	($10,$11)		# a[6]*a[7], added twice (column 13)
2009	addu	$25,$1
2010	addu	$3,$25
2011	sltu	$1,$3,$25
2012	addu	$7,$1
2013	sw	$2,12*4($4)		# r[12]
2014	mflo	($24,$10,$11)
2015	mfhi	($25,$10,$11)
2016	addu	$3,$24
2017	sltu	$1,$3,$24
2018	 multu	($11,$11)		# a[7]*a[7], added once (column 14)
2019	addu	$3,$24
2020	addu	$1,$25
2021	sltu	$24,$3,$24
2022	addu	$7,$1
2023	addu	$25,$24
2024	sltu	$2,$7,$1
2025	addu	$7,$25
2026	sltu	$25,$7,$25
2027	addu	$2,$25
2028	mflo	($24,$11,$11)
2029	mfhi	($25,$11,$11)
2030	sw	$3,13*4($4)		# r[13]
2031
	# Last product: its low half completes r[14]; its high half plus the
	# carry completes r[15].  The carry out of the final addu is not
	# captured.
2032	addu	$7,$24
2033	sltu	$1,$7,$24
2034	addu	$25,$1
2035	addu	$2,$25
2036	sw	$7,14*4($4)		# r[14]
2037	sw	$2,15*4($4)		# r[15]
2038
2039	.set	noreorder
2040	jr	$31			# return
2041	nop				# explicit delay slot under noreorder
2042.end	bn_sqr_comba8
2043
# bn_sqr_comba4 - fully unrolled squaring of a 4-word number.
#
# Reads four 32-bit words a[0..3] from the buffer at $5 and writes the
# eight words r[0..7] of their square to the buffer at $4, least
# significant word first.  Result word r[k] (column k) is the sum of all
# a[i]*a[j] with i+j == k plus the carries from the lower columns; each
# cross product (i != j) is added twice and each a[i]*a[i] once.
# NOTE(review): $4/$5 look like the first two integer argument registers,
# i.e. a C-level (r, a) pair - confirm against the C prototype.
#
# Register use, as visible in the body:
#   $12-$15   a[0..3]
#   $2,$3,$7  three-word column accumulator; the roles (low word, next
#             word, carry word) rotate after every stored result word
#   $24,$25   low / high half of the product just fetched
#   $1        carry scratch (the file is assembled with .set noat)
#   $6        scratch, used only while doubling a[0]*a[1]
# No stack is used and no register is saved or restored.
#
# Shape of the two accumulation steps (c0,c1,c2 = accumulator words in
# their current roles):
#   single:   c0 += lo; hi += carry; c1 += hi; c2 += carry
#   doubling: c0 += lo twice, keeping the first carry in $1 and the second
#             in $24; then $1 += hi and $25 += $24, and both are added to
#             c1 with each carry out of c1 going to c2.  In the first
#             doubling step of a column c2 is written with sltu (its old
#             value has already been stored); later steps add to it.
#
# Every multu is issued in the middle of the preceding step so that its
# result is available at the next mflo/mfhi pair; the tag on each multu
# names the product and the column it will be added to.  multu, mflo and
# mfhi are written in macro-call form; their definitions are not in this
# part of the file.
2044.align	5
2045.globl	bn_sqr_comba4
2046.ent	bn_sqr_comba4
2047bn_sqr_comba4:
2048	.set	reorder
	# Load a[0..1], start the first product, then load a[2..3].
2049	lw	$12,0($5)
2050	lw	$13,4($5)
2051	multu	($12,$12)		# a[0]*a[0] (column 0)
2052	lw	$14,2*4($5)
2053	lw	$15,3*4($5)
2054	mflo	($2,$12,$12)
2055	mfhi	($3,$12,$12)
2056	sw	$2,0($4)		# r[0]
2057
	# Column 1 = 2*a[0]*a[1].  The product in $25:$24 is doubled in
	# place: the top bit of each half is captured with slt against $0
	# before that half is shifted left by one.
2058	multu	($12,$13)		# a[0]*a[1], doubled below (column 1)
2059	mflo	($24,$12,$13)
2060	mfhi	($25,$12,$13)
2061	slt	$2,$25,$0		# bit shifted out of the high half
2062	sll	$25,1
2063	 multu	($14,$12)		# a[2]*a[0], added twice (column 2)
2064	slt	$6,$24,$0		# bit shifted out of the low half ...
2065	addu	$25,$6			# ... moves into the high half
2066	sll	$24,1
2067	addu	$3,$24
2068	sltu	$1,$3,$24
2069	addu	$7,$25,$1
2070	sw	$3,4($4)		# r[1]
2071	mflo	($24,$14,$12)
2072	mfhi	($25,$14,$12)
2073	addu	$7,$24
2074	sltu	$1,$7,$24
2075	 multu	($13,$13)		# a[1]*a[1], added once (column 2)
2076	addu	$7,$24
2077	addu	$1,$25
2078	sltu	$24,$7,$24
2079	addu	$2,$1
2080	addu	$25,$24
2081	sltu	$3,$2,$1
2082	addu	$2,$25
2083	sltu	$25,$2,$25
2084	addu	$3,$25
2085	mflo	($24,$13,$13)
2086	mfhi	($25,$13,$13)
2087	addu	$7,$24
2088	sltu	$1,$7,$24
2089	 multu	($12,$15)		# a[0]*a[3], added twice (column 3)
2090	addu	$25,$1
2091	addu	$2,$25
2092	sltu	$1,$2,$25
2093	addu	$3,$1
2094	sw	$7,2*4($4)		# r[2]
2095	mflo	($24,$12,$15)
2096	mfhi	($25,$12,$15)
2097	addu	$2,$24
2098	sltu	$1,$2,$24
2099	 multu	($13,$14)		# a[1]*a[2], added twice (column 3)
2100	addu	$2,$24
2101	addu	$1,$25
2102	sltu	$24,$2,$24
2103	addu	$3,$1
2104	addu	$25,$24
2105	sltu	$7,$3,$1
2106	addu	$3,$25
2107	sltu	$25,$3,$25
2108	addu	$7,$25
2109	mflo	($24,$13,$14)
2110	mfhi	($25,$13,$14)
2111	addu	$2,$24
2112	sltu	$1,$2,$24
2113	 multu	($15,$13)		# a[3]*a[1], added twice (column 4)
2114	addu	$2,$24
2115	addu	$1,$25
2116	sltu	$24,$2,$24
2117	addu	$3,$1
2118	addu	$25,$24
2119	sltu	$1,$3,$1
2120	addu	$3,$25
2121	addu	$7,$1
2122	sltu	$25,$3,$25
2123	addu	$7,$25
2124	mflo	($24,$15,$13)
2125	mfhi	($25,$15,$13)
2126	sw	$2,3*4($4)		# r[3]
2127	addu	$3,$24
2128	sltu	$1,$3,$24
2129	 multu	($14,$14)		# a[2]*a[2], added once (column 4)
2130	addu	$3,$24
2131	addu	$1,$25
2132	sltu	$24,$3,$24
2133	addu	$7,$1
2134	addu	$25,$24
2135	sltu	$2,$7,$1
2136	addu	$7,$25
2137	sltu	$25,$7,$25
2138	addu	$2,$25
2139	mflo	($24,$14,$14)
2140	mfhi	($25,$14,$14)
2141	addu	$3,$24
2142	sltu	$1,$3,$24
2143	 multu	($14,$15)		# a[2]*a[3], added twice (column 5)
2144	addu	$25,$1
2145	addu	$7,$25
2146	sltu	$1,$7,$25
2147	addu	$2,$1
2148	sw	$3,4*4($4)		# r[4]
2149	mflo	($24,$14,$15)
2150	mfhi	($25,$14,$15)
2151	addu	$7,$24
2152	sltu	$1,$7,$24
2153	 multu	($15,$15)		# a[3]*a[3], added once (column 6)
2154	addu	$7,$24
2155	addu	$1,$25
2156	sltu	$24,$7,$24
2157	addu	$2,$1
2158	addu	$25,$24
2159	sltu	$3,$2,$1
2160	addu	$2,$25
2161	sltu	$25,$2,$25
2162	addu	$3,$25
2163	mflo	($24,$15,$15)
2164	mfhi	($25,$15,$15)
2165	sw	$7,5*4($4)		# r[5]
2166
	# Last product: its low half completes r[6]; its high half plus the
	# carry completes r[7].  The carry out of the final addu is not
	# captured.
2167	addu	$2,$24
2168	sltu	$1,$2,$24
2169	addu	$25,$1
2170	addu	$3,$25
2171	sw	$2,6*4($4)		# r[6]
2172	sw	$3,7*4($4)		# r[7]
2173
2174	.set	noreorder
2175	jr	$31			# return
2176	nop				# explicit delay slot under noreorder
2177.end	bn_sqr_comba4
2178