xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips.S (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1.set	mips2
2.rdata
3.asciiz	"mips3.s, Version 1.2"
4.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6.text
7.set	noat
8
9.align	5
# bn_mul_add_words: public entry point that guards against a non-positive count.
# In:  $4 = word array that the worker reads and rewrites, $5 = source word
#      array, $6 = word count (tested as signed), $7 = multiplier word.
# Out: $2 = carry word (0 when the count is <= 0); $4 receives a copy of $2.
# NOTE(review): presumably the OpenSSL BN primitive of the same name - confirm
# the C prototype against the bn headers.
10.globl	bn_mul_add_words
11.ent	bn_mul_add_words
12bn_mul_add_words:
13	.set	noreorder
14	bgtz	$6,bn_mul_add_words_internal	# count > 0: continue in the worker
15	move	$2,$0			# delay slot: carry = 0 on both paths
16	jr	$31
17	move	$4,$2			# delay slot: copy result into $4
18.end	bn_mul_add_words
19
20.align	5
# bn_mul_add_words_internal: worker reached from bn_mul_add_words with
# $6 > 0 and $2 = 0.  For each 32-bit word i it computes
#	mem[$4][i] = mem[$4][i] + mem[$5][i] * $7 + carry
# and leaves the final carry word in $2 (copied to $4 on return).
# Register use: $3 = -4 (mask that rounds the count down to a multiple of 4),
# $8..$15 hold loaded words, $1 is scratch for LO and for carry bits, HI/LO
# hold each 32x32 product.  $4, $5 and $6 are modified.
# The main loop handles four words per pass; the tail handles the last 1-3.
21.ent	bn_mul_add_words_internal
22bn_mul_add_words_internal:
23	.set	reorder
24	li	$3,-4
25	and	$8,$6,$3		# $8 = count with the low two bits cleared
26	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words: tail only
27
28.L_bn_mul_add_words_loop:
29	lw	$12,0($5)
30	multu	$12,$7		# word 0 product
31	lw	$13,0($4)
32	lw	$14,4($5)
33	lw	$15,4($4)
34	lw	$8,2*4($5)
35	lw	$9,2*4($4)
36	addu	$13,$2		# add incoming carry to the destination word
37	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
38				# values", but it seems to work fine
39				# even on 64-bit registers.
40	mflo	$1
41	mfhi	$12
42	addu	$13,$1		# add low half of the product
43	addu	$2,$12		# carry += high half of the product
44	 multu	$14,$7		# word 1 product started early
45	sltu	$1,$13,$1	# carry out of the low-half addition
46	sw	$13,0($4)
47	addu	$2,$1
48
49	lw	$10,3*4($5)
50	lw	$11,3*4($4)
51	addu	$15,$2
52	sltu	$2,$15,$2
53	mflo	$1
54	mfhi	$14
55	addu	$15,$1
56	addu	$2,$14
57	 multu	$8,$7		# word 2 product
58	sltu	$1,$15,$1
59	sw	$15,4($4)
60	addu	$2,$1
61
62	subu	$6,4		# four words consumed
63	addu $4,4*4		# pointers advance now, so later offsets are negative
64	addu $5,4*4
65	addu	$9,$2
66	sltu	$2,$9,$2
67	mflo	$1
68	mfhi	$8
69	addu	$9,$1
70	addu	$2,$8
71	 multu	$10,$7		# word 3 product
72	sltu	$1,$9,$1
73	sw	$9,-2*4($4)
74	addu	$2,$1
75
76
77	and	$8,$6,$3		# at least 4 words left?
78	addu	$11,$2
79	sltu	$2,$11,$2
80	mflo	$1
81	mfhi	$10
82	addu	$11,$1
83	addu	$2,$10
84	sltu	$1,$11,$1
85	sw	$11,-4($4)
86	.set	noreorder
87	bgtz	$8,.L_bn_mul_add_words_loop
88	addu	$2,$1		# delay slot: finish the carry for word 3
89
90	beqz	$6,.L_bn_mul_add_words_return	# nothing left for the tail
91	nop
92
93.L_bn_mul_add_words_tail:
94	.set	reorder
95	lw	$12,0($5)
96	multu	$12,$7
97	lw	$13,0($4)
98	subu	$6,1
99	addu	$13,$2
100	sltu	$2,$13,$2
101	mflo	$1
102	mfhi	$12
103	addu	$13,$1
104	addu	$2,$12
105	sltu	$1,$13,$1
106	sw	$13,0($4)
107	addu	$2,$1
108	beqz	$6,.L_bn_mul_add_words_return
109
110	lw	$12,4($5)
111	multu	$12,$7
112	lw	$13,4($4)
113	subu	$6,1
114	addu	$13,$2
115	sltu	$2,$13,$2
116	mflo	$1
117	mfhi	$12
118	addu	$13,$1
119	addu	$2,$12
120	sltu	$1,$13,$1
121	sw	$13,4($4)
122	addu	$2,$1
123	beqz	$6,.L_bn_mul_add_words_return
124
125	lw	$12,2*4($5)	# third and last possible tail word
126	multu	$12,$7
127	lw	$13,2*4($4)
128	addu	$13,$2
129	sltu	$2,$13,$2
130	mflo	$1
131	mfhi	$12
132	addu	$13,$1
133	addu	$2,$12
134	sltu	$1,$13,$1
135	sw	$13,2*4($4)
136	addu	$2,$1
137
138.L_bn_mul_add_words_return:
139	.set	noreorder
140	jr	$31
141	move	$4,$2		# delay slot: copy the carry into $4
142.end	bn_mul_add_words_internal
143
144.align	5
# bn_mul_words: public entry point that guards against a non-positive count.
# In:  $4 = destination word array, $5 = source word array,
#      $6 = word count (tested as signed), $7 = multiplier word.
# Out: $2 = carry word (0 when the count is <= 0); $4 receives a copy of $2.
# NOTE(review): presumably the OpenSSL BN primitive of the same name - confirm
# the C prototype against the bn headers.
145.globl	bn_mul_words
146.ent	bn_mul_words
147bn_mul_words:
148	.set	noreorder
149	bgtz	$6,bn_mul_words_internal	# count > 0: continue in the worker
150	move	$2,$0			# delay slot: carry = 0 on both paths
151	jr	$31
152	move	$4,$2			# delay slot: copy result into $4
153.end	bn_mul_words
154
155.align	5
# bn_mul_words_internal: worker reached from bn_mul_words with $6 > 0 and
# $2 = 0.  For each 32-bit word i it computes
#	mem[$4][i] = low word of (mem[$5][i] * $7 + carry)
# with the high word becoming the next carry; the final carry is left in $2
# (copied to $4 on return).
# Register use: $3 = -4 (mask that rounds the count down to a multiple of 4),
# $1 receives LO, $12/$14/$8/$10 hold source words and then HI,
# $13/$15/$9/$11 hold carry bits.  $4, $5 and $6 are modified.
156.ent	bn_mul_words_internal
157bn_mul_words_internal:
158	.set	reorder
159	li	$3,-4
160	and	$8,$6,$3		# $8 = count with the low two bits cleared
161	beqz	$8,.L_bn_mul_words_tail	# fewer than 4 words: tail only
162
163.L_bn_mul_words_loop:
164	lw	$12,0($5)
165	multu	$12,$7		# word 0 product
166	lw	$14,4($5)
167	lw	$8,2*4($5)
168	lw	$10,3*4($5)
169	mflo	$1
170	mfhi	$12
171	addu	$2,$1		# carry + low half = result word
172	sltu	$13,$2,$1	# carry out of that addition
173	 multu	$14,$7		# word 1 product started early
174	sw	$2,0($4)
175	addu	$2,$13,$12	# next carry = high half + carry bit
176
177	subu	$6,4		# four words consumed
178	addu $4,4*4		# pointers advance now, so later offsets are negative
179	addu $5,4*4
180	mflo	$1
181	mfhi	$14
182	addu	$2,$1
183	sltu	$15,$2,$1
184	 multu	$8,$7		# word 2 product
185	sw	$2,-3*4($4)
186	addu	$2,$15,$14
187
188	mflo	$1
189	mfhi	$8
190	addu	$2,$1
191	sltu	$9,$2,$1
192	 multu	$10,$7		# word 3 product
193	sw	$2,-2*4($4)
194	addu	$2,$9,$8
195
196	and	$8,$6,$3		# at least 4 words left?
197	mflo	$1
198	mfhi	$10
199	addu	$2,$1
200	sltu	$11,$2,$1
201	sw	$2,-4($4)
202	.set	noreorder
203	bgtz	$8,.L_bn_mul_words_loop
204	addu	$2,$11,$10	# delay slot: carry out of word 3
205
206	beqz	$6,.L_bn_mul_words_return	# nothing left for the tail
207	nop
208
209.L_bn_mul_words_tail:
210	.set	reorder
211	lw	$12,0($5)
212	multu	$12,$7
213	subu	$6,1
214	mflo	$1
215	mfhi	$12
216	addu	$2,$1
217	sltu	$13,$2,$1
218	sw	$2,0($4)
219	addu	$2,$13,$12
220	beqz	$6,.L_bn_mul_words_return
221
222	lw	$12,4($5)
223	multu	$12,$7
224	subu	$6,1
225	mflo	$1
226	mfhi	$12
227	addu	$2,$1
228	sltu	$13,$2,$1
229	sw	$2,4($4)
230	addu	$2,$13,$12
231	beqz	$6,.L_bn_mul_words_return
232
233	lw	$12,2*4($5)	# third and last possible tail word
234	multu	$12,$7
235	mflo	$1
236	mfhi	$12
237	addu	$2,$1
238	sltu	$13,$2,$1
239	sw	$2,2*4($4)
240	addu	$2,$13,$12
241
242.L_bn_mul_words_return:
243	.set	noreorder
244	jr	$31
245	move	$4,$2		# delay slot: copy the carry into $4
246.end	bn_mul_words_internal
247
248.align	5
# bn_sqr_words: public entry point that guards against a non-positive count.
# In:  $4 = destination word array (two words are stored per source word),
#      $5 = source word array, $6 = word count (tested as signed).
# Out: $2 = 0 on both paths (the worker never writes $2); $4 receives a copy.
# NOTE(review): presumably the OpenSSL BN primitive of the same name - confirm
# the C prototype against the bn headers.
249.globl	bn_sqr_words
250.ent	bn_sqr_words
251bn_sqr_words:
252	.set	noreorder
253	bgtz	$6,bn_sqr_words_internal	# count > 0: continue in the worker
254	move	$2,$0			# delay slot: $2 = 0 on both paths
255	jr	$31
256	move	$4,$2			# delay slot: copy $2 into $4
257.end	bn_sqr_words
258
259.align	5
# bn_sqr_words_internal: worker reached from bn_sqr_words with $6 > 0.
# For each 32-bit source word i it stores the 64-bit square of mem[$5][i]
# as two words: low half at mem[$4][2*i], high half at mem[$4][2*i+1].
# Register use: $3 = -4 (mask that rounds the count down to a multiple of 4),
# $12/$14/$8/$10 hold source words and then HI, $13/$15/$9/$11 receive LO.
# $4, $5 and $6 are modified; $2 is only read, for the copy into $4 on return.
260.ent	bn_sqr_words_internal
261bn_sqr_words_internal:
262	.set	reorder
263	li	$3,-4
264	and	$8,$6,$3		# $8 = count with the low two bits cleared
265	beqz	$8,.L_bn_sqr_words_tail	# fewer than 4 words: tail only
266
267.L_bn_sqr_words_loop:
268	lw	$12,0($5)
269	multu	$12,$12		# square of word 0
270	lw	$14,4($5)
271	lw	$8,2*4($5)
272	lw	$10,3*4($5)
273	mflo	$13
274	mfhi	$12
275	sw	$13,0($4)
276	sw	$12,4($4)
277
278	multu	$14,$14		# square of word 1
279	subu	$6,4		# four source words consumed
280	addu $4,8*4		# destination advances by eight words
281	addu $5,4*4
282	mflo	$15
283	mfhi	$14
284	sw	$15,-6*4($4)
285	sw	$14,-5*4($4)
286
287	multu	$8,$8		# square of word 2
288	mflo	$9
289	mfhi	$8
290	sw	$9,-4*4($4)
291	sw	$8,-3*4($4)
292
293
294	multu	$10,$10		# square of word 3
295	and	$8,$6,$3		# at least 4 words left?
296	mflo	$11
297	mfhi	$10
298	sw	$11,-2*4($4)
299
300	.set	noreorder
301	bgtz	$8,.L_bn_sqr_words_loop
302	sw	$10,-4($4)	# delay slot: store high half of word 3
303
304	beqz	$6,.L_bn_sqr_words_return	# nothing left for the tail
305	nop
306
307.L_bn_sqr_words_tail:
308	.set	reorder
309	lw	$12,0($5)
310	multu	$12,$12
311	subu	$6,1
312	mflo	$13
313	mfhi	$12
314	sw	$13,0($4)
315	sw	$12,4($4)
316	beqz	$6,.L_bn_sqr_words_return
317
318	lw	$12,4($5)
319	multu	$12,$12
320	subu	$6,1
321	mflo	$13
322	mfhi	$12
323	sw	$13,2*4($4)
324	sw	$12,3*4($4)
325	beqz	$6,.L_bn_sqr_words_return
326
327	lw	$12,2*4($5)	# third and last possible tail word
328	multu	$12,$12
329	mflo	$13
330	mfhi	$12
331	sw	$13,4*4($4)
332	sw	$12,5*4($4)
333
334.L_bn_sqr_words_return:
335	.set	noreorder
336	jr	$31
337	move	$4,$2		# delay slot: copy $2 into $4
338
339.end	bn_sqr_words_internal
340
341.align	5
# bn_add_words: public entry point that guards against a non-positive count.
# In:  $4 = destination word array, $5 and $6 = the two source word arrays,
#      $7 = word count (tested as signed).
# Out: $2 = carry (0 when the count is <= 0); $4 receives a copy of $2.
# NOTE(review): presumably the OpenSSL BN primitive of the same name - confirm
# the C prototype against the bn headers.
342.globl	bn_add_words
343.ent	bn_add_words
344bn_add_words:
345	.set	noreorder
346	bgtz	$7,bn_add_words_internal	# count > 0: continue in the worker
347	move	$2,$0			# delay slot: carry = 0 on both paths
348	jr	$31
349	move	$4,$2			# delay slot: copy result into $4
350.end	bn_add_words
351
352.align	5
# bn_add_words_internal: worker reached from bn_add_words with $7 > 0 and
# $2 = 0.  For each 32-bit word i it computes
#	mem[$4][i] = mem[$5][i] + mem[$6][i] + carry
# and leaves the final carry in $2 (copied to $4 on return).
# Register use: $3 = -4 (mask that rounds the count down to a multiple of 4),
# $1 = remaining count rounded down to a multiple of 4, $12..$15 = words from
# $5, $8..$11 = words from $6, $24/$25 = carry out of the first addition of
# each word.  $4, $5, $6 and $7 are modified.
353.ent	bn_add_words_internal
354bn_add_words_internal:
355	.set	reorder
356	li	$3,-4
357	and	$1,$7,$3		# $1 = count with the low two bits cleared
358	beqz	$1,.L_bn_add_words_tail	# fewer than 4 words: tail only
359
360.L_bn_add_words_loop:
361	lw	$12,0($5)
362	lw	$8,0($6)
363	subu	$7,4		# four words consumed
364	lw	$13,4($5)
365	and	$1,$7,$3		# loop condition computed early
366	lw	$14,2*4($5)
367	addu $6,4*4		# pointers advance now, so later offsets are negative
368	lw	$15,3*4($5)
369	addu $4,4*4
370	lw	$9,-3*4($6)
371	addu $5,4*4
372	lw	$10,-2*4($6)
373	lw	$11,-4($6)
374	addu	$8,$12		# sum of the two source words
375	sltu	$24,$8,$12	# carry out of that sum
376	addu	$12,$8,$2	# add the incoming carry
377	sltu	$2,$12,$8	# carry out of the carry addition
378	sw	$12,-4*4($4)
379	addu	$2,$24		# combined carry for the next word
380
381	addu	$9,$13
382	sltu	$25,$9,$13
383	addu	$13,$9,$2
384	sltu	$2,$13,$9
385	sw	$13,-3*4($4)
386	addu	$2,$25
387
388	addu	$10,$14
389	sltu	$24,$10,$14
390	addu	$14,$10,$2
391	sltu	$2,$14,$10
392	sw	$14,-2*4($4)
393	addu	$2,$24
394
395	addu	$11,$15
396	sltu	$25,$11,$15
397	addu	$15,$11,$2
398	sltu	$2,$15,$11
399	sw	$15,-4($4)
400
401	.set	noreorder
402	bgtz	$1,.L_bn_add_words_loop
403	addu	$2,$25		# delay slot: combined carry for word 3
404
405	beqz	$7,.L_bn_add_words_return	# nothing left for the tail
406	nop
407
408.L_bn_add_words_tail:
409	.set	reorder
410	lw	$12,0($5)
411	lw	$8,0($6)
412	addu	$8,$12
413	subu	$7,1
414	sltu	$24,$8,$12
415	addu	$12,$8,$2
416	sltu	$2,$12,$8
417	sw	$12,0($4)
418	addu	$2,$24
419	beqz	$7,.L_bn_add_words_return
420
421	lw	$13,4($5)
422	lw	$9,4($6)
423	addu	$9,$13
424	subu	$7,1
425	sltu	$25,$9,$13
426	addu	$13,$9,$2
427	sltu	$2,$13,$9
428	sw	$13,4($4)
429	addu	$2,$25
430	beqz	$7,.L_bn_add_words_return
431
432	lw	$14,2*4($5)	# third and last possible tail word
433	lw	$10,2*4($6)
434	addu	$10,$14
435	sltu	$24,$10,$14
436	addu	$14,$10,$2
437	sltu	$2,$14,$10
438	sw	$14,2*4($4)
439	addu	$2,$24
440
441.L_bn_add_words_return:
442	.set	noreorder
443	jr	$31
444	move	$4,$2		# delay slot: copy the carry into $4
445
446.end	bn_add_words_internal
447
448.align	5
# bn_sub_words: public entry point that guards against a non-positive count.
# In:  $4 = destination word array, $5 = minuend word array,
#      $6 = subtrahend word array, $7 = word count (tested as signed).
# Out: $2 = borrow (0 when the count is <= 0).  On the early-return path $4
#      is cleared directly from $0, which equals $2 at that point.
# NOTE(review): presumably the OpenSSL BN primitive of the same name - confirm
# the C prototype against the bn headers.
449.globl	bn_sub_words
450.ent	bn_sub_words
451bn_sub_words:
452	.set	noreorder
453	bgtz	$7,bn_sub_words_internal	# count > 0: continue in the worker
454	move	$2,$0			# delay slot: borrow = 0 on both paths
455	jr	$31
456	move	$4,$0			# delay slot: $4 = 0, same value as $2
457.end	bn_sub_words
458
459.align	5
# bn_sub_words_internal: worker reached from bn_sub_words with $7 > 0 and
# $2 = 0.  For each 32-bit word i it computes
#	mem[$4][i] = mem[$5][i] - mem[$6][i] - borrow
# and leaves the final borrow in $2 (copied to $4 on return).
# Register use: $3 = -4 (mask that rounds the count down to a multiple of 4),
# $1 = remaining count rounded down to a multiple of 4, $12..$15 = words from
# $5, $8..$11 = words from $6, $24/$25 = borrow out of the first subtraction
# of each word.  $4, $5, $6 and $7 are modified.
460.ent	bn_sub_words_internal
461bn_sub_words_internal:
462	.set	reorder
463	li	$3,-4
464	and	$1,$7,$3		# $1 = count with the low two bits cleared
465	beqz	$1,.L_bn_sub_words_tail	# fewer than 4 words: tail only
466
467.L_bn_sub_words_loop:
468	lw	$12,0($5)
469	lw	$8,0($6)
470	subu	$7,4		# four words consumed
471	lw	$13,4($5)
472	and	$1,$7,$3		# loop condition computed early
473	lw	$14,2*4($5)
474	addu $6,4*4		# pointers advance now, so later offsets are negative
475	lw	$15,3*4($5)
476	addu $4,4*4
477	lw	$9,-3*4($6)
478	addu $5,4*4
479	lw	$10,-2*4($6)
480	lw	$11,-4($6)
481	sltu	$24,$12,$8	# borrow out of the word subtraction
482	subu	$8,$12,$8	# difference of the two source words
483	subu	$12,$8,$2	# subtract the incoming borrow
484	sgtu	$2,$12,$8	# borrow out of the borrow subtraction
485	sw	$12,-4*4($4)
486	addu	$2,$24		# combined borrow for the next word
487
488	sltu	$25,$13,$9
489	subu	$9,$13,$9
490	subu	$13,$9,$2
491	sgtu	$2,$13,$9
492	sw	$13,-3*4($4)
493	addu	$2,$25
494
495
496	sltu	$24,$14,$10
497	subu	$10,$14,$10
498	subu	$14,$10,$2
499	sgtu	$2,$14,$10
500	sw	$14,-2*4($4)
501	addu	$2,$24
502
503	sltu	$25,$15,$11
504	subu	$11,$15,$11
505	subu	$15,$11,$2
506	sgtu	$2,$15,$11
507	sw	$15,-4($4)
508
509	.set	noreorder
510	bgtz	$1,.L_bn_sub_words_loop
511	addu	$2,$25		# delay slot: combined borrow for word 3
512
513	beqz	$7,.L_bn_sub_words_return	# nothing left for the tail
514	nop
515
516.L_bn_sub_words_tail:
517	.set	reorder
518	lw	$12,0($5)
519	lw	$8,0($6)
520	subu	$7,1
521	sltu	$24,$12,$8
522	subu	$8,$12,$8
523	subu	$12,$8,$2
524	sgtu	$2,$12,$8
525	sw	$12,0($4)
526	addu	$2,$24
527	beqz	$7,.L_bn_sub_words_return
528
529	lw	$13,4($5)
530	subu	$7,1
531	lw	$9,4($6)
532	sltu	$25,$13,$9
533	subu	$9,$13,$9
534	subu	$13,$9,$2
535	sgtu	$2,$13,$9
536	sw	$13,4($4)
537	addu	$2,$25
538	beqz	$7,.L_bn_sub_words_return
539
540	lw	$14,2*4($5)	# third and last possible tail word
541	lw	$10,2*4($6)
542	sltu	$24,$14,$10
543	subu	$10,$14,$10
544	subu	$14,$10,$2
545	sgtu	$2,$14,$10
546	sw	$14,2*4($4)
547	addu	$2,$24
548
549.L_bn_sub_words_return:
550	.set	noreorder
551	jr	$31
552	move	$4,$2		# delay slot: copy the borrow into $4
553.end	bn_sub_words_internal
554
555.align 5
# bn_div_3_words: public entry point.
# In:  $4 = pointer to a word; this stub reads the words at offsets 0 and -4
#      from it and the worker reads offset -8.  $5 and $6 = two word operands.
# Out: $2 = -1 (all ones) when the word at ($4) equals $6; otherwise the
#      value produced by bn_div_3_words_internal.
# Side effects before the worker runs: $7 = original $4, $10 = original $5,
# $4 = word at offset 0, $5 = word at offset -4.
# NOTE(review): presumably (m, d1, d0) of the OpenSSL helper of the same
# name - confirm against the C reference.
556.globl	bn_div_3_words
557.ent	bn_div_3_words
558bn_div_3_words:
559	.set	noreorder
560	move	$7,$4		# we know that bn_div_words does not
561				# touch $7, $10, $11 and preserves $6
562				# so that we can save two arguments
563				# and return address in registers
564				# instead of stack:-)
565
566	lw	$4,($7)
567	move	$10,$5
568	bne	$4,$6,bn_div_3_words_internal	# top word differs from $6: divide
569	lw	$5,-4($7)	# delay slot: load the next lower word
570	li	$2,-1		# equal: result is all ones
571	jr	$31
572	move	$4,$2		# delay slot: copy result into $4
573.end	bn_div_3_words
574
575.align	5
# bn_div_3_words_internal: worker reached from bn_div_3_words.
# On entry $4:$5 hold the two upper words, $6 the divisor word passed to
# bn_div_words_internal, $7 the original pointer and $10 the original $5.
# It calls bn_div_words_internal (return address parked in $11), multiplies
# the returned quotient $2 by $10 and then lowers $2 one step at a time in the
# loop below, comparing the product ($13:$12) against $5 (the remainder left
# by the call, increased by $6 on every step) and the word at offset -8 ($14).
# Out: $2 = adjusted quotient, copied to $4 on return.
# NOTE(review): the exact termination conditions are intricate - verify any
# change against the C reference implementation.
576.ent	bn_div_3_words_internal
577bn_div_3_words_internal:
578	.set	reorder
579	move	$11,$31		# keep the return address in a register
580	bal	bn_div_words_internal
581	move	$31,$11
582	multu	$10,$2		# product of the saved operand and the quotient
583	lw	$14,-2*4($7)
584	move	$8,$0
585	mfhi	$13
586	mflo	$12
587	sltu	$24,$13,$5	# product high word below the remainder?
588.L_bn_div_3_words_inner_loop:
589	bnez	$24,.L_bn_div_3_words_inner_loop_done
590	sgeu	$1,$14,$12
591	seq	$25,$13,$5
592	and	$1,$25		# $1 = high words equal and low word fits
593	sltu	$15,$12,$10	# borrow for the two-word subtraction below
594	addu	$5,$6		# remainder += divisor word
595	subu	$13,$15
596	subu	$12,$10		# product -= saved operand
597	sltu	$24,$13,$5
598	sltu	$8,$5,$6		# remainder addition wrapped around
599	or	$24,$8
600	.set	noreorder
601	beqz	$1,.L_bn_div_3_words_inner_loop
602	subu	$2,1		# delay slot: quotient--, runs on both paths
603	addu	$2,1		# loop exit through $1: undo the last decrement
604	.set	reorder
605.L_bn_div_3_words_inner_loop_done:
606	.set	noreorder
607	jr	$31
608	move	$4,$2		# delay slot: copy result into $4
609.end	bn_div_3_words_internal
610
611.align	5
# bn_div_words: public entry point that guards against a zero divisor.
# In:  $4:$5 = two-word dividend (high:low), $6 = divisor word.
# Out: $2 = -1 (all ones) when $6 is zero; otherwise the result of
#      bn_div_words_internal.  Note that the delay slot sets $2 = -1 before
#      the worker starts as well.
# NOTE(review): presumably the OpenSSL BN primitive of the same name - confirm
# the C prototype against the bn headers.
612.globl	bn_div_words
613.ent	bn_div_words
614bn_div_words:
615	.set	noreorder
616	bnez	$6,bn_div_words_internal
617	li	$2,-1		# I would rather signal div-by-zero
618				# which can be done with 'break 7'
619	jr	$31
620	move	$4,$2
621.end	bn_div_words
622
623.align	5
# bn_div_words_internal: divides the two-word value $4:$5 (high:low) by the
# non-zero word $6.
# Out: $2 = quotient word (also copied to $4), $3 and $5 = remainder;
#      $6 is shifted back by the normalisation count before returning.
# Steps: normalise the divisor so that bit 31 is set ($25 = shift count),
# raise "break 6" when dividend bits would be shifted out of $4, subtract
# the divisor once when the high word is not below it, then build the
# quotient as two 16-bit halves.  Each half is estimated with divu by the
# upper 16 bits of the divisor (or taken as 0xffff when the upper halves are
# equal) and corrected downward in an inner loop.
# Scratch: $1, $8, $9, $12-$15, $24, HI/LO.
# NOTE(review): no .set precedes the first instructions, so they assemble in
# whatever mode the preceding code left active (noreorder in this file); the
# branches at the top rely on their explicit delay slots.
624.ent	bn_div_words_internal
625bn_div_words_internal:
626	move	$3,$0
627	bltz	$6,.L_bn_div_words_body	# bit 31 already set: no normalisation
628	move	$25,$3		# delay slot: shift count = 0
629	sll	$6,1
630	bgtz	$6,.-4		# repeat the shift until bit 31 is set
631	addu	$25,1		# delay slot: count every shift
632
633	.set	reorder
634	negu	$13,$25
635	li	$14,-1
636	sll	$14,$13		# mask of the top $25 bits
637	and	$14,$4		# dividend bits that the shift would drop
638	srl	$1,$5,$13	# bits moving from the low into the high word
639	.set	noreorder
640	beqz	$14,.+12		# nothing dropped: skip the break
641	nop
642	break	6		# signal overflow
643	.set	reorder
644	sll	$4,$25
645	sll	$5,$25
646	or	$4,$1
647.L_bn_div_words_body:
648	srl	$3,$6,4*4	# bits
649	sgeu	$1,$4,$6
650	.set	noreorder
651	beqz	$1,.+12		# high word below divisor: skip the subtraction
652	nop
653	subu	$4,$6
654	.set	reorder
655
656	li	$8,-1
657	srl	$9,$4,4*4	# bits
658	srl	$8,4*4	# q=0xffffffff
659	beq	$3,$9,.L_bn_div_words_skip_div1
660	divu	$0,$4,$3
661	mflo	$8
662.L_bn_div_words_skip_div1:
663	multu	$6,$8		# divisor times the estimated upper half
664	sll	$15,$4,4*4	# bits
665	srl	$1,$5,4*4	# bits
666	or	$15,$1
667	mflo	$12
668	mfhi	$13
669.L_bn_div_words_inner_loop1:
670	sltu	$14,$15,$12
671	seq	$24,$9,$13
672	sltu	$1,$9,$13
673	and	$14,$24
674	sltu	$2,$12,$6	# borrow for the two-word subtraction below
675	or	$1,$14		# $1 = product still above the partial dividend
676	.set	noreorder
677	beqz	$1,.L_bn_div_words_inner_loop1_done
678	subu	$13,$2		# delay slot: runs on both paths
679	subu	$12,$6
680	b	.L_bn_div_words_inner_loop1
681	subu	$8,1		# delay slot: estimate was too large, q--
682	.set	reorder
683.L_bn_div_words_inner_loop1_done:
684
685	sll	$5,4*4	# bits
686	subu	$4,$15,$12
687	sll	$2,$8,4*4	# bits
688
689	li	$8,-1
690	srl	$9,$4,4*4	# bits
691	srl	$8,4*4	# q=0xffffffff
692	beq	$3,$9,.L_bn_div_words_skip_div2
693	divu	$0,$4,$3
694	mflo	$8
695.L_bn_div_words_skip_div2:
696	multu	$6,$8		# divisor times the estimated lower half
697	sll	$15,$4,4*4	# bits
698	srl	$1,$5,4*4	# bits
699	or	$15,$1
700	mflo	$12
701	mfhi	$13
702.L_bn_div_words_inner_loop2:
703	sltu	$14,$15,$12
704	seq	$24,$9,$13
705	sltu	$1,$9,$13
706	and	$14,$24
707	sltu	$3,$12,$6	# $3 is reused as the borrow from here on
708	or	$1,$14
709	.set	noreorder
710	beqz	$1,.L_bn_div_words_inner_loop2_done
711	subu	$13,$3		# delay slot: runs on both paths
712	subu	$12,$6
713	b	.L_bn_div_words_inner_loop2
714	subu	$8,1		# delay slot: estimate was too large, q--
715	.set	reorder
716.L_bn_div_words_inner_loop2_done:
717
718	subu	$4,$15,$12
719	or	$2,$8		# merge the lower half into the quotient
720	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
721	srl	$6,$25		# restore $6
722
723	.set	noreorder
724	move	$5,$3
725	jr	$31
726	move	$4,$2		# delay slot: copy the quotient into $4
727.end	bn_div_words_internal
728
729.align	5
# bn_mul_comba8: fully unrolled 8x8-word multiplication, column by column.
# In:  $4 = r (16 result words are stored), $5 = a (8 words read),
#      $6 = b (8 words read).
# Uses $16-$21, which are saved in a 24-byte stack frame and restored before
# returning.  a[0..7] live in $12,$13,$14,$15,$16,$18,$20,$5 and b[0..7] in
# $8,$9,$10,$11,$17,$19,$21,$6; both pointer registers are overwritten by
# their last element.  $2, $3 and $7 take turns as the accumulator words
# c1,c2,c3 named in the mul_add_c comments, $24/$25 receive LO/HI of each
# product and $1 holds carry bits.
730.globl	bn_mul_comba8
731.ent	bn_mul_comba8
732bn_mul_comba8:
733	.set	noreorder
734	.frame	$29,6*4,$31
735	.mask	0x003f0000,-4
736	subu $29,6*4		# frame for the six saved registers
737	sw	$21,5*4($29)
738	sw	$20,4*4($29)
739	sw	$19,3*4($29)
740	sw	$18,2*4($29)
741	sw	$17,1*4($29)
742	sw	$16,0*4($29)
743
744	.set	reorder
745	lw	$12,0($5)	# If compiled with -mips3 option on
746				# R5000 box assembler barks on this
747				# 1ine with "should not have mult/div
748				# as last instruction in bb (R10K
749				# bug)" warning. If anybody out there
750				# has a clue about how to circumvent
751				# this do send me a note.
752				#		<appro@fy.chalmers.se>
753
754	lw	$8,0($6)
755	lw	$13,4($5)
756	lw	$14,2*4($5)
757	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
758	lw	$15,3*4($5)
759	lw	$9,4($6)
760	lw	$10,2*4($6)
761	lw	$11,3*4($6)
762	mflo	$2
763	mfhi	$3
764
765	lw	$16,4*4($5)
766	lw	$18,5*4($5)
767	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
768	lw	$20,6*4($5)
769	lw	$5,7*4($5)	# a[7] replaces the a pointer
770	lw	$17,4*4($6)
771	lw	$19,5*4($6)
772	mflo	$24
773	mfhi	$25
774	addu	$3,$24
775	sltu	$1,$3,$24
776	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
777	addu	$7,$25,$1
778	lw	$21,6*4($6)
779	lw	$6,7*4($6)	# b[7] replaces the b pointer
780	sw	$2,0($4)	# r[0]=c1;
781	mflo	$24
782	mfhi	$25
783	addu	$3,$24
784	sltu	$1,$3,$24
785	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
786	addu	$25,$1
787	addu	$7,$25
788	sltu	$2,$7,$25
789	sw	$3,4($4)	# r[1]=c2;
790
791	mflo	$24
792	mfhi	$25
793	addu	$7,$24
794	sltu	$1,$7,$24
795	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
796	addu	$25,$1
797	addu	$2,$25
798	mflo	$24
799	mfhi	$25
800	addu	$7,$24
801	sltu	$1,$7,$24
802	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
803	addu	$25,$1
804	addu	$2,$25
805	sltu	$3,$2,$25
806	mflo	$24
807	mfhi	$25
808	addu	$7,$24
809	sltu	$1,$7,$24
810	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
811	addu	$25,$1
812	addu	$2,$25
813	sltu	$1,$2,$25
814	addu	$3,$1
815	sw	$7,2*4($4)	# r[2]=c3;
816
817	mflo	$24
818	mfhi	$25
819	addu	$2,$24
820	sltu	$1,$2,$24
821	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
822	addu	$25,$1
823	addu	$3,$25
824	sltu	$7,$3,$25
825	mflo	$24
826	mfhi	$25
827	addu	$2,$24
828	sltu	$1,$2,$24
829	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
830	addu	$25,$1
831	addu	$3,$25
832	sltu	$1,$3,$25
833	addu	$7,$1
834	mflo	$24
835	mfhi	$25
836	addu	$2,$24
837	sltu	$1,$2,$24
838	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
839	addu	$25,$1
840	addu	$3,$25
841	sltu	$1,$3,$25
842	addu	$7,$1
843	mflo	$24
844	mfhi	$25
845	addu	$2,$24
846	sltu	$1,$2,$24
847	 multu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
848	addu	$25,$1
849	addu	$3,$25
850	sltu	$1,$3,$25
851	addu	$7,$1
852	sw	$2,3*4($4)	# r[3]=c1;
853
854	mflo	$24
855	mfhi	$25
856	addu	$3,$24
857	sltu	$1,$3,$24
858	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
859	addu	$25,$1
860	addu	$7,$25
861	sltu	$2,$7,$25
862	mflo	$24
863	mfhi	$25
864	addu	$3,$24
865	sltu	$1,$3,$24
866	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
867	addu	$25,$1
868	addu	$7,$25
869	sltu	$1,$7,$25
870	addu	$2,$1
871	mflo	$24
872	mfhi	$25
873	addu	$3,$24
874	sltu	$1,$3,$24
875	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
876	addu	$25,$1
877	addu	$7,$25
878	sltu	$1,$7,$25
879	addu	$2,$1
880	mflo	$24
881	mfhi	$25
882	addu	$3,$24
883	sltu	$1,$3,$24
884	multu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
885	addu	$25,$1
886	addu	$7,$25
887	sltu	$1,$7,$25
888	addu	$2,$1
889	mflo	$24
890	mfhi	$25
891	addu	$3,$24
892	sltu	$1,$3,$24
893	 multu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
894	addu	$25,$1
895	addu	$7,$25
896	sltu	$1,$7,$25
897	addu	$2,$1
898	sw	$3,4*4($4)	# r[4]=c2;
899
900	mflo	$24
901	mfhi	$25
902	addu	$7,$24
903	sltu	$1,$7,$24
904	multu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
905	addu	$25,$1
906	addu	$2,$25
907	sltu	$3,$2,$25
908	mflo	$24
909	mfhi	$25
910	addu	$7,$24
911	sltu	$1,$7,$24
912	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
913	addu	$25,$1
914	addu	$2,$25
915	sltu	$1,$2,$25
916	addu	$3,$1
917	mflo	$24
918	mfhi	$25
919	addu	$7,$24
920	sltu	$1,$7,$24
921	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
922	addu	$25,$1
923	addu	$2,$25
924	sltu	$1,$2,$25
925	addu	$3,$1
926	mflo	$24
927	mfhi	$25
928	addu	$7,$24
929	sltu	$1,$7,$24
930	multu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
931	addu	$25,$1
932	addu	$2,$25
933	sltu	$1,$2,$25
934	addu	$3,$1
935	mflo	$24
936	mfhi	$25
937	addu	$7,$24
938	sltu	$1,$7,$24
939	multu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
940	addu	$25,$1
941	addu	$2,$25
942	sltu	$1,$2,$25
943	addu	$3,$1
944	mflo	$24
945	mfhi	$25
946	addu	$7,$24
947	sltu	$1,$7,$24
948	 multu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
949	addu	$25,$1
950	addu	$2,$25
951	sltu	$1,$2,$25
952	addu	$3,$1
953	sw	$7,5*4($4)	# r[5]=c3;
954
955	mflo	$24
956	mfhi	$25
957	addu	$2,$24
958	sltu	$1,$2,$24
959	multu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
960	addu	$25,$1
961	addu	$3,$25
962	sltu	$7,$3,$25
963	mflo	$24
964	mfhi	$25
965	addu	$2,$24
966	sltu	$1,$2,$24
967	multu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
968	addu	$25,$1
969	addu	$3,$25
970	sltu	$1,$3,$25
971	addu	$7,$1
972	mflo	$24
973	mfhi	$25
974	addu	$2,$24
975	sltu	$1,$2,$24
976	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
977	addu	$25,$1
978	addu	$3,$25
979	sltu	$1,$3,$25
980	addu	$7,$1
981	mflo	$24
982	mfhi	$25
983	addu	$2,$24
984	sltu	$1,$2,$24
985	multu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
986	addu	$25,$1
987	addu	$3,$25
988	sltu	$1,$3,$25
989	addu	$7,$1
990	mflo	$24
991	mfhi	$25
992	addu	$2,$24
993	sltu	$1,$2,$24
994	multu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
995	addu	$25,$1
996	addu	$3,$25
997	sltu	$1,$3,$25
998	addu	$7,$1
999	mflo	$24
1000	mfhi	$25
1001	addu	$2,$24
1002	sltu	$1,$2,$24
1003	multu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
1004	addu	$25,$1
1005	addu	$3,$25
1006	sltu	$1,$3,$25
1007	addu	$7,$1
1008	mflo	$24
1009	mfhi	$25
1010	addu	$2,$24
1011	sltu	$1,$2,$24
1012	 multu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
1013	addu	$25,$1
1014	addu	$3,$25
1015	sltu	$1,$3,$25
1016	addu	$7,$1
1017	sw	$2,6*4($4)	# r[6]=c1;
1018
1019	mflo	$24
1020	mfhi	$25
1021	addu	$3,$24
1022	sltu	$1,$3,$24
1023	multu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
1024	addu	$25,$1
1025	addu	$7,$25
1026	sltu	$2,$7,$25
1027	mflo	$24
1028	mfhi	$25
1029	addu	$3,$24
1030	sltu	$1,$3,$24
1031	multu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
1032	addu	$25,$1
1033	addu	$7,$25
1034	sltu	$1,$7,$25
1035	addu	$2,$1
1036	mflo	$24
1037	mfhi	$25
1038	addu	$3,$24
1039	sltu	$1,$3,$24
1040	multu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
1041	addu	$25,$1
1042	addu	$7,$25
1043	sltu	$1,$7,$25
1044	addu	$2,$1
1045	mflo	$24
1046	mfhi	$25
1047	addu	$3,$24
1048	sltu	$1,$3,$24
1049	multu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
1050	addu	$25,$1
1051	addu	$7,$25
1052	sltu	$1,$7,$25
1053	addu	$2,$1
1054	mflo	$24
1055	mfhi	$25
1056	addu	$3,$24
1057	sltu	$1,$3,$24
1058	multu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
1059	addu	$25,$1
1060	addu	$7,$25
1061	sltu	$1,$7,$25
1062	addu	$2,$1
1063	mflo	$24
1064	mfhi	$25
1065	addu	$3,$24
1066	sltu	$1,$3,$24
1067	multu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
1068	addu	$25,$1
1069	addu	$7,$25
1070	sltu	$1,$7,$25
1071	addu	$2,$1
1072	mflo	$24
1073	mfhi	$25
1074	addu	$3,$24
1075	sltu	$1,$3,$24
1076	multu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
1077	addu	$25,$1
1078	addu	$7,$25
1079	sltu	$1,$7,$25
1080	addu	$2,$1
1081	mflo	$24
1082	mfhi	$25
1083	addu	$3,$24
1084	sltu	$1,$3,$24
1085	 multu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
1086	addu	$25,$1
1087	addu	$7,$25
1088	sltu	$1,$7,$25
1089	addu	$2,$1
1090	sw	$3,7*4($4)	# r[7]=c2;
1091
1092	mflo	$24
1093	mfhi	$25
1094	addu	$7,$24
1095	sltu	$1,$7,$24
1096	multu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
1097	addu	$25,$1
1098	addu	$2,$25
1099	sltu	$3,$2,$25
1100	mflo	$24
1101	mfhi	$25
1102	addu	$7,$24
1103	sltu	$1,$7,$24
1104	multu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
1105	addu	$25,$1
1106	addu	$2,$25
1107	sltu	$1,$2,$25
1108	addu	$3,$1
1109	mflo	$24
1110	mfhi	$25
1111	addu	$7,$24
1112	sltu	$1,$7,$24
1113	multu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
1114	addu	$25,$1
1115	addu	$2,$25
1116	sltu	$1,$2,$25
1117	addu	$3,$1
1118	mflo	$24
1119	mfhi	$25
1120	addu	$7,$24
1121	sltu	$1,$7,$24
1122	multu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
1123	addu	$25,$1
1124	addu	$2,$25
1125	sltu	$1,$2,$25
1126	addu	$3,$1
1127	mflo	$24
1128	mfhi	$25
1129	addu	$7,$24
1130	sltu	$1,$7,$24
1131	multu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
1132	addu	$25,$1
1133	addu	$2,$25
1134	sltu	$1,$2,$25
1135	addu	$3,$1
1136	mflo	$24
1137	mfhi	$25
1138	addu	$7,$24
1139	sltu	$1,$7,$24
1140	multu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
1141	addu	$25,$1
1142	addu	$2,$25
1143	sltu	$1,$2,$25
1144	addu	$3,$1
1145	mflo	$24
1146	mfhi	$25
1147	addu	$7,$24
1148	sltu	$1,$7,$24
1149	 multu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
1150	addu	$25,$1
1151	addu	$2,$25
1152	sltu	$1,$2,$25
1153	addu	$3,$1
1154	sw	$7,8*4($4)	# r[8]=c3;
1155
1156	mflo	$24
1157	mfhi	$25
1158	addu	$2,$24
1159	sltu	$1,$2,$24
1160	multu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
1161	addu	$25,$1
1162	addu	$3,$25
1163	sltu	$7,$3,$25
1164	mflo	$24
1165	mfhi	$25
1166	addu	$2,$24
1167	sltu	$1,$2,$24
1168	multu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
1169	addu	$25,$1
1170	addu	$3,$25
1171	sltu	$1,$3,$25
1172	addu	$7,$1
1173	mflo	$24
1174	mfhi	$25
1175	addu	$2,$24
1176	sltu	$1,$2,$24
1177	multu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
1178	addu	$25,$1
1179	addu	$3,$25
1180	sltu	$1,$3,$25
1181	addu	$7,$1
1182	mflo	$24
1183	mfhi	$25
1184	addu	$2,$24
1185	sltu	$1,$2,$24
1186	multu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
1187	addu	$25,$1
1188	addu	$3,$25
1189	sltu	$1,$3,$25
1190	addu	$7,$1
1191	mflo	$24
1192	mfhi	$25
1193	addu	$2,$24
1194	sltu	$1,$2,$24
1195	multu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
1196	addu	$25,$1
1197	addu	$3,$25
1198	sltu	$1,$3,$25
1199	addu	$7,$1
1200	mflo	$24
1201	mfhi	$25
1202	addu	$2,$24
1203	sltu	$1,$2,$24
1204	 multu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
1205	addu	$25,$1
1206	addu	$3,$25
1207	sltu	$1,$3,$25
1208	addu	$7,$1
1209	sw	$2,9*4($4)	# r[9]=c1;
1210
1211	mflo	$24
1212	mfhi	$25
1213	addu	$3,$24
1214	sltu	$1,$3,$24
1215	multu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
1216	addu	$25,$1
1217	addu	$7,$25
1218	sltu	$2,$7,$25
1219	mflo	$24
1220	mfhi	$25
1221	addu	$3,$24
1222	sltu	$1,$3,$24
1223	multu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
1224	addu	$25,$1
1225	addu	$7,$25
1226	sltu	$1,$7,$25
1227	addu	$2,$1
1228	mflo	$24
1229	mfhi	$25
1230	addu	$3,$24
1231	sltu	$1,$3,$24
1232	multu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
1233	addu	$25,$1
1234	addu	$7,$25
1235	sltu	$1,$7,$25
1236	addu	$2,$1
1237	mflo	$24
1238	mfhi	$25
1239	addu	$3,$24
1240	sltu	$1,$3,$24
1241	multu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
1242	addu	$25,$1
1243	addu	$7,$25
1244	sltu	$1,$7,$25
1245	addu	$2,$1
1246	mflo	$24
1247	mfhi	$25
1248	addu	$3,$24
1249	sltu	$1,$3,$24
1250	multu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
1251	addu	$25,$1
1252	addu	$7,$25
1253	sltu	$1,$7,$25
1254	addu	$2,$1
1255	sw	$3,10*4($4)	# r[10]=c2;
1256
1257	mflo	$24
1258	mfhi	$25
1259	addu	$7,$24
1260	sltu	$1,$7,$24
1261	multu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
1262	addu	$25,$1
1263	addu	$2,$25
1264	sltu	$3,$2,$25
1265	mflo	$24
1266	mfhi	$25
1267	addu	$7,$24
1268	sltu	$1,$7,$24
1269	multu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
1270	addu	$25,$1
1271	addu	$2,$25
1272	sltu	$1,$2,$25
1273	addu	$3,$1
1274	mflo	$24
1275	mfhi	$25
1276	addu	$7,$24
1277	sltu	$1,$7,$24
1278	multu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
1279	addu	$25,$1
1280	addu	$2,$25
1281	sltu	$1,$2,$25
1282	addu	$3,$1
1283	mflo	$24
1284	mfhi	$25
1285	addu	$7,$24
1286	sltu	$1,$7,$24
1287	 multu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
1288	addu	$25,$1
1289	addu	$2,$25
1290	sltu	$1,$2,$25
1291	addu	$3,$1
1292	sw	$7,11*4($4)	# r[11]=c3;
1293
1294	mflo	$24
1295	mfhi	$25
1296	addu	$2,$24
1297	sltu	$1,$2,$24
1298	multu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
1299	addu	$25,$1
1300	addu	$3,$25
1301	sltu	$7,$3,$25
1302	mflo	$24
1303	mfhi	$25
1304	addu	$2,$24
1305	sltu	$1,$2,$24
1306	multu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
1307	addu	$25,$1
1308	addu	$3,$25
1309	sltu	$1,$3,$25
1310	addu	$7,$1
1311	mflo	$24
1312	mfhi	$25
1313	addu	$2,$24
1314	sltu	$1,$2,$24
1315	 multu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
1316	addu	$25,$1
1317	addu	$3,$25
1318	sltu	$1,$3,$25
1319	addu	$7,$1
1320	sw	$2,12*4($4)	# r[12]=c1;
1321
1322	mflo	$24
1323	mfhi	$25
1324	addu	$3,$24
1325	sltu	$1,$3,$24
1326	multu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
1327	addu	$25,$1
1328	addu	$7,$25
1329	sltu	$2,$7,$25
1330	mflo	$24
1331	mfhi	$25
1332	addu	$3,$24
1333	sltu	$1,$3,$24
1334	multu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
1335	addu	$25,$1
1336	addu	$7,$25
1337	sltu	$1,$7,$25
1338	addu	$2,$1
1339	sw	$3,13*4($4)	# r[13]=c2;
1340
1341	mflo	$24
1342	mfhi	$25
1343	addu	$7,$24
1344	sltu	$1,$7,$24
1345	addu	$25,$1
1346	addu	$2,$25
1347	sw	$7,14*4($4)	# r[14]=c3;
1348	sw	$2,15*4($4)	# r[15]=c1;
1349
1350	.set	noreorder
1351	lw	$21,5*4($29)	# restore the saved registers
1352	lw	$20,4*4($29)
1353	lw	$19,3*4($29)
1354	lw	$18,2*4($29)
1355	lw	$17,1*4($29)
1356	lw	$16,0*4($29)
1357	jr	$31
1358	addu $29,6*4		# delay slot: release the frame
1359.end	bn_mul_comba8
1360
1361.align	5
# bn_mul_comba4: fully unrolled 4x4-word multiplication, column by column.
# In:  $4 = r (8 result words are stored), $5 = a (4 words read),
#      $6 = b (4 words read).
# No stack frame is used.  a[0..3] live in $12..$15 and b[0..3] in $8..$11.
# $2, $3 and $7 take turns as the accumulator words c1,c2,c3 named in the
# mul_add_c comments, $24/$25 receive LO/HI of each product and $1 holds
# carry bits.
1362.globl	bn_mul_comba4
1363.ent	bn_mul_comba4
1364bn_mul_comba4:
1365	.set	reorder
1366	lw	$12,0($5)
1367	lw	$8,0($6)
1368	lw	$13,4($5)
1369	lw	$14,2*4($5)
1370	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
1371	lw	$15,3*4($5)
1372	lw	$9,4($6)
1373	lw	$10,2*4($6)
1374	lw	$11,3*4($6)
1375	mflo	$2
1376	mfhi	$3
1377	sw	$2,0($4)		# result word 0
1378
1379	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
1380	mflo	$24
1381	mfhi	$25
1382	addu	$3,$24
1383	sltu	$1,$3,$24
1384	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
1385	addu	$7,$25,$1
1386	mflo	$24
1387	mfhi	$25
1388	addu	$3,$24
1389	sltu	$1,$3,$24
1390	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
1391	addu	$25,$1
1392	addu	$7,$25
1393	sltu	$2,$7,$25
1394	sw	$3,4($4)		# result word 1
1395
1396	mflo	$24
1397	mfhi	$25
1398	addu	$7,$24
1399	sltu	$1,$7,$24
1400	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
1401	addu	$25,$1
1402	addu	$2,$25
1403	mflo	$24
1404	mfhi	$25
1405	addu	$7,$24
1406	sltu	$1,$7,$24
1407	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
1408	addu	$25,$1
1409	addu	$2,$25
1410	sltu	$3,$2,$25
1411	mflo	$24
1412	mfhi	$25
1413	addu	$7,$24
1414	sltu	$1,$7,$24
1415	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
1416	addu	$25,$1
1417	addu	$2,$25
1418	sltu	$1,$2,$25
1419	addu	$3,$1
1420	sw	$7,2*4($4)	# result word 2
1421
1422	mflo	$24
1423	mfhi	$25
1424	addu	$2,$24
1425	sltu	$1,$2,$24
1426	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
1427	addu	$25,$1
1428	addu	$3,$25
1429	sltu	$7,$3,$25
1430	mflo	$24
1431	mfhi	$25
1432	addu	$2,$24
1433	sltu	$1,$2,$24
1434	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
1435	addu	$25,$1
1436	addu	$3,$25
1437	sltu	$1,$3,$25
1438	addu	$7,$1
1439	mflo	$24
1440	mfhi	$25
1441	addu	$2,$24
1442	sltu	$1,$2,$24
1443	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
1444	addu	$25,$1
1445	addu	$3,$25
1446	sltu	$1,$3,$25
1447	addu	$7,$1
1448	mflo	$24
1449	mfhi	$25
1450	addu	$2,$24
1451	sltu	$1,$2,$24
1452	 multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
1453	addu	$25,$1
1454	addu	$3,$25
1455	sltu	$1,$3,$25
1456	addu	$7,$1
1457	sw	$2,3*4($4)	# result word 3
1458
1459	mflo	$24
1460	mfhi	$25
1461	addu	$3,$24
1462	sltu	$1,$3,$24
1463	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
1464	addu	$25,$1
1465	addu	$7,$25
1466	sltu	$2,$7,$25
1467	mflo	$24
1468	mfhi	$25
1469	addu	$3,$24
1470	sltu	$1,$3,$24
1471	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
1472	addu	$25,$1
1473	addu	$7,$25
1474	sltu	$1,$7,$25
1475	addu	$2,$1
1476	mflo	$24
1477	mfhi	$25
1478	addu	$3,$24
1479	sltu	$1,$3,$24
1480	 multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
1481	addu	$25,$1
1482	addu	$7,$25
1483	sltu	$1,$7,$25
1484	addu	$2,$1
1485	sw	$3,4*4($4)	# result word 4
1486
1487	mflo	$24
1488	mfhi	$25
1489	addu	$7,$24
1490	sltu	$1,$7,$24
1491	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
1492	addu	$25,$1
1493	addu	$2,$25
1494	sltu	$3,$2,$25
1495	mflo	$24
1496	mfhi	$25
1497	addu	$7,$24
1498	sltu	$1,$7,$24
1499	 multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
1500	addu	$25,$1
1501	addu	$2,$25
1502	sltu	$1,$2,$25
1503	addu	$3,$1
1504	sw	$7,5*4($4)	# result word 5
1505
1506	mflo	$24
1507	mfhi	$25
1508	addu	$2,$24
1509	sltu	$1,$2,$24
1510	addu	$25,$1
1511	addu	$3,$25
1512	sw	$2,6*4($4)	# result word 6
1513	sw	$3,7*4($4)	# result word 7
1514
1515	.set	noreorder
1516	jr	$31
1517	nop
1518.end	bn_mul_comba4
1519
1520.align	5
1521.globl	bn_sqr_comba8
1522.ent	bn_sqr_comba8
1523bn_sqr_comba8:
1524	.set	reorder
1525	lw	$12,0($5)
1526	lw	$13,4($5)
1527	lw	$14,2*4($5)
1528	lw	$15,3*4($5)
1529
1530	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
1531	lw	$8,4*4($5)
1532	lw	$9,5*4($5)
1533	lw	$10,6*4($5)
1534	lw	$11,7*4($5)
1535	mflo	$2
1536	mfhi	$3
1537	sw	$2,0($4)
1538
1539	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
1540	mflo	$24
1541	mfhi	$25
1542	slt	$2,$25,$0
1543	sll	$25,1
1544	 multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
1545	slt	$6,$24,$0
1546	addu	$25,$6
1547	sll	$24,1
1548	addu	$3,$24
1549	sltu	$1,$3,$24
1550	addu	$7,$25,$1
1551	sw	$3,4($4)
1552	mflo	$24
1553	mfhi	$25
1554	addu	$7,$24
1555	sltu	$1,$7,$24
1556	 multu	$13,$13			# forward multiplication
1557	addu	$7,$24
1558	addu	$1,$25
1559	sltu	$24,$7,$24
1560	addu	$2,$1
1561	addu	$25,$24
1562	sltu	$3,$2,$1
1563	addu	$2,$25
1564	sltu	$25,$2,$25
1565	addu	$3,$25
1566	mflo	$24
1567	mfhi	$25
1568	addu	$7,$24
1569	sltu	$1,$7,$24
1570	 multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
1571	addu	$25,$1
1572	addu	$2,$25
1573	sltu	$1,$2,$25
1574	addu	$3,$1
1575	sw	$7,2*4($4)
1576	mflo	$24
1577	mfhi	$25
1578	addu	$2,$24
1579	sltu	$1,$2,$24
1580	 multu	$13,$14			# forward multiplication
1581	addu	$2,$24
1582	addu	$1,$25
1583	sltu	$24,$2,$24
1584	addu	$3,$1
1585	addu	$25,$24
1586	sltu	$7,$3,$1
1587	addu	$3,$25
1588	sltu	$25,$3,$25
1589	addu	$7,$25
1590	mflo	$24
1591	mfhi	$25
1592	addu	$2,$24
1593	sltu	$1,$2,$24
1594	 multu	$8,$12			# forward multiplication
1595	addu	$2,$24
1596	addu	$1,$25
1597	sltu	$24,$2,$24
1598	addu	$3,$1
1599	addu	$25,$24
1600	sltu	$1,$3,$1
1601	addu	$3,$25
1602	addu	$7,$1
1603	sltu	$25,$3,$25
1604	addu	$7,$25
1605	sw	$2,3*4($4)
1606	mflo	$24
1607	mfhi	$25
1608	addu	$3,$24
1609	sltu	$1,$3,$24
1610	 multu	$15,$13			# forward multiplication
1611	addu	$3,$24
1612	addu	$1,$25
1613	sltu	$24,$3,$24
1614	addu	$7,$1
1615	addu	$25,$24
1616	sltu	$2,$7,$1
1617	addu	$7,$25
1618	sltu	$25,$7,$25
1619	addu	$2,$25
1620	mflo	$24
1621	mfhi	$25
1622	addu	$3,$24
1623	sltu	$1,$3,$24
1624	 multu	$14,$14			# forward multiplication
1625	addu	$3,$24
1626	addu	$1,$25
1627	sltu	$24,$3,$24
1628	addu	$7,$1
1629	addu	$25,$24
1630	sltu	$1,$7,$1
1631	addu	$7,$25
1632	addu	$2,$1
1633	sltu	$25,$7,$25
1634	addu	$2,$25
1635	mflo	$24
1636	mfhi	$25
1637	addu	$3,$24
1638	sltu	$1,$3,$24
1639	 multu	$12,$9		# mul_add_c2(a[0],b[5],c3,c1,c2);
1640	addu	$25,$1
1641	addu	$7,$25
1642	sltu	$1,$7,$25
1643	addu	$2,$1
1644	sw	$3,4*4($4)
1645	mflo	$24
1646	mfhi	$25
1647	addu	$7,$24
1648	sltu	$1,$7,$24
1649	 multu	$13,$8			# forward multiplication
1650	addu	$7,$24
1651	addu	$1,$25
1652	sltu	$24,$7,$24
1653	addu	$2,$1
1654	addu	$25,$24
1655	sltu	$3,$2,$1
1656	addu	$2,$25
1657	sltu	$25,$2,$25
1658	addu	$3,$25
1659	mflo	$24
1660	mfhi	$25
1661	addu	$7,$24
1662	sltu	$1,$7,$24
1663	 multu	$14,$15			# forward multiplication
1664	addu	$7,$24
1665	addu	$1,$25
1666	sltu	$24,$7,$24
1667	addu	$2,$1
1668	addu	$25,$24
1669	sltu	$1,$2,$1
1670	addu	$2,$25
1671	addu	$3,$1
1672	sltu	$25,$2,$25
1673	addu	$3,$25
1674	mflo	$24
1675	mfhi	$25
1676	addu	$7,$24
1677	sltu	$1,$7,$24
1678	 multu	$10,$12			# forward multiplication
1679	addu	$7,$24
1680	addu	$1,$25
1681	sltu	$24,$7,$24
1682	addu	$2,$1
1683	addu	$25,$24
1684	sltu	$1,$2,$1
1685	addu	$2,$25
1686	addu	$3,$1
1687	sltu	$25,$2,$25
1688	addu	$3,$25
1689	sw	$7,5*4($4)
1690	mflo	$24
1691	mfhi	$25
1692	addu	$2,$24
1693	sltu	$1,$2,$24
1694	 multu	$9,$13			# forward multiplication
1695	addu	$2,$24
1696	addu	$1,$25
1697	sltu	$24,$2,$24
1698	addu	$3,$1
1699	addu	$25,$24
1700	sltu	$7,$3,$1
1701	addu	$3,$25
1702	sltu	$25,$3,$25
1703	addu	$7,$25
1704	mflo	$24
1705	mfhi	$25
1706	addu	$2,$24
1707	sltu	$1,$2,$24
1708	 multu	$8,$14			# forward multiplication
1709	addu	$2,$24
1710	addu	$1,$25
1711	sltu	$24,$2,$24
1712	addu	$3,$1
1713	addu	$25,$24
1714	sltu	$1,$3,$1
1715	addu	$3,$25
1716	addu	$7,$1
1717	sltu	$25,$3,$25
1718	addu	$7,$25
1719	mflo	$24
1720	mfhi	$25
1721	addu	$2,$24
1722	sltu	$1,$2,$24
1723	 multu	$15,$15			# forward multiplication
1724	addu	$2,$24
1725	addu	$1,$25
1726	sltu	$24,$2,$24
1727	addu	$3,$1
1728	addu	$25,$24
1729	sltu	$1,$3,$1
1730	addu	$3,$25
1731	addu	$7,$1
1732	sltu	$25,$3,$25
1733	addu	$7,$25
1734	mflo	$24
1735	mfhi	$25
1736	addu	$2,$24
1737	sltu	$1,$2,$24
1738	 multu	$12,$11		# mul_add_c2(a[0],b[7],c2,c3,c1);
1739	addu	$25,$1
1740	addu	$3,$25
1741	sltu	$1,$3,$25
1742	addu	$7,$1
1743	sw	$2,6*4($4)
1744	mflo	$24
1745	mfhi	$25
1746	addu	$3,$24
1747	sltu	$1,$3,$24
1748	 multu	$13,$10			# forward multiplication
1749	addu	$3,$24
1750	addu	$1,$25
1751	sltu	$24,$3,$24
1752	addu	$7,$1
1753	addu	$25,$24
1754	sltu	$2,$7,$1
1755	addu	$7,$25
1756	sltu	$25,$7,$25
1757	addu	$2,$25
1758	mflo	$24
1759	mfhi	$25
1760	addu	$3,$24
1761	sltu	$1,$3,$24
1762	 multu	$14,$9			# forward multiplication
1763	addu	$3,$24
1764	addu	$1,$25
1765	sltu	$24,$3,$24
1766	addu	$7,$1
1767	addu	$25,$24
1768	sltu	$1,$7,$1
1769	addu	$7,$25
1770	addu	$2,$1
1771	sltu	$25,$7,$25
1772	addu	$2,$25
1773	mflo	$24
1774	mfhi	$25
1775	addu	$3,$24
1776	sltu	$1,$3,$24
1777	 multu	$15,$8			# forward multiplication
1778	addu	$3,$24
1779	addu	$1,$25
1780	sltu	$24,$3,$24
1781	addu	$7,$1
1782	addu	$25,$24
1783	sltu	$1,$7,$1
1784	addu	$7,$25
1785	addu	$2,$1
1786	sltu	$25,$7,$25
1787	addu	$2,$25
1788	mflo	$24
1789	mfhi	$25
1790	addu	$3,$24
1791	sltu	$1,$3,$24
1792	 multu	$11,$13			# forward multiplication
1793	addu	$3,$24
1794	addu	$1,$25
1795	sltu	$24,$3,$24
1796	addu	$7,$1
1797	addu	$25,$24
1798	sltu	$1,$7,$1
1799	addu	$7,$25
1800	addu	$2,$1
1801	sltu	$25,$7,$25
1802	addu	$2,$25
1803	sw	$3,7*4($4)
1804	mflo	$24
1805	mfhi	$25
1806	addu	$7,$24
1807	sltu	$1,$7,$24
1808	 multu	$10,$14			# forward multiplication
1809	addu	$7,$24
1810	addu	$1,$25
1811	sltu	$24,$7,$24
1812	addu	$2,$1
1813	addu	$25,$24
1814	sltu	$3,$2,$1
1815	addu	$2,$25
1816	sltu	$25,$2,$25
1817	addu	$3,$25
1818	mflo	$24
1819	mfhi	$25
1820	addu	$7,$24
1821	sltu	$1,$7,$24
1822	 multu	$9,$15			# forward multiplication
1823	addu	$7,$24
1824	addu	$1,$25
1825	sltu	$24,$7,$24
1826	addu	$2,$1
1827	addu	$25,$24
1828	sltu	$1,$2,$1
1829	addu	$2,$25
1830	addu	$3,$1
1831	sltu	$25,$2,$25
1832	addu	$3,$25
1833	mflo	$24
1834	mfhi	$25
1835	addu	$7,$24
1836	sltu	$1,$7,$24
1837	 multu	$8,$8			# forward multiplication
1838	addu	$7,$24
1839	addu	$1,$25
1840	sltu	$24,$7,$24
1841	addu	$2,$1
1842	addu	$25,$24
1843	sltu	$1,$2,$1
1844	addu	$2,$25
1845	addu	$3,$1
1846	sltu	$25,$2,$25
1847	addu	$3,$25
1848	mflo	$24
1849	mfhi	$25
1850	addu	$7,$24
1851	sltu	$1,$7,$24
1852	 multu	$14,$11		# mul_add_c2(a[2],b[7],c1,c2,c3);
1853	addu	$25,$1
1854	addu	$2,$25
1855	sltu	$1,$2,$25
1856	addu	$3,$1
1857	sw	$7,8*4($4)
1858	mflo	$24
1859	mfhi	$25
1860	addu	$2,$24
1861	sltu	$1,$2,$24
1862	 multu	$15,$10			# forward multiplication
1863	addu	$2,$24
1864	addu	$1,$25
1865	sltu	$24,$2,$24
1866	addu	$3,$1
1867	addu	$25,$24
1868	sltu	$7,$3,$1
1869	addu	$3,$25
1870	sltu	$25,$3,$25
1871	addu	$7,$25
1872	mflo	$24
1873	mfhi	$25
1874	addu	$2,$24
1875	sltu	$1,$2,$24
1876	 multu	$8,$9			# forward multiplication
1877	addu	$2,$24
1878	addu	$1,$25
1879	sltu	$24,$2,$24
1880	addu	$3,$1
1881	addu	$25,$24
1882	sltu	$1,$3,$1
1883	addu	$3,$25
1884	addu	$7,$1
1885	sltu	$25,$3,$25
1886	addu	$7,$25
1887	mflo	$24
1888	mfhi	$25
1889	addu	$2,$24
1890	sltu	$1,$2,$24
1891	 multu	$11,$15			# forward multiplication
1892	addu	$2,$24
1893	addu	$1,$25
1894	sltu	$24,$2,$24
1895	addu	$3,$1
1896	addu	$25,$24
1897	sltu	$1,$3,$1
1898	addu	$3,$25
1899	addu	$7,$1
1900	sltu	$25,$3,$25
1901	addu	$7,$25
1902	sw	$2,9*4($4)
1903	mflo	$24
1904	mfhi	$25
1905	addu	$3,$24
1906	sltu	$1,$3,$24
1907	 multu	$10,$8			# forward multiplication
1908	addu	$3,$24
1909	addu	$1,$25
1910	sltu	$24,$3,$24
1911	addu	$7,$1
1912	addu	$25,$24
1913	sltu	$2,$7,$1
1914	addu	$7,$25
1915	sltu	$25,$7,$25
1916	addu	$2,$25
1917	mflo	$24
1918	mfhi	$25
1919	addu	$3,$24
1920	sltu	$1,$3,$24
1921	 multu	$9,$9			# forward multiplication
1922	addu	$3,$24
1923	addu	$1,$25
1924	sltu	$24,$3,$24
1925	addu	$7,$1
1926	addu	$25,$24
1927	sltu	$1,$7,$1
1928	addu	$7,$25
1929	addu	$2,$1
1930	sltu	$25,$7,$25
1931	addu	$2,$25
1932	mflo	$24
1933	mfhi	$25
1934	addu	$3,$24
1935	sltu	$1,$3,$24
1936	 multu	$8,$11		# mul_add_c2(a[4],b[7],c3,c1,c2);
1937	addu	$25,$1
1938	addu	$7,$25
1939	sltu	$1,$7,$25
1940	addu	$2,$1
1941	sw	$3,10*4($4)
1942	mflo	$24
1943	mfhi	$25
1944	addu	$7,$24
1945	sltu	$1,$7,$24
1946	 multu	$9,$10			# forward multiplication
1947	addu	$7,$24
1948	addu	$1,$25
1949	sltu	$24,$7,$24
1950	addu	$2,$1
1951	addu	$25,$24
1952	sltu	$3,$2,$1
1953	addu	$2,$25
1954	sltu	$25,$2,$25
1955	addu	$3,$25
1956	mflo	$24
1957	mfhi	$25
1958	addu	$7,$24
1959	sltu	$1,$7,$24
1960	 multu	$11,$9			# forward multiplication
1961	addu	$7,$24
1962	addu	$1,$25
1963	sltu	$24,$7,$24
1964	addu	$2,$1
1965	addu	$25,$24
1966	sltu	$1,$2,$1
1967	addu	$2,$25
1968	addu	$3,$1
1969	sltu	$25,$2,$25
1970	addu	$3,$25
1971	sw	$7,11*4($4)
1972	mflo	$24
1973	mfhi	$25
1974	addu	$2,$24
1975	sltu	$1,$2,$24
1976	 multu	$10,$10			# forward multiplication
1977	addu	$2,$24
1978	addu	$1,$25
1979	sltu	$24,$2,$24
1980	addu	$3,$1
1981	addu	$25,$24
1982	sltu	$7,$3,$1
1983	addu	$3,$25
1984	sltu	$25,$3,$25
1985	addu	$7,$25
1986	mflo	$24
1987	mfhi	$25
1988	addu	$2,$24
1989	sltu	$1,$2,$24
1990	 multu	$10,$11		# mul_add_c2(a[6],b[7],c2,c3,c1);
1991	addu	$25,$1
1992	addu	$3,$25
1993	sltu	$1,$3,$25
1994	addu	$7,$1
1995	sw	$2,12*4($4)
1996	mflo	$24
1997	mfhi	$25
1998	addu	$3,$24
1999	sltu	$1,$3,$24
2000	 multu	$11,$11			# forward multiplication
2001	addu	$3,$24
2002	addu	$1,$25
2003	sltu	$24,$3,$24
2004	addu	$7,$1
2005	addu	$25,$24
2006	sltu	$2,$7,$1
2007	addu	$7,$25
2008	sltu	$25,$7,$25
2009	addu	$2,$25
2010	sw	$3,13*4($4)
2011
2012	mflo	$24
2013	mfhi	$25
2014	addu	$7,$24
2015	sltu	$1,$7,$24
2016	addu	$25,$1
2017	addu	$2,$25
2018	sw	$7,14*4($4)
2019	sw	$2,15*4($4)
2020
2021	.set	noreorder
2022	jr	$31
2023	nop
2024.end	bn_sqr_comba8
2025
#
# bn_sqr_comba4 -- comba (column-wise) squaring of a four-word operand.
#
# Called as:	bn_sqr_comba4(r, a)	with 32-bit words
# In:	$4 = r, destination for the eight result words r[0..7]
#	$5 = a, source of the four operand words a[0..3]
# Out:	r[0..7] = square of a[0..3], least significant word first
#
# Register use:
#	$12..$15	a[0]..a[3]
#	$2,$3,$7	three-word column accumulator (low, middle, top);
#			the roles rotate by one register per stored word
#	$24,$25		LO and HI of the product being accumulated
#	$1 ($at)	carry scratch
#	$6		scratch, used only while doubling a[0]*a[1]
# Also writes HI/LO.  The stack is not used.
#
# Each multu is issued one step before the mflo/mfhi that reads it (the
# "forward multiplication" lines), presumably so the multiplier works
# while the previous product is being accumulated.
#
# Cross products a[i]*a[j] with i != j must be counted twice.  Except in
# column 1 this is done by adding LO and HI twice, with a carry check
# after every addition.
#
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	multu	$12,$12			# a[0]*a[0]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]
	mflo	$2
	mfhi	$3			# high half carries into column 1
	sw	$2,0($4)		# r[0]

	# Column 1: $3 += 2*a[0]*a[1].  Here the product is doubled by a
	# one-bit left shift: bit 63 lands in $2, bit 31 of LO moves to HI.
	multu	$12,$13			# a[0]*a[1]
	mflo	$24
	mfhi	$25
	slt	$2,$25,$0		# $2 = bit 63 of the product
	sll	$25,1
	 multu	$14,$12			# forward multiplication: a[2]*a[0]
	slt	$6,$24,$0		# $6 = bit 31 of LO
	addu	$25,$6			# shifted HI, at most 0xFFFFFFFF
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24		# carry out of the low word
	addu	$7,$25,$1
	# Shifted HI may be all ones, so adding the carry can wrap $7 to
	# zero; that carry belongs in $2.  Without the next two lines
	# a = {0xFFFFFFFE, 0x80000001, 0, 0} gives r[3] one too small.
	sltu	$1,$7,$1		# 1 only when $7 wrapped
	addu	$2,$1
	sw	$3,4($4)		# r[1]

	# Column 2: ($7,$2,$3) += 2*a[2]*a[0].  $3 is assigned, not
	# accumulated, since it starts a new life as the top word.
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24		# carry of first LO addition
	 multu	$13,$13			# forward multiplication: a[1]*a[1]
	addu	$7,$24
	addu	$1,$25			# HI + first carry
	sltu	$24,$7,$24		# carry of second LO addition
	addu	$2,$1
	addu	$25,$24			# HI + second carry
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25

	# Column 2: ($7,$2,$3) += a[1]*a[1], added once.
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	$12,$15			# forward multiplication: a[0]*a[3]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# Column 3: ($2,$3,$7) += 2*a[0]*a[3]; $7 restarts as top word.
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	$13,$14			# forward multiplication: a[1]*a[2]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25

	# Column 3: ($2,$3,$7) += 2*a[1]*a[2]; top word now accumulates.
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	$15,$13			# forward multiplication: a[3]*a[1]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	sw	$2,3*4($4)		# r[3]

	# Column 4: ($3,$7,$2) += 2*a[3]*a[1]; $2 restarts as top word.
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	$14,$14			# forward multiplication: a[2]*a[2]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25

	# Column 4: ($3,$7,$2) += a[2]*a[2], added once.
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	$14,$15			# forward multiplication: a[2]*a[3]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# Column 5: ($7,$2,$3) += 2*a[2]*a[3]; $3 restarts as top word.
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	$15,$15			# forward multiplication: a[3]*a[3]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	sw	$7,5*4($4)		# r[5]

	# Columns 6 and 7: ($2,$3) += a[3]*a[3]; no carry check is made
	# on the final word.
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]
	sw	$3,7*4($4)		# r[7]

	.set	noreorder
	jr	$31			# return
	nop				# branch delay slot
.end	bn_sqr_comba4
2160