xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips64.S (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1.rdata
2.asciiz	"mips3.s, Version 1.2"
3.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4
5.text
6.set	noat
7
# bn_mul_add_words: exported entry stub.
# Branches to bn_mul_add_words_internal when the word count in $6 is > 0.
# Otherwise it returns immediately with $2 = 0.  $2 is cleared in the branch
# delay slot, so it is zero on both paths.
# NOTE(review): presumably the C-level prototype is
#   BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
# -- confirm against the bignum headers.
8.align	5
9.globl	bn_mul_add_words
10.ent	bn_mul_add_words
11bn_mul_add_words:
12	.set	noreorder
13	bgtz	$6,bn_mul_add_words_internal
14	move	$2,$0		# delay slot: running carry / return value = 0
15	jr	$31
16	move	$4,$2		# delay slot: return value is also copied into $4
17.end	bn_mul_add_words
18
# bn_mul_add_words_internal: worker for bn_mul_add_words.
# Reached from the entry stub with $6 > 0 and $2 = 0.
# Register roles, as used below:
#   $4   base of the words that are read, added to and written back
#   $5   base of the source words that get multiplied
#   $6   remaining word count
#   $7   64-bit multiplier
#   $2   running carry between words; left in $2 (and copied to $4) on return
#   $3   constant -4, used as a mask to test whether four or more words remain
#   $1   scratch (the file runs with .set noat, so $1 is free for this)
# Work done per word i:
#   {carry, ($4)[i]} = ($5)[i] * $7 + ($4)[i] + carry
# The main loop retires four words per pass; the tail handles the last
# one to three words.
19.align	5
20.ent	bn_mul_add_words_internal
21bn_mul_add_words_internal:
22	.set	reorder
23	li	$3,-4
24	and	$8,$6,$3		# $8 = count with the two low bits cleared
25	beqz	$8,.L_bn_mul_add_words_tail	# fewer than four words: tail only
26
27.L_bn_mul_add_words_loop:
28	ld	$12,0($5)
29	dmultu	$12,$7
30	ld	$13,0($4)
31	ld	$14,8($5)
32	ld	$15,8($4)
33	ld	$8,2*8($5)
34	ld	$9,2*8($4)
35	daddu	$13,$2
36	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
37				# values", but it seems to work fine
38				# even on 64-bit registers.
39	mflo	$1
40	mfhi	$12
41	daddu	$13,$1
42	daddu	$2,$12
43	 dmultu	$14,$7
44	sltu	$1,$13,$1
45	sd	$13,0($4)
46	daddu	$2,$1
47
48	ld	$10,3*8($5)
49	ld	$11,3*8($4)
50	daddu	$15,$2
51	sltu	$2,$15,$2
52	mflo	$1
53	mfhi	$14
54	daddu	$15,$1
55	daddu	$2,$14
56	 dmultu	$8,$7
57	sltu	$1,$15,$1
58	sd	$15,8($4)
59	daddu	$2,$1
60
61	subu	$6,4
62	daddu $4,4*8
63	daddu $5,4*8
64	daddu	$9,$2
65	sltu	$2,$9,$2
66	mflo	$1
67	mfhi	$8
68	daddu	$9,$1
69	daddu	$2,$8
70	 dmultu	$10,$7
71	sltu	$1,$9,$1
72	sd	$9,-2*8($4)	# pointers were already advanced, hence the negative offset
73	daddu	$2,$1
74
75
76	and	$8,$6,$3		# loop test value: remaining count & -4
77	daddu	$11,$2
78	sltu	$2,$11,$2
79	mflo	$1
80	mfhi	$10
81	daddu	$11,$1
82	daddu	$2,$10
83	sltu	$1,$11,$1
84	sd	$11,-8($4)
85	.set	noreorder
86	bgtz	$8,.L_bn_mul_add_words_loop
87	daddu	$2,$1		# delay slot: fold the last carry bit into $2
88
89	beqz	$6,.L_bn_mul_add_words_return
90	nop
91
92.L_bn_mul_add_words_tail:
93	.set	reorder
94	ld	$12,0($5)
95	dmultu	$12,$7
96	ld	$13,0($4)
97	subu	$6,1
98	daddu	$13,$2
99	sltu	$2,$13,$2
100	mflo	$1
101	mfhi	$12
102	daddu	$13,$1
103	daddu	$2,$12
104	sltu	$1,$13,$1
105	sd	$13,0($4)
106	daddu	$2,$1
107	beqz	$6,.L_bn_mul_add_words_return
108
109	ld	$12,8($5)
110	dmultu	$12,$7
111	ld	$13,8($4)
112	subu	$6,1
113	daddu	$13,$2
114	sltu	$2,$13,$2
115	mflo	$1
116	mfhi	$12
117	daddu	$13,$1
118	daddu	$2,$12
119	sltu	$1,$13,$1
120	sd	$13,8($4)
121	daddu	$2,$1
122	beqz	$6,.L_bn_mul_add_words_return
123
124	ld	$12,2*8($5)
125	dmultu	$12,$7
126	ld	$13,2*8($4)
127	daddu	$13,$2
128	sltu	$2,$13,$2
129	mflo	$1
130	mfhi	$12
131	daddu	$13,$1
132	daddu	$2,$12
133	sltu	$1,$13,$1
134	sd	$13,2*8($4)
135	daddu	$2,$1
136
137.L_bn_mul_add_words_return:
138	.set	noreorder
139	jr	$31
140	move	$4,$2		# delay slot: final carry is also copied into $4
141.end	bn_mul_add_words_internal
142
# bn_mul_words: exported entry stub.
# Branches to bn_mul_words_internal when the word count in $6 is > 0.
# Otherwise it returns immediately with $2 = 0 (cleared in the branch delay
# slot, so it is zero on both paths).
# NOTE(review): presumably the C-level prototype is
#   BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
# -- confirm against the bignum headers.
143.align	5
144.globl	bn_mul_words
145.ent	bn_mul_words
146bn_mul_words:
147	.set	noreorder
148	bgtz	$6,bn_mul_words_internal
149	move	$2,$0		# delay slot: running carry / return value = 0
150	jr	$31
151	move	$4,$2		# delay slot: return value is also copied into $4
152.end	bn_mul_words
153
# bn_mul_words_internal: worker for bn_mul_words.
# Reached from the entry stub with $6 > 0 and $2 = 0.
# Register roles, as used below:
#   $4   base of the destination words (written only, never read)
#   $5   base of the source words
#   $6   remaining word count
#   $7   64-bit multiplier
#   $2   running carry between words; left in $2 (and copied to $4) on return
#   $3   constant -4, used as a mask to test whether four or more words remain
#   $1   scratch for the low product half
# Work done per word i:
#   {carry, ($4)[i]} = ($5)[i] * $7 + carry
# The main loop retires four words per pass; the tail handles the last
# one to three words.
154.align	5
155.ent	bn_mul_words_internal
156bn_mul_words_internal:
157	.set	reorder
158	li	$3,-4
159	and	$8,$6,$3		# $8 = count with the two low bits cleared
160	beqz	$8,.L_bn_mul_words_tail	# fewer than four words: tail only
161
162.L_bn_mul_words_loop:
163	ld	$12,0($5)
164	dmultu	$12,$7
165	ld	$14,8($5)
166	ld	$8,2*8($5)
167	ld	$10,3*8($5)
168	mflo	$1
169	mfhi	$12
170	daddu	$2,$1		# carry-in + low half
171	sltu	$13,$2,$1	# carry out of that addition
172	 dmultu	$14,$7
173	sd	$2,0($4)
174	daddu	$2,$13,$12	# next carry = carry bit + high half
175
176	subu	$6,4
177	daddu $4,4*8
178	daddu $5,4*8
179	mflo	$1
180	mfhi	$14
181	daddu	$2,$1
182	sltu	$15,$2,$1
183	 dmultu	$8,$7
184	sd	$2,-3*8($4)	# pointers were already advanced, hence the negative offset
185	daddu	$2,$15,$14
186
187	mflo	$1
188	mfhi	$8
189	daddu	$2,$1
190	sltu	$9,$2,$1
191	 dmultu	$10,$7
192	sd	$2,-2*8($4)
193	daddu	$2,$9,$8
194
195	and	$8,$6,$3		# loop test value: remaining count & -4
196	mflo	$1
197	mfhi	$10
198	daddu	$2,$1
199	sltu	$11,$2,$1
200	sd	$2,-8($4)
201	.set	noreorder
202	bgtz	$8,.L_bn_mul_words_loop
203	daddu	$2,$11,$10	# delay slot: carry for the next word
204
205	beqz	$6,.L_bn_mul_words_return
206	nop
207
208.L_bn_mul_words_tail:
209	.set	reorder
210	ld	$12,0($5)
211	dmultu	$12,$7
212	subu	$6,1
213	mflo	$1
214	mfhi	$12
215	daddu	$2,$1
216	sltu	$13,$2,$1
217	sd	$2,0($4)
218	daddu	$2,$13,$12
219	beqz	$6,.L_bn_mul_words_return
220
221	ld	$12,8($5)
222	dmultu	$12,$7
223	subu	$6,1
224	mflo	$1
225	mfhi	$12
226	daddu	$2,$1
227	sltu	$13,$2,$1
228	sd	$2,8($4)
229	daddu	$2,$13,$12
230	beqz	$6,.L_bn_mul_words_return
231
232	ld	$12,2*8($5)
233	dmultu	$12,$7
234	mflo	$1
235	mfhi	$12
236	daddu	$2,$1
237	sltu	$13,$2,$1
238	sd	$2,2*8($4)
239	daddu	$2,$13,$12
240
241.L_bn_mul_words_return:
242	.set	noreorder
243	jr	$31
244	move	$4,$2		# delay slot: final carry is also copied into $4
245.end	bn_mul_words_internal
246
# bn_sqr_words: exported entry stub.
# Branches to bn_sqr_words_internal when the word count in $6 is > 0.
# Otherwise it returns immediately.  $2 is cleared in the branch delay slot
# on both paths.
# NOTE(review): presumably the C-level prototype is
#   void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num)
# -- confirm against the bignum headers.
247.align	5
248.globl	bn_sqr_words
249.ent	bn_sqr_words
250bn_sqr_words:
251	.set	noreorder
252	bgtz	$6,bn_sqr_words_internal
253	move	$2,$0		# delay slot: $2 = 0
254	jr	$31
255	move	$4,$2		# delay slot: $4 = $2
256.end	bn_sqr_words
257
# bn_sqr_words_internal: worker for bn_sqr_words.
# Reached from the entry stub with $6 > 0.
# Register roles, as used below:
#   $4   base of the destination; two words are written per source word
#   $5   base of the source words
#   $6   remaining source word count
#   $3   constant -4, used as a mask to test whether four or more words remain
# Work done per source word i:
#   ($4)[2*i]   = low  64 bits of ($5)[i] * ($5)[i]
#   ($4)[2*i+1] = high 64 bits of ($5)[i] * ($5)[i]
# There is no carry between words.  $2 is never written here, so the
# closing 'move $4,$2' copies whatever the caller left in $2 (the entry stub
# sets it to zero).
258.align	5
259.ent	bn_sqr_words_internal
260bn_sqr_words_internal:
261	.set	reorder
262	li	$3,-4
263	and	$8,$6,$3		# $8 = count with the two low bits cleared
264	beqz	$8,.L_bn_sqr_words_tail	# fewer than four words: tail only
265
266.L_bn_sqr_words_loop:
267	ld	$12,0($5)
268	dmultu	$12,$12
269	ld	$14,8($5)
270	ld	$8,2*8($5)
271	ld	$10,3*8($5)
272	mflo	$13
273	mfhi	$12
274	sd	$13,0($4)
275	sd	$12,8($4)
276
277	dmultu	$14,$14
278	subu	$6,4
279	daddu $4,8*8		# destination advances by eight words per pass
280	daddu $5,4*8		# source advances by four words per pass
281	mflo	$15
282	mfhi	$14
283	sd	$15,-6*8($4)
284	sd	$14,-5*8($4)
285
286	dmultu	$8,$8
287	mflo	$9
288	mfhi	$8
289	sd	$9,-4*8($4)
290	sd	$8,-3*8($4)
291
292
293	dmultu	$10,$10
294	and	$8,$6,$3		# loop test value: remaining count & -4
295	mflo	$11
296	mfhi	$10
297	sd	$11,-2*8($4)
298
299	.set	noreorder
300	sd	$10,-8($4)
301	bgtz	$8,.L_bn_sqr_words_loop
302	nop
303
304	beqz	$6,.L_bn_sqr_words_return
305	nop
306
307.L_bn_sqr_words_tail:
308	.set	reorder
309	ld	$12,0($5)
310	dmultu	$12,$12
311	subu	$6,1
312	mflo	$13
313	mfhi	$12
314	sd	$13,0($4)
315	sd	$12,8($4)
316	beqz	$6,.L_bn_sqr_words_return
317
318	ld	$12,8($5)
319	dmultu	$12,$12
320	subu	$6,1
321	mflo	$13
322	mfhi	$12
323	sd	$13,2*8($4)
324	sd	$12,3*8($4)
325	beqz	$6,.L_bn_sqr_words_return
326
327	ld	$12,2*8($5)
328	dmultu	$12,$12
329	mflo	$13
330	mfhi	$12
331	sd	$13,4*8($4)
332	sd	$12,5*8($4)
333
334.L_bn_sqr_words_return:
335	.set	noreorder
336	jr	$31
337	move	$4,$2		# delay slot: $4 = $2
338
339.end	bn_sqr_words_internal
340
# bn_add_words: exported entry stub.
# Branches to bn_add_words_internal when the word count in $7 is > 0.
# Otherwise it returns immediately with $2 = 0 (cleared in the branch delay
# slot, so it is zero on both paths).
# NOTE(review): presumably the C-level prototype is
#   BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, int num)
# -- confirm against the bignum headers.
341.align	5
342.globl	bn_add_words
343.ent	bn_add_words
344bn_add_words:
345	.set	noreorder
346	bgtz	$7,bn_add_words_internal
347	move	$2,$0		# delay slot: running carry / return value = 0
348	jr	$31
349	move	$4,$2		# delay slot: return value is also copied into $4
350.end	bn_add_words
351
# bn_add_words_internal: worker for bn_add_words.
# Reached from the entry stub with $7 > 0 and $2 = 0.
# Register roles, as used below:
#   $4        base of the destination words
#   $5, $6    bases of the two source operands
#   $7        remaining word count
#   $2        running carry (0, 1 or 2 transiently; see below); left in $2
#             and copied to $4 on return
#   $3        constant -4, mask for the "four or more words remain" test
#   $1        loop test value (remaining count & -4)
#   $24, $25  carry out of the operand+operand addition, alternating per word
# Work done per word i:
#   t         = ($6)[i] + ($5)[i]         carry1 = t < ($5)[i]
#   ($4)[i]   = t + carry                 carry2 = ($4)[i] < t
#   carry     = carry1 + carry2
# The main loop retires four words per pass; the tail handles the last
# one to three words.
352.align	5
353.ent	bn_add_words_internal
354bn_add_words_internal:
355	.set	reorder
356	li	$3,-4
357	and	$1,$7,$3		# $1 = count with the two low bits cleared
358	beqz	$1,.L_bn_add_words_tail	# fewer than four words: tail only
359
360.L_bn_add_words_loop:
361	ld	$12,0($5)
362	ld	$8,0($6)
363	subu	$7,4
364	ld	$13,8($5)
365	and	$1,$7,$3		# loop test value, consumed by the bgtz below
366	ld	$14,2*8($5)
367	daddu $6,4*8
368	ld	$15,3*8($5)
369	daddu $4,4*8
370	ld	$9,-3*8($6)	# $6 was already advanced, hence the negative offsets
371	daddu $5,4*8
372	ld	$10,-2*8($6)
373	ld	$11,-8($6)
374	daddu	$8,$12
375	sltu	$24,$8,$12
376	daddu	$12,$8,$2
377	sltu	$2,$12,$8
378	sd	$12,-4*8($4)
379	daddu	$2,$24
380
381	daddu	$9,$13
382	sltu	$25,$9,$13
383	daddu	$13,$9,$2
384	sltu	$2,$13,$9
385	sd	$13,-3*8($4)
386	daddu	$2,$25
387
388	daddu	$10,$14
389	sltu	$24,$10,$14
390	daddu	$14,$10,$2
391	sltu	$2,$14,$10
392	sd	$14,-2*8($4)
393	daddu	$2,$24
394
395	daddu	$11,$15
396	sltu	$25,$11,$15
397	daddu	$15,$11,$2
398	sltu	$2,$15,$11
399	sd	$15,-8($4)
400
401	.set	noreorder
402	bgtz	$1,.L_bn_add_words_loop
403	daddu	$2,$25		# delay slot: combine the two carry bits of word 3
404
405	beqz	$7,.L_bn_add_words_return
406	nop
407
408.L_bn_add_words_tail:
409	.set	reorder
410	ld	$12,0($5)
411	ld	$8,0($6)
412	daddu	$8,$12
413	subu	$7,1
414	sltu	$24,$8,$12
415	daddu	$12,$8,$2
416	sltu	$2,$12,$8
417	sd	$12,0($4)
418	daddu	$2,$24
419	beqz	$7,.L_bn_add_words_return
420
421	ld	$13,8($5)
422	ld	$9,8($6)
423	daddu	$9,$13
424	subu	$7,1
425	sltu	$25,$9,$13
426	daddu	$13,$9,$2
427	sltu	$2,$13,$9
428	sd	$13,8($4)
429	daddu	$2,$25
430	beqz	$7,.L_bn_add_words_return
431
432	ld	$14,2*8($5)
433	ld	$10,2*8($6)
434	daddu	$10,$14
435	sltu	$24,$10,$14
436	daddu	$14,$10,$2
437	sltu	$2,$14,$10
438	sd	$14,2*8($4)
439	daddu	$2,$24
440
441.L_bn_add_words_return:
442	.set	noreorder
443	jr	$31
444	move	$4,$2		# delay slot: final carry is also copied into $4
445
446.end	bn_add_words_internal
447
# bn_sub_words: exported entry stub.
# Branches to bn_sub_words_internal when the word count in $7 is > 0.
# Otherwise it returns immediately with $2 = 0 and $4 = 0.
# The sibling stubs in this file end with 'move $4,$2'; this one uses
# 'move $4,$0'.  Both leave the same value in $4, because $2 was just
# cleared in the branch delay slot.
# NOTE(review): presumably the C-level prototype is
#   BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, int num)
# -- confirm against the bignum headers.
448.align	5
449.globl	bn_sub_words
450.ent	bn_sub_words
451bn_sub_words:
452	.set	noreorder
453	bgtz	$7,bn_sub_words_internal
454	move	$2,$0		# delay slot: running borrow / return value = 0
455	jr	$31
456	move	$4,$0		# delay slot: $4 = 0, same value as $2
457.end	bn_sub_words
458
# bn_sub_words_internal: worker for bn_sub_words.
# Reached from the entry stub with $7 > 0 and $2 = 0.
# Register roles, as used below:
#   $4        base of the destination words
#   $5        base of the minuend words
#   $6        base of the subtrahend words
#   $7        remaining word count
#   $2        running borrow; left in $2 and copied to $4 on return
#   $3        constant -4, mask for the "four or more words remain" test
#   $1        loop test value (remaining count & -4)
#   $24, $25  borrow out of the operand-operand subtraction, alternating
# Work done per word i:
#   borrow1   = ($5)[i] < ($6)[i]
#   d         = ($5)[i] - ($6)[i]
#   ($4)[i]   = d - borrow                borrow2 = ($4)[i] > d
#   borrow    = borrow1 + borrow2
# The main loop retires four words per pass; the tail handles the last
# one to three words.
459.align	5
460.ent	bn_sub_words_internal
461bn_sub_words_internal:
462	.set	reorder
463	li	$3,-4
464	and	$1,$7,$3		# $1 = count with the two low bits cleared
465	beqz	$1,.L_bn_sub_words_tail	# fewer than four words: tail only
466
467.L_bn_sub_words_loop:
468	ld	$12,0($5)
469	ld	$8,0($6)
470	subu	$7,4
471	ld	$13,8($5)
472	and	$1,$7,$3		# loop test value, consumed by the bgtz below
473	ld	$14,2*8($5)
474	daddu $6,4*8
475	ld	$15,3*8($5)
476	daddu $4,4*8
477	ld	$9,-3*8($6)	# $6 was already advanced, hence the negative offsets
478	daddu $5,4*8
479	ld	$10,-2*8($6)
480	ld	$11,-8($6)
481	sltu	$24,$12,$8
482	dsubu	$8,$12,$8
483	dsubu	$12,$8,$2
484	sgtu	$2,$12,$8
485	sd	$12,-4*8($4)
486	daddu	$2,$24
487
488	sltu	$25,$13,$9
489	dsubu	$9,$13,$9
490	dsubu	$13,$9,$2
491	sgtu	$2,$13,$9
492	sd	$13,-3*8($4)
493	daddu	$2,$25
494
495
496	sltu	$24,$14,$10
497	dsubu	$10,$14,$10
498	dsubu	$14,$10,$2
499	sgtu	$2,$14,$10
500	sd	$14,-2*8($4)
501	daddu	$2,$24
502
503	sltu	$25,$15,$11
504	dsubu	$11,$15,$11
505	dsubu	$15,$11,$2
506	sgtu	$2,$15,$11
507	sd	$15,-8($4)
508
509	.set	noreorder
510	bgtz	$1,.L_bn_sub_words_loop
511	daddu	$2,$25		# delay slot: combine the two borrow bits of word 3
512
513	beqz	$7,.L_bn_sub_words_return
514	nop
515
516.L_bn_sub_words_tail:
517	.set	reorder
518	ld	$12,0($5)
519	ld	$8,0($6)
520	subu	$7,1
521	sltu	$24,$12,$8
522	dsubu	$8,$12,$8
523	dsubu	$12,$8,$2
524	sgtu	$2,$12,$8
525	sd	$12,0($4)
526	daddu	$2,$24
527	beqz	$7,.L_bn_sub_words_return
528
529	ld	$13,8($5)
530	subu	$7,1
531	ld	$9,8($6)
532	sltu	$25,$13,$9
533	dsubu	$9,$13,$9
534	dsubu	$13,$9,$2
535	sgtu	$2,$13,$9
536	sd	$13,8($4)
537	daddu	$2,$25
538	beqz	$7,.L_bn_sub_words_return
539
540	ld	$14,2*8($5)
541	ld	$10,2*8($6)
542	sltu	$24,$14,$10
543	dsubu	$10,$14,$10
544	dsubu	$14,$10,$2
545	sgtu	$2,$14,$10
546	sd	$14,2*8($4)
547	daddu	$2,$24
548
549.L_bn_sub_words_return:
550	.set	noreorder
551	jr	$31
552	move	$4,$2		# delay slot: final borrow is also copied into $4
553.end	bn_sub_words_internal
554
# bn_div_3_words: exported entry point.
# On entry $4 is a pointer, $5 and $6 are two word values.
# The code keeps the pointer in $7 and the original $5 in $10, then loads
# $4 = word at 0($7) and $5 = word at -8($7).
# When the word at 0($7) equals $6 it returns -1 (all bits set) in $2 and
# $4; otherwise it continues in bn_div_3_words_internal.
# NOTE(review): presumably called as bn_div_3_words(m, d1, d0), with m
# pointing at the most significant of three dividend words -- confirm
# against the C caller.
555.align 5
556.globl	bn_div_3_words
557.ent	bn_div_3_words
558bn_div_3_words:
559	.set	noreorder
560	move	$7,$4		# we know that bn_div_words does not
561				# touch $7, $10, $11 and preserves $6
562				# so that we can save two arguments
563				# and return address in registers
564				# instead of stack:-)
565
566	ld	$4,($7)
567	move	$10,$5
568	ld	$5,-8($7)
569	bne	$4,$6,bn_div_3_words_internal
570	nop
571	li	$2,-1
572	jr	$31
573	move	$4,$2		# delay slot: return value is also copied into $4
574.end	bn_div_3_words
575
# bn_div_3_words_internal: quotient estimate plus correction loop.
# Reached from bn_div_3_words with:
#   $4, $5   the words loaded from 0($7) and -8($7)
#   $6       the divisor word handed on to bn_div_words_internal
#   $7       the saved pointer argument
#   $10      the saved second argument
# The return address is parked in $11 around the call.  After the call $2
# holds the quotient and $5 the remainder produced by bn_div_words_internal.
# The product $10 * $2 is then kept in $13:$12 (high:low) and compared
# against $5 and the word at -16($7).  Each pass of the loop adds $6 to $5
# and subtracts $10 from $13:$12.  $2 is lowered by one per pass, except
# on the pass that leaves the loop by falling through the beqz.
576.align	5
577.ent	bn_div_3_words_internal
578bn_div_3_words_internal:
579	.set	reorder
580	move	$11,$31
581	bal	bn_div_words_internal
582	move	$31,$11
583	dmultu	$10,$2
584	ld	$14,-2*8($7)
585	move	$8,$0
586	mfhi	$13
587	mflo	$12
588	sltu	$24,$13,$5	# $24 = (product high half < remainder)
589.L_bn_div_3_words_inner_loop:
590	bnez	$24,.L_bn_div_3_words_inner_loop_done
591	sgeu	$1,$14,$12
592	seq	$25,$13,$5
593	and	$1,$25		# $1 = (high half == remainder) && (word at -16($7) >= low half)
594	sltu	$15,$12,$10	# borrow out of the low-half subtraction below
595	daddu	$5,$6
596	dsubu	$13,$15
597	dsubu	$12,$10
598	sltu	$24,$13,$5
599	sltu	$8,$5,$6		# set when the addition to $5 wrapped around
600	or	$24,$8
601	.set	noreorder
602	beqz	$1,.L_bn_div_3_words_inner_loop
603	dsubu	$2,1		# delay slot: always executed
604	daddu	$2,1		# fall-through only: undo the decrement above
605	.set	reorder
606.L_bn_div_3_words_inner_loop_done:
607	.set	noreorder
608	jr	$31
609	move	$4,$2		# delay slot: return value is also copied into $4
610.end	bn_div_3_words_internal
611
# bn_div_words: exported entry point.
# Branches to bn_div_words_internal when the divisor in $6 is non-zero.
# For a zero divisor it returns -1 (all bits set) in $2 and $4.
# The 'li $2,-1' sits in the branch delay slot and therefore also runs on
# the taken path; bn_div_words_internal overwrites $2 before returning.
# NOTE(review): presumably the C-level prototype is
#   BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
# -- confirm against the bignum headers.
612.align	5
613.globl	bn_div_words
614.ent	bn_div_words
615bn_div_words:
616	.set	noreorder
617	bnez	$6,bn_div_words_internal
618	li	$2,-1		# I would rather signal div-by-zero
619				# which can be done with 'break 7'
620	jr	$31
621	move	$4,$2
622.end	bn_div_words
623
# bn_div_words_internal: divides the two-word value $4:$5 (high:low) by $6.
# Reached from bn_div_words (with $6 != 0) and from bn_div_3_words_internal.
# No .set directive precedes the first instructions, so they are assembled
# in the noreorder mode left active by bn_div_words above.
# Outline of the code below:
#   Step 1: shift $6 left until its top bit is set; $25 counts the shifts.
#   Step 2: when shifting happened, execute 'break 6' if any of the top $25
#           bits of $4 is set; otherwise shift $4:$5 left by $25 bits.
#   Step 3: subtract $6 from $4 once when $4 >= $6.
#   Step 4: build the quotient in two 32-bit halves.  Each half is
#           estimated with ddivu by the upper 32 bits of $6 (held in $3), or
#           taken as 0xffffffff when the upper 32 bits of $4 equal $3.  The
#           inner loop then lowers the estimate by one for as long as the
#           product $6 * estimate ($13:$12) is greater than $9:$15.
# On return: $2 = $4 = quotient, $3 = $5 = remainder shifted back right by
# $25, and $6 holds its original value again.
# Registers written: $1-$6, $8, $9, $12-$15, $24, $25.  $7, $10 and $11 are
# not touched, which bn_div_3_words depends on.
624.align	5
625.ent	bn_div_words_internal
626bn_div_words_internal:
627	move	$3,$0
628	bltz	$6,.L_bn_div_words_body	# top bit already set: no shifting needed
629	move	$25,$3		# delay slot: shift count = 0
630	dsll	$6,1
631	bgtz	$6,.-4		# back to the dsll until the top bit of $6 is set
632	addu	$25,1		# delay slot: one more shift counted
633
634	.set	reorder
635	negu	$13,$25
636	li	$14,-1
637	dsll	$14,$13		# mask covering the top $25 bits
638	and	$14,$4
639	dsrl	$1,$5,$13	# bits of $5 that move up into $4
640	.set	noreorder
641	beqz	$14,.+12		# skip the nop and the break below
642	nop
643	break	6		# signal overflow
644	.set	reorder
645	dsll	$4,$25
646	dsll	$5,$25
647	or	$4,$1
648.L_bn_div_words_body:
649	dsrl	$3,$6,4*8	# bits
650	sgeu	$1,$4,$6
651	.set	noreorder
652	beqz	$1,.+12		# skip the nop and the subtraction below
653	nop
654	dsubu	$4,$6
655	.set	reorder
656
657	li	$8,-1
658	dsrl	$9,$4,4*8	# bits
659	dsrl	$8,4*8	# q=0xffffffff
660	beq	$3,$9,.L_bn_div_words_skip_div1
661	ddivu	$0,$4,$3
662	mflo	$8
663.L_bn_div_words_skip_div1:
664	dmultu	$6,$8
665	dsll	$15,$4,4*8	# bits
666	dsrl	$1,$5,4*8	# bits
667	or	$15,$1
668	mflo	$12
669	mfhi	$13
670.L_bn_div_words_inner_loop1:
671	sltu	$14,$15,$12
672	seq	$24,$9,$13
673	sltu	$1,$9,$13
674	and	$14,$24
675	sltu	$2,$12,$6	# borrow out of the low-half subtraction below
676	or	$1,$14		# $1 = ($13:$12 > $9:$15)
677	.set	noreorder
678	beqz	$1,.L_bn_div_words_inner_loop1_done
679	dsubu	$13,$2		# delay slot: always executed
680	dsubu	$12,$6
681	b	.L_bn_div_words_inner_loop1
682	dsubu	$8,1		# delay slot: estimate was too large, lower it
683	.set	reorder
684.L_bn_div_words_inner_loop1_done:
685
686	dsll	$5,4*8	# bits
687	dsubu	$4,$15,$12
688	dsll	$2,$8,4*8	# bits
689
690	li	$8,-1
691	dsrl	$9,$4,4*8	# bits
692	dsrl	$8,4*8	# q=0xffffffff
693	beq	$3,$9,.L_bn_div_words_skip_div2
694	ddivu	$0,$4,$3
695	mflo	$8
696.L_bn_div_words_skip_div2:
697	dmultu	$6,$8
698	dsll	$15,$4,4*8	# bits
699	dsrl	$1,$5,4*8	# bits
700	or	$15,$1
701	mflo	$12
702	mfhi	$13
703.L_bn_div_words_inner_loop2:
704	sltu	$14,$15,$12
705	seq	$24,$9,$13
706	sltu	$1,$9,$13
707	and	$14,$24
708	sltu	$3,$12,$6	# $3 serves as borrow here; $2 holds the upper quotient half
709	or	$1,$14
710	.set	noreorder
711	beqz	$1,.L_bn_div_words_inner_loop2_done
712	dsubu	$13,$3		# delay slot: always executed
713	dsubu	$12,$6
714	b	.L_bn_div_words_inner_loop2
715	dsubu	$8,1		# delay slot: estimate was too large, lower it
716	.set	reorder
717.L_bn_div_words_inner_loop2_done:
718
719	dsubu	$4,$15,$12
720	or	$2,$8		# merge the lower quotient half into $2
721	dsrl	$3,$4,$25	# $3 contains remainder if anybody wants it
722	dsrl	$6,$25		# restore $6
723
724	.set	noreorder
725	move	$5,$3
726	jr	$31
727	move	$4,$2		# delay slot: quotient is also copied into $4
728.end	bn_div_words_internal
729
# bn_mul_comba8: 8-word by 8-word multiplication producing 16 result words,
#   r[0..15] = a[0..7] * b[0..7]
# worked out one result column at a time (see the mul_add_c and r[i]=
# annotations below).
# On entry: $4 = r, $5 = a, $6 = b.
# All operand words are loaded into registers once:
#   a[0..3] -> $12..$15    a[4], a[5], a[6] -> $16, $18, $20    a[7] -> $5
#   b[0..3] -> $8..$11     b[4], b[5], b[6] -> $17, $19, $21    b[7] -> $6
# The pointer registers $5 and $6 are overwritten by their own last loads.
# Accumulators: c1 = $2, c2 = $3, c3 = $7; their roles rotate per column.
# Scratch: $24 / $25 = low / high half of the current product, $1 = carry.
# Registers $16..$21 are stored in a 6*8-byte stack frame on entry and
# reloaded before returning.
730.align	5
731.globl	bn_mul_comba8
732.ent	bn_mul_comba8
733bn_mul_comba8:
734	.set	noreorder
735	.frame	$29,6*8,$31
736	.mask	0x003f0000,-8
737	dsubu $29,6*8
738	sd	$21,5*8($29)
739	sd	$20,4*8($29)
740	sd	$19,3*8($29)
741	sd	$18,2*8($29)
742	sd	$17,1*8($29)
743	sd	$16,0*8($29)
744
745	.set	reorder
746	ld	$12,0($5)	# If compiled with -mips3 option on
747				# R5000 box assembler barks on this
748				# 1ine with "should not have mult/div
749				# as last instruction in bb (R10K
750				# bug)" warning. If anybody out there
751				# has a clue about how to circumvent
752				# this do send me a note.
753				#		<appro@fy.chalmers.se>
754
755	ld	$8,0($6)
756	ld	$13,8($5)
757	ld	$14,2*8($5)
758	dmultu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
759	ld	$15,3*8($5)
760	ld	$9,8($6)
761	ld	$10,2*8($6)
762	ld	$11,3*8($6)
763	mflo	$2
764	mfhi	$3
765
766	ld	$16,4*8($5)
767	ld	$18,5*8($5)
768	dmultu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
769	ld	$20,6*8($5)
770	ld	$5,7*8($5)
771	ld	$17,4*8($6)
772	ld	$19,5*8($6)
773	mflo	$24
774	mfhi	$25
775	daddu	$3,$24
776	sltu	$1,$3,$24
777	dmultu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
778	daddu	$7,$25,$1
779	ld	$21,6*8($6)
780	ld	$6,7*8($6)
781	sd	$2,0($4)	# r[0]=c1;
782	mflo	$24
783	mfhi	$25
784	daddu	$3,$24
785	sltu	$1,$3,$24
786	 dmultu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
787	daddu	$25,$1
788	daddu	$7,$25
789	sltu	$2,$7,$25
790	sd	$3,8($4)	# r[1]=c2;
791
792	mflo	$24
793	mfhi	$25
794	daddu	$7,$24
795	sltu	$1,$7,$24
796	dmultu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
797	daddu	$25,$1
798	daddu	$2,$25
799	mflo	$24
800	mfhi	$25
801	daddu	$7,$24
802	sltu	$1,$7,$24
803	dmultu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
804	daddu	$25,$1
805	daddu	$2,$25
806	sltu	$3,$2,$25
807	mflo	$24
808	mfhi	$25
809	daddu	$7,$24
810	sltu	$1,$7,$24
811	 dmultu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
812	daddu	$25,$1
813	daddu	$2,$25
814	sltu	$1,$2,$25
815	daddu	$3,$1
816	sd	$7,2*8($4)	# r[2]=c3;
817
818	mflo	$24
819	mfhi	$25
820	daddu	$2,$24
821	sltu	$1,$2,$24
822	dmultu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
823	daddu	$25,$1
824	daddu	$3,$25
825	sltu	$7,$3,$25
826	mflo	$24
827	mfhi	$25
828	daddu	$2,$24
829	sltu	$1,$2,$24
830	dmultu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
831	daddu	$25,$1
832	daddu	$3,$25
833	sltu	$1,$3,$25
834	daddu	$7,$1
835	mflo	$24
836	mfhi	$25
837	daddu	$2,$24
838	sltu	$1,$2,$24
839	dmultu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
840	daddu	$25,$1
841	daddu	$3,$25
842	sltu	$1,$3,$25
843	daddu	$7,$1
844	mflo	$24
845	mfhi	$25
846	daddu	$2,$24
847	sltu	$1,$2,$24
848	 dmultu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
849	daddu	$25,$1
850	daddu	$3,$25
851	sltu	$1,$3,$25
852	daddu	$7,$1
853	sd	$2,3*8($4)	# r[3]=c1;
854
855	mflo	$24
856	mfhi	$25
857	daddu	$3,$24
858	sltu	$1,$3,$24
859	dmultu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
860	daddu	$25,$1
861	daddu	$7,$25
862	sltu	$2,$7,$25
863	mflo	$24
864	mfhi	$25
865	daddu	$3,$24
866	sltu	$1,$3,$24
867	dmultu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
868	daddu	$25,$1
869	daddu	$7,$25
870	sltu	$1,$7,$25
871	daddu	$2,$1
872	mflo	$24
873	mfhi	$25
874	daddu	$3,$24
875	sltu	$1,$3,$24
876	dmultu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
877	daddu	$25,$1
878	daddu	$7,$25
879	sltu	$1,$7,$25
880	daddu	$2,$1
881	mflo	$24
882	mfhi	$25
883	daddu	$3,$24
884	sltu	$1,$3,$24
885	dmultu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
886	daddu	$25,$1
887	daddu	$7,$25
888	sltu	$1,$7,$25
889	daddu	$2,$1
890	mflo	$24
891	mfhi	$25
892	daddu	$3,$24
893	sltu	$1,$3,$24
894	 dmultu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
895	daddu	$25,$1
896	daddu	$7,$25
897	sltu	$1,$7,$25
898	daddu	$2,$1
899	sd	$3,4*8($4)	# r[4]=c2;
900
901	mflo	$24
902	mfhi	$25
903	daddu	$7,$24
904	sltu	$1,$7,$24
905	dmultu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
906	daddu	$25,$1
907	daddu	$2,$25
908	sltu	$3,$2,$25
909	mflo	$24
910	mfhi	$25
911	daddu	$7,$24
912	sltu	$1,$7,$24
913	dmultu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
914	daddu	$25,$1
915	daddu	$2,$25
916	sltu	$1,$2,$25
917	daddu	$3,$1
918	mflo	$24
919	mfhi	$25
920	daddu	$7,$24
921	sltu	$1,$7,$24
922	dmultu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
923	daddu	$25,$1
924	daddu	$2,$25
925	sltu	$1,$2,$25
926	daddu	$3,$1
927	mflo	$24
928	mfhi	$25
929	daddu	$7,$24
930	sltu	$1,$7,$24
931	dmultu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
932	daddu	$25,$1
933	daddu	$2,$25
934	sltu	$1,$2,$25
935	daddu	$3,$1
936	mflo	$24
937	mfhi	$25
938	daddu	$7,$24
939	sltu	$1,$7,$24
940	dmultu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
941	daddu	$25,$1
942	daddu	$2,$25
943	sltu	$1,$2,$25
944	daddu	$3,$1
945	mflo	$24
946	mfhi	$25
947	daddu	$7,$24
948	sltu	$1,$7,$24
949	 dmultu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
950	daddu	$25,$1
951	daddu	$2,$25
952	sltu	$1,$2,$25
953	daddu	$3,$1
954	sd	$7,5*8($4)	# r[5]=c3;
955
956	mflo	$24
957	mfhi	$25
958	daddu	$2,$24
959	sltu	$1,$2,$24
960	dmultu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
961	daddu	$25,$1
962	daddu	$3,$25
963	sltu	$7,$3,$25
964	mflo	$24
965	mfhi	$25
966	daddu	$2,$24
967	sltu	$1,$2,$24
968	dmultu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
969	daddu	$25,$1
970	daddu	$3,$25
971	sltu	$1,$3,$25
972	daddu	$7,$1
973	mflo	$24
974	mfhi	$25
975	daddu	$2,$24
976	sltu	$1,$2,$24
977	dmultu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
978	daddu	$25,$1
979	daddu	$3,$25
980	sltu	$1,$3,$25
981	daddu	$7,$1
982	mflo	$24
983	mfhi	$25
984	daddu	$2,$24
985	sltu	$1,$2,$24
986	dmultu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
987	daddu	$25,$1
988	daddu	$3,$25
989	sltu	$1,$3,$25
990	daddu	$7,$1
991	mflo	$24
992	mfhi	$25
993	daddu	$2,$24
994	sltu	$1,$2,$24
995	dmultu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
996	daddu	$25,$1
997	daddu	$3,$25
998	sltu	$1,$3,$25
999	daddu	$7,$1
1000	mflo	$24
1001	mfhi	$25
1002	daddu	$2,$24
1003	sltu	$1,$2,$24
1004	dmultu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
1005	daddu	$25,$1
1006	daddu	$3,$25
1007	sltu	$1,$3,$25
1008	daddu	$7,$1
1009	mflo	$24
1010	mfhi	$25
1011	daddu	$2,$24
1012	sltu	$1,$2,$24
1013	 dmultu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
1014	daddu	$25,$1
1015	daddu	$3,$25
1016	sltu	$1,$3,$25
1017	daddu	$7,$1
1018	sd	$2,6*8($4)	# r[6]=c1;
1019
1020	mflo	$24
1021	mfhi	$25
1022	daddu	$3,$24
1023	sltu	$1,$3,$24
1024	dmultu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
1025	daddu	$25,$1
1026	daddu	$7,$25
1027	sltu	$2,$7,$25
1028	mflo	$24
1029	mfhi	$25
1030	daddu	$3,$24
1031	sltu	$1,$3,$24
1032	dmultu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
1033	daddu	$25,$1
1034	daddu	$7,$25
1035	sltu	$1,$7,$25
1036	daddu	$2,$1
1037	mflo	$24
1038	mfhi	$25
1039	daddu	$3,$24
1040	sltu	$1,$3,$24
1041	dmultu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
1042	daddu	$25,$1
1043	daddu	$7,$25
1044	sltu	$1,$7,$25
1045	daddu	$2,$1
1046	mflo	$24
1047	mfhi	$25
1048	daddu	$3,$24
1049	sltu	$1,$3,$24
1050	dmultu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
1051	daddu	$25,$1
1052	daddu	$7,$25
1053	sltu	$1,$7,$25
1054	daddu	$2,$1
1055	mflo	$24
1056	mfhi	$25
1057	daddu	$3,$24
1058	sltu	$1,$3,$24
1059	dmultu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
1060	daddu	$25,$1
1061	daddu	$7,$25
1062	sltu	$1,$7,$25
1063	daddu	$2,$1
1064	mflo	$24
1065	mfhi	$25
1066	daddu	$3,$24
1067	sltu	$1,$3,$24
1068	dmultu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
1069	daddu	$25,$1
1070	daddu	$7,$25
1071	sltu	$1,$7,$25
1072	daddu	$2,$1
1073	mflo	$24
1074	mfhi	$25
1075	daddu	$3,$24
1076	sltu	$1,$3,$24
1077	dmultu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
1078	daddu	$25,$1
1079	daddu	$7,$25
1080	sltu	$1,$7,$25
1081	daddu	$2,$1
1082	mflo	$24
1083	mfhi	$25
1084	daddu	$3,$24
1085	sltu	$1,$3,$24
1086	 dmultu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
1087	daddu	$25,$1
1088	daddu	$7,$25
1089	sltu	$1,$7,$25
1090	daddu	$2,$1
1091	sd	$3,7*8($4)	# r[7]=c2;
1092
1093	mflo	$24
1094	mfhi	$25
1095	daddu	$7,$24
1096	sltu	$1,$7,$24
1097	dmultu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
1098	daddu	$25,$1
1099	daddu	$2,$25
1100	sltu	$3,$2,$25
1101	mflo	$24
1102	mfhi	$25
1103	daddu	$7,$24
1104	sltu	$1,$7,$24
1105	dmultu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
1106	daddu	$25,$1
1107	daddu	$2,$25
1108	sltu	$1,$2,$25
1109	daddu	$3,$1
1110	mflo	$24
1111	mfhi	$25
1112	daddu	$7,$24
1113	sltu	$1,$7,$24
1114	dmultu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
1115	daddu	$25,$1
1116	daddu	$2,$25
1117	sltu	$1,$2,$25
1118	daddu	$3,$1
1119	mflo	$24
1120	mfhi	$25
1121	daddu	$7,$24
1122	sltu	$1,$7,$24
1123	dmultu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
1124	daddu	$25,$1
1125	daddu	$2,$25
1126	sltu	$1,$2,$25
1127	daddu	$3,$1
1128	mflo	$24
1129	mfhi	$25
1130	daddu	$7,$24
1131	sltu	$1,$7,$24
1132	dmultu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
1133	daddu	$25,$1
1134	daddu	$2,$25
1135	sltu	$1,$2,$25
1136	daddu	$3,$1
1137	mflo	$24
1138	mfhi	$25
1139	daddu	$7,$24
1140	sltu	$1,$7,$24
1141	dmultu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
1142	daddu	$25,$1
1143	daddu	$2,$25
1144	sltu	$1,$2,$25
1145	daddu	$3,$1
1146	mflo	$24
1147	mfhi	$25
1148	daddu	$7,$24
1149	sltu	$1,$7,$24
1150	 dmultu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
1151	daddu	$25,$1
1152	daddu	$2,$25
1153	sltu	$1,$2,$25
1154	daddu	$3,$1
1155	sd	$7,8*8($4)	# r[8]=c3;
1156
1157	mflo	$24
1158	mfhi	$25
1159	daddu	$2,$24
1160	sltu	$1,$2,$24
1161	dmultu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
1162	daddu	$25,$1
1163	daddu	$3,$25
1164	sltu	$7,$3,$25
1165	mflo	$24
1166	mfhi	$25
1167	daddu	$2,$24
1168	sltu	$1,$2,$24
1169	dmultu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
1170	daddu	$25,$1
1171	daddu	$3,$25
1172	sltu	$1,$3,$25
1173	daddu	$7,$1
1174	mflo	$24
1175	mfhi	$25
1176	daddu	$2,$24
1177	sltu	$1,$2,$24
1178	dmultu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
1179	daddu	$25,$1
1180	daddu	$3,$25
1181	sltu	$1,$3,$25
1182	daddu	$7,$1
1183	mflo	$24
1184	mfhi	$25
1185	daddu	$2,$24
1186	sltu	$1,$2,$24
1187	dmultu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
1188	daddu	$25,$1
1189	daddu	$3,$25
1190	sltu	$1,$3,$25
1191	daddu	$7,$1
1192	mflo	$24
1193	mfhi	$25
1194	daddu	$2,$24
1195	sltu	$1,$2,$24
1196	dmultu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
1197	daddu	$25,$1
1198	daddu	$3,$25
1199	sltu	$1,$3,$25
1200	daddu	$7,$1
1201	mflo	$24
1202	mfhi	$25
1203	daddu	$2,$24
1204	sltu	$1,$2,$24
1205	 dmultu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
1206	daddu	$25,$1
1207	daddu	$3,$25
1208	sltu	$1,$3,$25
1209	daddu	$7,$1
1210	sd	$2,9*8($4)	# r[9]=c1;
1211
1212	mflo	$24
1213	mfhi	$25
1214	daddu	$3,$24
1215	sltu	$1,$3,$24
1216	dmultu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
1217	daddu	$25,$1
1218	daddu	$7,$25
1219	sltu	$2,$7,$25
1220	mflo	$24
1221	mfhi	$25
1222	daddu	$3,$24
1223	sltu	$1,$3,$24
1224	dmultu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
1225	daddu	$25,$1
1226	daddu	$7,$25
1227	sltu	$1,$7,$25
1228	daddu	$2,$1
1229	mflo	$24
1230	mfhi	$25
1231	daddu	$3,$24
1232	sltu	$1,$3,$24
1233	dmultu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
1234	daddu	$25,$1
1235	daddu	$7,$25
1236	sltu	$1,$7,$25
1237	daddu	$2,$1
1238	mflo	$24
1239	mfhi	$25
1240	daddu	$3,$24
1241	sltu	$1,$3,$24
1242	dmultu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
1243	daddu	$25,$1
1244	daddu	$7,$25
1245	sltu	$1,$7,$25
1246	daddu	$2,$1
1247	mflo	$24
1248	mfhi	$25
1249	daddu	$3,$24
1250	sltu	$1,$3,$24
1251	dmultu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
1252	daddu	$25,$1
1253	daddu	$7,$25
1254	sltu	$1,$7,$25
1255	daddu	$2,$1
1256	sd	$3,10*8($4)	# r[10]=c2;
1257
1258	mflo	$24
1259	mfhi	$25
1260	daddu	$7,$24
1261	sltu	$1,$7,$24
1262	dmultu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
1263	daddu	$25,$1
1264	daddu	$2,$25
1265	sltu	$3,$2,$25
1266	mflo	$24
1267	mfhi	$25
1268	daddu	$7,$24
1269	sltu	$1,$7,$24
1270	dmultu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
1271	daddu	$25,$1
1272	daddu	$2,$25
1273	sltu	$1,$2,$25
1274	daddu	$3,$1
1275	mflo	$24
1276	mfhi	$25
1277	daddu	$7,$24
1278	sltu	$1,$7,$24
1279	dmultu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
1280	daddu	$25,$1
1281	daddu	$2,$25
1282	sltu	$1,$2,$25
1283	daddu	$3,$1
1284	mflo	$24
1285	mfhi	$25
1286	daddu	$7,$24
1287	sltu	$1,$7,$24
1288	 dmultu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
1289	daddu	$25,$1
1290	daddu	$2,$25
1291	sltu	$1,$2,$25
1292	daddu	$3,$1
1293	sd	$7,11*8($4)	# r[11]=c3;
1294
1295	mflo	$24
1296	mfhi	$25
1297	daddu	$2,$24
1298	sltu	$1,$2,$24
1299	dmultu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
1300	daddu	$25,$1
1301	daddu	$3,$25
1302	sltu	$7,$3,$25
1303	mflo	$24
1304	mfhi	$25
1305	daddu	$2,$24
1306	sltu	$1,$2,$24
1307	dmultu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
1308	daddu	$25,$1
1309	daddu	$3,$25
1310	sltu	$1,$3,$25
1311	daddu	$7,$1
1312	mflo	$24
1313	mfhi	$25
1314	daddu	$2,$24
1315	sltu	$1,$2,$24
1316	 dmultu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
1317	daddu	$25,$1
1318	daddu	$3,$25
1319	sltu	$1,$3,$25
1320	daddu	$7,$1
1321	sd	$2,12*8($4)	# r[12]=c1;
1322
1323	mflo	$24
1324	mfhi	$25
1325	daddu	$3,$24
1326	sltu	$1,$3,$24
1327	dmultu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
1328	daddu	$25,$1
1329	daddu	$7,$25
1330	sltu	$2,$7,$25
1331	mflo	$24
1332	mfhi	$25
1333	daddu	$3,$24
1334	sltu	$1,$3,$24
1335	dmultu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
1336	daddu	$25,$1
1337	daddu	$7,$25
1338	sltu	$1,$7,$25
1339	daddu	$2,$1
1340	sd	$3,13*8($4)	# r[13]=c2;
1341
1342	mflo	$24
1343	mfhi	$25
1344	daddu	$7,$24
1345	sltu	$1,$7,$24
1346	daddu	$25,$1
1347	daddu	$2,$25
1348	sd	$7,14*8($4)	# r[14]=c3;
1349	sd	$2,15*8($4)	# r[15]=c1;
1350
1351	.set	noreorder
1352	ld	$21,5*8($29)
1353	ld	$20,4*8($29)
1354	ld	$19,3*8($29)
1355	ld	$18,2*8($29)
1356	ld	$17,1*8($29)
1357	ld	$16,0*8($29)
1358	jr	$31
1359	daddu $29,6*8
1360.end	bn_mul_comba8
1361
# bn_mul_comba4: 4-word by 4-word multiplication producing 8 result words,
#   r[0..7] = a[0..3] * b[0..3]
# worked out one result column at a time (see the mul_add_c annotations).
# On entry: $4 = r, $5 = a, $6 = b.
# Operand words are loaded into registers once:
#   a[0..3] -> $12..$15    b[0..3] -> $8..$11
# Accumulators: c1 = $2, c2 = $3, c3 = $7; their roles rotate per column.
# Scratch: $24 / $25 = low / high half of the current product, $1 = carry.
# No stack frame is set up; $29 is never referenced here.
1362.align	5
1363.globl	bn_mul_comba4
1364.ent	bn_mul_comba4
1365bn_mul_comba4:
1366	.set	reorder
1367	ld	$12,0($5)
1368	ld	$8,0($6)
1369	ld	$13,8($5)
1370	ld	$14,2*8($5)
1371	dmultu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
1372	ld	$15,3*8($5)
1373	ld	$9,8($6)
1374	ld	$10,2*8($6)
1375	ld	$11,3*8($6)
1376	mflo	$2
1377	mfhi	$3
1378	sd	$2,0($4)	# r[0]=c1;
1379
1380	dmultu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
1381	mflo	$24
1382	mfhi	$25
1383	daddu	$3,$24
1384	sltu	$1,$3,$24
1385	dmultu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
1386	daddu	$7,$25,$1
1387	mflo	$24
1388	mfhi	$25
1389	daddu	$3,$24
1390	sltu	$1,$3,$24
1391	 dmultu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
1392	daddu	$25,$1
1393	daddu	$7,$25
1394	sltu	$2,$7,$25
1395	sd	$3,8($4)	# r[1]=c2;
1396
1397	mflo	$24
1398	mfhi	$25
1399	daddu	$7,$24
1400	sltu	$1,$7,$24
1401	dmultu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
1402	daddu	$25,$1
1403	daddu	$2,$25
1404	mflo	$24
1405	mfhi	$25
1406	daddu	$7,$24
1407	sltu	$1,$7,$24
1408	dmultu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
1409	daddu	$25,$1
1410	daddu	$2,$25
1411	sltu	$3,$2,$25
1412	mflo	$24
1413	mfhi	$25
1414	daddu	$7,$24
1415	sltu	$1,$7,$24
1416	 dmultu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
1417	daddu	$25,$1
1418	daddu	$2,$25
1419	sltu	$1,$2,$25
1420	daddu	$3,$1
1421	sd	$7,2*8($4)	# r[2]=c3;
1422
1423	mflo	$24
1424	mfhi	$25
1425	daddu	$2,$24
1426	sltu	$1,$2,$24
1427	dmultu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
1428	daddu	$25,$1
1429	daddu	$3,$25
1430	sltu	$7,$3,$25
1431	mflo	$24
1432	mfhi	$25
1433	daddu	$2,$24
1434	sltu	$1,$2,$24
1435	dmultu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
1436	daddu	$25,$1
1437	daddu	$3,$25
1438	sltu	$1,$3,$25
1439	daddu	$7,$1
1440	mflo	$24
1441	mfhi	$25
1442	daddu	$2,$24
1443	sltu	$1,$2,$24
1444	dmultu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
1445	daddu	$25,$1
1446	daddu	$3,$25
1447	sltu	$1,$3,$25
1448	daddu	$7,$1
1449	mflo	$24
1450	mfhi	$25
1451	daddu	$2,$24
1452	sltu	$1,$2,$24
1453	 dmultu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
1454	daddu	$25,$1
1455	daddu	$3,$25
1456	sltu	$1,$3,$25
1457	daddu	$7,$1
1458	sd	$2,3*8($4)	# r[3]=c1;
1459
1460	mflo	$24
1461	mfhi	$25
1462	daddu	$3,$24
1463	sltu	$1,$3,$24
1464	dmultu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
1465	daddu	$25,$1
1466	daddu	$7,$25
1467	sltu	$2,$7,$25
1468	mflo	$24
1469	mfhi	$25
1470	daddu	$3,$24
1471	sltu	$1,$3,$24
1472	dmultu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
1473	daddu	$25,$1
1474	daddu	$7,$25
1475	sltu	$1,$7,$25
1476	daddu	$2,$1
1477	mflo	$24
1478	mfhi	$25
1479	daddu	$3,$24
1480	sltu	$1,$3,$24
1481	 dmultu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
1482	daddu	$25,$1
1483	daddu	$7,$25
1484	sltu	$1,$7,$25
1485	daddu	$2,$1
1486	sd	$3,4*8($4)	# r[4]=c2;
1487
1488	mflo	$24
1489	mfhi	$25
1490	daddu	$7,$24
1491	sltu	$1,$7,$24
1492	dmultu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
1493	daddu	$25,$1
1494	daddu	$2,$25
1495	sltu	$3,$2,$25
1496	mflo	$24
1497	mfhi	$25
1498	daddu	$7,$24
1499	sltu	$1,$7,$24
1500	 dmultu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
1501	daddu	$25,$1
1502	daddu	$2,$25
1503	sltu	$1,$2,$25
1504	daddu	$3,$1
1505	sd	$7,5*8($4)	# r[5]=c3;
1506
1507	mflo	$24
1508	mfhi	$25
1509	daddu	$2,$24
1510	sltu	$1,$2,$24
1511	daddu	$25,$1
1512	daddu	$3,$25
1513	sd	$2,6*8($4)	# r[6]=c1;
1514	sd	$3,7*8($4)	# r[7]=c2;
1515
1516	.set	noreorder
1517	jr	$31
1518	nop
1519.end	bn_mul_comba4
1520
1521.align	5
1522.globl	bn_sqr_comba8
1523.ent	bn_sqr_comba8
1524bn_sqr_comba8:
1525	.set	reorder
1526	ld	$12,0($5)
1527	ld	$13,8($5)
1528	ld	$14,2*8($5)
1529	ld	$15,3*8($5)
1530
1531	dmultu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
1532	ld	$8,4*8($5)
1533	ld	$9,5*8($5)
1534	ld	$10,6*8($5)
1535	ld	$11,7*8($5)
1536	mflo	$2
1537	mfhi	$3
1538	sd	$2,0($4)
1539
1540	dmultu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
1541	mflo	$24
1542	mfhi	$25
1543	slt	$2,$25,$0
1544	dsll	$25,1
1545	 dmultu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
1546	slt	$6,$24,$0
1547	daddu	$25,$6
1548	dsll	$24,1
1549	daddu	$3,$24
1550	sltu	$1,$3,$24
1551	daddu	$7,$25,$1
1552	sd	$3,8($4)
1553	mflo	$24
1554	mfhi	$25
1555	daddu	$7,$24
1556	sltu	$1,$7,$24
1557	 dmultu	$13,$13			# forward multiplication
1558	daddu	$7,$24
1559	daddu	$1,$25
1560	sltu	$24,$7,$24
1561	daddu	$2,$1
1562	daddu	$25,$24
1563	sltu	$3,$2,$1
1564	daddu	$2,$25
1565	sltu	$25,$2,$25
1566	daddu	$3,$25
1567	mflo	$24
1568	mfhi	$25
1569	daddu	$7,$24
1570	sltu	$1,$7,$24
1571	 dmultu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
1572	daddu	$25,$1
1573	daddu	$2,$25
1574	sltu	$1,$2,$25
1575	daddu	$3,$1
1576	sd	$7,2*8($4)
1577	mflo	$24
1578	mfhi	$25
1579	daddu	$2,$24
1580	sltu	$1,$2,$24
1581	 dmultu	$13,$14			# forward multiplication
1582	daddu	$2,$24
1583	daddu	$1,$25
1584	sltu	$24,$2,$24
1585	daddu	$3,$1
1586	daddu	$25,$24
1587	sltu	$7,$3,$1
1588	daddu	$3,$25
1589	sltu	$25,$3,$25
1590	daddu	$7,$25
1591	mflo	$24
1592	mfhi	$25
1593	daddu	$2,$24
1594	sltu	$1,$2,$24
1595	 dmultu	$8,$12			# forward multiplication
1596	daddu	$2,$24
1597	daddu	$1,$25
1598	sltu	$24,$2,$24
1599	daddu	$3,$1
1600	daddu	$25,$24
1601	sltu	$1,$3,$1
1602	daddu	$3,$25
1603	daddu	$7,$1
1604	sltu	$25,$3,$25
1605	daddu	$7,$25
1606	sd	$2,3*8($4)
1607	mflo	$24
1608	mfhi	$25
1609	daddu	$3,$24
1610	sltu	$1,$3,$24
1611	 dmultu	$15,$13			# forward multiplication
1612	daddu	$3,$24
1613	daddu	$1,$25
1614	sltu	$24,$3,$24
1615	daddu	$7,$1
1616	daddu	$25,$24
1617	sltu	$2,$7,$1
1618	daddu	$7,$25
1619	sltu	$25,$7,$25
1620	daddu	$2,$25
1621	mflo	$24
1622	mfhi	$25
1623	daddu	$3,$24
1624	sltu	$1,$3,$24
1625	 dmultu	$14,$14			# forward multiplication
1626	daddu	$3,$24
1627	daddu	$1,$25
1628	sltu	$24,$3,$24
1629	daddu	$7,$1
1630	daddu	$25,$24
1631	sltu	$1,$7,$1
1632	daddu	$7,$25
1633	daddu	$2,$1
1634	sltu	$25,$7,$25
1635	daddu	$2,$25
1636	mflo	$24
1637	mfhi	$25
1638	daddu	$3,$24
1639	sltu	$1,$3,$24
1640	 dmultu	$12,$9		# mul_add_c2(a[0],b[5],c3,c1,c2);
1641	daddu	$25,$1
1642	daddu	$7,$25
1643	sltu	$1,$7,$25
1644	daddu	$2,$1
1645	sd	$3,4*8($4)
1646	mflo	$24
1647	mfhi	$25
1648	daddu	$7,$24
1649	sltu	$1,$7,$24
1650	 dmultu	$13,$8			# forward multiplication
1651	daddu	$7,$24
1652	daddu	$1,$25
1653	sltu	$24,$7,$24
1654	daddu	$2,$1
1655	daddu	$25,$24
1656	sltu	$3,$2,$1
1657	daddu	$2,$25
1658	sltu	$25,$2,$25
1659	daddu	$3,$25
1660	mflo	$24
1661	mfhi	$25
1662	daddu	$7,$24
1663	sltu	$1,$7,$24
1664	 dmultu	$14,$15			# forward multiplication
1665	daddu	$7,$24
1666	daddu	$1,$25
1667	sltu	$24,$7,$24
1668	daddu	$2,$1
1669	daddu	$25,$24
1670	sltu	$1,$2,$1
1671	daddu	$2,$25
1672	daddu	$3,$1
1673	sltu	$25,$2,$25
1674	daddu	$3,$25
1675	mflo	$24
1676	mfhi	$25
1677	daddu	$7,$24
1678	sltu	$1,$7,$24
1679	 dmultu	$10,$12			# forward multiplication
1680	daddu	$7,$24
1681	daddu	$1,$25
1682	sltu	$24,$7,$24
1683	daddu	$2,$1
1684	daddu	$25,$24
1685	sltu	$1,$2,$1
1686	daddu	$2,$25
1687	daddu	$3,$1
1688	sltu	$25,$2,$25
1689	daddu	$3,$25
1690	sd	$7,5*8($4)
1691	mflo	$24
1692	mfhi	$25
1693	daddu	$2,$24
1694	sltu	$1,$2,$24
1695	 dmultu	$9,$13			# forward multiplication
1696	daddu	$2,$24
1697	daddu	$1,$25
1698	sltu	$24,$2,$24
1699	daddu	$3,$1
1700	daddu	$25,$24
1701	sltu	$7,$3,$1
1702	daddu	$3,$25
1703	sltu	$25,$3,$25
1704	daddu	$7,$25
1705	mflo	$24
1706	mfhi	$25
1707	daddu	$2,$24
1708	sltu	$1,$2,$24
1709	 dmultu	$8,$14			# forward multiplication
1710	daddu	$2,$24
1711	daddu	$1,$25
1712	sltu	$24,$2,$24
1713	daddu	$3,$1
1714	daddu	$25,$24
1715	sltu	$1,$3,$1
1716	daddu	$3,$25
1717	daddu	$7,$1
1718	sltu	$25,$3,$25
1719	daddu	$7,$25
1720	mflo	$24
1721	mfhi	$25
1722	daddu	$2,$24
1723	sltu	$1,$2,$24
1724	 dmultu	$15,$15			# forward multiplication
1725	daddu	$2,$24
1726	daddu	$1,$25
1727	sltu	$24,$2,$24
1728	daddu	$3,$1
1729	daddu	$25,$24
1730	sltu	$1,$3,$1
1731	daddu	$3,$25
1732	daddu	$7,$1
1733	sltu	$25,$3,$25
1734	daddu	$7,$25
1735	mflo	$24
1736	mfhi	$25
1737	daddu	$2,$24
1738	sltu	$1,$2,$24
1739	 dmultu	$12,$11		# mul_add_c2(a[0],b[7],c2,c3,c1);
1740	daddu	$25,$1
1741	daddu	$3,$25
1742	sltu	$1,$3,$25
1743	daddu	$7,$1
1744	sd	$2,6*8($4)
1745	mflo	$24
1746	mfhi	$25
1747	daddu	$3,$24
1748	sltu	$1,$3,$24
1749	 dmultu	$13,$10			# forward multiplication
1750	daddu	$3,$24
1751	daddu	$1,$25
1752	sltu	$24,$3,$24
1753	daddu	$7,$1
1754	daddu	$25,$24
1755	sltu	$2,$7,$1
1756	daddu	$7,$25
1757	sltu	$25,$7,$25
1758	daddu	$2,$25
1759	mflo	$24
1760	mfhi	$25
1761	daddu	$3,$24
1762	sltu	$1,$3,$24
1763	 dmultu	$14,$9			# forward multiplication
1764	daddu	$3,$24
1765	daddu	$1,$25
1766	sltu	$24,$3,$24
1767	daddu	$7,$1
1768	daddu	$25,$24
1769	sltu	$1,$7,$1
1770	daddu	$7,$25
1771	daddu	$2,$1
1772	sltu	$25,$7,$25
1773	daddu	$2,$25
1774	mflo	$24
1775	mfhi	$25
1776	daddu	$3,$24
1777	sltu	$1,$3,$24
1778	 dmultu	$15,$8			# forward multiplication
1779	daddu	$3,$24
1780	daddu	$1,$25
1781	sltu	$24,$3,$24
1782	daddu	$7,$1
1783	daddu	$25,$24
1784	sltu	$1,$7,$1
1785	daddu	$7,$25
1786	daddu	$2,$1
1787	sltu	$25,$7,$25
1788	daddu	$2,$25
1789	mflo	$24
1790	mfhi	$25
1791	daddu	$3,$24
1792	sltu	$1,$3,$24
1793	 dmultu	$11,$13			# forward multiplication
1794	daddu	$3,$24
1795	daddu	$1,$25
1796	sltu	$24,$3,$24
1797	daddu	$7,$1
1798	daddu	$25,$24
1799	sltu	$1,$7,$1
1800	daddu	$7,$25
1801	daddu	$2,$1
1802	sltu	$25,$7,$25
1803	daddu	$2,$25
1804	sd	$3,7*8($4)
1805	mflo	$24
1806	mfhi	$25
1807	daddu	$7,$24
1808	sltu	$1,$7,$24
1809	 dmultu	$10,$14			# forward multiplication
1810	daddu	$7,$24
1811	daddu	$1,$25
1812	sltu	$24,$7,$24
1813	daddu	$2,$1
1814	daddu	$25,$24
1815	sltu	$3,$2,$1
1816	daddu	$2,$25
1817	sltu	$25,$2,$25
1818	daddu	$3,$25
1819	mflo	$24
1820	mfhi	$25
1821	daddu	$7,$24
1822	sltu	$1,$7,$24
1823	 dmultu	$9,$15			# forward multiplication
1824	daddu	$7,$24
1825	daddu	$1,$25
1826	sltu	$24,$7,$24
1827	daddu	$2,$1
1828	daddu	$25,$24
1829	sltu	$1,$2,$1
1830	daddu	$2,$25
1831	daddu	$3,$1
1832	sltu	$25,$2,$25
1833	daddu	$3,$25
1834	mflo	$24
1835	mfhi	$25
1836	daddu	$7,$24
1837	sltu	$1,$7,$24
1838	 dmultu	$8,$8			# forward multiplication
1839	daddu	$7,$24
1840	daddu	$1,$25
1841	sltu	$24,$7,$24
1842	daddu	$2,$1
1843	daddu	$25,$24
1844	sltu	$1,$2,$1
1845	daddu	$2,$25
1846	daddu	$3,$1
1847	sltu	$25,$2,$25
1848	daddu	$3,$25
1849	mflo	$24
1850	mfhi	$25
1851	daddu	$7,$24
1852	sltu	$1,$7,$24
1853	 dmultu	$14,$11		# mul_add_c2(a[2],b[7],c1,c2,c3);
1854	daddu	$25,$1
1855	daddu	$2,$25
1856	sltu	$1,$2,$25
1857	daddu	$3,$1
1858	sd	$7,8*8($4)
1859	mflo	$24
1860	mfhi	$25
1861	daddu	$2,$24
1862	sltu	$1,$2,$24
1863	 dmultu	$15,$10			# forward multiplication
1864	daddu	$2,$24
1865	daddu	$1,$25
1866	sltu	$24,$2,$24
1867	daddu	$3,$1
1868	daddu	$25,$24
1869	sltu	$7,$3,$1
1870	daddu	$3,$25
1871	sltu	$25,$3,$25
1872	daddu	$7,$25
1873	mflo	$24
1874	mfhi	$25
1875	daddu	$2,$24
1876	sltu	$1,$2,$24
1877	 dmultu	$8,$9			# forward multiplication
1878	daddu	$2,$24
1879	daddu	$1,$25
1880	sltu	$24,$2,$24
1881	daddu	$3,$1
1882	daddu	$25,$24
1883	sltu	$1,$3,$1
1884	daddu	$3,$25
1885	daddu	$7,$1
1886	sltu	$25,$3,$25
1887	daddu	$7,$25
1888	mflo	$24
1889	mfhi	$25
1890	daddu	$2,$24
1891	sltu	$1,$2,$24
1892	 dmultu	$11,$15			# forward multiplication
1893	daddu	$2,$24
1894	daddu	$1,$25
1895	sltu	$24,$2,$24
1896	daddu	$3,$1
1897	daddu	$25,$24
1898	sltu	$1,$3,$1
1899	daddu	$3,$25
1900	daddu	$7,$1
1901	sltu	$25,$3,$25
1902	daddu	$7,$25
1903	sd	$2,9*8($4)
1904	mflo	$24
1905	mfhi	$25
1906	daddu	$3,$24
1907	sltu	$1,$3,$24
1908	 dmultu	$10,$8			# forward multiplication
1909	daddu	$3,$24
1910	daddu	$1,$25
1911	sltu	$24,$3,$24
1912	daddu	$7,$1
1913	daddu	$25,$24
1914	sltu	$2,$7,$1
1915	daddu	$7,$25
1916	sltu	$25,$7,$25
1917	daddu	$2,$25
1918	mflo	$24
1919	mfhi	$25
1920	daddu	$3,$24
1921	sltu	$1,$3,$24
1922	 dmultu	$9,$9			# forward multiplication
1923	daddu	$3,$24
1924	daddu	$1,$25
1925	sltu	$24,$3,$24
1926	daddu	$7,$1
1927	daddu	$25,$24
1928	sltu	$1,$7,$1
1929	daddu	$7,$25
1930	daddu	$2,$1
1931	sltu	$25,$7,$25
1932	daddu	$2,$25
1933	mflo	$24
1934	mfhi	$25
1935	daddu	$3,$24
1936	sltu	$1,$3,$24
1937	 dmultu	$8,$11		# mul_add_c2(a[4],b[7],c3,c1,c2);
1938	daddu	$25,$1
1939	daddu	$7,$25
1940	sltu	$1,$7,$25
1941	daddu	$2,$1
1942	sd	$3,10*8($4)
1943	mflo	$24
1944	mfhi	$25
1945	daddu	$7,$24
1946	sltu	$1,$7,$24
1947	 dmultu	$9,$10			# forward multiplication
1948	daddu	$7,$24
1949	daddu	$1,$25
1950	sltu	$24,$7,$24
1951	daddu	$2,$1
1952	daddu	$25,$24
1953	sltu	$3,$2,$1
1954	daddu	$2,$25
1955	sltu	$25,$2,$25
1956	daddu	$3,$25
1957	mflo	$24
1958	mfhi	$25
1959	daddu	$7,$24
1960	sltu	$1,$7,$24
1961	 dmultu	$11,$9			# forward multiplication
1962	daddu	$7,$24
1963	daddu	$1,$25
1964	sltu	$24,$7,$24
1965	daddu	$2,$1
1966	daddu	$25,$24
1967	sltu	$1,$2,$1
1968	daddu	$2,$25
1969	daddu	$3,$1
1970	sltu	$25,$2,$25
1971	daddu	$3,$25
1972	sd	$7,11*8($4)
1973	mflo	$24
1974	mfhi	$25
1975	daddu	$2,$24
1976	sltu	$1,$2,$24
1977	 dmultu	$10,$10			# forward multiplication
1978	daddu	$2,$24
1979	daddu	$1,$25
1980	sltu	$24,$2,$24
1981	daddu	$3,$1
1982	daddu	$25,$24
1983	sltu	$7,$3,$1
1984	daddu	$3,$25
1985	sltu	$25,$3,$25
1986	daddu	$7,$25
1987	mflo	$24
1988	mfhi	$25
1989	daddu	$2,$24
1990	sltu	$1,$2,$24
1991	 dmultu	$10,$11		# mul_add_c2(a[6],b[7],c2,c3,c1);
1992	daddu	$25,$1
1993	daddu	$3,$25
1994	sltu	$1,$3,$25
1995	daddu	$7,$1
1996	sd	$2,12*8($4)
1997	mflo	$24
1998	mfhi	$25
1999	daddu	$3,$24
2000	sltu	$1,$3,$24
2001	 dmultu	$11,$11			# forward multiplication
2002	daddu	$3,$24
2003	daddu	$1,$25
2004	sltu	$24,$3,$24
2005	daddu	$7,$1
2006	daddu	$25,$24
2007	sltu	$2,$7,$1
2008	daddu	$7,$25
2009	sltu	$25,$7,$25
2010	daddu	$2,$25
2011	sd	$3,13*8($4)
2012
2013	mflo	$24
2014	mfhi	$25
2015	daddu	$7,$24
2016	sltu	$1,$7,$24
2017	daddu	$25,$1
2018	daddu	$2,$25
2019	sd	$7,14*8($4)
2020	sd	$2,15*8($4)
2021
2022	.set	noreorder
2023	jr	$31
2024	nop
2025.end	bn_sqr_comba8
2026
# bn_sqr_comba4 - comba squaring of a four-word operand.
#
# Reads four 64-bit words a[0..3] through $5 and stores the eight-word
# square r[0..7] through $4.  Leaf routine, no stack use.
#
# Register roles:
#	$12-$15		a[0]..a[3]
#	$2, $3, $7	c1, c2, c3 - rotating three-word column accumulator
#	$24, $25	t1, t2 - LO and HI of the most recent product
#	$1, $6		scratch for carry bits
#
# Cross products a[i]*a[j] with i != j are needed twice.  The first
# one, a[0]*a[1], is doubled by shifting; all later ones are simply
# added twice.  Each dmultu is issued one step ahead of the code that
# consumes its result, which is what the forward multiplication
# remarks refer to.
#
# Clobbers $1-$3, $6, $7, $12-$15, $24, $25 and HI/LO.
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	ld	$12,0($5)		# a[0]
	ld	$13,8($5)		# a[1]
	dmultu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$14,2*8($5)		# a[2]
	ld	$15,3*8($5)		# a[3]
	mflo	$2			# c1 = LO(a[0]^2)
	mfhi	$3			# c2 = HI(a[0]^2)
	sd	$2,0($4)		# r[0] = c1

	# Column 1: (c2,c3,c1) += 2*a[0]*a[1], doubled by shifting t2:t1.
	dmultu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	$24
	mfhi	$25
	slt	$2,$25,$0		# c1 = bit shifted out of t2
	dsll	$25,1
	 dmultu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0		# bit shifted out of t1 ...
	daddu	$25,$6			# ... moves into bit 0 of t2
	dsll	$24,1
	daddu	$3,$24			# c2 += 2*t1 (low word)
	sltu	$1,$3,$24		# carry out of c2
	daddu	$7,$25,$1		# c3 = doubled t2 + carry, may wrap
	sd	$3,8($4)		# r[1] = c2
	sltu	$1,$7,$25		# carry out of c3: doubled t2 can be
					# all ones while the c2 carry is 1,
					# e.g. a[0]=2^64-2, a[1]=2^63+1
	daddu	$2,$1			# propagate that carry into c1

	# Column 2: (c3,c1,c2) += 2*a[2]*a[0] + a[1]^2.
	mflo	$24			# t2:t1 = a[2]*a[0]
	mfhi	$25
	daddu	$7,$24			# first addition of t1
	sltu	$1,$7,$24
	 dmultu	$13,$13			# forward multiplication
	daddu	$7,$24			# second addition of t1
	daddu	$1,$25			# t2 plus carry of first addition
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24			# t2 plus carry of second addition
	sltu	$3,$2,$1		# c2 restarts from this carry
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	$24			# t2:t1 = a[1]^2, added once
	mfhi	$25
	daddu	$7,$24
	sltu	$1,$7,$24
	 dmultu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,2*8($4)		# r[2] = c3

	# Column 3: (c1,c2,c3) += 2*a[0]*a[3] + 2*a[1]*a[2].
	mflo	$24			# t2:t1 = a[0]*a[3]
	mfhi	$25
	daddu	$2,$24
	sltu	$1,$2,$24
	 dmultu	$13,$14			# forward multiplication
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1		# c3 restarts from this carry
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	$24			# t2:t1 = a[1]*a[2]
	mfhi	$25
	daddu	$2,$24
	sltu	$1,$2,$24
	 dmultu	$15,$13			# forward multiplication
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	sd	$2,3*8($4)		# r[3] = c1

	# Column 4: (c2,c3,c1) += 2*a[3]*a[1] + a[2]^2.
	mflo	$24			# t2:t1 = a[3]*a[1]
	mfhi	$25
	daddu	$3,$24
	sltu	$1,$3,$24
	 dmultu	$14,$14			# forward multiplication
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1		# c1 restarts from this carry
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	$24			# t2:t1 = a[2]^2, added once
	mfhi	$25
	daddu	$3,$24
	sltu	$1,$3,$24
	 dmultu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)		# r[4] = c2

	# Column 5: (c3,c1,c2) += 2*a[2]*a[3].
	mflo	$24			# t2:t1 = a[2]*a[3]
	mfhi	$25
	daddu	$7,$24
	sltu	$1,$7,$24
	 dmultu	$15,$15			# forward multiplication
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1		# c2 restarts from this carry
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	sd	$7,5*8($4)		# r[5] = c3

	# Columns 6 and 7: (c1,c2) += a[3]^2.
	mflo	$24			# t2:t1 = a[3]^2
	mfhi	$25
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)		# r[6] = c1
	sd	$3,7*8($4)		# r[7] = c2

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_sqr_comba4
2161