xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips64.S (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1#include "mips_arch.h"
2
/*
 * Divide helper macros; ddivu()/mfqt() are used by bn_div_words_internal.
 *
 * R6 branches: the divide macro expands to nothing and mfqt()/mfrm()
 * expand to the three-operand divide/modulo instructions, so quotient
 * and remainder are produced directly into rd.
 * Default branch: ddivu() issues the divide (destination $0) and
 * mfqt()/mfrm() become mflo/mfhi of rd.
 *
 * NOTE(review): the _MIPS_ARCH_MIPS32R6 branch defines divu(), not
 * ddivu(), while the code visible in this file invokes ddivu(...);
 * presumably that configuration is never used with this 64-bit file.
 * Confirm before relying on it.
 * mfrm() is not referenced in the visible part of this file.
 */
3#if defined(_MIPS_ARCH_MIPS64R6)
4# define ddivu(rs,rt)
5# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
6# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
7#elif defined(_MIPS_ARCH_MIPS32R6)
8# define divu(rs,rt)
9# define mfqt(rd,rs,rt)	divu	rd,rs,rt
10# define mfrm(rd,rs,rt)	modu	rd,rs,rt
11#else
12# define ddivu(rs,rt)	ddivu	$0,rs,rt
13# define mfqt(rd,rs,rt)	mflo	rd
14# define mfrm(rd,rs,rt)	mfhi	rd
15#endif
16
/* Identification strings, emitted into the read-only data section. */
17.rdata
18.asciiz	"mips3.s, Version 1.2"
19.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
20
/*
 * Code section.  noat is required because the routines below use $1
 * explicitly as a scratch register.
 */
21.text
22.set	noat
23
/*
 * bn_mul_add_words: public entry point.
 * Branches to bn_mul_add_words_internal when the word count in $6 is
 * positive; otherwise returns 0 in $2 (also copied to $4).
 * NOTE(review): $4/$5/$6/$7 presumably carry the (rp, ap, num, w)
 * arguments of the C prototype -- confirm against the bn headers.
 */
24.align	5
25.globl	bn_mul_add_words
26.ent	bn_mul_add_words
27bn_mul_add_words:
28	.set	noreorder
29	bgtz	$6,bn_mul_add_words_internal
30	move	$2,$0		# delay slot: carry/result starts at 0 on both paths
31	jr	$31
32	move	$4,$2		# delay slot: mirror the result into $4
33.end	bn_mul_add_words
34
/*
 * bn_mul_add_words_internal: for each 64-bit word i,
 *     ($4)[i] = ($4)[i] + ($5)[i] * $7 + carry
 * with the carry kept in $2 (expected to be 0 on entry; the public
 * entry stub clears it in its branch delay slot).
 *
 * In:   $4 = destination words (read and written)
 *       $5 = source words
 *       $6 = word count, > 0
 *       $7 = multiplier
 * Out:  $2 = final carry word, copied to $4 on return
 * Temps: $1, $3, $8-$15
 *
 * dmultu(), mflo() and mfhi() are written in macro-call form; their
 * definitions are not in this chunk (presumably mips_arch.h).
 * The main loop handles four words per iteration, the tail handles the
 * remaining one to three words.
 */
35.align	5
36.ent	bn_mul_add_words_internal
37bn_mul_add_words_internal:
38	.set	reorder
39	li	$3,-4
40	and	$8,$6,$3		# $8 = count with the low two bits cleared
41	beqz	$8,.L_bn_mul_add_words_tail	# fewer than four words: tail only
42
43.L_bn_mul_add_words_loop:
44	ld	$12,0($5)
45	dmultu	($12,$7)
46	ld	$13,0($4)
47	ld	$14,8($5)
48	ld	$15,8($4)
49	ld	$8,2*8($5)
50	ld	$9,2*8($4)
51	daddu	$13,$2		# destination word += incoming carry
52	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
53				# values", but it seems to work fine
54				# even on 64-bit registers.
55	mflo	($1,$12,$7)
56	mfhi	($12,$12,$7)
57	daddu	$13,$1		# += low half of the product
58	daddu	$2,$12		# carry += high half of the product
59	 dmultu	($14,$7)
60	sltu	$1,$13,$1	# carry out of the low-half addition
61	sd	$13,0($4)
62	daddu	$2,$1
63
64	ld	$10,3*8($5)
65	ld	$11,3*8($4)
66	daddu	$15,$2
67	sltu	$2,$15,$2
68	mflo	($1,$14,$7)
69	mfhi	($14,$14,$7)
70	daddu	$15,$1
71	daddu	$2,$14
72	 dmultu	($8,$7)
73	sltu	$1,$15,$1
74	sd	$15,8($4)
75	daddu	$2,$1
76
77	subu	$6,4
78	daddu $4,4*8		# pointers advance here, so the remaining
79	daddu $5,4*8		# stores of this iteration use negative offsets
80	daddu	$9,$2
81	sltu	$2,$9,$2
82	mflo	($1,$8,$7)
83	mfhi	($8,$8,$7)
84	daddu	$9,$1
85	daddu	$2,$8
86	 dmultu	($10,$7)
87	sltu	$1,$9,$1
88	sd	$9,-2*8($4)
89	daddu	$2,$1
90
91
92	and	$8,$6,$3		# at least four words left?
93	daddu	$11,$2
94	sltu	$2,$11,$2
95	mflo	($1,$10,$7)
96	mfhi	($10,$10,$7)
97	daddu	$11,$1
98	daddu	$2,$10
99	sltu	$1,$11,$1
100	sd	$11,-8($4)
101	.set	noreorder
102	bgtz	$8,.L_bn_mul_add_words_loop
103	daddu	$2,$1		# delay slot: finish the carry of the 4th word
104
105	beqz	$6,.L_bn_mul_add_words_return
106	nop
107
108.L_bn_mul_add_words_tail:
109	.set	reorder
110	ld	$12,0($5)
111	dmultu	($12,$7)
112	ld	$13,0($4)
113	subu	$6,1
114	daddu	$13,$2
115	sltu	$2,$13,$2
116	mflo	($1,$12,$7)
117	mfhi	($12,$12,$7)
118	daddu	$13,$1
119	daddu	$2,$12
120	sltu	$1,$13,$1
121	sd	$13,0($4)
122	daddu	$2,$1
123	beqz	$6,.L_bn_mul_add_words_return
124
125	ld	$12,8($5)
126	dmultu	($12,$7)
127	ld	$13,8($4)
128	subu	$6,1
129	daddu	$13,$2
130	sltu	$2,$13,$2
131	mflo	($1,$12,$7)
132	mfhi	($12,$12,$7)
133	daddu	$13,$1
134	daddu	$2,$12
135	sltu	$1,$13,$1
136	sd	$13,8($4)
137	daddu	$2,$1
138	beqz	$6,.L_bn_mul_add_words_return
139
140	ld	$12,2*8($5)
141	dmultu	($12,$7)
142	ld	$13,2*8($4)
143	daddu	$13,$2
144	sltu	$2,$13,$2
145	mflo	($1,$12,$7)
146	mfhi	($12,$12,$7)
147	daddu	$13,$1
148	daddu	$2,$12
149	sltu	$1,$13,$1
150	sd	$13,2*8($4)
151	daddu	$2,$1
152
153.L_bn_mul_add_words_return:
154	.set	noreorder
155	jr	$31
156	move	$4,$2		# delay slot: mirror the carry into $4
157.end	bn_mul_add_words_internal
158
/*
 * bn_mul_words: public entry point.
 * Branches to bn_mul_words_internal when the word count in $6 is
 * positive; otherwise returns 0 in $2 (also copied to $4).
 * NOTE(review): $4/$5/$6/$7 presumably carry the (rp, ap, num, w)
 * arguments of the C prototype -- confirm against the bn headers.
 */
159.align	5
160.globl	bn_mul_words
161.ent	bn_mul_words
162bn_mul_words:
163	.set	noreorder
164	bgtz	$6,bn_mul_words_internal
165	move	$2,$0		# delay slot: carry/result starts at 0 on both paths
166	jr	$31
167	move	$4,$2		# delay slot: mirror the result into $4
168.end	bn_mul_words
169
/*
 * bn_mul_words_internal: for each 64-bit word i,
 *     ($4)[i] = low64( ($5)[i] * $7 ) + carry
 *     carry   = high64( ($5)[i] * $7 ) + carry-out of that addition
 * with the carry kept in $2 (expected to be 0 on entry; the public
 * entry stub clears it in its branch delay slot).
 *
 * In:   $4 = destination words (written only)
 *       $5 = source words
 *       $6 = word count, > 0
 *       $7 = multiplier
 * Out:  $2 = final carry word, copied to $4 on return
 * Temps: $1, $3, $8-$15
 *
 * The main loop handles four words per iteration, the tail handles the
 * remaining one to three words.
 */
170.align	5
171.ent	bn_mul_words_internal
172bn_mul_words_internal:
173	.set	reorder
174	li	$3,-4
175	and	$8,$6,$3		# $8 = count with the low two bits cleared
176	beqz	$8,.L_bn_mul_words_tail	# fewer than four words: tail only
177
178.L_bn_mul_words_loop:
179	ld	$12,0($5)
180	dmultu	($12,$7)
181	ld	$14,8($5)
182	ld	$8,2*8($5)
183	ld	$10,3*8($5)
184	mflo	($1,$12,$7)
185	mfhi	($12,$12,$7)
186	daddu	$2,$1		# carry + low half of the product
187	sltu	$13,$2,$1	# carry out of that addition
188	 dmultu	($14,$7)
189	sd	$2,0($4)
190	daddu	$2,$13,$12	# next carry = carry-out + high half
191
192	subu	$6,4
193	daddu $4,4*8		# pointers advance here, so the remaining
194	daddu $5,4*8		# stores of this iteration use negative offsets
195	mflo	($1,$14,$7)
196	mfhi	($14,$14,$7)
197	daddu	$2,$1
198	sltu	$15,$2,$1
199	 dmultu	($8,$7)
200	sd	$2,-3*8($4)
201	daddu	$2,$15,$14
202
203	mflo	($1,$8,$7)
204	mfhi	($8,$8,$7)
205	daddu	$2,$1
206	sltu	$9,$2,$1
207	 dmultu	($10,$7)
208	sd	$2,-2*8($4)
209	daddu	$2,$9,$8
210
211	and	$8,$6,$3		# at least four words left?
212	mflo	($1,$10,$7)
213	mfhi	($10,$10,$7)
214	daddu	$2,$1
215	sltu	$11,$2,$1
216	sd	$2,-8($4)
217	.set	noreorder
218	bgtz	$8,.L_bn_mul_words_loop
219	daddu	$2,$11,$10	# delay slot: carry of the 4th word
220
221	beqz	$6,.L_bn_mul_words_return
222	nop
223
224.L_bn_mul_words_tail:
225	.set	reorder
226	ld	$12,0($5)
227	dmultu	($12,$7)
228	subu	$6,1
229	mflo	($1,$12,$7)
230	mfhi	($12,$12,$7)
231	daddu	$2,$1
232	sltu	$13,$2,$1
233	sd	$2,0($4)
234	daddu	$2,$13,$12
235	beqz	$6,.L_bn_mul_words_return
236
237	ld	$12,8($5)
238	dmultu	($12,$7)
239	subu	$6,1
240	mflo	($1,$12,$7)
241	mfhi	($12,$12,$7)
242	daddu	$2,$1
243	sltu	$13,$2,$1
244	sd	$2,8($4)
245	daddu	$2,$13,$12
246	beqz	$6,.L_bn_mul_words_return
247
248	ld	$12,2*8($5)
249	dmultu	($12,$7)
250	mflo	($1,$12,$7)
251	mfhi	($12,$12,$7)
252	daddu	$2,$1
253	sltu	$13,$2,$1
254	sd	$2,2*8($4)
255	daddu	$2,$13,$12
256
257.L_bn_mul_words_return:
258	.set	noreorder
259	jr	$31
260	move	$4,$2		# delay slot: mirror the carry into $4
261.end	bn_mul_words_internal
262
/*
 * bn_sqr_words: public entry point.
 * Branches to bn_sqr_words_internal when the word count in $6 is
 * positive; otherwise returns immediately with $2 = $4 = 0.
 * NOTE(review): $4/$5/$6 presumably carry the (rp, ap, num) arguments
 * of the C prototype -- confirm against the bn headers.
 */
263.align	5
264.globl	bn_sqr_words
265.ent	bn_sqr_words
266bn_sqr_words:
267	.set	noreorder
268	bgtz	$6,bn_sqr_words_internal
269	move	$2,$0		# delay slot: executed on both paths
270	jr	$31
271	move	$4,$2
272.end	bn_sqr_words
273
/*
 * bn_sqr_words_internal: for each 64-bit input word i,
 *     ($4)[2*i]   = low64( ($5)[i] * ($5)[i] )
 *     ($4)[2*i+1] = high64( ($5)[i] * ($5)[i] )
 * No carries are propagated between words.
 *
 * In:   $4 = destination (two words written per input word)
 *       $5 = source words
 *       $6 = word count, > 0
 * Temps: $3, $8-$15
 * $2 is not written here; the value copied to $4 on return is whatever
 * $2 held on entry (the bn_sqr_words stub sets it to 0).
 *
 * The main loop handles four input words per iteration, the tail
 * handles the remaining one to three.
 */
274.align	5
275.ent	bn_sqr_words_internal
276bn_sqr_words_internal:
277	.set	reorder
278	li	$3,-4
279	and	$8,$6,$3		# $8 = count with the low two bits cleared
280	beqz	$8,.L_bn_sqr_words_tail	# fewer than four words: tail only
281
282.L_bn_sqr_words_loop:
283	ld	$12,0($5)
284	dmultu	($12,$12)
285	ld	$14,8($5)
286	ld	$8,2*8($5)
287	ld	$10,3*8($5)
288	mflo	($13,$12,$12)
289	mfhi	($12,$12,$12)
290	sd	$13,0($4)
291	sd	$12,8($4)
292
293	dmultu	($14,$14)
294	subu	$6,4
295	daddu $4,8*8		# output advances eight words per four inputs;
296	daddu $5,4*8		# remaining stores use negative offsets
297	mflo	($15,$14,$14)
298	mfhi	($14,$14,$14)
299	sd	$15,-6*8($4)
300	sd	$14,-5*8($4)
301
302	dmultu	($8,$8)
303	mflo	($9,$8,$8)
304	mfhi	($8,$8,$8)
305	sd	$9,-4*8($4)
306	sd	$8,-3*8($4)
307
308
309	dmultu	($10,$10)
310	and	$8,$6,$3		# at least four words left?
311	mflo	($11,$10,$10)
312	mfhi	($10,$10,$10)
313	sd	$11,-2*8($4)
314
315	.set	noreorder
316	sd	$10,-8($4)
317	bgtz	$8,.L_bn_sqr_words_loop
318	nop
319
320	beqz	$6,.L_bn_sqr_words_return
321	nop
322
323.L_bn_sqr_words_tail:
324	.set	reorder
325	ld	$12,0($5)
326	dmultu	($12,$12)
327	subu	$6,1
328	mflo	($13,$12,$12)
329	mfhi	($12,$12,$12)
330	sd	$13,0($4)
331	sd	$12,8($4)
332	beqz	$6,.L_bn_sqr_words_return
333
334	ld	$12,8($5)
335	dmultu	($12,$12)
336	subu	$6,1
337	mflo	($13,$12,$12)
338	mfhi	($12,$12,$12)
339	sd	$13,2*8($4)
340	sd	$12,3*8($4)
341	beqz	$6,.L_bn_sqr_words_return
342
343	ld	$12,2*8($5)
344	dmultu	($12,$12)
345	mflo	($13,$12,$12)
346	mfhi	($12,$12,$12)
347	sd	$13,4*8($4)
348	sd	$12,5*8($4)
349
350.L_bn_sqr_words_return:
351	.set	noreorder
352	jr	$31
353	move	$4,$2
354
355.end	bn_sqr_words_internal
356
/*
 * bn_add_words: public entry point.
 * Branches to bn_add_words_internal when the word count in $7 is
 * positive; otherwise returns 0 in $2 (also copied to $4).
 * NOTE(review): $4/$5/$6/$7 presumably carry the (rp, ap, bp, num)
 * arguments of the C prototype -- confirm against the bn headers.
 */
357.align	5
358.globl	bn_add_words
359.ent	bn_add_words
360bn_add_words:
361	.set	noreorder
362	bgtz	$7,bn_add_words_internal
363	move	$2,$0		# delay slot: carry starts at 0 on both paths
364	jr	$31
365	move	$4,$2		# delay slot: mirror the result into $4
366.end	bn_add_words
367
/*
 * bn_add_words_internal: for each 64-bit word i,
 *     ($4)[i] = ($5)[i] + ($6)[i] + carry
 * with the carry (0 or 1) kept in $2 (expected to be 0 on entry; the
 * public entry stub clears it in its branch delay slot).
 *
 * In:   $4 = destination words
 *       $5, $6 = the two source operands
 *       $7 = word count, > 0
 * Out:  $2 = final carry, copied to $4 on return
 * Temps: $1, $3, $8-$15, $24, $25
 *
 * Each word takes two additions; the carry of each is recovered with
 * sltu (result < operand) and the two are summed.
 * The main loop handles four words per iteration, the tail handles the
 * remaining one to three words.
 */
368.align	5
369.ent	bn_add_words_internal
370bn_add_words_internal:
371	.set	reorder
372	li	$3,-4
373	and	$1,$7,$3		# $1 = count with the low two bits cleared
374	beqz	$1,.L_bn_add_words_tail	# fewer than four words: tail only
375
376.L_bn_add_words_loop:
377	ld	$12,0($5)
378	ld	$8,0($6)
379	subu	$7,4
380	ld	$13,8($5)
381	and	$1,$7,$3		# at least four words left after this pass?
382	ld	$14,2*8($5)
383	daddu $6,4*8
384	ld	$15,3*8($5)
385	daddu $4,4*8
386	ld	$9,-3*8($6)
387	daddu $5,4*8
388	ld	$10,-2*8($6)
389	ld	$11,-8($6)
390	daddu	$8,$12		# sum of the two operand words
391	sltu	$24,$8,$12	# carry out of that sum
392	daddu	$12,$8,$2	# add the incoming carry
393	sltu	$2,$12,$8	# carry out of adding the carry
394	sd	$12,-4*8($4)
395	daddu	$2,$24		# combined carry for the next word
396
397	daddu	$9,$13
398	sltu	$25,$9,$13
399	daddu	$13,$9,$2
400	sltu	$2,$13,$9
401	sd	$13,-3*8($4)
402	daddu	$2,$25
403
404	daddu	$10,$14
405	sltu	$24,$10,$14
406	daddu	$14,$10,$2
407	sltu	$2,$14,$10
408	sd	$14,-2*8($4)
409	daddu	$2,$24
410
411	daddu	$11,$15
412	sltu	$25,$11,$15
413	daddu	$15,$11,$2
414	sltu	$2,$15,$11
415	sd	$15,-8($4)
416
417	.set	noreorder
418	bgtz	$1,.L_bn_add_words_loop
419	daddu	$2,$25		# delay slot: combined carry of the 4th word
420
421	beqz	$7,.L_bn_add_words_return
422	nop
423
424.L_bn_add_words_tail:
425	.set	reorder
426	ld	$12,0($5)
427	ld	$8,0($6)
428	daddu	$8,$12
429	subu	$7,1
430	sltu	$24,$8,$12
431	daddu	$12,$8,$2
432	sltu	$2,$12,$8
433	sd	$12,0($4)
434	daddu	$2,$24
435	beqz	$7,.L_bn_add_words_return
436
437	ld	$13,8($5)
438	ld	$9,8($6)
439	daddu	$9,$13
440	subu	$7,1
441	sltu	$25,$9,$13
442	daddu	$13,$9,$2
443	sltu	$2,$13,$9
444	sd	$13,8($4)
445	daddu	$2,$25
446	beqz	$7,.L_bn_add_words_return
447
448	ld	$14,2*8($5)
449	ld	$10,2*8($6)
450	daddu	$10,$14
451	sltu	$24,$10,$14
452	daddu	$14,$10,$2
453	sltu	$2,$14,$10
454	sd	$14,2*8($4)
455	daddu	$2,$24
456
457.L_bn_add_words_return:
458	.set	noreorder
459	jr	$31
460	move	$4,$2		# delay slot: mirror the carry into $4
461
462.end	bn_add_words_internal
463
/*
 * bn_sub_words: public entry point.
 * Branches to bn_sub_words_internal when the word count in $7 is
 * positive; otherwise returns 0 in $2 and $4.
 * NOTE(review): $4/$5/$6/$7 presumably carry the (rp, ap, bp, num)
 * arguments of the C prototype -- confirm against the bn headers.
 */
464.align	5
465.globl	bn_sub_words
466.ent	bn_sub_words
467bn_sub_words:
468	.set	noreorder
469	bgtz	$7,bn_sub_words_internal
470	move	$2,$0		# delay slot: borrow starts at 0 on both paths
471	jr	$31
472	move	$4,$0		# delay slot: $4 = 0, same value as $2 here
473.end	bn_sub_words
474
/*
 * bn_sub_words_internal: for each 64-bit word i,
 *     ($4)[i] = ($5)[i] - ($6)[i] - borrow
 * with the borrow (0 or 1) kept in $2 (expected to be 0 on entry; the
 * public entry stub clears it in its branch delay slot).
 *
 * In:   $4 = destination words
 *       $5 = minuend words, $6 = subtrahend words
 *       $7 = word count, > 0
 * Out:  $2 = final borrow, copied to $4 on return
 * Temps: $1, $3, $8-$15, $24, $25
 *
 * Each word takes two subtractions; the borrow of the first is
 * a < b (sltu), the borrow of the second is result > operand (sgtu),
 * and the two are summed.
 * The main loop handles four words per iteration, the tail handles the
 * remaining one to three words.
 */
475.align	5
476.ent	bn_sub_words_internal
477bn_sub_words_internal:
478	.set	reorder
479	li	$3,-4
480	and	$1,$7,$3		# $1 = count with the low two bits cleared
481	beqz	$1,.L_bn_sub_words_tail	# fewer than four words: tail only
482
483.L_bn_sub_words_loop:
484	ld	$12,0($5)
485	ld	$8,0($6)
486	subu	$7,4
487	ld	$13,8($5)
488	and	$1,$7,$3		# at least four words left after this pass?
489	ld	$14,2*8($5)
490	daddu $6,4*8
491	ld	$15,3*8($5)
492	daddu $4,4*8
493	ld	$9,-3*8($6)
494	daddu $5,4*8
495	ld	$10,-2*8($6)
496	ld	$11,-8($6)
497	sltu	$24,$12,$8	# borrow of a - b
498	dsubu	$8,$12,$8	# a - b
499	dsubu	$12,$8,$2	# subtract the incoming borrow
500	sgtu	$2,$12,$8	# borrow of that subtraction
501	sd	$12,-4*8($4)
502	daddu	$2,$24		# combined borrow for the next word
503
504	sltu	$25,$13,$9
505	dsubu	$9,$13,$9
506	dsubu	$13,$9,$2
507	sgtu	$2,$13,$9
508	sd	$13,-3*8($4)
509	daddu	$2,$25
510
511
512	sltu	$24,$14,$10
513	dsubu	$10,$14,$10
514	dsubu	$14,$10,$2
515	sgtu	$2,$14,$10
516	sd	$14,-2*8($4)
517	daddu	$2,$24
518
519	sltu	$25,$15,$11
520	dsubu	$11,$15,$11
521	dsubu	$15,$11,$2
522	sgtu	$2,$15,$11
523	sd	$15,-8($4)
524
525	.set	noreorder
526	bgtz	$1,.L_bn_sub_words_loop
527	daddu	$2,$25		# delay slot: combined borrow of the 4th word
528
529	beqz	$7,.L_bn_sub_words_return
530	nop
531
532.L_bn_sub_words_tail:
533	.set	reorder
534	ld	$12,0($5)
535	ld	$8,0($6)
536	subu	$7,1
537	sltu	$24,$12,$8
538	dsubu	$8,$12,$8
539	dsubu	$12,$8,$2
540	sgtu	$2,$12,$8
541	sd	$12,0($4)
542	daddu	$2,$24
543	beqz	$7,.L_bn_sub_words_return
544
545	ld	$13,8($5)
546	subu	$7,1
547	ld	$9,8($6)
548	sltu	$25,$13,$9
549	dsubu	$9,$13,$9
550	dsubu	$13,$9,$2
551	sgtu	$2,$13,$9
552	sd	$13,8($4)
553	daddu	$2,$25
554	beqz	$7,.L_bn_sub_words_return
555
556	ld	$14,2*8($5)
557	ld	$10,2*8($6)
558	sltu	$24,$14,$10
559	dsubu	$10,$14,$10
560	dsubu	$14,$10,$2
561	sgtu	$2,$14,$10
562	sd	$14,2*8($4)
563	daddu	$2,$24
564
565.L_bn_sub_words_return:
566	.set	noreorder
567	jr	$31
568	move	$4,$2		# delay slot: mirror the borrow into $4
569.end	bn_sub_words_internal
570
571#if 0
572/*
573 * The bn_div_3_words entry point is re-used for constant-time interface.
574 * Implementation is retained as historical reference.
575 */
/*
 * Disabled code (never assembled).  As written, the entry stub loads
 * the words at 0($4) and -8($4), keeps the original $4 and $5 in $7
 * and $10, and returns -1 when the word at 0($4) equals $6; otherwise
 * it continues in bn_div_3_words_internal.
 */
576.align 5
577.globl	bn_div_3_words
578.ent	bn_div_3_words
579bn_div_3_words:
580	.set	noreorder
581	move	$7,$4		# we know that bn_div_words does not
582				# touch $7, $10, $11 and preserves $6
583				# so that we can save two arguments
584				# and return address in registers
585				# instead of stack:-)
586
587	ld	$4,($7)
588	move	$10,$5
589	ld	$5,-8($7)
590	bne	$4,$6,bn_div_3_words_internal
591	 nop
592	li	$2,-1
593	jr	$31
594	move	$4,$2
595.end	bn_div_3_words
596
/*
 * Calls bn_div_words_internal for an initial quotient in $2 (return
 * address parked in $11 around the call), multiplies it by $10 and
 * then loops, decrementing $2 and adjusting the product and $5 until
 * the loop conditions below are satisfied.
 */
597.align	5
598.ent	bn_div_3_words_internal
599bn_div_3_words_internal:
600	.set	reorder
601	move	$11,$31		# save the return address
602	bal	bn_div_words_internal
603	move	$31,$11		# restore the return address
604	dmultu	($10,$2)
605	ld	$14,-2*8($7)
606	move	$8,$0
607	mfhi	($13,$10,$2)
608	mflo	($12,$10,$2)
609	sltu	$24,$13,$5
610.L_bn_div_3_words_inner_loop:
611	bnez	$24,.L_bn_div_3_words_inner_loop_done
612	sgeu	$1,$14,$12
613	seq	$25,$13,$5
614	and	$1,$25
615	sltu	$15,$12,$10
616	daddu	$5,$6
617	dsubu	$13,$15
618	dsubu	$12,$10
619	sltu	$24,$13,$5
620	sltu	$8,$5,$6
621	or	$24,$8
622	.set	noreorder
623	beqz	$1,.L_bn_div_3_words_inner_loop
624	dsubu	$2,1		# delay slot: executed on both paths
625	daddu	$2,1		# fall-through only: undo the decrement
626	.set	reorder
627.L_bn_div_3_words_inner_loop_done:
628	.set	noreorder
629	jr	$31
630	move	$4,$2
631.end	bn_div_3_words_internal
632#endif
633
/*
 * bn_div_words: public entry point.
 * Branches to bn_div_words_internal when the divisor in $6 is
 * non-zero; for a zero divisor it returns -1 in $2 (also copied to $4).
 * NOTE(review): $4/$5/$6 presumably carry the (h, l, d) arguments of
 * the C prototype -- confirm against the bn headers.
 */
634.align	5
635.globl	bn_div_words
636.ent	bn_div_words
637bn_div_words:
638	.set	noreorder
639	bnez	$6,bn_div_words_internal
640	li	$2,-1		# I would rather signal div-by-zero
641				# which can be done with 'break 7'
642	jr	$31
643	move	$4,$2
644.end	bn_div_words
645
/*
 * bn_div_words_internal: divides the 128-bit value ($4:$5), high word
 * in $4, by the non-zero 64-bit divisor in $6, producing the quotient
 * 32 bits at a time.
 *
 * In:   $4 = high word, $5 = low word, $6 = divisor (non-zero)
 * Out:  $2 = quotient (copied to $4), $3 = remainder (copied to $5);
 *       $6 is shifted back to its original value
 * Temps: $1, $8, $9, $12-$15, $24, $25
 *
 * Steps:
 *  1. Normalize: shift the divisor left until its top bit is set,
 *     counting the shifts in $25, and shift ($4:$5) by the same amount.
 *     If bits of $4 would be shifted out, "break 6" is executed.
 *  2. If $4 >= $6, subtract $6 from $4 once.
 *  3. Twice: estimate a 32-bit quotient digit from the high halves
 *     (0xffffffff when the high halves are equal, which skips the
 *     divide), then decrement it while digit*divisor exceeds the
 *     current partial dividend.
 *
 * There is no .set directive at the top: the noreorder state selected
 * in bn_div_words just above is still in effect, so the instruction
 * after each of the first branches is a delay slot.
 */
646.align	5
647.ent	bn_div_words_internal
648bn_div_words_internal:
649	move	$3,$0
650	bltz	$6,.L_bn_div_words_body	# top bit already set: nothing to normalize
651	move	$25,$3		# delay slot: shift count = 0
652	dsll	$6,1
653	bgtz	$6,.-4		# back to the dsll until the top bit is set
654	addu	$25,1		# delay slot: count this shift
655
656	.set	reorder
657	negu	$13,$25		# variable shifts use the count modulo 64
658	li	$14,-1
659	dsll	$14,$13		# mask of the top $25 bits
660	and	$14,$4		# bits of $4 that would be shifted out
661	dsrl	$1,$5,$13	# top bits of $5 that move into $4
662	.set	noreorder
663	beqz	$14,.+12		# nothing lost: skip the nop and the break
664	nop
665	break	6		# signal overflow
666	.set	reorder
667	dsll	$4,$25
668	dsll	$5,$25
669	or	$4,$1
670.L_bn_div_words_body:
671	dsrl	$3,$6,4*8	# bits
672	sgeu	$1,$4,$6
673	.set	noreorder
674	beqz	$1,.+12		# $4 < $6: skip the nop and the subtraction
675	nop
676	dsubu	$4,$6
677	.set	reorder
678
679	li	$8,-1
680	dsrl	$9,$4,4*8	# bits
681	dsrl	$8,4*8	# q=0xffffffff
682	beq	$3,$9,.L_bn_div_words_skip_div1
683	ddivu	($4,$3)
684	mfqt	($8,$4,$3)
685.L_bn_div_words_skip_div1:
686	dmultu	($6,$8)
687	dsll	$15,$4,4*8	# bits
688	dsrl	$1,$5,4*8	# bits
689	or	$15,$1
690	mflo	($12,$6,$8)
691	mfhi	($13,$6,$8)
	# Loop while ($13:$12) > ($9:$15): subtract the divisor from the
	# product and decrement the quotient digit in $8.
692.L_bn_div_words_inner_loop1:
693	sltu	$14,$15,$12
694	seq	$24,$9,$13
695	sltu	$1,$9,$13
696	and	$14,$24
697	sltu	$2,$12,$6	# borrow of the low-word subtraction
698	or	$1,$14
699	.set	noreorder
700	beqz	$1,.L_bn_div_words_inner_loop1_done
701	dsubu	$13,$2		# delay slot: executed on both paths
702	dsubu	$12,$6
703	b	.L_bn_div_words_inner_loop1
704	dsubu	$8,1		# delay slot: digit = digit - 1
705	.set	reorder
706.L_bn_div_words_inner_loop1_done:
707
708	dsll	$5,4*8	# bits
709	dsubu	$4,$15,$12
710	dsll	$2,$8,4*8	# bits
711
712	li	$8,-1
713	dsrl	$9,$4,4*8	# bits
714	dsrl	$8,4*8	# q=0xffffffff
715	beq	$3,$9,.L_bn_div_words_skip_div2
716	ddivu	($4,$3)
717	mfqt	($8,$4,$3)
718.L_bn_div_words_skip_div2:
719	dmultu	($6,$8)
720	dsll	$15,$4,4*8	# bits
721	dsrl	$1,$5,4*8	# bits
722	or	$15,$1
723	mflo	($12,$6,$8)
724	mfhi	($13,$6,$8)
	# Same correction loop for the low digit; $3 is reused as the
	# borrow temporary here.
725.L_bn_div_words_inner_loop2:
726	sltu	$14,$15,$12
727	seq	$24,$9,$13
728	sltu	$1,$9,$13
729	and	$14,$24
730	sltu	$3,$12,$6
731	or	$1,$14
732	.set	noreorder
733	beqz	$1,.L_bn_div_words_inner_loop2_done
734	dsubu	$13,$3		# delay slot: executed on both paths
735	dsubu	$12,$6
736	b	.L_bn_div_words_inner_loop2
737	dsubu	$8,1		# delay slot: digit = digit - 1
738	.set	reorder
739.L_bn_div_words_inner_loop2_done:
740
741	dsubu	$4,$15,$12
742	or	$2,$8		# combine the high and low quotient digits
743	dsrl	$3,$4,$25	# $3 contains remainder if anybody wants it
744	dsrl	$6,$25		# restore $6
745
746	.set	noreorder
747	move	$5,$3
748	jr	$31
749	move	$4,$2
750.end	bn_div_words_internal
751
/*
 * bn_mul_comba8: 8x8-word multiplication, r[0..15] = a[0..7] * b[0..7]
 * (64-bit words), computed one result column at a time.
 *
 * In:   $4 = r (sixteen words are stored)
 *       $5 = a, $6 = b (eight words are loaded from each)
 * Registers:
 *       a[0..7] live in $12,$13,$14,$15,$16,$18,$20,$5
 *       b[0..7] live in $8,$9,$10,$11,$17,$19,$21,$6
 *       ($5 and $6 are overwritten by their own last element)
 *       $2,$3,$7 rotate as the accumulators named c1,c2,c3 in the
 *       mul_add_c comments; $24/$25 receive the low/high halves of
 *       each product and $1 the carry.
 * Stack: a 6*8-byte frame saves $16-$21, which are restored before
 *        the return.
 *
 * Each dmultu is issued before the carry handling of the previous
 * product has finished, so the multiply overlaps the additions.
 * dmultu(), mflo() and mfhi() are written in macro-call form; their
 * definitions are not in this chunk (presumably mips_arch.h).
 */
752.align	5
753.globl	bn_mul_comba8
754.ent	bn_mul_comba8
755bn_mul_comba8:
756	.set	noreorder
757	.frame	$29,6*8,$31
758	.mask	0x003f0000,-8
759	dsubu $29,6*8
760	sd	$21,5*8($29)
761	sd	$20,4*8($29)
762	sd	$19,3*8($29)
763	sd	$18,2*8($29)
764	sd	$17,1*8($29)
765	sd	$16,0*8($29)
766
767	.set	reorder
768	ld	$12,0($5)	# If compiled with -mips3 option on
769				# R5000 box assembler barks on this
770				# line with "should not have mult/div
771				# as last instruction in bb (R10K
772				# bug)" warning. If anybody out there
773				# has a clue about how to circumvent
774				# this do send me a note.
775				#		<appro@fy.chalmers.se>
776
777	ld	$8,0($6)
778	ld	$13,8($5)
779	ld	$14,2*8($5)
780	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
781	ld	$15,3*8($5)
782	ld	$9,8($6)
783	ld	$10,2*8($6)
784	ld	$11,3*8($6)
785	mflo	($2,$12,$8)
786	mfhi	($3,$12,$8)
787
788	ld	$16,4*8($5)
789	ld	$18,5*8($5)
790	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
791	ld	$20,6*8($5)
792	ld	$5,7*8($5)
793	ld	$17,4*8($6)
794	ld	$19,5*8($6)
795	mflo	($24,$12,$9)
796	mfhi	($25,$12,$9)
797	daddu	$3,$24
798	sltu	$1,$3,$24
799	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
800	daddu	$7,$25,$1
801	ld	$21,6*8($6)
802	ld	$6,7*8($6)
803	sd	$2,0($4)	# r[0]=c1;
804	mflo	($24,$13,$8)
805	mfhi	($25,$13,$8)
806	daddu	$3,$24
807	sltu	$1,$3,$24
808	 dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
809	daddu	$25,$1
810	daddu	$7,$25
811	sltu	$2,$7,$25
812	sd	$3,8($4)	# r[1]=c2;
813
814	mflo	($24,$14,$8)
815	mfhi	($25,$14,$8)
816	daddu	$7,$24
817	sltu	$1,$7,$24
818	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
819	daddu	$25,$1
820	daddu	$2,$25
821	mflo	($24,$13,$9)
822	mfhi	($25,$13,$9)
823	daddu	$7,$24
824	sltu	$1,$7,$24
825	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
826	daddu	$25,$1
827	daddu	$2,$25
828	sltu	$3,$2,$25
829	mflo	($24,$12,$10)
830	mfhi	($25,$12,$10)
831	daddu	$7,$24
832	sltu	$1,$7,$24
833	 dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
834	daddu	$25,$1
835	daddu	$2,$25
836	sltu	$1,$2,$25
837	daddu	$3,$1
838	sd	$7,2*8($4)	# r[2]=c3;
839
840	mflo	($24,$12,$11)
841	mfhi	($25,$12,$11)
842	daddu	$2,$24
843	sltu	$1,$2,$24
844	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
845	daddu	$25,$1
846	daddu	$3,$25
847	sltu	$7,$3,$25
848	mflo	($24,$13,$10)
849	mfhi	($25,$13,$10)
850	daddu	$2,$24
851	sltu	$1,$2,$24
852	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
853	daddu	$25,$1
854	daddu	$3,$25
855	sltu	$1,$3,$25
856	daddu	$7,$1
857	mflo	($24,$14,$9)
858	mfhi	($25,$14,$9)
859	daddu	$2,$24
860	sltu	$1,$2,$24
861	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
862	daddu	$25,$1
863	daddu	$3,$25
864	sltu	$1,$3,$25
865	daddu	$7,$1
866	mflo	($24,$15,$8)
867	mfhi	($25,$15,$8)
868	daddu	$2,$24
869	sltu	$1,$2,$24
870	 dmultu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
871	daddu	$25,$1
872	daddu	$3,$25
873	sltu	$1,$3,$25
874	daddu	$7,$1
875	sd	$2,3*8($4)	# r[3]=c1;
876
877	mflo	($24,$16,$8)
878	mfhi	($25,$16,$8)
879	daddu	$3,$24
880	sltu	$1,$3,$24
881	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
882	daddu	$25,$1
883	daddu	$7,$25
884	sltu	$2,$7,$25
885	mflo	($24,$15,$9)
886	mfhi	($25,$15,$9)
887	daddu	$3,$24
888	sltu	$1,$3,$24
889	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
890	daddu	$25,$1
891	daddu	$7,$25
892	sltu	$1,$7,$25
893	daddu	$2,$1
894	mflo	($24,$14,$10)
895	mfhi	($25,$14,$10)
896	daddu	$3,$24
897	sltu	$1,$3,$24
898	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
899	daddu	$25,$1
900	daddu	$7,$25
901	sltu	$1,$7,$25
902	daddu	$2,$1
903	mflo	($24,$13,$11)
904	mfhi	($25,$13,$11)
905	daddu	$3,$24
906	sltu	$1,$3,$24
907	dmultu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
908	daddu	$25,$1
909	daddu	$7,$25
910	sltu	$1,$7,$25
911	daddu	$2,$1
912	mflo	($24,$12,$17)
913	mfhi	($25,$12,$17)
914	daddu	$3,$24
915	sltu	$1,$3,$24
916	 dmultu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
917	daddu	$25,$1
918	daddu	$7,$25
919	sltu	$1,$7,$25
920	daddu	$2,$1
921	sd	$3,4*8($4)	# r[4]=c2;
922
923	mflo	($24,$12,$19)
924	mfhi	($25,$12,$19)
925	daddu	$7,$24
926	sltu	$1,$7,$24
927	dmultu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
928	daddu	$25,$1
929	daddu	$2,$25
930	sltu	$3,$2,$25
931	mflo	($24,$13,$17)
932	mfhi	($25,$13,$17)
933	daddu	$7,$24
934	sltu	$1,$7,$24
935	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
936	daddu	$25,$1
937	daddu	$2,$25
938	sltu	$1,$2,$25
939	daddu	$3,$1
940	mflo	($24,$14,$11)
941	mfhi	($25,$14,$11)
942	daddu	$7,$24
943	sltu	$1,$7,$24
944	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
945	daddu	$25,$1
946	daddu	$2,$25
947	sltu	$1,$2,$25
948	daddu	$3,$1
949	mflo	($24,$15,$10)
950	mfhi	($25,$15,$10)
951	daddu	$7,$24
952	sltu	$1,$7,$24
953	dmultu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
954	daddu	$25,$1
955	daddu	$2,$25
956	sltu	$1,$2,$25
957	daddu	$3,$1
958	mflo	($24,$16,$9)
959	mfhi	($25,$16,$9)
960	daddu	$7,$24
961	sltu	$1,$7,$24
962	dmultu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
963	daddu	$25,$1
964	daddu	$2,$25
965	sltu	$1,$2,$25
966	daddu	$3,$1
967	mflo	($24,$18,$8)
968	mfhi	($25,$18,$8)
969	daddu	$7,$24
970	sltu	$1,$7,$24
971	 dmultu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
972	daddu	$25,$1
973	daddu	$2,$25
974	sltu	$1,$2,$25
975	daddu	$3,$1
976	sd	$7,5*8($4)	# r[5]=c3;
977
978	mflo	($24,$20,$8)
979	mfhi	($25,$20,$8)
980	daddu	$2,$24
981	sltu	$1,$2,$24
982	dmultu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
983	daddu	$25,$1
984	daddu	$3,$25
985	sltu	$7,$3,$25
986	mflo	($24,$18,$9)
987	mfhi	($25,$18,$9)
988	daddu	$2,$24
989	sltu	$1,$2,$24
990	dmultu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
991	daddu	$25,$1
992	daddu	$3,$25
993	sltu	$1,$3,$25
994	daddu	$7,$1
995	mflo	($24,$16,$10)
996	mfhi	($25,$16,$10)
997	daddu	$2,$24
998	sltu	$1,$2,$24
999	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1000	daddu	$25,$1
1001	daddu	$3,$25
1002	sltu	$1,$3,$25
1003	daddu	$7,$1
1004	mflo	($24,$15,$11)
1005	mfhi	($25,$15,$11)
1006	daddu	$2,$24
1007	sltu	$1,$2,$24
1008	dmultu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
1009	daddu	$25,$1
1010	daddu	$3,$25
1011	sltu	$1,$3,$25
1012	daddu	$7,$1
1013	mflo	($24,$14,$17)
1014	mfhi	($25,$14,$17)
1015	daddu	$2,$24
1016	sltu	$1,$2,$24
1017	dmultu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
1018	daddu	$25,$1
1019	daddu	$3,$25
1020	sltu	$1,$3,$25
1021	daddu	$7,$1
1022	mflo	($24,$13,$19)
1023	mfhi	($25,$13,$19)
1024	daddu	$2,$24
1025	sltu	$1,$2,$24
1026	dmultu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
1027	daddu	$25,$1
1028	daddu	$3,$25
1029	sltu	$1,$3,$25
1030	daddu	$7,$1
1031	mflo	($24,$12,$21)
1032	mfhi	($25,$12,$21)
1033	daddu	$2,$24
1034	sltu	$1,$2,$24
1035	 dmultu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
1036	daddu	$25,$1
1037	daddu	$3,$25
1038	sltu	$1,$3,$25
1039	daddu	$7,$1
1040	sd	$2,6*8($4)	# r[6]=c1;
1041
1042	mflo	($24,$12,$6)
1043	mfhi	($25,$12,$6)
1044	daddu	$3,$24
1045	sltu	$1,$3,$24
1046	dmultu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
1047	daddu	$25,$1
1048	daddu	$7,$25
1049	sltu	$2,$7,$25
1050	mflo	($24,$13,$21)
1051	mfhi	($25,$13,$21)
1052	daddu	$3,$24
1053	sltu	$1,$3,$24
1054	dmultu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
1055	daddu	$25,$1
1056	daddu	$7,$25
1057	sltu	$1,$7,$25
1058	daddu	$2,$1
1059	mflo	($24,$14,$19)
1060	mfhi	($25,$14,$19)
1061	daddu	$3,$24
1062	sltu	$1,$3,$24
1063	dmultu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
1064	daddu	$25,$1
1065	daddu	$7,$25
1066	sltu	$1,$7,$25
1067	daddu	$2,$1
1068	mflo	($24,$15,$17)
1069	mfhi	($25,$15,$17)
1070	daddu	$3,$24
1071	sltu	$1,$3,$24
1072	dmultu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
1073	daddu	$25,$1
1074	daddu	$7,$25
1075	sltu	$1,$7,$25
1076	daddu	$2,$1
1077	mflo	($24,$16,$11)
1078	mfhi	($25,$16,$11)
1079	daddu	$3,$24
1080	sltu	$1,$3,$24
1081	dmultu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
1082	daddu	$25,$1
1083	daddu	$7,$25
1084	sltu	$1,$7,$25
1085	daddu	$2,$1
1086	mflo	($24,$18,$10)
1087	mfhi	($25,$18,$10)
1088	daddu	$3,$24
1089	sltu	$1,$3,$24
1090	dmultu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
1091	daddu	$25,$1
1092	daddu	$7,$25
1093	sltu	$1,$7,$25
1094	daddu	$2,$1
1095	mflo	($24,$20,$9)
1096	mfhi	($25,$20,$9)
1097	daddu	$3,$24
1098	sltu	$1,$3,$24
1099	dmultu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
1100	daddu	$25,$1
1101	daddu	$7,$25
1102	sltu	$1,$7,$25
1103	daddu	$2,$1
1104	mflo	($24,$5,$8)
1105	mfhi	($25,$5,$8)
1106	daddu	$3,$24
1107	sltu	$1,$3,$24
1108	 dmultu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
1109	daddu	$25,$1
1110	daddu	$7,$25
1111	sltu	$1,$7,$25
1112	daddu	$2,$1
1113	sd	$3,7*8($4)	# r[7]=c2;
1114
1115	mflo	($24,$5,$9)
1116	mfhi	($25,$5,$9)
1117	daddu	$7,$24
1118	sltu	$1,$7,$24
1119	dmultu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
1120	daddu	$25,$1
1121	daddu	$2,$25
1122	sltu	$3,$2,$25
1123	mflo	($24,$20,$10)
1124	mfhi	($25,$20,$10)
1125	daddu	$7,$24
1126	sltu	$1,$7,$24
1127	dmultu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
1128	daddu	$25,$1
1129	daddu	$2,$25
1130	sltu	$1,$2,$25
1131	daddu	$3,$1
1132	mflo	($24,$18,$11)
1133	mfhi	($25,$18,$11)
1134	daddu	$7,$24
1135	sltu	$1,$7,$24
1136	dmultu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
1137	daddu	$25,$1
1138	daddu	$2,$25
1139	sltu	$1,$2,$25
1140	daddu	$3,$1
1141	mflo	($24,$16,$17)
1142	mfhi	($25,$16,$17)
1143	daddu	$7,$24
1144	sltu	$1,$7,$24
1145	dmultu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
1146	daddu	$25,$1
1147	daddu	$2,$25
1148	sltu	$1,$2,$25
1149	daddu	$3,$1
1150	mflo	($24,$15,$19)
1151	mfhi	($25,$15,$19)
1152	daddu	$7,$24
1153	sltu	$1,$7,$24
1154	dmultu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
1155	daddu	$25,$1
1156	daddu	$2,$25
1157	sltu	$1,$2,$25
1158	daddu	$3,$1
1159	mflo	($24,$14,$21)
1160	mfhi	($25,$14,$21)
1161	daddu	$7,$24
1162	sltu	$1,$7,$24
1163	dmultu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
1164	daddu	$25,$1
1165	daddu	$2,$25
1166	sltu	$1,$2,$25
1167	daddu	$3,$1
1168	mflo	($24,$13,$6)
1169	mfhi	($25,$13,$6)
1170	daddu	$7,$24
1171	sltu	$1,$7,$24
1172	 dmultu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
1173	daddu	$25,$1
1174	daddu	$2,$25
1175	sltu	$1,$2,$25
1176	daddu	$3,$1
1177	sd	$7,8*8($4)	# r[8]=c3;
1178
1179	mflo	($24,$14,$6)
1180	mfhi	($25,$14,$6)
1181	daddu	$2,$24
1182	sltu	$1,$2,$24
1183	dmultu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
1184	daddu	$25,$1
1185	daddu	$3,$25
1186	sltu	$7,$3,$25
1187	mflo	($24,$15,$21)
1188	mfhi	($25,$15,$21)
1189	daddu	$2,$24
1190	sltu	$1,$2,$24
1191	dmultu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
1192	daddu	$25,$1
1193	daddu	$3,$25
1194	sltu	$1,$3,$25
1195	daddu	$7,$1
1196	mflo	($24,$16,$19)
1197	mfhi	($25,$16,$19)
1198	daddu	$2,$24
1199	sltu	$1,$2,$24
1200	dmultu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
1201	daddu	$25,$1
1202	daddu	$3,$25
1203	sltu	$1,$3,$25
1204	daddu	$7,$1
1205	mflo	($24,$18,$17)
1206	mfhi	($25,$18,$17)
1207	daddu	$2,$24
1208	sltu	$1,$2,$24
1209	dmultu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
1210	daddu	$25,$1
1211	daddu	$3,$25
1212	sltu	$1,$3,$25
1213	daddu	$7,$1
1214	mflo	($24,$20,$11)
1215	mfhi	($25,$20,$11)
1216	daddu	$2,$24
1217	sltu	$1,$2,$24
1218	dmultu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
1219	daddu	$25,$1
1220	daddu	$3,$25
1221	sltu	$1,$3,$25
1222	daddu	$7,$1
1223	mflo	($24,$5,$10)
1224	mfhi	($25,$5,$10)
1225	daddu	$2,$24
1226	sltu	$1,$2,$24
1227	 dmultu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
1228	daddu	$25,$1
1229	daddu	$3,$25
1230	sltu	$1,$3,$25
1231	daddu	$7,$1
1232	sd	$2,9*8($4)	# r[9]=c1;
1233
1234	mflo	($24,$5,$11)
1235	mfhi	($25,$5,$11)
1236	daddu	$3,$24
1237	sltu	$1,$3,$24
1238	dmultu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
1239	daddu	$25,$1
1240	daddu	$7,$25
1241	sltu	$2,$7,$25
1242	mflo	($24,$20,$17)
1243	mfhi	($25,$20,$17)
1244	daddu	$3,$24
1245	sltu	$1,$3,$24
1246	dmultu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
1247	daddu	$25,$1
1248	daddu	$7,$25
1249	sltu	$1,$7,$25
1250	daddu	$2,$1
1251	mflo	($24,$18,$19)
1252	mfhi	($25,$18,$19)
1253	daddu	$3,$24
1254	sltu	$1,$3,$24
1255	dmultu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
1256	daddu	$25,$1
1257	daddu	$7,$25
1258	sltu	$1,$7,$25
1259	daddu	$2,$1
1260	mflo	($24,$16,$21)
1261	mfhi	($25,$16,$21)
1262	daddu	$3,$24
1263	sltu	$1,$3,$24
1264	dmultu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
1265	daddu	$25,$1
1266	daddu	$7,$25
1267	sltu	$1,$7,$25
1268	daddu	$2,$1
1269	mflo	($24,$15,$6)
1270	mfhi	($25,$15,$6)
1271	daddu	$3,$24
1272	sltu	$1,$3,$24
1273	dmultu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
1274	daddu	$25,$1
1275	daddu	$7,$25
1276	sltu	$1,$7,$25
1277	daddu	$2,$1
1278	sd	$3,10*8($4)	# r[10]=c2;
1279
1280	mflo	($24,$16,$6)
1281	mfhi	($25,$16,$6)
1282	daddu	$7,$24
1283	sltu	$1,$7,$24
1284	dmultu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
1285	daddu	$25,$1
1286	daddu	$2,$25
1287	sltu	$3,$2,$25
1288	mflo	($24,$18,$21)
1289	mfhi	($25,$18,$21)
1290	daddu	$7,$24
1291	sltu	$1,$7,$24
1292	dmultu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
1293	daddu	$25,$1
1294	daddu	$2,$25
1295	sltu	$1,$2,$25
1296	daddu	$3,$1
1297	mflo	($24,$20,$19)
1298	mfhi	($25,$20,$19)
1299	daddu	$7,$24
1300	sltu	$1,$7,$24
1301	dmultu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
1302	daddu	$25,$1
1303	daddu	$2,$25
1304	sltu	$1,$2,$25
1305	daddu	$3,$1
1306	mflo	($24,$5,$17)
1307	mfhi	($25,$5,$17)
1308	daddu	$7,$24
1309	sltu	$1,$7,$24
1310	 dmultu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
1311	daddu	$25,$1
1312	daddu	$2,$25
1313	sltu	$1,$2,$25
1314	daddu	$3,$1
1315	sd	$7,11*8($4)	# r[11]=c3;
1316
1317	mflo	($24,$5,$19)
1318	mfhi	($25,$5,$19)
1319	daddu	$2,$24
1320	sltu	$1,$2,$24
1321	dmultu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
1322	daddu	$25,$1
1323	daddu	$3,$25
1324	sltu	$7,$3,$25
1325	mflo	($24,$20,$21)
1326	mfhi	($25,$20,$21)
1327	daddu	$2,$24
1328	sltu	$1,$2,$24
1329	dmultu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
1330	daddu	$25,$1
1331	daddu	$3,$25
1332	sltu	$1,$3,$25
1333	daddu	$7,$1
1334	mflo	($24,$18,$6)
1335	mfhi	($25,$18,$6)
1336	daddu	$2,$24
1337	sltu	$1,$2,$24
1338	 dmultu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
1339	daddu	$25,$1
1340	daddu	$3,$25
1341	sltu	$1,$3,$25
1342	daddu	$7,$1
1343	sd	$2,12*8($4)	# r[12]=c1;
1344
1345	mflo	($24,$20,$6)
1346	mfhi	($25,$20,$6)
1347	daddu	$3,$24
1348	sltu	$1,$3,$24
1349	dmultu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
1350	daddu	$25,$1
1351	daddu	$7,$25
1352	sltu	$2,$7,$25
1353	mflo	($24,$5,$21)
1354	mfhi	($25,$5,$21)
1355	daddu	$3,$24
1356	sltu	$1,$3,$24
1357	dmultu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
1358	daddu	$25,$1
1359	daddu	$7,$25
1360	sltu	$1,$7,$25
1361	daddu	$2,$1
1362	sd	$3,13*8($4)	# r[13]=c2;
1363
1364	mflo	($24,$5,$6)
1365	mfhi	($25,$5,$6)
1366	daddu	$7,$24
1367	sltu	$1,$7,$24
1368	daddu	$25,$1
1369	daddu	$2,$25
1370	sd	$7,14*8($4)	# r[14]=c3;
1371	sd	$2,15*8($4)	# r[15]=c1;
1372
1373	.set	noreorder
1374	ld	$21,5*8($29)
1375	ld	$20,4*8($29)
1376	ld	$19,3*8($29)
1377	ld	$18,2*8($29)
1378	ld	$17,1*8($29)
1379	ld	$16,0*8($29)
1380	jr	$31
1381	daddu $29,6*8		# delay slot: release the stack frame
1382.end	bn_mul_comba8
1383
/*
 * bn_mul_comba4: 4x4-word multiplication, r[0..7] = a[0..3] * b[0..3]
 * (64-bit words), computed one result column at a time.
 *
 * In:   $4 = r (eight words are stored)
 *       $5 = a, $6 = b (four words are loaded from each)
 * Registers:
 *       a[0..3] live in $12-$15, b[0..3] in $8-$11
 *       $2,$3,$7 rotate as the accumulators named c1,c2,c3 in the
 *       mul_add_c comments; $24/$25 receive the low/high halves of
 *       each product and $1 the carry.
 * No stack frame is used and no registers are saved.
 *
 * dmultu(), mflo() and mfhi() are written in macro-call form; their
 * definitions are not in this chunk (presumably mips_arch.h).
 */
1384.align	5
1385.globl	bn_mul_comba4
1386.ent	bn_mul_comba4
1387bn_mul_comba4:
1388	.set	reorder
1389	ld	$12,0($5)
1390	ld	$8,0($6)
1391	ld	$13,8($5)
1392	ld	$14,2*8($5)
1393	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
1394	ld	$15,3*8($5)
1395	ld	$9,8($6)
1396	ld	$10,2*8($6)
1397	ld	$11,3*8($6)
1398	mflo	($2,$12,$8)
1399	mfhi	($3,$12,$8)
1400	sd	$2,0($4)		# r[0]
1401
1402	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
1403	mflo	($24,$12,$9)
1404	mfhi	($25,$12,$9)
1405	daddu	$3,$24
1406	sltu	$1,$3,$24
1407	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
1408	daddu	$7,$25,$1
1409	mflo	($24,$13,$8)
1410	mfhi	($25,$13,$8)
1411	daddu	$3,$24
1412	sltu	$1,$3,$24
1413	 dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
1414	daddu	$25,$1
1415	daddu	$7,$25
1416	sltu	$2,$7,$25
1417	sd	$3,8($4)		# r[1]
1418
1419	mflo	($24,$14,$8)
1420	mfhi	($25,$14,$8)
1421	daddu	$7,$24
1422	sltu	$1,$7,$24
1423	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
1424	daddu	$25,$1
1425	daddu	$2,$25
1426	mflo	($24,$13,$9)
1427	mfhi	($25,$13,$9)
1428	daddu	$7,$24
1429	sltu	$1,$7,$24
1430	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
1431	daddu	$25,$1
1432	daddu	$2,$25
1433	sltu	$3,$2,$25
1434	mflo	($24,$12,$10)
1435	mfhi	($25,$12,$10)
1436	daddu	$7,$24
1437	sltu	$1,$7,$24
1438	 dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
1439	daddu	$25,$1
1440	daddu	$2,$25
1441	sltu	$1,$2,$25
1442	daddu	$3,$1
1443	sd	$7,2*8($4)	# r[2]
1444
1445	mflo	($24,$12,$11)
1446	mfhi	($25,$12,$11)
1447	daddu	$2,$24
1448	sltu	$1,$2,$24
1449	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
1450	daddu	$25,$1
1451	daddu	$3,$25
1452	sltu	$7,$3,$25
1453	mflo	($24,$13,$10)
1454	mfhi	($25,$13,$10)
1455	daddu	$2,$24
1456	sltu	$1,$2,$24
1457	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
1458	daddu	$25,$1
1459	daddu	$3,$25
1460	sltu	$1,$3,$25
1461	daddu	$7,$1
1462	mflo	($24,$14,$9)
1463	mfhi	($25,$14,$9)
1464	daddu	$2,$24
1465	sltu	$1,$2,$24
1466	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
1467	daddu	$25,$1
1468	daddu	$3,$25
1469	sltu	$1,$3,$25
1470	daddu	$7,$1
1471	mflo	($24,$15,$8)
1472	mfhi	($25,$15,$8)
1473	daddu	$2,$24
1474	sltu	$1,$2,$24
1475	 dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
1476	daddu	$25,$1
1477	daddu	$3,$25
1478	sltu	$1,$3,$25
1479	daddu	$7,$1
1480	sd	$2,3*8($4)	# r[3]
1481
1482	mflo	($24,$15,$9)
1483	mfhi	($25,$15,$9)
1484	daddu	$3,$24
1485	sltu	$1,$3,$24
1486	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
1487	daddu	$25,$1
1488	daddu	$7,$25
1489	sltu	$2,$7,$25
1490	mflo	($24,$14,$10)
1491	mfhi	($25,$14,$10)
1492	daddu	$3,$24
1493	sltu	$1,$3,$24
1494	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
1495	daddu	$25,$1
1496	daddu	$7,$25
1497	sltu	$1,$7,$25
1498	daddu	$2,$1
1499	mflo	($24,$13,$11)
1500	mfhi	($25,$13,$11)
1501	daddu	$3,$24
1502	sltu	$1,$3,$24
1503	 dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
1504	daddu	$25,$1
1505	daddu	$7,$25
1506	sltu	$1,$7,$25
1507	daddu	$2,$1
1508	sd	$3,4*8($4)	# r[4]
1509
1510	mflo	($24,$14,$11)
1511	mfhi	($25,$14,$11)
1512	daddu	$7,$24
1513	sltu	$1,$7,$24
1514	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
1515	daddu	$25,$1
1516	daddu	$2,$25
1517	sltu	$3,$2,$25
1518	mflo	($24,$15,$10)
1519	mfhi	($25,$15,$10)
1520	daddu	$7,$24
1521	sltu	$1,$7,$24
1522	 dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1523	daddu	$25,$1
1524	daddu	$2,$25
1525	sltu	$1,$2,$25
1526	daddu	$3,$1
1527	sd	$7,5*8($4)	# r[5]
1528
1529	mflo	($24,$15,$11)
1530	mfhi	($25,$15,$11)
1531	daddu	$2,$24
1532	sltu	$1,$2,$24
1533	daddu	$25,$1
1534	daddu	$3,$25
1535	sd	$2,6*8($4)	# r[6]
1536	sd	$3,7*8($4)	# r[7]
1537
1538	.set	noreorder
1539	jr	$31
1540	nop
1541.end	bn_mul_comba4
1542
# bn_sqr_comba8 -- square an 8-doubleword operand into a 16-doubleword result.
#
# Register use, as read from the code below:
#   $5        base of the operand; doublewords at 0..7*8 are loaded
#   $4        base of the result; doublewords at 0..15*8 are stored
#   $12-$15   a[0]..a[3]
#   $8-$11    a[4]..a[7]
#   $2,$3,$7  three-word column accumulator (c1,c2,c3 in the comments);
#             the low/middle/top roles rotate after every stored word
#   $24,$25   low/high half of the product being accumulated
#   $1,$6     carry scratch ($1 may be named because of .set noat above)
# No stack is used and none of these registers is saved or restored.
# In the mul_add_c/mul_add_c2 comments b[] is the same operand as a[]:
# every multiplicand comes from the loads through $5.
# Presumably reached from C as bn_sqr_comba8(r, a) -- confirm against the
# C prototype, which is not part of this file.
#
# dmultu/mflo/mfhi are written with parenthesised operand lists; they are
# presumably macros supplied by mips_arch.h -- confirm there.
1543.align	5
1544.globl	bn_sqr_comba8
1545.ent	bn_sqr_comba8
1546bn_sqr_comba8:
1547	.set	reorder
1548	ld	$12,0($5)
1549	ld	$13,8($5)
1550	ld	$14,2*8($5)
1551	ld	$15,3*8($5)
1552
	# Column 0: a[0]*a[0]. The low half is result word 0, the high half
	# becomes the starting value of $3. The remaining operand words are
	# loaded between the multiply and the reads of its result.
1553	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
1554	ld	$8,4*8($5)
1555	ld	$9,5*8($5)
1556	ld	$10,6*8($5)
1557	ld	$11,7*8($5)
1558	mflo	($2,$12,$12)
1559	mfhi	($3,$12,$12)
1560	sd	$2,0($4)
1561
	# Column 1: 2*a[0]*a[1]. The product is doubled by shifting. Each slt
	# against $0 records the top bit of a half before dsll drops it: the
	# bit of $25 lands in $2, the bit of $24 is added into the shifted $25.
	# Result word 1 is $3; $7 (set here) and $2 carry on into column 2.
1562	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
1563	mflo	($24,$12,$13)
1564	mfhi	($25,$12,$13)
1565	slt	$2,$25,$0
1566	dsll	$25,1
1567	 dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
1568	slt	$6,$24,$0
1569	daddu	$25,$6
1570	dsll	$24,1
1571	daddu	$3,$24
1572	sltu	$1,$3,$24
1573	daddu	$7,$25,$1
1574	sd	$3,8($4)
	# Column 2: 2*a[2]*a[0] + a[1]*a[1] into $7 (low), $2 (middle), $3 (top).
	# A doubled product is accumulated by adding $24 twice to the low word
	# and $25 twice to the middle word, each carry being folded upward.
	# The first doubled add of a column writes the top word with sltu;
	# later adds in the same column accumulate into it.
	# The dmultu placed inside each add sequence starts the product that
	# is consumed by the following step (forward multiplication).
1575	mflo	($24,$14,$12)
1576	mfhi	($25,$14,$12)
1577	daddu	$7,$24
1578	sltu	$1,$7,$24
1579	 dmultu	($13,$13)		# forward multiplication
1580	daddu	$7,$24
1581	daddu	$1,$25
1582	sltu	$24,$7,$24
1583	daddu	$2,$1
1584	daddu	$25,$24
1585	sltu	$3,$2,$1
1586	daddu	$2,$25
1587	sltu	$25,$2,$25
1588	daddu	$3,$25
	# a[1]*a[1] is a diagonal term and is added only once.
1589	mflo	($24,$13,$13)
1590	mfhi	($25,$13,$13)
1591	daddu	$7,$24
1592	sltu	$1,$7,$24
1593	 dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
1594	daddu	$25,$1
1595	daddu	$2,$25
1596	sltu	$1,$2,$25
1597	daddu	$3,$1
1598	sd	$7,2*8($4)
	# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2] into $2 (low), $3, $7.
1599	mflo	($24,$12,$15)
1600	mfhi	($25,$12,$15)
1601	daddu	$2,$24
1602	sltu	$1,$2,$24
1603	 dmultu	($13,$14)		# forward multiplication
1604	daddu	$2,$24
1605	daddu	$1,$25
1606	sltu	$24,$2,$24
1607	daddu	$3,$1
1608	daddu	$25,$24
1609	sltu	$7,$3,$1
1610	daddu	$3,$25
1611	sltu	$25,$3,$25
1612	daddu	$7,$25
1613	mflo	($24,$13,$14)
1614	mfhi	($25,$13,$14)
1615	daddu	$2,$24
1616	sltu	$1,$2,$24
1617	 dmultu	($8,$12)		# forward multiplication
1618	daddu	$2,$24
1619	daddu	$1,$25
1620	sltu	$24,$2,$24
1621	daddu	$3,$1
1622	daddu	$25,$24
1623	sltu	$1,$3,$1
1624	daddu	$3,$25
1625	daddu	$7,$1
1626	sltu	$25,$3,$25
1627	daddu	$7,$25
	# Column 4: 2*a[4]*a[0] + 2*a[3]*a[1] + a[2]*a[2] into $3 (low), $7, $2.
	# Result word 3 ($2) is stored right after the product is fetched.
1628	mflo	($24,$8,$12)
1629	mfhi	($25,$8,$12)
1630	sd	$2,3*8($4)
1631	daddu	$3,$24
1632	sltu	$1,$3,$24
1633	 dmultu	($15,$13)		# forward multiplication
1634	daddu	$3,$24
1635	daddu	$1,$25
1636	sltu	$24,$3,$24
1637	daddu	$7,$1
1638	daddu	$25,$24
1639	sltu	$2,$7,$1
1640	daddu	$7,$25
1641	sltu	$25,$7,$25
1642	daddu	$2,$25
1643	mflo	($24,$15,$13)
1644	mfhi	($25,$15,$13)
1645	daddu	$3,$24
1646	sltu	$1,$3,$24
1647	 dmultu	($14,$14)		# forward multiplication
1648	daddu	$3,$24
1649	daddu	$1,$25
1650	sltu	$24,$3,$24
1651	daddu	$7,$1
1652	daddu	$25,$24
1653	sltu	$1,$7,$1
1654	daddu	$7,$25
1655	daddu	$2,$1
1656	sltu	$25,$7,$25
1657	daddu	$2,$25
	# a[2]*a[2] is a diagonal term and is added only once.
1658	mflo	($24,$14,$14)
1659	mfhi	($25,$14,$14)
1660	daddu	$3,$24
1661	sltu	$1,$3,$24
1662	 dmultu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
1663	daddu	$25,$1
1664	daddu	$7,$25
1665	sltu	$1,$7,$25
1666	daddu	$2,$1
1667	sd	$3,4*8($4)
	# Column 5: 2*a[0]*a[5] + 2*a[1]*a[4] + 2*a[2]*a[3] into $7 (low), $2, $3.
1668	mflo	($24,$12,$9)
1669	mfhi	($25,$12,$9)
1670	daddu	$7,$24
1671	sltu	$1,$7,$24
1672	 dmultu	($13,$8)		# forward multiplication
1673	daddu	$7,$24
1674	daddu	$1,$25
1675	sltu	$24,$7,$24
1676	daddu	$2,$1
1677	daddu	$25,$24
1678	sltu	$3,$2,$1
1679	daddu	$2,$25
1680	sltu	$25,$2,$25
1681	daddu	$3,$25
1682	mflo	($24,$13,$8)
1683	mfhi	($25,$13,$8)
1684	daddu	$7,$24
1685	sltu	$1,$7,$24
1686	 dmultu	($14,$15)		# forward multiplication
1687	daddu	$7,$24
1688	daddu	$1,$25
1689	sltu	$24,$7,$24
1690	daddu	$2,$1
1691	daddu	$25,$24
1692	sltu	$1,$2,$1
1693	daddu	$2,$25
1694	daddu	$3,$1
1695	sltu	$25,$2,$25
1696	daddu	$3,$25
1697	mflo	($24,$14,$15)
1698	mfhi	($25,$14,$15)
1699	daddu	$7,$24
1700	sltu	$1,$7,$24
1701	 dmultu	($10,$12)		# forward multiplication
1702	daddu	$7,$24
1703	daddu	$1,$25
1704	sltu	$24,$7,$24
1705	daddu	$2,$1
1706	daddu	$25,$24
1707	sltu	$1,$2,$1
1708	daddu	$2,$25
1709	daddu	$3,$1
1710	sltu	$25,$2,$25
1711	daddu	$3,$25
	# Column 6: 2*a[6]*a[0] + 2*a[5]*a[1] + 2*a[4]*a[2] + a[3]*a[3]
	# into $2 (low), $3, $7. Result word 5 ($7) is stored right after
	# the product is fetched.
1712	mflo	($24,$10,$12)
1713	mfhi	($25,$10,$12)
1714	sd	$7,5*8($4)
1715	daddu	$2,$24
1716	sltu	$1,$2,$24
1717	 dmultu	($9,$13)		# forward multiplication
1718	daddu	$2,$24
1719	daddu	$1,$25
1720	sltu	$24,$2,$24
1721	daddu	$3,$1
1722	daddu	$25,$24
1723	sltu	$7,$3,$1
1724	daddu	$3,$25
1725	sltu	$25,$3,$25
1726	daddu	$7,$25
1727	mflo	($24,$9,$13)
1728	mfhi	($25,$9,$13)
1729	daddu	$2,$24
1730	sltu	$1,$2,$24
1731	 dmultu	($8,$14)		# forward multiplication
1732	daddu	$2,$24
1733	daddu	$1,$25
1734	sltu	$24,$2,$24
1735	daddu	$3,$1
1736	daddu	$25,$24
1737	sltu	$1,$3,$1
1738	daddu	$3,$25
1739	daddu	$7,$1
1740	sltu	$25,$3,$25
1741	daddu	$7,$25
1742	mflo	($24,$8,$14)
1743	mfhi	($25,$8,$14)
1744	daddu	$2,$24
1745	sltu	$1,$2,$24
1746	 dmultu	($15,$15)		# forward multiplication
1747	daddu	$2,$24
1748	daddu	$1,$25
1749	sltu	$24,$2,$24
1750	daddu	$3,$1
1751	daddu	$25,$24
1752	sltu	$1,$3,$1
1753	daddu	$3,$25
1754	daddu	$7,$1
1755	sltu	$25,$3,$25
1756	daddu	$7,$25
	# a[3]*a[3] is a diagonal term and is added only once.
1757	mflo	($24,$15,$15)
1758	mfhi	($25,$15,$15)
1759	daddu	$2,$24
1760	sltu	$1,$2,$24
1761	 dmultu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
1762	daddu	$25,$1
1763	daddu	$3,$25
1764	sltu	$1,$3,$25
1765	daddu	$7,$1
1766	sd	$2,6*8($4)
	# Column 7: 2*a[0]*a[7] + 2*a[1]*a[6] + 2*a[2]*a[5] + 2*a[3]*a[4]
	# into $3 (low), $7, $2.
1767	mflo	($24,$12,$11)
1768	mfhi	($25,$12,$11)
1769	daddu	$3,$24
1770	sltu	$1,$3,$24
1771	 dmultu	($13,$10)		# forward multiplication
1772	daddu	$3,$24
1773	daddu	$1,$25
1774	sltu	$24,$3,$24
1775	daddu	$7,$1
1776	daddu	$25,$24
1777	sltu	$2,$7,$1
1778	daddu	$7,$25
1779	sltu	$25,$7,$25
1780	daddu	$2,$25
1781	mflo	($24,$13,$10)
1782	mfhi	($25,$13,$10)
1783	daddu	$3,$24
1784	sltu	$1,$3,$24
1785	 dmultu	($14,$9)		# forward multiplication
1786	daddu	$3,$24
1787	daddu	$1,$25
1788	sltu	$24,$3,$24
1789	daddu	$7,$1
1790	daddu	$25,$24
1791	sltu	$1,$7,$1
1792	daddu	$7,$25
1793	daddu	$2,$1
1794	sltu	$25,$7,$25
1795	daddu	$2,$25
1796	mflo	($24,$14,$9)
1797	mfhi	($25,$14,$9)
1798	daddu	$3,$24
1799	sltu	$1,$3,$24
1800	 dmultu	($15,$8)		# forward multiplication
1801	daddu	$3,$24
1802	daddu	$1,$25
1803	sltu	$24,$3,$24
1804	daddu	$7,$1
1805	daddu	$25,$24
1806	sltu	$1,$7,$1
1807	daddu	$7,$25
1808	daddu	$2,$1
1809	sltu	$25,$7,$25
1810	daddu	$2,$25
1811	mflo	($24,$15,$8)
1812	mfhi	($25,$15,$8)
1813	daddu	$3,$24
1814	sltu	$1,$3,$24
1815	 dmultu	($11,$13)		# forward multiplication
1816	daddu	$3,$24
1817	daddu	$1,$25
1818	sltu	$24,$3,$24
1819	daddu	$7,$1
1820	daddu	$25,$24
1821	sltu	$1,$7,$1
1822	daddu	$7,$25
1823	daddu	$2,$1
1824	sltu	$25,$7,$25
1825	daddu	$2,$25
	# Column 8: 2*a[7]*a[1] + 2*a[6]*a[2] + 2*a[5]*a[3] + a[4]*a[4]
	# into $7 (low), $2, $3. Result word 7 ($3) is stored right after
	# the product is fetched.
1826	mflo	($24,$11,$13)
1827	mfhi	($25,$11,$13)
1828	sd	$3,7*8($4)
1829	daddu	$7,$24
1830	sltu	$1,$7,$24
1831	 dmultu	($10,$14)		# forward multiplication
1832	daddu	$7,$24
1833	daddu	$1,$25
1834	sltu	$24,$7,$24
1835	daddu	$2,$1
1836	daddu	$25,$24
1837	sltu	$3,$2,$1
1838	daddu	$2,$25
1839	sltu	$25,$2,$25
1840	daddu	$3,$25
1841	mflo	($24,$10,$14)
1842	mfhi	($25,$10,$14)
1843	daddu	$7,$24
1844	sltu	$1,$7,$24
1845	 dmultu	($9,$15)		# forward multiplication
1846	daddu	$7,$24
1847	daddu	$1,$25
1848	sltu	$24,$7,$24
1849	daddu	$2,$1
1850	daddu	$25,$24
1851	sltu	$1,$2,$1
1852	daddu	$2,$25
1853	daddu	$3,$1
1854	sltu	$25,$2,$25
1855	daddu	$3,$25
1856	mflo	($24,$9,$15)
1857	mfhi	($25,$9,$15)
1858	daddu	$7,$24
1859	sltu	$1,$7,$24
1860	 dmultu	($8,$8)		# forward multiplication
1861	daddu	$7,$24
1862	daddu	$1,$25
1863	sltu	$24,$7,$24
1864	daddu	$2,$1
1865	daddu	$25,$24
1866	sltu	$1,$2,$1
1867	daddu	$2,$25
1868	daddu	$3,$1
1869	sltu	$25,$2,$25
1870	daddu	$3,$25
	# a[4]*a[4] is a diagonal term and is added only once.
1871	mflo	($24,$8,$8)
1872	mfhi	($25,$8,$8)
1873	daddu	$7,$24
1874	sltu	$1,$7,$24
1875	 dmultu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
1876	daddu	$25,$1
1877	daddu	$2,$25
1878	sltu	$1,$2,$25
1879	daddu	$3,$1
1880	sd	$7,8*8($4)
	# Column 9: 2*a[2]*a[7] + 2*a[3]*a[6] + 2*a[4]*a[5] into $2 (low), $3, $7.
1881	mflo	($24,$14,$11)
1882	mfhi	($25,$14,$11)
1883	daddu	$2,$24
1884	sltu	$1,$2,$24
1885	 dmultu	($15,$10)		# forward multiplication
1886	daddu	$2,$24
1887	daddu	$1,$25
1888	sltu	$24,$2,$24
1889	daddu	$3,$1
1890	daddu	$25,$24
1891	sltu	$7,$3,$1
1892	daddu	$3,$25
1893	sltu	$25,$3,$25
1894	daddu	$7,$25
1895	mflo	($24,$15,$10)
1896	mfhi	($25,$15,$10)
1897	daddu	$2,$24
1898	sltu	$1,$2,$24
1899	 dmultu	($8,$9)		# forward multiplication
1900	daddu	$2,$24
1901	daddu	$1,$25
1902	sltu	$24,$2,$24
1903	daddu	$3,$1
1904	daddu	$25,$24
1905	sltu	$1,$3,$1
1906	daddu	$3,$25
1907	daddu	$7,$1
1908	sltu	$25,$3,$25
1909	daddu	$7,$25
1910	mflo	($24,$8,$9)
1911	mfhi	($25,$8,$9)
1912	daddu	$2,$24
1913	sltu	$1,$2,$24
1914	 dmultu	($11,$15)		# forward multiplication
1915	daddu	$2,$24
1916	daddu	$1,$25
1917	sltu	$24,$2,$24
1918	daddu	$3,$1
1919	daddu	$25,$24
1920	sltu	$1,$3,$1
1921	daddu	$3,$25
1922	daddu	$7,$1
1923	sltu	$25,$3,$25
1924	daddu	$7,$25
	# Column 10: 2*a[7]*a[3] + 2*a[6]*a[4] + a[5]*a[5] into $3 (low), $7, $2.
	# Result word 9 ($2) is stored right after the product is fetched.
1925	mflo	($24,$11,$15)
1926	mfhi	($25,$11,$15)
1927	sd	$2,9*8($4)
1928	daddu	$3,$24
1929	sltu	$1,$3,$24
1930	 dmultu	($10,$8)		# forward multiplication
1931	daddu	$3,$24
1932	daddu	$1,$25
1933	sltu	$24,$3,$24
1934	daddu	$7,$1
1935	daddu	$25,$24
1936	sltu	$2,$7,$1
1937	daddu	$7,$25
1938	sltu	$25,$7,$25
1939	daddu	$2,$25
1940	mflo	($24,$10,$8)
1941	mfhi	($25,$10,$8)
1942	daddu	$3,$24
1943	sltu	$1,$3,$24
1944	 dmultu	($9,$9)		# forward multiplication
1945	daddu	$3,$24
1946	daddu	$1,$25
1947	sltu	$24,$3,$24
1948	daddu	$7,$1
1949	daddu	$25,$24
1950	sltu	$1,$7,$1
1951	daddu	$7,$25
1952	daddu	$2,$1
1953	sltu	$25,$7,$25
1954	daddu	$2,$25
	# a[5]*a[5] is a diagonal term and is added only once.
1955	mflo	($24,$9,$9)
1956	mfhi	($25,$9,$9)
1957	daddu	$3,$24
1958	sltu	$1,$3,$24
1959	 dmultu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
1960	daddu	$25,$1
1961	daddu	$7,$25
1962	sltu	$1,$7,$25
1963	daddu	$2,$1
1964	sd	$3,10*8($4)
	# Column 11: 2*a[4]*a[7] + 2*a[5]*a[6] into $7 (low), $2, $3.
1965	mflo	($24,$8,$11)
1966	mfhi	($25,$8,$11)
1967	daddu	$7,$24
1968	sltu	$1,$7,$24
1969	 dmultu	($9,$10)		# forward multiplication
1970	daddu	$7,$24
1971	daddu	$1,$25
1972	sltu	$24,$7,$24
1973	daddu	$2,$1
1974	daddu	$25,$24
1975	sltu	$3,$2,$1
1976	daddu	$2,$25
1977	sltu	$25,$2,$25
1978	daddu	$3,$25
1979	mflo	($24,$9,$10)
1980	mfhi	($25,$9,$10)
1981	daddu	$7,$24
1982	sltu	$1,$7,$24
1983	 dmultu	($11,$9)		# forward multiplication
1984	daddu	$7,$24
1985	daddu	$1,$25
1986	sltu	$24,$7,$24
1987	daddu	$2,$1
1988	daddu	$25,$24
1989	sltu	$1,$2,$1
1990	daddu	$2,$25
1991	daddu	$3,$1
1992	sltu	$25,$2,$25
1993	daddu	$3,$25
	# Column 12: 2*a[7]*a[5] + a[6]*a[6] into $2 (low), $3, $7.
	# Result word 11 ($7) is stored right after the product is fetched.
1994	mflo	($24,$11,$9)
1995	mfhi	($25,$11,$9)
1996	sd	$7,11*8($4)
1997	daddu	$2,$24
1998	sltu	$1,$2,$24
1999	 dmultu	($10,$10)		# forward multiplication
2000	daddu	$2,$24
2001	daddu	$1,$25
2002	sltu	$24,$2,$24
2003	daddu	$3,$1
2004	daddu	$25,$24
2005	sltu	$7,$3,$1
2006	daddu	$3,$25
2007	sltu	$25,$3,$25
2008	daddu	$7,$25
	# a[6]*a[6] is a diagonal term and is added only once.
2009	mflo	($24,$10,$10)
2010	mfhi	($25,$10,$10)
2011	daddu	$2,$24
2012	sltu	$1,$2,$24
2013	 dmultu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
2014	daddu	$25,$1
2015	daddu	$3,$25
2016	sltu	$1,$3,$25
2017	daddu	$7,$1
2018	sd	$2,12*8($4)
	# Column 13: 2*a[6]*a[7] into $3 (low), $7, $2.
2019	mflo	($24,$10,$11)
2020	mfhi	($25,$10,$11)
2021	daddu	$3,$24
2022	sltu	$1,$3,$24
2023	 dmultu	($11,$11)		# forward multiplication
2024	daddu	$3,$24
2025	daddu	$1,$25
2026	sltu	$24,$3,$24
2027	daddu	$7,$1
2028	daddu	$25,$24
2029	sltu	$2,$7,$1
2030	daddu	$7,$25
2031	sltu	$25,$7,$25
2032	daddu	$2,$25
	# Fetch a[7]*a[7] for the last column, then store result word 13 ($3).
2033	mflo	($24,$11,$11)
2034	mfhi	($25,$11,$11)
2035	sd	$3,13*8($4)
2036
	# Column 14: a[7]*a[7] added once. $7 becomes result word 14; $2, after
	# taking the high half plus the carry out of $7, is result word 15.
2037	daddu	$7,$24
2038	sltu	$1,$7,$24
2039	daddu	$25,$1
2040	daddu	$2,$25
2041	sd	$7,14*8($4)
2042	sd	$2,15*8($4)
2043
	# Under noreorder the nop below is the explicit delay slot of the jr.
2044	.set	noreorder
2045	jr	$31
2046	nop
2047.end	bn_sqr_comba8
2048
# bn_sqr_comba4 -- square a 4-doubleword operand into an 8-doubleword result.
#
# Register use, as read from the code below:
#   $5        base of the operand; doublewords at 0..3*8 are loaded
#   $4        base of the result; doublewords at 0..7*8 are stored
#   $12-$15   a[0]..a[3]
#   $2,$3,$7  three-word column accumulator (c1,c2,c3 in the comments);
#             the low/middle/top roles rotate after every stored word
#   $24,$25   low/high half of the product being accumulated
#   $1,$6     carry scratch ($1 may be named because of .set noat above)
# No stack is used and none of these registers is saved or restored.
# In the mul_add_c/mul_add_c2 comments b[] is the same operand as a[]:
# every multiplicand comes from the loads through $5.
# Presumably reached from C as bn_sqr_comba4(r, a) -- confirm against the
# C prototype, which is not part of this file.
#
# dmultu/mflo/mfhi are written with parenthesised operand lists; they are
# presumably macros supplied by mips_arch.h -- confirm there.
2049.align	5
2050.globl	bn_sqr_comba4
2051.ent	bn_sqr_comba4
2052bn_sqr_comba4:
2053	.set	reorder
2054	ld	$12,0($5)
2055	ld	$13,8($5)
	# Column 0: a[0]*a[0]. The low half is result word 0, the high half
	# becomes the starting value of $3. a[2] and a[3] are loaded between
	# the multiply and the reads of its result.
2056	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
2057	ld	$14,2*8($5)
2058	ld	$15,3*8($5)
2059	mflo	($2,$12,$12)
2060	mfhi	($3,$12,$12)
2061	sd	$2,0($4)
2062
	# Column 1: 2*a[0]*a[1]. The product is doubled by shifting. Each slt
	# against $0 records the top bit of a half before dsll drops it: the
	# bit of $25 lands in $2, the bit of $24 is added into the shifted $25.
	# Result word 1 is $3; $7 (set here) and $2 carry on into column 2.
2063	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
2064	mflo	($24,$12,$13)
2065	mfhi	($25,$12,$13)
2066	slt	$2,$25,$0
2067	dsll	$25,1
2068	 dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
2069	slt	$6,$24,$0
2070	daddu	$25,$6
2071	dsll	$24,1
2072	daddu	$3,$24
2073	sltu	$1,$3,$24
2074	daddu	$7,$25,$1
2075	sd	$3,8($4)
	# Column 2: 2*a[2]*a[0] + a[1]*a[1] into $7 (low), $2 (middle), $3 (top).
	# A doubled product is accumulated by adding $24 twice to the low word
	# and $25 twice to the middle word, each carry being folded upward.
	# The first doubled add of a column writes the top word with sltu;
	# later adds in the same column accumulate into it.
	# The dmultu placed inside each add sequence starts the product that
	# is consumed by the following step (forward multiplication).
2076	mflo	($24,$14,$12)
2077	mfhi	($25,$14,$12)
2078	daddu	$7,$24
2079	sltu	$1,$7,$24
2080	 dmultu	($13,$13)		# forward multiplication
2081	daddu	$7,$24
2082	daddu	$1,$25
2083	sltu	$24,$7,$24
2084	daddu	$2,$1
2085	daddu	$25,$24
2086	sltu	$3,$2,$1
2087	daddu	$2,$25
2088	sltu	$25,$2,$25
2089	daddu	$3,$25
	# a[1]*a[1] is a diagonal term and is added only once.
2090	mflo	($24,$13,$13)
2091	mfhi	($25,$13,$13)
2092	daddu	$7,$24
2093	sltu	$1,$7,$24
2094	 dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
2095	daddu	$25,$1
2096	daddu	$2,$25
2097	sltu	$1,$2,$25
2098	daddu	$3,$1
2099	sd	$7,2*8($4)
	# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2] into $2 (low), $3, $7.
2100	mflo	($24,$12,$15)
2101	mfhi	($25,$12,$15)
2102	daddu	$2,$24
2103	sltu	$1,$2,$24
2104	 dmultu	($13,$14)		# forward multiplication
2105	daddu	$2,$24
2106	daddu	$1,$25
2107	sltu	$24,$2,$24
2108	daddu	$3,$1
2109	daddu	$25,$24
2110	sltu	$7,$3,$1
2111	daddu	$3,$25
2112	sltu	$25,$3,$25
2113	daddu	$7,$25
2114	mflo	($24,$13,$14)
2115	mfhi	($25,$13,$14)
2116	daddu	$2,$24
2117	sltu	$1,$2,$24
2118	 dmultu	($15,$13)		# forward multiplication
2119	daddu	$2,$24
2120	daddu	$1,$25
2121	sltu	$24,$2,$24
2122	daddu	$3,$1
2123	daddu	$25,$24
2124	sltu	$1,$3,$1
2125	daddu	$3,$25
2126	daddu	$7,$1
2127	sltu	$25,$3,$25
2128	daddu	$7,$25
	# Column 4: 2*a[3]*a[1] + a[2]*a[2] into $3 (low), $7, $2.
	# Result word 3 ($2) is stored right after the product is fetched.
2129	mflo	($24,$15,$13)
2130	mfhi	($25,$15,$13)
2131	sd	$2,3*8($4)
2132	daddu	$3,$24
2133	sltu	$1,$3,$24
2134	 dmultu	($14,$14)		# forward multiplication
2135	daddu	$3,$24
2136	daddu	$1,$25
2137	sltu	$24,$3,$24
2138	daddu	$7,$1
2139	daddu	$25,$24
2140	sltu	$2,$7,$1
2141	daddu	$7,$25
2142	sltu	$25,$7,$25
2143	daddu	$2,$25
	# a[2]*a[2] is a diagonal term and is added only once.
2144	mflo	($24,$14,$14)
2145	mfhi	($25,$14,$14)
2146	daddu	$3,$24
2147	sltu	$1,$3,$24
2148	 dmultu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
2149	daddu	$25,$1
2150	daddu	$7,$25
2151	sltu	$1,$7,$25
2152	daddu	$2,$1
2153	sd	$3,4*8($4)
	# Column 5: 2*a[2]*a[3] into $7 (low), $2, $3.
2154	mflo	($24,$14,$15)
2155	mfhi	($25,$14,$15)
2156	daddu	$7,$24
2157	sltu	$1,$7,$24
2158	 dmultu	($15,$15)		# forward multiplication
2159	daddu	$7,$24
2160	daddu	$1,$25
2161	sltu	$24,$7,$24
2162	daddu	$2,$1
2163	daddu	$25,$24
2164	sltu	$3,$2,$1
2165	daddu	$2,$25
2166	sltu	$25,$2,$25
2167	daddu	$3,$25
	# Fetch a[3]*a[3] for the last column, then store result word 5 ($7).
2168	mflo	($24,$15,$15)
2169	mfhi	($25,$15,$15)
2170	sd	$7,5*8($4)
2171
	# Column 6: a[3]*a[3] added once. $2 becomes result word 6; $3, after
	# taking the high half plus the carry out of $2, is result word 7.
2172	daddu	$2,$24
2173	sltu	$1,$2,$24
2174	daddu	$25,$1
2175	daddu	$3,$25
2176	sd	$2,6*8($4)
2177	sd	$3,7*8($4)
2178
	# Under noreorder the nop below is the explicit delay slot of the jr.
2179	.set	noreorder
2180	jr	$31
2181	nop
2182.end	bn_sqr_comba4
2183