xref: /plan9/sys/src/libmp/mips/mpvecdigmuladd.s (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	b supplies n digits.  p is read and written at n+1 digits:
 *	after the loop the last high word is added into p[n].  A
 *	carry out of that final add is not recorded anywhere.
 *	When n is zero the loop is skipped and only p[0] += 0 runs.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi		(carry goes into hi)
 *		p[i] += lo		(carry goes into hi)
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R1		- used as passed in, never loaded from the frame
 *		carry = R2	- 0 or 1, result of each unsigned compare
 *		n = R4		- counts down to zero
 *		m = R5
 *		p = R6
 *		hi = R8		- copied out of HI after the multiply
 *		lo = R9		- copied out of LO after the multiply
 *		oldhi = R10
 *		tmp = R11
 *
 *	b and p are advanced in place rather than indexed.
 */
TEXT	mpvecdigmuladd(SB),$0

	MOVW	n+4(FP),R4
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6

	MOVW	R0, R10		/* oldhi = 0 */
	BEQ	R4, _muladd1	/* n == 0: nothing to multiply (test the count, not p) */
_muladdloop:
	MOVW	0(R1), R9	/* lo = b[i] */
	ADDU	$4, R1		/* b++ */
	MOVW	0(R6), R11	/* tmp = p[i] */
	MULU	R9, R5
	MOVW	HI, R8		/* hi = (b[i] * m)>>32 */
	MOVW	LO, R9		/* lo = b[i] * m */
	ADDU	R10, R9		/* lo += oldhi */
	SGTU	R10, R9, R2	/* carry = oldhi > lo, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */
	ADDU	R9, R11		/* tmp += lo */
	SGTU	R9, R11, R2	/* carry = lo > tmp, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */
	MOVW	R11, 0(R6)	/* p[i] = tmp */
	ADDU	$4, R6		/* p++ */
	MOVW	R8, R10		/* oldhi = hi */
	SUBU	$1, R4		/* n-- */
	BNE	R4, _muladdloop

_muladd1:
	MOVW	0(R6), R11	/* tmp = p[n] */
	ADDU	R10, R11	/* tmp += oldhi */
	MOVW	R11, 0(R6)	/* p[n] = tmp */

	RET
