xref: /plan9/sys/src/libmp/mips/mpvecdigmuladd.s (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	b is a vector of n digits and m is a single digit.  The product
 *	is accumulated into p[0] .. p[n], so p must have room for n+1
 *	digits.  A carry out of the final add into p[n] is not
 *	propagated any further.  When n is 0 the loop is skipped and
 *	only p[0] += 0 is performed.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi,	hi += carry
 *		p[i] += lo,	hi += carry
 *		oldhi = hi
 *	and after the last step:
 *		p[n] += oldhi
 *
 *	the registers are:
 *		b = R1		- used as passed, advanced one digit per step
 *		n = R4		- counts down to 0
 *		m = R5
 *		p = R6		- advanced one digit per step
 *		carry = R2
 *		hi = R8		- copied out of HI after MULU
 *		lo = R9		- copied out of LO after MULU
 *		oldhi = R10
 *		tmp = R11
 *
 */
TEXT	mpvecdigmuladd(SB),$0

	MOVW	n+4(FP),R4
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6

	MOVW	R0, R10		/* oldhi = 0 */
	BEQ	R4, _muladd1	/* n == 0: nothing to multiply (test the count, not p) */

_muladdloop:
	MOVW	0(R1), R9	/* lo = b[i] */
	ADDU	$4, R1		/* b++ */
	MOVW	0(R6), R11	/* tmp = p[i] */
	MULU	R9, R5		/* HI,LO = b[i] * m */
	MOVW	HI, R8		/* hi = (b[i] * m)>>32 */
	MOVW	LO, R9		/* lo = b[i] * m */

	ADDU	R10, R9		/* lo += oldhi */
	SGTU	R10, R9, R2	/* carry = oldhi > lo, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */

	ADDU	R9, R11		/* tmp += lo */
	SGTU	R9, R11, R2	/* carry = lo > tmp, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */

	MOVW	R11, 0(R6)	/* p[i] = tmp */
	ADDU	$4, R6		/* p++ */
	MOVW	R8, R10		/* oldhi = hi */
	SUBU	$1, R4		/* n-- */
	BNE	R4, _muladdloop

_muladd1:
	MOVW	0(R6), R11	/* tmp = p[n] */
	ADDU	R10, R11	/* tmp += oldhi */
	MOVW	R11, 0(R6)	/* p[n] = tmp */

	RET
59