/* origin: /plan9-contrib/sys/src/libmp/power/mpvecdigmuladd.s (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc) */
/*
 *	BC BO,BI,target is the raw PowerPC conditional branch.
 *		BDNZ (BO=16):	   decrement CTR, branch if CTR != 0
 *		BDNE (BO=0, BI=2): decrement CTR, branch if CTR != 0
 *				   and CR0[EQ] is clear (not used below)
 */
#define	BDNZ	BC	16,0,
#define	BDNE	BC	0,2,

/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	The n words b[0..n-1] are multiplied by the single word m and
 *	accumulated into p[0..n-1]; the last high word is then added
 *	into p[n], so p must have room for n+1 words.  A carry out of
 *	p[n] is not reported.  If n <= 0 nothing is read or written.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi	(carry out goes into hi)
 *		p[i] += lo	(carry out goes into hi)
 *		oldhi = hi
 *
 *	hi cannot overflow: b[i]*m + oldhi + p[i] is at most
 *	(2^32-1)^2 + 2*(2^32-1) = 2^64-1.
 *
 *	the registers are:
 *		b = R3		- arrives in R3, walked with MOVWU
 *		n = R4		- copied to CTR, which counts the loop
 *		m = R5
 *		p = R6		- walked with MOVWU
 *		hi = R8
 *		lo = R9
 *		oldhi = R10
 *		tmp = R11
 *
 *	R0 is used as a source of zero throughout.
 */
TEXT	mpvecdigmuladd(SB),$0

	MOVW	n+4(FP),R4
	CMP	R4,$0
	BLE	_muladddone	/* the loop tests CTR after the body, so n <= 0 must not enter it */
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6
	SUB	$4, R3		/* pre decrement for MOVWU's */
	SUB	$4, R6		/* pre decrement for MOVWU's */

	MOVW	R0, R10		/* oldhi = 0 */
	MOVW	R0, XER		/* start with XER (and its carry bit) clear */
	MOVW	R4, CTR		/* loop count = n */
_muladdloop:
	MOVWU	4(R3),R9	/* lo = b[i], b advances */
	MOVW	4(R6),R11	/* tmp = p[i], p stays put until the store */
	MULHWU	R9,R5,R8	/* hi = (b[i] * m)>>32 */
	MULLW	R9,R5,R9	/* lo = b[i] * m */
	ADDC	R10,R9		/* lo += oldhi */
	ADDE	R0,R8		/* hi += carry */
	ADDC	R9,R11		/* tmp += lo */
	ADDE	R0,R8		/* hi += carry */
	MOVWU	R11,4(R6)	/* p[i] = tmp, p advances */
	MOVW	R8,R10		/* oldhi = hi */
	BDNZ	_muladdloop

	MOVW	4(R6),R11	/* tmp = p[n] */
	ADDC	R10,R11		/* tmp += oldhi; carry out is dropped */
	MOVW	R11,4(R6)	/* p[n] = tmp; no update needed, R6 is dead */

_muladddone:
	RETURN