/* xref: /plan9-contrib/sys/src/libmp/amd64/mpvecdigmuladd.s (revision 272efad760864ee41cfe633b56aea9b4f5cf3ae7) */
/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi + carry
 *		hi += carry
 *		p[i] += lo
 *		oldhi = hi
 *
 *	the registers are:
 *		hi = DX		- constrained by hardware
 *		lo = AX		- constrained by hardware
 *		b+n = SI	- can't be BP
 *		p+n = DI	- can't be BP
 *		i-n = BP
 *		m = BX
 *		oldhi = CX
 *
 */
TEXT	mpvecdigmuladd(SB),$0

/*	MOVQ	b+0(FP),SI	*/
	MOVQ	RARG,SI		/* SI = b; first arg arrives in RARG on amd64 */
	MOVL	n+8(FP),CX	/* CX = n (number of 32-bit digits) */
	MOVL	m+16(FP),BX	/* BX = m (multiplier digit) */
	MOVQ	p+24(FP),DI	/* DI = p (accumulator vector) */
	MOVL	CX,BP
	NEGQ	BP		/* BP = -n; counts up to 0 as loop index */
	SHLL	$2,CX		/* CX = n*4 bytes (mpdigit is 32 bits here) */
	ADDQ	CX,SI		/* SI = b + n */
	ADDQ	CX,DI		/* DI = p + n */
	XORL	CX,CX		/* oldhi = 0 */
_muladdloop:
	MOVL	(SI)(BP*4),AX	/* lo = b[i] */
	MULL	BX		/* hi, lo = b[i] * m (DX:AX) */
	ADDL	CX,AX		/* lo += oldhi */
	JCC	_muladdnocarry1
	INCL	DX		/* hi += carry; cannot overflow: hi <= 2^32-2 after MULL */
_muladdnocarry1:
	ADDL	AX,(DI)(BP*4)	/* p[i] += lo */
	JCC	_muladdnocarry2
	INCL	DX		/* hi += carry */
_muladdnocarry2:
	MOVL	DX,CX		/* oldhi = hi */
	INCQ	BP		/* i++; sets ZF when i reaches n */
	JNZ	_muladdloop
	XORL	AX,AX
	ADDL	CX,(DI)(BP*4)	/* p[n] += oldhi (top digit of result) */
	ADCL	AX,AX		/* return carry out of p[n] */
	RET