xref: /plan9/sys/src/libmp/power/mpvecdigmulsub.s (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
/*
 *	shorthands for BC (branch conditional) with its first two operands
 *	filled in; the branch target is written after the macro name.
 *
 *	BDNZ	BC 16,0		- decrement CTR, branch while CTR != 0
 *	BDNE	BC 0,2		- decrement CTR, branch while CTR != 0 and
 *				  CR bit 2 (EQ) is clear; not referenced in
 *				  the code visible in this file
 *	BLT	BC 0xC,0	- branch if CR bit 0 (LT) is set
 */
1#define	BDNZ	BC	16,0,
2#define	BDNE	BC	0,2,
3#define	BLT	BC	0xC,0,
4
5/*
6 *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
7 *
8 *	p -= b*m, writing p[0..n]; returns -1 if the result was negative, +1 otherwise
9 *
10 *	each step looks like:
11 *		hi,lo = m*b[i]
12 *		lo += oldhi		- sets the add carry
13 *		hi += carry
14 *		p[i] -= lo + borrow	- borrow chain is parked in R12 between steps
15 *		oldhi = hi
16 *	after the last step: p[n] -= oldhi + borrow
17 *	the registers are:
18 *		b = R3		- used as it arrives; never loaded from FP here
19 *		n = CTR		- loaded through R10; R4 is not used
20 *		m = R5
21 *		p = R6
22 *		i = implicit	- R3/R6 advance via MOVWU; R7 is not used
23 *		hi = R8		- constrained by hardware
24 *		lo = R9		- constrained by hardware
25 *		oldhi = R10
26 *		tmp = R11
27 *		borrow = R12	- saved copy of XER
28 *	the count is tested at the bottom of the loop, so the body always runs once; n == 0 is not handled (callers presumably pass n >= 1)
29 */
30TEXT	mpvecdigmulsub(SB),$0
31
32	MOVW	n+4(FP),R10	/* R10 = n (scratch; R10 becomes oldhi below) */
33	MOVW	R10,CTR		/* CTR = n, the count consumed by BDNZ */
34	MOVW	m+8(FP),R5	/* R5 = m */
35	MOVW	p+12(FP),R6	/* R6 = p */
36	SUB	$4, R3		/* pre decrement for MOVWU's */
37	SUBC	$4, R6		/* pre decrement for MOVWU's and set carry (carry set = no borrow pending) */
38	MOVW	XER,R12		/* park the subtract chain's carry in R12 */
39
40	MOVW	R0, R10		/* oldhi = 0 (assumes R0 holds zero, as it is used throughout) */
41
42_mulsubloop:
43	MOVWU	4(R3),R9	/* lo = b[i], and advance b */
44	MOVW	4(R6),R11	/* tmp = p[i] */
45	MULHWU	R9,R5,R8	/* hi = (b[i] * m)>>32 */
46	MULLW	R9,R5,R9	/* lo = b[i] * m */
47	ADDC	R10,R9		/* lo += oldhi */
48	ADDE	R0,R8		/* hi += carry */
49	MOVW	R12,XER		/* bring back the subtract chain's carry */
50	SUBE	R9,R11		/* tmp -= lo (with borrow) */
51	MOVW	XER,R12		/* park it again; the next ADDC/ADDE reuse the carry bit */
52	MOVWU	R11,4(R6)	/* p[i] = tmp, and advance p */
53	MOVW	R8,R10		/* oldhi = hi */
54	BDNZ	_mulsubloop	/* loop while --CTR != 0 */
55
56	MOVW	4(R6),R11	/* tmp = p[n] */
57	MOVW	R12,XER		/* bring back the subtract chain's carry */
58	SUBE	R10,R11		/* tmp -= oldhi (with borrow) */
59	MOVWU	R11,4(R6)	/* p[n] = tmp */
60
61	/* return -1 if the result was negative, +1 otherwise */
62	SUBECC	R0,R0,R3	/* R3 = R0 - R0 - borrow: -1 if the last SUBE borrowed, else 0; sets the condition BLT tests */
63	BLT	_mulsub2	/* negative: R3 is already -1 */
64	MOVW	$1,R3		/* otherwise return +1 */
65_mulsub2:
66	RETURN
67