#define	BDNZ	BC	16,0,

/*
 * 64/64 division adapted from the PowerPC Compiler Writer's Guide
 *
 * (R3:R4) = (R3:R4) / (R5:R6)		(64b) = (64b / 64b)
 *    quo      dvd       dvs
 *
 * Remainder is left in R7:R8
 *
 * Code comment notation:
 * msw = most-significant (high-order) word, i.e. bits 0..31
 * lsw = least-significant (low-order) word, i.e. bits 32..63
 * LZ = Leading Zeroes
 * SD = Significant Digits
 *
 * R3:R4 = dvd (input dividend); quo (output quotient)
 * R5:R6 = dvs (input divisor)
 *
 * R7:R8 = tmp; rem (output remainder)
 */
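
/*
 * A rough C-level picture of this routine's contract, inferred from the
 * frame offsets used below.  This is only an illustrative sketch: the
 * typedef and exact prototype are assumptions, not taken from a header,
 * and the real work is done by the shift-and-subtract code that follows
 * rather than by the C division operators.
 *
 *	typedef unsigned long long uvlong;
 *
 *	void
 *	_divu64(uvlong dvd, uvlong dvs, uvlong *qp, uvlong *rp)
 *	{
 *		if(qp != 0)
 *			*qp = dvd / dvs;	// quotient, built up in R3:R4
 *		if(rp != 0)
 *			*rp = dvd % dvs;	// remainder, left in R7:R8
 *	}
 */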

TEXT	_divu64(SB), $0
	MOVW	a+0(FP), R3
	MOVW	a+4(FP), R4
	MOVW	b+8(FP), R5
	MOVW	b+12(FP), R6

	/*  count the number of leading 0s in the dividend */
	CMP	R3, $0 	/*  dvd.msw == 0? */
	CNTLZW 	R3, R11 	/*  R11 = dvd.msw.LZ */
	CNTLZW 	R4, R9 	/*  R9 = dvd.lsw.LZ */
	BNE 	lab1 	/*  if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
	ADD 	$32, R9, R11 	/*  dvd.LZ = dvd.lsw.LZ + 32 */

lab1:
	/*  count the number of leading 0s in the divisor */
	CMP 	R5, $0 	/*  dvs.msw == 0? */
	CNTLZW 	R5, R9 	/*  R9 = dvs.msw.LZ */
	CNTLZW 	R6, R10 	/*  R10 = dvs.lsw.LZ */
	BNE 	lab2 	/*  if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
	ADD 	$32, R10, R9 	/*  dvs.LZ = dvs.lsw.LZ + 32 */
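
/*
 * In C terms, the two count sequences above compute (a sketch in the
 * comment's own notation; clz32() stands in for the CNTLZW instruction
 * and is not a real library call):
 *
 *	dvd.LZ = dvd.msw != 0 ? clz32(dvd.msw) : 32 + clz32(dvd.lsw);	// R11
 *	dvs.LZ = dvs.msw != 0 ? clz32(dvs.msw) : 32 + clz32(dvs.lsw);	// R9
 */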

lab2:
	/*  determine shift amounts to minimize the number of iterations */
	CMP 	R11, R9 	/*  compare dvd.LZ to dvs.LZ */
	SUBC	R11, $64, R10	/*  R10 = dvd.SD */
	BGT 	lab9 	/*  if(dvs > dvd) quotient = 0 */
	ADD 	$1, R9 	/*  ++dvs.LZ (or --dvs.SD) */
	SUBC 	R9, $64, R9 	/*  R9 = dvs.SD */
	ADD 	R9, R11 	/*  (dvd.LZ + dvs.SD) = left shift of dvd for */
			/*  initial dvd */
	SUB		R9, R10, R9 	/*  (dvd.SD - dvs.SD) = right shift of dvd for */
			/*  initial tmp */
	MOVW 	R9, CTR 	/*  number of iterations = dvd.SD - dvs.SD */
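
/*
 * Written out in the comment's notation, the setup above amounts to
 * (an illustrative sketch, not variables in the source):
 *
 *	dvd.SD = 64 - dvd.LZ;			// R10
 *	if(dvd.LZ > dvs.LZ)			// then dvd < dvs
 *		goto lab9;			// quotient is 0, remainder is dvd
 *	dvs.SD = 64 - (dvs.LZ + 1);		// R9
 *	lshift = dvd.LZ + dvs.SD;		// R11: left shift applied to dvd
 *	n = dvd.SD - dvs.SD;			// R9/CTR: right shift for the initial
 *						//   tmp, and the loop iteration count
 */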

	/*  R7:R8 = R3:R4 >> R9 */
	CMP 	R9, $32
	ADD	$-32, R9, R7
	BLT	lab3 	/*  if(R9 < 32) jump to lab3 */
	SRW	R7, R3, R8 	/*  tmp.lsw = dvd.msw >> (R9 - 32) */
	MOVW 	$0, R7 	/*  tmp.msw = 0 */
	BR 	lab4
lab3:
	SRW	R9, R4, R8 	/*  R8 = dvd.lsw >> R9 */
	SUBC	R9, $32, R7
	SLW	R7, R3, R7		/*  R7 = dvd.msw << 32 - R9 */
	OR	R7, R8 		/*  tmp.lsw = R8 | R7 */
	SRW	R9, R3, R7		/*  tmp.msw = dvd.msw >> R9 */
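
/*
 * The two paths above implement a 64-bit right shift of dvd by n (= R9)
 * using only 32-bit operations; in the comment's notation (sketch):
 *
 *	if(n >= 32){
 *		tmp.lsw = dvd.msw >> (n - 32);
 *		tmp.msw = 0;
 *	}else{
 *		tmp.lsw = (dvd.lsw >> n) | (dvd.msw << (32 - n));
 *		tmp.msw = dvd.msw >> n;
 *	}
 */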

lab4:
	/*  R3:R4 = R3:R4 << R11 */
	CMP	R11,$32
	ADDC	$-32, R11, R9
	BLT 	lab5 	/*  (R11 < 32)? */
	SLW	R9, R4, R3	/*  dvd.msw = dvd.lsw << R9 */
	MOVW 	$0, R4 	/*  dvd.lsw = 0 */
	BR 	lab6

lab5:
	SLW	R11, R3	/*  R3 = dvd.msw << R11 */
	SUBC	R11, $32, R9
	SRW	R9, R4, R9	/*  R9 = dvd.lsw >> 32 - R11 */
	OR	R9, R3	/*  dvd.msw = R3 | R9 */
	SLW	R11, R4	/*  dvd.lsw = dvd.lsw << R11 */
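
/*
 * Symmetrically, the two paths above shift dvd left by lshift (= R11),
 * again with 32-bit operations (sketch):
 *
 *	if(lshift >= 32){
 *		dvd.msw = dvd.lsw << (lshift - 32);
 *		dvd.lsw = 0;
 *	}else{
 *		dvd.msw = (dvd.msw << lshift) | (dvd.lsw >> (32 - lshift));
 *		dvd.lsw <<= lshift;
 *	}
 */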

lab6:
	/*  restoring division shift and subtract loop */
	MOVW	$-1, R10
	ADDC	$0, R7	/*  clear carry bit before loop starts */
lab7:
	/*  tmp:dvd is considered one large register */
	/*  each portion is shifted left 1 bit by adding it to itself */
	/*  ADDE adds in the carry from the previous add and generates a new carry */
	ADDE 	R4,R4 	/*  shift dvd.lsw left 1 bit */
	ADDE 	R3,R3 	/*  shift dvd.msw left 1 bit */
	ADDE 	R8,R8 	/*  shift tmp.lsw left 1 bit */
	ADDE 	R7,R7 	/*  shift tmp.msw left 1 bit */
	SUBC	R6, R8, R11	/*  tmp.lsw - dvs.lsw */
	SUBECC	R5, R7, R9	/*  tmp.msw - dvs.msw */
	BLT 	lab8 	/*  if(result < 0) clear carry bit */
	MOVW	R11, R8 	/*  move lsw */
	MOVW	R9, R7	/*  move msw */
	ADDC 	$1, R10, R11 	/*  set carry bit */
lab8:
	BDNZ 	lab7

	ADDE 	R4,R4 	/*  quo.lsw (lsb = CA) */
	ADDE 	R3,R3 	/*  quo.msw (lsb from lsw) */
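
/*
 * The loop above is classic restoring division.  One way to picture it,
 * treating tmp and dvd as 64-bit values and using "bit" for the carry
 * that holds each new quotient bit (an illustrative sketch, not code
 * taken from the original):
 *
 *	bit = 0;
 *	while(n-- > 0){
 *		tmp = (tmp << 1) | (dvd >> 63);	// shift the 128-bit pair tmp:dvd
 *		dvd = (dvd << 1) | bit;		// ...pulling in last pass's quotient bit
 *		if(tmp >= dvs){
 *			tmp -= dvs;		// subtraction fits: keep it
 *			bit = 1;		// and record a 1 in the quotient
 *		}else
 *			bit = 0;		// too big: restore (keep the old tmp)
 *	}
 *	quo = (dvd << 1) | bit;	// the final ADDE pair folds in the last bit
 *	rem = tmp;		// remainder stays in R7:R8
 */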

lab10:
	MOVW	qp+16(FP), R9
	MOVW	rp+20(FP), R10
	CMP	R9, $0
	BEQ	lab11
	MOVW	R3, 0(R9)
	MOVW	R4, 4(R9)
lab11:
	CMP	R10, $0
	BEQ	lab12
	MOVW	R7, 0(R10)
	MOVW	R8, 4(R10)
lab12:
	RETURN

lab9:
	/*  Quotient is 0 (dvs > dvd) */
	MOVW	R4, R8	/*  rem.lsw = dvd.lsw */
	MOVW	R3, R7	/*  rem.msw = dvd.msw */
	MOVW	$0, R4	/*  quo.lsw = 0 */
	MOVW	$0, R3	/*  quo.msw = 0 */
	BR	lab10