xref: /plan9/sys/src/libc/power/vlop.s (revision 6891d8578618fb7ccda4a131c122d4d0e6580c4b)
1#define	BDNZ	BC	16,0,
2	/* BDNZ label: BC with BO=16, BI=0, i.e. decrement CTR and branch to label while CTR != 0 */
3/*
4 * 64/64 division adapted from powerpc compiler writer's handbook
5 * (restoring shift-and-subtract; no sign handling or dvs == 0 check is done here)
6 * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
7 *   quo       dvd       dvs
8 * Inputs are loaded from a+0(FP):a+4(FP) (dvd) and b+8(FP):b+12(FP) (dvs).
9 * Remainder is left in R7:R8
10 * quo is stored via qp+16(FP) and rem via rp+20(FP); a zero pointer skips its store.
11 * Code comment notation:
12 * msw = most-significant (high-order) word, i.e. bits 0..31
13 * lsw = least-significant (low-order) word, i.e. bits 32..63
14 * LZ = Leading Zeroes
15 * SD = Significant Digits
16 * Operands read source(s) first, destination last: SUBC a, b, c is c = b - a.
17 * R3:R4 = dvd (input dividend); quo (output quotient)
18 * R5:R6 = dvs (input divisor)
19 * R9, R10, R11 = scratch (LZ/SD counts, shift amounts, loop temporaries)
20 * R7:R8 = tmp; rem (output remainder)
21 */
22
23TEXT	_divu64(SB), $0
24	MOVW	a+0(FP), R3	/*  R3 = dvd.msw */
25	MOVW	a+4(FP), R4	/*  R4 = dvd.lsw */
26	MOVW	b+8(FP), R5	/*  R5 = dvs.msw */
27	MOVW	b+12(FP), R6	/*  R6 = dvs.lsw */
28
29	/*  count the number of leading 0s in the dividend */
30	CMP	R3, $0 	/*  dvd.msw == 0? (tested by the BNE below) */
31	CNTLZW 	R3, R11 	/*  R11 = dvd.msw.LZ */
32	CNTLZW 	R4, R9 	/*  R9 = dvd.lsw.LZ (only used when dvd.msw == 0) */
33	BNE 	lab1 	/*  if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
34	ADD 	$32, R9, R11 	/*  dvd.LZ = dvd.lsw.LZ + 32 */
35
36lab1:
37	/*  count the number of leading 0s in the divisor */
38	CMP 	R5, $0 	/*  dvs.msw == 0? (tested by the BNE below) */
39	CNTLZW 	R5, R9 	/*  R9 = dvs.msw.LZ */
40	CNTLZW 	R6, R10 	/*  R10 = dvs.lsw.LZ (only used when dvs.msw == 0) */
41	BNE 	lab2 	/*  if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
42	ADD 	$32, R10, R9 	/*  dvs.LZ = dvs.lsw.LZ + 32 */
43
44lab2:
45	/*  determine shift amounts to minimize the number of iterations */
46	CMP 	R11, R9 	/*  compare dvd.LZ to dvs.LZ (tested by the BGT below) */
47	SUBC	R11, $64, R10	/*  R10 = dvd.SD = 64 - dvd.LZ */
48	BGT 	lab9 	/*  if(dvd.LZ > dvs.LZ) then dvs > dvd: quotient = 0 */
49	ADD 	$1, R9 	/*  ++dvs.LZ (or --dvs.SD) */
50	SUBC 	R9, $64, R9 	/*  R9 = dvs.SD (already reduced by 1 by the ++ above) */
51	ADD 	R9, R11 	/*  (dvd.LZ + dvs.SD) = left shift of dvd for */
52			/*  initial dvd */
53	SUB		R9, R10, R9 	/*  (dvd.SD - dvs.SD) = right shift of dvd for */
54			/*  initial tmp */
55	MOVW 	R9, CTR 	/*  number of iterations = dvd.SD - dvs.SD (true SDs: dvd.SD - dvs.SD + 1) */
56
57	/*  R7:R8 = R3:R4 >> R9 */
58	CMP 	 R9, $32	/*  right shift >= 32? */
59	ADD	$-32, R9, R7	/*  R7 = R9 - 32, the shift used when R9 >= 32 */
60	BLT	lab3 	/*  if(R9 < 32) jump to lab3 */
61	SRW	R7, R3, R8 	/*  tmp.lsw = dvd.msw >> (R9 - 32) */
62	MOVW 	$0, R7 	/*  tmp.msw = 0 */
63	BR 	lab4
64lab3:
65	SRW	R9, R4, R8 	/*  R8 = dvd.lsw >> R9 */
66	SUBC	R9, $32, R7	/*  R7 = 32 - R9 */
67	SLW	R7, R3, R7		/*  R7 = dvd.msw << (32 - R9), the bits that cross into the lsw */
68	OR	R7, R8 		/*  tmp.lsw = R8 | R7 */
69	SRW	R9, R3, R7		/*  tmp.msw = dvd.msw >> R9 */
70
71lab4:
72	/*  R3:R4 = R3:R4 << R11 */
73	CMP	R11,$32	/*  left shift >= 32? */
74	ADDC	$-32, R11, R9	/*  R9 = R11 - 32, the shift used when R11 >= 32 */
75	BLT 	lab5 	/*  (R11 < 32)? */
76	SLW	R9, R4, R3	/*  dvd.msw = dvd.lsw << (R11 - 32) */
77	MOVW 	$0, R4 	/*  dvd.lsw = 0 */
78	BR 	lab6
79
80lab5:
81	SLW	R11, R3	/*  R3 = dvd.msw << R11 */
82	SUBC	R11, $32, R9	/*  R9 = 32 - R11 */
83	SRW	R9, R4, R9	/*  R9 = dvd.lsw >> (32 - R11); NOTE(review): R11 can be 0 (e.g. dvd.LZ == 0, dvs == 1), presumably relies on a shift by 32 giving 0 - confirm */
84	OR	R9, R3	/*  dvd.msw = R3 | R9 */
85	SLW	R11, R4	/*  dvd.lsw = dvd.lsw << R11 */
86
87lab6:
88	/*  restoring division shift and subtract loop */
89	MOVW	$-1, R10	/*  R10 = -1, so that R10 + 1 in the loop carries out */
90	ADDC	$0, R7	/*  clear carry bit before loop starts */
91lab7:
92	/*  tmp:dvd is considered one large register */
93	/*  each portion is shifted left 1 bit by adding it to itself */
94	/*  adde sums the carry from the previous and creates a new carry */
95	ADDE 	R4,R4 	/*  shift dvd.lsw left 1 bit (carry in = previous quotient bit) */
96	ADDE 	R3,R3 	/*  shift dvd.msw to left 1 bit */
97	ADDE 	R8,R8 	/*  shift tmp.lsw to left 1 bit */
98	ADDE 	R7,R7 	/*  shift tmp.msw to left 1 bit */
99	SUBC	R6, R8, R11	/*  R11 = tmp.lsw - dvs.lsw */
100	SUBECC	R5, R7, R9	/*  R9 = tmp.msw - dvs.msw (with the borrow from the lsw); result tested by BLT */
101	BLT 	lab8 	/*  if(result < 0) leave tmp alone; carry bit clear, i.e. quotient bit 0 */
102	MOVW	R11, R8 	/*  move lsw (tmp = tmp - dvs) */
103	MOVW	R9, R7	/*  move msw */
104	ADDC 	$1, R10, R11 	/*  set carry bit (-1 + 1 carries out; the R11 result is not used), i.e. quotient bit 1 */
105lab8:
106	BDNZ 	lab7	/*  loop CTR times, see MOVW R9, CTR above */
107
108	ADDE 	R4,R4 	/*  quo.lsw (lsb = CA) */
109	ADDE 	R3,R3 	/*  quo.msw (lsb from lsw) */
110
111lab10:
112	MOVW	qp+16(FP), R9	/*  R9 = qp */
113	MOVW	rp+20(FP), R10	/*  R10 = rp */
114	CMP	R9, $0
115	BEQ	lab11	/*  qp == 0: skip the quotient store */
116	MOVW	R3, 0(R9)	/*  store quo.msw */
117	MOVW	R4, 4(R9)	/*  store quo.lsw */
118lab11:
119	CMP	R10, $0
120	BEQ	lab12	/*  rp == 0: skip the remainder store */
121	MOVW	R7, 0(R10)	/*  store rem.msw */
122	MOVW	R8, 4(R10)	/*  store rem.lsw */
123lab12:
124	RETURN
125
126lab9:
127	/*  Quotient is 0 (dvs > dvd) */
128	MOVW	R4, R8	/*  rem.lsw = dvd.lsw */
129	MOVW	R3, R7	/*  rem.msw = dvd.msw */
130	MOVW	$0, R4	/*  quo.lsw = 0 */
131	MOVW	$0, R3	/*  quo.msw = 0 */
132	BR	lab10	/*  share the store/return code */
133