xref: /netbsd-src/sys/external/bsd/compiler_rt/dist/lib/builtins/i386/divdi3.S (revision 61f2f2562dcc3e4eb50e7ec5dab0dfa1f093861a)
1156cd587Sjoerg// This file is dual licensed under the MIT and the University of Illinois Open
2156cd587Sjoerg// Source Licenses. See LICENSE.TXT for details.
3156cd587Sjoerg
4156cd587Sjoerg#include "../assembly.h"
5156cd587Sjoerg
6156cd587Sjoerg// di_int __divdi3(di_int a, di_int b);
7156cd587Sjoerg
8156cd587Sjoerg// result = a / b.
9156cd587Sjoerg// both inputs and the output are 64-bit signed integers.
10156cd587Sjoerg// This will do whatever the underlying hardware is set to do on division by zero.
11156cd587Sjoerg// No other exceptions are generated, as the divide cannot overflow.
12156cd587Sjoerg//
13156cd587Sjoerg// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14156cd587Sjoerg// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
15156cd587Sjoerg// currently possible via simulation of integer divides on the x87 unit.
16156cd587Sjoerg//
17156cd587Sjoerg// Stephen Canon, December 2008
18156cd587Sjoerg
19156cd587Sjoerg#ifdef __i386__
20156cd587Sjoerg
21156cd587Sjoerg.text
22*61f2f256Sjoerg.balign 4
23156cd587SjoergDEFINE_COMPILERRT_FUNCTION(__divdi3)
24156cd587Sjoerg
25156cd587Sjoerg/* This is currently implemented by wrapping the unsigned divide up in an absolute
26156cd587Sjoerg   value, then restoring the correct sign at the end of the computation.  This could
27156cd587Sjoerg   certainly be improved upon. */
28156cd587Sjoerg
29156cd587Sjoerg	pushl		%esi
30156cd587Sjoerg	movl	 20(%esp),			%edx	// high word of b
31156cd587Sjoerg	movl	 16(%esp),			%eax	// low word of b
32156cd587Sjoerg	movl		%edx,			%ecx
33156cd587Sjoerg	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
34156cd587Sjoerg	xorl		%ecx,			%eax
35156cd587Sjoerg	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
36156cd587Sjoerg	subl		%ecx,			%eax
37156cd587Sjoerg	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
38156cd587Sjoerg	movl		%edx,		 20(%esp)
39156cd587Sjoerg	movl		%eax,		 16(%esp)	// store abs(b) back to stack
40156cd587Sjoerg	movl		%ecx,			%esi	// set aside sign of b
41156cd587Sjoerg
42156cd587Sjoerg	movl	 12(%esp),			%edx	// high word of b
43156cd587Sjoerg	movl	  8(%esp),			%eax	// low word of b
44156cd587Sjoerg	movl		%edx,			%ecx
45156cd587Sjoerg	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
46156cd587Sjoerg	xorl		%ecx,			%eax
47156cd587Sjoerg	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
48156cd587Sjoerg	subl		%ecx,			%eax
49156cd587Sjoerg	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
50156cd587Sjoerg	movl		%edx,		 12(%esp)
51156cd587Sjoerg	movl		%eax,		  8(%esp)	// store abs(a) back to stack
52156cd587Sjoerg	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
53156cd587Sjoerg
54156cd587Sjoerg	pushl		%ebx
55156cd587Sjoerg	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
56156cd587Sjoerg	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
57156cd587Sjoerg	jz			9f						// the code to handle that special case [9].
58156cd587Sjoerg
59156cd587Sjoerg	/* High word of b is known to be non-zero on this branch */
60156cd587Sjoerg
61156cd587Sjoerg	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
62156cd587Sjoerg
63156cd587Sjoerg	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
64156cd587Sjoerg	shrl		%eax					//
65156cd587Sjoerg	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
66156cd587Sjoerg	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
67156cd587Sjoerg	orl			%eax,			%ebx	//
68156cd587Sjoerg	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
69156cd587Sjoerg	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
70156cd587Sjoerg	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
71156cd587Sjoerg	jae			1f
72156cd587Sjoerg
73156cd587Sjoerg	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
74156cd587Sjoerg
75156cd587Sjoerg	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
76156cd587Sjoerg
77156cd587Sjoerg	pushl		%edi
78156cd587Sjoerg	notl		%ecx
79156cd587Sjoerg	shrl		%eax
80156cd587Sjoerg	shrl		%cl,			%eax	// q = qs >> (1 + i)
81156cd587Sjoerg	movl		%eax,			%edi
82156cd587Sjoerg	mull	 24(%esp)					// q*blo
83156cd587Sjoerg	movl	 16(%esp),			%ebx
84156cd587Sjoerg	movl	 20(%esp),			%ecx	// ECX:EBX = a
85156cd587Sjoerg	subl		%eax,			%ebx
86156cd587Sjoerg	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
87156cd587Sjoerg	movl	 28(%esp),			%eax
88156cd587Sjoerg	imull		%edi,			%eax	// q*bhi
89156cd587Sjoerg	subl		%eax,			%ecx	// ECX:EBX = a - q*b
90156cd587Sjoerg	sbbl		$0,				%edi	// decrement q if remainder is negative
91156cd587Sjoerg	xorl		%edx,			%edx
92156cd587Sjoerg	movl		%edi,			%eax
93156cd587Sjoerg
94156cd587Sjoerg	addl		%esi,			%eax	// Restore correct sign to result
95156cd587Sjoerg	adcl		%esi,			%edx
96156cd587Sjoerg	xorl		%esi,			%eax
97156cd587Sjoerg	xorl		%esi,			%edx
98156cd587Sjoerg	popl		%edi					// Restore callee-save registers
99156cd587Sjoerg	popl		%ebx
100156cd587Sjoerg	popl		%esi
101156cd587Sjoerg	retl								// Return
102156cd587Sjoerg
103156cd587Sjoerg
104156cd587Sjoerg1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
105156cd587Sjoerg
106156cd587Sjoerg	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
107156cd587Sjoerg	divl		%ebx					// overflow, and find q and r such that
108156cd587Sjoerg										//
109156cd587Sjoerg										//		ahi:alo = (1:q)*bhi + r
110156cd587Sjoerg										//
111156cd587Sjoerg										// Note that q is a number in (31-i).(1+i)
112156cd587Sjoerg										// fix point.
113156cd587Sjoerg
114156cd587Sjoerg	pushl		%edi
115156cd587Sjoerg	notl		%ecx
116156cd587Sjoerg	shrl		%eax
117156cd587Sjoerg	orl			$0x80000000,	%eax
118156cd587Sjoerg	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
119156cd587Sjoerg	movl		%eax,			%edi
120156cd587Sjoerg	mull	 24(%esp)					// q*blo
121156cd587Sjoerg	movl	 16(%esp),			%ebx
122156cd587Sjoerg	movl	 20(%esp),			%ecx	// ECX:EBX = a
123156cd587Sjoerg	subl		%eax,			%ebx
124156cd587Sjoerg	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
125156cd587Sjoerg	movl	 28(%esp),			%eax
126156cd587Sjoerg	imull		%edi,			%eax	// q*bhi
127156cd587Sjoerg	subl		%eax,			%ecx	// ECX:EBX = a - q*b
128156cd587Sjoerg	sbbl		$0,				%edi	// decrement q if remainder is negative
129156cd587Sjoerg	xorl		%edx,			%edx
130156cd587Sjoerg	movl		%edi,			%eax
131156cd587Sjoerg
132156cd587Sjoerg	addl		%esi,			%eax	// Restore correct sign to result
133156cd587Sjoerg	adcl		%esi,			%edx
134156cd587Sjoerg	xorl		%esi,			%eax
135156cd587Sjoerg	xorl		%esi,			%edx
136156cd587Sjoerg	popl		%edi					// Restore callee-save registers
137156cd587Sjoerg	popl		%ebx
138156cd587Sjoerg	popl		%esi
139156cd587Sjoerg	retl								// Return
140156cd587Sjoerg
141156cd587Sjoerg
142156cd587Sjoerg9:	/* High word of b is zero on this branch */
143156cd587Sjoerg
144156cd587Sjoerg	movl	 16(%esp),			%eax	// Find qhi and rhi such that
145156cd587Sjoerg	movl	 20(%esp),			%ecx	//
146156cd587Sjoerg	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 ≤ rhi < b
147156cd587Sjoerg	divl		%ecx					//
148156cd587Sjoerg	movl		%eax,			%ebx	//
149156cd587Sjoerg	movl	 12(%esp),			%eax	// Find qlo such that
150156cd587Sjoerg	divl		%ecx					//
151156cd587Sjoerg	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 ≤ rlo < b
152156cd587Sjoerg
153156cd587Sjoerg	addl		%esi,			%eax	// Restore correct sign to result
154156cd587Sjoerg	adcl		%esi,			%edx
155156cd587Sjoerg	xorl		%esi,			%eax
156156cd587Sjoerg	xorl		%esi,			%edx
157156cd587Sjoerg	popl		%ebx					// Restore callee-save registers
158156cd587Sjoerg	popl		%esi
159156cd587Sjoerg	retl								// Return
160156cd587SjoergEND_COMPILERRT_FUNCTION(__divdi3)
161156cd587Sjoerg
162156cd587Sjoerg#endif // __i386__
163