// builtins/i386/divdi3.S

// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __divdi3(di_int a, di_int b);

// result = a / b, with the quotient truncated toward zero.
// Both inputs and the output are 64-bit signed integers.
// This will do whatever the underlying hardware is set to do on division by zero
// (a zero divisor reaches the divl instructions at label 9 below).
// No other exceptions are generated, as the divide cannot overflow; in particular
// INT64_MIN / -1 wraps around and returns INT64_MIN.
//
// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
// currently possible via simulation of integer divides on the x87 unit.
//
// Syntax:    AT&T (GAS), run through the C preprocessor.
// Inputs:    both arguments are read from the stack, a below b, low word first.
// Output:    EDX:EAX = quotient.
// Preserved: ESI, EBX, EDI (pushed on entry to each path and popped before return).
// Clobbers:  ECX, flags, and the incoming argument slots on the stack, which are
//            overwritten in place with abs(a) and abs(b).
//
// Stack offsets of the arguments, relative to ESP:
//
//                       a.lo   a.hi   b.lo   b.hi
//   after pushl %esi      8     12     16     20
//   after pushl %ebx     12     16     20     24
//   after pushl %edi     16     20     24     28
//
// Stephen Canon, December 2008

#ifdef __i386__

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__divdi3)

/* This is currently implemented by wrapping the unsigned divide up in an absolute
   value, then restoring the correct sign at the end of the computation.  This could
   certainly be improved upon. */

	pushl		%esi
	movl	 20(%esp),			%edx	// high word of b
	movl	 16(%esp),			%eax	// low word of b
	movl		%edx,			%ecx
	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
	xorl		%ecx,			%eax
	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
	subl		%ecx,			%eax
	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
	movl		%edx,		 20(%esp)
	movl		%eax,		 16(%esp)	// store abs(b) back to stack
	movl		%ecx,			%esi	// set aside sign of b

	movl	 12(%esp),			%edx	// high word of a
	movl	  8(%esp),			%eax	// low word of a
	movl		%edx,			%ecx
	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
	xorl		%ecx,			%eax
	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
	subl		%ecx,			%eax
	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
	movl		%edx,		 12(%esp)
	movl		%eax,		  8(%esp)	// store abs(a) back to stack
	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
										// ESI is 0 or -1 from here to the return.

	/* From here on a and b denote the absolute values stored back on the stack,
	   treated as unsigned 64-bit integers. */

	pushl		%ebx
	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
	jz			9f						// the code to handle that special case [9].

	/* High word of b is known to be non-zero on this branch */

	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b

	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
	shrl		%eax					//
	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
	orl			%eax,			%ebx	//
	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
	movl	 12(%esp),			%eax	// to [1] if the high word is not smaller than
	cmpl		%ebx,			%edx	// bhi, to avoid overflowing the upcoming divide.
	jae			1f

	/* High word of a is smaller than bhi on this branch, so the quotient of
	   ahi:alo by bhi fits in 32 bits and the divide below cannot overflow. */

	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bhi*qs + r

	pushl		%edi
	notl		%ecx					// CL holds i again (it held 31 - i for the shll above)
	shrl		%eax
	shrl		%cl,			%eax	// q = qs >> (1 + i)
	movl		%eax,			%edi
	mull	 24(%esp)					// q*blo
	movl	 16(%esp),			%ebx
	movl	 20(%esp),			%ecx	// ECX:EBX = a
	subl		%eax,			%ebx
	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
	movl	 28(%esp),			%eax
	imull		%edi,			%eax	// q*bhi
	subl		%eax,			%ecx	// ECX:EBX = a - q*b
	sbbl		$0,				%edi	// decrement q if remainder is negative
	xorl		%edx,			%edx
	movl		%edi,			%eax	// EDX:EAX = 0:q, the unsigned quotient

	addl		%esi,			%eax	// Restore correct sign to result:
	adcl		%esi,			%edx	// (x + s) ^ s is x when s = 0 and
	xorl		%esi,			%eax	// -x when s = -1.
	xorl		%esi,			%edx
	popl		%edi					// Restore callee-save registers
	popl		%ebx
	popl		%esi
	retl								// Return


1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */

	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
	divl		%ebx					// overflow, and find q and r such that
										//
										//		ahi:alo = (1:q)*bhi + r
										//
										// Note that q is a number in (31-i).(1+i)
										// fixed point.

	pushl		%edi
	notl		%ecx					// CL holds i again (it held 31 - i for the shll above)
	shrl		%eax
	orl			$0x80000000,	%eax	// reinsert the implicit leading 1 of (1:qs)
	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
	movl		%eax,			%edi
	mull	 24(%esp)					// q*blo
	movl	 16(%esp),			%ebx
	movl	 20(%esp),			%ecx	// ECX:EBX = a
	subl		%eax,			%ebx
	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
	movl	 28(%esp),			%eax
	imull		%edi,			%eax	// q*bhi
	subl		%eax,			%ecx	// ECX:EBX = a - q*b
	sbbl		$0,				%edi	// decrement q if remainder is negative
	xorl		%edx,			%edx
	movl		%edi,			%eax	// EDX:EAX = 0:q, the unsigned quotient

	addl		%esi,			%eax	// Restore correct sign to result:
	adcl		%esi,			%edx	// (x + s) ^ s is x when s = 0 and
	xorl		%esi,			%eax	// -x when s = -1.
	xorl		%esi,			%edx
	popl		%edi					// Restore callee-save registers
	popl		%ebx
	popl		%esi
	retl								// Return


9:	/* High word of b is zero on this branch.  Only ESI and EBX have been pushed,
	   so only those two are popped before returning. */

	movl	 16(%esp),			%eax	// Find qhi and rhi such that
	movl	 20(%esp),			%ecx	//
	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 <= rhi < b
	divl		%ecx					//
	movl		%eax,			%ebx	//
	movl	 12(%esp),			%eax	// Find qlo such that
	divl		%ecx					//
	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 <= rlo < b
										// EDX:EAX = qhi:qlo, the unsigned quotient

	addl		%esi,			%eax	// Restore correct sign to result:
	adcl		%esi,			%edx	// (x + s) ^ s is x when s = 0 and
	xorl		%esi,			%eax	// -x when s = -1.
	xorl		%esi,			%edx
	popl		%ebx					// Restore callee-save registers
	popl		%esi
	retl								// Return
END_COMPILERRT_FUNCTION(__divdi3)

#endif // __i386__