1156cd587Sjoerg// This file is dual licensed under the MIT and the University of Illinois Open 2156cd587Sjoerg// Source Licenses. See LICENSE.TXT for details. 3156cd587Sjoerg 4156cd587Sjoerg#include "../assembly.h" 5156cd587Sjoerg 6156cd587Sjoerg// di_int __divdi3(di_int a, di_int b); 7156cd587Sjoerg 8156cd587Sjoerg// result = a / b. 9156cd587Sjoerg// both inputs and the output are 64-bit signed integers. 10156cd587Sjoerg// This will do whatever the underlying hardware is set to do on division by zero. 11156cd587Sjoerg// No other exceptions are generated, as the divide cannot overflow. 12156cd587Sjoerg// 13156cd587Sjoerg// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14156cd587Sjoerg// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15156cd587Sjoerg// currently possible via simulation of integer divides on the x87 unit. 16156cd587Sjoerg// 17156cd587Sjoerg// Stephen Canon, December 2008 18156cd587Sjoerg 19156cd587Sjoerg#ifdef __i386__ 20156cd587Sjoerg 21156cd587Sjoerg.text 22*61f2f256Sjoerg.balign 4 23156cd587SjoergDEFINE_COMPILERRT_FUNCTION(__divdi3) 24156cd587Sjoerg 25156cd587Sjoerg/* This is currently implemented by wrapping the unsigned divide up in an absolute 26156cd587Sjoerg value, then restoring the correct sign at the end of the computation. This could 27156cd587Sjoerg certainly be improved upon. */ 28156cd587Sjoerg 29156cd587Sjoerg pushl %esi 30156cd587Sjoerg movl 20(%esp), %edx // high word of b 31156cd587Sjoerg movl 16(%esp), %eax // low word of b 32156cd587Sjoerg movl %edx, %ecx 33156cd587Sjoerg sarl $31, %ecx // (b < 0) ? -1 : 0 34156cd587Sjoerg xorl %ecx, %eax 35156cd587Sjoerg xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 36156cd587Sjoerg subl %ecx, %eax 37156cd587Sjoerg sbbl %ecx, %edx // EDX:EAX = abs(b) 38156cd587Sjoerg movl %edx, 20(%esp) 39156cd587Sjoerg movl %eax, 16(%esp) // store abs(b) back to stack 40156cd587Sjoerg movl %ecx, %esi // set aside sign of b 41156cd587Sjoerg 42156cd587Sjoerg movl 12(%esp), %edx // high word of b 43156cd587Sjoerg movl 8(%esp), %eax // low word of b 44156cd587Sjoerg movl %edx, %ecx 45156cd587Sjoerg sarl $31, %ecx // (a < 0) ? -1 : 0 46156cd587Sjoerg xorl %ecx, %eax 47156cd587Sjoerg xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48156cd587Sjoerg subl %ecx, %eax 49156cd587Sjoerg sbbl %ecx, %edx // EDX:EAX = abs(a) 50156cd587Sjoerg movl %edx, 12(%esp) 51156cd587Sjoerg movl %eax, 8(%esp) // store abs(a) back to stack 52156cd587Sjoerg xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 53156cd587Sjoerg 54156cd587Sjoerg pushl %ebx 55156cd587Sjoerg movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56156cd587Sjoerg bsrl %ebx, %ecx // If the high word of b is zero, jump to 57156cd587Sjoerg jz 9f // the code to handle that special case [9]. 58156cd587Sjoerg 59156cd587Sjoerg /* High word of b is known to be non-zero on this branch */ 60156cd587Sjoerg 61156cd587Sjoerg movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62156cd587Sjoerg 63156cd587Sjoerg shrl %cl, %eax // Practically, this means that bhi is given by: 64156cd587Sjoerg shrl %eax // 65156cd587Sjoerg notl %ecx // bhi = (high word of b) << (31 - i) | 66156cd587Sjoerg shll %cl, %ebx // (low word of b) >> (1 + i) 67156cd587Sjoerg orl %eax, %ebx // 68156cd587Sjoerg movl 16(%esp), %edx // Load the high and low words of a, and jump 69156cd587Sjoerg movl 12(%esp), %eax // to [1] if the high word is larger than bhi 70156cd587Sjoerg cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71156cd587Sjoerg jae 1f 72156cd587Sjoerg 73156cd587Sjoerg /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 74156cd587Sjoerg 75156cd587Sjoerg divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76156cd587Sjoerg 77156cd587Sjoerg pushl %edi 78156cd587Sjoerg notl %ecx 79156cd587Sjoerg shrl %eax 80156cd587Sjoerg shrl %cl, %eax // q = qs >> (1 + i) 81156cd587Sjoerg movl %eax, %edi 82156cd587Sjoerg mull 24(%esp) // q*blo 83156cd587Sjoerg movl 16(%esp), %ebx 84156cd587Sjoerg movl 20(%esp), %ecx // ECX:EBX = a 85156cd587Sjoerg subl %eax, %ebx 86156cd587Sjoerg sbbl %edx, %ecx // ECX:EBX = a - q*blo 87156cd587Sjoerg movl 28(%esp), %eax 88156cd587Sjoerg imull %edi, %eax // q*bhi 89156cd587Sjoerg subl %eax, %ecx // ECX:EBX = a - q*b 90156cd587Sjoerg sbbl $0, %edi // decrement q if remainder is negative 91156cd587Sjoerg xorl %edx, %edx 92156cd587Sjoerg movl %edi, %eax 93156cd587Sjoerg 94156cd587Sjoerg addl %esi, %eax // Restore correct sign to result 95156cd587Sjoerg adcl %esi, %edx 96156cd587Sjoerg xorl %esi, %eax 97156cd587Sjoerg xorl %esi, %edx 98156cd587Sjoerg popl %edi // Restore callee-save registers 99156cd587Sjoerg popl %ebx 100156cd587Sjoerg popl %esi 101156cd587Sjoerg retl // Return 102156cd587Sjoerg 103156cd587Sjoerg 104156cd587Sjoerg1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 105156cd587Sjoerg 106156cd587Sjoerg subl %ebx, %edx // subtract bhi from ahi so that divide will not 107156cd587Sjoerg divl %ebx // overflow, and find q and r such that 108156cd587Sjoerg // 109156cd587Sjoerg // ahi:alo = (1:q)*bhi + r 110156cd587Sjoerg // 111156cd587Sjoerg // Note that q is a number in (31-i).(1+i) 112156cd587Sjoerg // fix point. 113156cd587Sjoerg 114156cd587Sjoerg pushl %edi 115156cd587Sjoerg notl %ecx 116156cd587Sjoerg shrl %eax 117156cd587Sjoerg orl $0x80000000, %eax 118156cd587Sjoerg shrl %cl, %eax // q = (1:qs) >> (1 + i) 119156cd587Sjoerg movl %eax, %edi 120156cd587Sjoerg mull 24(%esp) // q*blo 121156cd587Sjoerg movl 16(%esp), %ebx 122156cd587Sjoerg movl 20(%esp), %ecx // ECX:EBX = a 123156cd587Sjoerg subl %eax, %ebx 124156cd587Sjoerg sbbl %edx, %ecx // ECX:EBX = a - q*blo 125156cd587Sjoerg movl 28(%esp), %eax 126156cd587Sjoerg imull %edi, %eax // q*bhi 127156cd587Sjoerg subl %eax, %ecx // ECX:EBX = a - q*b 128156cd587Sjoerg sbbl $0, %edi // decrement q if remainder is negative 129156cd587Sjoerg xorl %edx, %edx 130156cd587Sjoerg movl %edi, %eax 131156cd587Sjoerg 132156cd587Sjoerg addl %esi, %eax // Restore correct sign to result 133156cd587Sjoerg adcl %esi, %edx 134156cd587Sjoerg xorl %esi, %eax 135156cd587Sjoerg xorl %esi, %edx 136156cd587Sjoerg popl %edi // Restore callee-save registers 137156cd587Sjoerg popl %ebx 138156cd587Sjoerg popl %esi 139156cd587Sjoerg retl // Return 140156cd587Sjoerg 141156cd587Sjoerg 142156cd587Sjoerg9: /* High word of b is zero on this branch */ 143156cd587Sjoerg 144156cd587Sjoerg movl 16(%esp), %eax // Find qhi and rhi such that 145156cd587Sjoerg movl 20(%esp), %ecx // 146156cd587Sjoerg xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 147156cd587Sjoerg divl %ecx // 148156cd587Sjoerg movl %eax, %ebx // 149156cd587Sjoerg movl 12(%esp), %eax // Find qlo such that 150156cd587Sjoerg divl %ecx // 151156cd587Sjoerg movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 152156cd587Sjoerg 153156cd587Sjoerg addl %esi, %eax // Restore correct sign to result 154156cd587Sjoerg adcl %esi, %edx 155156cd587Sjoerg xorl %esi, %eax 156156cd587Sjoerg xorl %esi, %edx 157156cd587Sjoerg popl %ebx // Restore callee-save registers 158156cd587Sjoerg popl %esi 159156cd587Sjoerg retl // Return 160156cd587SjoergEND_COMPILERRT_FUNCTION(__divdi3) 161156cd587Sjoerg 162156cd587Sjoerg#endif // __i386__ 163