1*3cab2bb3Spatrick// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 2*3cab2bb3Spatrick// See https://llvm.org/LICENSE.txt for license information. 3*3cab2bb3Spatrick// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 4*3cab2bb3Spatrick 5*3cab2bb3Spatrick#include "../assembly.h" 6*3cab2bb3Spatrick 7*3cab2bb3Spatrick// di_int __divdi3(di_int a, di_int b); 8*3cab2bb3Spatrick 9*3cab2bb3Spatrick// result = a / b. 10*3cab2bb3Spatrick// both inputs and the output are 64-bit signed integers. 11*3cab2bb3Spatrick// This will do whatever the underlying hardware is set to do on division by zero. 12*3cab2bb3Spatrick// No other exceptions are generated, as the divide cannot overflow. 13*3cab2bb3Spatrick// 14*3cab2bb3Spatrick// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 15*3cab2bb3Spatrick// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 16*3cab2bb3Spatrick// currently possible via simulation of integer divides on the x87 unit. 17*3cab2bb3Spatrick// 18*3cab2bb3Spatrick// Stephen Canon, December 2008 19*3cab2bb3Spatrick 20*3cab2bb3Spatrick#ifdef __i386__ 21*3cab2bb3Spatrick 22*3cab2bb3Spatrick.text 23*3cab2bb3Spatrick.balign 4 24*3cab2bb3SpatrickDEFINE_COMPILERRT_FUNCTION(__divdi3) 25*3cab2bb3Spatrick 26*3cab2bb3Spatrick// This is currently implemented by wrapping the unsigned divide up in an absolute 27*3cab2bb3Spatrick// value, then restoring the correct sign at the end of the computation. This could 28*3cab2bb3Spatrick// certainly be improved upon. 29*3cab2bb3Spatrick 30*3cab2bb3Spatrick pushl %esi 31*3cab2bb3Spatrick movl 20(%esp), %edx // high word of b 32*3cab2bb3Spatrick movl 16(%esp), %eax // low word of b 33*3cab2bb3Spatrick movl %edx, %ecx 34*3cab2bb3Spatrick sarl $31, %ecx // (b < 0) ? -1 : 0 35*3cab2bb3Spatrick xorl %ecx, %eax 36*3cab2bb3Spatrick xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 37*3cab2bb3Spatrick subl %ecx, %eax 38*3cab2bb3Spatrick sbbl %ecx, %edx // EDX:EAX = abs(b) 39*3cab2bb3Spatrick movl %edx, 20(%esp) 40*3cab2bb3Spatrick movl %eax, 16(%esp) // store abs(b) back to stack 41*3cab2bb3Spatrick movl %ecx, %esi // set aside sign of b 42*3cab2bb3Spatrick 43*3cab2bb3Spatrick movl 12(%esp), %edx // high word of b 44*3cab2bb3Spatrick movl 8(%esp), %eax // low word of b 45*3cab2bb3Spatrick movl %edx, %ecx 46*3cab2bb3Spatrick sarl $31, %ecx // (a < 0) ? -1 : 0 47*3cab2bb3Spatrick xorl %ecx, %eax 48*3cab2bb3Spatrick xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 49*3cab2bb3Spatrick subl %ecx, %eax 50*3cab2bb3Spatrick sbbl %ecx, %edx // EDX:EAX = abs(a) 51*3cab2bb3Spatrick movl %edx, 12(%esp) 52*3cab2bb3Spatrick movl %eax, 8(%esp) // store abs(a) back to stack 53*3cab2bb3Spatrick xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 54*3cab2bb3Spatrick 55*3cab2bb3Spatrick pushl %ebx 56*3cab2bb3Spatrick movl 24(%esp), %ebx // Find the index i of the leading bit in b. 57*3cab2bb3Spatrick bsrl %ebx, %ecx // If the high word of b is zero, jump to 58*3cab2bb3Spatrick jz 9f // the code to handle that special case [9]. 59*3cab2bb3Spatrick 60*3cab2bb3Spatrick // High word of b is known to be non-zero on this branch 61*3cab2bb3Spatrick 62*3cab2bb3Spatrick movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 63*3cab2bb3Spatrick 64*3cab2bb3Spatrick shrl %cl, %eax // Practically, this means that bhi is given by: 65*3cab2bb3Spatrick shrl %eax // 66*3cab2bb3Spatrick notl %ecx // bhi = (high word of b) << (31 - i) | 67*3cab2bb3Spatrick shll %cl, %ebx // (low word of b) >> (1 + i) 68*3cab2bb3Spatrick orl %eax, %ebx // 69*3cab2bb3Spatrick movl 16(%esp), %edx // Load the high and low words of a, and jump 70*3cab2bb3Spatrick movl 12(%esp), %eax // to [1] if the high word is larger than bhi 71*3cab2bb3Spatrick cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 72*3cab2bb3Spatrick jae 1f 73*3cab2bb3Spatrick 74*3cab2bb3Spatrick // High word of a is greater than or equal to (b >> (1 + i)) on this branch 75*3cab2bb3Spatrick 76*3cab2bb3Spatrick divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 77*3cab2bb3Spatrick 78*3cab2bb3Spatrick pushl %edi 79*3cab2bb3Spatrick notl %ecx 80*3cab2bb3Spatrick shrl %eax 81*3cab2bb3Spatrick shrl %cl, %eax // q = qs >> (1 + i) 82*3cab2bb3Spatrick movl %eax, %edi 83*3cab2bb3Spatrick mull 24(%esp) // q*blo 84*3cab2bb3Spatrick movl 16(%esp), %ebx 85*3cab2bb3Spatrick movl 20(%esp), %ecx // ECX:EBX = a 86*3cab2bb3Spatrick subl %eax, %ebx 87*3cab2bb3Spatrick sbbl %edx, %ecx // ECX:EBX = a - q*blo 88*3cab2bb3Spatrick movl 28(%esp), %eax 89*3cab2bb3Spatrick imull %edi, %eax // q*bhi 90*3cab2bb3Spatrick subl %eax, %ecx // ECX:EBX = a - q*b 91*3cab2bb3Spatrick sbbl $0, %edi // decrement q if remainder is negative 92*3cab2bb3Spatrick xorl %edx, %edx 93*3cab2bb3Spatrick movl %edi, %eax 94*3cab2bb3Spatrick 95*3cab2bb3Spatrick addl %esi, %eax // Restore correct sign to result 96*3cab2bb3Spatrick adcl %esi, %edx 97*3cab2bb3Spatrick xorl %esi, %eax 98*3cab2bb3Spatrick xorl %esi, %edx 99*3cab2bb3Spatrick popl %edi // Restore callee-save registers 100*3cab2bb3Spatrick popl %ebx 101*3cab2bb3Spatrick popl %esi 102*3cab2bb3Spatrick retl // Return 103*3cab2bb3Spatrick 104*3cab2bb3Spatrick 105*3cab2bb3Spatrick1: // High word of a is greater than or equal to (b >> (1 + i)) on this branch 106*3cab2bb3Spatrick 107*3cab2bb3Spatrick subl %ebx, %edx // subtract bhi from ahi so that divide will not 108*3cab2bb3Spatrick divl %ebx // overflow, and find q and r such that 109*3cab2bb3Spatrick // 110*3cab2bb3Spatrick // ahi:alo = (1:q)*bhi + r 111*3cab2bb3Spatrick // 112*3cab2bb3Spatrick // Note that q is a number in (31-i).(1+i) 113*3cab2bb3Spatrick // fix point. 114*3cab2bb3Spatrick 115*3cab2bb3Spatrick pushl %edi 116*3cab2bb3Spatrick notl %ecx 117*3cab2bb3Spatrick shrl %eax 118*3cab2bb3Spatrick orl $0x80000000, %eax 119*3cab2bb3Spatrick shrl %cl, %eax // q = (1:qs) >> (1 + i) 120*3cab2bb3Spatrick movl %eax, %edi 121*3cab2bb3Spatrick mull 24(%esp) // q*blo 122*3cab2bb3Spatrick movl 16(%esp), %ebx 123*3cab2bb3Spatrick movl 20(%esp), %ecx // ECX:EBX = a 124*3cab2bb3Spatrick subl %eax, %ebx 125*3cab2bb3Spatrick sbbl %edx, %ecx // ECX:EBX = a - q*blo 126*3cab2bb3Spatrick movl 28(%esp), %eax 127*3cab2bb3Spatrick imull %edi, %eax // q*bhi 128*3cab2bb3Spatrick subl %eax, %ecx // ECX:EBX = a - q*b 129*3cab2bb3Spatrick sbbl $0, %edi // decrement q if remainder is negative 130*3cab2bb3Spatrick xorl %edx, %edx 131*3cab2bb3Spatrick movl %edi, %eax 132*3cab2bb3Spatrick 133*3cab2bb3Spatrick addl %esi, %eax // Restore correct sign to result 134*3cab2bb3Spatrick adcl %esi, %edx 135*3cab2bb3Spatrick xorl %esi, %eax 136*3cab2bb3Spatrick xorl %esi, %edx 137*3cab2bb3Spatrick popl %edi // Restore callee-save registers 138*3cab2bb3Spatrick popl %ebx 139*3cab2bb3Spatrick popl %esi 140*3cab2bb3Spatrick retl // Return 141*3cab2bb3Spatrick 142*3cab2bb3Spatrick 143*3cab2bb3Spatrick9: // High word of b is zero on this branch 144*3cab2bb3Spatrick 145*3cab2bb3Spatrick movl 16(%esp), %eax // Find qhi and rhi such that 146*3cab2bb3Spatrick movl 20(%esp), %ecx // 147*3cab2bb3Spatrick xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 148*3cab2bb3Spatrick divl %ecx // 149*3cab2bb3Spatrick movl %eax, %ebx // 150*3cab2bb3Spatrick movl 12(%esp), %eax // Find qlo such that 151*3cab2bb3Spatrick divl %ecx // 152*3cab2bb3Spatrick movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 153*3cab2bb3Spatrick 154*3cab2bb3Spatrick addl %esi, %eax // Restore correct sign to result 155*3cab2bb3Spatrick adcl %esi, %edx 156*3cab2bb3Spatrick xorl %esi, %eax 157*3cab2bb3Spatrick xorl %esi, %edx 158*3cab2bb3Spatrick popl %ebx // Restore callee-save registers 159*3cab2bb3Spatrick popl %esi 160*3cab2bb3Spatrick retl // Return 161*3cab2bb3SpatrickEND_COMPILERRT_FUNCTION(__divdi3) 162*3cab2bb3Spatrick 163*3cab2bb3Spatrick#endif // __i386__ 164*3cab2bb3Spatrick 165*3cab2bb3SpatrickNO_EXEC_STACK_DIRECTIVE 166*3cab2bb3Spatrick 167