1*3cab2bb3Spatrick// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 2*3cab2bb3Spatrick// See https://llvm.org/LICENSE.txt for license information. 3*3cab2bb3Spatrick// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 4*3cab2bb3Spatrick 5*3cab2bb3Spatrick#include "../assembly.h" 6*3cab2bb3Spatrick 7*3cab2bb3Spatrick// di_int __moddi3(di_int a, di_int b); 8*3cab2bb3Spatrick 9*3cab2bb3Spatrick// result = remainder of a / b. 10*3cab2bb3Spatrick// both inputs and the output are 64-bit signed integers. 11*3cab2bb3Spatrick// This will do whatever the underlying hardware is set to do on division by zero. 12*3cab2bb3Spatrick// No other exceptions are generated, as the divide cannot overflow. 13*3cab2bb3Spatrick// 14*3cab2bb3Spatrick// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 15*3cab2bb3Spatrick// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 16*3cab2bb3Spatrick// currently possible via simulation of integer divides on the x87 unit. 17*3cab2bb3Spatrick// 18*3cab2bb3Spatrick 19*3cab2bb3Spatrick// Stephen Canon, December 2008 20*3cab2bb3Spatrick 21*3cab2bb3Spatrick#ifdef __i386__ 22*3cab2bb3Spatrick 23*3cab2bb3Spatrick.text 24*3cab2bb3Spatrick.balign 4 25*3cab2bb3SpatrickDEFINE_COMPILERRT_FUNCTION(__moddi3) 26*3cab2bb3Spatrick 27*3cab2bb3Spatrick// This is currently implemented by wrapping the unsigned modulus up in an absolute 28*3cab2bb3Spatrick// value. This could certainly be improved upon. 29*3cab2bb3Spatrick 30*3cab2bb3Spatrick pushl %esi 31*3cab2bb3Spatrick movl 20(%esp), %edx // high word of b 32*3cab2bb3Spatrick movl 16(%esp), %eax // low word of b 33*3cab2bb3Spatrick movl %edx, %ecx 34*3cab2bb3Spatrick sarl $31, %ecx // (b < 0) ? -1 : 0 35*3cab2bb3Spatrick xorl %ecx, %eax 36*3cab2bb3Spatrick xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 37*3cab2bb3Spatrick subl %ecx, %eax 38*3cab2bb3Spatrick sbbl %ecx, %edx // EDX:EAX = abs(b) 39*3cab2bb3Spatrick movl %edx, 20(%esp) 40*3cab2bb3Spatrick movl %eax, 16(%esp) // store abs(b) back to stack 41*3cab2bb3Spatrick 42*3cab2bb3Spatrick movl 12(%esp), %edx // high word of b 43*3cab2bb3Spatrick movl 8(%esp), %eax // low word of b 44*3cab2bb3Spatrick movl %edx, %ecx 45*3cab2bb3Spatrick sarl $31, %ecx // (a < 0) ? -1 : 0 46*3cab2bb3Spatrick xorl %ecx, %eax 47*3cab2bb3Spatrick xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48*3cab2bb3Spatrick subl %ecx, %eax 49*3cab2bb3Spatrick sbbl %ecx, %edx // EDX:EAX = abs(a) 50*3cab2bb3Spatrick movl %edx, 12(%esp) 51*3cab2bb3Spatrick movl %eax, 8(%esp) // store abs(a) back to stack 52*3cab2bb3Spatrick movl %ecx, %esi // set aside sign of a 53*3cab2bb3Spatrick 54*3cab2bb3Spatrick pushl %ebx 55*3cab2bb3Spatrick movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56*3cab2bb3Spatrick bsrl %ebx, %ecx // If the high word of b is zero, jump to 57*3cab2bb3Spatrick jz 9f // the code to handle that special case [9]. 58*3cab2bb3Spatrick 59*3cab2bb3Spatrick // High word of b is known to be non-zero on this branch 60*3cab2bb3Spatrick 61*3cab2bb3Spatrick movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62*3cab2bb3Spatrick 63*3cab2bb3Spatrick shrl %cl, %eax // Practically, this means that bhi is given by: 64*3cab2bb3Spatrick shrl %eax // 65*3cab2bb3Spatrick notl %ecx // bhi = (high word of b) << (31 - i) | 66*3cab2bb3Spatrick shll %cl, %ebx // (low word of b) >> (1 + i) 67*3cab2bb3Spatrick orl %eax, %ebx // 68*3cab2bb3Spatrick movl 16(%esp), %edx // Load the high and low words of a, and jump 69*3cab2bb3Spatrick movl 12(%esp), %eax // to [2] if the high word is larger than bhi 70*3cab2bb3Spatrick cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71*3cab2bb3Spatrick jae 2f 72*3cab2bb3Spatrick 73*3cab2bb3Spatrick // High word of a is greater than or equal to (b >> (1 + i)) on this branch 74*3cab2bb3Spatrick 75*3cab2bb3Spatrick divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76*3cab2bb3Spatrick 77*3cab2bb3Spatrick pushl %edi 78*3cab2bb3Spatrick notl %ecx 79*3cab2bb3Spatrick shrl %eax 80*3cab2bb3Spatrick shrl %cl, %eax // q = qs >> (1 + i) 81*3cab2bb3Spatrick movl %eax, %edi 82*3cab2bb3Spatrick mull 24(%esp) // q*blo 83*3cab2bb3Spatrick movl 16(%esp), %ebx 84*3cab2bb3Spatrick movl 20(%esp), %ecx // ECX:EBX = a 85*3cab2bb3Spatrick subl %eax, %ebx 86*3cab2bb3Spatrick sbbl %edx, %ecx // ECX:EBX = a - q*blo 87*3cab2bb3Spatrick movl 28(%esp), %eax 88*3cab2bb3Spatrick imull %edi, %eax // q*bhi 89*3cab2bb3Spatrick subl %eax, %ecx // ECX:EBX = a - q*b 90*3cab2bb3Spatrick 91*3cab2bb3Spatrick jnc 1f // if positive, this is the result. 92*3cab2bb3Spatrick addl 24(%esp), %ebx // otherwise 93*3cab2bb3Spatrick adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result 94*3cab2bb3Spatrick1: movl %ebx, %eax 95*3cab2bb3Spatrick movl %ecx, %edx 96*3cab2bb3Spatrick 97*3cab2bb3Spatrick addl %esi, %eax // Restore correct sign to result 98*3cab2bb3Spatrick adcl %esi, %edx 99*3cab2bb3Spatrick xorl %esi, %eax 100*3cab2bb3Spatrick xorl %esi, %edx 101*3cab2bb3Spatrick popl %edi // Restore callee-save registers 102*3cab2bb3Spatrick popl %ebx 103*3cab2bb3Spatrick popl %esi 104*3cab2bb3Spatrick retl // Return 105*3cab2bb3Spatrick 106*3cab2bb3Spatrick2: // High word of a is greater than or equal to (b >> (1 + i)) on this branch 107*3cab2bb3Spatrick 108*3cab2bb3Spatrick subl %ebx, %edx // subtract bhi from ahi so that divide will not 109*3cab2bb3Spatrick divl %ebx // overflow, and find q and r such that 110*3cab2bb3Spatrick // 111*3cab2bb3Spatrick // ahi:alo = (1:q)*bhi + r 112*3cab2bb3Spatrick // 113*3cab2bb3Spatrick // Note that q is a number in (31-i).(1+i) 114*3cab2bb3Spatrick // fix point. 115*3cab2bb3Spatrick 116*3cab2bb3Spatrick pushl %edi 117*3cab2bb3Spatrick notl %ecx 118*3cab2bb3Spatrick shrl %eax 119*3cab2bb3Spatrick orl $0x80000000, %eax 120*3cab2bb3Spatrick shrl %cl, %eax // q = (1:qs) >> (1 + i) 121*3cab2bb3Spatrick movl %eax, %edi 122*3cab2bb3Spatrick mull 24(%esp) // q*blo 123*3cab2bb3Spatrick movl 16(%esp), %ebx 124*3cab2bb3Spatrick movl 20(%esp), %ecx // ECX:EBX = a 125*3cab2bb3Spatrick subl %eax, %ebx 126*3cab2bb3Spatrick sbbl %edx, %ecx // ECX:EBX = a - q*blo 127*3cab2bb3Spatrick movl 28(%esp), %eax 128*3cab2bb3Spatrick imull %edi, %eax // q*bhi 129*3cab2bb3Spatrick subl %eax, %ecx // ECX:EBX = a - q*b 130*3cab2bb3Spatrick 131*3cab2bb3Spatrick jnc 3f // if positive, this is the result. 132*3cab2bb3Spatrick addl 24(%esp), %ebx // otherwise 133*3cab2bb3Spatrick adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result 134*3cab2bb3Spatrick3: movl %ebx, %eax 135*3cab2bb3Spatrick movl %ecx, %edx 136*3cab2bb3Spatrick 137*3cab2bb3Spatrick addl %esi, %eax // Restore correct sign to result 138*3cab2bb3Spatrick adcl %esi, %edx 139*3cab2bb3Spatrick xorl %esi, %eax 140*3cab2bb3Spatrick xorl %esi, %edx 141*3cab2bb3Spatrick popl %edi // Restore callee-save registers 142*3cab2bb3Spatrick popl %ebx 143*3cab2bb3Spatrick popl %esi 144*3cab2bb3Spatrick retl // Return 145*3cab2bb3Spatrick 146*3cab2bb3Spatrick9: // High word of b is zero on this branch 147*3cab2bb3Spatrick 148*3cab2bb3Spatrick movl 16(%esp), %eax // Find qhi and rhi such that 149*3cab2bb3Spatrick movl 20(%esp), %ecx // 150*3cab2bb3Spatrick xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 151*3cab2bb3Spatrick divl %ecx // 152*3cab2bb3Spatrick movl %eax, %ebx // 153*3cab2bb3Spatrick movl 12(%esp), %eax // Find rlo such that 154*3cab2bb3Spatrick divl %ecx // 155*3cab2bb3Spatrick movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 156*3cab2bb3Spatrick popl %ebx // 157*3cab2bb3Spatrick xorl %edx, %edx // and return 0:rlo 158*3cab2bb3Spatrick 159*3cab2bb3Spatrick addl %esi, %eax // Restore correct sign to result 160*3cab2bb3Spatrick adcl %esi, %edx 161*3cab2bb3Spatrick xorl %esi, %eax 162*3cab2bb3Spatrick xorl %esi, %edx 163*3cab2bb3Spatrick popl %esi 164*3cab2bb3Spatrick retl // Return 165*3cab2bb3SpatrickEND_COMPILERRT_FUNCTION(__moddi3) 166*3cab2bb3Spatrick 167*3cab2bb3Spatrick#endif // __i386__ 168*3cab2bb3Spatrick 169*3cab2bb3SpatrickNO_EXEC_STACK_DIRECTIVE 170*3cab2bb3Spatrick 171