// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __lshrdi3(di_int input, int count);
//
// Logical (zero-filling) right shift of a 64-bit integer by count bits.
//
// Stack layout on entry, as read by the code below:
//    4(%esp)  low  32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  count
// The result is returned with its low half in %eax and its high half in %edx.
//
// NOTE(review): the two implementations agree for 0 <= count <= 63. Outside
// that range they differ: psrlq produces zero for any count above 63, while
// the GPR path looks only at bit 5 of count and the shift instructions use
// only the low 5 bits of %cl. Callers should keep count within 0..63.

// By default the SSE2 routine has some extra work: it loads the 64-bit input
// via two 32-bit loads and merges the halves in a register with punpckldq,
// instead of using a single 64-bit load. This is to avoid a write-small,
// read-large stall. However, if callers of this routine can be safely assumed
// to store the argument via a 64-bit store, the extra load and merge are
// unnecessary and should be avoided. They can be turned off by defining the
// TRUST_CALLERS_USE_64_BIT_STORES macro.

#ifdef __i386__
#ifdef __SSE2__

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
	movd	12(%esp),	%xmm2	// Load count (zero-extended into xmm2)
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
	movd	4(%esp),	%xmm0	// Load low half of input
	movd	8(%esp),	%xmm1	// Load high half of input
	punpckldq	%xmm1,	%xmm0	// Interleave: xmm0[63:0] = high:low
#else
	movq	4(%esp),	%xmm0	// Load input with one 64-bit load
#endif
	psrlq	%xmm2,	%xmm0	// Shift input right by count, filling with zeros
	movd	%xmm0,	%eax	// Low half of result
	psrlq	$32,	%xmm0	// Bring high half of result down to bits 31:0
	movd	%xmm0,	%edx	// High half of result
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)

#else // Use GPRs instead of SSE2 instructions, if they are not available.

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
	movl	12(%esp),	%ecx	// Load count
	movl	8(%esp),	%edx	// Load high
	movl	4(%esp),	%eax	// Load low

	testl	$0x20,	%ecx	// If count >= 32 (bit 5 set)
	jnz	1f	// goto 1

	// count < 32: bits shifted out of high enter the top of low.
	shrdl	%cl,	%edx,	%eax	// right shift low by count
	shrl	%cl,	%edx	// right shift high by count
	ret

	// count >= 32: the result lies entirely in the low word. shrl uses only
	// the low 5 bits of %cl, so shifting by count is a shift by count - 32.
1:	movl	%edx,	%eax	// Move high to low
	xorl	%edx,	%edx	// clear high
	shrl	%cl,	%eax	// shift low by count - 32
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)

#endif // __SSE2__
#endif // __i386__