//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to DAG nodes.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"

#define DEBUG_TYPE "x86-isel"

using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
  switch (CC) {
  default:
    return false;
  case CallingConv::X86_RegCall:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
    return true;
  }
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
  return CC == CallingConv::X86_RegCall;
}

static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
  // convention is one that uses k registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};
  // v32i1 passes in ymm unless we have BWI and the calling convention is
  // regcall.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};
  // Split v64i1 vectors if we don't have v64i8 available.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}

MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return RegisterVT;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return MVT::v8f16;
  }

  // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
  if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
      !Subtarget.hasX87())
    return MVT::i32;

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return NumRegisters;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return 1;
  }

  // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
  // x87 is disabled.
  if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
    if (VT == MVT::f64)
      return 2;
    if (VT == MVT::f80)
      return 3;
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // Split v64i1 vectors if we don't have v64i8 available.
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  // Split vNbf16 vectors according to vNf16.
  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}

EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext &Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Figure out what this type will be legalized to.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
      // vXi16/vXi8.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  return VT.changeVectorElementTypeToInteger();
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}

/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  if (Subtarget.is64Bit()) {
    // Max of 8 and alignment of type.
    Align TyAlign = DL.getABITypeAlign(Ty);
    if (TyAlign > 8)
      return TyAlign.value();
    return 8;
  }

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment.value();
}

/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          Subtarget.useLight256BitInstructions()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      // If we have SSE1 registers we should be able to use them.
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if source is string constant. It's
      // better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return Subtarget.hasSSE1();
  if (VT == MVT::f64)
    return Subtarget.hasSSE2();
  return true;
}

static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
  return (8 * Alignment.value()) % SizeInBits == 0;
}

bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
  if (isBitAligned(Alignment, VT.getSizeInBits()))
    return true;
  switch (VT.getSizeInBits()) {
  default:
    // 8-byte and under are always assumed to be fast.
    return true;
  case 128:
    return !Subtarget.isUnalignedMem16Slow();
  case 256:
    return !Subtarget.isUnalignedMem32Slow();
    // TODO: What about AVX-512 (512-bit) accesses?
  }
}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  // NonTemporal vector memory ops must be aligned.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }
  // Misaligned accesses of any size are always allowed.
  return true;
}

bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
                                           const DataLayout &DL, EVT VT,
                                           unsigned AddrSpace, Align Alignment,
                                           MachineMemOperand::Flags Flags,
                                           unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                       /*Fast=*/nullptr))
      return true;
    // NonTemporal vector memory ops are special, and must be aligned.
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
      return false;
    switch (VT.getSizeInBits()) {
    case 128:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
        return true;
      return false;
    case 256:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
        return true;
      return false;
    case 512:
      if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
        return true;
      return false;
    default:
      return false; // Don't have NonTemporal vector memory ops of this size.
    }
  }
  return true;
}

/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;
  if (isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Large)
    return MachineJumpTableInfo::EK_LabelDifference64;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}

bool X86TargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
    unsigned ValueBits = ValueVT.getSizeInBits();
    unsigned PartBits = PartVT.getSizeInBits();
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    Parts[0] = Val;
    return true;
  }
  return false;
}

SDValue X86TargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
    unsigned ValueBits = ValueVT.getSizeInBits();
    unsigned PartBits = PartVT.getSizeInBits();
    SDValue Val = Parts[0];

    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }
  return SDValue();
}

bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {

  // Only relabel X86-32 for C / Stdcall CCs.
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Mark the first N integer arguments as being passed in registers.
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}

const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid, MCContext &Ctx) const {
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}

/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    // This doesn't have SDLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  return Table;
}

/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  // X86-64 uses RIP relative addressing based on the jump table label.
  if (Subtarget.isPICStyleRIPRel() ||
      (Subtarget.is64Bit() &&
       getTargetMachine().getCodeModel() == CodeModel::Large))
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  // Otherwise, the reference is relative to the PIC base.
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}

std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}

unsigned X86TargetLowering::getAddressSpace() const {
  // Address spaces 256 and 257 select the %gs and %fs segments, respectively
  // (X86AS::GS / X86AS::FS).
  if (Subtarget.is64Bit())
    return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
  return 256;
}

static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}

static Constant *SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
}

Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // glibc, bionic, and Fuchsia have a special slot for the stack guard in
  // tcbhead_t; use it instead of the usual global variable (see
  // sysdeps/{i386,x86_64}/nptl/tls.h)
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    unsigned AddressSpace = getAddressSpace();

    // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
    if (Subtarget.isTargetFuchsia())
      return SegmentOffset(IRB, 0x10, AddressSpace);

    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base register and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If we don't set a -stack-protector-guard-offset value:
    // %fs:0x28, unless we're using a Kernel code model, in which case
    // it's %gs:0x28. gs:0x14 on i386.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

    StringRef GuardReg = M->getStackProtectorGuardReg();
    if (GuardReg == "fs")
      AddressSpace = X86AS::FS;
    else if (GuardReg == "gs")
      AddressSpace = X86AS::GS;

    // Use the symbol guard if the user specified one.
    StringRef GuardSymb = M->getStackProtectorGuardSymbol();
    if (!GuardSymb.empty()) {
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
                                       : Type::getInt32Ty(M->getContext());
        GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
                                nullptr, GuardSymb, nullptr,
                                GlobalValue::NotThreadLocal, AddressSpace);
        if (!Subtarget.isTargetDarwin())
          GV->setDSOLocal(M->getDirectAccessExternalData());
      }
      return GV;
    }

    return SegmentOffset(IRB, Offset, AddressSpace);
  }
  return TargetLowering::getIRStackGuard(IRB);
}

void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // The MSVC CRT provides functionality for stack protection.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    // MSVC CRT has a global variable holding the security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));

    // MSVC CRT has a function to validate the security cookie.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // glibc, bionic, and Fuchsia have a special slot for the stack guard.
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}

Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
  // MSVC CRT has a global variable holding the security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getGlobalVariable("__security_cookie");
  }
  return TargetLowering::getSDagStackGuard(M);
}

Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
  // MSVC CRT has a function to validate the security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getFunction("__security_check_cookie");
  }
  return TargetLowering::getSSPStackGuardCheck(M);
}

Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget.isTargetAndroid()) {
    // %fs:0x48, unless we're using a Kernel code model, in which case it's
    // %gs:0x48; %gs:0x24 on i386.
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia is similar.
  if (Subtarget.isTargetFuchsia()) {
    // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool X86TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}

const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}

ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
  // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
  // tests at the moment, which is not what we expected.
  static const MCPhysReg RCRegs[] = {X86::MXCSR};
  return RCRegs;
}

/// Lowers mask values (v*i1) to the local register values
/// \returns DAG node after lowering to register type
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &DL, SelectionDAG &DAG) {
  EVT ValVT = ValArg.getValueType();

  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Two-stage lowering might be required
    // bitcast:   v8i1 -> i8 / v16i1 -> i16
    // anyextend: i8   -> i32 / i16  -> i32
    EVT TempValLoc = ValVT == MVT::v8i1 ?
        MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // One-stage lowering is required
    // bitcast: v32i1 -> i32 / v64i1 -> i64
    return DAG.getBitcast(ValLoc, ValArg);
  }

  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
}

/// Breaks v64i1 value into two registers and adds the new node to the DAG
static void Passv64i1ArgInRegs(
    const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // Before splitting the value we cast it to i64
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Splitting the value into two i32 types
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);

  // Attach the two i32 types into corresponding registers
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}

SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // In some cases we need to disable registers from the default CSR list.
  // For example, when they are used as return registers (preserve_* and X86's
  // regcall) or for argument passing (X86's regcall).
  bool ShouldDisableCalleeSavedRegister =
      shouldDisableRetRegFromCSR(CallConv) ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");

  if (CallConv == CallingConv::X86_INTR && !Outs.empty())
    report_fatal_error("X86 interrupts may not return any value");

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);

  SmallVector<std::pair<Register, SDValue>, 4> RetVals;
  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++OutsIndex) {
    CCValAssign &VA = RVLocs[I];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Add the register to the CalleeSaveDisableRegs list.
    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());

    SDValue ValToCopy = OutVals[OutsIndex];
    EVT ValVT = ValToCopy.getValueType();

    // Promote values to the appropriate types.
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    }
    else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);

    assert(VA.getLocInfo() != CCValAssign::FPExt &&
           "Unexpected FP-extend for return value.");

    // Report an error if we have attempted to return a value via an XMM
    // register and SSE was disabled.
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      // When returning a double via an XMM register, report an error if SSE2 is
      // not enabled.
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    }

    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
    // the RET instruction and handled by the FP Stackifier.
    if (VA.getLocReg() == X86::FP0 ||
        VA.getLocReg() == X86::FP1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
      // Don't emit a copytoreg.
      continue;
    }

    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
    // which is returned in RAX / RDX.
    if (Subtarget.is64Bit()) {
      if (ValVT == MVT::x86mmx) {
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          // If we don't have SSE2 available, convert to v4f32 so the generated
          // register is legal.
          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
        }
      }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");

      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);

      // Add the second register to the CalleeSaveDisableRegs list.
      if (ShouldDisableCalleeSavedRegister)
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
    } else {
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
    }
  }

  SDValue Glue;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));

  // Copy the result values into the output registers.
  for (auto &RetVal : RetVals) {
    if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
      RetOps.push_back(RetVal.second);
      continue; // Don't emit a copytoreg.
    }

    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.

  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  //
  // Checking Function.hasStructRetAttr() here is insufficient because the IR
  // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
  // false, then an sret argument may be implicitly inserted in the SelDAG. In
  // either case FuncInfo->setSRetReturnReg() will have been called.
  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
    // When we have both sret and another return value, we should use the
    // original Chain stored in RetOps[0], instead of the current Chain updated
    // in the above loop. If we only have sret, RetOps[0] equals Chain.

    // For the case of sret and another return value, we have
    //   Chain_0 at the function entry
    //   Chain_1 = getCopyToReg(Chain_0) in the above loop
    // If we use Chain_1 in getCopyFromReg, we will have
    //   Val = getCopyFromReg(Chain_1)
    //   Chain_2 = getCopyToReg(Chain_1, Val) from below

    // getCopyToReg(Chain_0) will be glued together with
    // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
    // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
    //   Data dependency from Unit B to Unit A due to usage of Val in
    //   getCopyToReg(Chain_1, Val)
    //   Chain dependency from Unit A to Unit B

    // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    Register RetValReg
        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
933*5f757f3fSDimitry Andric X86::RAX : X86::EAX; 934*5f757f3fSDimitry Andric Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue); 935*5f757f3fSDimitry Andric Glue = Chain.getValue(1); 936*5f757f3fSDimitry Andric 937*5f757f3fSDimitry Andric // RAX/EAX now acts like a return value. 938*5f757f3fSDimitry Andric RetOps.push_back( 939*5f757f3fSDimitry Andric DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); 940*5f757f3fSDimitry Andric 941*5f757f3fSDimitry Andric // Add the returned register to the CalleeSaveDisableRegs list. Don't do 942*5f757f3fSDimitry Andric // this however for preserve_most/preserve_all to minimize the number of 943*5f757f3fSDimitry Andric // callee-saved registers for these CCs. 944*5f757f3fSDimitry Andric if (ShouldDisableCalleeSavedRegister && 945*5f757f3fSDimitry Andric CallConv != CallingConv::PreserveAll && 946*5f757f3fSDimitry Andric CallConv != CallingConv::PreserveMost) 947*5f757f3fSDimitry Andric MF.getRegInfo().disableCalleeSavedRegister(RetValReg); 948*5f757f3fSDimitry Andric } 949*5f757f3fSDimitry Andric 950*5f757f3fSDimitry Andric const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); 951*5f757f3fSDimitry Andric const MCPhysReg *I = 952*5f757f3fSDimitry Andric TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 953*5f757f3fSDimitry Andric if (I) { 954*5f757f3fSDimitry Andric for (; *I; ++I) { 955*5f757f3fSDimitry Andric if (X86::GR64RegClass.contains(*I)) 956*5f757f3fSDimitry Andric RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 957*5f757f3fSDimitry Andric else 958*5f757f3fSDimitry Andric llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 959*5f757f3fSDimitry Andric } 960*5f757f3fSDimitry Andric } 961*5f757f3fSDimitry Andric 962*5f757f3fSDimitry Andric RetOps[0] = Chain; // Update chain. 963*5f757f3fSDimitry Andric 964*5f757f3fSDimitry Andric // Add the glue if we have it. 965*5f757f3fSDimitry Andric if (Glue.getNode()) 966*5f757f3fSDimitry Andric RetOps.push_back(Glue); 967*5f757f3fSDimitry Andric 968*5f757f3fSDimitry Andric X86ISD::NodeType opcode = X86ISD::RET_GLUE; 969*5f757f3fSDimitry Andric if (CallConv == CallingConv::X86_INTR) 970*5f757f3fSDimitry Andric opcode = X86ISD::IRET; 971*5f757f3fSDimitry Andric return DAG.getNode(opcode, dl, MVT::Other, RetOps); 972*5f757f3fSDimitry Andric } 973*5f757f3fSDimitry Andric 974*5f757f3fSDimitry Andric bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 975*5f757f3fSDimitry Andric if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) 976*5f757f3fSDimitry Andric return false; 977*5f757f3fSDimitry Andric 978*5f757f3fSDimitry Andric SDValue TCChain = Chain; 979*5f757f3fSDimitry Andric SDNode *Copy = *N->use_begin(); 980*5f757f3fSDimitry Andric if (Copy->getOpcode() == ISD::CopyToReg) { 981*5f757f3fSDimitry Andric // If the copy has a glue operand, we conservatively assume it isn't safe to 982*5f757f3fSDimitry Andric // perform a tail call. 
983*5f757f3fSDimitry Andric     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
984*5f757f3fSDimitry Andric       return false;
985*5f757f3fSDimitry Andric     TCChain = Copy->getOperand(0);
986*5f757f3fSDimitry Andric   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
987*5f757f3fSDimitry Andric     return false;
988*5f757f3fSDimitry Andric 
989*5f757f3fSDimitry Andric   bool HasRet = false;
990*5f757f3fSDimitry Andric   for (const SDNode *U : Copy->uses()) {
991*5f757f3fSDimitry Andric     if (U->getOpcode() != X86ISD::RET_GLUE)
992*5f757f3fSDimitry Andric       return false;
993*5f757f3fSDimitry Andric     // If we are returning more than one value, we can definitely
994*5f757f3fSDimitry Andric     // not make a tail call; see PR19530.
995*5f757f3fSDimitry Andric     if (U->getNumOperands() > 4)
996*5f757f3fSDimitry Andric       return false;
997*5f757f3fSDimitry Andric     if (U->getNumOperands() == 4 &&
998*5f757f3fSDimitry Andric         U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
999*5f757f3fSDimitry Andric       return false;
1000*5f757f3fSDimitry Andric     HasRet = true;
1001*5f757f3fSDimitry Andric   }
1002*5f757f3fSDimitry Andric 
1003*5f757f3fSDimitry Andric   if (!HasRet)
1004*5f757f3fSDimitry Andric     return false;
1005*5f757f3fSDimitry Andric 
1006*5f757f3fSDimitry Andric   Chain = TCChain;
1007*5f757f3fSDimitry Andric   return true;
1008*5f757f3fSDimitry Andric }
1009*5f757f3fSDimitry Andric 
1010*5f757f3fSDimitry Andric EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
1011*5f757f3fSDimitry Andric                                            ISD::NodeType ExtendKind) const {
1012*5f757f3fSDimitry Andric   MVT ReturnMVT = MVT::i32;
1013*5f757f3fSDimitry Andric 
1014*5f757f3fSDimitry Andric   bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
1015*5f757f3fSDimitry Andric   if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
1016*5f757f3fSDimitry Andric     // The ABI does not require i1, i8 or i16 to be extended.
1017*5f757f3fSDimitry Andric     //
1018*5f757f3fSDimitry Andric     // On Darwin, there is code in the wild relying on Clang's old behaviour of
1019*5f757f3fSDimitry Andric     // always extending i8/i16 return values, so keep doing that for now.
1020*5f757f3fSDimitry Andric     // (PR26665).
1021*5f757f3fSDimitry Andric     ReturnMVT = MVT::i8;
1022*5f757f3fSDimitry Andric   }
1023*5f757f3fSDimitry Andric 
1024*5f757f3fSDimitry Andric   EVT MinVT = getRegisterType(Context, ReturnMVT);
1025*5f757f3fSDimitry Andric   return VT.bitsLT(MinVT) ? MinVT : VT;
1026*5f757f3fSDimitry Andric }
1027*5f757f3fSDimitry Andric 
1028*5f757f3fSDimitry Andric /// Reads two 32 bit registers and creates a 64 bit mask value.
1029*5f757f3fSDimitry Andric /// \param VA The current 32 bit value that needs to be assigned.
1030*5f757f3fSDimitry Andric /// \param NextVA The next 32 bit value that needs to be assigned.
1031*5f757f3fSDimitry Andric /// \param Root The parent DAG node.
1032*5f757f3fSDimitry Andric /// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
1033*5f757f3fSDimitry Andric ///                        glue purposes. In case the DAG is already using a
1034*5f757f3fSDimitry Andric ///                        physical register instead of a virtual one, we
1035*5f757f3fSDimitry Andric ///                        should glue our new SDValue to the InGlue SDValue.
1036*5f757f3fSDimitry Andric /// \return a new SDValue of size 64 bits.
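/// Illustrative sketch (not a guarantee about which registers are chosen): a
/// v64i1 regcall argument on a 32-bit AVX512BW target is assigned two i32
/// locations by the calling convention; each half is read into a GR32
/// register, bitcast to v32i1, and the halves are concatenated into v64i1.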
1037*5f757f3fSDimitry Andric static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, 1038*5f757f3fSDimitry Andric SDValue &Root, SelectionDAG &DAG, 1039*5f757f3fSDimitry Andric const SDLoc &DL, const X86Subtarget &Subtarget, 1040*5f757f3fSDimitry Andric SDValue *InGlue = nullptr) { 1041*5f757f3fSDimitry Andric assert((Subtarget.hasBWI()) && "Expected AVX512BW target!"); 1042*5f757f3fSDimitry Andric assert(Subtarget.is32Bit() && "Expecting 32 bit target"); 1043*5f757f3fSDimitry Andric assert(VA.getValVT() == MVT::v64i1 && 1044*5f757f3fSDimitry Andric "Expecting first location of 64 bit width type"); 1045*5f757f3fSDimitry Andric assert(NextVA.getValVT() == VA.getValVT() && 1046*5f757f3fSDimitry Andric "The locations should have the same type"); 1047*5f757f3fSDimitry Andric assert(VA.isRegLoc() && NextVA.isRegLoc() && 1048*5f757f3fSDimitry Andric "The values should reside in two registers"); 1049*5f757f3fSDimitry Andric 1050*5f757f3fSDimitry Andric SDValue Lo, Hi; 1051*5f757f3fSDimitry Andric SDValue ArgValueLo, ArgValueHi; 1052*5f757f3fSDimitry Andric 1053*5f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 1054*5f757f3fSDimitry Andric const TargetRegisterClass *RC = &X86::GR32RegClass; 1055*5f757f3fSDimitry Andric 1056*5f757f3fSDimitry Andric // Read a 32 bit value from the registers. 1057*5f757f3fSDimitry Andric if (nullptr == InGlue) { 1058*5f757f3fSDimitry Andric // When no physical register is present, 1059*5f757f3fSDimitry Andric // create an intermediate virtual register. 1060*5f757f3fSDimitry Andric Register Reg = MF.addLiveIn(VA.getLocReg(), RC); 1061*5f757f3fSDimitry Andric ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32); 1062*5f757f3fSDimitry Andric Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 1063*5f757f3fSDimitry Andric ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32); 1064*5f757f3fSDimitry Andric } else { 1065*5f757f3fSDimitry Andric // When a physical register is available read the value from it and glue 1066*5f757f3fSDimitry Andric // the reads together. 1067*5f757f3fSDimitry Andric ArgValueLo = 1068*5f757f3fSDimitry Andric DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue); 1069*5f757f3fSDimitry Andric *InGlue = ArgValueLo.getValue(2); 1070*5f757f3fSDimitry Andric ArgValueHi = 1071*5f757f3fSDimitry Andric DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue); 1072*5f757f3fSDimitry Andric *InGlue = ArgValueHi.getValue(2); 1073*5f757f3fSDimitry Andric } 1074*5f757f3fSDimitry Andric 1075*5f757f3fSDimitry Andric // Convert the i32 type into v32i1 type. 1076*5f757f3fSDimitry Andric Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo); 1077*5f757f3fSDimitry Andric 1078*5f757f3fSDimitry Andric // Convert the i32 type into v32i1 type. 1079*5f757f3fSDimitry Andric Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi); 1080*5f757f3fSDimitry Andric 1081*5f757f3fSDimitry Andric // Concatenate the two values together. 1082*5f757f3fSDimitry Andric return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi); 1083*5f757f3fSDimitry Andric } 1084*5f757f3fSDimitry Andric 1085*5f757f3fSDimitry Andric /// The function will lower a register of various sizes (8/16/32/64) 1086*5f757f3fSDimitry Andric /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1) 1087*5f757f3fSDimitry Andric /// \returns a DAG node contains the operand after lowering to mask type. 
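/// For example, a v16i1 value that arrived in an i32 location is truncated to
/// i16 and then bitcast to v16i1; a v64i1 value in an i64 location is bitcast
/// directly (the 32-bit split case is handled by getv64i1Argument instead).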
1088*5f757f3fSDimitry Andric static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1089*5f757f3fSDimitry Andric                                const EVT &ValLoc, const SDLoc &DL,
1090*5f757f3fSDimitry Andric                                SelectionDAG &DAG) {
1091*5f757f3fSDimitry Andric   SDValue ValReturned = ValArg;
1092*5f757f3fSDimitry Andric 
1093*5f757f3fSDimitry Andric   if (ValVT == MVT::v1i1)
1094*5f757f3fSDimitry Andric     return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1095*5f757f3fSDimitry Andric 
1096*5f757f3fSDimitry Andric   if (ValVT == MVT::v64i1) {
1097*5f757f3fSDimitry Andric     // On 32-bit targets, this case is handled by getv64i1Argument.
1098*5f757f3fSDimitry Andric     assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1099*5f757f3fSDimitry Andric     // On 64-bit targets, there is no need to truncate the value, only bitcast it.
1100*5f757f3fSDimitry Andric   } else {
1101*5f757f3fSDimitry Andric     MVT MaskLenVT;
1102*5f757f3fSDimitry Andric     switch (ValVT.getSimpleVT().SimpleTy) {
1103*5f757f3fSDimitry Andric     case MVT::v8i1:
1104*5f757f3fSDimitry Andric       MaskLenVT = MVT::i8;
1105*5f757f3fSDimitry Andric       break;
1106*5f757f3fSDimitry Andric     case MVT::v16i1:
1107*5f757f3fSDimitry Andric       MaskLenVT = MVT::i16;
1108*5f757f3fSDimitry Andric       break;
1109*5f757f3fSDimitry Andric     case MVT::v32i1:
1110*5f757f3fSDimitry Andric       MaskLenVT = MVT::i32;
1111*5f757f3fSDimitry Andric       break;
1112*5f757f3fSDimitry Andric     default:
1113*5f757f3fSDimitry Andric       llvm_unreachable("Expecting a vector of i1 types");
1114*5f757f3fSDimitry Andric     }
1115*5f757f3fSDimitry Andric 
1116*5f757f3fSDimitry Andric     ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1117*5f757f3fSDimitry Andric   }
1118*5f757f3fSDimitry Andric   return DAG.getBitcast(ValVT, ValReturned);
1119*5f757f3fSDimitry Andric }
1120*5f757f3fSDimitry Andric 
1121*5f757f3fSDimitry Andric /// Lower the result values of a call into the
1122*5f757f3fSDimitry Andric /// appropriate copies out of the physical registers.
1123*5f757f3fSDimitry Andric ///
1124*5f757f3fSDimitry Andric SDValue X86TargetLowering::LowerCallResult(
1125*5f757f3fSDimitry Andric     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1126*5f757f3fSDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1127*5f757f3fSDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1128*5f757f3fSDimitry Andric     uint32_t *RegMask) const {
1129*5f757f3fSDimitry Andric 
1130*5f757f3fSDimitry Andric   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1131*5f757f3fSDimitry Andric   // Assign locations to each value returned by this call.
1132*5f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> RVLocs;
1133*5f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1134*5f757f3fSDimitry Andric                  *DAG.getContext());
1135*5f757f3fSDimitry Andric   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1136*5f757f3fSDimitry Andric 
1137*5f757f3fSDimitry Andric   // Copy all of the result registers out of their specified physreg.
1138*5f757f3fSDimitry Andric   for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1139*5f757f3fSDimitry Andric        ++I, ++InsIndex) {
1140*5f757f3fSDimitry Andric     CCValAssign &VA = RVLocs[I];
1141*5f757f3fSDimitry Andric     EVT CopyVT = VA.getLocVT();
1142*5f757f3fSDimitry Andric 
1143*5f757f3fSDimitry Andric     // In some calling conventions we need to remove the used registers
1144*5f757f3fSDimitry Andric     // from the register mask.
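    // (In a register mask a set bit means the register is preserved across
    // the call, so clearing the bits for the returned register and its
    // sub-registers marks them as clobbered by this call.)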
1145*5f757f3fSDimitry Andric if (RegMask) { 1146*5f757f3fSDimitry Andric for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg())) 1147*5f757f3fSDimitry Andric RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); 1148*5f757f3fSDimitry Andric } 1149*5f757f3fSDimitry Andric 1150*5f757f3fSDimitry Andric // Report an error if there was an attempt to return FP values via XMM 1151*5f757f3fSDimitry Andric // registers. 1152*5f757f3fSDimitry Andric if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) { 1153*5f757f3fSDimitry Andric errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); 1154*5f757f3fSDimitry Andric if (VA.getLocReg() == X86::XMM1) 1155*5f757f3fSDimitry Andric VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts. 1156*5f757f3fSDimitry Andric else 1157*5f757f3fSDimitry Andric VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 1158*5f757f3fSDimitry Andric } else if (!Subtarget.hasSSE2() && 1159*5f757f3fSDimitry Andric X86::FR64XRegClass.contains(VA.getLocReg()) && 1160*5f757f3fSDimitry Andric CopyVT == MVT::f64) { 1161*5f757f3fSDimitry Andric errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); 1162*5f757f3fSDimitry Andric if (VA.getLocReg() == X86::XMM1) 1163*5f757f3fSDimitry Andric VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts. 1164*5f757f3fSDimitry Andric else 1165*5f757f3fSDimitry Andric VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 1166*5f757f3fSDimitry Andric } 1167*5f757f3fSDimitry Andric 1168*5f757f3fSDimitry Andric // If we prefer to use the value in xmm registers, copy it out as f80 and 1169*5f757f3fSDimitry Andric // use a truncate to move it from fp stack reg to xmm reg. 1170*5f757f3fSDimitry Andric bool RoundAfterCopy = false; 1171*5f757f3fSDimitry Andric if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 1172*5f757f3fSDimitry Andric isScalarFPTypeInSSEReg(VA.getValVT())) { 1173*5f757f3fSDimitry Andric if (!Subtarget.hasX87()) 1174*5f757f3fSDimitry Andric report_fatal_error("X87 register return with X87 disabled"); 1175*5f757f3fSDimitry Andric CopyVT = MVT::f80; 1176*5f757f3fSDimitry Andric RoundAfterCopy = (CopyVT != VA.getLocVT()); 1177*5f757f3fSDimitry Andric } 1178*5f757f3fSDimitry Andric 1179*5f757f3fSDimitry Andric SDValue Val; 1180*5f757f3fSDimitry Andric if (VA.needsCustom()) { 1181*5f757f3fSDimitry Andric assert(VA.getValVT() == MVT::v64i1 && 1182*5f757f3fSDimitry Andric "Currently the only custom case is when we split v64i1 to 2 regs"); 1183*5f757f3fSDimitry Andric Val = 1184*5f757f3fSDimitry Andric getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue); 1185*5f757f3fSDimitry Andric } else { 1186*5f757f3fSDimitry Andric Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue) 1187*5f757f3fSDimitry Andric .getValue(1); 1188*5f757f3fSDimitry Andric Val = Chain.getValue(0); 1189*5f757f3fSDimitry Andric InGlue = Chain.getValue(2); 1190*5f757f3fSDimitry Andric } 1191*5f757f3fSDimitry Andric 1192*5f757f3fSDimitry Andric if (RoundAfterCopy) 1193*5f757f3fSDimitry Andric Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, 1194*5f757f3fSDimitry Andric // This truncation won't change the value. 
1195*5f757f3fSDimitry Andric DAG.getIntPtrConstant(1, dl, /*isTarget=*/true)); 1196*5f757f3fSDimitry Andric 1197*5f757f3fSDimitry Andric if (VA.isExtInLoc()) { 1198*5f757f3fSDimitry Andric if (VA.getValVT().isVector() && 1199*5f757f3fSDimitry Andric VA.getValVT().getScalarType() == MVT::i1 && 1200*5f757f3fSDimitry Andric ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || 1201*5f757f3fSDimitry Andric (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { 1202*5f757f3fSDimitry Andric // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 1203*5f757f3fSDimitry Andric Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); 1204*5f757f3fSDimitry Andric } else 1205*5f757f3fSDimitry Andric Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 1206*5f757f3fSDimitry Andric } 1207*5f757f3fSDimitry Andric 1208*5f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::BCvt) 1209*5f757f3fSDimitry Andric Val = DAG.getBitcast(VA.getValVT(), Val); 1210*5f757f3fSDimitry Andric 1211*5f757f3fSDimitry Andric InVals.push_back(Val); 1212*5f757f3fSDimitry Andric } 1213*5f757f3fSDimitry Andric 1214*5f757f3fSDimitry Andric return Chain; 1215*5f757f3fSDimitry Andric } 1216*5f757f3fSDimitry Andric 1217*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 1218*5f757f3fSDimitry Andric // C & StdCall & Fast Calling Convention implementation 1219*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 1220*5f757f3fSDimitry Andric // StdCall calling convention seems to be standard for many Windows' API 1221*5f757f3fSDimitry Andric // routines and around. It differs from C calling convention just a little: 1222*5f757f3fSDimitry Andric // callee should clean up the stack, not caller. Symbols should be also 1223*5f757f3fSDimitry Andric // decorated in some fancy way :) It doesn't support any vector arguments. 1224*5f757f3fSDimitry Andric // For info on fast calling convention see Fast Calling Convention (tail call) 1225*5f757f3fSDimitry Andric // implementation LowerX86_32FastCCCallTo. 1226*5f757f3fSDimitry Andric 1227*5f757f3fSDimitry Andric /// Determines whether Args, either a set of outgoing arguments to a call, or a 1228*5f757f3fSDimitry Andric /// set of incoming args of a call, contains an sret pointer that the callee 1229*5f757f3fSDimitry Andric /// pops 1230*5f757f3fSDimitry Andric template <typename T> 1231*5f757f3fSDimitry Andric static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args, 1232*5f757f3fSDimitry Andric const X86Subtarget &Subtarget) { 1233*5f757f3fSDimitry Andric // Not C++20 (yet), so no concepts available. 1234*5f757f3fSDimitry Andric static_assert(std::is_same_v<T, ISD::OutputArg> || 1235*5f757f3fSDimitry Andric std::is_same_v<T, ISD::InputArg>, 1236*5f757f3fSDimitry Andric "requires ISD::OutputArg or ISD::InputArg"); 1237*5f757f3fSDimitry Andric 1238*5f757f3fSDimitry Andric // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out 1239*5f757f3fSDimitry Andric // for most compilations. 1240*5f757f3fSDimitry Andric if (!Subtarget.is32Bit()) 1241*5f757f3fSDimitry Andric return false; 1242*5f757f3fSDimitry Andric 1243*5f757f3fSDimitry Andric if (Args.empty()) 1244*5f757f3fSDimitry Andric return false; 1245*5f757f3fSDimitry Andric 1246*5f757f3fSDimitry Andric // Most calls do not have an sret argument, check the arg next. 
1247*5f757f3fSDimitry Andric   const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1248*5f757f3fSDimitry Andric   if (!Flags.isSRet() || Flags.isInReg())
1249*5f757f3fSDimitry Andric     return false;
1250*5f757f3fSDimitry Andric 
1251*5f757f3fSDimitry Andric   // The MSVC ABI does not pop the sret.
1252*5f757f3fSDimitry Andric   if (Subtarget.getTargetTriple().isOSMSVCRT())
1253*5f757f3fSDimitry Andric     return false;
1254*5f757f3fSDimitry Andric 
1255*5f757f3fSDimitry Andric   // MCUs don't pop the sret.
1256*5f757f3fSDimitry Andric   if (Subtarget.isTargetMCU())
1257*5f757f3fSDimitry Andric     return false;
1258*5f757f3fSDimitry Andric 
1259*5f757f3fSDimitry Andric   // The callee pops the argument.
1260*5f757f3fSDimitry Andric   return true;
1261*5f757f3fSDimitry Andric }
1262*5f757f3fSDimitry Andric 
1263*5f757f3fSDimitry Andric /// Make a copy of an aggregate at address specified by "Src" to address
1264*5f757f3fSDimitry Andric /// "Dst" with size and alignment information specified by the specific
1265*5f757f3fSDimitry Andric /// parameter attribute. The copy will be passed as a byval function parameter.
1266*5f757f3fSDimitry Andric static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1267*5f757f3fSDimitry Andric                                          SDValue Chain, ISD::ArgFlagsTy Flags,
1268*5f757f3fSDimitry Andric                                          SelectionDAG &DAG, const SDLoc &dl) {
1269*5f757f3fSDimitry Andric   SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1270*5f757f3fSDimitry Andric 
1271*5f757f3fSDimitry Andric   return DAG.getMemcpy(
1272*5f757f3fSDimitry Andric       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1273*5f757f3fSDimitry Andric       /*isVolatile*/ false, /*AlwaysInline=*/true,
1274*5f757f3fSDimitry Andric       /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
1275*5f757f3fSDimitry Andric }
1276*5f757f3fSDimitry Andric 
1277*5f757f3fSDimitry Andric /// Return true if the calling convention is one that we can guarantee TCO for.
1278*5f757f3fSDimitry Andric static bool canGuaranteeTCO(CallingConv::ID CC) {
1279*5f757f3fSDimitry Andric   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1280*5f757f3fSDimitry Andric           CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1281*5f757f3fSDimitry Andric           CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1282*5f757f3fSDimitry Andric }
1283*5f757f3fSDimitry Andric 
1284*5f757f3fSDimitry Andric /// Return true if we might ever do TCO for calls with this calling convention.
1285*5f757f3fSDimitry Andric static bool mayTailCallThisCC(CallingConv::ID CC) {
1286*5f757f3fSDimitry Andric   switch (CC) {
1287*5f757f3fSDimitry Andric   // C calling conventions:
1288*5f757f3fSDimitry Andric   case CallingConv::C:
1289*5f757f3fSDimitry Andric   case CallingConv::Win64:
1290*5f757f3fSDimitry Andric   case CallingConv::X86_64_SysV:
1291*5f757f3fSDimitry Andric   // Callee pop conventions:
1292*5f757f3fSDimitry Andric   case CallingConv::X86_ThisCall:
1293*5f757f3fSDimitry Andric   case CallingConv::X86_StdCall:
1294*5f757f3fSDimitry Andric   case CallingConv::X86_VectorCall:
1295*5f757f3fSDimitry Andric   case CallingConv::X86_FastCall:
1296*5f757f3fSDimitry Andric   // Swift:
1297*5f757f3fSDimitry Andric   case CallingConv::Swift:
1298*5f757f3fSDimitry Andric     return true;
1299*5f757f3fSDimitry Andric   default:
1300*5f757f3fSDimitry Andric     return canGuaranteeTCO(CC);
1301*5f757f3fSDimitry Andric   }
1302*5f757f3fSDimitry Andric }
1303*5f757f3fSDimitry Andric 
1304*5f757f3fSDimitry Andric /// Return true if the function is being made into a tailcall target by
1305*5f757f3fSDimitry Andric /// changing its ABI.
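/// For example, CallingConv::Tail and CallingConv::SwiftTail always opt in to
/// the modified ABI, while the other conventions accepted by canGuaranteeTCO
/// (Fast, GHC, X86_RegCall, HiPE) only do so when GuaranteedTailCallOpt
/// (-tailcallopt) is enabled.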
1306*5f757f3fSDimitry Andric static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1307*5f757f3fSDimitry Andric   return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1308*5f757f3fSDimitry Andric          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1309*5f757f3fSDimitry Andric }
1310*5f757f3fSDimitry Andric 
1311*5f757f3fSDimitry Andric bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1312*5f757f3fSDimitry Andric   if (!CI->isTailCall())
1313*5f757f3fSDimitry Andric     return false;
1314*5f757f3fSDimitry Andric 
1315*5f757f3fSDimitry Andric   CallingConv::ID CalleeCC = CI->getCallingConv();
1316*5f757f3fSDimitry Andric   if (!mayTailCallThisCC(CalleeCC))
1317*5f757f3fSDimitry Andric     return false;
1318*5f757f3fSDimitry Andric 
1319*5f757f3fSDimitry Andric   return true;
1320*5f757f3fSDimitry Andric }
1321*5f757f3fSDimitry Andric 
1322*5f757f3fSDimitry Andric SDValue
1323*5f757f3fSDimitry Andric X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1324*5f757f3fSDimitry Andric                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1325*5f757f3fSDimitry Andric                                     const SDLoc &dl, SelectionDAG &DAG,
1326*5f757f3fSDimitry Andric                                     const CCValAssign &VA,
1327*5f757f3fSDimitry Andric                                     MachineFrameInfo &MFI, unsigned i) const {
1328*5f757f3fSDimitry Andric   // Create the nodes corresponding to a load from this parameter slot.
1329*5f757f3fSDimitry Andric   ISD::ArgFlagsTy Flags = Ins[i].Flags;
1330*5f757f3fSDimitry Andric   bool AlwaysUseMutable = shouldGuaranteeTCO(
1331*5f757f3fSDimitry Andric       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1332*5f757f3fSDimitry Andric   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1333*5f757f3fSDimitry Andric   EVT ValVT;
1334*5f757f3fSDimitry Andric   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1335*5f757f3fSDimitry Andric 
1336*5f757f3fSDimitry Andric   // If the value is passed by pointer, we have the address passed instead of
1337*5f757f3fSDimitry Andric   // the value itself. No need to extend if the mask value and location share
1338*5f757f3fSDimitry Andric   // the same absolute size.
1339*5f757f3fSDimitry Andric   bool ExtendedInMem =
1340*5f757f3fSDimitry Andric       VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1341*5f757f3fSDimitry Andric       VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1342*5f757f3fSDimitry Andric 
1343*5f757f3fSDimitry Andric   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1344*5f757f3fSDimitry Andric     ValVT = VA.getLocVT();
1345*5f757f3fSDimitry Andric   else
1346*5f757f3fSDimitry Andric     ValVT = VA.getValVT();
1347*5f757f3fSDimitry Andric 
1348*5f757f3fSDimitry Andric   // FIXME: For now, all byval parameter objects are marked mutable. This can be
1349*5f757f3fSDimitry Andric   // changed with more analysis.
1350*5f757f3fSDimitry Andric   // In the case of tail call optimization, mark all arguments mutable, since
1351*5f757f3fSDimitry Andric   // they could be overwritten when arguments are lowered for a tail call.
1352*5f757f3fSDimitry Andric   if (Flags.isByVal()) {
1353*5f757f3fSDimitry Andric     unsigned Bytes = Flags.getByValSize();
1354*5f757f3fSDimitry Andric     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1355*5f757f3fSDimitry Andric 
1356*5f757f3fSDimitry Andric     // FIXME: For now, all byval parameter objects are marked as aliasing. This
1357*5f757f3fSDimitry Andric     // can be improved with deeper analysis.
1358*5f757f3fSDimitry Andric int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable, 1359*5f757f3fSDimitry Andric /*isAliased=*/true); 1360*5f757f3fSDimitry Andric return DAG.getFrameIndex(FI, PtrVT); 1361*5f757f3fSDimitry Andric } 1362*5f757f3fSDimitry Andric 1363*5f757f3fSDimitry Andric EVT ArgVT = Ins[i].ArgVT; 1364*5f757f3fSDimitry Andric 1365*5f757f3fSDimitry Andric // If this is a vector that has been split into multiple parts, don't elide 1366*5f757f3fSDimitry Andric // the copy. The layout on the stack may not match the packed in-memory 1367*5f757f3fSDimitry Andric // layout. 1368*5f757f3fSDimitry Andric bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector(); 1369*5f757f3fSDimitry Andric 1370*5f757f3fSDimitry Andric // This is an argument in memory. We might be able to perform copy elision. 1371*5f757f3fSDimitry Andric // If the argument is passed directly in memory without any extension, then we 1372*5f757f3fSDimitry Andric // can perform copy elision. Large vector types, for example, may be passed 1373*5f757f3fSDimitry Andric // indirectly by pointer. 1374*5f757f3fSDimitry Andric if (Flags.isCopyElisionCandidate() && 1375*5f757f3fSDimitry Andric VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && 1376*5f757f3fSDimitry Andric !ScalarizedVector) { 1377*5f757f3fSDimitry Andric SDValue PartAddr; 1378*5f757f3fSDimitry Andric if (Ins[i].PartOffset == 0) { 1379*5f757f3fSDimitry Andric // If this is a one-part value or the first part of a multi-part value, 1380*5f757f3fSDimitry Andric // create a stack object for the entire argument value type and return a 1381*5f757f3fSDimitry Andric // load from our portion of it. This assumes that if the first part of an 1382*5f757f3fSDimitry Andric // argument is in memory, the rest will also be in memory. 1383*5f757f3fSDimitry Andric int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), 1384*5f757f3fSDimitry Andric /*IsImmutable=*/false); 1385*5f757f3fSDimitry Andric PartAddr = DAG.getFrameIndex(FI, PtrVT); 1386*5f757f3fSDimitry Andric return DAG.getLoad( 1387*5f757f3fSDimitry Andric ValVT, dl, Chain, PartAddr, 1388*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); 1389*5f757f3fSDimitry Andric } 1390*5f757f3fSDimitry Andric 1391*5f757f3fSDimitry Andric // This is not the first piece of an argument in memory. See if there is 1392*5f757f3fSDimitry Andric // already a fixed stack object including this offset. If so, assume it 1393*5f757f3fSDimitry Andric // was created by the PartOffset == 0 branch above and create a load from 1394*5f757f3fSDimitry Andric // the appropriate offset into it. 
1395*5f757f3fSDimitry Andric int64_t PartBegin = VA.getLocMemOffset(); 1396*5f757f3fSDimitry Andric int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; 1397*5f757f3fSDimitry Andric int FI = MFI.getObjectIndexBegin(); 1398*5f757f3fSDimitry Andric for (; MFI.isFixedObjectIndex(FI); ++FI) { 1399*5f757f3fSDimitry Andric int64_t ObjBegin = MFI.getObjectOffset(FI); 1400*5f757f3fSDimitry Andric int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); 1401*5f757f3fSDimitry Andric if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) 1402*5f757f3fSDimitry Andric break; 1403*5f757f3fSDimitry Andric } 1404*5f757f3fSDimitry Andric if (MFI.isFixedObjectIndex(FI)) { 1405*5f757f3fSDimitry Andric SDValue Addr = 1406*5f757f3fSDimitry Andric DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), 1407*5f757f3fSDimitry Andric DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); 1408*5f757f3fSDimitry Andric return DAG.getLoad(ValVT, dl, Chain, Addr, 1409*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 1410*5f757f3fSDimitry Andric DAG.getMachineFunction(), FI, Ins[i].PartOffset)); 1411*5f757f3fSDimitry Andric } 1412*5f757f3fSDimitry Andric } 1413*5f757f3fSDimitry Andric 1414*5f757f3fSDimitry Andric int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 1415*5f757f3fSDimitry Andric VA.getLocMemOffset(), isImmutable); 1416*5f757f3fSDimitry Andric 1417*5f757f3fSDimitry Andric // Set SExt or ZExt flag. 1418*5f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::ZExt) { 1419*5f757f3fSDimitry Andric MFI.setObjectZExt(FI, true); 1420*5f757f3fSDimitry Andric } else if (VA.getLocInfo() == CCValAssign::SExt) { 1421*5f757f3fSDimitry Andric MFI.setObjectSExt(FI, true); 1422*5f757f3fSDimitry Andric } 1423*5f757f3fSDimitry Andric 1424*5f757f3fSDimitry Andric MaybeAlign Alignment; 1425*5f757f3fSDimitry Andric if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && 1426*5f757f3fSDimitry Andric ValVT != MVT::f80) 1427*5f757f3fSDimitry Andric Alignment = MaybeAlign(4); 1428*5f757f3fSDimitry Andric SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1429*5f757f3fSDimitry Andric SDValue Val = DAG.getLoad( 1430*5f757f3fSDimitry Andric ValVT, dl, Chain, FIN, 1431*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), 1432*5f757f3fSDimitry Andric Alignment); 1433*5f757f3fSDimitry Andric return ExtendedInMem 1434*5f757f3fSDimitry Andric ? (VA.getValVT().isVector() 1435*5f757f3fSDimitry Andric ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) 1436*5f757f3fSDimitry Andric : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) 1437*5f757f3fSDimitry Andric : Val; 1438*5f757f3fSDimitry Andric } 1439*5f757f3fSDimitry Andric 1440*5f757f3fSDimitry Andric // FIXME: Get this from tablegen. 
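// Note: these tables mirror the integer argument registers of the Win64 and
// SysV x86-64 calling conventions; get64BitArgumentXMMs below plays the same
// role for the XMM registers used by vararg FP/vector arguments.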
1441*5f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, 1442*5f757f3fSDimitry Andric const X86Subtarget &Subtarget) { 1443*5f757f3fSDimitry Andric assert(Subtarget.is64Bit()); 1444*5f757f3fSDimitry Andric 1445*5f757f3fSDimitry Andric if (Subtarget.isCallingConvWin64(CallConv)) { 1446*5f757f3fSDimitry Andric static const MCPhysReg GPR64ArgRegsWin64[] = { 1447*5f757f3fSDimitry Andric X86::RCX, X86::RDX, X86::R8, X86::R9 1448*5f757f3fSDimitry Andric }; 1449*5f757f3fSDimitry Andric return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); 1450*5f757f3fSDimitry Andric } 1451*5f757f3fSDimitry Andric 1452*5f757f3fSDimitry Andric static const MCPhysReg GPR64ArgRegs64Bit[] = { 1453*5f757f3fSDimitry Andric X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1454*5f757f3fSDimitry Andric }; 1455*5f757f3fSDimitry Andric return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); 1456*5f757f3fSDimitry Andric } 1457*5f757f3fSDimitry Andric 1458*5f757f3fSDimitry Andric // FIXME: Get this from tablegen. 1459*5f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, 1460*5f757f3fSDimitry Andric CallingConv::ID CallConv, 1461*5f757f3fSDimitry Andric const X86Subtarget &Subtarget) { 1462*5f757f3fSDimitry Andric assert(Subtarget.is64Bit()); 1463*5f757f3fSDimitry Andric if (Subtarget.isCallingConvWin64(CallConv)) { 1464*5f757f3fSDimitry Andric // The XMM registers which might contain var arg parameters are shadowed 1465*5f757f3fSDimitry Andric // in their paired GPR. So we only need to save the GPR to their home 1466*5f757f3fSDimitry Andric // slots. 1467*5f757f3fSDimitry Andric // TODO: __vectorcall will change this. 1468*5f757f3fSDimitry Andric return std::nullopt; 1469*5f757f3fSDimitry Andric } 1470*5f757f3fSDimitry Andric 1471*5f757f3fSDimitry Andric bool isSoftFloat = Subtarget.useSoftFloat(); 1472*5f757f3fSDimitry Andric if (isSoftFloat || !Subtarget.hasSSE1()) 1473*5f757f3fSDimitry Andric // Kernel mode asks for SSE to be disabled, so there are no XMM argument 1474*5f757f3fSDimitry Andric // registers. 1475*5f757f3fSDimitry Andric return std::nullopt; 1476*5f757f3fSDimitry Andric 1477*5f757f3fSDimitry Andric static const MCPhysReg XMMArgRegs64Bit[] = { 1478*5f757f3fSDimitry Andric X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1479*5f757f3fSDimitry Andric X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1480*5f757f3fSDimitry Andric }; 1481*5f757f3fSDimitry Andric return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); 1482*5f757f3fSDimitry Andric } 1483*5f757f3fSDimitry Andric 1484*5f757f3fSDimitry Andric #ifndef NDEBUG 1485*5f757f3fSDimitry Andric static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { 1486*5f757f3fSDimitry Andric return llvm::is_sorted( 1487*5f757f3fSDimitry Andric ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { 1488*5f757f3fSDimitry Andric return A.getValNo() < B.getValNo(); 1489*5f757f3fSDimitry Andric }); 1490*5f757f3fSDimitry Andric } 1491*5f757f3fSDimitry Andric #endif 1492*5f757f3fSDimitry Andric 1493*5f757f3fSDimitry Andric namespace { 1494*5f757f3fSDimitry Andric /// This is a helper class for lowering variable arguments parameters. 
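/// It is used from LowerFormalArguments for vararg functions:
/// lowerVarArgsParameters() is the entry point and dispatches to
/// createVarArgAreaAndStoreRegisters() when the function contains a va_start
/// and to forwardMustTailParameters() when the vararg function contains a
/// musttail call.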
1495*5f757f3fSDimitry Andric class VarArgsLoweringHelper { 1496*5f757f3fSDimitry Andric public: 1497*5f757f3fSDimitry Andric VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc, 1498*5f757f3fSDimitry Andric SelectionDAG &DAG, const X86Subtarget &Subtarget, 1499*5f757f3fSDimitry Andric CallingConv::ID CallConv, CCState &CCInfo) 1500*5f757f3fSDimitry Andric : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget), 1501*5f757f3fSDimitry Andric TheMachineFunction(DAG.getMachineFunction()), 1502*5f757f3fSDimitry Andric TheFunction(TheMachineFunction.getFunction()), 1503*5f757f3fSDimitry Andric FrameInfo(TheMachineFunction.getFrameInfo()), 1504*5f757f3fSDimitry Andric FrameLowering(*Subtarget.getFrameLowering()), 1505*5f757f3fSDimitry Andric TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv), 1506*5f757f3fSDimitry Andric CCInfo(CCInfo) {} 1507*5f757f3fSDimitry Andric 1508*5f757f3fSDimitry Andric // Lower variable arguments parameters. 1509*5f757f3fSDimitry Andric void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize); 1510*5f757f3fSDimitry Andric 1511*5f757f3fSDimitry Andric private: 1512*5f757f3fSDimitry Andric void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize); 1513*5f757f3fSDimitry Andric 1514*5f757f3fSDimitry Andric void forwardMustTailParameters(SDValue &Chain); 1515*5f757f3fSDimitry Andric 1516*5f757f3fSDimitry Andric bool is64Bit() const { return Subtarget.is64Bit(); } 1517*5f757f3fSDimitry Andric bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); } 1518*5f757f3fSDimitry Andric 1519*5f757f3fSDimitry Andric X86MachineFunctionInfo *FuncInfo; 1520*5f757f3fSDimitry Andric const SDLoc &DL; 1521*5f757f3fSDimitry Andric SelectionDAG &DAG; 1522*5f757f3fSDimitry Andric const X86Subtarget &Subtarget; 1523*5f757f3fSDimitry Andric MachineFunction &TheMachineFunction; 1524*5f757f3fSDimitry Andric const Function &TheFunction; 1525*5f757f3fSDimitry Andric MachineFrameInfo &FrameInfo; 1526*5f757f3fSDimitry Andric const TargetFrameLowering &FrameLowering; 1527*5f757f3fSDimitry Andric const TargetLowering &TargLowering; 1528*5f757f3fSDimitry Andric CallingConv::ID CallConv; 1529*5f757f3fSDimitry Andric CCState &CCInfo; 1530*5f757f3fSDimitry Andric }; 1531*5f757f3fSDimitry Andric } // namespace 1532*5f757f3fSDimitry Andric 1533*5f757f3fSDimitry Andric void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( 1534*5f757f3fSDimitry Andric SDValue &Chain, unsigned StackSize) { 1535*5f757f3fSDimitry Andric // If the function takes variable number of arguments, make a frame index for 1536*5f757f3fSDimitry Andric // the start of the first vararg value... for expansion of llvm.va_start. We 1537*5f757f3fSDimitry Andric // can skip this if there are no va_start calls. 1538*5f757f3fSDimitry Andric if (is64Bit() || (CallConv != CallingConv::X86_FastCall && 1539*5f757f3fSDimitry Andric CallConv != CallingConv::X86_ThisCall)) { 1540*5f757f3fSDimitry Andric FuncInfo->setVarArgsFrameIndex( 1541*5f757f3fSDimitry Andric FrameInfo.CreateFixedObject(1, StackSize, true)); 1542*5f757f3fSDimitry Andric } 1543*5f757f3fSDimitry Andric 1544*5f757f3fSDimitry Andric // 64-bit calling conventions support varargs and register parameters, so we 1545*5f757f3fSDimitry Andric // have to do extra work to spill them in the prologue. 1546*5f757f3fSDimitry Andric if (is64Bit()) { 1547*5f757f3fSDimitry Andric // Find the first unallocated argument registers. 
1548*5f757f3fSDimitry Andric ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); 1549*5f757f3fSDimitry Andric ArrayRef<MCPhysReg> ArgXMMs = 1550*5f757f3fSDimitry Andric get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget); 1551*5f757f3fSDimitry Andric unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); 1552*5f757f3fSDimitry Andric unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); 1553*5f757f3fSDimitry Andric 1554*5f757f3fSDimitry Andric assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && 1555*5f757f3fSDimitry Andric "SSE register cannot be used when SSE is disabled!"); 1556*5f757f3fSDimitry Andric 1557*5f757f3fSDimitry Andric if (isWin64()) { 1558*5f757f3fSDimitry Andric // Get to the caller-allocated home save location. Add 8 to account 1559*5f757f3fSDimitry Andric // for the return address. 1560*5f757f3fSDimitry Andric int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8; 1561*5f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex( 1562*5f757f3fSDimitry Andric FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); 1563*5f757f3fSDimitry Andric // Fixup to set vararg frame on shadow area (4 x i64). 1564*5f757f3fSDimitry Andric if (NumIntRegs < 4) 1565*5f757f3fSDimitry Andric FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); 1566*5f757f3fSDimitry Andric } else { 1567*5f757f3fSDimitry Andric // For X86-64, if there are vararg parameters that are passed via 1568*5f757f3fSDimitry Andric // registers, then we must store them to their spots on the stack so 1569*5f757f3fSDimitry Andric // they may be loaded by dereferencing the result of va_next. 1570*5f757f3fSDimitry Andric FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); 1571*5f757f3fSDimitry Andric FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); 1572*5f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject( 1573*5f757f3fSDimitry Andric ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false)); 1574*5f757f3fSDimitry Andric } 1575*5f757f3fSDimitry Andric 1576*5f757f3fSDimitry Andric SmallVector<SDValue, 6> 1577*5f757f3fSDimitry Andric LiveGPRs; // list of SDValue for GPR registers keeping live input value 1578*5f757f3fSDimitry Andric SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers 1579*5f757f3fSDimitry Andric // keeping live input value 1580*5f757f3fSDimitry Andric SDValue ALVal; // if applicable keeps SDValue for %al register 1581*5f757f3fSDimitry Andric 1582*5f757f3fSDimitry Andric // Gather all the live in physical registers. 1583*5f757f3fSDimitry Andric for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { 1584*5f757f3fSDimitry Andric Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass); 1585*5f757f3fSDimitry Andric LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64)); 1586*5f757f3fSDimitry Andric } 1587*5f757f3fSDimitry Andric const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs); 1588*5f757f3fSDimitry Andric if (!AvailableXmms.empty()) { 1589*5f757f3fSDimitry Andric Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass); 1590*5f757f3fSDimitry Andric ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8); 1591*5f757f3fSDimitry Andric for (MCPhysReg Reg : AvailableXmms) { 1592*5f757f3fSDimitry Andric // FastRegisterAllocator spills virtual registers at basic 1593*5f757f3fSDimitry Andric // block boundary. That leads to usages of xmm registers 1594*5f757f3fSDimitry Andric // outside of check for %al. 
Pass physical registers to 1595*5f757f3fSDimitry Andric // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling. 1596*5f757f3fSDimitry Andric TheMachineFunction.getRegInfo().addLiveIn(Reg); 1597*5f757f3fSDimitry Andric LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32)); 1598*5f757f3fSDimitry Andric } 1599*5f757f3fSDimitry Andric } 1600*5f757f3fSDimitry Andric 1601*5f757f3fSDimitry Andric // Store the integer parameter registers. 1602*5f757f3fSDimitry Andric SmallVector<SDValue, 8> MemOps; 1603*5f757f3fSDimitry Andric SDValue RSFIN = 1604*5f757f3fSDimitry Andric DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), 1605*5f757f3fSDimitry Andric TargLowering.getPointerTy(DAG.getDataLayout())); 1606*5f757f3fSDimitry Andric unsigned Offset = FuncInfo->getVarArgsGPOffset(); 1607*5f757f3fSDimitry Andric for (SDValue Val : LiveGPRs) { 1608*5f757f3fSDimitry Andric SDValue FIN = DAG.getNode(ISD::ADD, DL, 1609*5f757f3fSDimitry Andric TargLowering.getPointerTy(DAG.getDataLayout()), 1610*5f757f3fSDimitry Andric RSFIN, DAG.getIntPtrConstant(Offset, DL)); 1611*5f757f3fSDimitry Andric SDValue Store = 1612*5f757f3fSDimitry Andric DAG.getStore(Val.getValue(1), DL, Val, FIN, 1613*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 1614*5f757f3fSDimitry Andric DAG.getMachineFunction(), 1615*5f757f3fSDimitry Andric FuncInfo->getRegSaveFrameIndex(), Offset)); 1616*5f757f3fSDimitry Andric MemOps.push_back(Store); 1617*5f757f3fSDimitry Andric Offset += 8; 1618*5f757f3fSDimitry Andric } 1619*5f757f3fSDimitry Andric 1620*5f757f3fSDimitry Andric // Now store the XMM (fp + vector) parameter registers. 1621*5f757f3fSDimitry Andric if (!LiveXMMRegs.empty()) { 1622*5f757f3fSDimitry Andric SmallVector<SDValue, 12> SaveXMMOps; 1623*5f757f3fSDimitry Andric SaveXMMOps.push_back(Chain); 1624*5f757f3fSDimitry Andric SaveXMMOps.push_back(ALVal); 1625*5f757f3fSDimitry Andric SaveXMMOps.push_back(RSFIN); 1626*5f757f3fSDimitry Andric SaveXMMOps.push_back( 1627*5f757f3fSDimitry Andric DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32)); 1628*5f757f3fSDimitry Andric llvm::append_range(SaveXMMOps, LiveXMMRegs); 1629*5f757f3fSDimitry Andric MachineMemOperand *StoreMMO = 1630*5f757f3fSDimitry Andric DAG.getMachineFunction().getMachineMemOperand( 1631*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 1632*5f757f3fSDimitry Andric DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), 1633*5f757f3fSDimitry Andric Offset), 1634*5f757f3fSDimitry Andric MachineMemOperand::MOStore, 128, Align(16)); 1635*5f757f3fSDimitry Andric MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS, 1636*5f757f3fSDimitry Andric DL, DAG.getVTList(MVT::Other), 1637*5f757f3fSDimitry Andric SaveXMMOps, MVT::i8, StoreMMO)); 1638*5f757f3fSDimitry Andric } 1639*5f757f3fSDimitry Andric 1640*5f757f3fSDimitry Andric if (!MemOps.empty()) 1641*5f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 1642*5f757f3fSDimitry Andric } 1643*5f757f3fSDimitry Andric } 1644*5f757f3fSDimitry Andric 1645*5f757f3fSDimitry Andric void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { 1646*5f757f3fSDimitry Andric // Find the largest legal vector type. 1647*5f757f3fSDimitry Andric MVT VecVT = MVT::Other; 1648*5f757f3fSDimitry Andric // FIXME: Only some x86_32 calling conventions support AVX512. 
1649*5f757f3fSDimitry Andric if (Subtarget.useAVX512Regs() && 1650*5f757f3fSDimitry Andric (is64Bit() || (CallConv == CallingConv::X86_VectorCall || 1651*5f757f3fSDimitry Andric CallConv == CallingConv::Intel_OCL_BI))) 1652*5f757f3fSDimitry Andric VecVT = MVT::v16f32; 1653*5f757f3fSDimitry Andric else if (Subtarget.hasAVX()) 1654*5f757f3fSDimitry Andric VecVT = MVT::v8f32; 1655*5f757f3fSDimitry Andric else if (Subtarget.hasSSE2()) 1656*5f757f3fSDimitry Andric VecVT = MVT::v4f32; 1657*5f757f3fSDimitry Andric 1658*5f757f3fSDimitry Andric // We forward some GPRs and some vector types. 1659*5f757f3fSDimitry Andric SmallVector<MVT, 2> RegParmTypes; 1660*5f757f3fSDimitry Andric MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32; 1661*5f757f3fSDimitry Andric RegParmTypes.push_back(IntVT); 1662*5f757f3fSDimitry Andric if (VecVT != MVT::Other) 1663*5f757f3fSDimitry Andric RegParmTypes.push_back(VecVT); 1664*5f757f3fSDimitry Andric 1665*5f757f3fSDimitry Andric // Compute the set of forwarded registers. The rest are scratch. 1666*5f757f3fSDimitry Andric SmallVectorImpl<ForwardedRegister> &Forwards = 1667*5f757f3fSDimitry Andric FuncInfo->getForwardedMustTailRegParms(); 1668*5f757f3fSDimitry Andric CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); 1669*5f757f3fSDimitry Andric 1670*5f757f3fSDimitry Andric // Forward AL for SysV x86_64 targets, since it is used for varargs. 1671*5f757f3fSDimitry Andric if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) { 1672*5f757f3fSDimitry Andric Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass); 1673*5f757f3fSDimitry Andric Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); 1674*5f757f3fSDimitry Andric } 1675*5f757f3fSDimitry Andric 1676*5f757f3fSDimitry Andric // Copy all forwards from physical to virtual registers. 1677*5f757f3fSDimitry Andric for (ForwardedRegister &FR : Forwards) { 1678*5f757f3fSDimitry Andric // FIXME: Can we use a less constrained schedule? 1679*5f757f3fSDimitry Andric SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT); 1680*5f757f3fSDimitry Andric FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister( 1681*5f757f3fSDimitry Andric TargLowering.getRegClassFor(FR.VT)); 1682*5f757f3fSDimitry Andric Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal); 1683*5f757f3fSDimitry Andric } 1684*5f757f3fSDimitry Andric } 1685*5f757f3fSDimitry Andric 1686*5f757f3fSDimitry Andric void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain, 1687*5f757f3fSDimitry Andric unsigned StackSize) { 1688*5f757f3fSDimitry Andric // Set FrameIndex to the 0xAAAAAAA value to mark unset state. 1689*5f757f3fSDimitry Andric // If necessary, it would be set into the correct value later. 
1690*5f757f3fSDimitry Andric FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); 1691*5f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 1692*5f757f3fSDimitry Andric 1693*5f757f3fSDimitry Andric if (FrameInfo.hasVAStart()) 1694*5f757f3fSDimitry Andric createVarArgAreaAndStoreRegisters(Chain, StackSize); 1695*5f757f3fSDimitry Andric 1696*5f757f3fSDimitry Andric if (FrameInfo.hasMustTailInVarArgFunc()) 1697*5f757f3fSDimitry Andric forwardMustTailParameters(Chain); 1698*5f757f3fSDimitry Andric } 1699*5f757f3fSDimitry Andric 1700*5f757f3fSDimitry Andric SDValue X86TargetLowering::LowerFormalArguments( 1701*5f757f3fSDimitry Andric SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1702*5f757f3fSDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 1703*5f757f3fSDimitry Andric SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1704*5f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 1705*5f757f3fSDimitry Andric X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1706*5f757f3fSDimitry Andric 1707*5f757f3fSDimitry Andric const Function &F = MF.getFunction(); 1708*5f757f3fSDimitry Andric if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && 1709*5f757f3fSDimitry Andric F.getName() == "main") 1710*5f757f3fSDimitry Andric FuncInfo->setForceFramePointer(true); 1711*5f757f3fSDimitry Andric 1712*5f757f3fSDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 1713*5f757f3fSDimitry Andric bool Is64Bit = Subtarget.is64Bit(); 1714*5f757f3fSDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 1715*5f757f3fSDimitry Andric 1716*5f757f3fSDimitry Andric assert( 1717*5f757f3fSDimitry Andric !(IsVarArg && canGuaranteeTCO(CallConv)) && 1718*5f757f3fSDimitry Andric "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); 1719*5f757f3fSDimitry Andric 1720*5f757f3fSDimitry Andric // Assign locations to all of the incoming arguments. 1721*5f757f3fSDimitry Andric SmallVector<CCValAssign, 16> ArgLocs; 1722*5f757f3fSDimitry Andric CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1723*5f757f3fSDimitry Andric 1724*5f757f3fSDimitry Andric // Allocate shadow area for Win64. 1725*5f757f3fSDimitry Andric if (IsWin64) 1726*5f757f3fSDimitry Andric CCInfo.AllocateStack(32, Align(8)); 1727*5f757f3fSDimitry Andric 1728*5f757f3fSDimitry Andric CCInfo.AnalyzeArguments(Ins, CC_X86); 1729*5f757f3fSDimitry Andric 1730*5f757f3fSDimitry Andric // In vectorcall calling convention a second pass is required for the HVA 1731*5f757f3fSDimitry Andric // types. 1732*5f757f3fSDimitry Andric if (CallingConv::X86_VectorCall == CallConv) { 1733*5f757f3fSDimitry Andric CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); 1734*5f757f3fSDimitry Andric } 1735*5f757f3fSDimitry Andric 1736*5f757f3fSDimitry Andric // The next loop assumes that the locations are in the same order of the 1737*5f757f3fSDimitry Andric // input arguments. 
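  // (In asserts builds this ordering is checked by the isSortedByValueNo
  // assertion below.)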
1738*5f757f3fSDimitry Andric assert(isSortedByValueNo(ArgLocs) && 1739*5f757f3fSDimitry Andric "Argument Location list must be sorted before lowering"); 1740*5f757f3fSDimitry Andric 1741*5f757f3fSDimitry Andric SDValue ArgValue; 1742*5f757f3fSDimitry Andric for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; 1743*5f757f3fSDimitry Andric ++I, ++InsIndex) { 1744*5f757f3fSDimitry Andric assert(InsIndex < Ins.size() && "Invalid Ins index"); 1745*5f757f3fSDimitry Andric CCValAssign &VA = ArgLocs[I]; 1746*5f757f3fSDimitry Andric 1747*5f757f3fSDimitry Andric if (VA.isRegLoc()) { 1748*5f757f3fSDimitry Andric EVT RegVT = VA.getLocVT(); 1749*5f757f3fSDimitry Andric if (VA.needsCustom()) { 1750*5f757f3fSDimitry Andric assert( 1751*5f757f3fSDimitry Andric VA.getValVT() == MVT::v64i1 && 1752*5f757f3fSDimitry Andric "Currently the only custom case is when we split v64i1 to 2 regs"); 1753*5f757f3fSDimitry Andric 1754*5f757f3fSDimitry Andric // v64i1 values, in regcall calling convention, that are 1755*5f757f3fSDimitry Andric // compiled to 32 bit arch, are split up into two registers. 1756*5f757f3fSDimitry Andric ArgValue = 1757*5f757f3fSDimitry Andric getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); 1758*5f757f3fSDimitry Andric } else { 1759*5f757f3fSDimitry Andric const TargetRegisterClass *RC; 1760*5f757f3fSDimitry Andric if (RegVT == MVT::i8) 1761*5f757f3fSDimitry Andric RC = &X86::GR8RegClass; 1762*5f757f3fSDimitry Andric else if (RegVT == MVT::i16) 1763*5f757f3fSDimitry Andric RC = &X86::GR16RegClass; 1764*5f757f3fSDimitry Andric else if (RegVT == MVT::i32) 1765*5f757f3fSDimitry Andric RC = &X86::GR32RegClass; 1766*5f757f3fSDimitry Andric else if (Is64Bit && RegVT == MVT::i64) 1767*5f757f3fSDimitry Andric RC = &X86::GR64RegClass; 1768*5f757f3fSDimitry Andric else if (RegVT == MVT::f16) 1769*5f757f3fSDimitry Andric RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; 1770*5f757f3fSDimitry Andric else if (RegVT == MVT::f32) 1771*5f757f3fSDimitry Andric RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; 1772*5f757f3fSDimitry Andric else if (RegVT == MVT::f64) 1773*5f757f3fSDimitry Andric RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; 1774*5f757f3fSDimitry Andric else if (RegVT == MVT::f80) 1775*5f757f3fSDimitry Andric RC = &X86::RFP80RegClass; 1776*5f757f3fSDimitry Andric else if (RegVT == MVT::f128) 1777*5f757f3fSDimitry Andric RC = &X86::VR128RegClass; 1778*5f757f3fSDimitry Andric else if (RegVT.is512BitVector()) 1779*5f757f3fSDimitry Andric RC = &X86::VR512RegClass; 1780*5f757f3fSDimitry Andric else if (RegVT.is256BitVector()) 1781*5f757f3fSDimitry Andric RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; 1782*5f757f3fSDimitry Andric else if (RegVT.is128BitVector()) 1783*5f757f3fSDimitry Andric RC = Subtarget.hasVLX() ? 
&X86::VR128XRegClass : &X86::VR128RegClass; 1784*5f757f3fSDimitry Andric else if (RegVT == MVT::x86mmx) 1785*5f757f3fSDimitry Andric RC = &X86::VR64RegClass; 1786*5f757f3fSDimitry Andric else if (RegVT == MVT::v1i1) 1787*5f757f3fSDimitry Andric RC = &X86::VK1RegClass; 1788*5f757f3fSDimitry Andric else if (RegVT == MVT::v8i1) 1789*5f757f3fSDimitry Andric RC = &X86::VK8RegClass; 1790*5f757f3fSDimitry Andric else if (RegVT == MVT::v16i1) 1791*5f757f3fSDimitry Andric RC = &X86::VK16RegClass; 1792*5f757f3fSDimitry Andric else if (RegVT == MVT::v32i1) 1793*5f757f3fSDimitry Andric RC = &X86::VK32RegClass; 1794*5f757f3fSDimitry Andric else if (RegVT == MVT::v64i1) 1795*5f757f3fSDimitry Andric RC = &X86::VK64RegClass; 1796*5f757f3fSDimitry Andric else 1797*5f757f3fSDimitry Andric llvm_unreachable("Unknown argument type!"); 1798*5f757f3fSDimitry Andric 1799*5f757f3fSDimitry Andric Register Reg = MF.addLiveIn(VA.getLocReg(), RC); 1800*5f757f3fSDimitry Andric ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 1801*5f757f3fSDimitry Andric } 1802*5f757f3fSDimitry Andric 1803*5f757f3fSDimitry Andric // If this is an 8 or 16-bit value, it is really passed promoted to 32 1804*5f757f3fSDimitry Andric // bits. Insert an assert[sz]ext to capture this, then truncate to the 1805*5f757f3fSDimitry Andric // right size. 1806*5f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::SExt) 1807*5f757f3fSDimitry Andric ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 1808*5f757f3fSDimitry Andric DAG.getValueType(VA.getValVT())); 1809*5f757f3fSDimitry Andric else if (VA.getLocInfo() == CCValAssign::ZExt) 1810*5f757f3fSDimitry Andric ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 1811*5f757f3fSDimitry Andric DAG.getValueType(VA.getValVT())); 1812*5f757f3fSDimitry Andric else if (VA.getLocInfo() == CCValAssign::BCvt) 1813*5f757f3fSDimitry Andric ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); 1814*5f757f3fSDimitry Andric 1815*5f757f3fSDimitry Andric if (VA.isExtInLoc()) { 1816*5f757f3fSDimitry Andric // Handle MMX values passed in XMM regs. 1817*5f757f3fSDimitry Andric if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) 1818*5f757f3fSDimitry Andric ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); 1819*5f757f3fSDimitry Andric else if (VA.getValVT().isVector() && 1820*5f757f3fSDimitry Andric VA.getValVT().getScalarType() == MVT::i1 && 1821*5f757f3fSDimitry Andric ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || 1822*5f757f3fSDimitry Andric (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { 1823*5f757f3fSDimitry Andric // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 1824*5f757f3fSDimitry Andric ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); 1825*5f757f3fSDimitry Andric } else 1826*5f757f3fSDimitry Andric ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1827*5f757f3fSDimitry Andric } 1828*5f757f3fSDimitry Andric } else { 1829*5f757f3fSDimitry Andric assert(VA.isMemLoc()); 1830*5f757f3fSDimitry Andric ArgValue = 1831*5f757f3fSDimitry Andric LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); 1832*5f757f3fSDimitry Andric } 1833*5f757f3fSDimitry Andric 1834*5f757f3fSDimitry Andric // If value is passed via pointer - do a load. 
1835*5f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::Indirect && 1836*5f757f3fSDimitry Andric !(Ins[I].Flags.isByVal() && VA.isRegLoc())) { 1837*5f757f3fSDimitry Andric ArgValue = 1838*5f757f3fSDimitry Andric DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); 1839*5f757f3fSDimitry Andric } 1840*5f757f3fSDimitry Andric 1841*5f757f3fSDimitry Andric InVals.push_back(ArgValue); 1842*5f757f3fSDimitry Andric } 1843*5f757f3fSDimitry Andric 1844*5f757f3fSDimitry Andric for (unsigned I = 0, E = Ins.size(); I != E; ++I) { 1845*5f757f3fSDimitry Andric if (Ins[I].Flags.isSwiftAsync()) { 1846*5f757f3fSDimitry Andric auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1847*5f757f3fSDimitry Andric if (Subtarget.is64Bit()) 1848*5f757f3fSDimitry Andric X86FI->setHasSwiftAsyncContext(true); 1849*5f757f3fSDimitry Andric else { 1850*5f757f3fSDimitry Andric int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false); 1851*5f757f3fSDimitry Andric X86FI->setSwiftAsyncContextFrameIdx(FI); 1852*5f757f3fSDimitry Andric SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I], 1853*5f757f3fSDimitry Andric DAG.getFrameIndex(FI, MVT::i32), 1854*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(MF, FI)); 1855*5f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain); 1856*5f757f3fSDimitry Andric } 1857*5f757f3fSDimitry Andric } 1858*5f757f3fSDimitry Andric 1859*5f757f3fSDimitry Andric // Swift calling convention does not require we copy the sret argument 1860*5f757f3fSDimitry Andric // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. 1861*5f757f3fSDimitry Andric if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) 1862*5f757f3fSDimitry Andric continue; 1863*5f757f3fSDimitry Andric 1864*5f757f3fSDimitry Andric // All x86 ABIs require that for returning structs by value we copy the 1865*5f757f3fSDimitry Andric // sret argument into %rax/%eax (depending on ABI) for the return. Save 1866*5f757f3fSDimitry Andric // the argument into a virtual register so that we can access it from the 1867*5f757f3fSDimitry Andric // return points. 1868*5f757f3fSDimitry Andric if (Ins[I].Flags.isSRet()) { 1869*5f757f3fSDimitry Andric assert(!FuncInfo->getSRetReturnReg() && 1870*5f757f3fSDimitry Andric "SRet return has already been set"); 1871*5f757f3fSDimitry Andric MVT PtrTy = getPointerTy(DAG.getDataLayout()); 1872*5f757f3fSDimitry Andric Register Reg = 1873*5f757f3fSDimitry Andric MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); 1874*5f757f3fSDimitry Andric FuncInfo->setSRetReturnReg(Reg); 1875*5f757f3fSDimitry Andric SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); 1876*5f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); 1877*5f757f3fSDimitry Andric break; 1878*5f757f3fSDimitry Andric } 1879*5f757f3fSDimitry Andric } 1880*5f757f3fSDimitry Andric 1881*5f757f3fSDimitry Andric unsigned StackSize = CCInfo.getStackSize(); 1882*5f757f3fSDimitry Andric // Align stack specially for tail calls. 
1883*5f757f3fSDimitry Andric if (shouldGuaranteeTCO(CallConv, 1884*5f757f3fSDimitry Andric MF.getTarget().Options.GuaranteedTailCallOpt)) 1885*5f757f3fSDimitry Andric StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 1886*5f757f3fSDimitry Andric 1887*5f757f3fSDimitry Andric if (IsVarArg) 1888*5f757f3fSDimitry Andric VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) 1889*5f757f3fSDimitry Andric .lowerVarArgsParameters(Chain, StackSize); 1890*5f757f3fSDimitry Andric 1891*5f757f3fSDimitry Andric // Some CCs need callee pop. 1892*5f757f3fSDimitry Andric if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg, 1893*5f757f3fSDimitry Andric MF.getTarget().Options.GuaranteedTailCallOpt)) { 1894*5f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. 1895*5f757f3fSDimitry Andric } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { 1896*5f757f3fSDimitry Andric // X86 interrupts must pop the error code (and the alignment padding) if 1897*5f757f3fSDimitry Andric // present. 1898*5f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); 1899*5f757f3fSDimitry Andric } else { 1900*5f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. 1901*5f757f3fSDimitry Andric // If this is an sret function, the return should pop the hidden pointer. 1902*5f757f3fSDimitry Andric if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget)) 1903*5f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(4); 1904*5f757f3fSDimitry Andric } 1905*5f757f3fSDimitry Andric 1906*5f757f3fSDimitry Andric if (!Is64Bit) { 1907*5f757f3fSDimitry Andric // RegSaveFrameIndex is X86-64 only. 1908*5f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 1909*5f757f3fSDimitry Andric } 1910*5f757f3fSDimitry Andric 1911*5f757f3fSDimitry Andric FuncInfo->setArgumentStackSize(StackSize); 1912*5f757f3fSDimitry Andric 1913*5f757f3fSDimitry Andric if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { 1914*5f757f3fSDimitry Andric EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); 1915*5f757f3fSDimitry Andric if (Personality == EHPersonality::CoreCLR) { 1916*5f757f3fSDimitry Andric assert(Is64Bit); 1917*5f757f3fSDimitry Andric // TODO: Add a mechanism to frame lowering that will allow us to indicate 1918*5f757f3fSDimitry Andric // that we'd prefer this slot be allocated towards the bottom of the frame 1919*5f757f3fSDimitry Andric // (i.e. near the stack pointer after allocating the frame). Every 1920*5f757f3fSDimitry Andric // funclet needs a copy of this slot in its (mostly empty) frame, and the 1921*5f757f3fSDimitry Andric // offset from the bottom of this and each funclet's frame must be the 1922*5f757f3fSDimitry Andric // same, so the size of funclets' (mostly empty) frames is dictated by 1923*5f757f3fSDimitry Andric // how far this slot is from the bottom (since they allocate just enough 1924*5f757f3fSDimitry Andric // space to accommodate holding this slot at the correct offset). 
1925*5f757f3fSDimitry Andric int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false); 1926*5f757f3fSDimitry Andric EHInfo->PSPSymFrameIdx = PSPSymFI; 1927*5f757f3fSDimitry Andric } 1928*5f757f3fSDimitry Andric } 1929*5f757f3fSDimitry Andric 1930*5f757f3fSDimitry Andric if (shouldDisableArgRegFromCSR(CallConv) || 1931*5f757f3fSDimitry Andric F.hasFnAttribute("no_caller_saved_registers")) { 1932*5f757f3fSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 1933*5f757f3fSDimitry Andric for (std::pair<Register, Register> Pair : MRI.liveins()) 1934*5f757f3fSDimitry Andric MRI.disableCalleeSavedRegister(Pair.first); 1935*5f757f3fSDimitry Andric } 1936*5f757f3fSDimitry Andric 1937*5f757f3fSDimitry Andric return Chain; 1938*5f757f3fSDimitry Andric } 1939*5f757f3fSDimitry Andric 1940*5f757f3fSDimitry Andric SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, 1941*5f757f3fSDimitry Andric SDValue Arg, const SDLoc &dl, 1942*5f757f3fSDimitry Andric SelectionDAG &DAG, 1943*5f757f3fSDimitry Andric const CCValAssign &VA, 1944*5f757f3fSDimitry Andric ISD::ArgFlagsTy Flags, 1945*5f757f3fSDimitry Andric bool isByVal) const { 1946*5f757f3fSDimitry Andric unsigned LocMemOffset = VA.getLocMemOffset(); 1947*5f757f3fSDimitry Andric SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 1948*5f757f3fSDimitry Andric PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 1949*5f757f3fSDimitry Andric StackPtr, PtrOff); 1950*5f757f3fSDimitry Andric if (isByVal) 1951*5f757f3fSDimitry Andric return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 1952*5f757f3fSDimitry Andric 1953*5f757f3fSDimitry Andric MaybeAlign Alignment; 1954*5f757f3fSDimitry Andric if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && 1955*5f757f3fSDimitry Andric Arg.getSimpleValueType() != MVT::f80) 1956*5f757f3fSDimitry Andric Alignment = MaybeAlign(4); 1957*5f757f3fSDimitry Andric return DAG.getStore( 1958*5f757f3fSDimitry Andric Chain, dl, Arg, PtrOff, 1959*5f757f3fSDimitry Andric MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), 1960*5f757f3fSDimitry Andric Alignment); 1961*5f757f3fSDimitry Andric } 1962*5f757f3fSDimitry Andric 1963*5f757f3fSDimitry Andric /// Emit a load of return address if tail call 1964*5f757f3fSDimitry Andric /// optimization is performed and it is required. 1965*5f757f3fSDimitry Andric SDValue X86TargetLowering::EmitTailCallLoadRetAddr( 1966*5f757f3fSDimitry Andric SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, 1967*5f757f3fSDimitry Andric bool Is64Bit, int FPDiff, const SDLoc &dl) const { 1968*5f757f3fSDimitry Andric // Adjust the Return address stack slot. 1969*5f757f3fSDimitry Andric EVT VT = getPointerTy(DAG.getDataLayout()); 1970*5f757f3fSDimitry Andric OutRetAddr = getReturnAddressFrameIndex(DAG); 1971*5f757f3fSDimitry Andric 1972*5f757f3fSDimitry Andric // Load the "old" Return address. 1973*5f757f3fSDimitry Andric OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); 1974*5f757f3fSDimitry Andric return SDValue(OutRetAddr.getNode(), 1); 1975*5f757f3fSDimitry Andric } 1976*5f757f3fSDimitry Andric 1977*5f757f3fSDimitry Andric /// Emit a store of the return address if tail call 1978*5f757f3fSDimitry Andric /// optimization is performed and it is required (FPDiff!=0). 
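/// (FPDiff is the delta between the caller's and the callee's argument areas;
/// a non-zero value means the return address slot has to move so it stays
/// directly above the new argument area.)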
1979*5f757f3fSDimitry Andric static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, 1980*5f757f3fSDimitry Andric SDValue Chain, SDValue RetAddrFrIdx, 1981*5f757f3fSDimitry Andric EVT PtrVT, unsigned SlotSize, 1982*5f757f3fSDimitry Andric int FPDiff, const SDLoc &dl) { 1983*5f757f3fSDimitry Andric // Store the return address to the appropriate stack slot. 1984*5f757f3fSDimitry Andric if (!FPDiff) return Chain; 1985*5f757f3fSDimitry Andric // Calculate the new stack slot for the return address. 1986*5f757f3fSDimitry Andric int NewReturnAddrFI = 1987*5f757f3fSDimitry Andric MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, 1988*5f757f3fSDimitry Andric false); 1989*5f757f3fSDimitry Andric SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); 1990*5f757f3fSDimitry Andric Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, 1991*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 1992*5f757f3fSDimitry Andric DAG.getMachineFunction(), NewReturnAddrFI)); 1993*5f757f3fSDimitry Andric return Chain; 1994*5f757f3fSDimitry Andric } 1995*5f757f3fSDimitry Andric 1996*5f757f3fSDimitry Andric /// Returns a vector_shuffle mask for an movs{s|d}, movd 1997*5f757f3fSDimitry Andric /// operation of specified width. 1998*5f757f3fSDimitry Andric SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, 1999*5f757f3fSDimitry Andric SDValue V1, SDValue V2) const { 2000*5f757f3fSDimitry Andric unsigned NumElems = VT.getVectorNumElements(); 2001*5f757f3fSDimitry Andric SmallVector<int, 8> Mask; 2002*5f757f3fSDimitry Andric Mask.push_back(NumElems); 2003*5f757f3fSDimitry Andric for (unsigned i = 1; i != NumElems; ++i) 2004*5f757f3fSDimitry Andric Mask.push_back(i); 2005*5f757f3fSDimitry Andric return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); 2006*5f757f3fSDimitry Andric } 2007*5f757f3fSDimitry Andric 2008*5f757f3fSDimitry Andric SDValue 2009*5f757f3fSDimitry Andric X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 2010*5f757f3fSDimitry Andric SmallVectorImpl<SDValue> &InVals) const { 2011*5f757f3fSDimitry Andric SelectionDAG &DAG = CLI.DAG; 2012*5f757f3fSDimitry Andric SDLoc &dl = CLI.DL; 2013*5f757f3fSDimitry Andric SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2014*5f757f3fSDimitry Andric SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2015*5f757f3fSDimitry Andric SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2016*5f757f3fSDimitry Andric SDValue Chain = CLI.Chain; 2017*5f757f3fSDimitry Andric SDValue Callee = CLI.Callee; 2018*5f757f3fSDimitry Andric CallingConv::ID CallConv = CLI.CallConv; 2019*5f757f3fSDimitry Andric bool &isTailCall = CLI.IsTailCall; 2020*5f757f3fSDimitry Andric bool isVarArg = CLI.IsVarArg; 2021*5f757f3fSDimitry Andric const auto *CB = CLI.CB; 2022*5f757f3fSDimitry Andric 2023*5f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 2024*5f757f3fSDimitry Andric bool Is64Bit = Subtarget.is64Bit(); 2025*5f757f3fSDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 2026*5f757f3fSDimitry Andric bool IsSibcall = false; 2027*5f757f3fSDimitry Andric bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || 2028*5f757f3fSDimitry Andric CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; 2029*5f757f3fSDimitry Andric bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget); 2030*5f757f3fSDimitry Andric X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); 2031*5f757f3fSDimitry Andric 
bool HasNCSR = (CB && isa<CallInst>(CB) && 2032*5f757f3fSDimitry Andric CB->hasFnAttr("no_caller_saved_registers")); 2033*5f757f3fSDimitry Andric bool HasNoCfCheck = (CB && CB->doesNoCfCheck()); 2034*5f757f3fSDimitry Andric bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall()); 2035*5f757f3fSDimitry Andric bool IsCFICall = IsIndirectCall && CLI.CFIType; 2036*5f757f3fSDimitry Andric const Module *M = MF.getMMI().getModule(); 2037*5f757f3fSDimitry Andric Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); 2038*5f757f3fSDimitry Andric 2039*5f757f3fSDimitry Andric MachineFunction::CallSiteInfo CSInfo; 2040*5f757f3fSDimitry Andric if (CallConv == CallingConv::X86_INTR) 2041*5f757f3fSDimitry Andric report_fatal_error("X86 interrupts may not be called directly"); 2042*5f757f3fSDimitry Andric 2043*5f757f3fSDimitry Andric bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); 2044*5f757f3fSDimitry Andric if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { 2045*5f757f3fSDimitry Andric // If we are using a GOT, disable tail calls to external symbols with 2046*5f757f3fSDimitry Andric // default visibility. Tail calling such a symbol requires using a GOT 2047*5f757f3fSDimitry Andric // relocation, which forces early binding of the symbol. This breaks code 2048*5f757f3fSDimitry Andric // that require lazy function symbol resolution. Using musttail or 2049*5f757f3fSDimitry Andric // GuaranteedTailCallOpt will override this. 2050*5f757f3fSDimitry Andric GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 2051*5f757f3fSDimitry Andric if (!G || (!G->getGlobal()->hasLocalLinkage() && 2052*5f757f3fSDimitry Andric G->getGlobal()->hasDefaultVisibility())) 2053*5f757f3fSDimitry Andric isTailCall = false; 2054*5f757f3fSDimitry Andric } 2055*5f757f3fSDimitry Andric 2056*5f757f3fSDimitry Andric if (isTailCall && !IsMustTail) { 2057*5f757f3fSDimitry Andric // Check if it's really possible to do a tail call. 2058*5f757f3fSDimitry Andric isTailCall = IsEligibleForTailCallOptimization( 2059*5f757f3fSDimitry Andric Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals, 2060*5f757f3fSDimitry Andric Ins, DAG); 2061*5f757f3fSDimitry Andric 2062*5f757f3fSDimitry Andric // Sibcalls are automatically detected tailcalls which do not require 2063*5f757f3fSDimitry Andric // ABI changes. 2064*5f757f3fSDimitry Andric if (!IsGuaranteeTCO && isTailCall) 2065*5f757f3fSDimitry Andric IsSibcall = true; 2066*5f757f3fSDimitry Andric 2067*5f757f3fSDimitry Andric if (isTailCall) 2068*5f757f3fSDimitry Andric ++NumTailCalls; 2069*5f757f3fSDimitry Andric } 2070*5f757f3fSDimitry Andric 2071*5f757f3fSDimitry Andric if (IsMustTail && !isTailCall) 2072*5f757f3fSDimitry Andric report_fatal_error("failed to perform tail call elimination on a call " 2073*5f757f3fSDimitry Andric "site marked musttail"); 2074*5f757f3fSDimitry Andric 2075*5f757f3fSDimitry Andric assert(!(isVarArg && canGuaranteeTCO(CallConv)) && 2076*5f757f3fSDimitry Andric "Var args not supported with calling convention fastcc, ghc or hipe"); 2077*5f757f3fSDimitry Andric 2078*5f757f3fSDimitry Andric // Analyze operands of the call, assigning locations to each operand. 2079*5f757f3fSDimitry Andric SmallVector<CCValAssign, 16> ArgLocs; 2080*5f757f3fSDimitry Andric CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 2081*5f757f3fSDimitry Andric 2082*5f757f3fSDimitry Andric // Allocate shadow area for Win64. 
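// (The Win64 calling convention always reserves 32 bytes of "home" space for
// the first four parameter registers RCX, RDX, R8 and R9, even when the
// arguments are passed in registers.)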
2083*5f757f3fSDimitry Andric if (IsWin64) 2084*5f757f3fSDimitry Andric CCInfo.AllocateStack(32, Align(8)); 2085*5f757f3fSDimitry Andric 2086*5f757f3fSDimitry Andric CCInfo.AnalyzeArguments(Outs, CC_X86); 2087*5f757f3fSDimitry Andric 2088*5f757f3fSDimitry Andric // In vectorcall calling convention a second pass is required for the HVA 2089*5f757f3fSDimitry Andric // types. 2090*5f757f3fSDimitry Andric if (CallingConv::X86_VectorCall == CallConv) { 2091*5f757f3fSDimitry Andric CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); 2092*5f757f3fSDimitry Andric } 2093*5f757f3fSDimitry Andric 2094*5f757f3fSDimitry Andric // Get a count of how many bytes are to be pushed on the stack. 2095*5f757f3fSDimitry Andric unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 2096*5f757f3fSDimitry Andric if (IsSibcall) 2097*5f757f3fSDimitry Andric // This is a sibcall. The memory operands are available in caller's 2098*5f757f3fSDimitry Andric // own caller's stack. 2099*5f757f3fSDimitry Andric NumBytes = 0; 2100*5f757f3fSDimitry Andric else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) 2101*5f757f3fSDimitry Andric NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 2102*5f757f3fSDimitry Andric 2103*5f757f3fSDimitry Andric int FPDiff = 0; 2104*5f757f3fSDimitry Andric if (isTailCall && 2105*5f757f3fSDimitry Andric shouldGuaranteeTCO(CallConv, 2106*5f757f3fSDimitry Andric MF.getTarget().Options.GuaranteedTailCallOpt)) { 2107*5f757f3fSDimitry Andric // Lower arguments at fp - stackoffset + fpdiff. 2108*5f757f3fSDimitry Andric unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); 2109*5f757f3fSDimitry Andric 2110*5f757f3fSDimitry Andric FPDiff = NumBytesCallerPushed - NumBytes; 2111*5f757f3fSDimitry Andric 2112*5f757f3fSDimitry Andric // Set the delta of movement of the returnaddr stackslot. 2113*5f757f3fSDimitry Andric // But only set if delta is greater than previous delta. 2114*5f757f3fSDimitry Andric if (FPDiff < X86Info->getTCReturnAddrDelta()) 2115*5f757f3fSDimitry Andric X86Info->setTCReturnAddrDelta(FPDiff); 2116*5f757f3fSDimitry Andric } 2117*5f757f3fSDimitry Andric 2118*5f757f3fSDimitry Andric unsigned NumBytesToPush = NumBytes; 2119*5f757f3fSDimitry Andric unsigned NumBytesToPop = NumBytes; 2120*5f757f3fSDimitry Andric 2121*5f757f3fSDimitry Andric // If we have an inalloca argument, all stack space has already been allocated 2122*5f757f3fSDimitry Andric // for us and be right at the top of the stack. We don't support multiple 2123*5f757f3fSDimitry Andric // arguments passed in memory when using inalloca. 
2124*5f757f3fSDimitry Andric if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { 2125*5f757f3fSDimitry Andric NumBytesToPush = 0; 2126*5f757f3fSDimitry Andric if (!ArgLocs.back().isMemLoc()) 2127*5f757f3fSDimitry Andric report_fatal_error("cannot use inalloca attribute on a register " 2128*5f757f3fSDimitry Andric "parameter"); 2129*5f757f3fSDimitry Andric if (ArgLocs.back().getLocMemOffset() != 0) 2130*5f757f3fSDimitry Andric report_fatal_error("any parameter with the inalloca attribute must be " 2131*5f757f3fSDimitry Andric "the only memory argument"); 2132*5f757f3fSDimitry Andric } else if (CLI.IsPreallocated) { 2133*5f757f3fSDimitry Andric assert(ArgLocs.back().isMemLoc() && 2134*5f757f3fSDimitry Andric "cannot use preallocated attribute on a register " 2135*5f757f3fSDimitry Andric "parameter"); 2136*5f757f3fSDimitry Andric SmallVector<size_t, 4> PreallocatedOffsets; 2137*5f757f3fSDimitry Andric for (size_t i = 0; i < CLI.OutVals.size(); ++i) { 2138*5f757f3fSDimitry Andric if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) { 2139*5f757f3fSDimitry Andric PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset()); 2140*5f757f3fSDimitry Andric } 2141*5f757f3fSDimitry Andric } 2142*5f757f3fSDimitry Andric auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>(); 2143*5f757f3fSDimitry Andric size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB); 2144*5f757f3fSDimitry Andric MFI->setPreallocatedStackSize(PreallocatedId, NumBytes); 2145*5f757f3fSDimitry Andric MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets); 2146*5f757f3fSDimitry Andric NumBytesToPush = 0; 2147*5f757f3fSDimitry Andric } 2148*5f757f3fSDimitry Andric 2149*5f757f3fSDimitry Andric if (!IsSibcall && !IsMustTail) 2150*5f757f3fSDimitry Andric Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush, 2151*5f757f3fSDimitry Andric NumBytes - NumBytesToPush, dl); 2152*5f757f3fSDimitry Andric 2153*5f757f3fSDimitry Andric SDValue RetAddrFrIdx; 2154*5f757f3fSDimitry Andric // Load return address for tail calls. 2155*5f757f3fSDimitry Andric if (isTailCall && FPDiff) 2156*5f757f3fSDimitry Andric Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, 2157*5f757f3fSDimitry Andric Is64Bit, FPDiff, dl); 2158*5f757f3fSDimitry Andric 2159*5f757f3fSDimitry Andric SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 2160*5f757f3fSDimitry Andric SmallVector<SDValue, 8> MemOpChains; 2161*5f757f3fSDimitry Andric SDValue StackPtr; 2162*5f757f3fSDimitry Andric 2163*5f757f3fSDimitry Andric // The next loop assumes that the locations are in the same order as the 2164*5f757f3fSDimitry Andric // input arguments. 2165*5f757f3fSDimitry Andric assert(isSortedByValueNo(ArgLocs) && 2166*5f757f3fSDimitry Andric "Argument Location list must be sorted before lowering"); 2167*5f757f3fSDimitry Andric 2168*5f757f3fSDimitry Andric // Walk the register/memloc assignments, inserting copies/loads. In the case 2169*5f757f3fSDimitry Andric // of tail call optimization arguments are handled later. 2170*5f757f3fSDimitry Andric const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2171*5f757f3fSDimitry Andric for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; 2172*5f757f3fSDimitry Andric ++I, ++OutIndex) { 2173*5f757f3fSDimitry Andric assert(OutIndex < Outs.size() && "Invalid Out index"); 2174*5f757f3fSDimitry Andric // Skip inalloca/preallocated arguments, they have already been written.
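// (Their stack memory is populated by stores the frontend emits before the
// call, so no copies are generated here.)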
2175*5f757f3fSDimitry Andric ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; 2176*5f757f3fSDimitry Andric if (Flags.isInAlloca() || Flags.isPreallocated()) 2177*5f757f3fSDimitry Andric continue; 2178*5f757f3fSDimitry Andric 2179*5f757f3fSDimitry Andric CCValAssign &VA = ArgLocs[I]; 2180*5f757f3fSDimitry Andric EVT RegVT = VA.getLocVT(); 2181*5f757f3fSDimitry Andric SDValue Arg = OutVals[OutIndex]; 2182*5f757f3fSDimitry Andric bool isByVal = Flags.isByVal(); 2183*5f757f3fSDimitry Andric 2184*5f757f3fSDimitry Andric // Promote the value if needed. 2185*5f757f3fSDimitry Andric switch (VA.getLocInfo()) { 2186*5f757f3fSDimitry Andric default: llvm_unreachable("Unknown loc info!"); 2187*5f757f3fSDimitry Andric case CCValAssign::Full: break; 2188*5f757f3fSDimitry Andric case CCValAssign::SExt: 2189*5f757f3fSDimitry Andric Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 2190*5f757f3fSDimitry Andric break; 2191*5f757f3fSDimitry Andric case CCValAssign::ZExt: 2192*5f757f3fSDimitry Andric Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); 2193*5f757f3fSDimitry Andric break; 2194*5f757f3fSDimitry Andric case CCValAssign::AExt: 2195*5f757f3fSDimitry Andric if (Arg.getValueType().isVector() && 2196*5f757f3fSDimitry Andric Arg.getValueType().getVectorElementType() == MVT::i1) 2197*5f757f3fSDimitry Andric Arg = lowerMasksToReg(Arg, RegVT, dl, DAG); 2198*5f757f3fSDimitry Andric else if (RegVT.is128BitVector()) { 2199*5f757f3fSDimitry Andric // Special case: passing MMX values in XMM registers. 2200*5f757f3fSDimitry Andric Arg = DAG.getBitcast(MVT::i64, Arg); 2201*5f757f3fSDimitry Andric Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); 2202*5f757f3fSDimitry Andric Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); 2203*5f757f3fSDimitry Andric } else 2204*5f757f3fSDimitry Andric Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); 2205*5f757f3fSDimitry Andric break; 2206*5f757f3fSDimitry Andric case CCValAssign::BCvt: 2207*5f757f3fSDimitry Andric Arg = DAG.getBitcast(RegVT, Arg); 2208*5f757f3fSDimitry Andric break; 2209*5f757f3fSDimitry Andric case CCValAssign::Indirect: { 2210*5f757f3fSDimitry Andric if (isByVal) { 2211*5f757f3fSDimitry Andric // Memcpy the argument to a temporary stack slot to prevent 2212*5f757f3fSDimitry Andric // the caller from seeing any modifications the callee may make 2213*5f757f3fSDimitry Andric // as guaranteed by the `byval` attribute. 2214*5f757f3fSDimitry Andric int FrameIdx = MF.getFrameInfo().CreateStackObject( 2215*5f757f3fSDimitry Andric Flags.getByValSize(), 2216*5f757f3fSDimitry Andric std::max(Align(16), Flags.getNonZeroByValAlign()), false); 2217*5f757f3fSDimitry Andric SDValue StackSlot = 2218*5f757f3fSDimitry Andric DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout())); 2219*5f757f3fSDimitry Andric Chain = 2220*5f757f3fSDimitry Andric CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl); 2221*5f757f3fSDimitry Andric // From now on treat this as a regular pointer 2222*5f757f3fSDimitry Andric Arg = StackSlot; 2223*5f757f3fSDimitry Andric isByVal = false; 2224*5f757f3fSDimitry Andric } else { 2225*5f757f3fSDimitry Andric // Store the argument. 
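// A non-byval Indirect argument is spilled to a stack temporary here; the
// pointer to that slot is what actually gets passed from now on.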
2226*5f757f3fSDimitry Andric SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); 2227*5f757f3fSDimitry Andric int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 2228*5f757f3fSDimitry Andric Chain = DAG.getStore( 2229*5f757f3fSDimitry Andric Chain, dl, Arg, SpillSlot, 2230*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); 2231*5f757f3fSDimitry Andric Arg = SpillSlot; 2232*5f757f3fSDimitry Andric } 2233*5f757f3fSDimitry Andric break; 2234*5f757f3fSDimitry Andric } 2235*5f757f3fSDimitry Andric } 2236*5f757f3fSDimitry Andric 2237*5f757f3fSDimitry Andric if (VA.needsCustom()) { 2238*5f757f3fSDimitry Andric assert(VA.getValVT() == MVT::v64i1 && 2239*5f757f3fSDimitry Andric "Currently the only custom case is when we split v64i1 to 2 regs"); 2240*5f757f3fSDimitry Andric // Split v64i1 value into two registers 2241*5f757f3fSDimitry Andric Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget); 2242*5f757f3fSDimitry Andric } else if (VA.isRegLoc()) { 2243*5f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 2244*5f757f3fSDimitry Andric const TargetOptions &Options = DAG.getTarget().Options; 2245*5f757f3fSDimitry Andric if (Options.EmitCallSiteInfo) 2246*5f757f3fSDimitry Andric CSInfo.emplace_back(VA.getLocReg(), I); 2247*5f757f3fSDimitry Andric if (isVarArg && IsWin64) { 2248*5f757f3fSDimitry Andric // Win64 ABI requires argument XMM reg to be copied to the corresponding 2249*5f757f3fSDimitry Andric // shadow reg if callee is a varargs function. 2250*5f757f3fSDimitry Andric Register ShadowReg; 2251*5f757f3fSDimitry Andric switch (VA.getLocReg()) { 2252*5f757f3fSDimitry Andric case X86::XMM0: ShadowReg = X86::RCX; break; 2253*5f757f3fSDimitry Andric case X86::XMM1: ShadowReg = X86::RDX; break; 2254*5f757f3fSDimitry Andric case X86::XMM2: ShadowReg = X86::R8; break; 2255*5f757f3fSDimitry Andric case X86::XMM3: ShadowReg = X86::R9; break; 2256*5f757f3fSDimitry Andric } 2257*5f757f3fSDimitry Andric if (ShadowReg) 2258*5f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); 2259*5f757f3fSDimitry Andric } 2260*5f757f3fSDimitry Andric } else if (!IsSibcall && (!isTailCall || isByVal)) { 2261*5f757f3fSDimitry Andric assert(VA.isMemLoc()); 2262*5f757f3fSDimitry Andric if (!StackPtr.getNode()) 2263*5f757f3fSDimitry Andric StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 2264*5f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout())); 2265*5f757f3fSDimitry Andric MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 2266*5f757f3fSDimitry Andric dl, DAG, VA, Flags, isByVal)); 2267*5f757f3fSDimitry Andric } 2268*5f757f3fSDimitry Andric } 2269*5f757f3fSDimitry Andric 2270*5f757f3fSDimitry Andric if (!MemOpChains.empty()) 2271*5f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 2272*5f757f3fSDimitry Andric 2273*5f757f3fSDimitry Andric if (Subtarget.isPICStyleGOT()) { 2274*5f757f3fSDimitry Andric // ELF / PIC requires GOT in the EBX register before function calls via PLT 2275*5f757f3fSDimitry Andric // GOT pointer (except regcall). 2276*5f757f3fSDimitry Andric if (!isTailCall) { 2277*5f757f3fSDimitry Andric // Indirect call with RegCall calling convention may use up all the 2278*5f757f3fSDimitry Andric // general registers, so it is not suitable to bind the EBX register for 2279*5f757f3fSDimitry Andric // the GOT address; just let the register allocator handle it.
2280*5f757f3fSDimitry Andric if (CallConv != CallingConv::X86_RegCall) 2281*5f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair( 2282*5f757f3fSDimitry Andric Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), 2283*5f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout())))); 2284*5f757f3fSDimitry Andric } else { 2285*5f757f3fSDimitry Andric // If we are tail calling and generating PIC/GOT style code, load the 2286*5f757f3fSDimitry Andric // address of the callee into ECX. The value in ecx is used as the target of 2287*5f757f3fSDimitry Andric // the tail jump. This is done to circumvent the ebx/callee-saved problem 2288*5f757f3fSDimitry Andric // for tail calls on PIC/GOT architectures. Normally we would just put the 2289*5f757f3fSDimitry Andric // address of GOT into ebx and then call target@PLT. But for tail calls 2290*5f757f3fSDimitry Andric // ebx would be restored (since ebx is callee saved) before jumping to the 2291*5f757f3fSDimitry Andric // target@PLT. 2292*5f757f3fSDimitry Andric 2293*5f757f3fSDimitry Andric // Note: The actual moving to ECX is done further down. 2294*5f757f3fSDimitry Andric GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 2295*5f757f3fSDimitry Andric if (G && !G->getGlobal()->hasLocalLinkage() && 2296*5f757f3fSDimitry Andric G->getGlobal()->hasDefaultVisibility()) 2297*5f757f3fSDimitry Andric Callee = LowerGlobalAddress(Callee, DAG); 2298*5f757f3fSDimitry Andric else if (isa<ExternalSymbolSDNode>(Callee)) 2299*5f757f3fSDimitry Andric Callee = LowerExternalSymbol(Callee, DAG); 2300*5f757f3fSDimitry Andric } 2301*5f757f3fSDimitry Andric } 2302*5f757f3fSDimitry Andric 2303*5f757f3fSDimitry Andric if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail && 2304*5f757f3fSDimitry Andric (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) { 2305*5f757f3fSDimitry Andric // From AMD64 ABI document: 2306*5f757f3fSDimitry Andric // For calls that may call functions that use varargs or stdargs 2307*5f757f3fSDimitry Andric // (prototype-less calls or calls to functions containing ellipsis (...) in 2308*5f757f3fSDimitry Andric // the declaration) %al is used as a hidden argument to specify the number 2309*5f757f3fSDimitry Andric // of SSE registers used. The contents of %al do not need to match exactly 2310*5f757f3fSDimitry Andric // the number of registers, but must be an upper bound on the number of SSE 2311*5f757f3fSDimitry Andric // registers used and is in the range 0 - 8 inclusive. 2312*5f757f3fSDimitry Andric 2313*5f757f3fSDimitry Andric // Count the number of XMM registers allocated.
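// For example, a call like printf("%f %f", a, b) passes two doubles in
// XMM0/XMM1, so %al is set to 2 below (any value from 2 to 8 would also
// satisfy the ABI).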
2314*5f757f3fSDimitry Andric static const MCPhysReg XMMArgRegs[] = { 2315*5f757f3fSDimitry Andric X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 2316*5f757f3fSDimitry Andric X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 2317*5f757f3fSDimitry Andric }; 2318*5f757f3fSDimitry Andric unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); 2319*5f757f3fSDimitry Andric assert((Subtarget.hasSSE1() || !NumXMMRegs) 2320*5f757f3fSDimitry Andric && "SSE registers cannot be used when SSE is disabled"); 2321*5f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(Register(X86::AL), 2322*5f757f3fSDimitry Andric DAG.getConstant(NumXMMRegs, dl, 2323*5f757f3fSDimitry Andric MVT::i8))); 2324*5f757f3fSDimitry Andric } 2325*5f757f3fSDimitry Andric 2326*5f757f3fSDimitry Andric if (isVarArg && IsMustTail) { 2327*5f757f3fSDimitry Andric const auto &Forwards = X86Info->getForwardedMustTailRegParms(); 2328*5f757f3fSDimitry Andric for (const auto &F : Forwards) { 2329*5f757f3fSDimitry Andric SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); 2330*5f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(F.PReg, Val)); 2331*5f757f3fSDimitry Andric } 2332*5f757f3fSDimitry Andric } 2333*5f757f3fSDimitry Andric 2334*5f757f3fSDimitry Andric // For tail calls lower the arguments to the 'real' stack slots. Sibcalls 2335*5f757f3fSDimitry Andric // don't need this because the eligibility check rejects calls that require 2336*5f757f3fSDimitry Andric // shuffling arguments passed in memory. 2337*5f757f3fSDimitry Andric if (!IsSibcall && isTailCall) { 2338*5f757f3fSDimitry Andric // Force all the incoming stack arguments to be loaded from the stack 2339*5f757f3fSDimitry Andric // before any new outgoing arguments are stored to the stack, because the 2340*5f757f3fSDimitry Andric // outgoing stack slots may alias the incoming argument stack slots, and 2341*5f757f3fSDimitry Andric // the alias isn't otherwise explicit. This is slightly more conservative 2342*5f757f3fSDimitry Andric // than necessary, because it means that each store effectively depends 2343*5f757f3fSDimitry Andric // on every argument instead of just those arguments it would clobber. 
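// For example, if f(x, y) tail-calls g(y, x) with both arguments on the
// stack, both incoming slots must be loaded before either outgoing slot is
// overwritten.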
2344*5f757f3fSDimitry Andric SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); 2345*5f757f3fSDimitry Andric 2346*5f757f3fSDimitry Andric SmallVector<SDValue, 8> MemOpChains2; 2347*5f757f3fSDimitry Andric SDValue FIN; 2348*5f757f3fSDimitry Andric int FI = 0; 2349*5f757f3fSDimitry Andric for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; 2350*5f757f3fSDimitry Andric ++I, ++OutsIndex) { 2351*5f757f3fSDimitry Andric CCValAssign &VA = ArgLocs[I]; 2352*5f757f3fSDimitry Andric 2353*5f757f3fSDimitry Andric if (VA.isRegLoc()) { 2354*5f757f3fSDimitry Andric if (VA.needsCustom()) { 2355*5f757f3fSDimitry Andric assert((CallConv == CallingConv::X86_RegCall) && 2356*5f757f3fSDimitry Andric "Expecting custom case only in regcall calling convention"); 2357*5f757f3fSDimitry Andric // This means that we are in special case where one argument was 2358*5f757f3fSDimitry Andric // passed through two register locations - Skip the next location 2359*5f757f3fSDimitry Andric ++I; 2360*5f757f3fSDimitry Andric } 2361*5f757f3fSDimitry Andric 2362*5f757f3fSDimitry Andric continue; 2363*5f757f3fSDimitry Andric } 2364*5f757f3fSDimitry Andric 2365*5f757f3fSDimitry Andric assert(VA.isMemLoc()); 2366*5f757f3fSDimitry Andric SDValue Arg = OutVals[OutsIndex]; 2367*5f757f3fSDimitry Andric ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; 2368*5f757f3fSDimitry Andric // Skip inalloca/preallocated arguments. They don't require any work. 2369*5f757f3fSDimitry Andric if (Flags.isInAlloca() || Flags.isPreallocated()) 2370*5f757f3fSDimitry Andric continue; 2371*5f757f3fSDimitry Andric // Create frame index. 2372*5f757f3fSDimitry Andric int32_t Offset = VA.getLocMemOffset()+FPDiff; 2373*5f757f3fSDimitry Andric uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; 2374*5f757f3fSDimitry Andric FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 2375*5f757f3fSDimitry Andric FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 2376*5f757f3fSDimitry Andric 2377*5f757f3fSDimitry Andric if (Flags.isByVal()) { 2378*5f757f3fSDimitry Andric // Copy relative to framepointer. 2379*5f757f3fSDimitry Andric SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); 2380*5f757f3fSDimitry Andric if (!StackPtr.getNode()) 2381*5f757f3fSDimitry Andric StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 2382*5f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout())); 2383*5f757f3fSDimitry Andric Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 2384*5f757f3fSDimitry Andric StackPtr, Source); 2385*5f757f3fSDimitry Andric 2386*5f757f3fSDimitry Andric MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, 2387*5f757f3fSDimitry Andric ArgChain, 2388*5f757f3fSDimitry Andric Flags, DAG, dl)); 2389*5f757f3fSDimitry Andric } else { 2390*5f757f3fSDimitry Andric // Store relative to framepointer. 2391*5f757f3fSDimitry Andric MemOpChains2.push_back(DAG.getStore( 2392*5f757f3fSDimitry Andric ArgChain, dl, Arg, FIN, 2393*5f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 2394*5f757f3fSDimitry Andric } 2395*5f757f3fSDimitry Andric } 2396*5f757f3fSDimitry Andric 2397*5f757f3fSDimitry Andric if (!MemOpChains2.empty()) 2398*5f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 2399*5f757f3fSDimitry Andric 2400*5f757f3fSDimitry Andric // Store the return address to the appropriate stack slot. 
2401*5f757f3fSDimitry Andric Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, 2402*5f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout()), 2403*5f757f3fSDimitry Andric RegInfo->getSlotSize(), FPDiff, dl); 2404*5f757f3fSDimitry Andric } 2405*5f757f3fSDimitry Andric 2406*5f757f3fSDimitry Andric // Build a sequence of copy-to-reg nodes chained together with token chain 2407*5f757f3fSDimitry Andric // and glue operands which copy the outgoing args into registers. 2408*5f757f3fSDimitry Andric SDValue InGlue; 2409*5f757f3fSDimitry Andric for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 2410*5f757f3fSDimitry Andric Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 2411*5f757f3fSDimitry Andric RegsToPass[i].second, InGlue); 2412*5f757f3fSDimitry Andric InGlue = Chain.getValue(1); 2413*5f757f3fSDimitry Andric } 2414*5f757f3fSDimitry Andric 2415*5f757f3fSDimitry Andric if (DAG.getTarget().getCodeModel() == CodeModel::Large) { 2416*5f757f3fSDimitry Andric assert(Is64Bit && "Large code model is only legal in 64-bit mode."); 2417*5f757f3fSDimitry Andric // In the 64-bit large code model, we have to make all calls 2418*5f757f3fSDimitry Andric // through a register, since the call instruction's 32-bit 2419*5f757f3fSDimitry Andric // pc-relative offset may not be large enough to hold the whole 2420*5f757f3fSDimitry Andric // address. 2421*5f757f3fSDimitry Andric } else if (Callee->getOpcode() == ISD::GlobalAddress || 2422*5f757f3fSDimitry Andric Callee->getOpcode() == ISD::ExternalSymbol) { 2423*5f757f3fSDimitry Andric // Lower direct calls to global addresses and external symbols. Setting 2424*5f757f3fSDimitry Andric // ForCall to true here has the effect of removing WrapperRIP when possible 2425*5f757f3fSDimitry Andric // to allow direct calls to be selected without first materializing the 2426*5f757f3fSDimitry Andric // address into a register. 2427*5f757f3fSDimitry Andric Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true); 2428*5f757f3fSDimitry Andric } else if (Subtarget.isTarget64BitILP32() && 2429*5f757f3fSDimitry Andric Callee.getValueType() == MVT::i32) { 2430*5f757f3fSDimitry Andric // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI 2431*5f757f3fSDimitry Andric Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); 2432*5f757f3fSDimitry Andric } 2433*5f757f3fSDimitry Andric 2434*5f757f3fSDimitry Andric // Returns a chain & a glue for retval copy to use. 2435*5f757f3fSDimitry Andric SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2436*5f757f3fSDimitry Andric SmallVector<SDValue, 8> Ops; 2437*5f757f3fSDimitry Andric 2438*5f757f3fSDimitry Andric if (!IsSibcall && isTailCall && !IsMustTail) { 2439*5f757f3fSDimitry Andric Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl); 2440*5f757f3fSDimitry Andric InGlue = Chain.getValue(1); 2441*5f757f3fSDimitry Andric } 2442*5f757f3fSDimitry Andric 2443*5f757f3fSDimitry Andric Ops.push_back(Chain); 2444*5f757f3fSDimitry Andric Ops.push_back(Callee); 2445*5f757f3fSDimitry Andric 2446*5f757f3fSDimitry Andric if (isTailCall) 2447*5f757f3fSDimitry Andric Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32)); 2448*5f757f3fSDimitry Andric 2449*5f757f3fSDimitry Andric // Add argument registers to the end of the list so that they are known live 2450*5f757f3fSDimitry Andric // into the call. 
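// (These register operands become implicit uses on the final call
// instruction.)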
2451*5f757f3fSDimitry Andric for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 2452*5f757f3fSDimitry Andric Ops.push_back(DAG.getRegister(RegsToPass[i].first, 2453*5f757f3fSDimitry Andric RegsToPass[i].second.getValueType())); 2454*5f757f3fSDimitry Andric 2455*5f757f3fSDimitry Andric // Add a register mask operand representing the call-preserved registers. 2456*5f757f3fSDimitry Andric const uint32_t *Mask = [&]() { 2457*5f757f3fSDimitry Andric auto AdaptedCC = CallConv; 2458*5f757f3fSDimitry Andric // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists), 2459*5f757f3fSDimitry Andric // use X86_INTR calling convention because it has the same CSR mask 2460*5f757f3fSDimitry Andric // (same preserved registers). 2461*5f757f3fSDimitry Andric if (HasNCSR) 2462*5f757f3fSDimitry Andric AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR; 2463*5f757f3fSDimitry Andric // If NoCalleeSavedRegisters is requested, then use GHC since it happens 2464*5f757f3fSDimitry Andric // to use the CSR_NoRegs_RegMask. 2465*5f757f3fSDimitry Andric if (CB && CB->hasFnAttr("no_callee_saved_registers")) 2466*5f757f3fSDimitry Andric AdaptedCC = (CallingConv::ID)CallingConv::GHC; 2467*5f757f3fSDimitry Andric return RegInfo->getCallPreservedMask(MF, AdaptedCC); 2468*5f757f3fSDimitry Andric }(); 2469*5f757f3fSDimitry Andric assert(Mask && "Missing call preserved mask for calling convention"); 2470*5f757f3fSDimitry Andric 2471*5f757f3fSDimitry Andric // If this is an invoke in a 32-bit function using a funclet-based 2472*5f757f3fSDimitry Andric // personality, assume the function clobbers all registers. If an exception 2473*5f757f3fSDimitry Andric // is thrown, the runtime will not restore CSRs. 2474*5f757f3fSDimitry Andric // FIXME: Model this more precisely so that we can register allocate across 2475*5f757f3fSDimitry Andric // the normal edge and spill and fill across the exceptional edge. 2476*5f757f3fSDimitry Andric if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) { 2477*5f757f3fSDimitry Andric const Function &CallerFn = MF.getFunction(); 2478*5f757f3fSDimitry Andric EHPersonality Pers = 2479*5f757f3fSDimitry Andric CallerFn.hasPersonalityFn() 2480*5f757f3fSDimitry Andric ? classifyEHPersonality(CallerFn.getPersonalityFn()) 2481*5f757f3fSDimitry Andric : EHPersonality::Unknown; 2482*5f757f3fSDimitry Andric if (isFuncletEHPersonality(Pers)) 2483*5f757f3fSDimitry Andric Mask = RegInfo->getNoPreservedMask(); 2484*5f757f3fSDimitry Andric } 2485*5f757f3fSDimitry Andric 2486*5f757f3fSDimitry Andric // Define a new register mask from the existing mask. 2487*5f757f3fSDimitry Andric uint32_t *RegMask = nullptr; 2488*5f757f3fSDimitry Andric 2489*5f757f3fSDimitry Andric // In some calling conventions we need to remove the used physical registers 2490*5f757f3fSDimitry Andric // from the reg mask. Create a new RegMask for such calling conventions. 2491*5f757f3fSDimitry Andric // RegMask for calling conventions that disable only return registers (e.g. 2492*5f757f3fSDimitry Andric // preserve_most) will be modified later in LowerCallResult. 2493*5f757f3fSDimitry Andric bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR; 2494*5f757f3fSDimitry Andric if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) { 2495*5f757f3fSDimitry Andric const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2496*5f757f3fSDimitry Andric 2497*5f757f3fSDimitry Andric // Allocate a new Reg Mask and copy Mask.
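// A set bit in a register mask means the register is preserved across the
// call, so clearing the bit for an argument register (and all of its
// sub-registers) below marks it as clobbered by this call.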
2498*5f757f3fSDimitry Andric RegMask = MF.allocateRegMask(); 2499*5f757f3fSDimitry Andric unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); 2500*5f757f3fSDimitry Andric memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize); 2501*5f757f3fSDimitry Andric 2502*5f757f3fSDimitry Andric // Make sure all sub registers of the argument registers are reset 2503*5f757f3fSDimitry Andric // in the RegMask. 2504*5f757f3fSDimitry Andric if (ShouldDisableArgRegs) { 2505*5f757f3fSDimitry Andric for (auto const &RegPair : RegsToPass) 2506*5f757f3fSDimitry Andric for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first)) 2507*5f757f3fSDimitry Andric RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); 2508*5f757f3fSDimitry Andric } 2509*5f757f3fSDimitry Andric 2510*5f757f3fSDimitry Andric // Create the RegMask Operand according to our updated mask. 2511*5f757f3fSDimitry Andric Ops.push_back(DAG.getRegisterMask(RegMask)); 2512*5f757f3fSDimitry Andric } else { 2513*5f757f3fSDimitry Andric // Create the RegMask Operand according to the static mask. 2514*5f757f3fSDimitry Andric Ops.push_back(DAG.getRegisterMask(Mask)); 2515*5f757f3fSDimitry Andric } 2516*5f757f3fSDimitry Andric 2517*5f757f3fSDimitry Andric if (InGlue.getNode()) 2518*5f757f3fSDimitry Andric Ops.push_back(InGlue); 2519*5f757f3fSDimitry Andric 2520*5f757f3fSDimitry Andric if (isTailCall) { 2521*5f757f3fSDimitry Andric // We used to do: 2522*5f757f3fSDimitry Andric //// If this is the first return lowered for this function, add the regs 2523*5f757f3fSDimitry Andric //// to the liveout set for the function. 2524*5f757f3fSDimitry Andric // This isn't right, although it's probably harmless on x86; liveouts 2525*5f757f3fSDimitry Andric // should be computed from returns not tail calls. Consider a void 2526*5f757f3fSDimitry Andric // function making a tail call to a function returning int. 2527*5f757f3fSDimitry Andric MF.getFrameInfo().setHasTailCall(); 2528*5f757f3fSDimitry Andric SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); 2529*5f757f3fSDimitry Andric 2530*5f757f3fSDimitry Andric if (IsCFICall) 2531*5f757f3fSDimitry Andric Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 2532*5f757f3fSDimitry Andric 2533*5f757f3fSDimitry Andric DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 2534*5f757f3fSDimitry Andric DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); 2535*5f757f3fSDimitry Andric return Ret; 2536*5f757f3fSDimitry Andric } 2537*5f757f3fSDimitry Andric 2538*5f757f3fSDimitry Andric if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) { 2539*5f757f3fSDimitry Andric Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); 2540*5f757f3fSDimitry Andric } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { 2541*5f757f3fSDimitry Andric // Calls with a "clang.arc.attachedcall" bundle are special. They should be 2542*5f757f3fSDimitry Andric // expanded to the call, directly followed by a special marker sequence and 2543*5f757f3fSDimitry Andric // a call to a ObjC library function. Use the CALL_RVMARKER to do that. 2544*5f757f3fSDimitry Andric assert(!isTailCall && 2545*5f757f3fSDimitry Andric "tail calls cannot be marked with clang.arc.attachedcall"); 2546*5f757f3fSDimitry Andric assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"); 2547*5f757f3fSDimitry Andric 2548*5f757f3fSDimitry Andric // Add a target global address for the retainRV/claimRV runtime function 2549*5f757f3fSDimitry Andric // just before the call target. 
2550*5f757f3fSDimitry Andric Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB); 2551*5f757f3fSDimitry Andric auto PtrVT = getPointerTy(DAG.getDataLayout()); 2552*5f757f3fSDimitry Andric auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT); 2553*5f757f3fSDimitry Andric Ops.insert(Ops.begin() + 1, GA); 2554*5f757f3fSDimitry Andric Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops); 2555*5f757f3fSDimitry Andric } else { 2556*5f757f3fSDimitry Andric Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); 2557*5f757f3fSDimitry Andric } 2558*5f757f3fSDimitry Andric 2559*5f757f3fSDimitry Andric if (IsCFICall) 2560*5f757f3fSDimitry Andric Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 2561*5f757f3fSDimitry Andric 2562*5f757f3fSDimitry Andric InGlue = Chain.getValue(1); 2563*5f757f3fSDimitry Andric DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2564*5f757f3fSDimitry Andric DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); 2565*5f757f3fSDimitry Andric 2566*5f757f3fSDimitry Andric // Save heapallocsite metadata. 2567*5f757f3fSDimitry Andric if (CLI.CB) 2568*5f757f3fSDimitry Andric if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite")) 2569*5f757f3fSDimitry Andric DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); 2570*5f757f3fSDimitry Andric 2571*5f757f3fSDimitry Andric // Create the CALLSEQ_END node. 2572*5f757f3fSDimitry Andric unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing. 2573*5f757f3fSDimitry Andric if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 2574*5f757f3fSDimitry Andric DAG.getTarget().Options.GuaranteedTailCallOpt)) 2575*5f757f3fSDimitry Andric NumBytesForCalleeToPop = NumBytes; // Callee pops everything 2576*5f757f3fSDimitry Andric else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet) 2577*5f757f3fSDimitry Andric // If this call passes a struct-return pointer, the callee 2578*5f757f3fSDimitry Andric // pops that struct pointer. 2579*5f757f3fSDimitry Andric NumBytesForCalleeToPop = 4; 2580*5f757f3fSDimitry Andric 2581*5f757f3fSDimitry Andric // Returns a glue for retval copy to use. 2582*5f757f3fSDimitry Andric if (!IsSibcall) { 2583*5f757f3fSDimitry Andric Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop, 2584*5f757f3fSDimitry Andric InGlue, dl); 2585*5f757f3fSDimitry Andric InGlue = Chain.getValue(1); 2586*5f757f3fSDimitry Andric } 2587*5f757f3fSDimitry Andric 2588*5f757f3fSDimitry Andric // Handle result values, copying them out of physregs into vregs that we 2589*5f757f3fSDimitry Andric // return. 2590*5f757f3fSDimitry Andric return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG, 2591*5f757f3fSDimitry Andric InVals, RegMask); 2592*5f757f3fSDimitry Andric } 2593*5f757f3fSDimitry Andric 2594*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 2595*5f757f3fSDimitry Andric // Fast Calling Convention (tail call) implementation 2596*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 2597*5f757f3fSDimitry Andric 2598*5f757f3fSDimitry Andric // Like stdcall, the callee cleans up the arguments, except that ECX is 2599*5f757f3fSDimitry Andric // reserved for storing the address of the tail-called function. Only 2 registers are 2600*5f757f3fSDimitry Andric // free for argument passing (inreg).
Tail call optimization is performed 2601*5f757f3fSDimitry Andric // provided: 2602*5f757f3fSDimitry Andric // * tailcallopt is enabled 2603*5f757f3fSDimitry Andric // * caller/callee are fastcc 2604*5f757f3fSDimitry Andric // On X86_64 architecture with GOT-style position independent code only local 2605*5f757f3fSDimitry Andric // (within module) calls are supported at the moment. 2606*5f757f3fSDimitry Andric // To keep the stack aligned according to the platform ABI, the function 2607*5f757f3fSDimitry Andric // GetAlignedArgumentStackSize ensures that the argument delta is always a multiple 2608*5f757f3fSDimitry Andric // of the stack alignment. (Dynamic linkers need this - Darwin's dyld for example) 2609*5f757f3fSDimitry Andric // If a tail-called function (callee) has more arguments than the caller, the 2610*5f757f3fSDimitry Andric // caller needs to make sure that there is room to move the RETADDR to. This is 2611*5f757f3fSDimitry Andric // achieved by reserving an area the size of the argument delta right after the 2612*5f757f3fSDimitry Andric // original RETADDR, but before the saved framepointer or the spilled registers 2613*5f757f3fSDimitry Andric // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4) 2614*5f757f3fSDimitry Andric // stack layout: 2615*5f757f3fSDimitry Andric // arg1 2616*5f757f3fSDimitry Andric // arg2 2617*5f757f3fSDimitry Andric // RETADDR 2618*5f757f3fSDimitry Andric // [ new RETADDR 2619*5f757f3fSDimitry Andric // move area ] 2620*5f757f3fSDimitry Andric // (possible EBP) 2621*5f757f3fSDimitry Andric // ESI 2622*5f757f3fSDimitry Andric // EDI 2623*5f757f3fSDimitry Andric // local1 .. 2624*5f757f3fSDimitry Andric 2625*5f757f3fSDimitry Andric /// Align the stack size, e.g. to 16n + 12 for a 16-byte alignment 2626*5f757f3fSDimitry Andric /// requirement. 2627*5f757f3fSDimitry Andric unsigned 2628*5f757f3fSDimitry Andric X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, 2629*5f757f3fSDimitry Andric SelectionDAG &DAG) const { 2630*5f757f3fSDimitry Andric const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign(); 2631*5f757f3fSDimitry Andric const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); 2632*5f757f3fSDimitry Andric assert(StackSize % SlotSize == 0 && 2633*5f757f3fSDimitry Andric "StackSize must be a multiple of SlotSize"); 2634*5f757f3fSDimitry Andric return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize; 2635*5f757f3fSDimitry Andric } 2636*5f757f3fSDimitry Andric 2637*5f757f3fSDimitry Andric /// Return true if the given stack call argument is already available in the 2638*5f757f3fSDimitry Andric /// same position (relatively) of the caller's incoming argument stack. 2639*5f757f3fSDimitry Andric static 2640*5f757f3fSDimitry Andric bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 2641*5f757f3fSDimitry Andric MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, 2642*5f757f3fSDimitry Andric const X86InstrInfo *TII, const CCValAssign &VA) { 2643*5f757f3fSDimitry Andric unsigned Bytes = Arg.getValueSizeInBits() / 8; 2644*5f757f3fSDimitry Andric 2645*5f757f3fSDimitry Andric for (;;) { 2646*5f757f3fSDimitry Andric // Look through nodes that don't alter the bits of the incoming value.
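// (For example, an incoming argument that was zero-extended or bitcast still
// refers to the same underlying load or fixed stack object.)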
2647*5f757f3fSDimitry Andric unsigned Op = Arg.getOpcode(); 2648*5f757f3fSDimitry Andric if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || 2649*5f757f3fSDimitry Andric Op == ISD::AssertZext) { 2650*5f757f3fSDimitry Andric Arg = Arg.getOperand(0); 2651*5f757f3fSDimitry Andric continue; 2652*5f757f3fSDimitry Andric } 2653*5f757f3fSDimitry Andric if (Op == ISD::TRUNCATE) { 2654*5f757f3fSDimitry Andric const SDValue &TruncInput = Arg.getOperand(0); 2655*5f757f3fSDimitry Andric if (TruncInput.getOpcode() == ISD::AssertZext && 2656*5f757f3fSDimitry Andric cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == 2657*5f757f3fSDimitry Andric Arg.getValueType()) { 2658*5f757f3fSDimitry Andric Arg = TruncInput.getOperand(0); 2659*5f757f3fSDimitry Andric continue; 2660*5f757f3fSDimitry Andric } 2661*5f757f3fSDimitry Andric } 2662*5f757f3fSDimitry Andric break; 2663*5f757f3fSDimitry Andric } 2664*5f757f3fSDimitry Andric 2665*5f757f3fSDimitry Andric int FI = INT_MAX; 2666*5f757f3fSDimitry Andric if (Arg.getOpcode() == ISD::CopyFromReg) { 2667*5f757f3fSDimitry Andric Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 2668*5f757f3fSDimitry Andric if (!VR.isVirtual()) 2669*5f757f3fSDimitry Andric return false; 2670*5f757f3fSDimitry Andric MachineInstr *Def = MRI->getVRegDef(VR); 2671*5f757f3fSDimitry Andric if (!Def) 2672*5f757f3fSDimitry Andric return false; 2673*5f757f3fSDimitry Andric if (!Flags.isByVal()) { 2674*5f757f3fSDimitry Andric if (!TII->isLoadFromStackSlot(*Def, FI)) 2675*5f757f3fSDimitry Andric return false; 2676*5f757f3fSDimitry Andric } else { 2677*5f757f3fSDimitry Andric unsigned Opcode = Def->getOpcode(); 2678*5f757f3fSDimitry Andric if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || 2679*5f757f3fSDimitry Andric Opcode == X86::LEA64_32r) && 2680*5f757f3fSDimitry Andric Def->getOperand(1).isFI()) { 2681*5f757f3fSDimitry Andric FI = Def->getOperand(1).getIndex(); 2682*5f757f3fSDimitry Andric Bytes = Flags.getByValSize(); 2683*5f757f3fSDimitry Andric } else 2684*5f757f3fSDimitry Andric return false; 2685*5f757f3fSDimitry Andric } 2686*5f757f3fSDimitry Andric } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 2687*5f757f3fSDimitry Andric if (Flags.isByVal()) 2688*5f757f3fSDimitry Andric // ByVal argument is passed in as a pointer but it's now being 2689*5f757f3fSDimitry Andric // dereferenced. e.g. 
2690*5f757f3fSDimitry Andric // define @foo(%struct.X* %A) { 2691*5f757f3fSDimitry Andric // tail call @bar(%struct.X* byval %A) 2692*5f757f3fSDimitry Andric // } 2693*5f757f3fSDimitry Andric return false; 2694*5f757f3fSDimitry Andric SDValue Ptr = Ld->getBasePtr(); 2695*5f757f3fSDimitry Andric FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 2696*5f757f3fSDimitry Andric if (!FINode) 2697*5f757f3fSDimitry Andric return false; 2698*5f757f3fSDimitry Andric FI = FINode->getIndex(); 2699*5f757f3fSDimitry Andric } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { 2700*5f757f3fSDimitry Andric FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); 2701*5f757f3fSDimitry Andric FI = FINode->getIndex(); 2702*5f757f3fSDimitry Andric Bytes = Flags.getByValSize(); 2703*5f757f3fSDimitry Andric } else 2704*5f757f3fSDimitry Andric return false; 2705*5f757f3fSDimitry Andric 2706*5f757f3fSDimitry Andric assert(FI != INT_MAX); 2707*5f757f3fSDimitry Andric if (!MFI.isFixedObjectIndex(FI)) 2708*5f757f3fSDimitry Andric return false; 2709*5f757f3fSDimitry Andric 2710*5f757f3fSDimitry Andric if (Offset != MFI.getObjectOffset(FI)) 2711*5f757f3fSDimitry Andric return false; 2712*5f757f3fSDimitry Andric 2713*5f757f3fSDimitry Andric // If this is not byval, check that the argument stack object is immutable. 2714*5f757f3fSDimitry Andric // inalloca and argument copy elision can create mutable argument stack 2715*5f757f3fSDimitry Andric // objects. Byval objects can be mutated, but a byval call intends to pass the 2716*5f757f3fSDimitry Andric // mutated memory. 2717*5f757f3fSDimitry Andric if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) 2718*5f757f3fSDimitry Andric return false; 2719*5f757f3fSDimitry Andric 2720*5f757f3fSDimitry Andric if (VA.getLocVT().getFixedSizeInBits() > 2721*5f757f3fSDimitry Andric Arg.getValueSizeInBits().getFixedValue()) { 2722*5f757f3fSDimitry Andric // If the argument location is wider than the argument type, check that any 2723*5f757f3fSDimitry Andric // extension flags match. 2724*5f757f3fSDimitry Andric if (Flags.isZExt() != MFI.isObjectZExt(FI) || 2725*5f757f3fSDimitry Andric Flags.isSExt() != MFI.isObjectSExt(FI)) { 2726*5f757f3fSDimitry Andric return false; 2727*5f757f3fSDimitry Andric } 2728*5f757f3fSDimitry Andric } 2729*5f757f3fSDimitry Andric 2730*5f757f3fSDimitry Andric return Bytes == MFI.getObjectSize(FI); 2731*5f757f3fSDimitry Andric } 2732*5f757f3fSDimitry Andric 2733*5f757f3fSDimitry Andric /// Check whether the call is eligible for tail call optimization. Targets 2734*5f757f3fSDimitry Andric /// that want to do tail call optimization should implement this function. 2735*5f757f3fSDimitry Andric bool X86TargetLowering::IsEligibleForTailCallOptimization( 2736*5f757f3fSDimitry Andric SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet, 2737*5f757f3fSDimitry Andric bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, 2738*5f757f3fSDimitry Andric const SmallVectorImpl<SDValue> &OutVals, 2739*5f757f3fSDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { 2740*5f757f3fSDimitry Andric if (!mayTailCallThisCC(CalleeCC)) 2741*5f757f3fSDimitry Andric return false; 2742*5f757f3fSDimitry Andric 2743*5f757f3fSDimitry Andric // If -tailcallopt is specified, make fastcc functions tail-callable. 
  if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
                        CalleeCC == CallingConv::Tail ||
                        CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;
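
  // Under guaranteed TCO (-tailcallopt, or the tailcc/swifttailcc conventions)
  // the decision is made here: a TCO-capable convention shared by caller and
  // callee (e.g. fastcc calling fastcc) is accepted, and anything else is
  // rejected without trying the sibcall checks below.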
  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if we're an sret return fn and the callee
  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
  // insufficient.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So it
    // needs to be (a) an sret function itself and (b) we pass our sret as its
    // sret. Condition #b is harder to determine.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the call it is not safe to optimize
  // this into a sibcall.
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }
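
  // The remaining checks are mostly about the stack: outgoing stack arguments
  // must already sit exactly where the caller's own incoming arguments live,
  // and the callee's stack-popping behaviour must match what our caller
  // expects.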
  unsigned StackArgsSize = 0;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    // Allocate shadow area for Win64
    if (IsCalleeWin64)
      CCInfo.AllocateStack(32, Align(8));

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    StackArgsSize = CCInfo.getStackSize();

    if (CCInfo.getStackSize()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
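    // For example, a 32-bit stdcall caller that pops 8 bytes of stack
    // arguments on return ("ret 8") can only tail-call a callee that also
    // pops exactly 8 bytes.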
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}