//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to DAG nodes.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "x86-isel"

using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal,
/// unlike report_fatal_error, so calling code should attempt to recover
/// without crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
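/// For example, under preserve_most/preserve_all the register that actually
/// carries the return value is dropped from the preserved set (see
/// LowerReturn below).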
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
  switch (CC) {
  default:
    return false;
  case CallingConv::X86_RegCall:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
    return true;
  }
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
  return CC == CallingConv::X86_RegCall;
}

static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
  // convention is one that uses k registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};
  // v32i1 passes in ymm unless we have BWI and the calling convention is
  // regcall.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};
  // Split v64i1 vectors if we don't have v64i8 available.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
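  // (Illustrative: a v3i1 or v128i1 value, or a v64i1 value without BWI,
  // ends up as NumElts individual i8 values.)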
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}

MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return RegisterVT;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return MVT::v8f16;
  }

  // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
  if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
      !Subtarget.hasX87())
    return MVT::i32;

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  if (VT == MVT::bf16)
    return MVT::f16;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return NumRegisters;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return 1;
  }

  // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
  // x87 is disabled.
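  // (i.e. an f64 value occupies two i32 GPRs and an f80 value three.)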
  if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
    if (VT == MVT::f64)
      return 2;
    if (VT == MVT::f80)
      return 3;
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // Split v64i1 vectors if we don't have v64i8 available.
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  // Split vNbf16 vectors according to vNf16.
  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}

EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext& Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Figure out what this type will be legalized to.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
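    // (e.g. a v16i32 compare yields a v16i1 mask rather than a v16i32 of
    // all-ones/all-zeros lanes.)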
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
      // vXi16/vXi8.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  return VT.changeVectorElementTypeToInteger();
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}

/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  if (Subtarget.is64Bit()) {
    // Max of 8 and alignment of type.
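    // (e.g. a byval struct of plain i32 fields is placed at an 8-byte
    // boundary, while a 16-byte-aligned vector member keeps its 16 bytes.)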
    Align TyAlign = DL.getABITypeAlign(Ty);
    if (TyAlign > 8)
      return TyAlign.value();
    return 8;
  }

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment.value();
}

/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          Subtarget.useLight256BitInstructions()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      // If we have SSE1 registers we should be able to use them.
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if source is string constant. It's
      // better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return Subtarget.hasSSE1();
  if (VT == MVT::f64)
    return Subtarget.hasSSE2();
  return true;
}

static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
  return (8 * Alignment.value()) % SizeInBits == 0;
}

bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
  if (isBitAligned(Alignment, VT.getSizeInBits()))
    return true;
  switch (VT.getSizeInBits()) {
  default:
    // 8-byte and under are always assumed to be fast.
    return true;
  case 128:
    return !Subtarget.isUnalignedMem16Slow();
  case 256:
    return !Subtarget.isUnalignedMem32Slow();
    // TODO: What about AVX-512 (512-bit) accesses?
  }
}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  // NonTemporal vector memory ops must be aligned.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }
  // Misaligned accesses of any size are always allowed.
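  // (Whether such an access is also considered *fast* is reported separately
  // through the Fast out-parameter above.)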
  return true;
}

bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
                                           const DataLayout &DL, EVT VT,
                                           unsigned AddrSpace, Align Alignment,
                                           MachineMemOperand::Flags Flags,
                                           unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                       /*Fast=*/nullptr))
      return true;
    // NonTemporal vector memory ops are special, and must be aligned.
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
      return false;
    switch (VT.getSizeInBits()) {
    case 128:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
        return true;
      return false;
    case 256:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
        return true;
      return false;
    case 512:
      if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
        return true;
      return false;
    default:
      return false; // Don't have NonTemporal vector memory ops of this size.
    }
  }
  return true;
}

/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;
  if (isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Large &&
      !Subtarget.isTargetCOFF())
    return MachineJumpTableInfo::EK_LabelDifference64;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}

bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {

  // Only relabel X86-32 for C / Stdcall CCs.
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Mark the first N int arguments as having reg
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}

const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid, MCContext &Ctx) const {
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}

/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    // This doesn't have SDLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  return Table;
}

/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  // X86-64 uses RIP relative addressing based on the jump table label.
  if (Subtarget.isPICStyleRIPRel() ||
      (Subtarget.is64Bit() &&
       getTargetMachine().getCodeModel() == CodeModel::Large))
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  // Otherwise, the reference is relative to the PIC base.
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}

std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}

unsigned X86TargetLowering::getAddressSpace() const {
  if (Subtarget.is64Bit())
    return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ?
               256 : 257;
  return 256;
}

static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}

static Constant* SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
}

Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // glibc, bionic, and Fuchsia have a special slot for the stack guard in
  // tcbhead_t; use it instead of the usual global variable (see
  // sysdeps/{i386,x86_64}/nptl/tls.h)
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    unsigned AddressSpace = getAddressSpace();

    // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
    if (Subtarget.isTargetFuchsia())
      return SegmentOffset(IRB, 0x10, AddressSpace);

    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base reg and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If we don't set -stack-protector-guard-offset value:
    // %fs:0x28, unless we're using a Kernel code model, in which case
    // it's %gs:0x28. gs:0x14 on i386.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

    StringRef GuardReg = M->getStackProtectorGuardReg();
    if (GuardReg == "fs")
      AddressSpace = X86AS::FS;
    else if (GuardReg == "gs")
      AddressSpace = X86AS::GS;

    // Use the symbol guard if the user specified one.
    StringRef GuardSymb = M->getStackProtectorGuardSymbol();
    if (!GuardSymb.empty()) {
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ?
            Type::getInt64Ty(M->getContext())
            : Type::getInt32Ty(M->getContext());
        GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
                                nullptr, GuardSymb, nullptr,
                                GlobalValue::NotThreadLocal, AddressSpace);
        if (!Subtarget.isTargetDarwin())
          GV->setDSOLocal(M->getDirectAccessExternalData());
      }
      return GV;
    }

    return SegmentOffset(IRB, Offset, AddressSpace);
  }
  return TargetLowering::getIRStackGuard(IRB);
}

void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // MSVC CRT provides functionalities for stack protection.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    // MSVC CRT has a global variable holding security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));

    // MSVC CRT has a function to validate security cookie.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // glibc, bionic, and Fuchsia have a special slot for the stack guard.
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}

Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
  // MSVC CRT has a global variable holding security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getGlobalVariable("__security_cookie");
  }
  return TargetLowering::getSDagStackGuard(M);
}

Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
  // MSVC CRT has a function to validate security cookie.
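  // (i.e. the __security_check_cookie declaration inserted by
  // insertSSPDeclarations above.)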
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getFunction("__security_check_cookie");
  }
  return TargetLowering::getSSPStackGuardCheck(M);
}

Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget.isTargetAndroid()) {
    // %fs:0x48, unless we're using a Kernel code model, in which case it's
    // %gs:0x48. %gs:0x24 on i386.
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia is similar.
  if (Subtarget.isTargetFuchsia()) {
    // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool X86TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}

const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}

ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
  return RCRegs;
}

/// Lowers mask values (v*i1) to the local register values
/// \returns DAG node after lowering to register type
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &DL, SelectionDAG &DAG) {
  EVT ValVT =
      ValArg.getValueType();

  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Two stage lowering might be required
    // bitcast:   v8i1 -> i8  / v16i1 -> i16
    // anyextend: i8   -> i32 / i16   -> i32
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // One stage lowering is required
    // bitcast: v32i1 -> i32 / v64i1 -> i64
    return DAG.getBitcast(ValLoc, ValArg);
  }

  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
}

/// Breaks v64i1 value into two registers and adds the new node to the DAG
static void Passv64i1ArgInRegs(
    const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // Before splitting the value we cast it to i64
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Splitting the value into two i32 types
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);

  // Attach the two i32 types into corresponding registers
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}

SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl,
                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // In some cases we need to disable registers from the default CSR list.
  // For example, when they are used as return registers (preserve_* and X86's
  // regcall) or for argument passing (X86's regcall).
  bool ShouldDisableCalleeSavedRegister =
      shouldDisableRetRegFromCSR(CallConv) ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");

  if (CallConv == CallingConv::X86_INTR && !Outs.empty())
    report_fatal_error("X86 interrupts may not return any value");

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);

  SmallVector<std::pair<Register, SDValue>, 4> RetVals;
  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++OutsIndex) {
    CCValAssign &VA = RVLocs[I];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Add the register to the CalleeSaveDisableRegs list.
    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());

    SDValue ValToCopy = OutVals[OutsIndex];
    EVT ValVT = ValToCopy.getValueType();

    // Promote values to the appropriate types.
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    } else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);

    assert(VA.getLocInfo() != CCValAssign::FPExt &&
           "Unexpected FP-extend for return value.");

    // Report an error if we have attempted to return a value via an XMM
    // register and SSE was disabled.
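    // (This is the errorUnsupported() path mentioned at the top of the file,
    // e.g. returning a double from x86_64 code compiled without SSE2.)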
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      // When returning a double via an XMM register, report an error if SSE2
      // is not enabled.
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    }

    // Returns in ST0/ST1 are handled specially: these are pushed as operands
    // to the RET instruction and handled by the FP Stackifier.
    if (VA.getLocReg() == X86::FP0 ||
        VA.getLocReg() == X86::FP1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
      // Don't emit a copytoreg.
      continue;
    }

    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
    // which is returned in RAX / RDX.
    if (Subtarget.is64Bit()) {
      if (ValVT == MVT::x86mmx) {
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          // If we don't have SSE2 available, convert to v4f32 so the generated
          // register is legal.
          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
        }
      }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");

      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);

      // Add the second register to the CalleeSaveDisableRegs list.
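      // (RVLocs[I] now refers to the second half, since Passv64i1ArgInRegs
      // consumed RVLocs[++I] above.)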
      if (ShouldDisableCalleeSavedRegister)
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
    } else {
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
    }
  }

  SDValue Glue;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));

  // Copy the result values into the output registers.
  for (auto &RetVal : RetVals) {
    if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
      RetOps.push_back(RetVal.second);
      continue; // Don't emit a copytoreg.
    }

    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.

  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  //
  // Checking Function.hasStructRetAttr() here is insufficient because the IR
  // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
  // false, then an sret argument may be implicitly inserted in the SelDAG. In
  // either case FuncInfo->setSRetReturnReg() will have been called.
  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
    // When we have both sret and another return value, we should use the
    // original Chain stored in RetOps[0], instead of the current Chain updated
    // in the above loop. If we only have sret, RetOps[0] equals Chain.

    // For the case of sret and another return value, we have
    //   Chain_0 at the function entry
    //   Chain_1 = getCopyToReg(Chain_0) in the above loop
    // If we use Chain_1 in getCopyFromReg, we will have
    //   Val = getCopyFromReg(Chain_1)
    //   Chain_2 = getCopyToReg(Chain_1, Val) from below

    // getCopyToReg(Chain_0) will be glued together with
    // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
    // in Unit B, and we will have a cyclic dependency between Unit A and
    // Unit B:
    //   Data dependency from Unit B to Unit A due to usage of Val in
    //     getCopyToReg(Chain_1, Val)
    //   Chain dependency from Unit A to Unit B

    // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    Register RetValReg
        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
          X86::RAX : X86::EAX;
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
    Glue = Chain.getValue(1);

    // RAX/EAX now acts like a return value.
    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));

    // Add the returned register to the CalleeSaveDisableRegs list. Don't do
    // this however for preserve_most/preserve_all to minimize the number of
    // callee-saved registers for these CCs.
    if (ShouldDisableCalleeSavedRegister &&
        CallConv != CallingConv::PreserveAll &&
        CallConv != CallingConv::PreserveMost)
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
  }

  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (X86::GR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
9345f757f3fSDimitry Andric if (Glue.getNode()) 9355f757f3fSDimitry Andric RetOps.push_back(Glue); 9365f757f3fSDimitry Andric 9375f757f3fSDimitry Andric X86ISD::NodeType opcode = X86ISD::RET_GLUE; 9385f757f3fSDimitry Andric if (CallConv == CallingConv::X86_INTR) 9395f757f3fSDimitry Andric opcode = X86ISD::IRET; 9405f757f3fSDimitry Andric return DAG.getNode(opcode, dl, MVT::Other, RetOps); 9415f757f3fSDimitry Andric } 9425f757f3fSDimitry Andric 9435f757f3fSDimitry Andric bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 9445f757f3fSDimitry Andric if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) 9455f757f3fSDimitry Andric return false; 9465f757f3fSDimitry Andric 9475f757f3fSDimitry Andric SDValue TCChain = Chain; 9485f757f3fSDimitry Andric SDNode *Copy = *N->use_begin(); 9495f757f3fSDimitry Andric if (Copy->getOpcode() == ISD::CopyToReg) { 9505f757f3fSDimitry Andric // If the copy has a glue operand, we conservatively assume it isn't safe to 9515f757f3fSDimitry Andric // perform a tail call. 9525f757f3fSDimitry Andric if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 9535f757f3fSDimitry Andric return false; 9545f757f3fSDimitry Andric TCChain = Copy->getOperand(0); 9555f757f3fSDimitry Andric } else if (Copy->getOpcode() != ISD::FP_EXTEND) 9565f757f3fSDimitry Andric return false; 9575f757f3fSDimitry Andric 9585f757f3fSDimitry Andric bool HasRet = false; 9595f757f3fSDimitry Andric for (const SDNode *U : Copy->uses()) { 9605f757f3fSDimitry Andric if (U->getOpcode() != X86ISD::RET_GLUE) 9615f757f3fSDimitry Andric return false; 9625f757f3fSDimitry Andric // If we are returning more than one value, we can definitely 9635f757f3fSDimitry Andric // not make a tail call see PR19530 9645f757f3fSDimitry Andric if (U->getNumOperands() > 4) 9655f757f3fSDimitry Andric return false; 9665f757f3fSDimitry Andric if (U->getNumOperands() == 4 && 9675f757f3fSDimitry Andric U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue) 9685f757f3fSDimitry Andric return false; 9695f757f3fSDimitry Andric HasRet = true; 9705f757f3fSDimitry Andric } 9715f757f3fSDimitry Andric 9725f757f3fSDimitry Andric if (!HasRet) 9735f757f3fSDimitry Andric return false; 9745f757f3fSDimitry Andric 9755f757f3fSDimitry Andric Chain = TCChain; 9765f757f3fSDimitry Andric return true; 9775f757f3fSDimitry Andric } 9785f757f3fSDimitry Andric 9795f757f3fSDimitry Andric EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, 9805f757f3fSDimitry Andric ISD::NodeType ExtendKind) const { 9815f757f3fSDimitry Andric MVT ReturnMVT = MVT::i32; 9825f757f3fSDimitry Andric 9835f757f3fSDimitry Andric bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); 9845f757f3fSDimitry Andric if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { 9855f757f3fSDimitry Andric // The ABI does not require i1, i8 or i16 to be extended. 9865f757f3fSDimitry Andric // 9875f757f3fSDimitry Andric // On Darwin, there is code in the wild relying on Clang's old behaviour of 9885f757f3fSDimitry Andric // always extending i8/i16 return values, so keep doing that for now. 9895f757f3fSDimitry Andric // (PR26665). 9905f757f3fSDimitry Andric ReturnMVT = MVT::i8; 9915f757f3fSDimitry Andric } 9925f757f3fSDimitry Andric 9935f757f3fSDimitry Andric EVT MinVT = getRegisterType(Context, ReturnMVT); 9945f757f3fSDimitry Andric return VT.bitsLT(MinVT) ? 
MinVT : VT;
9955f757f3fSDimitry Andric }
9965f757f3fSDimitry Andric
9975f757f3fSDimitry Andric /// Reads two 32 bit registers and creates a 64 bit mask value.
9985f757f3fSDimitry Andric /// \param VA The current 32 bit value that needs to be assigned.
9995f757f3fSDimitry Andric /// \param NextVA The next 32 bit value that needs to be assigned.
10005f757f3fSDimitry Andric /// \param Root The parent DAG node.
10015f757f3fSDimitry Andric /// \param [in,out] InGlue Represents an SDValue in the parent DAG node for
10025f757f3fSDimitry Andric /// glue purposes. In case the DAG is already using a
10035f757f3fSDimitry Andric /// physical register instead of a virtual one, we should glue
10045f757f3fSDimitry Andric /// our new SDValue to the InGlue SDValue.
10055f757f3fSDimitry Andric /// \return a new SDValue of size 64 bits.
10065f757f3fSDimitry Andric static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
10075f757f3fSDimitry Andric SDValue &Root, SelectionDAG &DAG,
10085f757f3fSDimitry Andric const SDLoc &DL, const X86Subtarget &Subtarget,
10095f757f3fSDimitry Andric SDValue *InGlue = nullptr) {
10105f757f3fSDimitry Andric assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
10115f757f3fSDimitry Andric assert(Subtarget.is32Bit() && "Expecting 32 bit target");
10125f757f3fSDimitry Andric assert(VA.getValVT() == MVT::v64i1 &&
10135f757f3fSDimitry Andric "Expecting first location of 64 bit width type");
10145f757f3fSDimitry Andric assert(NextVA.getValVT() == VA.getValVT() &&
10155f757f3fSDimitry Andric "The locations should have the same type");
10165f757f3fSDimitry Andric assert(VA.isRegLoc() && NextVA.isRegLoc() &&
10175f757f3fSDimitry Andric "The values should reside in two registers");
10185f757f3fSDimitry Andric
10195f757f3fSDimitry Andric SDValue Lo, Hi;
10205f757f3fSDimitry Andric SDValue ArgValueLo, ArgValueHi;
10215f757f3fSDimitry Andric
10225f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
10235f757f3fSDimitry Andric const TargetRegisterClass *RC = &X86::GR32RegClass;
10245f757f3fSDimitry Andric
10255f757f3fSDimitry Andric // Read a 32 bit value from the registers.
10265f757f3fSDimitry Andric if (nullptr == InGlue) {
10275f757f3fSDimitry Andric // When no physical register is present,
10285f757f3fSDimitry Andric // create an intermediate virtual register.
10295f757f3fSDimitry Andric Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
10305f757f3fSDimitry Andric ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
10315f757f3fSDimitry Andric Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
10325f757f3fSDimitry Andric ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
10335f757f3fSDimitry Andric } else {
10345f757f3fSDimitry Andric // When a physical register is available, read the value from it and glue
10355f757f3fSDimitry Andric // the reads together.
10365f757f3fSDimitry Andric ArgValueLo =
10375f757f3fSDimitry Andric DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
10385f757f3fSDimitry Andric *InGlue = ArgValueLo.getValue(2);
10395f757f3fSDimitry Andric ArgValueHi =
10405f757f3fSDimitry Andric DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
10415f757f3fSDimitry Andric *InGlue = ArgValueHi.getValue(2);
10425f757f3fSDimitry Andric }
10435f757f3fSDimitry Andric
10445f757f3fSDimitry Andric // Convert the i32 type into v32i1 type.
10455f757f3fSDimitry Andric Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
10465f757f3fSDimitry Andric
10475f757f3fSDimitry Andric // Convert the i32 type into v32i1 type.
10485f757f3fSDimitry Andric Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi); 10495f757f3fSDimitry Andric 10505f757f3fSDimitry Andric // Concatenate the two values together. 10515f757f3fSDimitry Andric return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi); 10525f757f3fSDimitry Andric } 10535f757f3fSDimitry Andric 10545f757f3fSDimitry Andric /// The function will lower a register of various sizes (8/16/32/64) 10555f757f3fSDimitry Andric /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1) 10565f757f3fSDimitry Andric /// \returns a DAG node contains the operand after lowering to mask type. 10575f757f3fSDimitry Andric static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, 10585f757f3fSDimitry Andric const EVT &ValLoc, const SDLoc &DL, 10595f757f3fSDimitry Andric SelectionDAG &DAG) { 10605f757f3fSDimitry Andric SDValue ValReturned = ValArg; 10615f757f3fSDimitry Andric 10625f757f3fSDimitry Andric if (ValVT == MVT::v1i1) 10635f757f3fSDimitry Andric return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned); 10645f757f3fSDimitry Andric 10655f757f3fSDimitry Andric if (ValVT == MVT::v64i1) { 10665f757f3fSDimitry Andric // In 32 bit machine, this case is handled by getv64i1Argument 10675f757f3fSDimitry Andric assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); 10685f757f3fSDimitry Andric // In 64 bit machine, There is no need to truncate the value only bitcast 10695f757f3fSDimitry Andric } else { 10705f757f3fSDimitry Andric MVT MaskLenVT; 10715f757f3fSDimitry Andric switch (ValVT.getSimpleVT().SimpleTy) { 10725f757f3fSDimitry Andric case MVT::v8i1: 10735f757f3fSDimitry Andric MaskLenVT = MVT::i8; 10745f757f3fSDimitry Andric break; 10755f757f3fSDimitry Andric case MVT::v16i1: 10765f757f3fSDimitry Andric MaskLenVT = MVT::i16; 10775f757f3fSDimitry Andric break; 10785f757f3fSDimitry Andric case MVT::v32i1: 10795f757f3fSDimitry Andric MaskLenVT = MVT::i32; 10805f757f3fSDimitry Andric break; 10815f757f3fSDimitry Andric default: 10825f757f3fSDimitry Andric llvm_unreachable("Expecting a vector of i1 types"); 10835f757f3fSDimitry Andric } 10845f757f3fSDimitry Andric 10855f757f3fSDimitry Andric ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned); 10865f757f3fSDimitry Andric } 10875f757f3fSDimitry Andric return DAG.getBitcast(ValVT, ValReturned); 10885f757f3fSDimitry Andric } 10895f757f3fSDimitry Andric 10905f757f3fSDimitry Andric /// Lower the result values of a call into the 10915f757f3fSDimitry Andric /// appropriate copies out of appropriate physical registers. 10925f757f3fSDimitry Andric /// 10935f757f3fSDimitry Andric SDValue X86TargetLowering::LowerCallResult( 10945f757f3fSDimitry Andric SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, 10955f757f3fSDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 10965f757f3fSDimitry Andric SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 10975f757f3fSDimitry Andric uint32_t *RegMask) const { 10985f757f3fSDimitry Andric 10995f757f3fSDimitry Andric const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 11005f757f3fSDimitry Andric // Assign locations to each value returned by this call. 
11015f757f3fSDimitry Andric SmallVector<CCValAssign, 16> RVLocs; 11025f757f3fSDimitry Andric CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 11035f757f3fSDimitry Andric *DAG.getContext()); 11045f757f3fSDimitry Andric CCInfo.AnalyzeCallResult(Ins, RetCC_X86); 11055f757f3fSDimitry Andric 11065f757f3fSDimitry Andric // Copy all of the result registers out of their specified physreg. 11075f757f3fSDimitry Andric for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; 11085f757f3fSDimitry Andric ++I, ++InsIndex) { 11095f757f3fSDimitry Andric CCValAssign &VA = RVLocs[I]; 11105f757f3fSDimitry Andric EVT CopyVT = VA.getLocVT(); 11115f757f3fSDimitry Andric 11125f757f3fSDimitry Andric // In some calling conventions we need to remove the used registers 11135f757f3fSDimitry Andric // from the register mask. 11145f757f3fSDimitry Andric if (RegMask) { 11155f757f3fSDimitry Andric for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg())) 11165f757f3fSDimitry Andric RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); 11175f757f3fSDimitry Andric } 11185f757f3fSDimitry Andric 11195f757f3fSDimitry Andric // Report an error if there was an attempt to return FP values via XMM 11205f757f3fSDimitry Andric // registers. 11215f757f3fSDimitry Andric if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) { 11225f757f3fSDimitry Andric errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); 11235f757f3fSDimitry Andric if (VA.getLocReg() == X86::XMM1) 11245f757f3fSDimitry Andric VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts. 11255f757f3fSDimitry Andric else 11265f757f3fSDimitry Andric VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 11275f757f3fSDimitry Andric } else if (!Subtarget.hasSSE2() && 11285f757f3fSDimitry Andric X86::FR64XRegClass.contains(VA.getLocReg()) && 11295f757f3fSDimitry Andric CopyVT == MVT::f64) { 11305f757f3fSDimitry Andric errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); 11315f757f3fSDimitry Andric if (VA.getLocReg() == X86::XMM1) 11325f757f3fSDimitry Andric VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts. 11335f757f3fSDimitry Andric else 11345f757f3fSDimitry Andric VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 11355f757f3fSDimitry Andric } 11365f757f3fSDimitry Andric 11375f757f3fSDimitry Andric // If we prefer to use the value in xmm registers, copy it out as f80 and 11385f757f3fSDimitry Andric // use a truncate to move it from fp stack reg to xmm reg. 
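// For example, an f64 returned on the x87 stack (FP0) by the callee is
// copied out as f80 here and rounded back to f64 with the FP_ROUND below,
// so the caller can keep it in an XMM register.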
11395f757f3fSDimitry Andric bool RoundAfterCopy = false; 11405f757f3fSDimitry Andric if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 11415f757f3fSDimitry Andric isScalarFPTypeInSSEReg(VA.getValVT())) { 11425f757f3fSDimitry Andric if (!Subtarget.hasX87()) 11435f757f3fSDimitry Andric report_fatal_error("X87 register return with X87 disabled"); 11445f757f3fSDimitry Andric CopyVT = MVT::f80; 11455f757f3fSDimitry Andric RoundAfterCopy = (CopyVT != VA.getLocVT()); 11465f757f3fSDimitry Andric } 11475f757f3fSDimitry Andric 11485f757f3fSDimitry Andric SDValue Val; 11495f757f3fSDimitry Andric if (VA.needsCustom()) { 11505f757f3fSDimitry Andric assert(VA.getValVT() == MVT::v64i1 && 11515f757f3fSDimitry Andric "Currently the only custom case is when we split v64i1 to 2 regs"); 11525f757f3fSDimitry Andric Val = 11535f757f3fSDimitry Andric getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue); 11545f757f3fSDimitry Andric } else { 11555f757f3fSDimitry Andric Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue) 11565f757f3fSDimitry Andric .getValue(1); 11575f757f3fSDimitry Andric Val = Chain.getValue(0); 11585f757f3fSDimitry Andric InGlue = Chain.getValue(2); 11595f757f3fSDimitry Andric } 11605f757f3fSDimitry Andric 11615f757f3fSDimitry Andric if (RoundAfterCopy) 11625f757f3fSDimitry Andric Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, 11635f757f3fSDimitry Andric // This truncation won't change the value. 11645f757f3fSDimitry Andric DAG.getIntPtrConstant(1, dl, /*isTarget=*/true)); 11655f757f3fSDimitry Andric 11665f757f3fSDimitry Andric if (VA.isExtInLoc()) { 11675f757f3fSDimitry Andric if (VA.getValVT().isVector() && 11685f757f3fSDimitry Andric VA.getValVT().getScalarType() == MVT::i1 && 11695f757f3fSDimitry Andric ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || 11705f757f3fSDimitry Andric (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { 11715f757f3fSDimitry Andric // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 11725f757f3fSDimitry Andric Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); 11735f757f3fSDimitry Andric } else 11745f757f3fSDimitry Andric Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 11755f757f3fSDimitry Andric } 11765f757f3fSDimitry Andric 11775f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::BCvt) 11785f757f3fSDimitry Andric Val = DAG.getBitcast(VA.getValVT(), Val); 11795f757f3fSDimitry Andric 11805f757f3fSDimitry Andric InVals.push_back(Val); 11815f757f3fSDimitry Andric } 11825f757f3fSDimitry Andric 11835f757f3fSDimitry Andric return Chain; 11845f757f3fSDimitry Andric } 11855f757f3fSDimitry Andric 11865f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 11875f757f3fSDimitry Andric // C & StdCall & Fast Calling Convention implementation 11885f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 11895f757f3fSDimitry Andric // StdCall calling convention seems to be standard for many Windows' API 11905f757f3fSDimitry Andric // routines and around. It differs from C calling convention just a little: 11915f757f3fSDimitry Andric // callee should clean up the stack, not caller. Symbols should be also 11925f757f3fSDimitry Andric // decorated in some fancy way :) It doesn't support any vector arguments. 
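// As a rough example (hypothetical declaration), a 32-bit Windows function
//   int __stdcall Foo(int a, int b, int c);
// is typically emitted under the decorated name "_Foo@12" and returns with
// "ret 12", popping its own 12 bytes of arguments.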
11935f757f3fSDimitry Andric // For info on the fast calling convention, see the Fast Calling Convention
11945f757f3fSDimitry Andric // (tail call) implementation, LowerX86_32FastCCCallTo.
11955f757f3fSDimitry Andric
11965f757f3fSDimitry Andric /// Determines whether Args, either a set of outgoing arguments to a call, or a
11975f757f3fSDimitry Andric /// set of incoming args of a call, contains an sret pointer that the callee
11985f757f3fSDimitry Andric /// pops.
11995f757f3fSDimitry Andric template <typename T>
12005f757f3fSDimitry Andric static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
12015f757f3fSDimitry Andric const X86Subtarget &Subtarget) {
12025f757f3fSDimitry Andric // Not C++20 (yet), so no concepts available.
12035f757f3fSDimitry Andric static_assert(std::is_same_v<T, ISD::OutputArg> ||
12045f757f3fSDimitry Andric std::is_same_v<T, ISD::InputArg>,
12055f757f3fSDimitry Andric "requires ISD::OutputArg or ISD::InputArg");
12065f757f3fSDimitry Andric
12075f757f3fSDimitry Andric // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
12085f757f3fSDimitry Andric // for most compilations.
12095f757f3fSDimitry Andric if (!Subtarget.is32Bit())
12105f757f3fSDimitry Andric return false;
12115f757f3fSDimitry Andric
12125f757f3fSDimitry Andric if (Args.empty())
12135f757f3fSDimitry Andric return false;
12145f757f3fSDimitry Andric
12155f757f3fSDimitry Andric // Most calls do not have an sret argument; check the first arg next.
12165f757f3fSDimitry Andric const ISD::ArgFlagsTy &Flags = Args[0].Flags;
12175f757f3fSDimitry Andric if (!Flags.isSRet() || Flags.isInReg())
12185f757f3fSDimitry Andric return false;
12195f757f3fSDimitry Andric
12205f757f3fSDimitry Andric // The MSVC ABI does not pop the sret.
12215f757f3fSDimitry Andric if (Subtarget.getTargetTriple().isOSMSVCRT())
12225f757f3fSDimitry Andric return false;
12235f757f3fSDimitry Andric
12245f757f3fSDimitry Andric // MCUs don't pop the sret.
12255f757f3fSDimitry Andric if (Subtarget.isTargetMCU())
12265f757f3fSDimitry Andric return false;
12275f757f3fSDimitry Andric
12285f757f3fSDimitry Andric // The callee pops the argument.
12295f757f3fSDimitry Andric return true;
12305f757f3fSDimitry Andric }
12315f757f3fSDimitry Andric
12325f757f3fSDimitry Andric /// Make a copy of an aggregate at the address specified by "Src" to the
12335f757f3fSDimitry Andric /// address "Dst", with size and alignment information specified by the
12345f757f3fSDimitry Andric /// specific parameter attribute. The copy will be passed as a byval function parameter.
12355f757f3fSDimitry Andric static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
12365f757f3fSDimitry Andric SDValue Chain, ISD::ArgFlagsTy Flags,
12375f757f3fSDimitry Andric SelectionDAG &DAG, const SDLoc &dl) {
12385f757f3fSDimitry Andric SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
12395f757f3fSDimitry Andric
12405f757f3fSDimitry Andric return DAG.getMemcpy(
12415f757f3fSDimitry Andric Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
12425f757f3fSDimitry Andric /*isVolatile*/ false, /*AlwaysInline=*/true,
1243*0fca6ea1SDimitry Andric /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
12445f757f3fSDimitry Andric }
12455f757f3fSDimitry Andric
12465f757f3fSDimitry Andric /// Return true if the calling convention is one that we can guarantee TCO for.
12475f757f3fSDimitry Andric static bool canGuaranteeTCO(CallingConv::ID CC) { 12485f757f3fSDimitry Andric return (CC == CallingConv::Fast || CC == CallingConv::GHC || 12495f757f3fSDimitry Andric CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || 12505f757f3fSDimitry Andric CC == CallingConv::Tail || CC == CallingConv::SwiftTail); 12515f757f3fSDimitry Andric } 12525f757f3fSDimitry Andric 12535f757f3fSDimitry Andric /// Return true if we might ever do TCO for calls with this calling convention. 12545f757f3fSDimitry Andric static bool mayTailCallThisCC(CallingConv::ID CC) { 12555f757f3fSDimitry Andric switch (CC) { 12565f757f3fSDimitry Andric // C calling conventions: 12575f757f3fSDimitry Andric case CallingConv::C: 12585f757f3fSDimitry Andric case CallingConv::Win64: 12595f757f3fSDimitry Andric case CallingConv::X86_64_SysV: 1260*0fca6ea1SDimitry Andric case CallingConv::PreserveNone: 12615f757f3fSDimitry Andric // Callee pop conventions: 12625f757f3fSDimitry Andric case CallingConv::X86_ThisCall: 12635f757f3fSDimitry Andric case CallingConv::X86_StdCall: 12645f757f3fSDimitry Andric case CallingConv::X86_VectorCall: 12655f757f3fSDimitry Andric case CallingConv::X86_FastCall: 12665f757f3fSDimitry Andric // Swift: 12675f757f3fSDimitry Andric case CallingConv::Swift: 12685f757f3fSDimitry Andric return true; 12695f757f3fSDimitry Andric default: 12705f757f3fSDimitry Andric return canGuaranteeTCO(CC); 12715f757f3fSDimitry Andric } 12725f757f3fSDimitry Andric } 12735f757f3fSDimitry Andric 12745f757f3fSDimitry Andric /// Return true if the function is being made into a tailcall target by 12755f757f3fSDimitry Andric /// changing its ABI. 12765f757f3fSDimitry Andric static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { 12775f757f3fSDimitry Andric return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || 12785f757f3fSDimitry Andric CC == CallingConv::Tail || CC == CallingConv::SwiftTail; 12795f757f3fSDimitry Andric } 12805f757f3fSDimitry Andric 12815f757f3fSDimitry Andric bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 12825f757f3fSDimitry Andric if (!CI->isTailCall()) 12835f757f3fSDimitry Andric return false; 12845f757f3fSDimitry Andric 12855f757f3fSDimitry Andric CallingConv::ID CalleeCC = CI->getCallingConv(); 12865f757f3fSDimitry Andric if (!mayTailCallThisCC(CalleeCC)) 12875f757f3fSDimitry Andric return false; 12885f757f3fSDimitry Andric 12895f757f3fSDimitry Andric return true; 12905f757f3fSDimitry Andric } 12915f757f3fSDimitry Andric 12925f757f3fSDimitry Andric SDValue 12935f757f3fSDimitry Andric X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 12945f757f3fSDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, 12955f757f3fSDimitry Andric const SDLoc &dl, SelectionDAG &DAG, 12965f757f3fSDimitry Andric const CCValAssign &VA, 12975f757f3fSDimitry Andric MachineFrameInfo &MFI, unsigned i) const { 12985f757f3fSDimitry Andric // Create the nodes corresponding to a load from this parameter slot. 
12995f757f3fSDimitry Andric ISD::ArgFlagsTy Flags = Ins[i].Flags; 13005f757f3fSDimitry Andric bool AlwaysUseMutable = shouldGuaranteeTCO( 13015f757f3fSDimitry Andric CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); 13025f757f3fSDimitry Andric bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 13035f757f3fSDimitry Andric EVT ValVT; 13045f757f3fSDimitry Andric MVT PtrVT = getPointerTy(DAG.getDataLayout()); 13055f757f3fSDimitry Andric 13065f757f3fSDimitry Andric // If value is passed by pointer we have address passed instead of the value 13075f757f3fSDimitry Andric // itself. No need to extend if the mask value and location share the same 13085f757f3fSDimitry Andric // absolute size. 13095f757f3fSDimitry Andric bool ExtendedInMem = 13105f757f3fSDimitry Andric VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 && 13115f757f3fSDimitry Andric VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits(); 13125f757f3fSDimitry Andric 13135f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) 13145f757f3fSDimitry Andric ValVT = VA.getLocVT(); 13155f757f3fSDimitry Andric else 13165f757f3fSDimitry Andric ValVT = VA.getValVT(); 13175f757f3fSDimitry Andric 13185f757f3fSDimitry Andric // FIXME: For now, all byval parameter objects are marked mutable. This can be 13195f757f3fSDimitry Andric // changed with more analysis. 13205f757f3fSDimitry Andric // In case of tail call optimization mark all arguments mutable. Since they 13215f757f3fSDimitry Andric // could be overwritten by lowering of arguments in case of a tail call. 13225f757f3fSDimitry Andric if (Flags.isByVal()) { 13235f757f3fSDimitry Andric unsigned Bytes = Flags.getByValSize(); 13245f757f3fSDimitry Andric if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 13255f757f3fSDimitry Andric 13265f757f3fSDimitry Andric // FIXME: For now, all byval parameter objects are marked as aliasing. This 13275f757f3fSDimitry Andric // can be improved with deeper analysis. 13285f757f3fSDimitry Andric int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable, 13295f757f3fSDimitry Andric /*isAliased=*/true); 13305f757f3fSDimitry Andric return DAG.getFrameIndex(FI, PtrVT); 13315f757f3fSDimitry Andric } 13325f757f3fSDimitry Andric 13335f757f3fSDimitry Andric EVT ArgVT = Ins[i].ArgVT; 13345f757f3fSDimitry Andric 13355f757f3fSDimitry Andric // If this is a vector that has been split into multiple parts, don't elide 13365f757f3fSDimitry Andric // the copy. The layout on the stack may not match the packed in-memory 13375f757f3fSDimitry Andric // layout. 13385f757f3fSDimitry Andric bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector(); 13395f757f3fSDimitry Andric 13405f757f3fSDimitry Andric // This is an argument in memory. We might be able to perform copy elision. 13415f757f3fSDimitry Andric // If the argument is passed directly in memory without any extension, then we 13425f757f3fSDimitry Andric // can perform copy elision. Large vector types, for example, may be passed 13435f757f3fSDimitry Andric // indirectly by pointer. 
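// In the elision case below we return a load taken directly from the
// caller's fixed stack slot for the argument, instead of first copying the
// value into a separate local object.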
13445f757f3fSDimitry Andric if (Flags.isCopyElisionCandidate() && 13455f757f3fSDimitry Andric VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && 13465f757f3fSDimitry Andric !ScalarizedVector) { 13475f757f3fSDimitry Andric SDValue PartAddr; 13485f757f3fSDimitry Andric if (Ins[i].PartOffset == 0) { 13495f757f3fSDimitry Andric // If this is a one-part value or the first part of a multi-part value, 13505f757f3fSDimitry Andric // create a stack object for the entire argument value type and return a 13515f757f3fSDimitry Andric // load from our portion of it. This assumes that if the first part of an 13525f757f3fSDimitry Andric // argument is in memory, the rest will also be in memory. 13535f757f3fSDimitry Andric int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), 13545f757f3fSDimitry Andric /*IsImmutable=*/false); 13555f757f3fSDimitry Andric PartAddr = DAG.getFrameIndex(FI, PtrVT); 13565f757f3fSDimitry Andric return DAG.getLoad( 13575f757f3fSDimitry Andric ValVT, dl, Chain, PartAddr, 13585f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); 13595f757f3fSDimitry Andric } 13605f757f3fSDimitry Andric 13615f757f3fSDimitry Andric // This is not the first piece of an argument in memory. See if there is 13625f757f3fSDimitry Andric // already a fixed stack object including this offset. If so, assume it 13635f757f3fSDimitry Andric // was created by the PartOffset == 0 branch above and create a load from 13645f757f3fSDimitry Andric // the appropriate offset into it. 13655f757f3fSDimitry Andric int64_t PartBegin = VA.getLocMemOffset(); 13665f757f3fSDimitry Andric int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; 13675f757f3fSDimitry Andric int FI = MFI.getObjectIndexBegin(); 13685f757f3fSDimitry Andric for (; MFI.isFixedObjectIndex(FI); ++FI) { 13695f757f3fSDimitry Andric int64_t ObjBegin = MFI.getObjectOffset(FI); 13705f757f3fSDimitry Andric int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); 13715f757f3fSDimitry Andric if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) 13725f757f3fSDimitry Andric break; 13735f757f3fSDimitry Andric } 13745f757f3fSDimitry Andric if (MFI.isFixedObjectIndex(FI)) { 13755f757f3fSDimitry Andric SDValue Addr = 13765f757f3fSDimitry Andric DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), 13775f757f3fSDimitry Andric DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); 13785f757f3fSDimitry Andric return DAG.getLoad(ValVT, dl, Chain, Addr, 13795f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 13805f757f3fSDimitry Andric DAG.getMachineFunction(), FI, Ins[i].PartOffset)); 13815f757f3fSDimitry Andric } 13825f757f3fSDimitry Andric } 13835f757f3fSDimitry Andric 13845f757f3fSDimitry Andric int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 13855f757f3fSDimitry Andric VA.getLocMemOffset(), isImmutable); 13865f757f3fSDimitry Andric 13875f757f3fSDimitry Andric // Set SExt or ZExt flag. 
13885f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::ZExt) { 13895f757f3fSDimitry Andric MFI.setObjectZExt(FI, true); 13905f757f3fSDimitry Andric } else if (VA.getLocInfo() == CCValAssign::SExt) { 13915f757f3fSDimitry Andric MFI.setObjectSExt(FI, true); 13925f757f3fSDimitry Andric } 13935f757f3fSDimitry Andric 13945f757f3fSDimitry Andric MaybeAlign Alignment; 13955f757f3fSDimitry Andric if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && 13965f757f3fSDimitry Andric ValVT != MVT::f80) 13975f757f3fSDimitry Andric Alignment = MaybeAlign(4); 13985f757f3fSDimitry Andric SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 13995f757f3fSDimitry Andric SDValue Val = DAG.getLoad( 14005f757f3fSDimitry Andric ValVT, dl, Chain, FIN, 14015f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), 14025f757f3fSDimitry Andric Alignment); 14035f757f3fSDimitry Andric return ExtendedInMem 14045f757f3fSDimitry Andric ? (VA.getValVT().isVector() 14055f757f3fSDimitry Andric ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) 14065f757f3fSDimitry Andric : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) 14075f757f3fSDimitry Andric : Val; 14085f757f3fSDimitry Andric } 14095f757f3fSDimitry Andric 14105f757f3fSDimitry Andric // FIXME: Get this from tablegen. 14115f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, 14125f757f3fSDimitry Andric const X86Subtarget &Subtarget) { 14135f757f3fSDimitry Andric assert(Subtarget.is64Bit()); 14145f757f3fSDimitry Andric 14155f757f3fSDimitry Andric if (Subtarget.isCallingConvWin64(CallConv)) { 14165f757f3fSDimitry Andric static const MCPhysReg GPR64ArgRegsWin64[] = { 14175f757f3fSDimitry Andric X86::RCX, X86::RDX, X86::R8, X86::R9 14185f757f3fSDimitry Andric }; 14195f757f3fSDimitry Andric return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); 14205f757f3fSDimitry Andric } 14215f757f3fSDimitry Andric 14225f757f3fSDimitry Andric static const MCPhysReg GPR64ArgRegs64Bit[] = { 14235f757f3fSDimitry Andric X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 14245f757f3fSDimitry Andric }; 14255f757f3fSDimitry Andric return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); 14265f757f3fSDimitry Andric } 14275f757f3fSDimitry Andric 14285f757f3fSDimitry Andric // FIXME: Get this from tablegen. 14295f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, 14305f757f3fSDimitry Andric CallingConv::ID CallConv, 14315f757f3fSDimitry Andric const X86Subtarget &Subtarget) { 14325f757f3fSDimitry Andric assert(Subtarget.is64Bit()); 14335f757f3fSDimitry Andric if (Subtarget.isCallingConvWin64(CallConv)) { 14345f757f3fSDimitry Andric // The XMM registers which might contain var arg parameters are shadowed 14355f757f3fSDimitry Andric // in their paired GPR. So we only need to save the GPR to their home 14365f757f3fSDimitry Andric // slots. 14375f757f3fSDimitry Andric // TODO: __vectorcall will change this. 14385f757f3fSDimitry Andric return std::nullopt; 14395f757f3fSDimitry Andric } 14405f757f3fSDimitry Andric 14415f757f3fSDimitry Andric bool isSoftFloat = Subtarget.useSoftFloat(); 14425f757f3fSDimitry Andric if (isSoftFloat || !Subtarget.hasSSE1()) 14435f757f3fSDimitry Andric // Kernel mode asks for SSE to be disabled, so there are no XMM argument 14445f757f3fSDimitry Andric // registers. 
14455f757f3fSDimitry Andric return std::nullopt; 14465f757f3fSDimitry Andric 14475f757f3fSDimitry Andric static const MCPhysReg XMMArgRegs64Bit[] = { 14485f757f3fSDimitry Andric X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 14495f757f3fSDimitry Andric X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 14505f757f3fSDimitry Andric }; 14515f757f3fSDimitry Andric return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); 14525f757f3fSDimitry Andric } 14535f757f3fSDimitry Andric 14545f757f3fSDimitry Andric #ifndef NDEBUG 14555f757f3fSDimitry Andric static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { 14565f757f3fSDimitry Andric return llvm::is_sorted( 14575f757f3fSDimitry Andric ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { 14585f757f3fSDimitry Andric return A.getValNo() < B.getValNo(); 14595f757f3fSDimitry Andric }); 14605f757f3fSDimitry Andric } 14615f757f3fSDimitry Andric #endif 14625f757f3fSDimitry Andric 14635f757f3fSDimitry Andric namespace { 14645f757f3fSDimitry Andric /// This is a helper class for lowering variable arguments parameters. 14655f757f3fSDimitry Andric class VarArgsLoweringHelper { 14665f757f3fSDimitry Andric public: 14675f757f3fSDimitry Andric VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc, 14685f757f3fSDimitry Andric SelectionDAG &DAG, const X86Subtarget &Subtarget, 14695f757f3fSDimitry Andric CallingConv::ID CallConv, CCState &CCInfo) 14705f757f3fSDimitry Andric : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget), 14715f757f3fSDimitry Andric TheMachineFunction(DAG.getMachineFunction()), 14725f757f3fSDimitry Andric TheFunction(TheMachineFunction.getFunction()), 14735f757f3fSDimitry Andric FrameInfo(TheMachineFunction.getFrameInfo()), 14745f757f3fSDimitry Andric FrameLowering(*Subtarget.getFrameLowering()), 14755f757f3fSDimitry Andric TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv), 14765f757f3fSDimitry Andric CCInfo(CCInfo) {} 14775f757f3fSDimitry Andric 14785f757f3fSDimitry Andric // Lower variable arguments parameters. 
14795f757f3fSDimitry Andric void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize); 14805f757f3fSDimitry Andric 14815f757f3fSDimitry Andric private: 14825f757f3fSDimitry Andric void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize); 14835f757f3fSDimitry Andric 14845f757f3fSDimitry Andric void forwardMustTailParameters(SDValue &Chain); 14855f757f3fSDimitry Andric 14865f757f3fSDimitry Andric bool is64Bit() const { return Subtarget.is64Bit(); } 14875f757f3fSDimitry Andric bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); } 14885f757f3fSDimitry Andric 14895f757f3fSDimitry Andric X86MachineFunctionInfo *FuncInfo; 14905f757f3fSDimitry Andric const SDLoc &DL; 14915f757f3fSDimitry Andric SelectionDAG &DAG; 14925f757f3fSDimitry Andric const X86Subtarget &Subtarget; 14935f757f3fSDimitry Andric MachineFunction &TheMachineFunction; 14945f757f3fSDimitry Andric const Function &TheFunction; 14955f757f3fSDimitry Andric MachineFrameInfo &FrameInfo; 14965f757f3fSDimitry Andric const TargetFrameLowering &FrameLowering; 14975f757f3fSDimitry Andric const TargetLowering &TargLowering; 14985f757f3fSDimitry Andric CallingConv::ID CallConv; 14995f757f3fSDimitry Andric CCState &CCInfo; 15005f757f3fSDimitry Andric }; 15015f757f3fSDimitry Andric } // namespace 15025f757f3fSDimitry Andric 15035f757f3fSDimitry Andric void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( 15045f757f3fSDimitry Andric SDValue &Chain, unsigned StackSize) { 15055f757f3fSDimitry Andric // If the function takes variable number of arguments, make a frame index for 15065f757f3fSDimitry Andric // the start of the first vararg value... for expansion of llvm.va_start. We 15075f757f3fSDimitry Andric // can skip this if there are no va_start calls. 15085f757f3fSDimitry Andric if (is64Bit() || (CallConv != CallingConv::X86_FastCall && 15095f757f3fSDimitry Andric CallConv != CallingConv::X86_ThisCall)) { 15105f757f3fSDimitry Andric FuncInfo->setVarArgsFrameIndex( 15115f757f3fSDimitry Andric FrameInfo.CreateFixedObject(1, StackSize, true)); 15125f757f3fSDimitry Andric } 15135f757f3fSDimitry Andric 15145f757f3fSDimitry Andric // 64-bit calling conventions support varargs and register parameters, so we 15155f757f3fSDimitry Andric // have to do extra work to spill them in the prologue. 15165f757f3fSDimitry Andric if (is64Bit()) { 15175f757f3fSDimitry Andric // Find the first unallocated argument registers. 15185f757f3fSDimitry Andric ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); 15195f757f3fSDimitry Andric ArrayRef<MCPhysReg> ArgXMMs = 15205f757f3fSDimitry Andric get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget); 15215f757f3fSDimitry Andric unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); 15225f757f3fSDimitry Andric unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); 15235f757f3fSDimitry Andric 15245f757f3fSDimitry Andric assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && 15255f757f3fSDimitry Andric "SSE register cannot be used when SSE is disabled!"); 15265f757f3fSDimitry Andric 15275f757f3fSDimitry Andric if (isWin64()) { 15285f757f3fSDimitry Andric // Get to the caller-allocated home save location. Add 8 to account 15295f757f3fSDimitry Andric // for the return address. 
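// Roughly, on entry to a Win64 callee the caller has already reserved a
// 32-byte home area directly above the return address, one 8-byte slot per
// register parameter (RCX, RDX, R8, R9); the unnamed register arguments are
// spilled into the unused tail of that area.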
15305f757f3fSDimitry Andric int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
15315f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(
15325f757f3fSDimitry Andric FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
15335f757f3fSDimitry Andric // Fixup to set the vararg frame on the shadow area (4 x i64).
15345f757f3fSDimitry Andric if (NumIntRegs < 4)
15355f757f3fSDimitry Andric FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
15365f757f3fSDimitry Andric } else {
15375f757f3fSDimitry Andric // For X86-64, if there are vararg parameters that are passed via
15385f757f3fSDimitry Andric // registers, then we must store them to their spots on the stack so
15395f757f3fSDimitry Andric // they may be loaded by dereferencing the result of va_next.
15405f757f3fSDimitry Andric FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
15415f757f3fSDimitry Andric FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
15425f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
15435f757f3fSDimitry Andric ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
15445f757f3fSDimitry Andric }
15455f757f3fSDimitry Andric
15465f757f3fSDimitry Andric SmallVector<SDValue, 6>
15475f757f3fSDimitry Andric LiveGPRs; // list of SDValues for GPR registers holding live input values
15485f757f3fSDimitry Andric SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValues for XMM registers
15495f757f3fSDimitry Andric // holding live input values
15505f757f3fSDimitry Andric SDValue ALVal; // if applicable, keeps the SDValue for the %al register
15515f757f3fSDimitry Andric
15525f757f3fSDimitry Andric // Gather all the live-in physical registers.
15535f757f3fSDimitry Andric for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
15545f757f3fSDimitry Andric Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
15555f757f3fSDimitry Andric LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
15565f757f3fSDimitry Andric }
15575f757f3fSDimitry Andric const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
15585f757f3fSDimitry Andric if (!AvailableXmms.empty()) {
15595f757f3fSDimitry Andric Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
15605f757f3fSDimitry Andric ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
15615f757f3fSDimitry Andric for (MCPhysReg Reg : AvailableXmms) {
15625f757f3fSDimitry Andric // FastRegisterAllocator spills virtual registers at basic
15635f757f3fSDimitry Andric // block boundaries. That leads to uses of XMM registers
15645f757f3fSDimitry Andric // outside of the check for %al. Pass physical registers to
15655f757f3fSDimitry Andric // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
15665f757f3fSDimitry Andric TheMachineFunction.getRegInfo().addLiveIn(Reg);
15675f757f3fSDimitry Andric LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
15685f757f3fSDimitry Andric }
15695f757f3fSDimitry Andric }
15705f757f3fSDimitry Andric
15715f757f3fSDimitry Andric // Store the integer parameter registers.
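// Sketch of the SysV x86-64 register save area built above (assuming no
// fixed arguments consumed any registers): bytes [0, 48) hold RDI, RSI,
// RDX, RCX, R8, R9 and bytes [48, 176) hold XMM0..XMM7; va_arg later picks
// a slot via the gp_offset/fp_offset fields of va_list.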
15725f757f3fSDimitry Andric SmallVector<SDValue, 8> MemOps; 15735f757f3fSDimitry Andric SDValue RSFIN = 15745f757f3fSDimitry Andric DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), 15755f757f3fSDimitry Andric TargLowering.getPointerTy(DAG.getDataLayout())); 15765f757f3fSDimitry Andric unsigned Offset = FuncInfo->getVarArgsGPOffset(); 15775f757f3fSDimitry Andric for (SDValue Val : LiveGPRs) { 15785f757f3fSDimitry Andric SDValue FIN = DAG.getNode(ISD::ADD, DL, 15795f757f3fSDimitry Andric TargLowering.getPointerTy(DAG.getDataLayout()), 15805f757f3fSDimitry Andric RSFIN, DAG.getIntPtrConstant(Offset, DL)); 15815f757f3fSDimitry Andric SDValue Store = 15825f757f3fSDimitry Andric DAG.getStore(Val.getValue(1), DL, Val, FIN, 15835f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 15845f757f3fSDimitry Andric DAG.getMachineFunction(), 15855f757f3fSDimitry Andric FuncInfo->getRegSaveFrameIndex(), Offset)); 15865f757f3fSDimitry Andric MemOps.push_back(Store); 15875f757f3fSDimitry Andric Offset += 8; 15885f757f3fSDimitry Andric } 15895f757f3fSDimitry Andric 15905f757f3fSDimitry Andric // Now store the XMM (fp + vector) parameter registers. 15915f757f3fSDimitry Andric if (!LiveXMMRegs.empty()) { 15925f757f3fSDimitry Andric SmallVector<SDValue, 12> SaveXMMOps; 15935f757f3fSDimitry Andric SaveXMMOps.push_back(Chain); 15945f757f3fSDimitry Andric SaveXMMOps.push_back(ALVal); 15955f757f3fSDimitry Andric SaveXMMOps.push_back(RSFIN); 15965f757f3fSDimitry Andric SaveXMMOps.push_back( 15975f757f3fSDimitry Andric DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32)); 15985f757f3fSDimitry Andric llvm::append_range(SaveXMMOps, LiveXMMRegs); 15995f757f3fSDimitry Andric MachineMemOperand *StoreMMO = 16005f757f3fSDimitry Andric DAG.getMachineFunction().getMachineMemOperand( 16015f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 16025f757f3fSDimitry Andric DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), 16035f757f3fSDimitry Andric Offset), 16045f757f3fSDimitry Andric MachineMemOperand::MOStore, 128, Align(16)); 16055f757f3fSDimitry Andric MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS, 16065f757f3fSDimitry Andric DL, DAG.getVTList(MVT::Other), 16075f757f3fSDimitry Andric SaveXMMOps, MVT::i8, StoreMMO)); 16085f757f3fSDimitry Andric } 16095f757f3fSDimitry Andric 16105f757f3fSDimitry Andric if (!MemOps.empty()) 16115f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 16125f757f3fSDimitry Andric } 16135f757f3fSDimitry Andric } 16145f757f3fSDimitry Andric 16155f757f3fSDimitry Andric void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { 16165f757f3fSDimitry Andric // Find the largest legal vector type. 16175f757f3fSDimitry Andric MVT VecVT = MVT::Other; 16185f757f3fSDimitry Andric // FIXME: Only some x86_32 calling conventions support AVX512. 16195f757f3fSDimitry Andric if (Subtarget.useAVX512Regs() && 16205f757f3fSDimitry Andric (is64Bit() || (CallConv == CallingConv::X86_VectorCall || 16215f757f3fSDimitry Andric CallConv == CallingConv::Intel_OCL_BI))) 16225f757f3fSDimitry Andric VecVT = MVT::v16f32; 16235f757f3fSDimitry Andric else if (Subtarget.hasAVX()) 16245f757f3fSDimitry Andric VecVT = MVT::v8f32; 16255f757f3fSDimitry Andric else if (Subtarget.hasSSE2()) 16265f757f3fSDimitry Andric VecVT = MVT::v4f32; 16275f757f3fSDimitry Andric 16285f757f3fSDimitry Andric // We forward some GPRs and some vector types. 
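// Any argument register of these types that the fixed prototype did not
// claim is copied into a virtual register below, so a later musttail call
// can forward it to the next callee unchanged.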
16295f757f3fSDimitry Andric SmallVector<MVT, 2> RegParmTypes; 16305f757f3fSDimitry Andric MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32; 16315f757f3fSDimitry Andric RegParmTypes.push_back(IntVT); 16325f757f3fSDimitry Andric if (VecVT != MVT::Other) 16335f757f3fSDimitry Andric RegParmTypes.push_back(VecVT); 16345f757f3fSDimitry Andric 16355f757f3fSDimitry Andric // Compute the set of forwarded registers. The rest are scratch. 16365f757f3fSDimitry Andric SmallVectorImpl<ForwardedRegister> &Forwards = 16375f757f3fSDimitry Andric FuncInfo->getForwardedMustTailRegParms(); 16385f757f3fSDimitry Andric CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); 16395f757f3fSDimitry Andric 16405f757f3fSDimitry Andric // Forward AL for SysV x86_64 targets, since it is used for varargs. 16415f757f3fSDimitry Andric if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) { 16425f757f3fSDimitry Andric Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass); 16435f757f3fSDimitry Andric Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); 16445f757f3fSDimitry Andric } 16455f757f3fSDimitry Andric 16465f757f3fSDimitry Andric // Copy all forwards from physical to virtual registers. 16475f757f3fSDimitry Andric for (ForwardedRegister &FR : Forwards) { 16485f757f3fSDimitry Andric // FIXME: Can we use a less constrained schedule? 16495f757f3fSDimitry Andric SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT); 16505f757f3fSDimitry Andric FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister( 16515f757f3fSDimitry Andric TargLowering.getRegClassFor(FR.VT)); 16525f757f3fSDimitry Andric Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal); 16535f757f3fSDimitry Andric } 16545f757f3fSDimitry Andric } 16555f757f3fSDimitry Andric 16565f757f3fSDimitry Andric void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain, 16575f757f3fSDimitry Andric unsigned StackSize) { 16585f757f3fSDimitry Andric // Set FrameIndex to the 0xAAAAAAA value to mark unset state. 16595f757f3fSDimitry Andric // If necessary, it would be set into the correct value later. 
16605f757f3fSDimitry Andric FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); 16615f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 16625f757f3fSDimitry Andric 16635f757f3fSDimitry Andric if (FrameInfo.hasVAStart()) 16645f757f3fSDimitry Andric createVarArgAreaAndStoreRegisters(Chain, StackSize); 16655f757f3fSDimitry Andric 16665f757f3fSDimitry Andric if (FrameInfo.hasMustTailInVarArgFunc()) 16675f757f3fSDimitry Andric forwardMustTailParameters(Chain); 16685f757f3fSDimitry Andric } 16695f757f3fSDimitry Andric 16705f757f3fSDimitry Andric SDValue X86TargetLowering::LowerFormalArguments( 16715f757f3fSDimitry Andric SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 16725f757f3fSDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 16735f757f3fSDimitry Andric SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 16745f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 16755f757f3fSDimitry Andric X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 16765f757f3fSDimitry Andric 16775f757f3fSDimitry Andric const Function &F = MF.getFunction(); 16785f757f3fSDimitry Andric if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && 16795f757f3fSDimitry Andric F.getName() == "main") 16805f757f3fSDimitry Andric FuncInfo->setForceFramePointer(true); 16815f757f3fSDimitry Andric 16825f757f3fSDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 16835f757f3fSDimitry Andric bool Is64Bit = Subtarget.is64Bit(); 16845f757f3fSDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 16855f757f3fSDimitry Andric 16865f757f3fSDimitry Andric assert( 16875f757f3fSDimitry Andric !(IsVarArg && canGuaranteeTCO(CallConv)) && 16885f757f3fSDimitry Andric "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); 16895f757f3fSDimitry Andric 16905f757f3fSDimitry Andric // Assign locations to all of the incoming arguments. 16915f757f3fSDimitry Andric SmallVector<CCValAssign, 16> ArgLocs; 16925f757f3fSDimitry Andric CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 16935f757f3fSDimitry Andric 16945f757f3fSDimitry Andric // Allocate shadow area for Win64. 16955f757f3fSDimitry Andric if (IsWin64) 16965f757f3fSDimitry Andric CCInfo.AllocateStack(32, Align(8)); 16975f757f3fSDimitry Andric 16985f757f3fSDimitry Andric CCInfo.AnalyzeArguments(Ins, CC_X86); 16995f757f3fSDimitry Andric 17005f757f3fSDimitry Andric // In vectorcall calling convention a second pass is required for the HVA 17015f757f3fSDimitry Andric // types. 17025f757f3fSDimitry Andric if (CallingConv::X86_VectorCall == CallConv) { 17035f757f3fSDimitry Andric CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); 17045f757f3fSDimitry Andric } 17055f757f3fSDimitry Andric 17065f757f3fSDimitry Andric // The next loop assumes that the locations are in the same order of the 17075f757f3fSDimitry Andric // input arguments. 
17085f757f3fSDimitry Andric assert(isSortedByValueNo(ArgLocs) && 17095f757f3fSDimitry Andric "Argument Location list must be sorted before lowering"); 17105f757f3fSDimitry Andric 17115f757f3fSDimitry Andric SDValue ArgValue; 17125f757f3fSDimitry Andric for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; 17135f757f3fSDimitry Andric ++I, ++InsIndex) { 17145f757f3fSDimitry Andric assert(InsIndex < Ins.size() && "Invalid Ins index"); 17155f757f3fSDimitry Andric CCValAssign &VA = ArgLocs[I]; 17165f757f3fSDimitry Andric 17175f757f3fSDimitry Andric if (VA.isRegLoc()) { 17185f757f3fSDimitry Andric EVT RegVT = VA.getLocVT(); 17195f757f3fSDimitry Andric if (VA.needsCustom()) { 17205f757f3fSDimitry Andric assert( 17215f757f3fSDimitry Andric VA.getValVT() == MVT::v64i1 && 17225f757f3fSDimitry Andric "Currently the only custom case is when we split v64i1 to 2 regs"); 17235f757f3fSDimitry Andric 17245f757f3fSDimitry Andric // v64i1 values, in regcall calling convention, that are 17255f757f3fSDimitry Andric // compiled to 32 bit arch, are split up into two registers. 17265f757f3fSDimitry Andric ArgValue = 17275f757f3fSDimitry Andric getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); 17285f757f3fSDimitry Andric } else { 17295f757f3fSDimitry Andric const TargetRegisterClass *RC; 17305f757f3fSDimitry Andric if (RegVT == MVT::i8) 17315f757f3fSDimitry Andric RC = &X86::GR8RegClass; 17325f757f3fSDimitry Andric else if (RegVT == MVT::i16) 17335f757f3fSDimitry Andric RC = &X86::GR16RegClass; 17345f757f3fSDimitry Andric else if (RegVT == MVT::i32) 17355f757f3fSDimitry Andric RC = &X86::GR32RegClass; 17365f757f3fSDimitry Andric else if (Is64Bit && RegVT == MVT::i64) 17375f757f3fSDimitry Andric RC = &X86::GR64RegClass; 17385f757f3fSDimitry Andric else if (RegVT == MVT::f16) 17395f757f3fSDimitry Andric RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; 17405f757f3fSDimitry Andric else if (RegVT == MVT::f32) 17415f757f3fSDimitry Andric RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; 17425f757f3fSDimitry Andric else if (RegVT == MVT::f64) 17435f757f3fSDimitry Andric RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; 17445f757f3fSDimitry Andric else if (RegVT == MVT::f80) 17455f757f3fSDimitry Andric RC = &X86::RFP80RegClass; 17465f757f3fSDimitry Andric else if (RegVT == MVT::f128) 17475f757f3fSDimitry Andric RC = &X86::VR128RegClass; 17485f757f3fSDimitry Andric else if (RegVT.is512BitVector()) 17495f757f3fSDimitry Andric RC = &X86::VR512RegClass; 17505f757f3fSDimitry Andric else if (RegVT.is256BitVector()) 17515f757f3fSDimitry Andric RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; 17525f757f3fSDimitry Andric else if (RegVT.is128BitVector()) 17535f757f3fSDimitry Andric RC = Subtarget.hasVLX() ? 
&X86::VR128XRegClass : &X86::VR128RegClass; 17545f757f3fSDimitry Andric else if (RegVT == MVT::x86mmx) 17555f757f3fSDimitry Andric RC = &X86::VR64RegClass; 17565f757f3fSDimitry Andric else if (RegVT == MVT::v1i1) 17575f757f3fSDimitry Andric RC = &X86::VK1RegClass; 17585f757f3fSDimitry Andric else if (RegVT == MVT::v8i1) 17595f757f3fSDimitry Andric RC = &X86::VK8RegClass; 17605f757f3fSDimitry Andric else if (RegVT == MVT::v16i1) 17615f757f3fSDimitry Andric RC = &X86::VK16RegClass; 17625f757f3fSDimitry Andric else if (RegVT == MVT::v32i1) 17635f757f3fSDimitry Andric RC = &X86::VK32RegClass; 17645f757f3fSDimitry Andric else if (RegVT == MVT::v64i1) 17655f757f3fSDimitry Andric RC = &X86::VK64RegClass; 17665f757f3fSDimitry Andric else 17675f757f3fSDimitry Andric llvm_unreachable("Unknown argument type!"); 17685f757f3fSDimitry Andric 17695f757f3fSDimitry Andric Register Reg = MF.addLiveIn(VA.getLocReg(), RC); 17705f757f3fSDimitry Andric ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 17715f757f3fSDimitry Andric } 17725f757f3fSDimitry Andric 17735f757f3fSDimitry Andric // If this is an 8 or 16-bit value, it is really passed promoted to 32 17745f757f3fSDimitry Andric // bits. Insert an assert[sz]ext to capture this, then truncate to the 17755f757f3fSDimitry Andric // right size. 17765f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::SExt) 17775f757f3fSDimitry Andric ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 17785f757f3fSDimitry Andric DAG.getValueType(VA.getValVT())); 17795f757f3fSDimitry Andric else if (VA.getLocInfo() == CCValAssign::ZExt) 17805f757f3fSDimitry Andric ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 17815f757f3fSDimitry Andric DAG.getValueType(VA.getValVT())); 17825f757f3fSDimitry Andric else if (VA.getLocInfo() == CCValAssign::BCvt) 17835f757f3fSDimitry Andric ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); 17845f757f3fSDimitry Andric 17855f757f3fSDimitry Andric if (VA.isExtInLoc()) { 17865f757f3fSDimitry Andric // Handle MMX values passed in XMM regs. 17875f757f3fSDimitry Andric if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) 17885f757f3fSDimitry Andric ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); 17895f757f3fSDimitry Andric else if (VA.getValVT().isVector() && 17905f757f3fSDimitry Andric VA.getValVT().getScalarType() == MVT::i1 && 17915f757f3fSDimitry Andric ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || 17925f757f3fSDimitry Andric (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { 17935f757f3fSDimitry Andric // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 17945f757f3fSDimitry Andric ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); 17955f757f3fSDimitry Andric } else 17965f757f3fSDimitry Andric ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 17975f757f3fSDimitry Andric } 17985f757f3fSDimitry Andric } else { 17995f757f3fSDimitry Andric assert(VA.isMemLoc()); 18005f757f3fSDimitry Andric ArgValue = 18015f757f3fSDimitry Andric LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); 18025f757f3fSDimitry Andric } 18035f757f3fSDimitry Andric 18045f757f3fSDimitry Andric // If value is passed via pointer - do a load. 
18055f757f3fSDimitry Andric if (VA.getLocInfo() == CCValAssign::Indirect && 18065f757f3fSDimitry Andric !(Ins[I].Flags.isByVal() && VA.isRegLoc())) { 18075f757f3fSDimitry Andric ArgValue = 18085f757f3fSDimitry Andric DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); 18095f757f3fSDimitry Andric } 18105f757f3fSDimitry Andric 18115f757f3fSDimitry Andric InVals.push_back(ArgValue); 18125f757f3fSDimitry Andric } 18135f757f3fSDimitry Andric 18145f757f3fSDimitry Andric for (unsigned I = 0, E = Ins.size(); I != E; ++I) { 18155f757f3fSDimitry Andric if (Ins[I].Flags.isSwiftAsync()) { 18165f757f3fSDimitry Andric auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1817*0fca6ea1SDimitry Andric if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) 18185f757f3fSDimitry Andric X86FI->setHasSwiftAsyncContext(true); 18195f757f3fSDimitry Andric else { 1820*0fca6ea1SDimitry Andric int PtrSize = Subtarget.is64Bit() ? 8 : 4; 1821*0fca6ea1SDimitry Andric int FI = 1822*0fca6ea1SDimitry Andric MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false); 18235f757f3fSDimitry Andric X86FI->setSwiftAsyncContextFrameIdx(FI); 1824*0fca6ea1SDimitry Andric SDValue St = DAG.getStore( 1825*0fca6ea1SDimitry Andric DAG.getEntryNode(), dl, InVals[I], 1826*0fca6ea1SDimitry Andric DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32), 18275f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(MF, FI)); 18285f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain); 18295f757f3fSDimitry Andric } 18305f757f3fSDimitry Andric } 18315f757f3fSDimitry Andric 18325f757f3fSDimitry Andric // Swift calling convention does not require we copy the sret argument 18335f757f3fSDimitry Andric // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. 18345f757f3fSDimitry Andric if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) 18355f757f3fSDimitry Andric continue; 18365f757f3fSDimitry Andric 18375f757f3fSDimitry Andric // All x86 ABIs require that for returning structs by value we copy the 18385f757f3fSDimitry Andric // sret argument into %rax/%eax (depending on ABI) for the return. Save 18395f757f3fSDimitry Andric // the argument into a virtual register so that we can access it from the 18405f757f3fSDimitry Andric // return points. 18415f757f3fSDimitry Andric if (Ins[I].Flags.isSRet()) { 18425f757f3fSDimitry Andric assert(!FuncInfo->getSRetReturnReg() && 18435f757f3fSDimitry Andric "SRet return has already been set"); 18445f757f3fSDimitry Andric MVT PtrTy = getPointerTy(DAG.getDataLayout()); 18455f757f3fSDimitry Andric Register Reg = 18465f757f3fSDimitry Andric MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); 18475f757f3fSDimitry Andric FuncInfo->setSRetReturnReg(Reg); 18485f757f3fSDimitry Andric SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); 18495f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); 18505f757f3fSDimitry Andric break; 18515f757f3fSDimitry Andric } 18525f757f3fSDimitry Andric } 18535f757f3fSDimitry Andric 18545f757f3fSDimitry Andric unsigned StackSize = CCInfo.getStackSize(); 18555f757f3fSDimitry Andric // Align stack specially for tail calls. 
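// With guaranteed TCO the incoming argument area is rounded up to a
// stack-aligned size so that caller and callee agree on its extent when a
// tail call reuses it in place.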
18565f757f3fSDimitry Andric if (shouldGuaranteeTCO(CallConv, 18575f757f3fSDimitry Andric MF.getTarget().Options.GuaranteedTailCallOpt)) 18585f757f3fSDimitry Andric StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 18595f757f3fSDimitry Andric 18605f757f3fSDimitry Andric if (IsVarArg) 18615f757f3fSDimitry Andric VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) 18625f757f3fSDimitry Andric .lowerVarArgsParameters(Chain, StackSize); 18635f757f3fSDimitry Andric 18645f757f3fSDimitry Andric // Some CCs need callee pop. 18655f757f3fSDimitry Andric if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg, 18665f757f3fSDimitry Andric MF.getTarget().Options.GuaranteedTailCallOpt)) { 18675f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. 18685f757f3fSDimitry Andric } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { 18695f757f3fSDimitry Andric // X86 interrupts must pop the error code (and the alignment padding) if 18705f757f3fSDimitry Andric // present. 18715f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); 18725f757f3fSDimitry Andric } else { 18735f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. 18745f757f3fSDimitry Andric // If this is an sret function, the return should pop the hidden pointer. 18755f757f3fSDimitry Andric if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget)) 18765f757f3fSDimitry Andric FuncInfo->setBytesToPopOnReturn(4); 18775f757f3fSDimitry Andric } 18785f757f3fSDimitry Andric 18795f757f3fSDimitry Andric if (!Is64Bit) { 18805f757f3fSDimitry Andric // RegSaveFrameIndex is X86-64 only. 18815f757f3fSDimitry Andric FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 18825f757f3fSDimitry Andric } 18835f757f3fSDimitry Andric 18845f757f3fSDimitry Andric FuncInfo->setArgumentStackSize(StackSize); 18855f757f3fSDimitry Andric 18865f757f3fSDimitry Andric if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { 18875f757f3fSDimitry Andric EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); 18885f757f3fSDimitry Andric if (Personality == EHPersonality::CoreCLR) { 18895f757f3fSDimitry Andric assert(Is64Bit); 18905f757f3fSDimitry Andric // TODO: Add a mechanism to frame lowering that will allow us to indicate 18915f757f3fSDimitry Andric // that we'd prefer this slot be allocated towards the bottom of the frame 18925f757f3fSDimitry Andric // (i.e. near the stack pointer after allocating the frame). Every 18935f757f3fSDimitry Andric // funclet needs a copy of this slot in its (mostly empty) frame, and the 18945f757f3fSDimitry Andric // offset from the bottom of this and each funclet's frame must be the 18955f757f3fSDimitry Andric // same, so the size of funclets' (mostly empty) frames is dictated by 18965f757f3fSDimitry Andric // how far this slot is from the bottom (since they allocate just enough 18975f757f3fSDimitry Andric // space to accommodate holding this slot at the correct offset). 
18985f757f3fSDimitry Andric int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false); 18995f757f3fSDimitry Andric EHInfo->PSPSymFrameIdx = PSPSymFI; 19005f757f3fSDimitry Andric } 19015f757f3fSDimitry Andric } 19025f757f3fSDimitry Andric 19035f757f3fSDimitry Andric if (shouldDisableArgRegFromCSR(CallConv) || 19045f757f3fSDimitry Andric F.hasFnAttribute("no_caller_saved_registers")) { 19055f757f3fSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 19065f757f3fSDimitry Andric for (std::pair<Register, Register> Pair : MRI.liveins()) 19075f757f3fSDimitry Andric MRI.disableCalleeSavedRegister(Pair.first); 19085f757f3fSDimitry Andric } 19095f757f3fSDimitry Andric 1910*0fca6ea1SDimitry Andric if (CallingConv::PreserveNone == CallConv) 1911*0fca6ea1SDimitry Andric for (unsigned I = 0, E = Ins.size(); I != E; ++I) { 1912*0fca6ea1SDimitry Andric if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() || 1913*0fca6ea1SDimitry Andric Ins[I].Flags.isSwiftError()) { 1914*0fca6ea1SDimitry Andric errorUnsupported(DAG, dl, 1915*0fca6ea1SDimitry Andric "Swift attributes can't be used with preserve_none"); 1916*0fca6ea1SDimitry Andric break; 1917*0fca6ea1SDimitry Andric } 1918*0fca6ea1SDimitry Andric } 1919*0fca6ea1SDimitry Andric 19205f757f3fSDimitry Andric return Chain; 19215f757f3fSDimitry Andric } 19225f757f3fSDimitry Andric 19235f757f3fSDimitry Andric SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, 19245f757f3fSDimitry Andric SDValue Arg, const SDLoc &dl, 19255f757f3fSDimitry Andric SelectionDAG &DAG, 19265f757f3fSDimitry Andric const CCValAssign &VA, 19275f757f3fSDimitry Andric ISD::ArgFlagsTy Flags, 19285f757f3fSDimitry Andric bool isByVal) const { 19295f757f3fSDimitry Andric unsigned LocMemOffset = VA.getLocMemOffset(); 19305f757f3fSDimitry Andric SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 19315f757f3fSDimitry Andric PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 19325f757f3fSDimitry Andric StackPtr, PtrOff); 19335f757f3fSDimitry Andric if (isByVal) 19345f757f3fSDimitry Andric return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 19355f757f3fSDimitry Andric 19365f757f3fSDimitry Andric MaybeAlign Alignment; 19375f757f3fSDimitry Andric if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && 19385f757f3fSDimitry Andric Arg.getSimpleValueType() != MVT::f80) 19395f757f3fSDimitry Andric Alignment = MaybeAlign(4); 19405f757f3fSDimitry Andric return DAG.getStore( 19415f757f3fSDimitry Andric Chain, dl, Arg, PtrOff, 19425f757f3fSDimitry Andric MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), 19435f757f3fSDimitry Andric Alignment); 19445f757f3fSDimitry Andric } 19455f757f3fSDimitry Andric 19465f757f3fSDimitry Andric /// Emit a load of return address if tail call 19475f757f3fSDimitry Andric /// optimization is performed and it is required. 19485f757f3fSDimitry Andric SDValue X86TargetLowering::EmitTailCallLoadRetAddr( 19495f757f3fSDimitry Andric SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, 19505f757f3fSDimitry Andric bool Is64Bit, int FPDiff, const SDLoc &dl) const { 19515f757f3fSDimitry Andric // Adjust the Return address stack slot. 19525f757f3fSDimitry Andric EVT VT = getPointerTy(DAG.getDataLayout()); 19535f757f3fSDimitry Andric OutRetAddr = getReturnAddressFrameIndex(DAG); 19545f757f3fSDimitry Andric 19555f757f3fSDimitry Andric // Load the "old" Return address. 
19565f757f3fSDimitry Andric OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); 19575f757f3fSDimitry Andric return SDValue(OutRetAddr.getNode(), 1); 19585f757f3fSDimitry Andric } 19595f757f3fSDimitry Andric 19605f757f3fSDimitry Andric /// Emit a store of the return address if tail call 19615f757f3fSDimitry Andric /// optimization is performed and it is required (FPDiff!=0). 19625f757f3fSDimitry Andric static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, 19635f757f3fSDimitry Andric SDValue Chain, SDValue RetAddrFrIdx, 19645f757f3fSDimitry Andric EVT PtrVT, unsigned SlotSize, 19655f757f3fSDimitry Andric int FPDiff, const SDLoc &dl) { 19665f757f3fSDimitry Andric // Store the return address to the appropriate stack slot. 19675f757f3fSDimitry Andric if (!FPDiff) return Chain; 19685f757f3fSDimitry Andric // Calculate the new stack slot for the return address. 19695f757f3fSDimitry Andric int NewReturnAddrFI = 19705f757f3fSDimitry Andric MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, 19715f757f3fSDimitry Andric false); 19725f757f3fSDimitry Andric SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); 19735f757f3fSDimitry Andric Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, 19745f757f3fSDimitry Andric MachinePointerInfo::getFixedStack( 19755f757f3fSDimitry Andric DAG.getMachineFunction(), NewReturnAddrFI)); 19765f757f3fSDimitry Andric return Chain; 19775f757f3fSDimitry Andric } 19785f757f3fSDimitry Andric 19795f757f3fSDimitry Andric /// Returns a vector_shuffle mask for an movs{s|d}, movd 19805f757f3fSDimitry Andric /// operation of specified width. 19815f757f3fSDimitry Andric SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, 19825f757f3fSDimitry Andric SDValue V1, SDValue V2) const { 19835f757f3fSDimitry Andric unsigned NumElems = VT.getVectorNumElements(); 19845f757f3fSDimitry Andric SmallVector<int, 8> Mask; 19855f757f3fSDimitry Andric Mask.push_back(NumElems); 19865f757f3fSDimitry Andric for (unsigned i = 1; i != NumElems; ++i) 19875f757f3fSDimitry Andric Mask.push_back(i); 19885f757f3fSDimitry Andric return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); 19895f757f3fSDimitry Andric } 19905f757f3fSDimitry Andric 19915f757f3fSDimitry Andric SDValue 19925f757f3fSDimitry Andric X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 19935f757f3fSDimitry Andric SmallVectorImpl<SDValue> &InVals) const { 19945f757f3fSDimitry Andric SelectionDAG &DAG = CLI.DAG; 19955f757f3fSDimitry Andric SDLoc &dl = CLI.DL; 19965f757f3fSDimitry Andric SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 19975f757f3fSDimitry Andric SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 19985f757f3fSDimitry Andric SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 19995f757f3fSDimitry Andric SDValue Chain = CLI.Chain; 20005f757f3fSDimitry Andric SDValue Callee = CLI.Callee; 20015f757f3fSDimitry Andric CallingConv::ID CallConv = CLI.CallConv; 20025f757f3fSDimitry Andric bool &isTailCall = CLI.IsTailCall; 20035f757f3fSDimitry Andric bool isVarArg = CLI.IsVarArg; 20045f757f3fSDimitry Andric const auto *CB = CLI.CB; 20055f757f3fSDimitry Andric 20065f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 20075f757f3fSDimitry Andric bool Is64Bit = Subtarget.is64Bit(); 20085f757f3fSDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 20095f757f3fSDimitry Andric bool IsSibcall = false; 20105f757f3fSDimitry Andric bool IsGuaranteeTCO = 
MF.getTarget().Options.GuaranteedTailCallOpt || 20115f757f3fSDimitry Andric CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; 20125f757f3fSDimitry Andric bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget); 20135f757f3fSDimitry Andric X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); 20145f757f3fSDimitry Andric bool HasNCSR = (CB && isa<CallInst>(CB) && 20155f757f3fSDimitry Andric CB->hasFnAttr("no_caller_saved_registers")); 20165f757f3fSDimitry Andric bool HasNoCfCheck = (CB && CB->doesNoCfCheck()); 20175f757f3fSDimitry Andric bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall()); 20185f757f3fSDimitry Andric bool IsCFICall = IsIndirectCall && CLI.CFIType; 2019*0fca6ea1SDimitry Andric const Module *M = MF.getFunction().getParent(); 20205f757f3fSDimitry Andric Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); 20215f757f3fSDimitry Andric 20225f757f3fSDimitry Andric MachineFunction::CallSiteInfo CSInfo; 20235f757f3fSDimitry Andric if (CallConv == CallingConv::X86_INTR) 20245f757f3fSDimitry Andric report_fatal_error("X86 interrupts may not be called directly"); 20255f757f3fSDimitry Andric 20265f757f3fSDimitry Andric // Analyze operands of the call, assigning locations to each operand. 20275f757f3fSDimitry Andric SmallVector<CCValAssign, 16> ArgLocs; 20285f757f3fSDimitry Andric CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 20295f757f3fSDimitry Andric 20305f757f3fSDimitry Andric // Allocate shadow area for Win64. 20315f757f3fSDimitry Andric if (IsWin64) 20325f757f3fSDimitry Andric CCInfo.AllocateStack(32, Align(8)); 20335f757f3fSDimitry Andric 20345f757f3fSDimitry Andric CCInfo.AnalyzeArguments(Outs, CC_X86); 20355f757f3fSDimitry Andric 20365f757f3fSDimitry Andric // In vectorcall calling convention a second pass is required for the HVA 20375f757f3fSDimitry Andric // types. 20385f757f3fSDimitry Andric if (CallingConv::X86_VectorCall == CallConv) { 20395f757f3fSDimitry Andric CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); 20405f757f3fSDimitry Andric } 20415f757f3fSDimitry Andric 2042*0fca6ea1SDimitry Andric bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); 2043*0fca6ea1SDimitry Andric if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { 2044*0fca6ea1SDimitry Andric // If we are using a GOT, disable tail calls to external symbols with 2045*0fca6ea1SDimitry Andric // default visibility. Tail calling such a symbol requires using a GOT 2046*0fca6ea1SDimitry Andric // relocation, which forces early binding of the symbol. This breaks code 2047*0fca6ea1SDimitry Andric // that require lazy function symbol resolution. Using musttail or 2048*0fca6ea1SDimitry Andric // GuaranteedTailCallOpt will override this. 2049*0fca6ea1SDimitry Andric GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 2050*0fca6ea1SDimitry Andric if (!G || (!G->getGlobal()->hasLocalLinkage() && 2051*0fca6ea1SDimitry Andric G->getGlobal()->hasDefaultVisibility())) 2052*0fca6ea1SDimitry Andric isTailCall = false; 2053*0fca6ea1SDimitry Andric } 2054*0fca6ea1SDimitry Andric 2055*0fca6ea1SDimitry Andric if (isTailCall && !IsMustTail) { 2056*0fca6ea1SDimitry Andric // Check if it's really possible to do a tail call. 
2057*0fca6ea1SDimitry Andric isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, 2058*0fca6ea1SDimitry Andric IsCalleePopSRet); 2059*0fca6ea1SDimitry Andric 2060*0fca6ea1SDimitry Andric // Sibcalls are automatically detected tailcalls which do not require 2061*0fca6ea1SDimitry Andric // ABI changes. 2062*0fca6ea1SDimitry Andric if (!IsGuaranteeTCO && isTailCall) 2063*0fca6ea1SDimitry Andric IsSibcall = true; 2064*0fca6ea1SDimitry Andric 2065*0fca6ea1SDimitry Andric if (isTailCall) 2066*0fca6ea1SDimitry Andric ++NumTailCalls; 2067*0fca6ea1SDimitry Andric } 2068*0fca6ea1SDimitry Andric 2069*0fca6ea1SDimitry Andric if (IsMustTail && !isTailCall) 2070*0fca6ea1SDimitry Andric report_fatal_error("failed to perform tail call elimination on a call " 2071*0fca6ea1SDimitry Andric "site marked musttail"); 2072*0fca6ea1SDimitry Andric 2073*0fca6ea1SDimitry Andric assert(!(isVarArg && canGuaranteeTCO(CallConv)) && 2074*0fca6ea1SDimitry Andric "Var args not supported with calling convention fastcc, ghc or hipe"); 2075*0fca6ea1SDimitry Andric 20765f757f3fSDimitry Andric // Get a count of how many bytes are to be pushed on the stack. 20775f757f3fSDimitry Andric unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 20785f757f3fSDimitry Andric if (IsSibcall) 20795f757f3fSDimitry Andric // This is a sibcall. The memory operands are available in caller's 20805f757f3fSDimitry Andric // own caller's stack. 20815f757f3fSDimitry Andric NumBytes = 0; 20825f757f3fSDimitry Andric else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) 20835f757f3fSDimitry Andric NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 20845f757f3fSDimitry Andric 20855f757f3fSDimitry Andric int FPDiff = 0; 20865f757f3fSDimitry Andric if (isTailCall && 20875f757f3fSDimitry Andric shouldGuaranteeTCO(CallConv, 20885f757f3fSDimitry Andric MF.getTarget().Options.GuaranteedTailCallOpt)) { 20895f757f3fSDimitry Andric // Lower arguments at fp - stackoffset + fpdiff. 20905f757f3fSDimitry Andric unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); 20915f757f3fSDimitry Andric 20925f757f3fSDimitry Andric FPDiff = NumBytesCallerPushed - NumBytes; 20935f757f3fSDimitry Andric 20945f757f3fSDimitry Andric // Set the delta of movement of the returnaddr stackslot. 20955f757f3fSDimitry Andric // But only set if delta is greater than previous delta. 20965f757f3fSDimitry Andric if (FPDiff < X86Info->getTCReturnAddrDelta()) 20975f757f3fSDimitry Andric X86Info->setTCReturnAddrDelta(FPDiff); 20985f757f3fSDimitry Andric } 20995f757f3fSDimitry Andric 21005f757f3fSDimitry Andric unsigned NumBytesToPush = NumBytes; 21015f757f3fSDimitry Andric unsigned NumBytesToPop = NumBytes; 21025f757f3fSDimitry Andric 21035f757f3fSDimitry Andric // If we have an inalloca argument, all stack space has already been allocated 21045f757f3fSDimitry Andric // for us and be right at the top of the stack. We don't support multiple 21055f757f3fSDimitry Andric // arguments passed in memory when using inalloca. 
21065f757f3fSDimitry Andric if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
21075f757f3fSDimitry Andric NumBytesToPush = 0;
21085f757f3fSDimitry Andric if (!ArgLocs.back().isMemLoc())
21095f757f3fSDimitry Andric report_fatal_error("cannot use inalloca attribute on a register "
21105f757f3fSDimitry Andric "parameter");
21115f757f3fSDimitry Andric if (ArgLocs.back().getLocMemOffset() != 0)
21125f757f3fSDimitry Andric report_fatal_error("any parameter with the inalloca attribute must be "
21135f757f3fSDimitry Andric "the only memory argument");
21145f757f3fSDimitry Andric } else if (CLI.IsPreallocated) {
21155f757f3fSDimitry Andric assert(ArgLocs.back().isMemLoc() &&
21165f757f3fSDimitry Andric "cannot use preallocated attribute on a register "
21175f757f3fSDimitry Andric "parameter");
21185f757f3fSDimitry Andric SmallVector<size_t, 4> PreallocatedOffsets;
21195f757f3fSDimitry Andric for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
21205f757f3fSDimitry Andric if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
21215f757f3fSDimitry Andric PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
21225f757f3fSDimitry Andric }
21235f757f3fSDimitry Andric }
21245f757f3fSDimitry Andric auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
21255f757f3fSDimitry Andric size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
21265f757f3fSDimitry Andric MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
21275f757f3fSDimitry Andric MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
21285f757f3fSDimitry Andric NumBytesToPush = 0;
21295f757f3fSDimitry Andric }
21305f757f3fSDimitry Andric
21315f757f3fSDimitry Andric if (!IsSibcall && !IsMustTail)
21325f757f3fSDimitry Andric Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
21335f757f3fSDimitry Andric NumBytes - NumBytesToPush, dl);
21345f757f3fSDimitry Andric
21355f757f3fSDimitry Andric SDValue RetAddrFrIdx;
21365f757f3fSDimitry Andric // Load return address for tail calls.
21375f757f3fSDimitry Andric if (isTailCall && FPDiff)
21385f757f3fSDimitry Andric Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
21395f757f3fSDimitry Andric Is64Bit, FPDiff, dl);
21405f757f3fSDimitry Andric
21415f757f3fSDimitry Andric SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
21425f757f3fSDimitry Andric SmallVector<SDValue, 8> MemOpChains;
21435f757f3fSDimitry Andric SDValue StackPtr;
21445f757f3fSDimitry Andric
21455f757f3fSDimitry Andric // The next loop assumes that the locations are in the same order as the
21465f757f3fSDimitry Andric // input arguments.
21475f757f3fSDimitry Andric assert(isSortedByValueNo(ArgLocs) &&
21485f757f3fSDimitry Andric "Argument Location list must be sorted before lowering");
21495f757f3fSDimitry Andric
21505f757f3fSDimitry Andric // Walk the register/memloc assignments, inserting copies/loads. In the case
21515f757f3fSDimitry Andric // of tail call optimization, arguments are handled later.
21525f757f3fSDimitry Andric const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
21535f757f3fSDimitry Andric for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
21545f757f3fSDimitry Andric ++I, ++OutIndex) {
21555f757f3fSDimitry Andric assert(OutIndex < Outs.size() && "Invalid Out index");
21565f757f3fSDimitry Andric // Skip inalloca/preallocated arguments, they have already been written.
21575f757f3fSDimitry Andric ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; 21585f757f3fSDimitry Andric if (Flags.isInAlloca() || Flags.isPreallocated()) 21595f757f3fSDimitry Andric continue; 21605f757f3fSDimitry Andric 21615f757f3fSDimitry Andric CCValAssign &VA = ArgLocs[I]; 21625f757f3fSDimitry Andric EVT RegVT = VA.getLocVT(); 21635f757f3fSDimitry Andric SDValue Arg = OutVals[OutIndex]; 21645f757f3fSDimitry Andric bool isByVal = Flags.isByVal(); 21655f757f3fSDimitry Andric 21665f757f3fSDimitry Andric // Promote the value if needed. 21675f757f3fSDimitry Andric switch (VA.getLocInfo()) { 21685f757f3fSDimitry Andric default: llvm_unreachable("Unknown loc info!"); 21695f757f3fSDimitry Andric case CCValAssign::Full: break; 21705f757f3fSDimitry Andric case CCValAssign::SExt: 21715f757f3fSDimitry Andric Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 21725f757f3fSDimitry Andric break; 21735f757f3fSDimitry Andric case CCValAssign::ZExt: 21745f757f3fSDimitry Andric Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); 21755f757f3fSDimitry Andric break; 21765f757f3fSDimitry Andric case CCValAssign::AExt: 21775f757f3fSDimitry Andric if (Arg.getValueType().isVector() && 21785f757f3fSDimitry Andric Arg.getValueType().getVectorElementType() == MVT::i1) 21795f757f3fSDimitry Andric Arg = lowerMasksToReg(Arg, RegVT, dl, DAG); 21805f757f3fSDimitry Andric else if (RegVT.is128BitVector()) { 21815f757f3fSDimitry Andric // Special case: passing MMX values in XMM registers. 21825f757f3fSDimitry Andric Arg = DAG.getBitcast(MVT::i64, Arg); 21835f757f3fSDimitry Andric Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); 21845f757f3fSDimitry Andric Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); 21855f757f3fSDimitry Andric } else 21865f757f3fSDimitry Andric Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); 21875f757f3fSDimitry Andric break; 21885f757f3fSDimitry Andric case CCValAssign::BCvt: 21895f757f3fSDimitry Andric Arg = DAG.getBitcast(RegVT, Arg); 21905f757f3fSDimitry Andric break; 21915f757f3fSDimitry Andric case CCValAssign::Indirect: { 21925f757f3fSDimitry Andric if (isByVal) { 21935f757f3fSDimitry Andric // Memcpy the argument to a temporary stack slot to prevent 21945f757f3fSDimitry Andric // the caller from seeing any modifications the callee may make 21955f757f3fSDimitry Andric // as guaranteed by the `byval` attribute. 21965f757f3fSDimitry Andric int FrameIdx = MF.getFrameInfo().CreateStackObject( 21975f757f3fSDimitry Andric Flags.getByValSize(), 21985f757f3fSDimitry Andric std::max(Align(16), Flags.getNonZeroByValAlign()), false); 21995f757f3fSDimitry Andric SDValue StackSlot = 22005f757f3fSDimitry Andric DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout())); 22015f757f3fSDimitry Andric Chain = 22025f757f3fSDimitry Andric CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl); 22035f757f3fSDimitry Andric // From now on treat this as a regular pointer 22045f757f3fSDimitry Andric Arg = StackSlot; 22055f757f3fSDimitry Andric isByVal = false; 22065f757f3fSDimitry Andric } else { 22075f757f3fSDimitry Andric // Store the argument. 
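// For the non-byval indirect case handled below, the value is spilled to a
// fresh stack temporary and the address of that slot is then passed in place
// of the value itself.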
22085f757f3fSDimitry Andric SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
22095f757f3fSDimitry Andric int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
22105f757f3fSDimitry Andric Chain = DAG.getStore(
22115f757f3fSDimitry Andric Chain, dl, Arg, SpillSlot,
22125f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
22135f757f3fSDimitry Andric Arg = SpillSlot;
22145f757f3fSDimitry Andric }
22155f757f3fSDimitry Andric break;
22165f757f3fSDimitry Andric }
22175f757f3fSDimitry Andric }
22185f757f3fSDimitry Andric
22195f757f3fSDimitry Andric if (VA.needsCustom()) {
22205f757f3fSDimitry Andric assert(VA.getValVT() == MVT::v64i1 &&
22215f757f3fSDimitry Andric "Currently the only custom case is when we split v64i1 to 2 regs");
22225f757f3fSDimitry Andric // Split v64i1 value into two registers
22235f757f3fSDimitry Andric Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
22245f757f3fSDimitry Andric } else if (VA.isRegLoc()) {
22255f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
22265f757f3fSDimitry Andric const TargetOptions &Options = DAG.getTarget().Options;
22275f757f3fSDimitry Andric if (Options.EmitCallSiteInfo)
2228*0fca6ea1SDimitry Andric CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
22295f757f3fSDimitry Andric if (isVarArg && IsWin64) {
22305f757f3fSDimitry Andric // Win64 ABI requires an argument XMM reg to be copied to the corresponding
22315f757f3fSDimitry Andric // shadow reg if the callee is a varargs function.
22325f757f3fSDimitry Andric Register ShadowReg;
22335f757f3fSDimitry Andric switch (VA.getLocReg()) {
22345f757f3fSDimitry Andric case X86::XMM0: ShadowReg = X86::RCX; break;
22355f757f3fSDimitry Andric case X86::XMM1: ShadowReg = X86::RDX; break;
22365f757f3fSDimitry Andric case X86::XMM2: ShadowReg = X86::R8; break;
22375f757f3fSDimitry Andric case X86::XMM3: ShadowReg = X86::R9; break;
22385f757f3fSDimitry Andric }
22395f757f3fSDimitry Andric if (ShadowReg)
22405f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
22415f757f3fSDimitry Andric }
22425f757f3fSDimitry Andric } else if (!IsSibcall && (!isTailCall || isByVal)) {
22435f757f3fSDimitry Andric assert(VA.isMemLoc());
22445f757f3fSDimitry Andric if (!StackPtr.getNode())
22455f757f3fSDimitry Andric StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
22465f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout()));
22475f757f3fSDimitry Andric MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
22485f757f3fSDimitry Andric dl, DAG, VA, Flags, isByVal));
22495f757f3fSDimitry Andric }
22505f757f3fSDimitry Andric }
22515f757f3fSDimitry Andric
22525f757f3fSDimitry Andric if (!MemOpChains.empty())
22535f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
22545f757f3fSDimitry Andric
22555f757f3fSDimitry Andric if (Subtarget.isPICStyleGOT()) {
22565f757f3fSDimitry Andric // ELF / PIC requires the GOT pointer in the EBX register before function
22575f757f3fSDimitry Andric // calls via the PLT (except regcall).
22585f757f3fSDimitry Andric if (!isTailCall) {
22595f757f3fSDimitry Andric // Indirect calls with the RegCall calling convention may use up all the
22605f757f3fSDimitry Andric // general registers, so it is not suitable to bind the EBX register for the
22615f757f3fSDimitry Andric // GOT address; just let the register allocator handle it.
22625f757f3fSDimitry Andric if (CallConv != CallingConv::X86_RegCall)
22635f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(
22645f757f3fSDimitry Andric Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
22655f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout()))));
22665f757f3fSDimitry Andric } else {
22675f757f3fSDimitry Andric // If we are tail calling and generating PIC/GOT style code, load the
22685f757f3fSDimitry Andric // address of the callee into ECX. The value in ECX is used as the target of
22695f757f3fSDimitry Andric // the tail jump. This is done to circumvent the ebx/callee-saved problem
22705f757f3fSDimitry Andric // for tail calls on PIC/GOT architectures. Normally we would just put the
22715f757f3fSDimitry Andric // address of GOT into ebx and then call target@PLT. But for tail calls
22725f757f3fSDimitry Andric // ebx would be restored (since ebx is callee saved) before jumping to the
22735f757f3fSDimitry Andric // target@PLT.
22745f757f3fSDimitry Andric
22755f757f3fSDimitry Andric // Note: The actual moving to ECX is done further down.
22765f757f3fSDimitry Andric GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
22775f757f3fSDimitry Andric if (G && !G->getGlobal()->hasLocalLinkage() &&
22785f757f3fSDimitry Andric G->getGlobal()->hasDefaultVisibility())
22795f757f3fSDimitry Andric Callee = LowerGlobalAddress(Callee, DAG);
22805f757f3fSDimitry Andric else if (isa<ExternalSymbolSDNode>(Callee))
22815f757f3fSDimitry Andric Callee = LowerExternalSymbol(Callee, DAG);
22825f757f3fSDimitry Andric }
22835f757f3fSDimitry Andric }
22845f757f3fSDimitry Andric
22855f757f3fSDimitry Andric if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
22865f757f3fSDimitry Andric (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
22875f757f3fSDimitry Andric // From AMD64 ABI document:
22885f757f3fSDimitry Andric // For calls that may call functions that use varargs or stdargs
22895f757f3fSDimitry Andric // (prototype-less calls or calls to functions containing ellipsis (...) in
22905f757f3fSDimitry Andric // the declaration) %al is used as a hidden argument to specify the number
22915f757f3fSDimitry Andric // of SSE registers used. The contents of %al do not need to match exactly
22925f757f3fSDimitry Andric // the number of registers, but must be an upper bound on the number of SSE
22935f757f3fSDimitry Andric // registers used and be in the range 0 - 8 inclusive.
22945f757f3fSDimitry Andric
22955f757f3fSDimitry Andric // Count the number of XMM registers allocated.
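// For example, a call like printf("%f %f", x, y) that passes two doubles in
// XMM0 and XMM1 would set AL to 2, while AL = 0 lets the callee's va_start
// prologue skip saving the SSE argument registers entirely.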
22965f757f3fSDimitry Andric static const MCPhysReg XMMArgRegs[] = { 22975f757f3fSDimitry Andric X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 22985f757f3fSDimitry Andric X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 22995f757f3fSDimitry Andric }; 23005f757f3fSDimitry Andric unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); 23015f757f3fSDimitry Andric assert((Subtarget.hasSSE1() || !NumXMMRegs) 23025f757f3fSDimitry Andric && "SSE registers cannot be used when SSE is disabled"); 23035f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(Register(X86::AL), 23045f757f3fSDimitry Andric DAG.getConstant(NumXMMRegs, dl, 23055f757f3fSDimitry Andric MVT::i8))); 23065f757f3fSDimitry Andric } 23075f757f3fSDimitry Andric 23085f757f3fSDimitry Andric if (isVarArg && IsMustTail) { 23095f757f3fSDimitry Andric const auto &Forwards = X86Info->getForwardedMustTailRegParms(); 23105f757f3fSDimitry Andric for (const auto &F : Forwards) { 23115f757f3fSDimitry Andric SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); 23125f757f3fSDimitry Andric RegsToPass.push_back(std::make_pair(F.PReg, Val)); 23135f757f3fSDimitry Andric } 23145f757f3fSDimitry Andric } 23155f757f3fSDimitry Andric 23165f757f3fSDimitry Andric // For tail calls lower the arguments to the 'real' stack slots. Sibcalls 23175f757f3fSDimitry Andric // don't need this because the eligibility check rejects calls that require 23185f757f3fSDimitry Andric // shuffling arguments passed in memory. 23195f757f3fSDimitry Andric if (!IsSibcall && isTailCall) { 23205f757f3fSDimitry Andric // Force all the incoming stack arguments to be loaded from the stack 23215f757f3fSDimitry Andric // before any new outgoing arguments are stored to the stack, because the 23225f757f3fSDimitry Andric // outgoing stack slots may alias the incoming argument stack slots, and 23235f757f3fSDimitry Andric // the alias isn't otherwise explicit. This is slightly more conservative 23245f757f3fSDimitry Andric // than necessary, because it means that each store effectively depends 23255f757f3fSDimitry Andric // on every argument instead of just those arguments it would clobber. 23265f757f3fSDimitry Andric SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); 23275f757f3fSDimitry Andric 23285f757f3fSDimitry Andric SmallVector<SDValue, 8> MemOpChains2; 23295f757f3fSDimitry Andric SDValue FIN; 23305f757f3fSDimitry Andric int FI = 0; 23315f757f3fSDimitry Andric for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; 23325f757f3fSDimitry Andric ++I, ++OutsIndex) { 23335f757f3fSDimitry Andric CCValAssign &VA = ArgLocs[I]; 23345f757f3fSDimitry Andric 23355f757f3fSDimitry Andric if (VA.isRegLoc()) { 23365f757f3fSDimitry Andric if (VA.needsCustom()) { 23375f757f3fSDimitry Andric assert((CallConv == CallingConv::X86_RegCall) && 23385f757f3fSDimitry Andric "Expecting custom case only in regcall calling convention"); 23395f757f3fSDimitry Andric // This means that we are in special case where one argument was 23405f757f3fSDimitry Andric // passed through two register locations - Skip the next location 23415f757f3fSDimitry Andric ++I; 23425f757f3fSDimitry Andric } 23435f757f3fSDimitry Andric 23445f757f3fSDimitry Andric continue; 23455f757f3fSDimitry Andric } 23465f757f3fSDimitry Andric 23475f757f3fSDimitry Andric assert(VA.isMemLoc()); 23485f757f3fSDimitry Andric SDValue Arg = OutVals[OutsIndex]; 23495f757f3fSDimitry Andric ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; 23505f757f3fSDimitry Andric // Skip inalloca/preallocated arguments. They don't require any work. 
23515f757f3fSDimitry Andric if (Flags.isInAlloca() || Flags.isPreallocated()) 23525f757f3fSDimitry Andric continue; 23535f757f3fSDimitry Andric // Create frame index. 23545f757f3fSDimitry Andric int32_t Offset = VA.getLocMemOffset()+FPDiff; 23555f757f3fSDimitry Andric uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; 23565f757f3fSDimitry Andric FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 23575f757f3fSDimitry Andric FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 23585f757f3fSDimitry Andric 23595f757f3fSDimitry Andric if (Flags.isByVal()) { 23605f757f3fSDimitry Andric // Copy relative to framepointer. 23615f757f3fSDimitry Andric SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); 23625f757f3fSDimitry Andric if (!StackPtr.getNode()) 23635f757f3fSDimitry Andric StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 23645f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout())); 23655f757f3fSDimitry Andric Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 23665f757f3fSDimitry Andric StackPtr, Source); 23675f757f3fSDimitry Andric 23685f757f3fSDimitry Andric MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, 23695f757f3fSDimitry Andric ArgChain, 23705f757f3fSDimitry Andric Flags, DAG, dl)); 23715f757f3fSDimitry Andric } else { 23725f757f3fSDimitry Andric // Store relative to framepointer. 23735f757f3fSDimitry Andric MemOpChains2.push_back(DAG.getStore( 23745f757f3fSDimitry Andric ArgChain, dl, Arg, FIN, 23755f757f3fSDimitry Andric MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 23765f757f3fSDimitry Andric } 23775f757f3fSDimitry Andric } 23785f757f3fSDimitry Andric 23795f757f3fSDimitry Andric if (!MemOpChains2.empty()) 23805f757f3fSDimitry Andric Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 23815f757f3fSDimitry Andric 23825f757f3fSDimitry Andric // Store the return address to the appropriate stack slot. 23835f757f3fSDimitry Andric Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, 23845f757f3fSDimitry Andric getPointerTy(DAG.getDataLayout()), 23855f757f3fSDimitry Andric RegInfo->getSlotSize(), FPDiff, dl); 23865f757f3fSDimitry Andric } 23875f757f3fSDimitry Andric 23885f757f3fSDimitry Andric // Build a sequence of copy-to-reg nodes chained together with token chain 23895f757f3fSDimitry Andric // and glue operands which copy the outgoing args into registers. 23905f757f3fSDimitry Andric SDValue InGlue; 23915f757f3fSDimitry Andric for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 23925f757f3fSDimitry Andric Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 23935f757f3fSDimitry Andric RegsToPass[i].second, InGlue); 23945f757f3fSDimitry Andric InGlue = Chain.getValue(1); 23955f757f3fSDimitry Andric } 23965f757f3fSDimitry Andric 23975f757f3fSDimitry Andric if (DAG.getTarget().getCodeModel() == CodeModel::Large) { 23985f757f3fSDimitry Andric assert(Is64Bit && "Large code model is only legal in 64-bit mode."); 23995f757f3fSDimitry Andric // In the 64-bit large code model, we have to make all calls 24005f757f3fSDimitry Andric // through a register, since the call instruction's 32-bit 24015f757f3fSDimitry Andric // pc-relative offset may not be large enough to hold the whole 24025f757f3fSDimitry Andric // address. 
24035f757f3fSDimitry Andric } else if (Callee->getOpcode() == ISD::GlobalAddress ||
24045f757f3fSDimitry Andric Callee->getOpcode() == ISD::ExternalSymbol) {
24055f757f3fSDimitry Andric // Lower direct calls to global addresses and external symbols. Setting
24065f757f3fSDimitry Andric // ForCall to true here has the effect of removing WrapperRIP when possible
24075f757f3fSDimitry Andric // to allow direct calls to be selected without first materializing the
24085f757f3fSDimitry Andric // address into a register.
24095f757f3fSDimitry Andric Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
24105f757f3fSDimitry Andric } else if (Subtarget.isTarget64BitILP32() &&
24115f757f3fSDimitry Andric Callee.getValueType() == MVT::i32) {
24125f757f3fSDimitry Andric // Zero-extend the 32-bit Callee address into a 64-bit one according to the x32 ABI
24135f757f3fSDimitry Andric Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
24145f757f3fSDimitry Andric }
24155f757f3fSDimitry Andric
24165f757f3fSDimitry Andric // Returns a chain & a glue for retval copy to use.
24175f757f3fSDimitry Andric SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
24185f757f3fSDimitry Andric SmallVector<SDValue, 8> Ops;
24195f757f3fSDimitry Andric
24205f757f3fSDimitry Andric if (!IsSibcall && isTailCall && !IsMustTail) {
24215f757f3fSDimitry Andric Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
24225f757f3fSDimitry Andric InGlue = Chain.getValue(1);
24235f757f3fSDimitry Andric }
24245f757f3fSDimitry Andric
24255f757f3fSDimitry Andric Ops.push_back(Chain);
24265f757f3fSDimitry Andric Ops.push_back(Callee);
24275f757f3fSDimitry Andric
24285f757f3fSDimitry Andric if (isTailCall)
24295f757f3fSDimitry Andric Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
24305f757f3fSDimitry Andric
24315f757f3fSDimitry Andric // Add argument registers to the end of the list so that they are known live
24325f757f3fSDimitry Andric // into the call.
24335f757f3fSDimitry Andric for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
24345f757f3fSDimitry Andric Ops.push_back(DAG.getRegister(RegsToPass[i].first,
24355f757f3fSDimitry Andric RegsToPass[i].second.getValueType()));
24365f757f3fSDimitry Andric
24375f757f3fSDimitry Andric // Add a register mask operand representing the call-preserved registers.
24385f757f3fSDimitry Andric const uint32_t *Mask = [&]() {
24395f757f3fSDimitry Andric auto AdaptedCC = CallConv;
24405f757f3fSDimitry Andric // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
24415f757f3fSDimitry Andric // use X86_INTR calling convention because it has the same CSR mask
24425f757f3fSDimitry Andric // (same preserved registers).
24435f757f3fSDimitry Andric if (HasNCSR)
24445f757f3fSDimitry Andric AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
24455f757f3fSDimitry Andric // If NoCalleeSavedRegisters is requested, then use GHC since it happens
24465f757f3fSDimitry Andric // to use the CSR_NoRegs_RegMask.
24475f757f3fSDimitry Andric if (CB && CB->hasFnAttr("no_callee_saved_registers")) 24485f757f3fSDimitry Andric AdaptedCC = (CallingConv::ID)CallingConv::GHC; 24495f757f3fSDimitry Andric return RegInfo->getCallPreservedMask(MF, AdaptedCC); 24505f757f3fSDimitry Andric }(); 24515f757f3fSDimitry Andric assert(Mask && "Missing call preserved mask for calling convention"); 24525f757f3fSDimitry Andric 24535f757f3fSDimitry Andric // If this is an invoke in a 32-bit function using a funclet-based 24545f757f3fSDimitry Andric // personality, assume the function clobbers all registers. If an exception 24555f757f3fSDimitry Andric // is thrown, the runtime will not restore CSRs. 24565f757f3fSDimitry Andric // FIXME: Model this more precisely so that we can register allocate across 24575f757f3fSDimitry Andric // the normal edge and spill and fill across the exceptional edge. 24585f757f3fSDimitry Andric if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) { 24595f757f3fSDimitry Andric const Function &CallerFn = MF.getFunction(); 24605f757f3fSDimitry Andric EHPersonality Pers = 24615f757f3fSDimitry Andric CallerFn.hasPersonalityFn() 24625f757f3fSDimitry Andric ? classifyEHPersonality(CallerFn.getPersonalityFn()) 24635f757f3fSDimitry Andric : EHPersonality::Unknown; 24645f757f3fSDimitry Andric if (isFuncletEHPersonality(Pers)) 24655f757f3fSDimitry Andric Mask = RegInfo->getNoPreservedMask(); 24665f757f3fSDimitry Andric } 24675f757f3fSDimitry Andric 24685f757f3fSDimitry Andric // Define a new register mask from the existing mask. 24695f757f3fSDimitry Andric uint32_t *RegMask = nullptr; 24705f757f3fSDimitry Andric 24715f757f3fSDimitry Andric // In some calling conventions we need to remove the used physical registers 24725f757f3fSDimitry Andric // from the reg mask. Create a new RegMask for such calling conventions. 24735f757f3fSDimitry Andric // RegMask for calling conventions that disable only return registers (e.g. 24745f757f3fSDimitry Andric // preserve_most) will be modified later in LowerCallResult. 24755f757f3fSDimitry Andric bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR; 24765f757f3fSDimitry Andric if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) { 24775f757f3fSDimitry Andric const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 24785f757f3fSDimitry Andric 24795f757f3fSDimitry Andric // Allocate a new Reg Mask and copy Mask. 24805f757f3fSDimitry Andric RegMask = MF.allocateRegMask(); 24815f757f3fSDimitry Andric unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); 24825f757f3fSDimitry Andric memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize); 24835f757f3fSDimitry Andric 24845f757f3fSDimitry Andric // Make sure all sub registers of the argument registers are reset 24855f757f3fSDimitry Andric // in the RegMask. 24865f757f3fSDimitry Andric if (ShouldDisableArgRegs) { 24875f757f3fSDimitry Andric for (auto const &RegPair : RegsToPass) 24885f757f3fSDimitry Andric for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first)) 24895f757f3fSDimitry Andric RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); 24905f757f3fSDimitry Andric } 24915f757f3fSDimitry Andric 24925f757f3fSDimitry Andric // Create the RegMask Operand according to our updated mask. 24935f757f3fSDimitry Andric Ops.push_back(DAG.getRegisterMask(RegMask)); 24945f757f3fSDimitry Andric } else { 24955f757f3fSDimitry Andric // Create the RegMask Operand according to the static mask. 
24965f757f3fSDimitry Andric Ops.push_back(DAG.getRegisterMask(Mask)); 24975f757f3fSDimitry Andric } 24985f757f3fSDimitry Andric 24995f757f3fSDimitry Andric if (InGlue.getNode()) 25005f757f3fSDimitry Andric Ops.push_back(InGlue); 25015f757f3fSDimitry Andric 25025f757f3fSDimitry Andric if (isTailCall) { 25035f757f3fSDimitry Andric // We used to do: 25045f757f3fSDimitry Andric //// If this is the first return lowered for this function, add the regs 25055f757f3fSDimitry Andric //// to the liveout set for the function. 25065f757f3fSDimitry Andric // This isn't right, although it's probably harmless on x86; liveouts 25075f757f3fSDimitry Andric // should be computed from returns not tail calls. Consider a void 25085f757f3fSDimitry Andric // function making a tail call to a function returning int. 25095f757f3fSDimitry Andric MF.getFrameInfo().setHasTailCall(); 25105f757f3fSDimitry Andric SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); 25115f757f3fSDimitry Andric 25125f757f3fSDimitry Andric if (IsCFICall) 25135f757f3fSDimitry Andric Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 25145f757f3fSDimitry Andric 25155f757f3fSDimitry Andric DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 25165f757f3fSDimitry Andric DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); 25175f757f3fSDimitry Andric return Ret; 25185f757f3fSDimitry Andric } 25195f757f3fSDimitry Andric 25205f757f3fSDimitry Andric if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) { 25215f757f3fSDimitry Andric Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); 25225f757f3fSDimitry Andric } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { 25235f757f3fSDimitry Andric // Calls with a "clang.arc.attachedcall" bundle are special. They should be 25245f757f3fSDimitry Andric // expanded to the call, directly followed by a special marker sequence and 25255f757f3fSDimitry Andric // a call to a ObjC library function. Use the CALL_RVMARKER to do that. 25265f757f3fSDimitry Andric assert(!isTailCall && 25275f757f3fSDimitry Andric "tail calls cannot be marked with clang.arc.attachedcall"); 25285f757f3fSDimitry Andric assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"); 25295f757f3fSDimitry Andric 25305f757f3fSDimitry Andric // Add a target global address for the retainRV/claimRV runtime function 25315f757f3fSDimitry Andric // just before the call target. 25325f757f3fSDimitry Andric Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB); 25335f757f3fSDimitry Andric auto PtrVT = getPointerTy(DAG.getDataLayout()); 25345f757f3fSDimitry Andric auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT); 25355f757f3fSDimitry Andric Ops.insert(Ops.begin() + 1, GA); 25365f757f3fSDimitry Andric Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops); 25375f757f3fSDimitry Andric } else { 25385f757f3fSDimitry Andric Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); 25395f757f3fSDimitry Andric } 25405f757f3fSDimitry Andric 25415f757f3fSDimitry Andric if (IsCFICall) 25425f757f3fSDimitry Andric Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 25435f757f3fSDimitry Andric 25445f757f3fSDimitry Andric InGlue = Chain.getValue(1); 25455f757f3fSDimitry Andric DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 25465f757f3fSDimitry Andric DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); 25475f757f3fSDimitry Andric 25485f757f3fSDimitry Andric // Save heapallocsite metadata. 
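// (This metadata feeds Windows CodeView debug info, which records heap
// allocation sites so the debugger can show the allocated type.)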
25495f757f3fSDimitry Andric if (CLI.CB) 25505f757f3fSDimitry Andric if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite")) 25515f757f3fSDimitry Andric DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); 25525f757f3fSDimitry Andric 25535f757f3fSDimitry Andric // Create the CALLSEQ_END node. 25545f757f3fSDimitry Andric unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing. 25555f757f3fSDimitry Andric if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 25565f757f3fSDimitry Andric DAG.getTarget().Options.GuaranteedTailCallOpt)) 25575f757f3fSDimitry Andric NumBytesForCalleeToPop = NumBytes; // Callee pops everything 25585f757f3fSDimitry Andric else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet) 25595f757f3fSDimitry Andric // If this call passes a struct-return pointer, the callee 25605f757f3fSDimitry Andric // pops that struct pointer. 25615f757f3fSDimitry Andric NumBytesForCalleeToPop = 4; 25625f757f3fSDimitry Andric 25635f757f3fSDimitry Andric // Returns a glue for retval copy to use. 25645f757f3fSDimitry Andric if (!IsSibcall) { 25655f757f3fSDimitry Andric Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop, 25665f757f3fSDimitry Andric InGlue, dl); 25675f757f3fSDimitry Andric InGlue = Chain.getValue(1); 25685f757f3fSDimitry Andric } 25695f757f3fSDimitry Andric 2570*0fca6ea1SDimitry Andric if (CallingConv::PreserveNone == CallConv) 2571*0fca6ea1SDimitry Andric for (unsigned I = 0, E = Outs.size(); I != E; ++I) { 2572*0fca6ea1SDimitry Andric if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() || 2573*0fca6ea1SDimitry Andric Outs[I].Flags.isSwiftError()) { 2574*0fca6ea1SDimitry Andric errorUnsupported(DAG, dl, 2575*0fca6ea1SDimitry Andric "Swift attributes can't be used with preserve_none"); 2576*0fca6ea1SDimitry Andric break; 2577*0fca6ea1SDimitry Andric } 2578*0fca6ea1SDimitry Andric } 2579*0fca6ea1SDimitry Andric 25805f757f3fSDimitry Andric // Handle result values, copying them out of physregs into vregs that we 25815f757f3fSDimitry Andric // return. 25825f757f3fSDimitry Andric return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG, 25835f757f3fSDimitry Andric InVals, RegMask); 25845f757f3fSDimitry Andric } 25855f757f3fSDimitry Andric 25865f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 25875f757f3fSDimitry Andric // Fast Calling Convention (tail call) implementation 25885f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 25895f757f3fSDimitry Andric 25905f757f3fSDimitry Andric // Like std call, callee cleans arguments, convention except that ECX is 25915f757f3fSDimitry Andric // reserved for storing the tail called function address. Only 2 registers are 25925f757f3fSDimitry Andric // free for argument passing (inreg). Tail call optimization is performed 25935f757f3fSDimitry Andric // provided: 25945f757f3fSDimitry Andric // * tailcallopt is enabled 25955f757f3fSDimitry Andric // * caller/callee are fastcc 25965f757f3fSDimitry Andric // On X86_64 architecture with GOT-style position independent code only local 25975f757f3fSDimitry Andric // (within module) calls are supported at the moment. 25985f757f3fSDimitry Andric // To keep the stack aligned according to platform abi the function 25995f757f3fSDimitry Andric // GetAlignedArgumentStackSize ensures that argument delta is always multiples 26005f757f3fSDimitry Andric // of stack alignment. 
(Dynamic linkers need this - Darwin's dyld for example)
26015f757f3fSDimitry Andric // If a tail called function callee has more arguments than the caller, the
26025f757f3fSDimitry Andric // caller needs to make sure that there is room to move the RETADDR to. This is
26035f757f3fSDimitry Andric // achieved by reserving an area the size of the argument delta right after the
26045f757f3fSDimitry Andric // original RETADDR, but before the saved framepointer or the spilled registers
26055f757f3fSDimitry Andric // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
26065f757f3fSDimitry Andric // stack layout:
26075f757f3fSDimitry Andric // arg1
26085f757f3fSDimitry Andric // arg2
26095f757f3fSDimitry Andric // RETADDR
26105f757f3fSDimitry Andric // [ new RETADDR
26115f757f3fSDimitry Andric // move area ]
26125f757f3fSDimitry Andric // (possible EBP)
26135f757f3fSDimitry Andric // ESI
26145f757f3fSDimitry Andric // EDI
26155f757f3fSDimitry Andric // local1 ..
26165f757f3fSDimitry Andric
26175f757f3fSDimitry Andric /// Make the stack size aligned, e.g. to 16n + 12 for a 16-byte alignment
26185f757f3fSDimitry Andric /// requirement.
26195f757f3fSDimitry Andric unsigned
26205f757f3fSDimitry Andric X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
26215f757f3fSDimitry Andric SelectionDAG &DAG) const {
26225f757f3fSDimitry Andric const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
26235f757f3fSDimitry Andric const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
26245f757f3fSDimitry Andric assert(StackSize % SlotSize == 0 &&
26255f757f3fSDimitry Andric "StackSize must be a multiple of SlotSize");
26265f757f3fSDimitry Andric return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
26275f757f3fSDimitry Andric }
26285f757f3fSDimitry Andric
26295f757f3fSDimitry Andric /// Return true if the given stack call argument is already available at the
26305f757f3fSDimitry Andric /// same relative position in the caller's incoming argument stack.
26315f757f3fSDimitry Andric static
26325f757f3fSDimitry Andric bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
26335f757f3fSDimitry Andric MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
26345f757f3fSDimitry Andric const X86InstrInfo *TII, const CCValAssign &VA) {
26355f757f3fSDimitry Andric unsigned Bytes = Arg.getValueSizeInBits() / 8;
26365f757f3fSDimitry Andric
26375f757f3fSDimitry Andric for (;;) {
26385f757f3fSDimitry Andric // Look through nodes that don't alter the bits of the incoming value.
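// For example, an i8 argument promoted to i32 typically shows up here as a
// TRUNCATE of an AssertZext of the incoming CopyFromReg or load; all of these
// wrappers are peeled off so the underlying stack slot can be compared.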
26395f757f3fSDimitry Andric unsigned Op = Arg.getOpcode(); 26405f757f3fSDimitry Andric if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || 26415f757f3fSDimitry Andric Op == ISD::AssertZext) { 26425f757f3fSDimitry Andric Arg = Arg.getOperand(0); 26435f757f3fSDimitry Andric continue; 26445f757f3fSDimitry Andric } 26455f757f3fSDimitry Andric if (Op == ISD::TRUNCATE) { 26465f757f3fSDimitry Andric const SDValue &TruncInput = Arg.getOperand(0); 26475f757f3fSDimitry Andric if (TruncInput.getOpcode() == ISD::AssertZext && 26485f757f3fSDimitry Andric cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == 26495f757f3fSDimitry Andric Arg.getValueType()) { 26505f757f3fSDimitry Andric Arg = TruncInput.getOperand(0); 26515f757f3fSDimitry Andric continue; 26525f757f3fSDimitry Andric } 26535f757f3fSDimitry Andric } 26545f757f3fSDimitry Andric break; 26555f757f3fSDimitry Andric } 26565f757f3fSDimitry Andric 26575f757f3fSDimitry Andric int FI = INT_MAX; 26585f757f3fSDimitry Andric if (Arg.getOpcode() == ISD::CopyFromReg) { 26595f757f3fSDimitry Andric Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 26605f757f3fSDimitry Andric if (!VR.isVirtual()) 26615f757f3fSDimitry Andric return false; 26625f757f3fSDimitry Andric MachineInstr *Def = MRI->getVRegDef(VR); 26635f757f3fSDimitry Andric if (!Def) 26645f757f3fSDimitry Andric return false; 26655f757f3fSDimitry Andric if (!Flags.isByVal()) { 26665f757f3fSDimitry Andric if (!TII->isLoadFromStackSlot(*Def, FI)) 26675f757f3fSDimitry Andric return false; 26685f757f3fSDimitry Andric } else { 26695f757f3fSDimitry Andric unsigned Opcode = Def->getOpcode(); 26705f757f3fSDimitry Andric if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || 26715f757f3fSDimitry Andric Opcode == X86::LEA64_32r) && 26725f757f3fSDimitry Andric Def->getOperand(1).isFI()) { 26735f757f3fSDimitry Andric FI = Def->getOperand(1).getIndex(); 26745f757f3fSDimitry Andric Bytes = Flags.getByValSize(); 26755f757f3fSDimitry Andric } else 26765f757f3fSDimitry Andric return false; 26775f757f3fSDimitry Andric } 26785f757f3fSDimitry Andric } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 26795f757f3fSDimitry Andric if (Flags.isByVal()) 26805f757f3fSDimitry Andric // ByVal argument is passed in as a pointer but it's now being 26815f757f3fSDimitry Andric // dereferenced. e.g. 
26825f757f3fSDimitry Andric // define @foo(%struct.X* %A) { 26835f757f3fSDimitry Andric // tail call @bar(%struct.X* byval %A) 26845f757f3fSDimitry Andric // } 26855f757f3fSDimitry Andric return false; 26865f757f3fSDimitry Andric SDValue Ptr = Ld->getBasePtr(); 26875f757f3fSDimitry Andric FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 26885f757f3fSDimitry Andric if (!FINode) 26895f757f3fSDimitry Andric return false; 26905f757f3fSDimitry Andric FI = FINode->getIndex(); 26915f757f3fSDimitry Andric } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { 26925f757f3fSDimitry Andric FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); 26935f757f3fSDimitry Andric FI = FINode->getIndex(); 26945f757f3fSDimitry Andric Bytes = Flags.getByValSize(); 26955f757f3fSDimitry Andric } else 26965f757f3fSDimitry Andric return false; 26975f757f3fSDimitry Andric 26985f757f3fSDimitry Andric assert(FI != INT_MAX); 26995f757f3fSDimitry Andric if (!MFI.isFixedObjectIndex(FI)) 27005f757f3fSDimitry Andric return false; 27015f757f3fSDimitry Andric 27025f757f3fSDimitry Andric if (Offset != MFI.getObjectOffset(FI)) 27035f757f3fSDimitry Andric return false; 27045f757f3fSDimitry Andric 27055f757f3fSDimitry Andric // If this is not byval, check that the argument stack object is immutable. 27065f757f3fSDimitry Andric // inalloca and argument copy elision can create mutable argument stack 27075f757f3fSDimitry Andric // objects. Byval objects can be mutated, but a byval call intends to pass the 27085f757f3fSDimitry Andric // mutated memory. 27095f757f3fSDimitry Andric if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) 27105f757f3fSDimitry Andric return false; 27115f757f3fSDimitry Andric 27125f757f3fSDimitry Andric if (VA.getLocVT().getFixedSizeInBits() > 27135f757f3fSDimitry Andric Arg.getValueSizeInBits().getFixedValue()) { 27145f757f3fSDimitry Andric // If the argument location is wider than the argument type, check that any 27155f757f3fSDimitry Andric // extension flags match. 27165f757f3fSDimitry Andric if (Flags.isZExt() != MFI.isObjectZExt(FI) || 27175f757f3fSDimitry Andric Flags.isSExt() != MFI.isObjectSExt(FI)) { 27185f757f3fSDimitry Andric return false; 27195f757f3fSDimitry Andric } 27205f757f3fSDimitry Andric } 27215f757f3fSDimitry Andric 27225f757f3fSDimitry Andric return Bytes == MFI.getObjectSize(FI); 27235f757f3fSDimitry Andric } 27245f757f3fSDimitry Andric 27255f757f3fSDimitry Andric /// Check whether the call is eligible for tail call optimization. Targets 27265f757f3fSDimitry Andric /// that want to do tail call optimization should implement this function. 2727*0fca6ea1SDimitry Andric /// Note that the x86 backend does not check musttail calls for eligibility! The 2728*0fca6ea1SDimitry Andric /// rest of x86 tail call lowering must be prepared to forward arguments of any 2729*0fca6ea1SDimitry Andric /// type. 
27305f757f3fSDimitry Andric bool X86TargetLowering::IsEligibleForTailCallOptimization( 2731*0fca6ea1SDimitry Andric TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, 2732*0fca6ea1SDimitry Andric SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const { 2733*0fca6ea1SDimitry Andric SelectionDAG &DAG = CLI.DAG; 2734*0fca6ea1SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2735*0fca6ea1SDimitry Andric const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2736*0fca6ea1SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2737*0fca6ea1SDimitry Andric SDValue Callee = CLI.Callee; 2738*0fca6ea1SDimitry Andric CallingConv::ID CalleeCC = CLI.CallConv; 2739*0fca6ea1SDimitry Andric bool isVarArg = CLI.IsVarArg; 2740*0fca6ea1SDimitry Andric 27415f757f3fSDimitry Andric if (!mayTailCallThisCC(CalleeCC)) 27425f757f3fSDimitry Andric return false; 27435f757f3fSDimitry Andric 27445f757f3fSDimitry Andric // If -tailcallopt is specified, make fastcc functions tail-callable. 27455f757f3fSDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 27465f757f3fSDimitry Andric const Function &CallerF = MF.getFunction(); 27475f757f3fSDimitry Andric 27485f757f3fSDimitry Andric // If the function return type is x86_fp80 and the callee return type is not, 27495f757f3fSDimitry Andric // then the FP_EXTEND of the call result is not a nop. It's not safe to 27505f757f3fSDimitry Andric // perform a tailcall optimization here. 2751*0fca6ea1SDimitry Andric if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty()) 27525f757f3fSDimitry Andric return false; 27535f757f3fSDimitry Andric 27545f757f3fSDimitry Andric CallingConv::ID CallerCC = CallerF.getCallingConv(); 27555f757f3fSDimitry Andric bool CCMatch = CallerCC == CalleeCC; 27565f757f3fSDimitry Andric bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); 27575f757f3fSDimitry Andric bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); 27585f757f3fSDimitry Andric bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || 27595f757f3fSDimitry Andric CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; 27605f757f3fSDimitry Andric 27615f757f3fSDimitry Andric // Win64 functions have extra shadow space for argument homing. Don't do the 27625f757f3fSDimitry Andric // sibcall if the caller and callee have mismatched expectations for this 27635f757f3fSDimitry Andric // space. 27645f757f3fSDimitry Andric if (IsCalleeWin64 != IsCallerWin64) 27655f757f3fSDimitry Andric return false; 27665f757f3fSDimitry Andric 27675f757f3fSDimitry Andric if (IsGuaranteeTCO) { 27685f757f3fSDimitry Andric if (canGuaranteeTCO(CalleeCC) && CCMatch) 27695f757f3fSDimitry Andric return true; 27705f757f3fSDimitry Andric return false; 27715f757f3fSDimitry Andric } 27725f757f3fSDimitry Andric 27735f757f3fSDimitry Andric // Look for obvious safe cases to perform tail call optimization that do not 27745f757f3fSDimitry Andric // require ABI changes. This is what gcc calls sibcall. 27755f757f3fSDimitry Andric 27765f757f3fSDimitry Andric // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to 27775f757f3fSDimitry Andric // emit a special epilogue. 
27785f757f3fSDimitry Andric   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
27795f757f3fSDimitry Andric   if (RegInfo->hasStackRealignment(MF))
27805f757f3fSDimitry Andric     return false;
27815f757f3fSDimitry Andric 
27825f757f3fSDimitry Andric   // Also avoid sibcall optimization if we're an sret return fn and the callee
27835f757f3fSDimitry Andric   // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
27845f757f3fSDimitry Andric   // insufficient.
27855f757f3fSDimitry Andric   if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
27865f757f3fSDimitry Andric     // For a compatible tail call the callee must return our sret pointer. So it
27875f757f3fSDimitry Andric     // needs to be (a) an sret function itself and (b) we pass our sret as its
27885f757f3fSDimitry Andric     // sret. Condition #b is harder to determine.
27895f757f3fSDimitry Andric     return false;
27905f757f3fSDimitry Andric   } else if (IsCalleePopSRet)
27915f757f3fSDimitry Andric     // The callee pops an sret, so we cannot tail-call, as our caller doesn't
27925f757f3fSDimitry Andric     // expect that.
27935f757f3fSDimitry Andric     return false;
27945f757f3fSDimitry Andric 
27955f757f3fSDimitry Andric   // Do not sibcall optimize vararg calls unless all arguments are passed via
27965f757f3fSDimitry Andric   // registers.
27975f757f3fSDimitry Andric   LLVMContext &C = *DAG.getContext();
27985f757f3fSDimitry Andric   if (isVarArg && !Outs.empty()) {
27995f757f3fSDimitry Andric     // Optimizing for varargs on Win64 is unlikely to be safe without
28005f757f3fSDimitry Andric     // additional testing.
28015f757f3fSDimitry Andric     if (IsCalleeWin64 || IsCallerWin64)
28025f757f3fSDimitry Andric       return false;
28035f757f3fSDimitry Andric 
28045f757f3fSDimitry Andric     for (const auto &VA : ArgLocs)
28055f757f3fSDimitry Andric       if (!VA.isRegLoc())
28065f757f3fSDimitry Andric         return false;
28075f757f3fSDimitry Andric   }
28085f757f3fSDimitry Andric 
28095f757f3fSDimitry Andric   // If the call result is in ST0 / ST1, it needs to be popped off the x87
28105f757f3fSDimitry Andric   // stack. Therefore, if it's not used by the call it is not safe to optimize
28115f757f3fSDimitry Andric   // this into a sibcall.
28125f757f3fSDimitry Andric   bool Unused = false;
28135f757f3fSDimitry Andric   for (const auto &In : Ins) {
28145f757f3fSDimitry Andric     if (!In.Used) {
28155f757f3fSDimitry Andric       Unused = true;
28165f757f3fSDimitry Andric       break;
28175f757f3fSDimitry Andric     }
28185f757f3fSDimitry Andric   }
28195f757f3fSDimitry Andric   if (Unused) {
28205f757f3fSDimitry Andric     SmallVector<CCValAssign, 16> RVLocs;
2821*0fca6ea1SDimitry Andric     CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2822*0fca6ea1SDimitry Andric     RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
28235f757f3fSDimitry Andric     for (const auto &VA : RVLocs) {
28245f757f3fSDimitry Andric       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
28255f757f3fSDimitry Andric         return false;
28265f757f3fSDimitry Andric     }
28275f757f3fSDimitry Andric   }
28285f757f3fSDimitry Andric 
28295f757f3fSDimitry Andric   // Check that the call results are passed in the same way.
28305f757f3fSDimitry Andric   if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
28315f757f3fSDimitry Andric                                   RetCC_X86, RetCC_X86))
28325f757f3fSDimitry Andric     return false;
28335f757f3fSDimitry Andric   // The callee has to preserve all registers the caller needs to preserve.
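  //
  // Sketch of the idea with invented names, assuming clang's preserve_all
  // attribute: the caller promises its own caller that nearly every register
  // survives, but a plain C-convention callee only keeps the usual
  // callee-saved set. CallerPreserved is then not a subset of CalleePreserved,
  // so the regmaskSubsetEqual() check below blocks the sibcall when the
  // conventions differ.
  //
  //   void callee(void);                               // default CC
  //   __attribute__((preserve_all)) void caller(void) {
  //     callee();                                      // no jmp-style tail call
  //   }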
28345f757f3fSDimitry Andric   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
28355f757f3fSDimitry Andric   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
28365f757f3fSDimitry Andric   if (!CCMatch) {
28375f757f3fSDimitry Andric     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
28385f757f3fSDimitry Andric     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
28395f757f3fSDimitry Andric       return false;
28405f757f3fSDimitry Andric   }
28415f757f3fSDimitry Andric 
2842*0fca6ea1SDimitry Andric   unsigned StackArgsSize = CCInfo.getStackSize();
28435f757f3fSDimitry Andric 
28445f757f3fSDimitry Andric   // If the callee takes no arguments then go on to check the results of the
28455f757f3fSDimitry Andric   // call.
28465f757f3fSDimitry Andric   if (!Outs.empty()) {
2847*0fca6ea1SDimitry Andric     if (StackArgsSize > 0) {
28485f757f3fSDimitry Andric       // Check if the arguments are already laid out in the right way as
28495f757f3fSDimitry Andric       // the caller's fixed stack objects.
28505f757f3fSDimitry Andric       MachineFrameInfo &MFI = MF.getFrameInfo();
28515f757f3fSDimitry Andric       const MachineRegisterInfo *MRI = &MF.getRegInfo();
28525f757f3fSDimitry Andric       const X86InstrInfo *TII = Subtarget.getInstrInfo();
28535f757f3fSDimitry Andric       for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
28545f757f3fSDimitry Andric         const CCValAssign &VA = ArgLocs[I];
28555f757f3fSDimitry Andric         SDValue Arg = OutVals[I];
28565f757f3fSDimitry Andric         ISD::ArgFlagsTy Flags = Outs[I].Flags;
28575f757f3fSDimitry Andric         if (VA.getLocInfo() == CCValAssign::Indirect)
28585f757f3fSDimitry Andric           return false;
28595f757f3fSDimitry Andric         if (!VA.isRegLoc()) {
28605f757f3fSDimitry Andric           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
28615f757f3fSDimitry Andric                                    TII, VA))
28625f757f3fSDimitry Andric             return false;
28635f757f3fSDimitry Andric         }
28645f757f3fSDimitry Andric       }
28655f757f3fSDimitry Andric     }
28665f757f3fSDimitry Andric 
28675f757f3fSDimitry Andric     bool PositionIndependent = isPositionIndependent();
28685f757f3fSDimitry Andric     // If the tailcall address may be in a register, then make sure it's
28695f757f3fSDimitry Andric     // possible to register allocate for it. In 32-bit, the call address can
28705f757f3fSDimitry Andric     // only target EAX, EDX, or ECX since the tail call must be scheduled after
28715f757f3fSDimitry Andric     // callee-saved registers are restored. These happen to be the same
28725f757f3fSDimitry Andric     // registers used to pass 'inreg' arguments so watch out for those.
28735f757f3fSDimitry Andric     if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
28745f757f3fSDimitry Andric                                   !isa<ExternalSymbolSDNode>(Callee)) ||
28755f757f3fSDimitry Andric                                  PositionIndependent)) {
28765f757f3fSDimitry Andric       unsigned NumInRegs = 0;
28775f757f3fSDimitry Andric       // In PIC we need an extra register to formulate the address computation
28785f757f3fSDimitry Andric       // for the callee.
28795f757f3fSDimitry Andric       unsigned MaxInRegs = PositionIndependent ? 2 : 3;
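      //
      // Rough illustration, assuming hypothetical 32-bit PIC code and the GNU
      // attribute spelling: with fastcall the two integer arguments already
      // occupy ECX and EDX, and PIC needs one more register to form the
      // callee's address, so the loop below runs out of candidates in
      // {EAX, ECX, EDX} for the indirect call target and gives up on the
      // tail call.
      //
      //   typedef void (__attribute__((fastcall)) *fastcall_fn)(int, int);
      //   extern fastcall_fn fp;
      //   void f(int a, int b) { fp(a, b); }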
28805f757f3fSDimitry Andric 
28815f757f3fSDimitry Andric       for (const auto &VA : ArgLocs) {
28825f757f3fSDimitry Andric         if (!VA.isRegLoc())
28835f757f3fSDimitry Andric           continue;
28845f757f3fSDimitry Andric         Register Reg = VA.getLocReg();
28855f757f3fSDimitry Andric         switch (Reg) {
28865f757f3fSDimitry Andric         default: break;
28875f757f3fSDimitry Andric         case X86::EAX: case X86::EDX: case X86::ECX:
28885f757f3fSDimitry Andric           if (++NumInRegs == MaxInRegs)
28895f757f3fSDimitry Andric             return false;
28905f757f3fSDimitry Andric           break;
28915f757f3fSDimitry Andric         }
28925f757f3fSDimitry Andric       }
28935f757f3fSDimitry Andric     }
28945f757f3fSDimitry Andric 
28955f757f3fSDimitry Andric     const MachineRegisterInfo &MRI = MF.getRegInfo();
28965f757f3fSDimitry Andric     if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
28975f757f3fSDimitry Andric       return false;
28985f757f3fSDimitry Andric   }
28995f757f3fSDimitry Andric 
29005f757f3fSDimitry Andric   bool CalleeWillPop =
29015f757f3fSDimitry Andric       X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
29025f757f3fSDimitry Andric                        MF.getTarget().Options.GuaranteedTailCallOpt);
29035f757f3fSDimitry Andric 
29045f757f3fSDimitry Andric   if (unsigned BytesToPop =
29055f757f3fSDimitry Andric           MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
29065f757f3fSDimitry Andric     // If we have bytes to pop, the callee must pop them.
29075f757f3fSDimitry Andric     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
29085f757f3fSDimitry Andric     if (!CalleePopMatches)
29095f757f3fSDimitry Andric       return false;
29105f757f3fSDimitry Andric   } else if (CalleeWillPop && StackArgsSize > 0) {
29115f757f3fSDimitry Andric     // If we don't have bytes to pop, make sure the callee doesn't pop any.
29125f757f3fSDimitry Andric     return false;
29135f757f3fSDimitry Andric   }
29145f757f3fSDimitry Andric 
29155f757f3fSDimitry Andric   return true;
29165f757f3fSDimitry Andric }
29175f757f3fSDimitry Andric 
29185f757f3fSDimitry Andric /// Determines whether the callee is required to pop its own arguments.
29195f757f3fSDimitry Andric /// Callee pop is necessary to support tail calls.
29205f757f3fSDimitry Andric bool X86::isCalleePop(CallingConv::ID CallingConv,
29215f757f3fSDimitry Andric                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
29225f757f3fSDimitry Andric   // If GuaranteeTCO is true, we force some calls to be callee pop so that we
29235f757f3fSDimitry Andric   // can guarantee TCO.
29245f757f3fSDimitry Andric   if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
29255f757f3fSDimitry Andric     return true;
29265f757f3fSDimitry Andric 
29275f757f3fSDimitry Andric   switch (CallingConv) {
29285f757f3fSDimitry Andric   default:
29295f757f3fSDimitry Andric     return false;
29305f757f3fSDimitry Andric   case CallingConv::X86_StdCall:
29315f757f3fSDimitry Andric   case CallingConv::X86_FastCall:
29325f757f3fSDimitry Andric   case CallingConv::X86_ThisCall:
29335f757f3fSDimitry Andric   case CallingConv::X86_VectorCall:
29345f757f3fSDimitry Andric     return !is64Bit;
29355f757f3fSDimitry Andric   }
29365f757f3fSDimitry Andric }
2937
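// Illustrative sketch only, with invented names and assuming a 32-bit stdcall
// target: the callee pops its own stack arguments with a "ret imm16", which is
// why the BytesToPop / StackArgsSize comparison in
// IsEligibleForTailCallOptimization only allows the tail call when both sides
// agree on how many bytes get popped.
//
//   int __attribute__((stdcall)) callee(int a, int b);   // returns with "ret 8"
//   int __attribute__((stdcall)) caller(int a, int b) {
//     return callee(a, b);   // both pop 8 bytes, so a tail call is possible
//   }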