//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to DAG nodes.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "x86-isel"

using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
  switch (CC) {
  default:
    return false;
  case CallingConv::X86_RegCall:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
    return true;
  }
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
  return CC == CallingConv::X86_RegCall;
}

static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
  // convention is one that uses k registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};
  // v32i1 passes in ymm unless we have BWI and the calling convention is
  // regcall.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};
  // Split v64i1 vectors if we don't have v64i8 available.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}
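
// Illustrative traces (annotation, not upstream code) of the cases above on
// an AVX-512 subtarget:
//   v8i1,  C calling convention -> {MVT::v8i16, 1}: the mask travels in one
//                                  XMM register.
//   v8i1,  X86_RegCall          -> {INVALID_SIMPLE_VALUE_TYPE, 0}: defer to
//                                  generic legalization (a k-register).
//   v64i1, BWI, 256-bit tuning  -> {MVT::v32i8, 2}: split into two YMM halves.
//   v5i1  (odd element count)   -> {MVT::i8, 5}: scalarized to match AVX2.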

MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return RegisterVT;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return MVT::v8f16;
  }

  // We will use more GPRs for f64 and f80 on 32-bit targets when x87 is
  // disabled.
  if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
      !Subtarget.hasX87())
    return MVT::i32;

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  if (VT == MVT::bf16)
    return MVT::f16;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
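
// Example (annotation, not upstream code): with the checks above, a
// half-precision vector shorter than 8 elements, e.g. v4f16, is widened to one
// v8f16 register, and a scalar bf16 is passed as f16; on a 32-bit target
// without x87, an f64 is instead carried in i32 GPR pieces, with the piece
// count supplied by getNumRegistersForCallingConv below.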

unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return NumRegisters;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return 1;
  }

  // We have to split f64 into 2 registers and f80 into 3 registers on 32-bit
  // targets if x87 is disabled.
  if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
    if (VT == MVT::f64)
      return 2;
    if (VT == MVT::f80)
      return 3;
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // Split v64i1 vectors if we don't have v64i8 available.
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  // Split vNbf16 vectors according to vNf16.
  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
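
// Illustrative breakdowns (annotation, not upstream code) for the two special
// cases handled above:
//   v5i1 with AVX-512:  IntermediateVT = i1,    RegisterVT = i8,    count = 5
//   v64i1 with BWI but useAVX512Regs() == false (e.g. 256-bit tuning):
//                       IntermediateVT = v32i1, RegisterVT = v32i8, count = 2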

EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext &Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Figure out what this type will be legalized to.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
      // vXi16/vXi8.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  return VT.changeVectorElementTypeToInteger();
}
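
// Example (annotation, not upstream code): on an AVX-512 subtarget with VLX, a
// v8i32 compare yields a v8i1 mask; without VLX the same compare falls through
// and returns v8i32 via changeVectorElementTypeToInteger(), i.e. a full-width
// element mask as on AVX2.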

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}

/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  if (Subtarget.is64Bit()) {
    // Max of 8 and alignment of type.
    Align TyAlign = DL.getABITypeAlign(Ty);
    if (TyAlign > 8)
      return TyAlign.value();
    return 8;
  }

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment.value();
}
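
// Example (annotation, not upstream code): on a 32-bit target with SSE, a
// byval struct { <4 x float>; int } is placed at a 16-byte boundary because
// getMaxByValAlign finds the 128-bit vector member, while struct { int } stays
// at the 4-byte default. On x86-64 the result is simply max(8, ABI alignment
// of the type).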

/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          Subtarget.useLight256BitInstructions()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      // If we have SSE1 registers we should be able to use them.
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if the source is a string constant;
      // it's better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}
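
// Worked examples (annotation, not upstream code) of the selection above,
// assuming unaligned 16-byte accesses are not slow:
//   64-byte memcpy, AVX-512 + BWI, 512-bit preference -> v64i8 (ZMM)
//   32-byte memset, AVX with light 256-bit ops        -> v32i8 (YMM)
//   16-byte memcpy, plain SSE2                        -> v16i8 (XMM)
//   8-byte zero memset on a 32-bit SSE2 target        -> f64 (8-byte stores)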

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return Subtarget.hasSSE1();
  if (VT == MVT::f64)
    return Subtarget.hasSSE2();
  return true;
}

static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
  return (8 * Alignment.value()) % SizeInBits == 0;
}
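
// Example (annotation, not upstream code): isBitAligned(Align(32), 256) is
// true because 32 bytes provide exactly 256 bits, while
// isBitAligned(Align(4), 256) is false (32 % 256 != 0), so a 256-bit access at
// 4-byte alignment falls through to the heuristics in isMemoryAccessFast
// below.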

bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
  if (isBitAligned(Alignment, VT.getSizeInBits()))
    return true;
  switch (VT.getSizeInBits()) {
  default:
    // 8-byte and under are always assumed to be fast.
    return true;
  case 128:
    return !Subtarget.isUnalignedMem16Slow();
  case 256:
    return !Subtarget.isUnalignedMem32Slow();
    // TODO: What about AVX-512 (512-bit) accesses?
  }
}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  // NonTemporal vector memory ops must be aligned.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }
  // Misaligned accesses of any size are always allowed.
  return true;
}

bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
                                           const DataLayout &DL, EVT VT,
                                           unsigned AddrSpace, Align Alignment,
                                           MachineMemOperand::Flags Flags,
                                           unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                       /*Fast=*/nullptr))
      return true;
    // NonTemporal vector memory ops are special, and must be aligned.
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
      return false;
    switch (VT.getSizeInBits()) {
    case 128:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
        return true;
      return false;
    case 256:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
        return true;
      return false;
    case 512:
      if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
        return true;
      return false;
    default:
      return false; // Don't have NonTemporal vector memory ops of this size.
    }
  }
  return true;
}

/// Return the entry encoding for a jump table in the
/// current function.  The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;
  if (isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Large &&
      !Subtarget.isTargetCOFF())
    return MachineJumpTableInfo::EK_LabelDifference64;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}
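
// Example (annotation, not upstream code): a 32-bit GOT-style PIC build emits
// @GOTOFF jump-table entries (EK_Custom32, materialized by
// LowerCustomJumpTableEntry below), while a 64-bit large-code-model PIC build
// on a non-COFF target uses 64-bit label differences (EK_LabelDifference64).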

bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {

  // Only relabel X86-32 for C / Stdcall CCs.
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Mark the first N int arguments as being passed in registers.
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}
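
// Example (annotation, not upstream code): with -mregparm=3 in effect
// (M->getNumberRegisterParameters() == 3), a 32-bit libcall taking (i32, i64)
// marks both arguments inreg: the i32 consumes one register and the i64 the
// remaining two. A further integer argument would find ParamRegs exhausted
// and stop the marking loop.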

const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid,
                                             MCContext &Ctx) const {
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}

/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    // This doesn't have SDLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  return Table;
}

/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  // X86-64 uses RIP relative addressing based on the jump table label.
  if (Subtarget.isPICStyleRIPRel() ||
      (Subtarget.is64Bit() &&
       getTargetMachine().getCodeModel() == CodeModel::Large))
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  // Otherwise, the reference is relative to the PIC base.
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}

std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}

unsigned X86TargetLowering::getAddressSpace() const {
  if (Subtarget.is64Bit())
    return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
  return 256;
}

static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}

static Constant *SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
}
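
// Example (annotation, not upstream code): on x86-64 outside the kernel code
// model, getAddressSpace() returns 257 (the FS segment), so
// SegmentOffset(IRB, 0x28, 257) produces the IR constant
//   inttoptr (i32 40 to ptr addrspace(257))
// i.e. the %fs:0x28 slot used for the stack guard below.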

Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // glibc, bionic, and Fuchsia have a special slot for the stack guard in
  // tcbhead_t; use it instead of the usual global variable (see
  // sysdeps/{i386,x86_64}/nptl/tls.h)
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    unsigned AddressSpace = getAddressSpace();

    // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
    if (Subtarget.isTargetFuchsia())
      return SegmentOffset(IRB, 0x10, AddressSpace);

    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base register and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If the -stack-protector-guard-offset value is not set, use %fs:0x28,
    // unless we're using a Kernel code model, in which case it's %gs:0x28.
    // On i386 it's %gs:0x14.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

    StringRef GuardReg = M->getStackProtectorGuardReg();
    if (GuardReg == "fs")
      AddressSpace = X86AS::FS;
    else if (GuardReg == "gs")
      AddressSpace = X86AS::GS;

    // Use the symbol guard if the user specified one.
    StringRef GuardSymb = M->getStackProtectorGuardSymbol();
    if (!GuardSymb.empty()) {
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
                                       : Type::getInt32Ty(M->getContext());
        GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
                                nullptr, GuardSymb, nullptr,
                                GlobalValue::NotThreadLocal, AddressSpace);
        if (!Subtarget.isTargetDarwin())
          GV->setDSOLocal(M->getDirectAccessExternalData());
      }
      return GV;
    }

    return SegmentOffset(IRB, Offset, AddressSpace);
  }
  return TargetLowering::getIRStackGuard(IRB);
}
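
// Example (annotation, not upstream code): a default glibc x86-64 build loads
// the guard from %fs:0x28 (tcbhead_t::stack_guard); on i386 it is %gs:0x14.
// Passing -mstack-protector-guard-reg=gs and -mstack-protector-guard-offset=N
// would instead select %gs:N via the GuardReg/Offset overrides handled above.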

void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // The MSVC CRT provides functionality for stack protection.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    // The MSVC CRT has a global variable holding the security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));

    // The MSVC CRT has a function to validate the security cookie.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // glibc, bionic, and Fuchsia have a special slot for the stack guard.
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}

Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
  // The MSVC CRT has a global variable holding the security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getGlobalVariable("__security_cookie");
  }
  return TargetLowering::getSDagStackGuard(M);
}

Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
  // The MSVC CRT has a function to validate the security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getFunction("__security_check_cookie");
  }
  return TargetLowering::getSSPStackGuardCheck(M);
}

Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget.isTargetAndroid()) {
    // %fs:0x48 on x86_64 (%gs: with the Kernel code model); %gs:0x24 on i386.
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia is similar.
  if (Subtarget.isTargetFuchsia()) {
    // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool X86TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}

const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}

ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
  return RCRegs;
}

/// Lowers mask values (v*i1) to the local register values.
/// \returns DAG node after lowering to register type
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &DL, SelectionDAG &DAG) {
  EVT ValVT = ValArg.getValueType();

  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Two-stage lowering might be required:
    // bitcast:   v8i1 -> i8 / v16i1 -> i16
    // anyextend: i8   -> i32 / i16   -> i32
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // One-stage lowering is required:
    // bitcast:   v32i1 -> i32 / v64i1 -> i64
    return DAG.getBitcast(ValLoc, ValArg);
  }

  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
}
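
// Example (annotation, not upstream code): a v16i1 mask value assigned to an
// i32 location takes the two-stage path above -- bitcast v16i1 -> i16, then
// ANY_EXTEND i16 -> i32 -- while v32i1 -> i32 is a single bitcast.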

/// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
static void Passv64i1ArgInRegs(
    const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // Before splitting the value, cast it to i64.
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Split the value into two i32 halves.
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);

  // Attach the two i32 halves to the corresponding registers.
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}
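
// Example (annotation, not upstream code): on a 32-bit AVX512BW target a
// v64i1 value reaches this helper already bitcast to i64; it is split into
// Lo/Hi i32 halves that land in the two consecutive registers the calling
// convention assigned (VA and NextVA), e.g. EAX:EDX for a first return value.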
7335f757f3fSDimitry Andric 
7345f757f3fSDimitry Andric SDValue
7355f757f3fSDimitry Andric X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7365f757f3fSDimitry Andric                                bool isVarArg,
7375f757f3fSDimitry Andric                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7385f757f3fSDimitry Andric                                const SmallVectorImpl<SDValue> &OutVals,
7395f757f3fSDimitry Andric                                const SDLoc &dl, SelectionDAG &DAG) const {
7405f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
7415f757f3fSDimitry Andric   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
7425f757f3fSDimitry Andric 
7435f757f3fSDimitry Andric   // In some cases we need to disable registers from the default CSR list.
7445f757f3fSDimitry Andric   // For example, when they are used as return registers (preserve_* and X86's
7455f757f3fSDimitry Andric   // regcall) or for argument passing (X86's regcall).
7465f757f3fSDimitry Andric   bool ShouldDisableCalleeSavedRegister =
7475f757f3fSDimitry Andric       shouldDisableRetRegFromCSR(CallConv) ||
7485f757f3fSDimitry Andric       MF.getFunction().hasFnAttribute("no_caller_saved_registers");
7495f757f3fSDimitry Andric 
7505f757f3fSDimitry Andric   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
7515f757f3fSDimitry Andric     report_fatal_error("X86 interrupts may not return any value");
7525f757f3fSDimitry Andric 
7535f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> RVLocs;
7545f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
7555f757f3fSDimitry Andric   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
7565f757f3fSDimitry Andric 
7575f757f3fSDimitry Andric   SmallVector<std::pair<Register, SDValue>, 4> RetVals;
7585f757f3fSDimitry Andric   for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
7595f757f3fSDimitry Andric        ++I, ++OutsIndex) {
7605f757f3fSDimitry Andric     CCValAssign &VA = RVLocs[I];
7615f757f3fSDimitry Andric     assert(VA.isRegLoc() && "Can only return in registers!");
7625f757f3fSDimitry Andric 
7635f757f3fSDimitry Andric     // Add the register to the CalleeSaveDisableRegs list.
7645f757f3fSDimitry Andric     if (ShouldDisableCalleeSavedRegister)
7655f757f3fSDimitry Andric       MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
7665f757f3fSDimitry Andric 
7675f757f3fSDimitry Andric     SDValue ValToCopy = OutVals[OutsIndex];
7685f757f3fSDimitry Andric     EVT ValVT = ValToCopy.getValueType();
7695f757f3fSDimitry Andric 
7705f757f3fSDimitry Andric     // Promote values to the appropriate types.
7715f757f3fSDimitry Andric     if (VA.getLocInfo() == CCValAssign::SExt)
7725f757f3fSDimitry Andric       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
7735f757f3fSDimitry Andric     else if (VA.getLocInfo() == CCValAssign::ZExt)
7745f757f3fSDimitry Andric       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
7755f757f3fSDimitry Andric     else if (VA.getLocInfo() == CCValAssign::AExt) {
7765f757f3fSDimitry Andric       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
7775f757f3fSDimitry Andric         ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
7785f757f3fSDimitry Andric       else
7795f757f3fSDimitry Andric         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
7805f757f3fSDimitry Andric     }
7815f757f3fSDimitry Andric     else if (VA.getLocInfo() == CCValAssign::BCvt)
7825f757f3fSDimitry Andric       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
7835f757f3fSDimitry Andric 
7845f757f3fSDimitry Andric     assert(VA.getLocInfo() != CCValAssign::FPExt &&
7855f757f3fSDimitry Andric            "Unexpected FP-extend for return value.");
7865f757f3fSDimitry Andric 
7875f757f3fSDimitry Andric     // Report an error if we have attempted to return a value via an XMM
7885f757f3fSDimitry Andric     // register and SSE was disabled.
7895f757f3fSDimitry Andric     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
7905f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
7915f757f3fSDimitry Andric       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
7925f757f3fSDimitry Andric     } else if (!Subtarget.hasSSE2() &&
7935f757f3fSDimitry Andric                X86::FR64XRegClass.contains(VA.getLocReg()) &&
7945f757f3fSDimitry Andric                ValVT == MVT::f64) {
7955f757f3fSDimitry Andric       // When returning a double via an XMM register, report an error if SSE2 is
7965f757f3fSDimitry Andric       // not enabled.
7975f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
7985f757f3fSDimitry Andric       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
7995f757f3fSDimitry Andric     }
8005f757f3fSDimitry Andric 
8015f757f3fSDimitry Andric     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
8025f757f3fSDimitry Andric     // the RET instruction and handled by the FP Stackifier.
8035f757f3fSDimitry Andric     if (VA.getLocReg() == X86::FP0 ||
8045f757f3fSDimitry Andric         VA.getLocReg() == X86::FP1) {
8055f757f3fSDimitry Andric       // If this is a copy from an xmm register to ST(0), use an FPExtend to
8065f757f3fSDimitry Andric       // change the value to the FP stack register class.
8075f757f3fSDimitry Andric       if (isScalarFPTypeInSSEReg(VA.getValVT()))
8085f757f3fSDimitry Andric         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
8095f757f3fSDimitry Andric       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
8105f757f3fSDimitry Andric       // Don't emit a copytoreg.
8115f757f3fSDimitry Andric       continue;
8125f757f3fSDimitry Andric     }
8135f757f3fSDimitry Andric 
8145f757f3fSDimitry Andric     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
8155f757f3fSDimitry Andric     // which is returned in RAX / RDX.
8165f757f3fSDimitry Andric     if (Subtarget.is64Bit()) {
8175f757f3fSDimitry Andric       if (ValVT == MVT::x86mmx) {
8185f757f3fSDimitry Andric         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
8195f757f3fSDimitry Andric           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
8205f757f3fSDimitry Andric           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
8215f757f3fSDimitry Andric                                   ValToCopy);
8225f757f3fSDimitry Andric           // If we don't have SSE2 available, convert to v4f32 so the generated
8235f757f3fSDimitry Andric           // register is legal.
8245f757f3fSDimitry Andric           if (!Subtarget.hasSSE2())
8255f757f3fSDimitry Andric             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
8265f757f3fSDimitry Andric         }
8275f757f3fSDimitry Andric       }
8285f757f3fSDimitry Andric     }
8295f757f3fSDimitry Andric 
8305f757f3fSDimitry Andric     if (VA.needsCustom()) {
8315f757f3fSDimitry Andric       assert(VA.getValVT() == MVT::v64i1 &&
8325f757f3fSDimitry Andric              "Currently the only custom case is when we split v64i1 to 2 regs");
8335f757f3fSDimitry Andric 
8345f757f3fSDimitry Andric       Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
8355f757f3fSDimitry Andric                          Subtarget);
8365f757f3fSDimitry Andric 
8375f757f3fSDimitry Andric       // Add the second register to the CalleeSaveDisableRegs list.
8385f757f3fSDimitry Andric       if (ShouldDisableCalleeSavedRegister)
8395f757f3fSDimitry Andric         MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
8405f757f3fSDimitry Andric     } else {
8415f757f3fSDimitry Andric       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
8425f757f3fSDimitry Andric     }
8435f757f3fSDimitry Andric   }
8445f757f3fSDimitry Andric 
8455f757f3fSDimitry Andric   SDValue Glue;
8465f757f3fSDimitry Andric   SmallVector<SDValue, 6> RetOps;
8475f757f3fSDimitry Andric   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
8485f757f3fSDimitry Andric   // Operand #1 = Bytes To Pop
8495f757f3fSDimitry Andric   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
8505f757f3fSDimitry Andric                    MVT::i32));
8515f757f3fSDimitry Andric 
8525f757f3fSDimitry Andric   // Copy the result values into the output registers.
8535f757f3fSDimitry Andric   for (auto &RetVal : RetVals) {
8545f757f3fSDimitry Andric     if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
8555f757f3fSDimitry Andric       RetOps.push_back(RetVal.second);
8565f757f3fSDimitry Andric       continue; // Don't emit a copytoreg.
8575f757f3fSDimitry Andric     }
8585f757f3fSDimitry Andric 
8595f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
8605f757f3fSDimitry Andric     Glue = Chain.getValue(1);
8615f757f3fSDimitry Andric     RetOps.push_back(
8625f757f3fSDimitry Andric         DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
8635f757f3fSDimitry Andric   }
8645f757f3fSDimitry Andric 
8655f757f3fSDimitry Andric   // The Swift calling convention does not require us to copy the sret argument
8665f757f3fSDimitry Andric   // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
8675f757f3fSDimitry Andric 
8685f757f3fSDimitry Andric   // All x86 ABIs require that for returning structs by value we copy
8695f757f3fSDimitry Andric   // the sret argument into %rax/%eax (depending on ABI) for the return.
8705f757f3fSDimitry Andric   // We saved the argument into a virtual register in the entry block,
8715f757f3fSDimitry Andric   // so now we copy the value out and into %rax/%eax.
8725f757f3fSDimitry Andric   //
8735f757f3fSDimitry Andric   // Checking Function.hasStructRetAttr() here is insufficient because the IR
8745f757f3fSDimitry Andric   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
8755f757f3fSDimitry Andric   // false, then an sret argument may be implicitly inserted in the SelDAG. In
8765f757f3fSDimitry Andric   // either case FuncInfo->setSRetReturnReg() will have been called.
8775f757f3fSDimitry Andric   if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
8785f757f3fSDimitry Andric     // When we have both sret and another return value, we should use the
8795f757f3fSDimitry Andric     // original Chain stored in RetOps[0], instead of the current Chain updated
8805f757f3fSDimitry Andric     // in the above loop. If we only have sret, RetOps[0] equals Chain.
8815f757f3fSDimitry Andric 
8825f757f3fSDimitry Andric     // For the case of sret and another return value, we have
8835f757f3fSDimitry Andric     //   Chain_0 at the function entry
8845f757f3fSDimitry Andric     //   Chain_1 = getCopyToReg(Chain_0) in the above loop
8855f757f3fSDimitry Andric     // If we use Chain_1 in getCopyFromReg, we will have
8865f757f3fSDimitry Andric     //   Val = getCopyFromReg(Chain_1)
8875f757f3fSDimitry Andric     //   Chain_2 = getCopyToReg(Chain_1, Val) from below
8885f757f3fSDimitry Andric 
8895f757f3fSDimitry Andric     // getCopyToReg(Chain_0) will be glued together with
8905f757f3fSDimitry Andric     // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
8915f757f3fSDimitry Andric     // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
8925f757f3fSDimitry Andric     //   Data dependency from Unit B to Unit A due to usage of Val in
8935f757f3fSDimitry Andric     //     getCopyToReg(Chain_1, Val)
8945f757f3fSDimitry Andric     //   Chain dependency from Unit A to Unit B
8955f757f3fSDimitry Andric 
8965f757f3fSDimitry Andric     // So here, we use RetOps[0] (i.e., Chain_0) for getCopyFromReg.
8975f757f3fSDimitry Andric     SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
8985f757f3fSDimitry Andric                                      getPointerTy(MF.getDataLayout()));
8995f757f3fSDimitry Andric 
9005f757f3fSDimitry Andric     Register RetValReg
9015f757f3fSDimitry Andric         = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
9025f757f3fSDimitry Andric           X86::RAX : X86::EAX;
9035f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
9045f757f3fSDimitry Andric     Glue = Chain.getValue(1);
9055f757f3fSDimitry Andric 
9065f757f3fSDimitry Andric     // RAX/EAX now acts like a return value.
9075f757f3fSDimitry Andric     RetOps.push_back(
9085f757f3fSDimitry Andric         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
9095f757f3fSDimitry Andric 
9105f757f3fSDimitry Andric     // Add the returned register to the CalleeSaveDisableRegs list. Don't do
9115f757f3fSDimitry Andric     // this however for preserve_most/preserve_all to minimize the number of
9125f757f3fSDimitry Andric     // callee-saved registers for these CCs.
9135f757f3fSDimitry Andric     if (ShouldDisableCalleeSavedRegister &&
9145f757f3fSDimitry Andric         CallConv != CallingConv::PreserveAll &&
9155f757f3fSDimitry Andric         CallConv != CallingConv::PreserveMost)
9165f757f3fSDimitry Andric       MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
9175f757f3fSDimitry Andric   }
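
  // Illustrative note (hypothetical example, not part of the original
  // source): for C code such as
  //   struct S { int v[4]; };
  //   struct S make(void);
  // the caller passes a hidden pointer to the return slot. The block above is
  // what copies that pointer back into %rax/%eax on return, as the x86 ABIs
  // require for sret results.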
9185f757f3fSDimitry Andric 
9195f757f3fSDimitry Andric   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
9205f757f3fSDimitry Andric   const MCPhysReg *I =
9215f757f3fSDimitry Andric       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
9225f757f3fSDimitry Andric   if (I) {
9235f757f3fSDimitry Andric     for (; *I; ++I) {
9245f757f3fSDimitry Andric       if (X86::GR64RegClass.contains(*I))
9255f757f3fSDimitry Andric         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
9265f757f3fSDimitry Andric       else
9275f757f3fSDimitry Andric         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
9285f757f3fSDimitry Andric     }
9295f757f3fSDimitry Andric   }
9305f757f3fSDimitry Andric 
9315f757f3fSDimitry Andric   RetOps[0] = Chain;  // Update chain.
9325f757f3fSDimitry Andric 
9335f757f3fSDimitry Andric   // Add the glue if we have it.
9345f757f3fSDimitry Andric   if (Glue.getNode())
9355f757f3fSDimitry Andric     RetOps.push_back(Glue);
9365f757f3fSDimitry Andric 
9375f757f3fSDimitry Andric   X86ISD::NodeType Opcode = X86ISD::RET_GLUE;
9385f757f3fSDimitry Andric   if (CallConv == CallingConv::X86_INTR)
9395f757f3fSDimitry Andric     Opcode = X86ISD::IRET;
9405f757f3fSDimitry Andric   return DAG.getNode(Opcode, dl, MVT::Other, RetOps);
9415f757f3fSDimitry Andric }
9425f757f3fSDimitry Andric 
9435f757f3fSDimitry Andric bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
9445f757f3fSDimitry Andric   if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
9455f757f3fSDimitry Andric     return false;
9465f757f3fSDimitry Andric 
9475f757f3fSDimitry Andric   SDValue TCChain = Chain;
9485f757f3fSDimitry Andric   SDNode *Copy = *N->use_begin();
9495f757f3fSDimitry Andric   if (Copy->getOpcode() == ISD::CopyToReg) {
9505f757f3fSDimitry Andric     // If the copy has a glue operand, we conservatively assume it isn't safe to
9515f757f3fSDimitry Andric     // perform a tail call.
9525f757f3fSDimitry Andric     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
9535f757f3fSDimitry Andric       return false;
9545f757f3fSDimitry Andric     TCChain = Copy->getOperand(0);
9555f757f3fSDimitry Andric   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
9565f757f3fSDimitry Andric     return false;
9575f757f3fSDimitry Andric 
9585f757f3fSDimitry Andric   bool HasRet = false;
9595f757f3fSDimitry Andric   for (const SDNode *U : Copy->uses()) {
9605f757f3fSDimitry Andric     if (U->getOpcode() != X86ISD::RET_GLUE)
9615f757f3fSDimitry Andric       return false;
9625f757f3fSDimitry Andric     // If we are returning more than one value, we can definitely
9635f757f3fSDimitry Andric     // not make a tail call; see PR19530.
9645f757f3fSDimitry Andric     if (U->getNumOperands() > 4)
9655f757f3fSDimitry Andric       return false;
9665f757f3fSDimitry Andric     if (U->getNumOperands() == 4 &&
9675f757f3fSDimitry Andric         U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
9685f757f3fSDimitry Andric       return false;
9695f757f3fSDimitry Andric     HasRet = true;
9705f757f3fSDimitry Andric   }
9715f757f3fSDimitry Andric 
9725f757f3fSDimitry Andric   if (!HasRet)
9735f757f3fSDimitry Andric     return false;
9745f757f3fSDimitry Andric 
9755f757f3fSDimitry Andric   Chain = TCChain;
9765f757f3fSDimitry Andric   return true;
9775f757f3fSDimitry Andric }
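
// Illustrative note (a sketch, not part of the original source): this hook
// helps decide whether a libcall may be emitted as a tail call. For a
// hypothetical
//   double f(double x) { return sin(x); }
// the CopyToReg (or FP_EXTEND, on x87) feeding the return is the only use of
// the call result, so "call sin; ret" may become "jmp sin".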
9785f757f3fSDimitry Andric 
9795f757f3fSDimitry Andric EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
9805f757f3fSDimitry Andric                                            ISD::NodeType ExtendKind) const {
9815f757f3fSDimitry Andric   MVT ReturnMVT = MVT::i32;
9825f757f3fSDimitry Andric 
9835f757f3fSDimitry Andric   bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
9845f757f3fSDimitry Andric   if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
9855f757f3fSDimitry Andric     // The ABI does not require i1, i8 or i16 to be extended.
9865f757f3fSDimitry Andric     //
9875f757f3fSDimitry Andric     // On Darwin, there is code in the wild relying on Clang's old behaviour of
9885f757f3fSDimitry Andric     // always extending i8/i16 return values, so keep doing that for now.
9895f757f3fSDimitry Andric     // (PR26665).
9905f757f3fSDimitry Andric     ReturnMVT = MVT::i8;
9915f757f3fSDimitry Andric   }
9925f757f3fSDimitry Andric 
9935f757f3fSDimitry Andric   EVT MinVT = getRegisterType(Context, ReturnMVT);
9945f757f3fSDimitry Andric   return VT.bitsLT(MinVT) ? MinVT : VT;
9955f757f3fSDimitry Andric }
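
// Illustrative note (a sketch, not part of the original source): given
//   define signext i8 @f() { ret i8 42 }
// this hook keeps the extended return type at i8 on most targets (callers
// re-extend as needed), while on Darwin the value is widened to i32 to match
// Clang's historical behaviour (PR26665).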
9965f757f3fSDimitry Andric 
9975f757f3fSDimitry Andric /// Reads two 32 bit registers and creates a 64 bit mask value.
9985f757f3fSDimitry Andric /// \param VA The current 32 bit value that needs to be assigned.
9995f757f3fSDimitry Andric /// \param NextVA The next 32 bit value that needs to be assigned.
10005f757f3fSDimitry Andric /// \param Root The parent DAG node.
10015f757f3fSDimitry Andric /// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
10025f757f3fSDimitry Andric ///                        for glue purposes. In case the DAG is already using
10035f757f3fSDimitry Andric ///                        a physical register instead of a virtual one, we
10045f757f3fSDimitry Andric ///                        should glue our new SDValue to the InGlue SDValue.
10055f757f3fSDimitry Andric /// \return a new 64-bit SDValue.
10065f757f3fSDimitry Andric static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
10075f757f3fSDimitry Andric                                 SDValue &Root, SelectionDAG &DAG,
10085f757f3fSDimitry Andric                                 const SDLoc &DL, const X86Subtarget &Subtarget,
10095f757f3fSDimitry Andric                                 SDValue *InGlue = nullptr) {
10105f757f3fSDimitry Andric   assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
10115f757f3fSDimitry Andric   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
10125f757f3fSDimitry Andric   assert(VA.getValVT() == MVT::v64i1 &&
10135f757f3fSDimitry Andric          "Expecting first location of 64 bit width type");
10145f757f3fSDimitry Andric   assert(NextVA.getValVT() == VA.getValVT() &&
10155f757f3fSDimitry Andric          "The locations should have the same type");
10165f757f3fSDimitry Andric   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
10175f757f3fSDimitry Andric          "The values should reside in two registers");
10185f757f3fSDimitry Andric 
10195f757f3fSDimitry Andric   SDValue Lo, Hi;
10205f757f3fSDimitry Andric   SDValue ArgValueLo, ArgValueHi;
10215f757f3fSDimitry Andric 
10225f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
10235f757f3fSDimitry Andric   const TargetRegisterClass *RC = &X86::GR32RegClass;
10245f757f3fSDimitry Andric 
10255f757f3fSDimitry Andric   // Read a 32 bit value from the registers.
10265f757f3fSDimitry Andric   if (!InGlue) {
10275f757f3fSDimitry Andric     // When no physical register is present,
10285f757f3fSDimitry Andric     // create an intermediate virtual register.
10295f757f3fSDimitry Andric     Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
10305f757f3fSDimitry Andric     ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
10315f757f3fSDimitry Andric     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
10325f757f3fSDimitry Andric     ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
10335f757f3fSDimitry Andric   } else {
10345f757f3fSDimitry Andric     // When a physical register is available, read the value from it and glue
10355f757f3fSDimitry Andric     // the reads together.
10365f757f3fSDimitry Andric     ArgValueLo =
10375f757f3fSDimitry Andric       DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
10385f757f3fSDimitry Andric     *InGlue = ArgValueLo.getValue(2);
10395f757f3fSDimitry Andric     ArgValueHi =
10405f757f3fSDimitry Andric       DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
10415f757f3fSDimitry Andric     *InGlue = ArgValueHi.getValue(2);
10425f757f3fSDimitry Andric   }
10435f757f3fSDimitry Andric 
10445f757f3fSDimitry Andric   // Bitcast the lower 32 bits into a v32i1 mask.
10455f757f3fSDimitry Andric   Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
10465f757f3fSDimitry Andric 
10475f757f3fSDimitry Andric   // Bitcast the upper 32 bits into a v32i1 mask.
10485f757f3fSDimitry Andric   Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
10495f757f3fSDimitry Andric 
10505f757f3fSDimitry Andric   // Concatenate the two values together.
10515f757f3fSDimitry Andric   return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
10525f757f3fSDimitry Andric }
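
// Illustrative note (hypothetical, not part of the original source): under
// the 32-bit regcall convention a v64i1 value occupies two GR32s (the exact
// registers come from the CC assignment). Conceptually the helper rebuilds
//   Lo = CopyFromReg(reg0) : i32      Hi = CopyFromReg(reg1) : i32
//   v64i1 = concat_vectors(bitcast v32i1 Lo, bitcast v32i1 Hi)
// gluing the two reads together when physical registers are being read.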
10535f757f3fSDimitry Andric 
10545f757f3fSDimitry Andric /// Lower a register of various sizes (8/16/32/64)
10555f757f3fSDimitry Andric /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
10565f757f3fSDimitry Andric /// \returns a DAG node containing the operand after lowering to a mask type.
10575f757f3fSDimitry Andric static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
10585f757f3fSDimitry Andric                                const EVT &ValLoc, const SDLoc &DL,
10595f757f3fSDimitry Andric                                SelectionDAG &DAG) {
10605f757f3fSDimitry Andric   SDValue ValReturned = ValArg;
10615f757f3fSDimitry Andric 
10625f757f3fSDimitry Andric   if (ValVT == MVT::v1i1)
10635f757f3fSDimitry Andric     return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
10645f757f3fSDimitry Andric 
10655f757f3fSDimitry Andric   if (ValVT == MVT::v64i1) {
10665f757f3fSDimitry Andric     // On 32-bit targets, this case is handled by getv64i1Argument.
10675f757f3fSDimitry Andric     assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
10685f757f3fSDimitry Andric     // On 64-bit targets, there is no need to truncate; a bitcast suffices.
10695f757f3fSDimitry Andric   } else {
10705f757f3fSDimitry Andric     MVT MaskLenVT;
10715f757f3fSDimitry Andric     switch (ValVT.getSimpleVT().SimpleTy) {
10725f757f3fSDimitry Andric     case MVT::v8i1:
10735f757f3fSDimitry Andric       MaskLenVT = MVT::i8;
10745f757f3fSDimitry Andric       break;
10755f757f3fSDimitry Andric     case MVT::v16i1:
10765f757f3fSDimitry Andric       MaskLenVT = MVT::i16;
10775f757f3fSDimitry Andric       break;
10785f757f3fSDimitry Andric     case MVT::v32i1:
10795f757f3fSDimitry Andric       MaskLenVT = MVT::i32;
10805f757f3fSDimitry Andric       break;
10815f757f3fSDimitry Andric     default:
10825f757f3fSDimitry Andric       llvm_unreachable("Expecting a vector of i1 types");
10835f757f3fSDimitry Andric     }
10845f757f3fSDimitry Andric 
10855f757f3fSDimitry Andric     ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
10865f757f3fSDimitry Andric   }
10875f757f3fSDimitry Andric   return DAG.getBitcast(ValVT, ValReturned);
10885f757f3fSDimitry Andric }
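
// Illustrative note (a sketch, not part of the original source): a v16i1
// mask returned in a 32-bit location is recovered as
//   i32 --TRUNCATE--> i16 --bitcast--> v16i1
// whereas a v64i1 value already sitting in an i64 location needs only the
// final bitcast.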
10895f757f3fSDimitry Andric 
10905f757f3fSDimitry Andric /// Lower the result values of a call into the
10915f757f3fSDimitry Andric /// appropriate copies out of appropriate physical registers.
10925f757f3fSDimitry Andric ///
10935f757f3fSDimitry Andric SDValue X86TargetLowering::LowerCallResult(
10945f757f3fSDimitry Andric     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
10955f757f3fSDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
10965f757f3fSDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
10975f757f3fSDimitry Andric     uint32_t *RegMask) const {
10985f757f3fSDimitry Andric 
10995f757f3fSDimitry Andric   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
11005f757f3fSDimitry Andric   // Assign locations to each value returned by this call.
11015f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> RVLocs;
11025f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
11035f757f3fSDimitry Andric                  *DAG.getContext());
11045f757f3fSDimitry Andric   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
11055f757f3fSDimitry Andric 
11065f757f3fSDimitry Andric   // Copy all of the result registers out of their specified physreg.
11075f757f3fSDimitry Andric   for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
11085f757f3fSDimitry Andric        ++I, ++InsIndex) {
11095f757f3fSDimitry Andric     CCValAssign &VA = RVLocs[I];
11105f757f3fSDimitry Andric     EVT CopyVT = VA.getLocVT();
11115f757f3fSDimitry Andric 
11125f757f3fSDimitry Andric     // In some calling conventions we need to remove the used registers
11135f757f3fSDimitry Andric     // from the register mask.
11145f757f3fSDimitry Andric     if (RegMask) {
11155f757f3fSDimitry Andric       for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
11165f757f3fSDimitry Andric         RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
11175f757f3fSDimitry Andric     }
11185f757f3fSDimitry Andric 
11195f757f3fSDimitry Andric     // Report an error if there was an attempt to return FP values via XMM
11205f757f3fSDimitry Andric     // registers.
11215f757f3fSDimitry Andric     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
11225f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
11235f757f3fSDimitry Andric       if (VA.getLocReg() == X86::XMM1)
11245f757f3fSDimitry Andric         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
11255f757f3fSDimitry Andric       else
11265f757f3fSDimitry Andric         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
11275f757f3fSDimitry Andric     } else if (!Subtarget.hasSSE2() &&
11285f757f3fSDimitry Andric                X86::FR64XRegClass.contains(VA.getLocReg()) &&
11295f757f3fSDimitry Andric                CopyVT == MVT::f64) {
11305f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
11315f757f3fSDimitry Andric       if (VA.getLocReg() == X86::XMM1)
11325f757f3fSDimitry Andric         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
11335f757f3fSDimitry Andric       else
11345f757f3fSDimitry Andric         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
11355f757f3fSDimitry Andric     }
11365f757f3fSDimitry Andric 
11375f757f3fSDimitry Andric     // If we prefer to use the value in xmm registers, copy it out as f80 and
11385f757f3fSDimitry Andric     // use a truncate to move it from fp stack reg to xmm reg.
11395f757f3fSDimitry Andric     bool RoundAfterCopy = false;
11405f757f3fSDimitry Andric     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
11415f757f3fSDimitry Andric         isScalarFPTypeInSSEReg(VA.getValVT())) {
11425f757f3fSDimitry Andric       if (!Subtarget.hasX87())
11435f757f3fSDimitry Andric         report_fatal_error("X87 register return with X87 disabled");
11445f757f3fSDimitry Andric       CopyVT = MVT::f80;
11455f757f3fSDimitry Andric       RoundAfterCopy = (CopyVT != VA.getLocVT());
11465f757f3fSDimitry Andric     }
11475f757f3fSDimitry Andric 
11485f757f3fSDimitry Andric     SDValue Val;
11495f757f3fSDimitry Andric     if (VA.needsCustom()) {
11505f757f3fSDimitry Andric       assert(VA.getValVT() == MVT::v64i1 &&
11515f757f3fSDimitry Andric              "Currently the only custom case is when we split v64i1 to 2 regs");
11525f757f3fSDimitry Andric       Val =
11535f757f3fSDimitry Andric           getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
11545f757f3fSDimitry Andric     } else {
11555f757f3fSDimitry Andric       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
11565f757f3fSDimitry Andric                   .getValue(1);
11575f757f3fSDimitry Andric       Val = Chain.getValue(0);
11585f757f3fSDimitry Andric       InGlue = Chain.getValue(2);
11595f757f3fSDimitry Andric     }
11605f757f3fSDimitry Andric 
11615f757f3fSDimitry Andric     if (RoundAfterCopy)
11625f757f3fSDimitry Andric       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
11635f757f3fSDimitry Andric                         // This truncation won't change the value.
11645f757f3fSDimitry Andric                         DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
11655f757f3fSDimitry Andric 
11665f757f3fSDimitry Andric     if (VA.isExtInLoc()) {
11675f757f3fSDimitry Andric       if (VA.getValVT().isVector() &&
11685f757f3fSDimitry Andric           VA.getValVT().getScalarType() == MVT::i1 &&
11695f757f3fSDimitry Andric           ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
11705f757f3fSDimitry Andric            (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
11715f757f3fSDimitry Andric         // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8.
11725f757f3fSDimitry Andric         Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
11735f757f3fSDimitry Andric       } else
11745f757f3fSDimitry Andric         Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
11755f757f3fSDimitry Andric     }
11765f757f3fSDimitry Andric 
11775f757f3fSDimitry Andric     if (VA.getLocInfo() == CCValAssign::BCvt)
11785f757f3fSDimitry Andric       Val = DAG.getBitcast(VA.getValVT(), Val);
11795f757f3fSDimitry Andric 
11805f757f3fSDimitry Andric     InVals.push_back(Val);
11815f757f3fSDimitry Andric   }
11825f757f3fSDimitry Andric 
11835f757f3fSDimitry Andric   return Chain;
11845f757f3fSDimitry Andric }
11855f757f3fSDimitry Andric 
11865f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
11875f757f3fSDimitry Andric //                C & StdCall & Fast Calling Convention implementation
11885f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
11895f757f3fSDimitry Andric //  The StdCall calling convention is the standard for many Windows API
11905f757f3fSDimitry Andric //  routines. It differs from the C calling convention only slightly: the
11915f757f3fSDimitry Andric //  callee cleans up the stack rather than the caller, and symbols are
11925f757f3fSDimitry Andric //  decorated in some fancy way :) It doesn't support any vector arguments.
11935f757f3fSDimitry Andric //  For info on the fast calling convention, see the Fast Calling Convention
11945f757f3fSDimitry Andric //  (tail call) implementation in LowerX86_32FastCCCallTo.
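//
//  Illustrative note (hypothetical example, not part of the original source):
//  under stdcall,
//    int __stdcall add(int a, int b);
//  is decorated as _add@8 and returns with "ret $8", popping its own eight
//  bytes of arguments, whereas a cdecl version would plainly "ret" and leave
//  the pop to the caller.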
11955f757f3fSDimitry Andric 
11965f757f3fSDimitry Andric /// Determines whether Args, either a set of outgoing arguments to a call, or a
11975f757f3fSDimitry Andric /// set of incoming args of a call, contains an sret pointer that the callee
11985f757f3fSDimitry Andric /// pops.
11995f757f3fSDimitry Andric template <typename T>
12005f757f3fSDimitry Andric static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
12015f757f3fSDimitry Andric                              const X86Subtarget &Subtarget) {
12025f757f3fSDimitry Andric   // Not C++20 (yet), so no concepts available.
12035f757f3fSDimitry Andric   static_assert(std::is_same_v<T, ISD::OutputArg> ||
12045f757f3fSDimitry Andric                     std::is_same_v<T, ISD::InputArg>,
12055f757f3fSDimitry Andric                 "requires ISD::OutputArg or ISD::InputArg");
12065f757f3fSDimitry Andric 
12075f757f3fSDimitry Andric   // Only 32-bit pops the sret.  It's a 64-bit world these days, so early-out
12085f757f3fSDimitry Andric   // for most compilations.
12095f757f3fSDimitry Andric   if (!Subtarget.is32Bit())
12105f757f3fSDimitry Andric     return false;
12115f757f3fSDimitry Andric 
12125f757f3fSDimitry Andric   if (Args.empty())
12135f757f3fSDimitry Andric     return false;
12145f757f3fSDimitry Andric 
12155f757f3fSDimitry Andric   // Most calls do not have an sret argument; check the first argument next.
12165f757f3fSDimitry Andric   const ISD::ArgFlagsTy &Flags = Args[0].Flags;
12175f757f3fSDimitry Andric   if (!Flags.isSRet() || Flags.isInReg())
12185f757f3fSDimitry Andric     return false;
12195f757f3fSDimitry Andric 
12205f757f3fSDimitry Andric   // The MSVC ABI does not pop the sret.
12215f757f3fSDimitry Andric   if (Subtarget.getTargetTriple().isOSMSVCRT())
12225f757f3fSDimitry Andric     return false;
12235f757f3fSDimitry Andric 
12245f757f3fSDimitry Andric   // MCUs don't pop the sret.
12255f757f3fSDimitry Andric   if (Subtarget.isTargetMCU())
12265f757f3fSDimitry Andric     return false;
12275f757f3fSDimitry Andric 
12285f757f3fSDimitry Andric   // The callee pops the sret argument.
12295f757f3fSDimitry Andric   return true;
12305f757f3fSDimitry Andric }
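
// Illustrative note (hypothetical example, not part of the original source):
// on 32-bit Linux,
//   struct S { int v[4]; };
//   struct S f(void);
// compiles to a function ending in "ret $4" (the callee pops the 4-byte sret
// pointer), so this helper returns true there, but false for MSVC and MCU
// targets, where cleanup is left to the caller.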
12315f757f3fSDimitry Andric 
12325f757f3fSDimitry Andric /// Make a copy of an aggregate at the address specified by "Src" to the
12335f757f3fSDimitry Andric /// address "Dst" with size and alignment information specified by the byval
12345f757f3fSDimitry Andric /// parameter attribute. The copy will be passed as a byval function parameter.
12355f757f3fSDimitry Andric static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
12365f757f3fSDimitry Andric                                          SDValue Chain, ISD::ArgFlagsTy Flags,
12375f757f3fSDimitry Andric                                          SelectionDAG &DAG, const SDLoc &dl) {
12385f757f3fSDimitry Andric   SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
12395f757f3fSDimitry Andric 
12405f757f3fSDimitry Andric   return DAG.getMemcpy(
12415f757f3fSDimitry Andric       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
12425f757f3fSDimitry Andric       /*isVolatile*/ false, /*AlwaysInline=*/true,
1243*0fca6ea1SDimitry Andric       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
12445f757f3fSDimitry Andric }
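
// Illustrative note (hypothetical, not part of the original source): for a
// call like
//   %tmp = alloca %struct.Big
//   call void @callee(ptr byval(%struct.Big) align 4 %tmp)
// the caller is responsible for making the copy, and the memcpy built above
// (always inlined, never volatile) is how that copy is materialized on the
// stack.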
12455f757f3fSDimitry Andric 
12465f757f3fSDimitry Andric /// Return true if the calling convention is one that we can guarantee TCO for.
12475f757f3fSDimitry Andric static bool canGuaranteeTCO(CallingConv::ID CC) {
12485f757f3fSDimitry Andric   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
12495f757f3fSDimitry Andric           CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
12505f757f3fSDimitry Andric           CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
12515f757f3fSDimitry Andric }
12525f757f3fSDimitry Andric 
12535f757f3fSDimitry Andric /// Return true if we might ever do TCO for calls with this calling convention.
12545f757f3fSDimitry Andric static bool mayTailCallThisCC(CallingConv::ID CC) {
12555f757f3fSDimitry Andric   switch (CC) {
12565f757f3fSDimitry Andric   // C calling conventions:
12575f757f3fSDimitry Andric   case CallingConv::C:
12585f757f3fSDimitry Andric   case CallingConv::Win64:
12595f757f3fSDimitry Andric   case CallingConv::X86_64_SysV:
1260*0fca6ea1SDimitry Andric   case CallingConv::PreserveNone:
12615f757f3fSDimitry Andric   // Callee pop conventions:
12625f757f3fSDimitry Andric   case CallingConv::X86_ThisCall:
12635f757f3fSDimitry Andric   case CallingConv::X86_StdCall:
12645f757f3fSDimitry Andric   case CallingConv::X86_VectorCall:
12655f757f3fSDimitry Andric   case CallingConv::X86_FastCall:
12665f757f3fSDimitry Andric   // Swift:
12675f757f3fSDimitry Andric   case CallingConv::Swift:
12685f757f3fSDimitry Andric     return true;
12695f757f3fSDimitry Andric   default:
12705f757f3fSDimitry Andric     return canGuaranteeTCO(CC);
12715f757f3fSDimitry Andric   }
12725f757f3fSDimitry Andric }
12735f757f3fSDimitry Andric 
12745f757f3fSDimitry Andric /// Return true if the function is being made into a tailcall target by
12755f757f3fSDimitry Andric /// changing its ABI.
12765f757f3fSDimitry Andric static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
12775f757f3fSDimitry Andric   return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
12785f757f3fSDimitry Andric          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
12795f757f3fSDimitry Andric }
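
// Illustrative note (a sketch, not part of the original source): with
// -tailcallopt (GuaranteedTailCallOpt), a fastcc call in tail position such
// as
//   %r = tail call fastcc i32 @callee(i32 %x)
//   ret i32 %r
// must be lowered as a jump; tailcc and swifttailcc receive the same
// guarantee even without the flag, which is what the checks above encode.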
12805f757f3fSDimitry Andric 
12815f757f3fSDimitry Andric bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
12825f757f3fSDimitry Andric   if (!CI->isTailCall())
12835f757f3fSDimitry Andric     return false;
12845f757f3fSDimitry Andric 
12855f757f3fSDimitry Andric   CallingConv::ID CalleeCC = CI->getCallingConv();
12865f757f3fSDimitry Andric   if (!mayTailCallThisCC(CalleeCC))
12875f757f3fSDimitry Andric     return false;
12885f757f3fSDimitry Andric 
12895f757f3fSDimitry Andric   return true;
12905f757f3fSDimitry Andric }
12915f757f3fSDimitry Andric 
12925f757f3fSDimitry Andric SDValue
12935f757f3fSDimitry Andric X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
12945f757f3fSDimitry Andric                                     const SmallVectorImpl<ISD::InputArg> &Ins,
12955f757f3fSDimitry Andric                                     const SDLoc &dl, SelectionDAG &DAG,
12965f757f3fSDimitry Andric                                     const CCValAssign &VA,
12975f757f3fSDimitry Andric                                     MachineFrameInfo &MFI, unsigned i) const {
12985f757f3fSDimitry Andric   // Create the nodes corresponding to a load from this parameter slot.
12995f757f3fSDimitry Andric   ISD::ArgFlagsTy Flags = Ins[i].Flags;
13005f757f3fSDimitry Andric   bool AlwaysUseMutable = shouldGuaranteeTCO(
13015f757f3fSDimitry Andric       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
13025f757f3fSDimitry Andric   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
13035f757f3fSDimitry Andric   EVT ValVT;
13045f757f3fSDimitry Andric   MVT PtrVT = getPointerTy(DAG.getDataLayout());
13055f757f3fSDimitry Andric 
13065f757f3fSDimitry Andric   // If the value is passed by pointer, we have the address passed instead of
13075f757f3fSDimitry Andric   // the value itself. No need to extend if the mask value and location share
13085f757f3fSDimitry Andric   // the same absolute size.
13095f757f3fSDimitry Andric   bool ExtendedInMem =
13105f757f3fSDimitry Andric       VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
13115f757f3fSDimitry Andric       VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
13125f757f3fSDimitry Andric 
13135f757f3fSDimitry Andric   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
13145f757f3fSDimitry Andric     ValVT = VA.getLocVT();
13155f757f3fSDimitry Andric   else
13165f757f3fSDimitry Andric     ValVT = VA.getValVT();
13175f757f3fSDimitry Andric 
13185f757f3fSDimitry Andric   // FIXME: For now, all byval parameter objects are marked mutable. This can be
13195f757f3fSDimitry Andric   // changed with more analysis.
13205f757f3fSDimitry Andric   // In case of tail call optimization, mark all arguments mutable, since they
13215f757f3fSDimitry Andric   // could be overwritten by the lowering of arguments in case of a tail call.
13225f757f3fSDimitry Andric   if (Flags.isByVal()) {
13235f757f3fSDimitry Andric     unsigned Bytes = Flags.getByValSize();
13245f757f3fSDimitry Andric     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
13255f757f3fSDimitry Andric 
13265f757f3fSDimitry Andric     // FIXME: For now, all byval parameter objects are marked as aliasing. This
13275f757f3fSDimitry Andric     // can be improved with deeper analysis.
13285f757f3fSDimitry Andric     int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
13295f757f3fSDimitry Andric                                    /*isAliased=*/true);
13305f757f3fSDimitry Andric     return DAG.getFrameIndex(FI, PtrVT);
13315f757f3fSDimitry Andric   }
13325f757f3fSDimitry Andric 
13335f757f3fSDimitry Andric   EVT ArgVT = Ins[i].ArgVT;
13345f757f3fSDimitry Andric 
13355f757f3fSDimitry Andric   // If this is a vector that has been split into multiple parts, don't elide
13365f757f3fSDimitry Andric   // the copy. The layout on the stack may not match the packed in-memory
13375f757f3fSDimitry Andric   // layout.
13385f757f3fSDimitry Andric   bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
13395f757f3fSDimitry Andric 
13405f757f3fSDimitry Andric   // This is an argument in memory. We might be able to perform copy elision:
13415f757f3fSDimitry Andric   // if the argument is passed directly in memory without any extension, then
13425f757f3fSDimitry Andric   // copy elision is possible. Large vector types, for example, may be passed
13435f757f3fSDimitry Andric   // indirectly by pointer.
13445f757f3fSDimitry Andric   if (Flags.isCopyElisionCandidate() &&
13455f757f3fSDimitry Andric       VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
13465f757f3fSDimitry Andric       !ScalarizedVector) {
13475f757f3fSDimitry Andric     SDValue PartAddr;
13485f757f3fSDimitry Andric     if (Ins[i].PartOffset == 0) {
13495f757f3fSDimitry Andric       // If this is a one-part value or the first part of a multi-part value,
13505f757f3fSDimitry Andric       // create a stack object for the entire argument value type and return a
13515f757f3fSDimitry Andric       // load from our portion of it. This assumes that if the first part of an
13525f757f3fSDimitry Andric       // argument is in memory, the rest will also be in memory.
13535f757f3fSDimitry Andric       int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
13545f757f3fSDimitry Andric                                      /*IsImmutable=*/false);
13555f757f3fSDimitry Andric       PartAddr = DAG.getFrameIndex(FI, PtrVT);
13565f757f3fSDimitry Andric       return DAG.getLoad(
13575f757f3fSDimitry Andric           ValVT, dl, Chain, PartAddr,
13585f757f3fSDimitry Andric           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
13595f757f3fSDimitry Andric     }
13605f757f3fSDimitry Andric 
13615f757f3fSDimitry Andric     // This is not the first piece of an argument in memory. See if there is
13625f757f3fSDimitry Andric     // already a fixed stack object including this offset. If so, assume it
13635f757f3fSDimitry Andric     // was created by the PartOffset == 0 branch above and create a load from
13645f757f3fSDimitry Andric     // the appropriate offset into it.
13655f757f3fSDimitry Andric     int64_t PartBegin = VA.getLocMemOffset();
13665f757f3fSDimitry Andric     int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
13675f757f3fSDimitry Andric     int FI = MFI.getObjectIndexBegin();
13685f757f3fSDimitry Andric     for (; MFI.isFixedObjectIndex(FI); ++FI) {
13695f757f3fSDimitry Andric       int64_t ObjBegin = MFI.getObjectOffset(FI);
13705f757f3fSDimitry Andric       int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
13715f757f3fSDimitry Andric       if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
13725f757f3fSDimitry Andric         break;
13735f757f3fSDimitry Andric     }
13745f757f3fSDimitry Andric     if (MFI.isFixedObjectIndex(FI)) {
13755f757f3fSDimitry Andric       SDValue Addr =
13765f757f3fSDimitry Andric           DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
13775f757f3fSDimitry Andric                       DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
13785f757f3fSDimitry Andric       return DAG.getLoad(ValVT, dl, Chain, Addr,
13795f757f3fSDimitry Andric                          MachinePointerInfo::getFixedStack(
13805f757f3fSDimitry Andric                              DAG.getMachineFunction(), FI, Ins[i].PartOffset));
13815f757f3fSDimitry Andric     }
13825f757f3fSDimitry Andric   }
13835f757f3fSDimitry Andric 
13845f757f3fSDimitry Andric   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
13855f757f3fSDimitry Andric                                  VA.getLocMemOffset(), isImmutable);
13865f757f3fSDimitry Andric 
13875f757f3fSDimitry Andric   // Set SExt or ZExt flag.
13885f757f3fSDimitry Andric   if (VA.getLocInfo() == CCValAssign::ZExt) {
13895f757f3fSDimitry Andric     MFI.setObjectZExt(FI, true);
13905f757f3fSDimitry Andric   } else if (VA.getLocInfo() == CCValAssign::SExt) {
13915f757f3fSDimitry Andric     MFI.setObjectSExt(FI, true);
13925f757f3fSDimitry Andric   }
13935f757f3fSDimitry Andric 
13945f757f3fSDimitry Andric   MaybeAlign Alignment;
13955f757f3fSDimitry Andric   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
13965f757f3fSDimitry Andric       ValVT != MVT::f80)
13975f757f3fSDimitry Andric     Alignment = MaybeAlign(4);
13985f757f3fSDimitry Andric   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
13995f757f3fSDimitry Andric   SDValue Val = DAG.getLoad(
14005f757f3fSDimitry Andric       ValVT, dl, Chain, FIN,
14015f757f3fSDimitry Andric       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
14025f757f3fSDimitry Andric       Alignment);
14035f757f3fSDimitry Andric   return ExtendedInMem
14045f757f3fSDimitry Andric              ? (VA.getValVT().isVector()
14055f757f3fSDimitry Andric                     ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
14065f757f3fSDimitry Andric                     : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
14075f757f3fSDimitry Andric              : Val;
14085f757f3fSDimitry Andric }
14095f757f3fSDimitry Andric 
14105f757f3fSDimitry Andric // FIXME: Get this from tablegen.
14115f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
14125f757f3fSDimitry Andric                                                 const X86Subtarget &Subtarget) {
14135f757f3fSDimitry Andric   assert(Subtarget.is64Bit());
14145f757f3fSDimitry Andric 
14155f757f3fSDimitry Andric   if (Subtarget.isCallingConvWin64(CallConv)) {
14165f757f3fSDimitry Andric     static const MCPhysReg GPR64ArgRegsWin64[] = {
14175f757f3fSDimitry Andric       X86::RCX, X86::RDX, X86::R8,  X86::R9
14185f757f3fSDimitry Andric     };
14195f757f3fSDimitry Andric     return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
14205f757f3fSDimitry Andric   }
14215f757f3fSDimitry Andric 
14225f757f3fSDimitry Andric   static const MCPhysReg GPR64ArgRegs64Bit[] = {
14235f757f3fSDimitry Andric     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
14245f757f3fSDimitry Andric   };
14255f757f3fSDimitry Andric   return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
14265f757f3fSDimitry Andric }
14275f757f3fSDimitry Andric 
14285f757f3fSDimitry Andric // FIXME: Get this from tablegen.
14295f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
14305f757f3fSDimitry Andric                                                 CallingConv::ID CallConv,
14315f757f3fSDimitry Andric                                                 const X86Subtarget &Subtarget) {
14325f757f3fSDimitry Andric   assert(Subtarget.is64Bit());
14335f757f3fSDimitry Andric   if (Subtarget.isCallingConvWin64(CallConv)) {
14345f757f3fSDimitry Andric     // The XMM registers which might contain var arg parameters are shadowed
14355f757f3fSDimitry Andric     // in their paired GPR.  So we only need to save the GPRs to their home
14365f757f3fSDimitry Andric     // slots.
14375f757f3fSDimitry Andric     // TODO: __vectorcall will change this.
14385f757f3fSDimitry Andric     return std::nullopt;
14395f757f3fSDimitry Andric   }
14405f757f3fSDimitry Andric 
14415f757f3fSDimitry Andric   bool isSoftFloat = Subtarget.useSoftFloat();
14425f757f3fSDimitry Andric   if (isSoftFloat || !Subtarget.hasSSE1())
14435f757f3fSDimitry Andric     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
14445f757f3fSDimitry Andric     // registers.
14455f757f3fSDimitry Andric     return std::nullopt;
14465f757f3fSDimitry Andric 
14475f757f3fSDimitry Andric   static const MCPhysReg XMMArgRegs64Bit[] = {
14485f757f3fSDimitry Andric     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
14495f757f3fSDimitry Andric     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
14505f757f3fSDimitry Andric   };
14515f757f3fSDimitry Andric   return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
14525f757f3fSDimitry Andric }
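
// Illustrative note (a sketch, not part of the original source): in the SysV
// x86-64 ABI a variadic caller reports the number of vector registers used in
// %al, e.g. for printf("%f\n", x):
//   movl $1, %eax        # one argument in XMM0
//   callq printf
// which is why the vararg prologue below spills XMM registers only when %al
// says they may hold arguments.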
14535f757f3fSDimitry Andric 
14545f757f3fSDimitry Andric #ifndef NDEBUG
14555f757f3fSDimitry Andric static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
14565f757f3fSDimitry Andric   return llvm::is_sorted(
14575f757f3fSDimitry Andric       ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
14585f757f3fSDimitry Andric         return A.getValNo() < B.getValNo();
14595f757f3fSDimitry Andric       });
14605f757f3fSDimitry Andric }
14615f757f3fSDimitry Andric #endif
14625f757f3fSDimitry Andric 
14635f757f3fSDimitry Andric namespace {
14645f757f3fSDimitry Andric /// This is a helper class for lowering variable argument (vararg) parameters.
14655f757f3fSDimitry Andric class VarArgsLoweringHelper {
14665f757f3fSDimitry Andric public:
14675f757f3fSDimitry Andric   VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
14685f757f3fSDimitry Andric                         SelectionDAG &DAG, const X86Subtarget &Subtarget,
14695f757f3fSDimitry Andric                         CallingConv::ID CallConv, CCState &CCInfo)
14705f757f3fSDimitry Andric       : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
14715f757f3fSDimitry Andric         TheMachineFunction(DAG.getMachineFunction()),
14725f757f3fSDimitry Andric         TheFunction(TheMachineFunction.getFunction()),
14735f757f3fSDimitry Andric         FrameInfo(TheMachineFunction.getFrameInfo()),
14745f757f3fSDimitry Andric         FrameLowering(*Subtarget.getFrameLowering()),
14755f757f3fSDimitry Andric         TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
14765f757f3fSDimitry Andric         CCInfo(CCInfo) {}
14775f757f3fSDimitry Andric 
14785f757f3fSDimitry Andric   // Lower vararg parameters.
14795f757f3fSDimitry Andric   void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
14805f757f3fSDimitry Andric 
14815f757f3fSDimitry Andric private:
14825f757f3fSDimitry Andric   void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
14835f757f3fSDimitry Andric 
14845f757f3fSDimitry Andric   void forwardMustTailParameters(SDValue &Chain);
14855f757f3fSDimitry Andric 
14865f757f3fSDimitry Andric   bool is64Bit() const { return Subtarget.is64Bit(); }
14875f757f3fSDimitry Andric   bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
14885f757f3fSDimitry Andric 
14895f757f3fSDimitry Andric   X86MachineFunctionInfo *FuncInfo;
14905f757f3fSDimitry Andric   const SDLoc &DL;
14915f757f3fSDimitry Andric   SelectionDAG &DAG;
14925f757f3fSDimitry Andric   const X86Subtarget &Subtarget;
14935f757f3fSDimitry Andric   MachineFunction &TheMachineFunction;
14945f757f3fSDimitry Andric   const Function &TheFunction;
14955f757f3fSDimitry Andric   MachineFrameInfo &FrameInfo;
14965f757f3fSDimitry Andric   const TargetFrameLowering &FrameLowering;
14975f757f3fSDimitry Andric   const TargetLowering &TargLowering;
14985f757f3fSDimitry Andric   CallingConv::ID CallConv;
14995f757f3fSDimitry Andric   CCState &CCInfo;
15005f757f3fSDimitry Andric };
15015f757f3fSDimitry Andric } // namespace
15025f757f3fSDimitry Andric 
15035f757f3fSDimitry Andric void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
15045f757f3fSDimitry Andric     SDValue &Chain, unsigned StackSize) {
15055f757f3fSDimitry Andric   // If the function takes a variable number of arguments, make a frame index
15065f757f3fSDimitry Andric   // for the start of the first vararg value... for expansion of llvm.va_start.
15075f757f3fSDimitry Andric   // We can skip this if there are no va_start calls.
15085f757f3fSDimitry Andric   if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
15095f757f3fSDimitry Andric                     CallConv != CallingConv::X86_ThisCall)) {
15105f757f3fSDimitry Andric     FuncInfo->setVarArgsFrameIndex(
15115f757f3fSDimitry Andric         FrameInfo.CreateFixedObject(1, StackSize, true));
15125f757f3fSDimitry Andric   }
15135f757f3fSDimitry Andric 
15145f757f3fSDimitry Andric   // 64-bit calling conventions support varargs and register parameters, so we
15155f757f3fSDimitry Andric   // have to do extra work to spill them in the prologue.
15165f757f3fSDimitry Andric   if (is64Bit()) {
15175f757f3fSDimitry Andric     // Find the first unallocated argument register of each kind (GPR and XMM).
15185f757f3fSDimitry Andric     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
15195f757f3fSDimitry Andric     ArrayRef<MCPhysReg> ArgXMMs =
15205f757f3fSDimitry Andric         get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
15215f757f3fSDimitry Andric     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
15225f757f3fSDimitry Andric     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
15235f757f3fSDimitry Andric 
15245f757f3fSDimitry Andric     assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
15255f757f3fSDimitry Andric            "SSE register cannot be used when SSE is disabled!");
15265f757f3fSDimitry Andric 
15275f757f3fSDimitry Andric     if (isWin64()) {
15285f757f3fSDimitry Andric       // Get to the caller-allocated home save location.  Add 8 to account
15295f757f3fSDimitry Andric       // for the return address.
15305f757f3fSDimitry Andric       int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
15315f757f3fSDimitry Andric       FuncInfo->setRegSaveFrameIndex(
15325f757f3fSDimitry Andric           FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
15335f757f3fSDimitry Andric       // Fix up the vararg frame index to point into the shadow area (4 x i64).
15345f757f3fSDimitry Andric       if (NumIntRegs < 4)
15355f757f3fSDimitry Andric         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
15365f757f3fSDimitry Andric     } else {
15375f757f3fSDimitry Andric       // For X86-64, if there are vararg parameters that are passed via
15385f757f3fSDimitry Andric       // registers, then we must store them to their spots on the stack so
15395f757f3fSDimitry Andric       // they may be loaded by dereferencing the result of va_arg.
15405f757f3fSDimitry Andric       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
15415f757f3fSDimitry Andric       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
15425f757f3fSDimitry Andric       FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
15435f757f3fSDimitry Andric           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
15445f757f3fSDimitry Andric     }
15455f757f3fSDimitry Andric 
15465f757f3fSDimitry Andric     SmallVector<SDValue, 6>
15475f757f3fSDimitry Andric         LiveGPRs; // SDValues for GPRs holding live-in argument values
15485f757f3fSDimitry Andric     SmallVector<SDValue, 8> LiveXMMRegs; // SDValues for XMM registers holding
15495f757f3fSDimitry Andric                                          // live-in argument values
15505f757f3fSDimitry Andric     SDValue ALVal; // if applicable, holds the SDValue for the %al register
15515f757f3fSDimitry Andric 
15525f757f3fSDimitry Andric     // Gather all the live in physical registers.
15535f757f3fSDimitry Andric     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
15545f757f3fSDimitry Andric       Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
15555f757f3fSDimitry Andric       LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
15565f757f3fSDimitry Andric     }
15575f757f3fSDimitry Andric     const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
15585f757f3fSDimitry Andric     if (!AvailableXmms.empty()) {
15595f757f3fSDimitry Andric       Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
15605f757f3fSDimitry Andric       ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
15615f757f3fSDimitry Andric       for (MCPhysReg Reg : AvailableXmms) {
15625f757f3fSDimitry Andric         // FastRegisterAllocator spills virtual registers at basic
15635f757f3fSDimitry Andric         // block boundaries. That leads to uses of XMM registers
15645f757f3fSDimitry Andric         // outside of the check for %al. Pass physical registers to
15655f757f3fSDimitry Andric         // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
15665f757f3fSDimitry Andric         TheMachineFunction.getRegInfo().addLiveIn(Reg);
15675f757f3fSDimitry Andric         LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
15685f757f3fSDimitry Andric       }
15695f757f3fSDimitry Andric     }
15705f757f3fSDimitry Andric 
15715f757f3fSDimitry Andric     // Store the integer parameter registers.
15725f757f3fSDimitry Andric     SmallVector<SDValue, 8> MemOps;
15735f757f3fSDimitry Andric     SDValue RSFIN =
15745f757f3fSDimitry Andric         DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
15755f757f3fSDimitry Andric                           TargLowering.getPointerTy(DAG.getDataLayout()));
15765f757f3fSDimitry Andric     unsigned Offset = FuncInfo->getVarArgsGPOffset();
15775f757f3fSDimitry Andric     for (SDValue Val : LiveGPRs) {
15785f757f3fSDimitry Andric       SDValue FIN = DAG.getNode(ISD::ADD, DL,
15795f757f3fSDimitry Andric                                 TargLowering.getPointerTy(DAG.getDataLayout()),
15805f757f3fSDimitry Andric                                 RSFIN, DAG.getIntPtrConstant(Offset, DL));
15815f757f3fSDimitry Andric       SDValue Store =
15825f757f3fSDimitry Andric           DAG.getStore(Val.getValue(1), DL, Val, FIN,
15835f757f3fSDimitry Andric                        MachinePointerInfo::getFixedStack(
15845f757f3fSDimitry Andric                            DAG.getMachineFunction(),
15855f757f3fSDimitry Andric                            FuncInfo->getRegSaveFrameIndex(), Offset));
15865f757f3fSDimitry Andric       MemOps.push_back(Store);
15875f757f3fSDimitry Andric       Offset += 8;
15885f757f3fSDimitry Andric     }
15895f757f3fSDimitry Andric 
15905f757f3fSDimitry Andric     // Now store the XMM (fp + vector) parameter registers.
15915f757f3fSDimitry Andric     if (!LiveXMMRegs.empty()) {
15925f757f3fSDimitry Andric       SmallVector<SDValue, 12> SaveXMMOps;
15935f757f3fSDimitry Andric       SaveXMMOps.push_back(Chain);
15945f757f3fSDimitry Andric       SaveXMMOps.push_back(ALVal);
15955f757f3fSDimitry Andric       SaveXMMOps.push_back(RSFIN);
15965f757f3fSDimitry Andric       SaveXMMOps.push_back(
15975f757f3fSDimitry Andric           DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
15985f757f3fSDimitry Andric       llvm::append_range(SaveXMMOps, LiveXMMRegs);
15995f757f3fSDimitry Andric       MachineMemOperand *StoreMMO =
16005f757f3fSDimitry Andric           DAG.getMachineFunction().getMachineMemOperand(
16015f757f3fSDimitry Andric               MachinePointerInfo::getFixedStack(
16025f757f3fSDimitry Andric                   DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
16035f757f3fSDimitry Andric                   Offset),
16045f757f3fSDimitry Andric               MachineMemOperand::MOStore, 128, Align(16));
16055f757f3fSDimitry Andric       MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
16065f757f3fSDimitry Andric                                                DL, DAG.getVTList(MVT::Other),
16075f757f3fSDimitry Andric                                                SaveXMMOps, MVT::i8, StoreMMO));
16085f757f3fSDimitry Andric     }
16095f757f3fSDimitry Andric 
16105f757f3fSDimitry Andric     if (!MemOps.empty())
16115f757f3fSDimitry Andric       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
16125f757f3fSDimitry Andric   }
16135f757f3fSDimitry Andric }
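
// Illustrative note (layout sketch for the SysV x86-64 ABI, not part of the
// original source): the register save area created above is laid out as
//   [ rdi rsi rdx rcx r8 r9 ][ xmm0 ... xmm7 ]
//      6 x 8 bytes             8 x 16 bytes
// and va_arg walks it via the gp_offset/fp_offset values recorded here as
// VarArgsGPOffset/VarArgsFPOffset.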
16145f757f3fSDimitry Andric 
16155f757f3fSDimitry Andric void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
16165f757f3fSDimitry Andric   // Find the largest legal vector type.
16175f757f3fSDimitry Andric   MVT VecVT = MVT::Other;
16185f757f3fSDimitry Andric   // FIXME: Only some x86_32 calling conventions support AVX512.
16195f757f3fSDimitry Andric   if (Subtarget.useAVX512Regs() &&
16205f757f3fSDimitry Andric       (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
16215f757f3fSDimitry Andric                      CallConv == CallingConv::Intel_OCL_BI)))
16225f757f3fSDimitry Andric     VecVT = MVT::v16f32;
16235f757f3fSDimitry Andric   else if (Subtarget.hasAVX())
16245f757f3fSDimitry Andric     VecVT = MVT::v8f32;
16255f757f3fSDimitry Andric   else if (Subtarget.hasSSE2())
16265f757f3fSDimitry Andric     VecVT = MVT::v4f32;
16275f757f3fSDimitry Andric 
16285f757f3fSDimitry Andric   // We forward some GPRs and some vector types.
16295f757f3fSDimitry Andric   SmallVector<MVT, 2> RegParmTypes;
16305f757f3fSDimitry Andric   MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
16315f757f3fSDimitry Andric   RegParmTypes.push_back(IntVT);
16325f757f3fSDimitry Andric   if (VecVT != MVT::Other)
16335f757f3fSDimitry Andric     RegParmTypes.push_back(VecVT);
16345f757f3fSDimitry Andric 
16355f757f3fSDimitry Andric   // Compute the set of forwarded registers. The rest are scratch.
16365f757f3fSDimitry Andric   SmallVectorImpl<ForwardedRegister> &Forwards =
16375f757f3fSDimitry Andric       FuncInfo->getForwardedMustTailRegParms();
16385f757f3fSDimitry Andric   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
16395f757f3fSDimitry Andric 
16405f757f3fSDimitry Andric   // Forward AL for SysV x86_64 targets, since it is used for varargs.
16415f757f3fSDimitry Andric   if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
16425f757f3fSDimitry Andric     Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
16435f757f3fSDimitry Andric     Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
16445f757f3fSDimitry Andric   }
16455f757f3fSDimitry Andric 
16465f757f3fSDimitry Andric   // Copy all forwards from physical to virtual registers.
16475f757f3fSDimitry Andric   for (ForwardedRegister &FR : Forwards) {
16485f757f3fSDimitry Andric     // FIXME: Can we use a less constrained schedule?
16495f757f3fSDimitry Andric     SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
16505f757f3fSDimitry Andric     FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
16515f757f3fSDimitry Andric         TargLowering.getRegClassFor(FR.VT));
16525f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
16535f757f3fSDimitry Andric   }
16545f757f3fSDimitry Andric }
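
// Illustrative note (hypothetical, not part of the original source): a
// variadic forwarding thunk such as
//   define void @thunk(ptr %this, ...) {
//     musttail call void (ptr, ...) @impl(ptr %this, ...)
//     ret void
//   }
// never calls va_start, yet every register that might carry a variadic
// argument (including %al) must reach @impl intact; the copies made above are
// what keep those values alive across the function body.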
16555f757f3fSDimitry Andric 
16565f757f3fSDimitry Andric void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
16575f757f3fSDimitry Andric                                                    unsigned StackSize) {
16585f757f3fSDimitry Andric   // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
16595f757f3fSDimitry Andric   // If necessary, it will be set to the correct value later.
16605f757f3fSDimitry Andric   FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
16615f757f3fSDimitry Andric   FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
16625f757f3fSDimitry Andric 
16635f757f3fSDimitry Andric   if (FrameInfo.hasVAStart())
16645f757f3fSDimitry Andric     createVarArgAreaAndStoreRegisters(Chain, StackSize);
16655f757f3fSDimitry Andric 
16665f757f3fSDimitry Andric   if (FrameInfo.hasMustTailInVarArgFunc())
16675f757f3fSDimitry Andric     forwardMustTailParameters(Chain);
16685f757f3fSDimitry Andric }
16695f757f3fSDimitry Andric 
16705f757f3fSDimitry Andric SDValue X86TargetLowering::LowerFormalArguments(
16715f757f3fSDimitry Andric     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
16725f757f3fSDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
16735f757f3fSDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
16745f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
16755f757f3fSDimitry Andric   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
16765f757f3fSDimitry Andric 
16775f757f3fSDimitry Andric   const Function &F = MF.getFunction();
16785f757f3fSDimitry Andric   if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
16795f757f3fSDimitry Andric       F.getName() == "main")
16805f757f3fSDimitry Andric     FuncInfo->setForceFramePointer(true);
16815f757f3fSDimitry Andric 
16825f757f3fSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
16835f757f3fSDimitry Andric   bool Is64Bit = Subtarget.is64Bit();
16845f757f3fSDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
16855f757f3fSDimitry Andric 
16865f757f3fSDimitry Andric   assert(
16875f757f3fSDimitry Andric       !(IsVarArg && canGuaranteeTCO(CallConv)) &&
16885f757f3fSDimitry Andric       "Var args not supported with calling conventions regcall, fastcc, ghc or hipe");
16895f757f3fSDimitry Andric 
16905f757f3fSDimitry Andric   // Assign locations to all of the incoming arguments.
16915f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> ArgLocs;
16925f757f3fSDimitry Andric   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
16935f757f3fSDimitry Andric 
16945f757f3fSDimitry Andric   // Allocate shadow area for Win64.
16955f757f3fSDimitry Andric   if (IsWin64)
16965f757f3fSDimitry Andric     CCInfo.AllocateStack(32, Align(8));
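  // The 32 bytes are the four 8-byte "home" slots for RCX/RDX/R8/R9. For
  // example (a sketch): for `int f(int a, int b)` on Win64, a arrives in ECX
  // and b in EDX, while [rsp+8] and [rsp+16], as seen on entry to f, stay
  // reserved as their spill homes.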
16975f757f3fSDimitry Andric 
16985f757f3fSDimitry Andric   CCInfo.AnalyzeArguments(Ins, CC_X86);
16995f757f3fSDimitry Andric 
17005f757f3fSDimitry Andric   // In vectorcall calling convention a second pass is required for the HVA
17015f757f3fSDimitry Andric   // types.
17025f757f3fSDimitry Andric   if (CallingConv::X86_VectorCall == CallConv) {
17035f757f3fSDimitry Andric     CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
17045f757f3fSDimitry Andric   }
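  // For illustration (a hypothetical type): under vectorcall, an HVA such as
  // `struct HVA4 { __m128 v[4]; }` is skipped by the first pass and only
  // assigned XMM/YMM registers here, after the non-HVA arguments have
  // claimed their registers.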
17055f757f3fSDimitry Andric 
17065f757f3fSDimitry Andric   // The next loop assumes that the locations are in the same order as the
17075f757f3fSDimitry Andric   // input arguments.
17085f757f3fSDimitry Andric   assert(isSortedByValueNo(ArgLocs) &&
17095f757f3fSDimitry Andric          "Argument Location list must be sorted before lowering");
17105f757f3fSDimitry Andric 
17115f757f3fSDimitry Andric   SDValue ArgValue;
17125f757f3fSDimitry Andric   for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
17135f757f3fSDimitry Andric        ++I, ++InsIndex) {
17145f757f3fSDimitry Andric     assert(InsIndex < Ins.size() && "Invalid Ins index");
17155f757f3fSDimitry Andric     CCValAssign &VA = ArgLocs[I];
17165f757f3fSDimitry Andric 
17175f757f3fSDimitry Andric     if (VA.isRegLoc()) {
17185f757f3fSDimitry Andric       EVT RegVT = VA.getLocVT();
17195f757f3fSDimitry Andric       if (VA.needsCustom()) {
17205f757f3fSDimitry Andric         assert(
17215f757f3fSDimitry Andric             VA.getValVT() == MVT::v64i1 &&
17225f757f3fSDimitry Andric             "Currently the only custom case is when we split v64i1 to 2 regs");
17235f757f3fSDimitry Andric 
17245f757f3fSDimitry Andric         // In the regcall calling convention, v64i1 values compiled for a
17255f757f3fSDimitry Andric         // 32-bit target are split up into two registers.
17265f757f3fSDimitry Andric         ArgValue =
17275f757f3fSDimitry Andric             getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
17285f757f3fSDimitry Andric       } else {
17295f757f3fSDimitry Andric         const TargetRegisterClass *RC;
17305f757f3fSDimitry Andric         if (RegVT == MVT::i8)
17315f757f3fSDimitry Andric           RC = &X86::GR8RegClass;
17325f757f3fSDimitry Andric         else if (RegVT == MVT::i16)
17335f757f3fSDimitry Andric           RC = &X86::GR16RegClass;
17345f757f3fSDimitry Andric         else if (RegVT == MVT::i32)
17355f757f3fSDimitry Andric           RC = &X86::GR32RegClass;
17365f757f3fSDimitry Andric         else if (Is64Bit && RegVT == MVT::i64)
17375f757f3fSDimitry Andric           RC = &X86::GR64RegClass;
17385f757f3fSDimitry Andric         else if (RegVT == MVT::f16)
17395f757f3fSDimitry Andric           RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
17405f757f3fSDimitry Andric         else if (RegVT == MVT::f32)
17415f757f3fSDimitry Andric           RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
17425f757f3fSDimitry Andric         else if (RegVT == MVT::f64)
17435f757f3fSDimitry Andric           RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
17445f757f3fSDimitry Andric         else if (RegVT == MVT::f80)
17455f757f3fSDimitry Andric           RC = &X86::RFP80RegClass;
17465f757f3fSDimitry Andric         else if (RegVT == MVT::f128)
17475f757f3fSDimitry Andric           RC = &X86::VR128RegClass;
17485f757f3fSDimitry Andric         else if (RegVT.is512BitVector())
17495f757f3fSDimitry Andric           RC = &X86::VR512RegClass;
17505f757f3fSDimitry Andric         else if (RegVT.is256BitVector())
17515f757f3fSDimitry Andric           RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
17525f757f3fSDimitry Andric         else if (RegVT.is128BitVector())
17535f757f3fSDimitry Andric           RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
17545f757f3fSDimitry Andric         else if (RegVT == MVT::x86mmx)
17555f757f3fSDimitry Andric           RC = &X86::VR64RegClass;
17565f757f3fSDimitry Andric         else if (RegVT == MVT::v1i1)
17575f757f3fSDimitry Andric           RC = &X86::VK1RegClass;
17585f757f3fSDimitry Andric         else if (RegVT == MVT::v8i1)
17595f757f3fSDimitry Andric           RC = &X86::VK8RegClass;
17605f757f3fSDimitry Andric         else if (RegVT == MVT::v16i1)
17615f757f3fSDimitry Andric           RC = &X86::VK16RegClass;
17625f757f3fSDimitry Andric         else if (RegVT == MVT::v32i1)
17635f757f3fSDimitry Andric           RC = &X86::VK32RegClass;
17645f757f3fSDimitry Andric         else if (RegVT == MVT::v64i1)
17655f757f3fSDimitry Andric           RC = &X86::VK64RegClass;
17665f757f3fSDimitry Andric         else
17675f757f3fSDimitry Andric           llvm_unreachable("Unknown argument type!");
17685f757f3fSDimitry Andric 
17695f757f3fSDimitry Andric         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
17705f757f3fSDimitry Andric         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
17715f757f3fSDimitry Andric       }
17725f757f3fSDimitry Andric 
17735f757f3fSDimitry Andric       // If this is an 8 or 16-bit value, it is really passed promoted to 32
17745f757f3fSDimitry Andric       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
17755f757f3fSDimitry Andric       // right size.
17765f757f3fSDimitry Andric       if (VA.getLocInfo() == CCValAssign::SExt)
17775f757f3fSDimitry Andric         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
17785f757f3fSDimitry Andric                                DAG.getValueType(VA.getValVT()));
17795f757f3fSDimitry Andric       else if (VA.getLocInfo() == CCValAssign::ZExt)
17805f757f3fSDimitry Andric         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
17815f757f3fSDimitry Andric                                DAG.getValueType(VA.getValVT()));
17825f757f3fSDimitry Andric       else if (VA.getLocInfo() == CCValAssign::BCvt)
17835f757f3fSDimitry Andric         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
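      // For example (a sketch): an i8 argument passed zero-extended in a
      // 32-bit register arrives as
      //   CopyFromReg(i32) -> AssertZext(i8) -> TRUNCATE(i8)
      // where the AssertZext records that the upper 24 bits are known zero.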
17845f757f3fSDimitry Andric 
17855f757f3fSDimitry Andric       if (VA.isExtInLoc()) {
17865f757f3fSDimitry Andric         // Handle MMX values passed in XMM regs.
17875f757f3fSDimitry Andric         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
17885f757f3fSDimitry Andric           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
17895f757f3fSDimitry Andric         else if (VA.getValVT().isVector() &&
17905f757f3fSDimitry Andric                  VA.getValVT().getScalarType() == MVT::i1 &&
17915f757f3fSDimitry Andric                  ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
17925f757f3fSDimitry Andric                   (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
17935f757f3fSDimitry Andric           // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
17945f757f3fSDimitry Andric           ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
17955f757f3fSDimitry Andric         } else
17965f757f3fSDimitry Andric           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
17975f757f3fSDimitry Andric       }
17985f757f3fSDimitry Andric     } else {
17995f757f3fSDimitry Andric       assert(VA.isMemLoc());
18005f757f3fSDimitry Andric       ArgValue =
18015f757f3fSDimitry Andric           LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
18025f757f3fSDimitry Andric     }
18035f757f3fSDimitry Andric 
18045f757f3fSDimitry Andric     // If the value is passed via a pointer, do a load.
18055f757f3fSDimitry Andric     if (VA.getLocInfo() == CCValAssign::Indirect &&
18065f757f3fSDimitry Andric         !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
18075f757f3fSDimitry Andric       ArgValue =
18085f757f3fSDimitry Andric           DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
18095f757f3fSDimitry Andric     }
18105f757f3fSDimitry Andric 
18115f757f3fSDimitry Andric     InVals.push_back(ArgValue);
18125f757f3fSDimitry Andric   }
18135f757f3fSDimitry Andric 
18145f757f3fSDimitry Andric   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
18155f757f3fSDimitry Andric     if (Ins[I].Flags.isSwiftAsync()) {
18165f757f3fSDimitry Andric       auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1817*0fca6ea1SDimitry Andric       if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
18185f757f3fSDimitry Andric         X86FI->setHasSwiftAsyncContext(true);
18195f757f3fSDimitry Andric       else {
1820*0fca6ea1SDimitry Andric         int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1821*0fca6ea1SDimitry Andric         int FI =
1822*0fca6ea1SDimitry Andric             MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
18235f757f3fSDimitry Andric         X86FI->setSwiftAsyncContextFrameIdx(FI);
1824*0fca6ea1SDimitry Andric         SDValue St = DAG.getStore(
1825*0fca6ea1SDimitry Andric             DAG.getEntryNode(), dl, InVals[I],
1826*0fca6ea1SDimitry Andric             DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
18275f757f3fSDimitry Andric             MachinePointerInfo::getFixedStack(MF, FI));
18285f757f3fSDimitry Andric         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
18295f757f3fSDimitry Andric       }
18305f757f3fSDimitry Andric     }
18315f757f3fSDimitry Andric 
18325f757f3fSDimitry Andric     // Swift calling conventions do not require that we copy the sret argument
18335f757f3fSDimitry Andric     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
18345f757f3fSDimitry Andric     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
18355f757f3fSDimitry Andric       continue;
18365f757f3fSDimitry Andric 
18375f757f3fSDimitry Andric     // All x86 ABIs require that for returning structs by value we copy the
18385f757f3fSDimitry Andric     // sret argument into %rax/%eax (depending on ABI) for the return. Save
18395f757f3fSDimitry Andric     // the argument into a virtual register so that we can access it from the
18405f757f3fSDimitry Andric     // return points.
18415f757f3fSDimitry Andric     if (Ins[I].Flags.isSRet()) {
18425f757f3fSDimitry Andric       assert(!FuncInfo->getSRetReturnReg() &&
18435f757f3fSDimitry Andric              "SRet return has already been set");
18445f757f3fSDimitry Andric       MVT PtrTy = getPointerTy(DAG.getDataLayout());
18455f757f3fSDimitry Andric       Register Reg =
18465f757f3fSDimitry Andric           MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
18475f757f3fSDimitry Andric       FuncInfo->setSRetReturnReg(Reg);
18485f757f3fSDimitry Andric       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
18495f757f3fSDimitry Andric       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
18505f757f3fSDimitry Andric       break;
18515f757f3fSDimitry Andric     }
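    // For example (a sketch): for `struct S f(void)` on SysV x86-64, the
    // sret pointer arrives in RDI; the copy above lets the return lowering
    // move it back into RAX, as the ABI requires.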
18525f757f3fSDimitry Andric   }
18535f757f3fSDimitry Andric 
18545f757f3fSDimitry Andric   unsigned StackSize = CCInfo.getStackSize();
18555f757f3fSDimitry Andric   // Align stack specially for tail calls.
18565f757f3fSDimitry Andric   if (shouldGuaranteeTCO(CallConv,
18575f757f3fSDimitry Andric                          MF.getTarget().Options.GuaranteedTailCallOpt))
18585f757f3fSDimitry Andric     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
18595f757f3fSDimitry Andric 
18605f757f3fSDimitry Andric   if (IsVarArg)
18615f757f3fSDimitry Andric     VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
18625f757f3fSDimitry Andric         .lowerVarArgsParameters(Chain, StackSize);
18635f757f3fSDimitry Andric 
18645f757f3fSDimitry Andric   // Some CCs need callee pop.
18655f757f3fSDimitry Andric   if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
18665f757f3fSDimitry Andric                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
18675f757f3fSDimitry Andric     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
18685f757f3fSDimitry Andric   } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
18695f757f3fSDimitry Andric     // X86 interrupts must pop the error code (and the alignment padding) if
18705f757f3fSDimitry Andric     // present.
18715f757f3fSDimitry Andric     FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
18725f757f3fSDimitry Andric   } else {
18735f757f3fSDimitry Andric     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
18745f757f3fSDimitry Andric     // If this is an sret function, the return should pop the hidden pointer.
18755f757f3fSDimitry Andric     if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
18765f757f3fSDimitry Andric       FuncInfo->setBytesToPopOnReturn(4);
18775f757f3fSDimitry Andric   }
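  // For example (a sketch): `int __stdcall f(int, int)` on 32-bit x86 has a
  // StackSize of 8, so it returns with `ret 8` and pops its own arguments,
  // whereas a cdecl function returns with a plain `ret` and pops nothing.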
18785f757f3fSDimitry Andric 
18795f757f3fSDimitry Andric   if (!Is64Bit) {
18805f757f3fSDimitry Andric     // RegSaveFrameIndex is X86-64 only.
18815f757f3fSDimitry Andric     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
18825f757f3fSDimitry Andric   }
18835f757f3fSDimitry Andric 
18845f757f3fSDimitry Andric   FuncInfo->setArgumentStackSize(StackSize);
18855f757f3fSDimitry Andric 
18865f757f3fSDimitry Andric   if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
18875f757f3fSDimitry Andric     EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
18885f757f3fSDimitry Andric     if (Personality == EHPersonality::CoreCLR) {
18895f757f3fSDimitry Andric       assert(Is64Bit);
18905f757f3fSDimitry Andric       // TODO: Add a mechanism to frame lowering that will allow us to indicate
18915f757f3fSDimitry Andric       // that we'd prefer this slot be allocated towards the bottom of the frame
18925f757f3fSDimitry Andric       // (i.e. near the stack pointer after allocating the frame).  Every
18935f757f3fSDimitry Andric       // funclet needs a copy of this slot in its (mostly empty) frame, and the
18945f757f3fSDimitry Andric       // offset from the bottom of this and each funclet's frame must be the
18955f757f3fSDimitry Andric       // same, so the size of funclets' (mostly empty) frames is dictated by
18965f757f3fSDimitry Andric       // how far this slot is from the bottom (since they allocate just enough
18975f757f3fSDimitry Andric       // space to accommodate holding this slot at the correct offset).
18985f757f3fSDimitry Andric       int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
18995f757f3fSDimitry Andric       EHInfo->PSPSymFrameIdx = PSPSymFI;
19005f757f3fSDimitry Andric     }
19015f757f3fSDimitry Andric   }
19025f757f3fSDimitry Andric 
19035f757f3fSDimitry Andric   if (shouldDisableArgRegFromCSR(CallConv) ||
19045f757f3fSDimitry Andric       F.hasFnAttribute("no_caller_saved_registers")) {
19055f757f3fSDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
19065f757f3fSDimitry Andric     for (std::pair<Register, Register> Pair : MRI.liveins())
19075f757f3fSDimitry Andric       MRI.disableCalleeSavedRegister(Pair.first);
19085f757f3fSDimitry Andric   }
19095f757f3fSDimitry Andric 
1910*0fca6ea1SDimitry Andric   if (CallingConv::PreserveNone == CallConv)
1911*0fca6ea1SDimitry Andric     for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1912*0fca6ea1SDimitry Andric       if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1913*0fca6ea1SDimitry Andric           Ins[I].Flags.isSwiftError()) {
1914*0fca6ea1SDimitry Andric         errorUnsupported(DAG, dl,
1915*0fca6ea1SDimitry Andric                          "Swift attributes can't be used with preserve_none");
1916*0fca6ea1SDimitry Andric         break;
1917*0fca6ea1SDimitry Andric       }
1918*0fca6ea1SDimitry Andric     }
1919*0fca6ea1SDimitry Andric 
19205f757f3fSDimitry Andric   return Chain;
19215f757f3fSDimitry Andric }
19225f757f3fSDimitry Andric 
19235f757f3fSDimitry Andric SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
19245f757f3fSDimitry Andric                                             SDValue Arg, const SDLoc &dl,
19255f757f3fSDimitry Andric                                             SelectionDAG &DAG,
19265f757f3fSDimitry Andric                                             const CCValAssign &VA,
19275f757f3fSDimitry Andric                                             ISD::ArgFlagsTy Flags,
19285f757f3fSDimitry Andric                                             bool isByVal) const {
19295f757f3fSDimitry Andric   unsigned LocMemOffset = VA.getLocMemOffset();
19305f757f3fSDimitry Andric   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
19315f757f3fSDimitry Andric   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
19325f757f3fSDimitry Andric                        StackPtr, PtrOff);
19335f757f3fSDimitry Andric   if (isByVal)
19345f757f3fSDimitry Andric     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
19355f757f3fSDimitry Andric 
19365f757f3fSDimitry Andric   MaybeAlign Alignment;
19375f757f3fSDimitry Andric   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
19385f757f3fSDimitry Andric       Arg.getSimpleValueType() != MVT::f80)
19395f757f3fSDimitry Andric     Alignment = MaybeAlign(4);
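  // For example (a sketch): an f64 argument on 32-bit MSVC targets gets only
  // MaybeAlign(4) here, matching MSVC's 4-byte stack slots, while an f80
  // keeps its natural alignment.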
19405f757f3fSDimitry Andric   return DAG.getStore(
19415f757f3fSDimitry Andric       Chain, dl, Arg, PtrOff,
19425f757f3fSDimitry Andric       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
19435f757f3fSDimitry Andric       Alignment);
19445f757f3fSDimitry Andric }
19455f757f3fSDimitry Andric 
19465f757f3fSDimitry Andric /// Emit a load of the return address if tail call
19475f757f3fSDimitry Andric /// optimization is performed and it is required.
19485f757f3fSDimitry Andric SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
19495f757f3fSDimitry Andric     SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
19505f757f3fSDimitry Andric     bool Is64Bit, int FPDiff, const SDLoc &dl) const {
19515f757f3fSDimitry Andric   // Adjust the Return address stack slot.
19525f757f3fSDimitry Andric   EVT VT = getPointerTy(DAG.getDataLayout());
19535f757f3fSDimitry Andric   OutRetAddr = getReturnAddressFrameIndex(DAG);
19545f757f3fSDimitry Andric 
19555f757f3fSDimitry Andric   // Load the "old" Return address.
19565f757f3fSDimitry Andric   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
19575f757f3fSDimitry Andric   return SDValue(OutRetAddr.getNode(), 1);
19585f757f3fSDimitry Andric }
19595f757f3fSDimitry Andric 
19605f757f3fSDimitry Andric /// Emit a store of the return address if tail call
19615f757f3fSDimitry Andric /// optimization is performed and it is required (FPDiff!=0).
19625f757f3fSDimitry Andric static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
19635f757f3fSDimitry Andric                                         SDValue Chain, SDValue RetAddrFrIdx,
19645f757f3fSDimitry Andric                                         EVT PtrVT, unsigned SlotSize,
19655f757f3fSDimitry Andric                                         int FPDiff, const SDLoc &dl) {
19665f757f3fSDimitry Andric   // Store the return address to the appropriate stack slot.
19675f757f3fSDimitry Andric   if (!FPDiff) return Chain;
19685f757f3fSDimitry Andric   // Calculate the new stack slot for the return address.
19695f757f3fSDimitry Andric   int NewReturnAddrFI =
19705f757f3fSDimitry Andric     MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
19715f757f3fSDimitry Andric                                          false);
19725f757f3fSDimitry Andric   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
19735f757f3fSDimitry Andric   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
19745f757f3fSDimitry Andric                        MachinePointerInfo::getFixedStack(
19755f757f3fSDimitry Andric                            DAG.getMachineFunction(), NewReturnAddrFI));
19765f757f3fSDimitry Andric   return Chain;
19775f757f3fSDimitry Andric }
19785f757f3fSDimitry Andric 
19795f757f3fSDimitry Andric /// Returns a vector_shuffle mask for a movs{s|d}, movd
19805f757f3fSDimitry Andric /// operation of specified width.
19815f757f3fSDimitry Andric SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
19825f757f3fSDimitry Andric                                    SDValue V1, SDValue V2) const {
19835f757f3fSDimitry Andric   unsigned NumElems = VT.getVectorNumElements();
19845f757f3fSDimitry Andric   SmallVector<int, 8> Mask;
19855f757f3fSDimitry Andric   Mask.push_back(NumElems);
19865f757f3fSDimitry Andric   for (unsigned i = 1; i != NumElems; ++i)
19875f757f3fSDimitry Andric     Mask.push_back(i);
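  // For example (a sketch): for MVT::v2i64 the mask is <2, 1>, so lane 0 is
  // taken from V2 and lane 1 from V1, which is exactly the movsd pattern of
  // replacing only the low element.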
19885f757f3fSDimitry Andric   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
19895f757f3fSDimitry Andric }
19905f757f3fSDimitry Andric 
19915f757f3fSDimitry Andric SDValue
19925f757f3fSDimitry Andric X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
19935f757f3fSDimitry Andric                              SmallVectorImpl<SDValue> &InVals) const {
19945f757f3fSDimitry Andric   SelectionDAG &DAG                     = CLI.DAG;
19955f757f3fSDimitry Andric   SDLoc &dl                             = CLI.DL;
19965f757f3fSDimitry Andric   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19975f757f3fSDimitry Andric   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
19985f757f3fSDimitry Andric   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
19995f757f3fSDimitry Andric   SDValue Chain                         = CLI.Chain;
20005f757f3fSDimitry Andric   SDValue Callee                        = CLI.Callee;
20015f757f3fSDimitry Andric   CallingConv::ID CallConv              = CLI.CallConv;
20025f757f3fSDimitry Andric   bool &isTailCall                      = CLI.IsTailCall;
20035f757f3fSDimitry Andric   bool isVarArg                         = CLI.IsVarArg;
20045f757f3fSDimitry Andric   const auto *CB                        = CLI.CB;
20055f757f3fSDimitry Andric 
20065f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
20075f757f3fSDimitry Andric   bool Is64Bit        = Subtarget.is64Bit();
20085f757f3fSDimitry Andric   bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
20095f757f3fSDimitry Andric   bool IsSibcall      = false;
20105f757f3fSDimitry Andric   bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
20115f757f3fSDimitry Andric       CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
20125f757f3fSDimitry Andric   bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
20135f757f3fSDimitry Andric   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
20145f757f3fSDimitry Andric   bool HasNCSR = (CB && isa<CallInst>(CB) &&
20155f757f3fSDimitry Andric                   CB->hasFnAttr("no_caller_saved_registers"));
20165f757f3fSDimitry Andric   bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
20175f757f3fSDimitry Andric   bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
20185f757f3fSDimitry Andric   bool IsCFICall = IsIndirectCall && CLI.CFIType;
2019*0fca6ea1SDimitry Andric   const Module *M = MF.getFunction().getParent();
20205f757f3fSDimitry Andric   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
20215f757f3fSDimitry Andric 
20225f757f3fSDimitry Andric   MachineFunction::CallSiteInfo CSInfo;
20235f757f3fSDimitry Andric   if (CallConv == CallingConv::X86_INTR)
20245f757f3fSDimitry Andric     report_fatal_error("X86 interrupts may not be called directly");
20255f757f3fSDimitry Andric 
20265f757f3fSDimitry Andric   // Analyze operands of the call, assigning locations to each operand.
20275f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> ArgLocs;
20285f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
20295f757f3fSDimitry Andric 
20305f757f3fSDimitry Andric   // Allocate shadow area for Win64.
20315f757f3fSDimitry Andric   if (IsWin64)
20325f757f3fSDimitry Andric     CCInfo.AllocateStack(32, Align(8));
20335f757f3fSDimitry Andric 
20345f757f3fSDimitry Andric   CCInfo.AnalyzeArguments(Outs, CC_X86);
20355f757f3fSDimitry Andric 
20365f757f3fSDimitry Andric   // In vectorcall calling convention a second pass is required for the HVA
20375f757f3fSDimitry Andric   // types.
20385f757f3fSDimitry Andric   if (CallingConv::X86_VectorCall == CallConv) {
20395f757f3fSDimitry Andric     CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
20405f757f3fSDimitry Andric   }
20415f757f3fSDimitry Andric 
2042*0fca6ea1SDimitry Andric   bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2043*0fca6ea1SDimitry Andric   if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2044*0fca6ea1SDimitry Andric     // If we are using a GOT, disable tail calls to external symbols with
2045*0fca6ea1SDimitry Andric     // default visibility. Tail calling such a symbol requires using a GOT
2046*0fca6ea1SDimitry Andric     // relocation, which forces early binding of the symbol. This breaks code
2047*0fca6ea1SDimitry Andric     // that requires lazy function symbol resolution. Using musttail or
2048*0fca6ea1SDimitry Andric     // GuaranteedTailCallOpt will override this.
2049*0fca6ea1SDimitry Andric     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2050*0fca6ea1SDimitry Andric     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2051*0fca6ea1SDimitry Andric                G->getGlobal()->hasDefaultVisibility()))
2052*0fca6ea1SDimitry Andric       isTailCall = false;
2053*0fca6ea1SDimitry Andric   }
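  // For example (a sketch, with `printf` standing in for any external,
  // default-visibility symbol): tail calling it from 32-bit -fPIC ELF code
  // would need a GOT relocation that binds the symbol eagerly, so an
  // ordinary call through the PLT is emitted instead.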
2054*0fca6ea1SDimitry Andric 
2055*0fca6ea1SDimitry Andric   if (isTailCall && !IsMustTail) {
2056*0fca6ea1SDimitry Andric     // Check if it's really possible to do a tail call.
2057*0fca6ea1SDimitry Andric     isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2058*0fca6ea1SDimitry Andric                                                    IsCalleePopSRet);
2059*0fca6ea1SDimitry Andric 
2060*0fca6ea1SDimitry Andric     // Sibcalls are automatically detected tailcalls which do not require
2061*0fca6ea1SDimitry Andric     // ABI changes.
2062*0fca6ea1SDimitry Andric     if (!IsGuaranteeTCO && isTailCall)
2063*0fca6ea1SDimitry Andric       IsSibcall = true;
2064*0fca6ea1SDimitry Andric 
2065*0fca6ea1SDimitry Andric     if (isTailCall)
2066*0fca6ea1SDimitry Andric       ++NumTailCalls;
2067*0fca6ea1SDimitry Andric   }
2068*0fca6ea1SDimitry Andric 
2069*0fca6ea1SDimitry Andric   if (IsMustTail && !isTailCall)
2070*0fca6ea1SDimitry Andric     report_fatal_error("failed to perform tail call elimination on a call "
2071*0fca6ea1SDimitry Andric                        "site marked musttail");
2072*0fca6ea1SDimitry Andric 
2073*0fca6ea1SDimitry Andric   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2074*0fca6ea1SDimitry Andric          "Var args not supported with calling conventions fastcc, ghc or hipe");
2075*0fca6ea1SDimitry Andric 
20765f757f3fSDimitry Andric   // Get a count of how many bytes are to be pushed on the stack.
20775f757f3fSDimitry Andric   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
20785f757f3fSDimitry Andric   if (IsSibcall)
20795f757f3fSDimitry Andric     // This is a sibcall. The memory operands are already available in the
20805f757f3fSDimitry Andric     // caller's incoming argument area, owned by the caller's own caller.
20815f757f3fSDimitry Andric     NumBytes = 0;
20825f757f3fSDimitry Andric   else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
20835f757f3fSDimitry Andric     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
20845f757f3fSDimitry Andric 
20855f757f3fSDimitry Andric   int FPDiff = 0;
20865f757f3fSDimitry Andric   if (isTailCall &&
20875f757f3fSDimitry Andric       shouldGuaranteeTCO(CallConv,
20885f757f3fSDimitry Andric                          MF.getTarget().Options.GuaranteedTailCallOpt)) {
20895f757f3fSDimitry Andric     // Lower arguments at fp - stackoffset + fpdiff.
20905f757f3fSDimitry Andric     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
20915f757f3fSDimitry Andric 
20925f757f3fSDimitry Andric     FPDiff = NumBytesCallerPushed - NumBytes;
20935f757f3fSDimitry Andric 
20945f757f3fSDimitry Andric     // Record the delta of movement of the return address stack slot, but
20955f757f3fSDimitry Andric     // only update it if this delta is below the previously recorded one.
20965f757f3fSDimitry Andric     if (FPDiff < X86Info->getTCReturnAddrDelta())
20975f757f3fSDimitry Andric       X86Info->setTCReturnAddrDelta(FPDiff);
20985f757f3fSDimitry Andric   }
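  // For example (a sketch): if this function pops 16 bytes of incoming
  // arguments on return but the tail callee needs only 8 bytes, FPDiff is 8
  // and the callee's argument slots (and the return address slot) are
  // shifted by 8 bytes relative to the incoming argument area.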
20995f757f3fSDimitry Andric 
21005f757f3fSDimitry Andric   unsigned NumBytesToPush = NumBytes;
21015f757f3fSDimitry Andric   unsigned NumBytesToPop = NumBytes;
21025f757f3fSDimitry Andric 
21035f757f3fSDimitry Andric   // If we have an inalloca argument, all stack space has already been allocated
21045f757f3fSDimitry Andric   // for us and is right at the top of the stack.  We don't support multiple
21055f757f3fSDimitry Andric   // arguments passed in memory when using inalloca.
21065f757f3fSDimitry Andric   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
21075f757f3fSDimitry Andric     NumBytesToPush = 0;
21085f757f3fSDimitry Andric     if (!ArgLocs.back().isMemLoc())
21095f757f3fSDimitry Andric       report_fatal_error("cannot use inalloca attribute on a register "
21105f757f3fSDimitry Andric                          "parameter");
21115f757f3fSDimitry Andric     if (ArgLocs.back().getLocMemOffset() != 0)
21125f757f3fSDimitry Andric       report_fatal_error("any parameter with the inalloca attribute must be "
21135f757f3fSDimitry Andric                          "the only memory argument");
21145f757f3fSDimitry Andric   } else if (CLI.IsPreallocated) {
21155f757f3fSDimitry Andric     assert(ArgLocs.back().isMemLoc() &&
21165f757f3fSDimitry Andric            "cannot use preallocated attribute on a register "
21175f757f3fSDimitry Andric            "parameter");
21185f757f3fSDimitry Andric     SmallVector<size_t, 4> PreallocatedOffsets;
21195f757f3fSDimitry Andric     for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
21205f757f3fSDimitry Andric       if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
21215f757f3fSDimitry Andric         PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
21225f757f3fSDimitry Andric       }
21235f757f3fSDimitry Andric     }
21245f757f3fSDimitry Andric     auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
21255f757f3fSDimitry Andric     size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
21265f757f3fSDimitry Andric     MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
21275f757f3fSDimitry Andric     MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
21285f757f3fSDimitry Andric     NumBytesToPush = 0;
21295f757f3fSDimitry Andric   }
21305f757f3fSDimitry Andric 
21315f757f3fSDimitry Andric   if (!IsSibcall && !IsMustTail)
21325f757f3fSDimitry Andric     Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
21335f757f3fSDimitry Andric                                  NumBytes - NumBytesToPush, dl);
21345f757f3fSDimitry Andric 
21355f757f3fSDimitry Andric   SDValue RetAddrFrIdx;
21365f757f3fSDimitry Andric   // Load return address for tail calls.
21375f757f3fSDimitry Andric   if (isTailCall && FPDiff)
21385f757f3fSDimitry Andric     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
21395f757f3fSDimitry Andric                                     Is64Bit, FPDiff, dl);
21405f757f3fSDimitry Andric 
21415f757f3fSDimitry Andric   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
21425f757f3fSDimitry Andric   SmallVector<SDValue, 8> MemOpChains;
21435f757f3fSDimitry Andric   SDValue StackPtr;
21445f757f3fSDimitry Andric 
21455f757f3fSDimitry Andric   // The next loop assumes that the locations are in the same order as the
21465f757f3fSDimitry Andric   // input arguments.
21475f757f3fSDimitry Andric   assert(isSortedByValueNo(ArgLocs) &&
21485f757f3fSDimitry Andric          "Argument Location list must be sorted before lowering");
21495f757f3fSDimitry Andric 
21505f757f3fSDimitry Andric   // Walk the register/memloc assignments, inserting copies/loads.  In the case
21515f757f3fSDimitry Andric   // of tail call optimization, arguments are handled later.
21525f757f3fSDimitry Andric   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
21535f757f3fSDimitry Andric   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
21545f757f3fSDimitry Andric        ++I, ++OutIndex) {
21555f757f3fSDimitry Andric     assert(OutIndex < Outs.size() && "Invalid Out index");
21565f757f3fSDimitry Andric     // Skip inalloca/preallocated arguments; they have already been written.
21575f757f3fSDimitry Andric     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
21585f757f3fSDimitry Andric     if (Flags.isInAlloca() || Flags.isPreallocated())
21595f757f3fSDimitry Andric       continue;
21605f757f3fSDimitry Andric 
21615f757f3fSDimitry Andric     CCValAssign &VA = ArgLocs[I];
21625f757f3fSDimitry Andric     EVT RegVT = VA.getLocVT();
21635f757f3fSDimitry Andric     SDValue Arg = OutVals[OutIndex];
21645f757f3fSDimitry Andric     bool isByVal = Flags.isByVal();
21655f757f3fSDimitry Andric 
21665f757f3fSDimitry Andric     // Promote the value if needed.
21675f757f3fSDimitry Andric     switch (VA.getLocInfo()) {
21685f757f3fSDimitry Andric     default: llvm_unreachable("Unknown loc info!");
21695f757f3fSDimitry Andric     case CCValAssign::Full: break;
21705f757f3fSDimitry Andric     case CCValAssign::SExt:
21715f757f3fSDimitry Andric       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
21725f757f3fSDimitry Andric       break;
21735f757f3fSDimitry Andric     case CCValAssign::ZExt:
21745f757f3fSDimitry Andric       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
21755f757f3fSDimitry Andric       break;
21765f757f3fSDimitry Andric     case CCValAssign::AExt:
21775f757f3fSDimitry Andric       if (Arg.getValueType().isVector() &&
21785f757f3fSDimitry Andric           Arg.getValueType().getVectorElementType() == MVT::i1)
21795f757f3fSDimitry Andric         Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
21805f757f3fSDimitry Andric       else if (RegVT.is128BitVector()) {
21815f757f3fSDimitry Andric         // Special case: passing MMX values in XMM registers.
21825f757f3fSDimitry Andric         Arg = DAG.getBitcast(MVT::i64, Arg);
21835f757f3fSDimitry Andric         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
21845f757f3fSDimitry Andric         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
21855f757f3fSDimitry Andric       } else
21865f757f3fSDimitry Andric         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
21875f757f3fSDimitry Andric       break;
21885f757f3fSDimitry Andric     case CCValAssign::BCvt:
21895f757f3fSDimitry Andric       Arg = DAG.getBitcast(RegVT, Arg);
21905f757f3fSDimitry Andric       break;
21915f757f3fSDimitry Andric     case CCValAssign::Indirect: {
21925f757f3fSDimitry Andric       if (isByVal) {
21935f757f3fSDimitry Andric         // Memcpy the argument to a temporary stack slot to prevent
21945f757f3fSDimitry Andric         // the caller from seeing any modifications the callee may make
21955f757f3fSDimitry Andric         // as guaranteed by the `byval` attribute.
21965f757f3fSDimitry Andric         int FrameIdx = MF.getFrameInfo().CreateStackObject(
21975f757f3fSDimitry Andric             Flags.getByValSize(),
21985f757f3fSDimitry Andric             std::max(Align(16), Flags.getNonZeroByValAlign()), false);
21995f757f3fSDimitry Andric         SDValue StackSlot =
22005f757f3fSDimitry Andric             DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
22015f757f3fSDimitry Andric         Chain =
22025f757f3fSDimitry Andric             CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
22035f757f3fSDimitry Andric         // From now on treat this as a regular pointer
22045f757f3fSDimitry Andric         Arg = StackSlot;
22055f757f3fSDimitry Andric         isByVal = false;
22065f757f3fSDimitry Andric       } else {
22075f757f3fSDimitry Andric         // Store the argument.
22085f757f3fSDimitry Andric         SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
22095f757f3fSDimitry Andric         int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
22105f757f3fSDimitry Andric         Chain = DAG.getStore(
22115f757f3fSDimitry Andric             Chain, dl, Arg, SpillSlot,
22125f757f3fSDimitry Andric             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
22135f757f3fSDimitry Andric         Arg = SpillSlot;
22145f757f3fSDimitry Andric       }
22155f757f3fSDimitry Andric       break;
22165f757f3fSDimitry Andric     }
22175f757f3fSDimitry Andric     }
22185f757f3fSDimitry Andric 
22195f757f3fSDimitry Andric     if (VA.needsCustom()) {
22205f757f3fSDimitry Andric       assert(VA.getValVT() == MVT::v64i1 &&
22215f757f3fSDimitry Andric              "Currently the only custom case is when we split v64i1 to 2 regs");
22225f757f3fSDimitry Andric       // Split v64i1 value into two registers
22235f757f3fSDimitry Andric       Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
22245f757f3fSDimitry Andric     } else if (VA.isRegLoc()) {
22255f757f3fSDimitry Andric       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
22265f757f3fSDimitry Andric       const TargetOptions &Options = DAG.getTarget().Options;
22275f757f3fSDimitry Andric       if (Options.EmitCallSiteInfo)
2228*0fca6ea1SDimitry Andric         CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
22295f757f3fSDimitry Andric       if (isVarArg && IsWin64) {
22305f757f3fSDimitry Andric         // The Win64 ABI requires an argument XMM reg to be copied to the
22315f757f3fSDimitry Andric         // corresponding shadow GPR if the callee is a varargs function.
22325f757f3fSDimitry Andric         Register ShadowReg;
22335f757f3fSDimitry Andric         switch (VA.getLocReg()) {
22345f757f3fSDimitry Andric         case X86::XMM0: ShadowReg = X86::RCX; break;
22355f757f3fSDimitry Andric         case X86::XMM1: ShadowReg = X86::RDX; break;
22365f757f3fSDimitry Andric         case X86::XMM2: ShadowReg = X86::R8; break;
22375f757f3fSDimitry Andric         case X86::XMM3: ShadowReg = X86::R9; break;
22385f757f3fSDimitry Andric         }
22395f757f3fSDimitry Andric         if (ShadowReg)
22405f757f3fSDimitry Andric           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
22415f757f3fSDimitry Andric       }
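      // For example (a sketch): a double passed in XMM1 to a varargs callee
      // is duplicated into RDX, so the callee's va_arg can fetch every
      // argument from the GPR home area without knowing its type.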
22425f757f3fSDimitry Andric     } else if (!IsSibcall && (!isTailCall || isByVal)) {
22435f757f3fSDimitry Andric       assert(VA.isMemLoc());
22445f757f3fSDimitry Andric       if (!StackPtr.getNode())
22455f757f3fSDimitry Andric         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
22465f757f3fSDimitry Andric                                       getPointerTy(DAG.getDataLayout()));
22475f757f3fSDimitry Andric       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
22485f757f3fSDimitry Andric                                              dl, DAG, VA, Flags, isByVal));
22495f757f3fSDimitry Andric     }
22505f757f3fSDimitry Andric   }
22515f757f3fSDimitry Andric 
22525f757f3fSDimitry Andric   if (!MemOpChains.empty())
22535f757f3fSDimitry Andric     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
22545f757f3fSDimitry Andric 
22555f757f3fSDimitry Andric   if (Subtarget.isPICStyleGOT()) {
22565f757f3fSDimitry Andric     // ELF / PIC requires the GOT pointer to be in the EBX register before
22575f757f3fSDimitry Andric     // function calls made via the PLT (except for regcall).
22585f757f3fSDimitry Andric     if (!isTailCall) {
22595f757f3fSDimitry Andric       // An indirect call with the RegCall calling convention may use up all
22605f757f3fSDimitry Andric       // the general registers, so it is not suitable to bind EBX for the
22615f757f3fSDimitry Andric       // GOT address; just let the register allocator handle it.
22625f757f3fSDimitry Andric       if (CallConv != CallingConv::X86_RegCall)
22635f757f3fSDimitry Andric         RegsToPass.push_back(std::make_pair(
22645f757f3fSDimitry Andric           Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
22655f757f3fSDimitry Andric                                           getPointerTy(DAG.getDataLayout()))));
22665f757f3fSDimitry Andric     } else {
22675f757f3fSDimitry Andric       // If we are tail calling and generating PIC/GOT style code load the
22685f757f3fSDimitry Andric       // address of the callee into ECX. The value in ECX is used as the
22695f757f3fSDimitry Andric       // target of the tail jump. This circumvents the ebx/callee-saved problem
22705f757f3fSDimitry Andric       // for tail calls on PIC/GOT architectures. Normally we would just put the
22715f757f3fSDimitry Andric       // address of GOT into ebx and then call target@PLT. But for tail calls
22725f757f3fSDimitry Andric       // ebx would be restored (since ebx is callee saved) before jumping to the
22735f757f3fSDimitry Andric       // target@PLT.
22745f757f3fSDimitry Andric 
22755f757f3fSDimitry Andric       // Note: The actual moving to ECX is done further down.
22765f757f3fSDimitry Andric       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
22775f757f3fSDimitry Andric       if (G && !G->getGlobal()->hasLocalLinkage() &&
22785f757f3fSDimitry Andric           G->getGlobal()->hasDefaultVisibility())
22795f757f3fSDimitry Andric         Callee = LowerGlobalAddress(Callee, DAG);
22805f757f3fSDimitry Andric       else if (isa<ExternalSymbolSDNode>(Callee))
22815f757f3fSDimitry Andric         Callee = LowerExternalSymbol(Callee, DAG);
22825f757f3fSDimitry Andric     }
22835f757f3fSDimitry Andric   }
22845f757f3fSDimitry Andric 
22855f757f3fSDimitry Andric   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
22865f757f3fSDimitry Andric       (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
22875f757f3fSDimitry Andric     // From AMD64 ABI document:
22885f757f3fSDimitry Andric     // For calls that may call functions that use varargs or stdargs
22895f757f3fSDimitry Andric     // (prototype-less calls or calls to functions containing ellipsis (...) in
22905f757f3fSDimitry Andric     // the declaration) %al is used as a hidden argument to specify the number
22915f757f3fSDimitry Andric     // of SSE registers used. The contents of %al do not need to match exactly
22925f757f3fSDimitry Andric     // the number of registers, but must be an upper bound on the number of SSE
22935f757f3fSDimitry Andric     // registers used and is in the range 0 - 8 inclusive.
22945f757f3fSDimitry Andric 
22955f757f3fSDimitry Andric     // Count the number of XMM registers allocated.
22965f757f3fSDimitry Andric     static const MCPhysReg XMMArgRegs[] = {
22975f757f3fSDimitry Andric       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
22985f757f3fSDimitry Andric       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
22995f757f3fSDimitry Andric     };
23005f757f3fSDimitry Andric     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
23015f757f3fSDimitry Andric     assert((Subtarget.hasSSE1() || !NumXMMRegs)
23025f757f3fSDimitry Andric            && "SSE registers cannot be used when SSE is disabled");
23035f757f3fSDimitry Andric     RegsToPass.push_back(std::make_pair(Register(X86::AL),
23045f757f3fSDimitry Andric                                         DAG.getConstant(NumXMMRegs, dl,
23055f757f3fSDimitry Andric                                                         MVT::i8)));
23065f757f3fSDimitry Andric   }
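  // For example (a sketch): `printf("%f\n", x)` with x in XMM0 is preceded
  // by `movb $1, %al`, telling the callee's va_start that one SSE register
  // may need to be spilled.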
23075f757f3fSDimitry Andric 
23085f757f3fSDimitry Andric   if (isVarArg && IsMustTail) {
23095f757f3fSDimitry Andric     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
23105f757f3fSDimitry Andric     for (const auto &F : Forwards) {
23115f757f3fSDimitry Andric       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
23125f757f3fSDimitry Andric       RegsToPass.push_back(std::make_pair(F.PReg, Val));
23135f757f3fSDimitry Andric     }
23145f757f3fSDimitry Andric   }
23155f757f3fSDimitry Andric 
23165f757f3fSDimitry Andric   // For tail calls, lower the arguments to the 'real' stack slots.  Sibcalls
23175f757f3fSDimitry Andric   // don't need this because the eligibility check rejects calls that require
23185f757f3fSDimitry Andric   // shuffling arguments passed in memory.
23195f757f3fSDimitry Andric   if (!IsSibcall && isTailCall) {
23205f757f3fSDimitry Andric     // Force all the incoming stack arguments to be loaded from the stack
23215f757f3fSDimitry Andric     // before any new outgoing arguments are stored to the stack, because the
23225f757f3fSDimitry Andric     // outgoing stack slots may alias the incoming argument stack slots, and
23235f757f3fSDimitry Andric     // the alias isn't otherwise explicit. This is slightly more conservative
23245f757f3fSDimitry Andric     // than necessary, because it means that each store effectively depends
23255f757f3fSDimitry Andric     // on every argument instead of just those arguments it would clobber.
23265f757f3fSDimitry Andric     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
23275f757f3fSDimitry Andric 
23285f757f3fSDimitry Andric     SmallVector<SDValue, 8> MemOpChains2;
23295f757f3fSDimitry Andric     SDValue FIN;
23305f757f3fSDimitry Andric     int FI = 0;
23315f757f3fSDimitry Andric     for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
23325f757f3fSDimitry Andric          ++I, ++OutsIndex) {
23335f757f3fSDimitry Andric       CCValAssign &VA = ArgLocs[I];
23345f757f3fSDimitry Andric 
23355f757f3fSDimitry Andric       if (VA.isRegLoc()) {
23365f757f3fSDimitry Andric         if (VA.needsCustom()) {
23375f757f3fSDimitry Andric           assert((CallConv == CallingConv::X86_RegCall) &&
23385f757f3fSDimitry Andric                  "Expecting custom case only in regcall calling convention");
23395f757f3fSDimitry Andric           // This means that we are in the special case where one argument was
23405f757f3fSDimitry Andric           // passed through two register locations; skip the next location.
23415f757f3fSDimitry Andric           ++I;
23425f757f3fSDimitry Andric         }
23435f757f3fSDimitry Andric 
23445f757f3fSDimitry Andric         continue;
23455f757f3fSDimitry Andric       }
23465f757f3fSDimitry Andric 
23475f757f3fSDimitry Andric       assert(VA.isMemLoc());
23485f757f3fSDimitry Andric       SDValue Arg = OutVals[OutsIndex];
23495f757f3fSDimitry Andric       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
23505f757f3fSDimitry Andric       // Skip inalloca/preallocated arguments.  They don't require any work.
23515f757f3fSDimitry Andric       if (Flags.isInAlloca() || Flags.isPreallocated())
23525f757f3fSDimitry Andric         continue;
23535f757f3fSDimitry Andric       // Create frame index.
23545f757f3fSDimitry Andric       int32_t Offset = VA.getLocMemOffset()+FPDiff;
23555f757f3fSDimitry Andric       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
23565f757f3fSDimitry Andric       FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
23575f757f3fSDimitry Andric       FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23585f757f3fSDimitry Andric 
23595f757f3fSDimitry Andric       if (Flags.isByVal()) {
23605f757f3fSDimitry Andric         // Copy relative to framepointer.
23615f757f3fSDimitry Andric         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
23625f757f3fSDimitry Andric         if (!StackPtr.getNode())
23635f757f3fSDimitry Andric           StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
23645f757f3fSDimitry Andric                                         getPointerTy(DAG.getDataLayout()));
23655f757f3fSDimitry Andric         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
23665f757f3fSDimitry Andric                              StackPtr, Source);
23675f757f3fSDimitry Andric 
23685f757f3fSDimitry Andric         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
23695f757f3fSDimitry Andric                                                          ArgChain,
23705f757f3fSDimitry Andric                                                          Flags, DAG, dl));
23715f757f3fSDimitry Andric       } else {
23725f757f3fSDimitry Andric         // Store relative to framepointer.
23735f757f3fSDimitry Andric         MemOpChains2.push_back(DAG.getStore(
23745f757f3fSDimitry Andric             ArgChain, dl, Arg, FIN,
23755f757f3fSDimitry Andric             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
23765f757f3fSDimitry Andric       }
23775f757f3fSDimitry Andric     }
23785f757f3fSDimitry Andric 
23795f757f3fSDimitry Andric     if (!MemOpChains2.empty())
23805f757f3fSDimitry Andric       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
23815f757f3fSDimitry Andric 
23825f757f3fSDimitry Andric     // Store the return address to the appropriate stack slot.
23835f757f3fSDimitry Andric     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
23845f757f3fSDimitry Andric                                      getPointerTy(DAG.getDataLayout()),
23855f757f3fSDimitry Andric                                      RegInfo->getSlotSize(), FPDiff, dl);
23865f757f3fSDimitry Andric   }
23875f757f3fSDimitry Andric 
23885f757f3fSDimitry Andric   // Build a sequence of copy-to-reg nodes chained together with token chain
23895f757f3fSDimitry Andric   // and glue operands which copy the outgoing args into registers.
23905f757f3fSDimitry Andric   SDValue InGlue;
23915f757f3fSDimitry Andric   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
23925f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
23935f757f3fSDimitry Andric                              RegsToPass[i].second, InGlue);
23945f757f3fSDimitry Andric     InGlue = Chain.getValue(1);
23955f757f3fSDimitry Andric   }
23965f757f3fSDimitry Andric 
23975f757f3fSDimitry Andric   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
23985f757f3fSDimitry Andric     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
23995f757f3fSDimitry Andric     // In the 64-bit large code model, we have to make all calls
24005f757f3fSDimitry Andric     // through a register, since the call instruction's 32-bit
24015f757f3fSDimitry Andric     // pc-relative offset may not be large enough to hold the whole
24025f757f3fSDimitry Andric     // address.
24035f757f3fSDimitry Andric   } else if (Callee->getOpcode() == ISD::GlobalAddress ||
24045f757f3fSDimitry Andric              Callee->getOpcode() == ISD::ExternalSymbol) {
24055f757f3fSDimitry Andric     // Lower direct calls to global addresses and external symbols. Setting
24065f757f3fSDimitry Andric     // ForCall to true here has the effect of removing WrapperRIP when possible
24075f757f3fSDimitry Andric     // to allow direct calls to be selected without first materializing the
24085f757f3fSDimitry Andric     // address into a register.
24095f757f3fSDimitry Andric     Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
24105f757f3fSDimitry Andric   } else if (Subtarget.isTarget64BitILP32() &&
24115f757f3fSDimitry Andric              Callee.getValueType() == MVT::i32) {
24125f757f3fSDimitry Andric     // Zero-extend the 32-bit Callee address to 64 bits, per the x32 ABI.
24135f757f3fSDimitry Andric     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
24145f757f3fSDimitry Andric   }
24155f757f3fSDimitry Andric 
24165f757f3fSDimitry Andric   // Returns a chain & a glue for retval copy to use.
24175f757f3fSDimitry Andric   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
24185f757f3fSDimitry Andric   SmallVector<SDValue, 8> Ops;
24195f757f3fSDimitry Andric 
24205f757f3fSDimitry Andric   if (!IsSibcall && isTailCall && !IsMustTail) {
24215f757f3fSDimitry Andric     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
24225f757f3fSDimitry Andric     InGlue = Chain.getValue(1);
24235f757f3fSDimitry Andric   }
24245f757f3fSDimitry Andric 
24255f757f3fSDimitry Andric   Ops.push_back(Chain);
24265f757f3fSDimitry Andric   Ops.push_back(Callee);
24275f757f3fSDimitry Andric 
24285f757f3fSDimitry Andric   if (isTailCall)
24295f757f3fSDimitry Andric     Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
24305f757f3fSDimitry Andric 
24315f757f3fSDimitry Andric   // Add argument registers to the end of the list so that they are known live
24325f757f3fSDimitry Andric   // into the call.
24335f757f3fSDimitry Andric   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
24345f757f3fSDimitry Andric     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
24355f757f3fSDimitry Andric                                   RegsToPass[i].second.getValueType()));
24365f757f3fSDimitry Andric 
24375f757f3fSDimitry Andric   // Add a register mask operand representing the call-preserved registers.
24385f757f3fSDimitry Andric   const uint32_t *Mask = [&]() {
24395f757f3fSDimitry Andric     auto AdaptedCC = CallConv;
24405f757f3fSDimitry Andric     // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
24415f757f3fSDimitry Andric     // use X86_INTR calling convention because it has the same CSR mask
24425f757f3fSDimitry Andric     // (same preserved registers).
24435f757f3fSDimitry Andric     if (HasNCSR)
24445f757f3fSDimitry Andric       AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
24455f757f3fSDimitry Andric     // If NoCalleeSavedRegisters is requested, then use GHC since it happens
24465f757f3fSDimitry Andric     // to use the CSR_NoRegs_RegMask.
24475f757f3fSDimitry Andric     if (CB && CB->hasFnAttr("no_callee_saved_registers"))
24485f757f3fSDimitry Andric       AdaptedCC = (CallingConv::ID)CallingConv::GHC;
24495f757f3fSDimitry Andric     return RegInfo->getCallPreservedMask(MF, AdaptedCC);
24505f757f3fSDimitry Andric   }();
24515f757f3fSDimitry Andric   assert(Mask && "Missing call preserved mask for calling convention");
24525f757f3fSDimitry Andric 
24535f757f3fSDimitry Andric   // If this is an invoke in a 32-bit function using a funclet-based
24545f757f3fSDimitry Andric   // personality, assume the function clobbers all registers. If an exception
24555f757f3fSDimitry Andric   // is thrown, the runtime will not restore CSRs.
24565f757f3fSDimitry Andric   // FIXME: Model this more precisely so that we can register allocate across
24575f757f3fSDimitry Andric   // the normal edge and spill and fill across the exceptional edge.
24585f757f3fSDimitry Andric   if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
24595f757f3fSDimitry Andric     const Function &CallerFn = MF.getFunction();
24605f757f3fSDimitry Andric     EHPersonality Pers =
24615f757f3fSDimitry Andric         CallerFn.hasPersonalityFn()
24625f757f3fSDimitry Andric             ? classifyEHPersonality(CallerFn.getPersonalityFn())
24635f757f3fSDimitry Andric             : EHPersonality::Unknown;
24645f757f3fSDimitry Andric     if (isFuncletEHPersonality(Pers))
24655f757f3fSDimitry Andric       Mask = RegInfo->getNoPreservedMask();
24665f757f3fSDimitry Andric   }

  // Define a new register mask from the existing mask.
  uint32_t *RegMask = nullptr;

  // In some calling conventions we need to remove the used physical registers
  // from the reg mask. Create a new RegMask for such calling conventions.
  // The RegMask for calling conventions that disable only return registers
  // (e.g. preserve_most) will be modified later in LowerCallResult.
  bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
  if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

    // Allocate a new RegMask and copy Mask into it.
    RegMask = MF.allocateRegMask();
    unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
    memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);

    // Make sure all sub-registers of the argument registers are reset
    // in the RegMask.
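    // The mask is a bit vector with one bit per physical register: register R
    // is preserved iff bit (R % 32) of word (R / 32) is set, which is what the
    // index arithmetic below implements. For example (hypothetical), if ECX
    // carries an argument here, the loop clears the bits for ECX and its
    // sub-registers CX, CL and CH, marking them clobbered by the call.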
    if (ShouldDisableArgRegs) {
      for (auto const &RegPair : RegsToPass)
        for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
          RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
    }

    // Create the RegMask operand according to our updated mask.
    Ops.push_back(DAG.getRegisterMask(RegMask));
  } else {
    // Create the RegMask operand according to the static mask.
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  if (isTailCall) {
    // We used to do:
    //// If this is the first return lowered for this function, add the regs
    //// to the liveout set for the function.
    // This isn't right, although it's probably harmless on x86; liveouts
    // should be computed from returns, not tail calls. Consider a void
    // function making a tail call to a function returning int.
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
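    // The TC_RETURN node is matched to a TCRETURN pseudo-instruction and later
    // expanded to a plain "jmp" in the epilogue, reusing the caller's return
    // address instead of pushing a new one.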

    if (IsCFICall)
      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());

    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
    Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
  } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
    // Calls with a "clang.arc.attachedcall" bundle are special. They should be
    // expanded to the call, directly followed by a special marker sequence and
    // a call to an ObjC library function. Use the CALL_RVMARKER node to do
    // that.
    assert(!isTailCall &&
           "tail calls cannot be marked with clang.arc.attachedcall");
    assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");

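    // For illustration (hypothetical IR, not from a test): a call such as
    //   %v = call ptr @foo() [ "clang.arc.attachedcall"
    //                           (ptr @objc_retainAutoreleasedReturnValue) ]
    // carries the ObjC runtime function in its bundle; that function is
    // inserted as an extra operand below and emitted right after the call.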
    // Add a target global address for the retainRV/claimRV runtime function
    // just before the call target.
    Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
    auto PtrVT = getPointerTy(DAG.getDataLayout());
    auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
    Ops.insert(Ops.begin() + 1, GA);
    Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
  } else {
    Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
  }

  if (IsCFICall)
    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());

  InGlue = Chain.getValue(1);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));

  // Save heapallocsite metadata.
  if (CLI.CB)
    if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
      DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
                       DAG.getTarget().Options.GuaranteedTailCallOpt))
    NumBytesForCalleeToPop = NumBytes; // Callee pops everything.
  else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
    // If this call passes a struct-return pointer, the callee
    // pops that struct pointer.
    NumBytesForCalleeToPop = 4;
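  // For example, a 32-bit callee that pops its 4-byte sret pointer returns
  // with "ret $4"; recording those 4 bytes here keeps the CALLSEQ_END stack
  // adjustment below consistent with the callee's behavior.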

  // CALLSEQ_END returns a glue value for the retval copies to use.
  if (!IsSibcall) {
    Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
                               InGlue, dl);
    InGlue = Chain.getValue(1);
  }

  if (CallingConv::PreserveNone == CallConv)
    for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
      if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
          Outs[I].Flags.isSwiftError()) {
        errorUnsupported(DAG, dl,
                         "Swift attributes can't be used with preserve_none");
        break;
      }
    }

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, RegMask);
}

//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like the stdcall convention, the callee cleans up the arguments, except
//  that ECX is reserved for storing the address of the tail-called function.
//  Only 2 registers are free for argument passing (inreg). Tail call
//  optimization is performed provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
//  On the x86_64 architecture with GOT-style position-independent code, only
//  local (within-module) calls are supported at the moment.
//  To keep the stack aligned according to the platform ABI, the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - Darwin's
//  dyld, for example.)
//  If a tail-called function (the callee) has more arguments than the caller,
//  the caller needs to make sure that there is room to move the RETADDR to.
//  This is achieved by reserving an area the size of the argument delta right
//  after the original RETADDR, but before the saved frame pointer or the
//  spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3,
//  arg4). Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// Align the stack size so that, together with the pushed return address, it
/// satisfies the alignment requirement; e.g. 16n + 12 for a 16-byte stack
/// alignment with 4-byte slots.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
                                               SelectionDAG &DAG) const {
  const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
  const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
  assert(StackSize % SlotSize == 0 &&
         "StackSize must be a multiple of SlotSize");
  return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
}
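
// Worked example (hypothetical values): with StackSize = 20, SlotSize = 4 and
// a 16-byte stack alignment, alignTo(20 + 4, 16) = 32, so the function returns
// 32 - 4 = 28 = 16 * 1 + 12; pushing the 4-byte return address on top of 28
// bytes of arguments leaves the stack pointer 16-byte aligned again.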

/// Return true if the given stack call argument is already available in the
/// same position (relatively) of the caller's incoming argument stack.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset,
                                ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI,
                                const MachineRegisterInfo *MRI,
                                const X86InstrInfo *TII,
                                const CCValAssign &VA) {
  unsigned Bytes = Arg.getValueSizeInBits() / 8;

  for (;;) {
    // Look through nodes that don't alter the bits of the incoming value.
    unsigned Op = Arg.getOpcode();
    if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
        Op == ISD::AssertZext) {
      Arg = Arg.getOperand(0);
      continue;
    }
    if (Op == ISD::TRUNCATE) {
      const SDValue &TruncInput = Arg.getOperand(0);
      if (TruncInput.getOpcode() == ISD::AssertZext &&
          cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
              Arg.getValueType()) {
        Arg = TruncInput.getOperand(0);
        continue;
      }
    }
    break;
  }
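  // For example (hypothetical): an i8 argument may arrive here as
  // (truncate (AssertZext (CopyFromReg ...), i8)); the loop above strips the
  // truncate/AssertZext pair so the code below can inspect the underlying
  // CopyFromReg or load.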

  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR.isVirtual())
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
           Opcode == X86::LEA64_32r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // A byval argument is passed in as a pointer but it's now being
      // dereferenced, e.g.:
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI.isFixedObjectIndex(FI))
    return false;

  if (Offset != MFI.getObjectOffset(FI))
    return false;

  // If this is not byval, check that the argument stack object is immutable.
  // inalloca and argument copy elision can create mutable argument stack
  // objects. Byval objects can be mutated, but a byval call intends to pass
  // the mutated memory.
  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
    return false;

  if (VA.getLocVT().getFixedSizeInBits() >
      Arg.getValueSizeInBits().getFixedValue()) {
    // If the argument location is wider than the argument type, check that any
    // extension flags match.
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
      return false;
    }
  }

  return Bytes == MFI.getObjectSize(FI);
}
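
// For illustration (a hypothetical example): in
//   int f(int a, int b) { return g(a, b); }
// compiled for 32-bit, both outgoing arguments are loads from fixed, immutable
// incoming-argument slots at exactly the offsets g expects, so the checks
// above succeed for each argument and no stack traffic is needed for the
// sibcall.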

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
/// Note that the x86 backend does not check musttail calls for eligibility!
/// The rest of x86 tail call lowering must be prepared to forward arguments
/// of any type.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
    SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
  SelectionDAG &DAG = CLI.DAG;
  const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CalleeCC = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the caller's return type is x86_fp80 and the callee's is not, then the
  // FP_EXTEND of the call result is not a nop. It's not safe to perform a
  // tailcall optimization here.
  if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
      CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do a sibcall if the stack needs to be dynamically re-aligned. PEI
  // needs to emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if we're an sret-returning function and
  // the callee is incompatible. See comment in LowerReturn about why
  // hasStructRetAttr is insufficient.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So
    // it needs to be (a) an sret function itself and (b) we pass our sret as
    // its sret. Condition #b is harder to determine.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the caller it is not safe to
  // optimize this into a sibcall.
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
    RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = CCInfo.getStackSize();

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    if (StackArgsSize > 0) {
      // Check whether the arguments are already laid out in the same positions
      // as the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments, so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}
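
// For example (hypothetical): a 32-bit stdcall caller that will return with
// "ret $8" cannot sibcall a stdcall callee taking 12 bytes of arguments; the
// callee would execute "ret $12" on the caller's behalf and unbalance the
// stack by 4 bytes, which is why the pop amounts above must match exactly.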

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
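
// For example (hypothetical): a 32-bit X86_StdCall function taking two i32
// arguments pops them itself with "ret $8", so this returns true; in 64-bit
// mode these conventions are caller-pop and the function returns false.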