xref: /llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp (revision 2068b1ba031e258a6448bea372005d19692c802a)
126a73082SReid Kleckner //===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
226a73082SReid Kleckner //
326a73082SReid Kleckner // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
426a73082SReid Kleckner // See https://llvm.org/LICENSE.txt for license information.
526a73082SReid Kleckner // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
626a73082SReid Kleckner //
726a73082SReid Kleckner //===----------------------------------------------------------------------===//
826a73082SReid Kleckner //
926a73082SReid Kleckner /// \file
1026a73082SReid Kleckner /// This file implements the lowering of LLVM calls to DAG nodes.
1126a73082SReid Kleckner //
1226a73082SReid Kleckner //===----------------------------------------------------------------------===//
1326a73082SReid Kleckner 
1426a73082SReid Kleckner #include "X86.h"
1526a73082SReid Kleckner #include "X86CallingConv.h"
1626a73082SReid Kleckner #include "X86FrameLowering.h"
1726a73082SReid Kleckner #include "X86ISelLowering.h"
1826a73082SReid Kleckner #include "X86InstrBuilder.h"
1926a73082SReid Kleckner #include "X86MachineFunctionInfo.h"
2026a73082SReid Kleckner #include "X86TargetMachine.h"
2126a73082SReid Kleckner #include "llvm/ADT/Statistic.h"
2226a73082SReid Kleckner #include "llvm/Analysis/ObjCARCUtil.h"
2326a73082SReid Kleckner #include "llvm/CodeGen/MachineJumpTableInfo.h"
2426a73082SReid Kleckner #include "llvm/CodeGen/MachineModuleInfo.h"
2526a73082SReid Kleckner #include "llvm/CodeGen/WinEHFuncInfo.h"
2626a73082SReid Kleckner #include "llvm/IR/DiagnosticInfo.h"
2726a73082SReid Kleckner #include "llvm/IR/IRBuilder.h"
2874deadf1SNikita Popov #include "llvm/IR/Module.h"
2926a73082SReid Kleckner 
3026a73082SReid Kleckner #define DEBUG_TYPE "x86-isel"
3126a73082SReid Kleckner 
3226a73082SReid Kleckner using namespace llvm;
3326a73082SReid Kleckner 
3426a73082SReid Kleckner STATISTIC(NumTailCalls, "Number of tail calls");
3526a73082SReid Kleckner 
3626a73082SReid Kleckner /// Call this when the user attempts to do something unsupported, like
3726a73082SReid Kleckner /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
3826a73082SReid Kleckner /// report_fatal_error, so calling code should attempt to recover without
3926a73082SReid Kleckner /// crashing.
4026a73082SReid Kleckner static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
4126a73082SReid Kleckner                              const char *Msg) {
4226a73082SReid Kleckner   MachineFunction &MF = DAG.getMachineFunction();
4326a73082SReid Kleckner   DAG.getContext()->diagnose(
4426a73082SReid Kleckner       DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
4526a73082SReid Kleckner }
4626a73082SReid Kleckner 
4726a73082SReid Kleckner /// Returns true if a CC can dynamically exclude a register from the list of
4826a73082SReid Kleckner /// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
4926a73082SReid Kleckner /// the return registers.
5026a73082SReid Kleckner static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
5126a73082SReid Kleckner   switch (CC) {
5226a73082SReid Kleckner   default:
5326a73082SReid Kleckner     return false;
5426a73082SReid Kleckner   case CallingConv::X86_RegCall:
5526a73082SReid Kleckner   case CallingConv::PreserveMost:
5626a73082SReid Kleckner   case CallingConv::PreserveAll:
5726a73082SReid Kleckner     return true;
5826a73082SReid Kleckner   }
5926a73082SReid Kleckner }
6026a73082SReid Kleckner 
6126a73082SReid Kleckner /// Returns true if a CC can dynamically exclude a register from the list of
6226a73082SReid Kleckner /// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
6326a73082SReid Kleckner /// the parameters.
6426a73082SReid Kleckner static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
6526a73082SReid Kleckner   return CC == CallingConv::X86_RegCall;
6626a73082SReid Kleckner }
6726a73082SReid Kleckner 
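/// Map a vXi1 mask value to the MVT it is actually passed in and the number of
/// registers needed, for calling conventions that do not pass masks in
/// k-registers. Returns {INVALID_SIMPLE_VALUE_TYPE, 0} when the generic
/// (k-register) handling should be used instead. For example, under the
/// default C calling convention a v16i1 argument travels as a single v16i8 in
/// an XMM register, while under regcall it falls through to the generic path.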
6826a73082SReid Kleckner static std::pair<MVT, unsigned>
6926a73082SReid Kleckner handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
7026a73082SReid Kleckner                                  const X86Subtarget &Subtarget) {
7126a73082SReid Kleckner   // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
7226a73082SReid Kleckner   // convention is one that uses k registers.
7326a73082SReid Kleckner   if (NumElts == 2)
7426a73082SReid Kleckner     return {MVT::v2i64, 1};
7526a73082SReid Kleckner   if (NumElts == 4)
7626a73082SReid Kleckner     return {MVT::v4i32, 1};
7726a73082SReid Kleckner   if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
7826a73082SReid Kleckner       CC != CallingConv::Intel_OCL_BI)
7926a73082SReid Kleckner     return {MVT::v8i16, 1};
8026a73082SReid Kleckner   if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
8126a73082SReid Kleckner       CC != CallingConv::Intel_OCL_BI)
8226a73082SReid Kleckner     return {MVT::v16i8, 1};
8326a73082SReid Kleckner   // v32i1 passes in ymm unless we have BWI and the calling convention is
8426a73082SReid Kleckner   // regcall.
8526a73082SReid Kleckner   if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
8626a73082SReid Kleckner     return {MVT::v32i8, 1};
8726a73082SReid Kleckner   // Split v64i1 vectors if we don't have v64i8 available.
8826a73082SReid Kleckner   if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
8926a73082SReid Kleckner     if (Subtarget.useAVX512Regs())
9026a73082SReid Kleckner       return {MVT::v64i8, 1};
9126a73082SReid Kleckner     return {MVT::v32i8, 2};
9226a73082SReid Kleckner   }
9326a73082SReid Kleckner 
9426a73082SReid Kleckner   // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
9526a73082SReid Kleckner   if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
9626a73082SReid Kleckner       NumElts > 64)
9726a73082SReid Kleckner     return {MVT::i8, NumElts};
9826a73082SReid Kleckner 
9926a73082SReid Kleckner   return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
10026a73082SReid Kleckner }
10126a73082SReid Kleckner 
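/// Override the generic register type used to pass or return a value under the
/// given calling convention. The special cases below cover AVX-512 i1 masks,
/// f16 vectors with fewer than 8 elements (widened to v8f16), f64/f80 without
/// x87 on 32-bit targets, and bf16 (handled like f16 when f16 is legal).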
10226a73082SReid Kleckner MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
10326a73082SReid Kleckner                                                      CallingConv::ID CC,
10426a73082SReid Kleckner                                                      EVT VT) const {
10526a73082SReid Kleckner   if (VT.isVector()) {
10626a73082SReid Kleckner     if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
10726a73082SReid Kleckner       unsigned NumElts = VT.getVectorNumElements();
10826a73082SReid Kleckner 
10926a73082SReid Kleckner       MVT RegisterVT;
11026a73082SReid Kleckner       unsigned NumRegisters;
11126a73082SReid Kleckner       std::tie(RegisterVT, NumRegisters) =
11226a73082SReid Kleckner           handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
11326a73082SReid Kleckner       if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
11426a73082SReid Kleckner         return RegisterVT;
11526a73082SReid Kleckner     }
11626a73082SReid Kleckner 
11726a73082SReid Kleckner     if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
11826a73082SReid Kleckner       return MVT::v8f16;
11926a73082SReid Kleckner   }
12026a73082SReid Kleckner 
12126a73082SReid Kleckner   // We will use more GPRs for f64 and f80 on 32-bit targets when x87 is disabled.
12226a73082SReid Kleckner   if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
12326a73082SReid Kleckner       !Subtarget.hasX87())
12426a73082SReid Kleckner     return MVT::i32;
12526a73082SReid Kleckner 
1269177e812SMatt Arsenault   if (isTypeLegal(MVT::f16)) {
12726a73082SReid Kleckner     if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
1289177e812SMatt Arsenault       return getRegisterTypeForCallingConv(
1299177e812SMatt Arsenault           Context, CC, VT.changeVectorElementType(MVT::f16));
13026a73082SReid Kleckner 
13159af659eSPhoebe Wang     if (VT == MVT::bf16)
13259af659eSPhoebe Wang       return MVT::f16;
1339177e812SMatt Arsenault   }
13459af659eSPhoebe Wang 
13526a73082SReid Kleckner   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
13626a73082SReid Kleckner }
13726a73082SReid Kleckner 
13826a73082SReid Kleckner unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
13926a73082SReid Kleckner                                                           CallingConv::ID CC,
14026a73082SReid Kleckner                                                           EVT VT) const {
14126a73082SReid Kleckner   if (VT.isVector()) {
14226a73082SReid Kleckner     if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
14326a73082SReid Kleckner       unsigned NumElts = VT.getVectorNumElements();
14426a73082SReid Kleckner 
14526a73082SReid Kleckner       MVT RegisterVT;
14626a73082SReid Kleckner       unsigned NumRegisters;
14726a73082SReid Kleckner       std::tie(RegisterVT, NumRegisters) =
14826a73082SReid Kleckner           handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
14926a73082SReid Kleckner       if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
15026a73082SReid Kleckner         return NumRegisters;
15126a73082SReid Kleckner     }
15226a73082SReid Kleckner 
15326a73082SReid Kleckner     if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
15426a73082SReid Kleckner       return 1;
15526a73082SReid Kleckner   }
15626a73082SReid Kleckner 
15726a73082SReid Kleckner   // We have to split f64 into 2 registers and f80 into 3 registers on 32-bit
15826a73082SReid Kleckner   // targets if x87 is disabled.
15926a73082SReid Kleckner   if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
16026a73082SReid Kleckner     if (VT == MVT::f64)
16126a73082SReid Kleckner       return 2;
16226a73082SReid Kleckner     if (VT == MVT::f80)
16326a73082SReid Kleckner       return 3;
16426a73082SReid Kleckner   }
16526a73082SReid Kleckner 
1669177e812SMatt Arsenault   if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
1679177e812SMatt Arsenault       isTypeLegal(MVT::f16))
16826a73082SReid Kleckner     return getNumRegistersForCallingConv(Context, CC,
16926a73082SReid Kleckner                                          VT.changeVectorElementType(MVT::f16));
17026a73082SReid Kleckner 
17126a73082SReid Kleckner   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
17226a73082SReid Kleckner }
17326a73082SReid Kleckner 
17426a73082SReid Kleckner unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
17526a73082SReid Kleckner     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
17626a73082SReid Kleckner     unsigned &NumIntermediates, MVT &RegisterVT) const {
17726a73082SReid Kleckner   // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
17826a73082SReid Kleckner   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
17926a73082SReid Kleckner       Subtarget.hasAVX512() &&
18026a73082SReid Kleckner       (!isPowerOf2_32(VT.getVectorNumElements()) ||
18126a73082SReid Kleckner        (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
18226a73082SReid Kleckner        VT.getVectorNumElements() > 64)) {
18326a73082SReid Kleckner     RegisterVT = MVT::i8;
18426a73082SReid Kleckner     IntermediateVT = MVT::i1;
18526a73082SReid Kleckner     NumIntermediates = VT.getVectorNumElements();
18626a73082SReid Kleckner     return NumIntermediates;
18726a73082SReid Kleckner   }
18826a73082SReid Kleckner 
18926a73082SReid Kleckner   // Split v64i1 vectors if we don't have v64i8 available.
19026a73082SReid Kleckner   if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
19126a73082SReid Kleckner       CC != CallingConv::X86_RegCall) {
19226a73082SReid Kleckner     RegisterVT = MVT::v32i8;
19326a73082SReid Kleckner     IntermediateVT = MVT::v32i1;
19426a73082SReid Kleckner     NumIntermediates = 2;
19526a73082SReid Kleckner     return 2;
19626a73082SReid Kleckner   }
19726a73082SReid Kleckner 
19826a73082SReid Kleckner   // Split vNbf16 vectors according to vNf16.
1999177e812SMatt Arsenault   if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
2009177e812SMatt Arsenault       isTypeLegal(MVT::f16))
20126a73082SReid Kleckner     VT = VT.changeVectorElementType(MVT::f16);
20226a73082SReid Kleckner 
20326a73082SReid Kleckner   return TargetLowering::getVectorTypeBreakdownForCallingConv(
20426a73082SReid Kleckner       Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
20526a73082SReid Kleckner }
20626a73082SReid Kleckner 
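/// Choose the result type of a SETCC. Scalar compares produce i8. With AVX-512
/// (plus VLX/BWI where the legalized width requires it) vector compares produce
/// a vXi1 mask; otherwise they produce an integer vector with the same element
/// width as the operands.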
20726a73082SReid Kleckner EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
20826a73082SReid Kleckner                                           LLVMContext& Context,
20926a73082SReid Kleckner                                           EVT VT) const {
21026a73082SReid Kleckner   if (!VT.isVector())
21126a73082SReid Kleckner     return MVT::i8;
21226a73082SReid Kleckner 
21326a73082SReid Kleckner   if (Subtarget.hasAVX512()) {
21426a73082SReid Kleckner     // Figure out what this type will be legalized to.
21526a73082SReid Kleckner     EVT LegalVT = VT;
21626a73082SReid Kleckner     while (getTypeAction(Context, LegalVT) != TypeLegal)
21726a73082SReid Kleckner       LegalVT = getTypeToTransformTo(Context, LegalVT);
21826a73082SReid Kleckner 
21926a73082SReid Kleckner     // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
22026a73082SReid Kleckner     if (LegalVT.getSimpleVT().is512BitVector())
22126a73082SReid Kleckner       return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
22226a73082SReid Kleckner 
22326a73082SReid Kleckner     if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
22426a73082SReid Kleckner       // If we legalized to less than a 512-bit vector, then we will use a vXi1
22526a73082SReid Kleckner       // compare for vXi32/vXi64 for sure. If we have BWI we will also support
22626a73082SReid Kleckner       // vXi16/vXi8.
22726a73082SReid Kleckner       MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
22826a73082SReid Kleckner       if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
22926a73082SReid Kleckner         return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
23026a73082SReid Kleckner     }
23126a73082SReid Kleckner   }
23226a73082SReid Kleckner 
23326a73082SReid Kleckner   return VT.changeVectorElementTypeToInteger();
23426a73082SReid Kleckner }
23526a73082SReid Kleckner 
236*2068b1baSNikita Popov bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
237*2068b1baSNikita Popov     Type *Ty, CallingConv::ID CallConv, bool isVarArg,
238*2068b1baSNikita Popov     const DataLayout &DL) const {
239*2068b1baSNikita Popov   // i128 split into i64 needs to be allocated to two consecutive registers,
240*2068b1baSNikita Popov   // or spilled to the stack as a whole.
241*2068b1baSNikita Popov   return Ty->isIntegerTy(128);
242*2068b1baSNikita Popov }
243*2068b1baSNikita Popov 
24426a73082SReid Kleckner /// Helper for getByValTypeAlignment to determine
24526a73082SReid Kleckner /// the desired ByVal argument alignment.
24626a73082SReid Kleckner static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
24726a73082SReid Kleckner   if (MaxAlign == 16)
24826a73082SReid Kleckner     return;
24926a73082SReid Kleckner   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
25026a73082SReid Kleckner     if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
25126a73082SReid Kleckner       MaxAlign = Align(16);
25226a73082SReid Kleckner   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
25326a73082SReid Kleckner     Align EltAlign;
25426a73082SReid Kleckner     getMaxByValAlign(ATy->getElementType(), EltAlign);
25526a73082SReid Kleckner     if (EltAlign > MaxAlign)
25626a73082SReid Kleckner       MaxAlign = EltAlign;
25726a73082SReid Kleckner   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
25826a73082SReid Kleckner     for (auto *EltTy : STy->elements()) {
25926a73082SReid Kleckner       Align EltAlign;
26026a73082SReid Kleckner       getMaxByValAlign(EltTy, EltAlign);
26126a73082SReid Kleckner       if (EltAlign > MaxAlign)
26226a73082SReid Kleckner         MaxAlign = EltAlign;
26326a73082SReid Kleckner       if (MaxAlign == 16)
26426a73082SReid Kleckner         break;
26526a73082SReid Kleckner     }
26626a73082SReid Kleckner   }
26726a73082SReid Kleckner }
26826a73082SReid Kleckner 
26926a73082SReid Kleckner /// Return the desired alignment for ByVal aggregate
27026a73082SReid Kleckner /// function arguments in the caller parameter area. On x86-64 this is the ABI
27126a73082SReid Kleckner /// alignment with an 8-byte minimum; on 32-bit x86, aggregates containing SSE
27226a73082SReid Kleckner /// vectors are placed at 16-byte boundaries and the rest at 4-byte boundaries.
273e55c1677SSergei Barannikov Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
27426a73082SReid Kleckner                                                const DataLayout &DL) const {
275e55c1677SSergei Barannikov   if (Subtarget.is64Bit())
276e55c1677SSergei Barannikov     return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
27726a73082SReid Kleckner 
27826a73082SReid Kleckner   Align Alignment(4);
27926a73082SReid Kleckner   if (Subtarget.hasSSE1())
28026a73082SReid Kleckner     getMaxByValAlign(Ty, Alignment);
281e55c1677SSergei Barannikov   return Alignment;
28226a73082SReid Kleckner }
28326a73082SReid Kleckner 
28426a73082SReid Kleckner /// It returns EVT::Other if the type should be determined using generic
28526a73082SReid Kleckner /// target-independent logic.
28626a73082SReid Kleckner /// For vector ops we check that the overall size isn't larger than our
28726a73082SReid Kleckner /// preferred vector width.
28826a73082SReid Kleckner EVT X86TargetLowering::getOptimalMemOpType(
28926a73082SReid Kleckner     const MemOp &Op, const AttributeList &FuncAttributes) const {
29026a73082SReid Kleckner   if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
29126a73082SReid Kleckner     if (Op.size() >= 16 &&
29226a73082SReid Kleckner         (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
29326a73082SReid Kleckner       // FIXME: Check if unaligned 64-byte accesses are slow.
29458d4fe28SPhoebe Wang       if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
29526a73082SReid Kleckner           (Subtarget.getPreferVectorWidth() >= 512)) {
29626a73082SReid Kleckner         return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
29726a73082SReid Kleckner       }
29826a73082SReid Kleckner       // FIXME: Check if unaligned 32-byte accesses are slow.
29926a73082SReid Kleckner       if (Op.size() >= 32 && Subtarget.hasAVX() &&
30026a73082SReid Kleckner           Subtarget.useLight256BitInstructions()) {
30126a73082SReid Kleckner         // Although this isn't a well-supported type for AVX1, we'll let
30226a73082SReid Kleckner         // legalization and shuffle lowering produce the optimal codegen. If we
30326a73082SReid Kleckner         // choose an optimal type with a vector element larger than a byte,
30426a73082SReid Kleckner         // getMemsetStores() may create an intermediate splat (using an integer
30526a73082SReid Kleckner         // multiply) before we splat as a vector.
30626a73082SReid Kleckner         return MVT::v32i8;
30726a73082SReid Kleckner       }
30826a73082SReid Kleckner       if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
30926a73082SReid Kleckner         return MVT::v16i8;
31026a73082SReid Kleckner       // TODO: Can SSE1 handle a byte vector?
31126a73082SReid Kleckner       // If we have SSE1 registers we should be able to use them.
31226a73082SReid Kleckner       if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
31326a73082SReid Kleckner           (Subtarget.getPreferVectorWidth() >= 128))
31426a73082SReid Kleckner         return MVT::v4f32;
31526a73082SReid Kleckner     } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
31626a73082SReid Kleckner                Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
31726a73082SReid Kleckner       // Do not use f64 to lower memcpy if source is string constant. It's
31826a73082SReid Kleckner       // better to use i32 to avoid the loads.
31926a73082SReid Kleckner       // Also, do not use f64 to lower memset unless this is a memset of zeros.
32026a73082SReid Kleckner       // The gymnastics of splatting a byte value into an XMM register and then
32126a73082SReid Kleckner       // only using 8-byte stores (because this is a CPU with slow unaligned
32226a73082SReid Kleckner       // 16-byte accesses) makes that a loser.
32326a73082SReid Kleckner       return MVT::f64;
32426a73082SReid Kleckner     }
32526a73082SReid Kleckner   }
32626a73082SReid Kleckner   // This is a compromise. If we reach here, unaligned accesses may be slow on
32726a73082SReid Kleckner   // this target. However, creating smaller, aligned accesses could be even
32826a73082SReid Kleckner   // slower and would certainly be a lot more code.
32926a73082SReid Kleckner   if (Subtarget.is64Bit() && Op.size() >= 8)
33026a73082SReid Kleckner     return MVT::i64;
33126a73082SReid Kleckner   return MVT::i32;
33226a73082SReid Kleckner }
33326a73082SReid Kleckner 
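/// Only expand memcpy/memset inline with f32/f64 loads and stores when the
/// matching SSE level is available; all other types are always acceptable.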
33426a73082SReid Kleckner bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
33526a73082SReid Kleckner   if (VT == MVT::f32)
33626a73082SReid Kleckner     return Subtarget.hasSSE1();
33726a73082SReid Kleckner   if (VT == MVT::f64)
33826a73082SReid Kleckner     return Subtarget.hasSSE2();
33926a73082SReid Kleckner   return true;
34026a73082SReid Kleckner }
34126a73082SReid Kleckner 
34226a73082SReid Kleckner static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
34326a73082SReid Kleckner   return (8 * Alignment.value()) % SizeInBits == 0;
34426a73082SReid Kleckner }
34526a73082SReid Kleckner 
34626a73082SReid Kleckner bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
34726a73082SReid Kleckner   if (isBitAligned(Alignment, VT.getSizeInBits()))
34826a73082SReid Kleckner     return true;
34926a73082SReid Kleckner   switch (VT.getSizeInBits()) {
35026a73082SReid Kleckner   default:
35126a73082SReid Kleckner     // 8-byte and under are always assumed to be fast.
35226a73082SReid Kleckner     return true;
35326a73082SReid Kleckner   case 128:
35426a73082SReid Kleckner     return !Subtarget.isUnalignedMem16Slow();
35526a73082SReid Kleckner   case 256:
35626a73082SReid Kleckner     return !Subtarget.isUnalignedMem32Slow();
35726a73082SReid Kleckner     // TODO: What about AVX-512 (512-bit) accesses?
35826a73082SReid Kleckner   }
35926a73082SReid Kleckner }
36026a73082SReid Kleckner 
36126a73082SReid Kleckner bool X86TargetLowering::allowsMisalignedMemoryAccesses(
36226a73082SReid Kleckner     EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
36326a73082SReid Kleckner     unsigned *Fast) const {
36426a73082SReid Kleckner   if (Fast)
36526a73082SReid Kleckner     *Fast = isMemoryAccessFast(VT, Alignment);
36626a73082SReid Kleckner   // NonTemporal vector memory ops must be aligned.
36726a73082SReid Kleckner   if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
36826a73082SReid Kleckner     // NT loads can only be vector aligned, so if it's less aligned than the
36926a73082SReid Kleckner     // minimum vector size (which we can split the vector down to), we might as
37026a73082SReid Kleckner     // well use a regular unaligned vector load.
37126a73082SReid Kleckner     // We don't have any NT loads pre-SSE41.
37226a73082SReid Kleckner     if (!!(Flags & MachineMemOperand::MOLoad))
37326a73082SReid Kleckner       return (Alignment < 16 || !Subtarget.hasSSE41());
37426a73082SReid Kleckner     return false;
37526a73082SReid Kleckner   }
37626a73082SReid Kleckner   // Misaligned accesses of any size are always allowed.
37726a73082SReid Kleckner   return true;
37826a73082SReid Kleckner }
37926a73082SReid Kleckner 
38026a73082SReid Kleckner bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
38126a73082SReid Kleckner                                            const DataLayout &DL, EVT VT,
38226a73082SReid Kleckner                                            unsigned AddrSpace, Align Alignment,
38326a73082SReid Kleckner                                            MachineMemOperand::Flags Flags,
38426a73082SReid Kleckner                                            unsigned *Fast) const {
38526a73082SReid Kleckner   if (Fast)
38626a73082SReid Kleckner     *Fast = isMemoryAccessFast(VT, Alignment);
38726a73082SReid Kleckner   if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
38826a73082SReid Kleckner     if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
38926a73082SReid Kleckner                                        /*Fast=*/nullptr))
39026a73082SReid Kleckner       return true;
39126a73082SReid Kleckner     // NonTemporal vector memory ops are special, and must be aligned.
39226a73082SReid Kleckner     if (!isBitAligned(Alignment, VT.getSizeInBits()))
39326a73082SReid Kleckner       return false;
39426a73082SReid Kleckner     switch (VT.getSizeInBits()) {
39526a73082SReid Kleckner     case 128:
39626a73082SReid Kleckner       if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
39726a73082SReid Kleckner         return true;
39826a73082SReid Kleckner       if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
39926a73082SReid Kleckner         return true;
40026a73082SReid Kleckner       return false;
40126a73082SReid Kleckner     case 256:
40226a73082SReid Kleckner       if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
40326a73082SReid Kleckner         return true;
40426a73082SReid Kleckner       if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
40526a73082SReid Kleckner         return true;
40626a73082SReid Kleckner       return false;
40726a73082SReid Kleckner     case 512:
40858d4fe28SPhoebe Wang       if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
40926a73082SReid Kleckner         return true;
41026a73082SReid Kleckner       return false;
41126a73082SReid Kleckner     default:
41226a73082SReid Kleckner       return false; // Don't have NonTemporal vector memory ops of this size.
41326a73082SReid Kleckner     }
41426a73082SReid Kleckner   }
41526a73082SReid Kleckner   return true;
41626a73082SReid Kleckner }
41726a73082SReid Kleckner 
41826a73082SReid Kleckner /// Return the entry encoding for a jump table in the
41926a73082SReid Kleckner /// current function.  The returned value is a member of the
42026a73082SReid Kleckner /// MachineJumpTableInfo::JTEntryKind enum.
42126a73082SReid Kleckner unsigned X86TargetLowering::getJumpTableEncoding() const {
42226a73082SReid Kleckner   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
42326a73082SReid Kleckner   // symbol.
42426a73082SReid Kleckner   if (isPositionIndependent() && Subtarget.isPICStyleGOT())
42526a73082SReid Kleckner     return MachineJumpTableInfo::EK_Custom32;
4262a2f02e1SArthur Eubanks   if (isPositionIndependent() &&
427300c41c2SArthur Eubanks       getTargetMachine().getCodeModel() == CodeModel::Large &&
428300c41c2SArthur Eubanks       !Subtarget.isTargetCOFF())
4292a2f02e1SArthur Eubanks     return MachineJumpTableInfo::EK_LabelDifference64;
43026a73082SReid Kleckner 
43126a73082SReid Kleckner   // Otherwise, use the normal jump table encoding heuristics.
43226a73082SReid Kleckner   return TargetLowering::getJumpTableEncoding();
43326a73082SReid Kleckner }
43426a73082SReid Kleckner 
43526a73082SReid Kleckner bool X86TargetLowering::useSoftFloat() const {
43626a73082SReid Kleckner   return Subtarget.useSoftFloat();
43726a73082SReid Kleckner }
43826a73082SReid Kleckner 
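/// On 32-bit targets built with register-parameter passing (e.g. -mregparm,
/// recorded as the module's register-parameter count), mark the leading integer
/// and pointer libcall arguments as 'inreg' so they are passed in registers,
/// matching how the C/stdcall conventions handle normal calls.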
43926a73082SReid Kleckner void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
44026a73082SReid Kleckner                                               ArgListTy &Args) const {
44126a73082SReid Kleckner 
44226a73082SReid Kleckner   // Only relabel X86-32 for C / Stdcall CCs.
44326a73082SReid Kleckner   if (Subtarget.is64Bit())
44426a73082SReid Kleckner     return;
44526a73082SReid Kleckner   if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
44626a73082SReid Kleckner     return;
44726a73082SReid Kleckner   unsigned ParamRegs = 0;
44826a73082SReid Kleckner   if (auto *M = MF->getFunction().getParent())
44926a73082SReid Kleckner     ParamRegs = M->getNumberRegisterParameters();
45026a73082SReid Kleckner 
45126a73082SReid Kleckner   // Mark the first N integer/pointer arguments as being passed in registers.
45226a73082SReid Kleckner   for (auto &Arg : Args) {
45326a73082SReid Kleckner     Type *T = Arg.Ty;
45426a73082SReid Kleckner     if (T->isIntOrPtrTy())
45526a73082SReid Kleckner       if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
45626a73082SReid Kleckner         unsigned numRegs = 1;
45726a73082SReid Kleckner         if (MF->getDataLayout().getTypeAllocSize(T) > 4)
45826a73082SReid Kleckner           numRegs = 2;
45926a73082SReid Kleckner         if (ParamRegs < numRegs)
46026a73082SReid Kleckner           return;
46126a73082SReid Kleckner         ParamRegs -= numRegs;
46226a73082SReid Kleckner         Arg.IsInReg = true;
46326a73082SReid Kleckner       }
46426a73082SReid Kleckner   }
46526a73082SReid Kleckner }
46626a73082SReid Kleckner 
46726a73082SReid Kleckner const MCExpr *
46826a73082SReid Kleckner X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
46926a73082SReid Kleckner                                              const MachineBasicBlock *MBB,
47026a73082SReid Kleckner                                              unsigned uid, MCContext &Ctx) const {
47126a73082SReid Kleckner   assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
47226a73082SReid Kleckner   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
47326a73082SReid Kleckner   // entries.
47426a73082SReid Kleckner   return MCSymbolRefExpr::create(MBB->getSymbol(),
47526a73082SReid Kleckner                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
47626a73082SReid Kleckner }
47726a73082SReid Kleckner 
47826a73082SReid Kleckner /// Returns relocation base for the given PIC jumptable.
47926a73082SReid Kleckner SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
48026a73082SReid Kleckner                                                     SelectionDAG &DAG) const {
48126a73082SReid Kleckner   if (!Subtarget.is64Bit())
48226a73082SReid Kleckner     // This doesn't have SDLoc associated with it, but is not really the
48326a73082SReid Kleckner     // same as a Register.
48426a73082SReid Kleckner     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
48526a73082SReid Kleckner                        getPointerTy(DAG.getDataLayout()));
48626a73082SReid Kleckner   return Table;
48726a73082SReid Kleckner }
48826a73082SReid Kleckner 
48926a73082SReid Kleckner /// This returns the relocation base for the given PIC jumptable,
49026a73082SReid Kleckner /// the same as getPICJumpTableRelocBase, but as an MCExpr.
49126a73082SReid Kleckner const MCExpr *X86TargetLowering::
49226a73082SReid Kleckner getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
49326a73082SReid Kleckner                              MCContext &Ctx) const {
49426a73082SReid Kleckner   // X86-64 uses RIP relative addressing based on the jump table label.
4952a2f02e1SArthur Eubanks   if (Subtarget.isPICStyleRIPRel() ||
4962a2f02e1SArthur Eubanks       (Subtarget.is64Bit() &&
4972a2f02e1SArthur Eubanks        getTargetMachine().getCodeModel() == CodeModel::Large))
49826a73082SReid Kleckner     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
49926a73082SReid Kleckner 
50026a73082SReid Kleckner   // Otherwise, the reference is relative to the PIC base.
50126a73082SReid Kleckner   return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
50226a73082SReid Kleckner }
50326a73082SReid Kleckner 
50426a73082SReid Kleckner std::pair<const TargetRegisterClass *, uint8_t>
50526a73082SReid Kleckner X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
50626a73082SReid Kleckner                                            MVT VT) const {
50726a73082SReid Kleckner   const TargetRegisterClass *RRC = nullptr;
50826a73082SReid Kleckner   uint8_t Cost = 1;
50926a73082SReid Kleckner   switch (VT.SimpleTy) {
51026a73082SReid Kleckner   default:
51126a73082SReid Kleckner     return TargetLowering::findRepresentativeClass(TRI, VT);
51226a73082SReid Kleckner   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
51326a73082SReid Kleckner     RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
51426a73082SReid Kleckner     break;
51526a73082SReid Kleckner   case MVT::x86mmx:
51626a73082SReid Kleckner     RRC = &X86::VR64RegClass;
51726a73082SReid Kleckner     break;
51826a73082SReid Kleckner   case MVT::f32: case MVT::f64:
51926a73082SReid Kleckner   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
52026a73082SReid Kleckner   case MVT::v4f32: case MVT::v2f64:
52126a73082SReid Kleckner   case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
52226a73082SReid Kleckner   case MVT::v8f32: case MVT::v4f64:
52326a73082SReid Kleckner   case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
52426a73082SReid Kleckner   case MVT::v16f32: case MVT::v8f64:
52526a73082SReid Kleckner     RRC = &X86::VR128XRegClass;
52626a73082SReid Kleckner     break;
52726a73082SReid Kleckner   }
52826a73082SReid Kleckner   return std::make_pair(RRC, Cost);
52926a73082SReid Kleckner }
53026a73082SReid Kleckner 
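/// Return the x86 address space used for thread-pointer-relative accesses:
/// FS on x86-64 (GS when using the Kernel code model) and GS on i386.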
53126a73082SReid Kleckner unsigned X86TargetLowering::getAddressSpace() const {
53226a73082SReid Kleckner   if (Subtarget.is64Bit())
5336611efdfSCraig Topper     return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
5346611efdfSCraig Topper                                                                     : X86AS::FS;
5356611efdfSCraig Topper   return X86AS::GS;
53626a73082SReid Kleckner }
53726a73082SReid Kleckner 
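// Returns true if the target's C library reserves a fixed TLS slot for the
// stack guard: glibc, Fuchsia, and Android API level 17 or newer.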
53826a73082SReid Kleckner static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
53926a73082SReid Kleckner   return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
54026a73082SReid Kleckner          (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
54126a73082SReid Kleckner }
54226a73082SReid Kleckner 
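// Build a constant pointer to the given offset in the FS/GS-based address
// space; the backend selects such pointers as segment-relative accesses.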
54326a73082SReid Kleckner static Constant* SegmentOffset(IRBuilderBase &IRB,
54426a73082SReid Kleckner                                int Offset, unsigned AddressSpace) {
54526a73082SReid Kleckner   return ConstantExpr::getIntToPtr(
54626a73082SReid Kleckner       ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
5474ce7c4a9SBjorn Pettersson       IRB.getPtrTy(AddressSpace));
54826a73082SReid Kleckner }
54926a73082SReid Kleckner 
55026a73082SReid Kleckner Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
55126a73082SReid Kleckner   // glibc, bionic, and Fuchsia have a special slot for the stack guard in
55226a73082SReid Kleckner   // tcbhead_t; use it instead of the usual global variable (see
55326a73082SReid Kleckner   // sysdeps/{i386,x86_64}/nptl/tls.h)
55426a73082SReid Kleckner   if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
55526a73082SReid Kleckner     unsigned AddressSpace = getAddressSpace();
55626a73082SReid Kleckner 
55726a73082SReid Kleckner     // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
55826a73082SReid Kleckner     if (Subtarget.isTargetFuchsia())
55926a73082SReid Kleckner       return SegmentOffset(IRB, 0x10, AddressSpace);
56026a73082SReid Kleckner 
56126a73082SReid Kleckner     Module *M = IRB.GetInsertBlock()->getParent()->getParent();
56226a73082SReid Kleckner     // Users may customize the guard register and offset via module flags.
56326a73082SReid Kleckner     int Offset = M->getStackProtectorGuardOffset();
56426a73082SReid Kleckner     // If -stack-protector-guard-offset is not set, the default is %fs:0x28
56526a73082SReid Kleckner     // on x86-64 (%gs:0x28 with the Kernel code model) and %gs:0x14 on i386.
56726a73082SReid Kleckner     if (Offset == INT_MAX)
56826a73082SReid Kleckner       Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
56926a73082SReid Kleckner 
57026a73082SReid Kleckner     StringRef GuardReg = M->getStackProtectorGuardReg();
57126a73082SReid Kleckner     if (GuardReg == "fs")
57226a73082SReid Kleckner       AddressSpace = X86AS::FS;
57326a73082SReid Kleckner     else if (GuardReg == "gs")
57426a73082SReid Kleckner       AddressSpace = X86AS::GS;
57526a73082SReid Kleckner 
57626a73082SReid Kleckner     // Use a symbol-based guard if the user specified one.
57726a73082SReid Kleckner     StringRef GuardSymb = M->getStackProtectorGuardSymbol();
57826a73082SReid Kleckner     if (!GuardSymb.empty()) {
57926a73082SReid Kleckner       GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
58026a73082SReid Kleckner       if (!GV) {
58126a73082SReid Kleckner         Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
58226a73082SReid Kleckner                                        : Type::getInt32Ty(M->getContext());
58326a73082SReid Kleckner         GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
58426a73082SReid Kleckner                                 nullptr, GuardSymb, nullptr,
58526a73082SReid Kleckner                                 GlobalValue::NotThreadLocal, AddressSpace);
58626a73082SReid Kleckner         if (!Subtarget.isTargetDarwin())
58726a73082SReid Kleckner           GV->setDSOLocal(M->getDirectAccessExternalData());
58826a73082SReid Kleckner       }
58926a73082SReid Kleckner       return GV;
59026a73082SReid Kleckner     }
59126a73082SReid Kleckner 
59226a73082SReid Kleckner     return SegmentOffset(IRB, Offset, AddressSpace);
59326a73082SReid Kleckner   }
59426a73082SReid Kleckner   return TargetLowering::getIRStackGuard(IRB);
59526a73082SReid Kleckner }
59626a73082SReid Kleckner 
59726a73082SReid Kleckner void X86TargetLowering::insertSSPDeclarations(Module &M) const {
59826a73082SReid Kleckner   // MSVC CRT provides functionalities for stack protection.
59926a73082SReid Kleckner   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
60026a73082SReid Kleckner       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
60126a73082SReid Kleckner     // MSVC CRT has a global variable holding security cookie.
60226a73082SReid Kleckner     M.getOrInsertGlobal("__security_cookie",
6037b9d73c2SPaulo Matos                         PointerType::getUnqual(M.getContext()));
60426a73082SReid Kleckner 
60526a73082SReid Kleckner     // MSVC CRT has a function to validate security cookie.
60626a73082SReid Kleckner     FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
60726a73082SReid Kleckner         "__security_check_cookie", Type::getVoidTy(M.getContext()),
6087b9d73c2SPaulo Matos         PointerType::getUnqual(M.getContext()));
60926a73082SReid Kleckner     if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
61026a73082SReid Kleckner       F->setCallingConv(CallingConv::X86_FastCall);
61126a73082SReid Kleckner       F->addParamAttr(0, Attribute::AttrKind::InReg);
61226a73082SReid Kleckner     }
61326a73082SReid Kleckner     return;
61426a73082SReid Kleckner   }
61526a73082SReid Kleckner 
61626a73082SReid Kleckner   StringRef GuardMode = M.getStackProtectorGuard();
61726a73082SReid Kleckner 
61826a73082SReid Kleckner   // glibc, bionic, and Fuchsia have a special slot for the stack guard.
61926a73082SReid Kleckner   if ((GuardMode == "tls" || GuardMode.empty()) &&
62026a73082SReid Kleckner       hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
62126a73082SReid Kleckner     return;
62226a73082SReid Kleckner   TargetLowering::insertSSPDeclarations(M);
62326a73082SReid Kleckner }
62426a73082SReid Kleckner 
62526a73082SReid Kleckner Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
62626a73082SReid Kleckner   // MSVC CRT has a global variable holding security cookie.
62726a73082SReid Kleckner   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
62826a73082SReid Kleckner       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
62926a73082SReid Kleckner     return M.getGlobalVariable("__security_cookie");
63026a73082SReid Kleckner   }
63126a73082SReid Kleckner   return TargetLowering::getSDagStackGuard(M);
63226a73082SReid Kleckner }
63326a73082SReid Kleckner 
63426a73082SReid Kleckner Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
63526a73082SReid Kleckner   // MSVC CRT has a function to validate security cookie.
63626a73082SReid Kleckner   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
63726a73082SReid Kleckner       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
63826a73082SReid Kleckner     return M.getFunction("__security_check_cookie");
63926a73082SReid Kleckner   }
64026a73082SReid Kleckner   return TargetLowering::getSSPStackGuardCheck(M);
64126a73082SReid Kleckner }
64226a73082SReid Kleckner 
64326a73082SReid Kleckner Value *
64426a73082SReid Kleckner X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
64526a73082SReid Kleckner   // Android provides a fixed TLS slot for the SafeStack pointer. See the
64626a73082SReid Kleckner   // definition of TLS_SLOT_SAFESTACK in
64726a73082SReid Kleckner   // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
64826a73082SReid Kleckner   if (Subtarget.isTargetAndroid()) {
64926a73082SReid Kleckner     // The slot is %fs:0x48 on x86-64 (%gs:0x48 with the Kernel code model)
65026a73082SReid Kleckner     // and %gs:0x24 on i386.
65126a73082SReid Kleckner     int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
65226a73082SReid Kleckner     return SegmentOffset(IRB, Offset, getAddressSpace());
65326a73082SReid Kleckner   }
65426a73082SReid Kleckner 
65526a73082SReid Kleckner   // Fuchsia is similar.
65626a73082SReid Kleckner   if (Subtarget.isTargetFuchsia()) {
65726a73082SReid Kleckner     // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
65826a73082SReid Kleckner     return SegmentOffset(IRB, 0x18, getAddressSpace());
65926a73082SReid Kleckner   }
66026a73082SReid Kleckner 
66126a73082SReid Kleckner   return TargetLowering::getSafeStackPointerLocation(IRB);
66226a73082SReid Kleckner }
66326a73082SReid Kleckner 
66426a73082SReid Kleckner //===----------------------------------------------------------------------===//
66526a73082SReid Kleckner //               Return Value Calling Convention Implementation
66626a73082SReid Kleckner //===----------------------------------------------------------------------===//
66726a73082SReid Kleckner 
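// Check whether the return values fit the return registers of this calling
// convention; if not, the caller of this hook demotes the return to sret.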
66826a73082SReid Kleckner bool X86TargetLowering::CanLowerReturn(
66926a73082SReid Kleckner     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
670754ed95bSyingopq     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
671754ed95bSyingopq     const Type *RetTy) const {
67226a73082SReid Kleckner   SmallVector<CCValAssign, 16> RVLocs;
67326a73082SReid Kleckner   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
67426a73082SReid Kleckner   return CCInfo.CheckReturn(Outs, RetCC_X86);
67526a73082SReid Kleckner }
67626a73082SReid Kleckner 
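// R11 is caller-saved and not used to pass arguments in the x86-64 calling
// conventions, which makes it safe to use as a scratch register.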
67726a73082SReid Kleckner const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
67826a73082SReid Kleckner   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
67926a73082SReid Kleckner   return ScratchRegs;
68026a73082SReid Kleckner }
68126a73082SReid Kleckner 
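// The rounding mode lives in the x87 control word (FPCW) and in MXCSR for
// SSE, so report both as rounding-control registers.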
68226a73082SReid Kleckner ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
6833e40c96dSAtariDreams   static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
68426a73082SReid Kleckner   return RCRegs;
68526a73082SReid Kleckner }
68626a73082SReid Kleckner 
68726a73082SReid Kleckner /// Lowers masks values (v*i1) to the local register values
68826a73082SReid Kleckner /// \returns DAG node after lowering to register type
68926a73082SReid Kleckner static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
69026a73082SReid Kleckner                                const SDLoc &DL, SelectionDAG &DAG) {
69126a73082SReid Kleckner   EVT ValVT = ValArg.getValueType();
69226a73082SReid Kleckner 
69326a73082SReid Kleckner   if (ValVT == MVT::v1i1)
69426a73082SReid Kleckner     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
69526a73082SReid Kleckner                        DAG.getIntPtrConstant(0, DL));
69626a73082SReid Kleckner 
69726a73082SReid Kleckner   if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
69826a73082SReid Kleckner       (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
69926a73082SReid Kleckner     // Two stage lowering might be required
70026a73082SReid Kleckner     // bitcast:   v8i1 -> i8 / v16i1 -> i16
70126a73082SReid Kleckner     // anyextend: i8   -> i32 / i16   -> i32
70226a73082SReid Kleckner     EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
70326a73082SReid Kleckner     SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
70426a73082SReid Kleckner     if (ValLoc == MVT::i32)
70526a73082SReid Kleckner       ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
70626a73082SReid Kleckner     return ValToCopy;
70726a73082SReid Kleckner   }
70826a73082SReid Kleckner 
70926a73082SReid Kleckner   if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
71026a73082SReid Kleckner       (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
71126a73082SReid Kleckner     // One stage lowering is required
71226a73082SReid Kleckner     // bitcast:   v32i1 -> i32 / v64i1 -> i64
71326a73082SReid Kleckner     return DAG.getBitcast(ValLoc, ValArg);
71426a73082SReid Kleckner   }
71526a73082SReid Kleckner 
71626a73082SReid Kleckner   return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
71726a73082SReid Kleckner }
71826a73082SReid Kleckner 
71926a73082SReid Kleckner /// Breaks v64i1 value into two registers and adds the new node to the DAG
72026a73082SReid Kleckner static void Passv64i1ArgInRegs(
72126a73082SReid Kleckner     const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
72226a73082SReid Kleckner     SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
72326a73082SReid Kleckner     CCValAssign &NextVA, const X86Subtarget &Subtarget) {
72426a73082SReid Kleckner   assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
72526a73082SReid Kleckner   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
72626a73082SReid Kleckner   assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
72726a73082SReid Kleckner   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
72826a73082SReid Kleckner          "The value should reside in two registers");
72926a73082SReid Kleckner 
73026a73082SReid Kleckner   // Before splitting the value we cast it to i64
73126a73082SReid Kleckner   Arg = DAG.getBitcast(MVT::i64, Arg);
73226a73082SReid Kleckner 
73326a73082SReid Kleckner   // Splitting the value into two i32 types
73426a73082SReid Kleckner   SDValue Lo, Hi;
73526a73082SReid Kleckner   std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
73626a73082SReid Kleckner 
73726a73082SReid Kleckner   // Attach the two i32 types into corresponding registers
73826a73082SReid Kleckner   RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
73926a73082SReid Kleckner   RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
74026a73082SReid Kleckner }
74126a73082SReid Kleckner 
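/// Lower an IR return: analyze the return values with RetCC_X86, copy each
/// value into its assigned return register (FP-stack returns are instead
/// pushed as extra operands for the FP stackifier), and, when needed, return
/// the sret pointer in RAX/EAX.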
74226a73082SReid Kleckner SDValue
74326a73082SReid Kleckner X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
74426a73082SReid Kleckner                                bool isVarArg,
74526a73082SReid Kleckner                                const SmallVectorImpl<ISD::OutputArg> &Outs,
74626a73082SReid Kleckner                                const SmallVectorImpl<SDValue> &OutVals,
74726a73082SReid Kleckner                                const SDLoc &dl, SelectionDAG &DAG) const {
74826a73082SReid Kleckner   MachineFunction &MF = DAG.getMachineFunction();
74926a73082SReid Kleckner   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
75026a73082SReid Kleckner 
75126a73082SReid Kleckner   // In some cases we need to disable registers from the default CSR list.
75226a73082SReid Kleckner   // For example, when they are used as return registers (preserve_* and X86's
75326a73082SReid Kleckner   // regcall) or for argument passing (X86's regcall).
75426a73082SReid Kleckner   bool ShouldDisableCalleeSavedRegister =
75526a73082SReid Kleckner       shouldDisableRetRegFromCSR(CallConv) ||
75626a73082SReid Kleckner       MF.getFunction().hasFnAttribute("no_caller_saved_registers");
75726a73082SReid Kleckner 
75826a73082SReid Kleckner   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
75926a73082SReid Kleckner     report_fatal_error("X86 interrupts may not return any value");
76026a73082SReid Kleckner 
76126a73082SReid Kleckner   SmallVector<CCValAssign, 16> RVLocs;
76226a73082SReid Kleckner   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
76326a73082SReid Kleckner   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
76426a73082SReid Kleckner 
76526a73082SReid Kleckner   SmallVector<std::pair<Register, SDValue>, 4> RetVals;
76626a73082SReid Kleckner   for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
76726a73082SReid Kleckner        ++I, ++OutsIndex) {
76826a73082SReid Kleckner     CCValAssign &VA = RVLocs[I];
76926a73082SReid Kleckner     assert(VA.isRegLoc() && "Can only return in registers!");
77026a73082SReid Kleckner 
77126a73082SReid Kleckner     // Add the register to the CalleeSaveDisableRegs list.
77226a73082SReid Kleckner     if (ShouldDisableCalleeSavedRegister)
77326a73082SReid Kleckner       MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
77426a73082SReid Kleckner 
77526a73082SReid Kleckner     SDValue ValToCopy = OutVals[OutsIndex];
77626a73082SReid Kleckner     EVT ValVT = ValToCopy.getValueType();
77726a73082SReid Kleckner 
77826a73082SReid Kleckner     // Promote values to the appropriate types.
77926a73082SReid Kleckner     if (VA.getLocInfo() == CCValAssign::SExt)
78026a73082SReid Kleckner       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
78126a73082SReid Kleckner     else if (VA.getLocInfo() == CCValAssign::ZExt)
78226a73082SReid Kleckner       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
78326a73082SReid Kleckner     else if (VA.getLocInfo() == CCValAssign::AExt) {
78426a73082SReid Kleckner       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
78526a73082SReid Kleckner         ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
78626a73082SReid Kleckner       else
78726a73082SReid Kleckner         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
78826a73082SReid Kleckner     }
78926a73082SReid Kleckner     else if (VA.getLocInfo() == CCValAssign::BCvt)
79026a73082SReid Kleckner       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
79126a73082SReid Kleckner 
79226a73082SReid Kleckner     assert(VA.getLocInfo() != CCValAssign::FPExt &&
79326a73082SReid Kleckner            "Unexpected FP-extend for return value.");
79426a73082SReid Kleckner 
79526a73082SReid Kleckner     // Report an error if we have attempted to return a value via an XMM
79626a73082SReid Kleckner     // register and SSE was disabled.
79726a73082SReid Kleckner     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
79826a73082SReid Kleckner       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
79926a73082SReid Kleckner       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
80026a73082SReid Kleckner     } else if (!Subtarget.hasSSE2() &&
80126a73082SReid Kleckner                X86::FR64XRegClass.contains(VA.getLocReg()) &&
80226a73082SReid Kleckner                ValVT == MVT::f64) {
80326a73082SReid Kleckner       // When returning a double via an XMM register, report an error if SSE2 is
80426a73082SReid Kleckner       // not enabled.
80526a73082SReid Kleckner       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
80626a73082SReid Kleckner       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
80726a73082SReid Kleckner     }
80826a73082SReid Kleckner 
80926a73082SReid Kleckner     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
81026a73082SReid Kleckner     // the RET instruction and handled by the FP Stackifier.
81126a73082SReid Kleckner     if (VA.getLocReg() == X86::FP0 ||
81226a73082SReid Kleckner         VA.getLocReg() == X86::FP1) {
81326a73082SReid Kleckner       // If this is a copy from an xmm register to ST(0), use an FPExtend to
81426a73082SReid Kleckner       // change the value to the FP stack register class.
81526a73082SReid Kleckner       if (isScalarFPTypeInSSEReg(VA.getValVT()))
81626a73082SReid Kleckner         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
81726a73082SReid Kleckner       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
81826a73082SReid Kleckner       // Don't emit a copytoreg.
81926a73082SReid Kleckner       continue;
82026a73082SReid Kleckner     }
82126a73082SReid Kleckner 
82226a73082SReid Kleckner     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
82326a73082SReid Kleckner     // which is returned in RAX / RDX.
82426a73082SReid Kleckner     if (Subtarget.is64Bit()) {
82526a73082SReid Kleckner       if (ValVT == MVT::x86mmx) {
82626a73082SReid Kleckner         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
82726a73082SReid Kleckner           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
82826a73082SReid Kleckner           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
82926a73082SReid Kleckner                                   ValToCopy);
83026a73082SReid Kleckner           // If we don't have SSE2 available, convert to v4f32 so the generated
83126a73082SReid Kleckner           // register is legal.
83226a73082SReid Kleckner           if (!Subtarget.hasSSE2())
83326a73082SReid Kleckner             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
83426a73082SReid Kleckner         }
83526a73082SReid Kleckner       }
83626a73082SReid Kleckner     }
83726a73082SReid Kleckner 
83826a73082SReid Kleckner     if (VA.needsCustom()) {
83926a73082SReid Kleckner       assert(VA.getValVT() == MVT::v64i1 &&
84026a73082SReid Kleckner              "Currently the only custom case is when we split v64i1 to 2 regs");
84126a73082SReid Kleckner 
84226a73082SReid Kleckner       Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
84326a73082SReid Kleckner                          Subtarget);
84426a73082SReid Kleckner 
84526a73082SReid Kleckner       // Add the second register to the CalleeSaveDisableRegs list.
84626a73082SReid Kleckner       if (ShouldDisableCalleeSavedRegister)
84726a73082SReid Kleckner         MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
84826a73082SReid Kleckner     } else {
84926a73082SReid Kleckner       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
85026a73082SReid Kleckner     }
85126a73082SReid Kleckner   }
85226a73082SReid Kleckner 
85326a73082SReid Kleckner   SDValue Glue;
85426a73082SReid Kleckner   SmallVector<SDValue, 6> RetOps;
85526a73082SReid Kleckner   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
85626a73082SReid Kleckner   // Operand #1 = Bytes To Pop
85726a73082SReid Kleckner   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
85826a73082SReid Kleckner                    MVT::i32));
85926a73082SReid Kleckner 
86026a73082SReid Kleckner   // Copy the result values into the output registers.
86126a73082SReid Kleckner   for (auto &RetVal : RetVals) {
86226a73082SReid Kleckner     if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
86326a73082SReid Kleckner       RetOps.push_back(RetVal.second);
86426a73082SReid Kleckner       continue; // Don't emit a copytoreg.
86526a73082SReid Kleckner     }
86626a73082SReid Kleckner 
86726a73082SReid Kleckner     Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
86826a73082SReid Kleckner     Glue = Chain.getValue(1);
86926a73082SReid Kleckner     RetOps.push_back(
87026a73082SReid Kleckner         DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
87126a73082SReid Kleckner   }
87226a73082SReid Kleckner 
87326a73082SReid Kleckner   // Swift calling convention does not require we copy the sret argument
87426a73082SReid Kleckner   // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
87526a73082SReid Kleckner 
87626a73082SReid Kleckner   // All x86 ABIs require that for returning structs by value we copy
87726a73082SReid Kleckner   // the sret argument into %rax/%eax (depending on ABI) for the return.
87826a73082SReid Kleckner   // We saved the argument into a virtual register in the entry block,
87926a73082SReid Kleckner   // so now we copy the value out and into %rax/%eax.
88026a73082SReid Kleckner   //
88126a73082SReid Kleckner   // Checking Function.hasStructRetAttr() here is insufficient because the IR
88226a73082SReid Kleckner   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
88326a73082SReid Kleckner   // false, then an sret argument may be implicitly inserted in the SelDAG. In
88426a73082SReid Kleckner   // either case FuncInfo->setSRetReturnReg() will have been called.
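  // As an illustrative example: for a function that returns a struct via a
  // hidden sret pointer, the pointer the caller passed in is copied back into
  // %rax (or %eax on 32-bit targets) before returning, as described above.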
88526a73082SReid Kleckner   if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
88626a73082SReid Kleckner     // When we have both sret and another return value, we should use the
88726a73082SReid Kleckner     // original Chain stored in RetOps[0], instead of the current Chain updated
88826a73082SReid Kleckner     // in the above loop. If we only have sret, RetOps[0] equals Chain.
88926a73082SReid Kleckner 
89026a73082SReid Kleckner     // For the case of sret and another return value, we have
89126a73082SReid Kleckner     //   Chain_0 at the function entry
89226a73082SReid Kleckner     //   Chain_1 = getCopyToReg(Chain_0) in the above loop
89326a73082SReid Kleckner     // If we use Chain_1 in getCopyFromReg, we will have
89426a73082SReid Kleckner     //   Val = getCopyFromReg(Chain_1)
89526a73082SReid Kleckner     //   Chain_2 = getCopyToReg(Chain_1, Val) from below
89626a73082SReid Kleckner 
89726a73082SReid Kleckner     // getCopyToReg(Chain_0) will be glued together with
89826a73082SReid Kleckner     // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
89926a73082SReid Kleckner     // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
90026a73082SReid Kleckner     //   Data dependency from Unit B to Unit A due to usage of Val in
90126a73082SReid Kleckner     //     getCopyToReg(Chain_1, Val)
90226a73082SReid Kleckner     //   Chain dependency from Unit A to Unit B
90326a73082SReid Kleckner 
90426a73082SReid Kleckner     // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
90526a73082SReid Kleckner     SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
90626a73082SReid Kleckner                                      getPointerTy(MF.getDataLayout()));
90726a73082SReid Kleckner 
90826a73082SReid Kleckner     Register RetValReg
90926a73082SReid Kleckner         = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
91026a73082SReid Kleckner           X86::RAX : X86::EAX;
91126a73082SReid Kleckner     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
91226a73082SReid Kleckner     Glue = Chain.getValue(1);
91326a73082SReid Kleckner 
91426a73082SReid Kleckner     // RAX/EAX now acts like a return value.
91526a73082SReid Kleckner     RetOps.push_back(
91626a73082SReid Kleckner         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
91726a73082SReid Kleckner 
91826a73082SReid Kleckner     // Add the returned register to the CalleeSaveDisableRegs list. Don't do
91926a73082SReid Kleckner     // this however for preserve_most/preserve_all to minimize the number of
92026a73082SReid Kleckner     // callee-saved registers for these CCs.
92126a73082SReid Kleckner     if (ShouldDisableCalleeSavedRegister &&
92226a73082SReid Kleckner         CallConv != CallingConv::PreserveAll &&
92326a73082SReid Kleckner         CallConv != CallingConv::PreserveMost)
92426a73082SReid Kleckner       MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
92526a73082SReid Kleckner   }
92626a73082SReid Kleckner 
92726a73082SReid Kleckner   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
92826a73082SReid Kleckner   const MCPhysReg *I =
92926a73082SReid Kleckner       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
93026a73082SReid Kleckner   if (I) {
93126a73082SReid Kleckner     for (; *I; ++I) {
93226a73082SReid Kleckner       if (X86::GR64RegClass.contains(*I))
93326a73082SReid Kleckner         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
93426a73082SReid Kleckner       else
93526a73082SReid Kleckner         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
93626a73082SReid Kleckner     }
93726a73082SReid Kleckner   }
93826a73082SReid Kleckner 
93926a73082SReid Kleckner   RetOps[0] = Chain;  // Update chain.
94026a73082SReid Kleckner 
94126a73082SReid Kleckner   // Add the glue if we have it.
94226a73082SReid Kleckner   if (Glue.getNode())
94326a73082SReid Kleckner     RetOps.push_back(Glue);
94426a73082SReid Kleckner 
94526a73082SReid Kleckner   X86ISD::NodeType opcode = X86ISD::RET_GLUE;
94626a73082SReid Kleckner   if (CallConv == CallingConv::X86_INTR)
94726a73082SReid Kleckner     opcode = X86ISD::IRET;
94826a73082SReid Kleckner   return DAG.getNode(opcode, dl, MVT::Other, RetOps);
94926a73082SReid Kleckner }
95026a73082SReid Kleckner 
95126a73082SReid Kleckner bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
95226a73082SReid Kleckner   if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
95326a73082SReid Kleckner     return false;
95426a73082SReid Kleckner 
95526a73082SReid Kleckner   SDValue TCChain = Chain;
956bd261eccSCraig Topper   SDNode *Copy = *N->user_begin();
95726a73082SReid Kleckner   if (Copy->getOpcode() == ISD::CopyToReg) {
95826a73082SReid Kleckner     // If the copy has a glue operand, we conservatively assume it isn't safe to
95926a73082SReid Kleckner     // perform a tail call.
96026a73082SReid Kleckner     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
96126a73082SReid Kleckner       return false;
96226a73082SReid Kleckner     TCChain = Copy->getOperand(0);
96326a73082SReid Kleckner   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
96426a73082SReid Kleckner     return false;
96526a73082SReid Kleckner 
96626a73082SReid Kleckner   bool HasRet = false;
967104ad925SCraig Topper   for (const SDNode *U : Copy->users()) {
96826a73082SReid Kleckner     if (U->getOpcode() != X86ISD::RET_GLUE)
96926a73082SReid Kleckner       return false;
97026a73082SReid Kleckner     // If we are returning more than one value, we can definitely
97126a73082SReid Kleckner     // If we are returning more than one value, we can definitely
97226a73082SReid Kleckner     // not make a tail call; see PR19530.
97326a73082SReid Kleckner       return false;
97426a73082SReid Kleckner     if (U->getNumOperands() == 4 &&
97526a73082SReid Kleckner         U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
97626a73082SReid Kleckner       return false;
97726a73082SReid Kleckner     HasRet = true;
97826a73082SReid Kleckner   }
97926a73082SReid Kleckner 
98026a73082SReid Kleckner   if (!HasRet)
98126a73082SReid Kleckner     return false;
98226a73082SReid Kleckner 
98326a73082SReid Kleckner   Chain = TCChain;
98426a73082SReid Kleckner   return true;
98526a73082SReid Kleckner }
98626a73082SReid Kleckner 
98726a73082SReid Kleckner EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
98826a73082SReid Kleckner                                            ISD::NodeType ExtendKind) const {
98926a73082SReid Kleckner   MVT ReturnMVT = MVT::i32;
99026a73082SReid Kleckner 
99126a73082SReid Kleckner   bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
99226a73082SReid Kleckner   if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
99326a73082SReid Kleckner     // The ABI does not require i1, i8 or i16 to be extended.
99426a73082SReid Kleckner     //
99526a73082SReid Kleckner     // On Darwin, there is code in the wild relying on Clang's old behaviour of
99626a73082SReid Kleckner     // always extending i8/i16 return values, so keep doing that for now.
99726a73082SReid Kleckner     // (PR26665).
99826a73082SReid Kleckner     ReturnMVT = MVT::i8;
99926a73082SReid Kleckner   }
100026a73082SReid Kleckner 
100126a73082SReid Kleckner   EVT MinVT = getRegisterType(Context, ReturnMVT);
100226a73082SReid Kleckner   return VT.bitsLT(MinVT) ? MinVT : VT;
100326a73082SReid Kleckner }
100426a73082SReid Kleckner 
100526a73082SReid Kleckner /// Reads two 32 bit registers and creates a 64 bit mask value.
100626a73082SReid Kleckner /// \param VA The current 32 bit value that needs to be assigned.
100726a73082SReid Kleckner /// \param NextVA The next 32 bit value that needs to be assigned.
100826a73082SReid Kleckner /// \param Root The parent DAG node.
100926a73082SReid Kleckner /// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
101026a73082SReid Kleckner ///                        glue purposes. In case the DAG is already using a
101126a73082SReid Kleckner ///                        physical register instead of a virtual one, we
101226a73082SReid Kleckner ///                        should glue our new SDValue to the InGlue SDValue.
101326a73082SReid Kleckner /// \return a new 64 bit wide SDValue.
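///
/// Illustrative example: with the 32 bit RegCall convention a v64i1 argument
/// arrives split across two GR32 registers; each 32 bit half is bitcast to
/// v32i1 and the two halves are concatenated back into a single v64i1 value.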
101426a73082SReid Kleckner static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
101526a73082SReid Kleckner                                 SDValue &Root, SelectionDAG &DAG,
101626a73082SReid Kleckner                                 const SDLoc &DL, const X86Subtarget &Subtarget,
101726a73082SReid Kleckner                                 SDValue *InGlue = nullptr) {
101826a73082SReid Kleckner   assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
101926a73082SReid Kleckner   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
102026a73082SReid Kleckner   assert(VA.getValVT() == MVT::v64i1 &&
102126a73082SReid Kleckner          "Expecting first location of 64 bit width type");
102226a73082SReid Kleckner   assert(NextVA.getValVT() == VA.getValVT() &&
102326a73082SReid Kleckner          "The locations should have the same type");
102426a73082SReid Kleckner   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
102526a73082SReid Kleckner          "The values should reside in two registers");
102626a73082SReid Kleckner 
102726a73082SReid Kleckner   SDValue Lo, Hi;
102826a73082SReid Kleckner   SDValue ArgValueLo, ArgValueHi;
102926a73082SReid Kleckner 
103026a73082SReid Kleckner   MachineFunction &MF = DAG.getMachineFunction();
103126a73082SReid Kleckner   const TargetRegisterClass *RC = &X86::GR32RegClass;
103226a73082SReid Kleckner 
103326a73082SReid Kleckner   // Read a 32 bit value from the registers.
103426a73082SReid Kleckner   if (nullptr == InGlue) {
103526a73082SReid Kleckner     // When no physical register is present,
103626a73082SReid Kleckner     // create an intermediate virtual register.
103726a73082SReid Kleckner     Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
103826a73082SReid Kleckner     ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
103926a73082SReid Kleckner     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
104026a73082SReid Kleckner     ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
104126a73082SReid Kleckner   } else {
104226a73082SReid Kleckner     // When a physical register is available read the value from it and glue
104326a73082SReid Kleckner     // the reads together.
104426a73082SReid Kleckner     ArgValueLo =
104526a73082SReid Kleckner       DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
104626a73082SReid Kleckner     *InGlue = ArgValueLo.getValue(2);
104726a73082SReid Kleckner     ArgValueHi =
104826a73082SReid Kleckner       DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
104926a73082SReid Kleckner     *InGlue = ArgValueHi.getValue(2);
105026a73082SReid Kleckner   }
105126a73082SReid Kleckner 
105226a73082SReid Kleckner   // Convert the i32 type into v32i1 type.
105326a73082SReid Kleckner   Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
105426a73082SReid Kleckner 
105526a73082SReid Kleckner   // Convert the i32 type into v32i1 type.
105626a73082SReid Kleckner   Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
105726a73082SReid Kleckner 
105826a73082SReid Kleckner   // Concatenate the two values together.
105926a73082SReid Kleckner   return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
106026a73082SReid Kleckner }
106126a73082SReid Kleckner 
106226a73082SReid Kleckner /// The function will lower a register of various sizes (8/16/32/64)
106326a73082SReid Kleckner /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
106426a73082SReid Kleckner /// \returns a DAG node containing the operand after lowering to mask type.
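/// For example (illustrative only): a v16i1 value held in an i32 location is
/// truncated to i16 and then bitcast to v16i1.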
106526a73082SReid Kleckner static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
106626a73082SReid Kleckner                                const EVT &ValLoc, const SDLoc &DL,
106726a73082SReid Kleckner                                SelectionDAG &DAG) {
106826a73082SReid Kleckner   SDValue ValReturned = ValArg;
106926a73082SReid Kleckner 
107026a73082SReid Kleckner   if (ValVT == MVT::v1i1)
107126a73082SReid Kleckner     return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
107226a73082SReid Kleckner 
107326a73082SReid Kleckner   if (ValVT == MVT::v64i1) {
107426a73082SReid Kleckner     // On a 32 bit machine, this case is handled by getv64i1Argument.
107526a73082SReid Kleckner     assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
107626a73082SReid Kleckner     // On a 64 bit machine, there is no need to truncate the value, only bitcast.
107726a73082SReid Kleckner   } else {
107826a73082SReid Kleckner     MVT MaskLenVT;
107926a73082SReid Kleckner     switch (ValVT.getSimpleVT().SimpleTy) {
108026a73082SReid Kleckner     case MVT::v8i1:
108126a73082SReid Kleckner       MaskLenVT = MVT::i8;
108226a73082SReid Kleckner       break;
108326a73082SReid Kleckner     case MVT::v16i1:
108426a73082SReid Kleckner       MaskLenVT = MVT::i16;
108526a73082SReid Kleckner       break;
108626a73082SReid Kleckner     case MVT::v32i1:
108726a73082SReid Kleckner       MaskLenVT = MVT::i32;
108826a73082SReid Kleckner       break;
108926a73082SReid Kleckner     default:
109026a73082SReid Kleckner       llvm_unreachable("Expecting a vector of i1 types");
109126a73082SReid Kleckner     }
109226a73082SReid Kleckner 
109326a73082SReid Kleckner     ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
109426a73082SReid Kleckner   }
109526a73082SReid Kleckner   return DAG.getBitcast(ValVT, ValReturned);
109626a73082SReid Kleckner }
109726a73082SReid Kleckner 
109826a73082SReid Kleckner /// Lower the result values of a call into the
109926a73082SReid Kleckner /// appropriate copies out of physical registers.
110026a73082SReid Kleckner ///
110126a73082SReid Kleckner SDValue X86TargetLowering::LowerCallResult(
110226a73082SReid Kleckner     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
110326a73082SReid Kleckner     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
110426a73082SReid Kleckner     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
110526a73082SReid Kleckner     uint32_t *RegMask) const {
110626a73082SReid Kleckner 
110726a73082SReid Kleckner   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
110826a73082SReid Kleckner   // Assign locations to each value returned by this call.
110926a73082SReid Kleckner   SmallVector<CCValAssign, 16> RVLocs;
111026a73082SReid Kleckner   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
111126a73082SReid Kleckner                  *DAG.getContext());
111226a73082SReid Kleckner   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
111326a73082SReid Kleckner 
111426a73082SReid Kleckner   // Copy all of the result registers out of their specified physreg.
111526a73082SReid Kleckner   for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
111626a73082SReid Kleckner        ++I, ++InsIndex) {
111726a73082SReid Kleckner     CCValAssign &VA = RVLocs[I];
111826a73082SReid Kleckner     EVT CopyVT = VA.getLocVT();
111926a73082SReid Kleckner 
112026a73082SReid Kleckner     // In some calling conventions we need to remove the used registers
112126a73082SReid Kleckner     // from the register mask.
112226a73082SReid Kleckner     if (RegMask) {
112326a73082SReid Kleckner       for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
112426a73082SReid Kleckner         RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
112526a73082SReid Kleckner     }
112626a73082SReid Kleckner 
112726a73082SReid Kleckner     // Report an error if there was an attempt to return FP values via XMM
112826a73082SReid Kleckner     // registers.
112926a73082SReid Kleckner     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
113026a73082SReid Kleckner       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
113126a73082SReid Kleckner       if (VA.getLocReg() == X86::XMM1)
113226a73082SReid Kleckner         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
113326a73082SReid Kleckner       else
113426a73082SReid Kleckner         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
113526a73082SReid Kleckner     } else if (!Subtarget.hasSSE2() &&
113626a73082SReid Kleckner                X86::FR64XRegClass.contains(VA.getLocReg()) &&
113726a73082SReid Kleckner                CopyVT == MVT::f64) {
113826a73082SReid Kleckner       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
113926a73082SReid Kleckner       if (VA.getLocReg() == X86::XMM1)
114026a73082SReid Kleckner         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
114126a73082SReid Kleckner       else
114226a73082SReid Kleckner         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
114326a73082SReid Kleckner     }
114426a73082SReid Kleckner 
114526a73082SReid Kleckner     // If we prefer to use the value in xmm registers, copy it out as f80 and
114626a73082SReid Kleckner     // use a truncate to move it from fp stack reg to xmm reg.
114726a73082SReid Kleckner     bool RoundAfterCopy = false;
114826a73082SReid Kleckner     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
114926a73082SReid Kleckner         isScalarFPTypeInSSEReg(VA.getValVT())) {
115026a73082SReid Kleckner       if (!Subtarget.hasX87())
115126a73082SReid Kleckner         report_fatal_error("X87 register return with X87 disabled");
115226a73082SReid Kleckner       CopyVT = MVT::f80;
115326a73082SReid Kleckner       RoundAfterCopy = (CopyVT != VA.getLocVT());
115426a73082SReid Kleckner     }
115526a73082SReid Kleckner 
115626a73082SReid Kleckner     SDValue Val;
115726a73082SReid Kleckner     if (VA.needsCustom()) {
115826a73082SReid Kleckner       assert(VA.getValVT() == MVT::v64i1 &&
115926a73082SReid Kleckner              "Currently the only custom case is when we split v64i1 to 2 regs");
116026a73082SReid Kleckner       Val =
116126a73082SReid Kleckner           getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
116226a73082SReid Kleckner     } else {
116326a73082SReid Kleckner       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
116426a73082SReid Kleckner                   .getValue(1);
116526a73082SReid Kleckner       Val = Chain.getValue(0);
116626a73082SReid Kleckner       InGlue = Chain.getValue(2);
116726a73082SReid Kleckner     }
116826a73082SReid Kleckner 
116926a73082SReid Kleckner     if (RoundAfterCopy)
117026a73082SReid Kleckner       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
117126a73082SReid Kleckner                         // This truncation won't change the value.
117226a73082SReid Kleckner                         DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
117326a73082SReid Kleckner 
117426a73082SReid Kleckner     if (VA.isExtInLoc()) {
117526a73082SReid Kleckner       if (VA.getValVT().isVector() &&
117626a73082SReid Kleckner           VA.getValVT().getScalarType() == MVT::i1 &&
117726a73082SReid Kleckner           ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
117826a73082SReid Kleckner            (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
117926a73082SReid Kleckner         // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8.
118026a73082SReid Kleckner         Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
118126a73082SReid Kleckner       } else
118226a73082SReid Kleckner         Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
118326a73082SReid Kleckner     }
118426a73082SReid Kleckner 
118526a73082SReid Kleckner     if (VA.getLocInfo() == CCValAssign::BCvt)
118626a73082SReid Kleckner       Val = DAG.getBitcast(VA.getValVT(), Val);
118726a73082SReid Kleckner 
118826a73082SReid Kleckner     InVals.push_back(Val);
118926a73082SReid Kleckner   }
119026a73082SReid Kleckner 
119126a73082SReid Kleckner   return Chain;
119226a73082SReid Kleckner }
119326a73082SReid Kleckner 
119426a73082SReid Kleckner //===----------------------------------------------------------------------===//
119526a73082SReid Kleckner //                C & StdCall & Fast Calling Convention implementation
119626a73082SReid Kleckner //===----------------------------------------------------------------------===//
119726a73082SReid Kleckner //  The StdCall calling convention is the standard for many Windows API
119826a73082SReid Kleckner //  routines. It differs from the C calling convention only slightly: the
119926a73082SReid Kleckner //  callee cleans up the stack rather than the caller, and symbols are
120026a73082SReid Kleckner //  decorated in a specific way. It doesn't support any vector arguments.
120126a73082SReid Kleckner //  For info on fast calling convention see Fast Calling Convention (tail call)
120226a73082SReid Kleckner //  implementation LowerX86_32FastCCCallTo.
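//
//  Illustrative example: a stdcall function taking two i32 arguments (declared
//  with the x86_stdcallcc convention in IR) pops its 8 bytes of stack
//  arguments on return and is typically decorated as _name@8 on x86-32.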
120326a73082SReid Kleckner 
120426a73082SReid Kleckner /// Determines whether Args, either a set of outgoing arguments to a call or a
120526a73082SReid Kleckner /// set of incoming arguments of a call, contains an sret pointer that the
120626a73082SReid Kleckner /// callee pops.
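/// (Illustrative example: on such 32-bit targets the callee itself removes the
/// 4-byte hidden sret pointer from the stack, returning with `ret $4` rather
/// than a plain `ret`; MSVC and MCU targets do not, as checked below.)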
120726a73082SReid Kleckner template <typename T>
120826a73082SReid Kleckner static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
120926a73082SReid Kleckner                              const X86Subtarget &Subtarget) {
121026a73082SReid Kleckner   // Not C++20 (yet), so no concepts available.
121126a73082SReid Kleckner   static_assert(std::is_same_v<T, ISD::OutputArg> ||
121226a73082SReid Kleckner                     std::is_same_v<T, ISD::InputArg>,
121326a73082SReid Kleckner                 "requires ISD::OutputArg or ISD::InputArg");
121426a73082SReid Kleckner 
121526a73082SReid Kleckner   // Only 32-bit pops the sret.  It's a 64-bit world these days, so early-out
121626a73082SReid Kleckner   // for most compilations.
121726a73082SReid Kleckner   if (!Subtarget.is32Bit())
121826a73082SReid Kleckner     return false;
121926a73082SReid Kleckner 
122026a73082SReid Kleckner   if (Args.empty())
122126a73082SReid Kleckner     return false;
122226a73082SReid Kleckner 
122326a73082SReid Kleckner   // Most calls do not have an sret argument; check the arg next.
122426a73082SReid Kleckner   const ISD::ArgFlagsTy &Flags = Args[0].Flags;
122526a73082SReid Kleckner   if (!Flags.isSRet() || Flags.isInReg())
122626a73082SReid Kleckner     return false;
122726a73082SReid Kleckner 
122826a73082SReid Kleckner   // The MSVC ABI does not pop the sret.
122926a73082SReid Kleckner   if (Subtarget.getTargetTriple().isOSMSVCRT())
123026a73082SReid Kleckner     return false;
123126a73082SReid Kleckner 
123226a73082SReid Kleckner   // MCUs don't pop the sret
123326a73082SReid Kleckner   if (Subtarget.isTargetMCU())
123426a73082SReid Kleckner     return false;
123526a73082SReid Kleckner 
123626a73082SReid Kleckner   // Callee pops argument
123726a73082SReid Kleckner   return true;
123826a73082SReid Kleckner }
123926a73082SReid Kleckner 
124026a73082SReid Kleckner /// Make a copy of an aggregate at address specified by "Src" to address
124126a73082SReid Kleckner /// "Dst" with size and alignment information specified by the specific
124226a73082SReid Kleckner /// parameter attribute. The copy will be passed as a byval function parameter.
124326a73082SReid Kleckner static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
124426a73082SReid Kleckner                                          SDValue Chain, ISD::ArgFlagsTy Flags,
124526a73082SReid Kleckner                                          SelectionDAG &DAG, const SDLoc &dl) {
124626a73082SReid Kleckner   SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
124726a73082SReid Kleckner 
124826a73082SReid Kleckner   return DAG.getMemcpy(
124926a73082SReid Kleckner       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
125026a73082SReid Kleckner       /*isVolatile*/ false, /*AlwaysInline=*/true,
1251f270a4ddSAmara Emerson       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
125226a73082SReid Kleckner }
125326a73082SReid Kleckner 
125426a73082SReid Kleckner /// Return true if the calling convention is one that we can guarantee TCO for.
125526a73082SReid Kleckner static bool canGuaranteeTCO(CallingConv::ID CC) {
125626a73082SReid Kleckner   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
125726a73082SReid Kleckner           CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
125826a73082SReid Kleckner           CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
125926a73082SReid Kleckner }
126026a73082SReid Kleckner 
126126a73082SReid Kleckner /// Return true if we might ever do TCO for calls with this calling convention.
126226a73082SReid Kleckner static bool mayTailCallThisCC(CallingConv::ID CC) {
126326a73082SReid Kleckner   switch (CC) {
126426a73082SReid Kleckner   // C calling conventions:
126526a73082SReid Kleckner   case CallingConv::C:
126626a73082SReid Kleckner   case CallingConv::Win64:
126726a73082SReid Kleckner   case CallingConv::X86_64_SysV:
1268c166a43cSweiguozhi   case CallingConv::PreserveNone:
126926a73082SReid Kleckner   // Callee pop conventions:
127026a73082SReid Kleckner   case CallingConv::X86_ThisCall:
127126a73082SReid Kleckner   case CallingConv::X86_StdCall:
127226a73082SReid Kleckner   case CallingConv::X86_VectorCall:
127326a73082SReid Kleckner   case CallingConv::X86_FastCall:
127426a73082SReid Kleckner   // Swift:
127526a73082SReid Kleckner   case CallingConv::Swift:
127626a73082SReid Kleckner     return true;
127726a73082SReid Kleckner   default:
127826a73082SReid Kleckner     return canGuaranteeTCO(CC);
127926a73082SReid Kleckner   }
128026a73082SReid Kleckner }
128126a73082SReid Kleckner 
128226a73082SReid Kleckner /// Return true if the function is being made into a tailcall target by
128326a73082SReid Kleckner /// changing its ABI.
128426a73082SReid Kleckner static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
128526a73082SReid Kleckner   return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
128626a73082SReid Kleckner          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
128726a73082SReid Kleckner }
128826a73082SReid Kleckner 
128926a73082SReid Kleckner bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
129026a73082SReid Kleckner   if (!CI->isTailCall())
129126a73082SReid Kleckner     return false;
129226a73082SReid Kleckner 
129326a73082SReid Kleckner   CallingConv::ID CalleeCC = CI->getCallingConv();
129426a73082SReid Kleckner   if (!mayTailCallThisCC(CalleeCC))
129526a73082SReid Kleckner     return false;
129626a73082SReid Kleckner 
129726a73082SReid Kleckner   return true;
129826a73082SReid Kleckner }
129926a73082SReid Kleckner 
130026a73082SReid Kleckner SDValue
130126a73082SReid Kleckner X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
130226a73082SReid Kleckner                                     const SmallVectorImpl<ISD::InputArg> &Ins,
130326a73082SReid Kleckner                                     const SDLoc &dl, SelectionDAG &DAG,
130426a73082SReid Kleckner                                     const CCValAssign &VA,
130526a73082SReid Kleckner                                     MachineFrameInfo &MFI, unsigned i) const {
130626a73082SReid Kleckner   // Create the nodes corresponding to a load from this parameter slot.
130726a73082SReid Kleckner   ISD::ArgFlagsTy Flags = Ins[i].Flags;
130826a73082SReid Kleckner   bool AlwaysUseMutable = shouldGuaranteeTCO(
130926a73082SReid Kleckner       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
131026a73082SReid Kleckner   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
131126a73082SReid Kleckner   EVT ValVT;
131226a73082SReid Kleckner   MVT PtrVT = getPointerTy(DAG.getDataLayout());
131326a73082SReid Kleckner 
131426a73082SReid Kleckner   // If value is passed by pointer we have address passed instead of the value
131526a73082SReid Kleckner   // itself. No need to extend if the mask value and location share the same
131626a73082SReid Kleckner   // absolute size.
131726a73082SReid Kleckner   bool ExtendedInMem =
131826a73082SReid Kleckner       VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
131926a73082SReid Kleckner       VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
132026a73082SReid Kleckner 
132126a73082SReid Kleckner   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
132226a73082SReid Kleckner     ValVT = VA.getLocVT();
132326a73082SReid Kleckner   else
132426a73082SReid Kleckner     ValVT = VA.getValVT();
132526a73082SReid Kleckner 
132626a73082SReid Kleckner   // FIXME: For now, all byval parameter objects are marked mutable. This can be
132726a73082SReid Kleckner   // changed with more analysis.
132826a73082SReid Kleckner   // In case of tail call optimization, mark all arguments mutable, since they
132926a73082SReid Kleckner   // could be overwritten by the lowering of arguments in case of a tail call.
133026a73082SReid Kleckner   if (Flags.isByVal()) {
133126a73082SReid Kleckner     unsigned Bytes = Flags.getByValSize();
133226a73082SReid Kleckner     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
133326a73082SReid Kleckner 
133426a73082SReid Kleckner     // FIXME: For now, all byval parameter objects are marked as aliasing. This
133526a73082SReid Kleckner     // can be improved with deeper analysis.
133626a73082SReid Kleckner     int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
133726a73082SReid Kleckner                                    /*isAliased=*/true);
133826a73082SReid Kleckner     return DAG.getFrameIndex(FI, PtrVT);
133926a73082SReid Kleckner   }
134026a73082SReid Kleckner 
134126a73082SReid Kleckner   EVT ArgVT = Ins[i].ArgVT;
134226a73082SReid Kleckner 
134326a73082SReid Kleckner   // If this is a vector that has been split into multiple parts, don't elide
134426a73082SReid Kleckner   // the copy. The layout on the stack may not match the packed in-memory
134526a73082SReid Kleckner   // layout.
134626a73082SReid Kleckner   bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
134726a73082SReid Kleckner 
134826a73082SReid Kleckner   // This is an argument in memory. If the argument is passed directly in
134926a73082SReid Kleckner   // memory without any extension, we might be able to perform copy elision.
135026a73082SReid Kleckner   // Large vector types, for example, may be passed indirectly by pointer.
135226a73082SReid Kleckner   if (Flags.isCopyElisionCandidate() &&
135326a73082SReid Kleckner       VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
135426a73082SReid Kleckner       !ScalarizedVector) {
135526a73082SReid Kleckner     SDValue PartAddr;
135626a73082SReid Kleckner     if (Ins[i].PartOffset == 0) {
135726a73082SReid Kleckner       // If this is a one-part value or the first part of a multi-part value,
135826a73082SReid Kleckner       // create a stack object for the entire argument value type and return a
135926a73082SReid Kleckner       // load from our portion of it. This assumes that if the first part of an
136026a73082SReid Kleckner       // argument is in memory, the rest will also be in memory.
136126a73082SReid Kleckner       int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
136226a73082SReid Kleckner                                      /*IsImmutable=*/false);
136326a73082SReid Kleckner       PartAddr = DAG.getFrameIndex(FI, PtrVT);
136426a73082SReid Kleckner       return DAG.getLoad(
136526a73082SReid Kleckner           ValVT, dl, Chain, PartAddr,
136626a73082SReid Kleckner           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
136726a73082SReid Kleckner     }
136826a73082SReid Kleckner 
136926a73082SReid Kleckner     // This is not the first piece of an argument in memory. See if there is
137026a73082SReid Kleckner     // already a fixed stack object including this offset. If so, assume it
137126a73082SReid Kleckner     // was created by the PartOffset == 0 branch above and create a load from
137226a73082SReid Kleckner     // the appropriate offset into it.
137326a73082SReid Kleckner     int64_t PartBegin = VA.getLocMemOffset();
137426a73082SReid Kleckner     int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
137526a73082SReid Kleckner     int FI = MFI.getObjectIndexBegin();
137626a73082SReid Kleckner     for (; MFI.isFixedObjectIndex(FI); ++FI) {
137726a73082SReid Kleckner       int64_t ObjBegin = MFI.getObjectOffset(FI);
137826a73082SReid Kleckner       int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
137926a73082SReid Kleckner       if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
138026a73082SReid Kleckner         break;
138126a73082SReid Kleckner     }
138226a73082SReid Kleckner     if (MFI.isFixedObjectIndex(FI)) {
138326a73082SReid Kleckner       SDValue Addr =
138426a73082SReid Kleckner           DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
138526a73082SReid Kleckner                       DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
138626a73082SReid Kleckner       return DAG.getLoad(ValVT, dl, Chain, Addr,
138726a73082SReid Kleckner                          MachinePointerInfo::getFixedStack(
138826a73082SReid Kleckner                              DAG.getMachineFunction(), FI, Ins[i].PartOffset));
138926a73082SReid Kleckner     }
139026a73082SReid Kleckner   }
139126a73082SReid Kleckner 
139226a73082SReid Kleckner   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
139326a73082SReid Kleckner                                  VA.getLocMemOffset(), isImmutable);
139426a73082SReid Kleckner 
139526a73082SReid Kleckner   // Set SExt or ZExt flag.
139626a73082SReid Kleckner   if (VA.getLocInfo() == CCValAssign::ZExt) {
139726a73082SReid Kleckner     MFI.setObjectZExt(FI, true);
139826a73082SReid Kleckner   } else if (VA.getLocInfo() == CCValAssign::SExt) {
139926a73082SReid Kleckner     MFI.setObjectSExt(FI, true);
140026a73082SReid Kleckner   }
140126a73082SReid Kleckner 
140226a73082SReid Kleckner   MaybeAlign Alignment;
140326a73082SReid Kleckner   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
140426a73082SReid Kleckner       ValVT != MVT::f80)
140526a73082SReid Kleckner     Alignment = MaybeAlign(4);
140626a73082SReid Kleckner   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
140726a73082SReid Kleckner   SDValue Val = DAG.getLoad(
140826a73082SReid Kleckner       ValVT, dl, Chain, FIN,
140926a73082SReid Kleckner       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
141026a73082SReid Kleckner       Alignment);
141126a73082SReid Kleckner   return ExtendedInMem
141226a73082SReid Kleckner              ? (VA.getValVT().isVector()
141326a73082SReid Kleckner                     ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
141426a73082SReid Kleckner                     : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
141526a73082SReid Kleckner              : Val;
141626a73082SReid Kleckner }
141726a73082SReid Kleckner 
141826a73082SReid Kleckner // FIXME: Get this from tablegen.
141926a73082SReid Kleckner static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
142026a73082SReid Kleckner                                                 const X86Subtarget &Subtarget) {
142126a73082SReid Kleckner   assert(Subtarget.is64Bit());
142226a73082SReid Kleckner 
142326a73082SReid Kleckner   if (Subtarget.isCallingConvWin64(CallConv)) {
142426a73082SReid Kleckner     static const MCPhysReg GPR64ArgRegsWin64[] = {
142526a73082SReid Kleckner       X86::RCX, X86::RDX, X86::R8,  X86::R9
142626a73082SReid Kleckner     };
142713d09dfaSCraig Topper     return GPR64ArgRegsWin64;
142826a73082SReid Kleckner   }
142926a73082SReid Kleckner 
143026a73082SReid Kleckner   static const MCPhysReg GPR64ArgRegs64Bit[] = {
143126a73082SReid Kleckner     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
143226a73082SReid Kleckner   };
143313d09dfaSCraig Topper   return GPR64ArgRegs64Bit;
143426a73082SReid Kleckner }
143526a73082SReid Kleckner 
143626a73082SReid Kleckner // FIXME: Get this from tablegen.
143726a73082SReid Kleckner static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
143826a73082SReid Kleckner                                                 CallingConv::ID CallConv,
143926a73082SReid Kleckner                                                 const X86Subtarget &Subtarget) {
144026a73082SReid Kleckner   assert(Subtarget.is64Bit());
144126a73082SReid Kleckner   if (Subtarget.isCallingConvWin64(CallConv)) {
144226a73082SReid Kleckner     // The XMM registers which might contain var arg parameters are shadowed
144326a73082SReid Kleckner     // in their paired GPR.  So we only need to save the GPRs to their home
144426a73082SReid Kleckner     // slots.
144526a73082SReid Kleckner     // TODO: __vectorcall will change this.
1446e03f4271SJay Foad     return {};
144726a73082SReid Kleckner   }
144826a73082SReid Kleckner 
144926a73082SReid Kleckner   bool isSoftFloat = Subtarget.useSoftFloat();
145026a73082SReid Kleckner   if (isSoftFloat || !Subtarget.hasSSE1())
145126a73082SReid Kleckner     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
145226a73082SReid Kleckner     // registers.
1453e03f4271SJay Foad     return {};
145426a73082SReid Kleckner 
145526a73082SReid Kleckner   static const MCPhysReg XMMArgRegs64Bit[] = {
145626a73082SReid Kleckner     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
145726a73082SReid Kleckner     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
145826a73082SReid Kleckner   };
145913d09dfaSCraig Topper   return XMMArgRegs64Bit;
146026a73082SReid Kleckner }
146126a73082SReid Kleckner 
146226a73082SReid Kleckner #ifndef NDEBUG
146326a73082SReid Kleckner static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
146426a73082SReid Kleckner   return llvm::is_sorted(
146526a73082SReid Kleckner       ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
146626a73082SReid Kleckner         return A.getValNo() < B.getValNo();
146726a73082SReid Kleckner       });
146826a73082SReid Kleckner }
146926a73082SReid Kleckner #endif
147026a73082SReid Kleckner 
147126a73082SReid Kleckner namespace {
147226a73082SReid Kleckner /// This is a helper class for lowering variable arguments parameters.
147326a73082SReid Kleckner class VarArgsLoweringHelper {
147426a73082SReid Kleckner public:
147526a73082SReid Kleckner   VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
147626a73082SReid Kleckner                         SelectionDAG &DAG, const X86Subtarget &Subtarget,
147726a73082SReid Kleckner                         CallingConv::ID CallConv, CCState &CCInfo)
147826a73082SReid Kleckner       : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
147926a73082SReid Kleckner         TheMachineFunction(DAG.getMachineFunction()),
148026a73082SReid Kleckner         TheFunction(TheMachineFunction.getFunction()),
148126a73082SReid Kleckner         FrameInfo(TheMachineFunction.getFrameInfo()),
148226a73082SReid Kleckner         FrameLowering(*Subtarget.getFrameLowering()),
148326a73082SReid Kleckner         TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
148426a73082SReid Kleckner         CCInfo(CCInfo) {}
148526a73082SReid Kleckner 
148626a73082SReid Kleckner   // Lower variable arguments parameters.
148726a73082SReid Kleckner   void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
148826a73082SReid Kleckner 
148926a73082SReid Kleckner private:
149026a73082SReid Kleckner   void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
149126a73082SReid Kleckner 
149226a73082SReid Kleckner   void forwardMustTailParameters(SDValue &Chain);
149326a73082SReid Kleckner 
149426a73082SReid Kleckner   bool is64Bit() const { return Subtarget.is64Bit(); }
149526a73082SReid Kleckner   bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
149626a73082SReid Kleckner 
149726a73082SReid Kleckner   X86MachineFunctionInfo *FuncInfo;
149826a73082SReid Kleckner   const SDLoc &DL;
149926a73082SReid Kleckner   SelectionDAG &DAG;
150026a73082SReid Kleckner   const X86Subtarget &Subtarget;
150126a73082SReid Kleckner   MachineFunction &TheMachineFunction;
150226a73082SReid Kleckner   const Function &TheFunction;
150326a73082SReid Kleckner   MachineFrameInfo &FrameInfo;
150426a73082SReid Kleckner   const TargetFrameLowering &FrameLowering;
150526a73082SReid Kleckner   const TargetLowering &TargLowering;
150626a73082SReid Kleckner   CallingConv::ID CallConv;
150726a73082SReid Kleckner   CCState &CCInfo;
150826a73082SReid Kleckner };
150926a73082SReid Kleckner } // namespace
151026a73082SReid Kleckner 
151126a73082SReid Kleckner void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
151226a73082SReid Kleckner     SDValue &Chain, unsigned StackSize) {
151326a73082SReid Kleckner   // If the function takes a variable number of arguments, make a frame index for
151426a73082SReid Kleckner   // the start of the first vararg value... for expansion of llvm.va_start. We
151526a73082SReid Kleckner   // can skip this if there are no va_start calls.
151626a73082SReid Kleckner   if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
151726a73082SReid Kleckner                     CallConv != CallingConv::X86_ThisCall)) {
151826a73082SReid Kleckner     FuncInfo->setVarArgsFrameIndex(
151926a73082SReid Kleckner         FrameInfo.CreateFixedObject(1, StackSize, true));
152026a73082SReid Kleckner   }
152126a73082SReid Kleckner 
152226a73082SReid Kleckner   // 64-bit calling conventions support varargs and register parameters, so we
152326a73082SReid Kleckner   // have to do extra work to spill them in the prologue.
152426a73082SReid Kleckner   if (is64Bit()) {
152526a73082SReid Kleckner     // Find the first unallocated argument registers.
152626a73082SReid Kleckner     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
152726a73082SReid Kleckner     ArrayRef<MCPhysReg> ArgXMMs =
152826a73082SReid Kleckner         get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
152926a73082SReid Kleckner     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
153026a73082SReid Kleckner     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
153126a73082SReid Kleckner 
153226a73082SReid Kleckner     assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
153326a73082SReid Kleckner            "SSE register cannot be used when SSE is disabled!");
153426a73082SReid Kleckner 
153526a73082SReid Kleckner     if (isWin64()) {
153626a73082SReid Kleckner       // Get to the caller-allocated home save location.  Add 8 to account
153726a73082SReid Kleckner       // for the return address.
153826a73082SReid Kleckner       int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
153926a73082SReid Kleckner       FuncInfo->setRegSaveFrameIndex(
154026a73082SReid Kleckner           FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
154126a73082SReid Kleckner       // Fixup to set vararg frame on shadow area (4 x i64).
154226a73082SReid Kleckner       if (NumIntRegs < 4)
154326a73082SReid Kleckner         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
154426a73082SReid Kleckner     } else {
154526a73082SReid Kleckner       // For X86-64, if there are vararg parameters that are passed via
154626a73082SReid Kleckner       // registers, then we must store them to their spots on the stack so
154726a73082SReid Kleckner       // they may be loaded by dereferencing the result of va_next.
154826a73082SReid Kleckner       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
154926a73082SReid Kleckner       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
155026a73082SReid Kleckner       FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
155126a73082SReid Kleckner           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
155226a73082SReid Kleckner     }
155326a73082SReid Kleckner 
155426a73082SReid Kleckner     SmallVector<SDValue, 6>
155526a73082SReid Kleckner         LiveGPRs; // list of SDValue for GPR registers keeping live input value
155626a73082SReid Kleckner     SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
155726a73082SReid Kleckner                                          // keeping live input value
155826a73082SReid Kleckner     SDValue ALVal; // if applicable keeps SDValue for %al register
155926a73082SReid Kleckner 
156026a73082SReid Kleckner     // Gather all the live in physical registers.
156126a73082SReid Kleckner     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
156226a73082SReid Kleckner       Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
156326a73082SReid Kleckner       LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
156426a73082SReid Kleckner     }
156526a73082SReid Kleckner     const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
156626a73082SReid Kleckner     if (!AvailableXmms.empty()) {
156726a73082SReid Kleckner       Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
156826a73082SReid Kleckner       ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
156926a73082SReid Kleckner       for (MCPhysReg Reg : AvailableXmms) {
157026a73082SReid Kleckner         // FastRegisterAllocator spills virtual registers at basic
157126a73082SReid Kleckner         // block boundaries. That leads to uses of xmm registers
157226a73082SReid Kleckner         // outside of the check for %al. Pass physical registers to
157326a73082SReid Kleckner         // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
157426a73082SReid Kleckner         TheMachineFunction.getRegInfo().addLiveIn(Reg);
157526a73082SReid Kleckner         LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
157626a73082SReid Kleckner       }
157726a73082SReid Kleckner     }
157826a73082SReid Kleckner 
157926a73082SReid Kleckner     // Store the integer parameter registers.
158026a73082SReid Kleckner     SmallVector<SDValue, 8> MemOps;
158126a73082SReid Kleckner     SDValue RSFIN =
158226a73082SReid Kleckner         DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
158326a73082SReid Kleckner                           TargLowering.getPointerTy(DAG.getDataLayout()));
158426a73082SReid Kleckner     unsigned Offset = FuncInfo->getVarArgsGPOffset();
158526a73082SReid Kleckner     for (SDValue Val : LiveGPRs) {
158626a73082SReid Kleckner       SDValue FIN = DAG.getNode(ISD::ADD, DL,
158726a73082SReid Kleckner                                 TargLowering.getPointerTy(DAG.getDataLayout()),
158826a73082SReid Kleckner                                 RSFIN, DAG.getIntPtrConstant(Offset, DL));
158926a73082SReid Kleckner       SDValue Store =
159026a73082SReid Kleckner           DAG.getStore(Val.getValue(1), DL, Val, FIN,
159126a73082SReid Kleckner                        MachinePointerInfo::getFixedStack(
159226a73082SReid Kleckner                            DAG.getMachineFunction(),
159326a73082SReid Kleckner                            FuncInfo->getRegSaveFrameIndex(), Offset));
159426a73082SReid Kleckner       MemOps.push_back(Store);
159526a73082SReid Kleckner       Offset += 8;
159626a73082SReid Kleckner     }
159726a73082SReid Kleckner 
159826a73082SReid Kleckner     // Now store the XMM (fp + vector) parameter registers.
159926a73082SReid Kleckner     if (!LiveXMMRegs.empty()) {
160026a73082SReid Kleckner       SmallVector<SDValue, 12> SaveXMMOps;
160126a73082SReid Kleckner       SaveXMMOps.push_back(Chain);
160226a73082SReid Kleckner       SaveXMMOps.push_back(ALVal);
160326a73082SReid Kleckner       SaveXMMOps.push_back(RSFIN);
160426a73082SReid Kleckner       SaveXMMOps.push_back(
160526a73082SReid Kleckner           DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
160626a73082SReid Kleckner       llvm::append_range(SaveXMMOps, LiveXMMRegs);
160726a73082SReid Kleckner       MachineMemOperand *StoreMMO =
160826a73082SReid Kleckner           DAG.getMachineFunction().getMachineMemOperand(
160926a73082SReid Kleckner               MachinePointerInfo::getFixedStack(
161026a73082SReid Kleckner                   DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
161126a73082SReid Kleckner                   Offset),
161226a73082SReid Kleckner               MachineMemOperand::MOStore, 128, Align(16));
161326a73082SReid Kleckner       MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
161426a73082SReid Kleckner                                                DL, DAG.getVTList(MVT::Other),
161526a73082SReid Kleckner                                                SaveXMMOps, MVT::i8, StoreMMO));
161626a73082SReid Kleckner     }
161726a73082SReid Kleckner 
161826a73082SReid Kleckner     if (!MemOps.empty())
161926a73082SReid Kleckner       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
162026a73082SReid Kleckner   }
162126a73082SReid Kleckner }
162226a73082SReid Kleckner 
162326a73082SReid Kleckner void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
162426a73082SReid Kleckner   // Find the largest legal vector type.
162526a73082SReid Kleckner   MVT VecVT = MVT::Other;
162626a73082SReid Kleckner   // FIXME: Only some x86_32 calling conventions support AVX512.
162726a73082SReid Kleckner   if (Subtarget.useAVX512Regs() &&
162826a73082SReid Kleckner       (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
162926a73082SReid Kleckner                      CallConv == CallingConv::Intel_OCL_BI)))
163026a73082SReid Kleckner     VecVT = MVT::v16f32;
163126a73082SReid Kleckner   else if (Subtarget.hasAVX())
163226a73082SReid Kleckner     VecVT = MVT::v8f32;
163326a73082SReid Kleckner   else if (Subtarget.hasSSE2())
163426a73082SReid Kleckner     VecVT = MVT::v4f32;
163526a73082SReid Kleckner 
163626a73082SReid Kleckner   // We forward some GPRs and some vector types.
163726a73082SReid Kleckner   SmallVector<MVT, 2> RegParmTypes;
163826a73082SReid Kleckner   MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
163926a73082SReid Kleckner   RegParmTypes.push_back(IntVT);
164026a73082SReid Kleckner   if (VecVT != MVT::Other)
164126a73082SReid Kleckner     RegParmTypes.push_back(VecVT);
164226a73082SReid Kleckner 
164326a73082SReid Kleckner   // Compute the set of forwarded registers. The rest are scratch.
164426a73082SReid Kleckner   SmallVectorImpl<ForwardedRegister> &Forwards =
164526a73082SReid Kleckner       FuncInfo->getForwardedMustTailRegParms();
164626a73082SReid Kleckner   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
164726a73082SReid Kleckner 
164826a73082SReid Kleckner   // Forward AL for SysV x86_64 targets, since it is used for varargs.
164926a73082SReid Kleckner   if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
165026a73082SReid Kleckner     Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
165126a73082SReid Kleckner     Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
165226a73082SReid Kleckner   }
165326a73082SReid Kleckner 
165426a73082SReid Kleckner   // Copy all forwards from physical to virtual registers.
165526a73082SReid Kleckner   for (ForwardedRegister &FR : Forwards) {
165626a73082SReid Kleckner     // FIXME: Can we use a less constrained schedule?
165726a73082SReid Kleckner     SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
165826a73082SReid Kleckner     FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
165926a73082SReid Kleckner         TargLowering.getRegClassFor(FR.VT));
166026a73082SReid Kleckner     Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
166126a73082SReid Kleckner   }
166226a73082SReid Kleckner }
166326a73082SReid Kleckner 
166426a73082SReid Kleckner void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
166526a73082SReid Kleckner                                                    unsigned StackSize) {
166626a73082SReid Kleckner   // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
166726a73082SReid Kleckner   // If necessary, it will be set to the correct value later.
166826a73082SReid Kleckner   FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
166926a73082SReid Kleckner   FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
167026a73082SReid Kleckner 
167126a73082SReid Kleckner   if (FrameInfo.hasVAStart())
167226a73082SReid Kleckner     createVarArgAreaAndStoreRegisters(Chain, StackSize);
167326a73082SReid Kleckner 
167426a73082SReid Kleckner   if (FrameInfo.hasMustTailInVarArgFunc())
167526a73082SReid Kleckner     forwardMustTailParameters(Chain);
167626a73082SReid Kleckner }
167726a73082SReid Kleckner 
167826a73082SReid Kleckner SDValue X86TargetLowering::LowerFormalArguments(
167926a73082SReid Kleckner     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
168026a73082SReid Kleckner     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
168126a73082SReid Kleckner     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
168226a73082SReid Kleckner   MachineFunction &MF = DAG.getMachineFunction();
168326a73082SReid Kleckner   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
168426a73082SReid Kleckner 
168526a73082SReid Kleckner   const Function &F = MF.getFunction();
168626a73082SReid Kleckner   if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
168726a73082SReid Kleckner       F.getName() == "main")
168826a73082SReid Kleckner     FuncInfo->setForceFramePointer(true);
168926a73082SReid Kleckner 
169026a73082SReid Kleckner   MachineFrameInfo &MFI = MF.getFrameInfo();
169126a73082SReid Kleckner   bool Is64Bit = Subtarget.is64Bit();
169226a73082SReid Kleckner   bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
169326a73082SReid Kleckner 
169426a73082SReid Kleckner   assert(
169526a73082SReid Kleckner       !(IsVarArg && canGuaranteeTCO(CallConv)) &&
169626a73082SReid Kleckner       "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
169726a73082SReid Kleckner 
169826a73082SReid Kleckner   // Assign locations to all of the incoming arguments.
169926a73082SReid Kleckner   SmallVector<CCValAssign, 16> ArgLocs;
170026a73082SReid Kleckner   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
170126a73082SReid Kleckner 
170226a73082SReid Kleckner   // Allocate shadow area for Win64.
170326a73082SReid Kleckner   if (IsWin64)
170426a73082SReid Kleckner     CCInfo.AllocateStack(32, Align(8));
170526a73082SReid Kleckner 
170626a73082SReid Kleckner   CCInfo.AnalyzeArguments(Ins, CC_X86);
170726a73082SReid Kleckner 
170826a73082SReid Kleckner   // In vectorcall calling convention a second pass is required for the HVA
170926a73082SReid Kleckner   // types.
171026a73082SReid Kleckner   if (CallingConv::X86_VectorCall == CallConv) {
171126a73082SReid Kleckner     CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
171226a73082SReid Kleckner   }
171326a73082SReid Kleckner 
171426a73082SReid Kleckner   // The next loop assumes that the locations are in the same order as the
171526a73082SReid Kleckner   // input arguments.
171626a73082SReid Kleckner   assert(isSortedByValueNo(ArgLocs) &&
171726a73082SReid Kleckner          "Argument Location list must be sorted before lowering");
171826a73082SReid Kleckner 
171926a73082SReid Kleckner   SDValue ArgValue;
172026a73082SReid Kleckner   for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
172126a73082SReid Kleckner        ++I, ++InsIndex) {
172226a73082SReid Kleckner     assert(InsIndex < Ins.size() && "Invalid Ins index");
172326a73082SReid Kleckner     CCValAssign &VA = ArgLocs[I];
172426a73082SReid Kleckner 
172526a73082SReid Kleckner     if (VA.isRegLoc()) {
172626a73082SReid Kleckner       EVT RegVT = VA.getLocVT();
172726a73082SReid Kleckner       if (VA.needsCustom()) {
172826a73082SReid Kleckner         assert(
172926a73082SReid Kleckner             VA.getValVT() == MVT::v64i1 &&
173026a73082SReid Kleckner             "Currently the only custom case is when we split v64i1 to 2 regs");
173126a73082SReid Kleckner 
173226a73082SReid Kleckner         // In the regcall calling convention, v64i1 values compiled for a
173326a73082SReid Kleckner         // 32-bit target are split up into two registers.
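        // Illustrative sketch (not in the original source): a __mmask64
        // (v64i1) argument under regcall on a 32-bit target arrives as two
        // GR32 halves; getv64i1Argument consumes this location and the next
        // one and reassembles the value from the pair.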
173426a73082SReid Kleckner         ArgValue =
173526a73082SReid Kleckner             getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
173626a73082SReid Kleckner       } else {
173726a73082SReid Kleckner         const TargetRegisterClass *RC;
173826a73082SReid Kleckner         if (RegVT == MVT::i8)
173926a73082SReid Kleckner           RC = &X86::GR8RegClass;
174026a73082SReid Kleckner         else if (RegVT == MVT::i16)
174126a73082SReid Kleckner           RC = &X86::GR16RegClass;
174226a73082SReid Kleckner         else if (RegVT == MVT::i32)
174326a73082SReid Kleckner           RC = &X86::GR32RegClass;
174426a73082SReid Kleckner         else if (Is64Bit && RegVT == MVT::i64)
174526a73082SReid Kleckner           RC = &X86::GR64RegClass;
174626a73082SReid Kleckner         else if (RegVT == MVT::f16)
174726a73082SReid Kleckner           RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
174826a73082SReid Kleckner         else if (RegVT == MVT::f32)
174926a73082SReid Kleckner           RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
175026a73082SReid Kleckner         else if (RegVT == MVT::f64)
175126a73082SReid Kleckner           RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
175226a73082SReid Kleckner         else if (RegVT == MVT::f80)
175326a73082SReid Kleckner           RC = &X86::RFP80RegClass;
175426a73082SReid Kleckner         else if (RegVT == MVT::f128)
175526a73082SReid Kleckner           RC = &X86::VR128RegClass;
175626a73082SReid Kleckner         else if (RegVT.is512BitVector())
175726a73082SReid Kleckner           RC = &X86::VR512RegClass;
175826a73082SReid Kleckner         else if (RegVT.is256BitVector())
175926a73082SReid Kleckner           RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
176026a73082SReid Kleckner         else if (RegVT.is128BitVector())
176126a73082SReid Kleckner           RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
176226a73082SReid Kleckner         else if (RegVT == MVT::x86mmx)
176326a73082SReid Kleckner           RC = &X86::VR64RegClass;
176426a73082SReid Kleckner         else if (RegVT == MVT::v1i1)
176526a73082SReid Kleckner           RC = &X86::VK1RegClass;
176626a73082SReid Kleckner         else if (RegVT == MVT::v8i1)
176726a73082SReid Kleckner           RC = &X86::VK8RegClass;
176826a73082SReid Kleckner         else if (RegVT == MVT::v16i1)
176926a73082SReid Kleckner           RC = &X86::VK16RegClass;
177026a73082SReid Kleckner         else if (RegVT == MVT::v32i1)
177126a73082SReid Kleckner           RC = &X86::VK32RegClass;
177226a73082SReid Kleckner         else if (RegVT == MVT::v64i1)
177326a73082SReid Kleckner           RC = &X86::VK64RegClass;
177426a73082SReid Kleckner         else
177526a73082SReid Kleckner           llvm_unreachable("Unknown argument type!");
177626a73082SReid Kleckner 
177726a73082SReid Kleckner         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
177826a73082SReid Kleckner         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
177926a73082SReid Kleckner       }
178026a73082SReid Kleckner 
178126a73082SReid Kleckner       // If this is an 8 or 16-bit value, it is really passed promoted to 32
178226a73082SReid Kleckner       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
178326a73082SReid Kleckner       // right size.
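      // Illustrative sketch (not in the original source): an i8 argument that
      // the caller sign-extended arrives here as an i32 copy and is wrapped as
      //   (truncate i8 (AssertSext i32 %copy, ValueType:i8))
      // so later combines know the upper bits match the sign bit.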
178426a73082SReid Kleckner       if (VA.getLocInfo() == CCValAssign::SExt)
178526a73082SReid Kleckner         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
178626a73082SReid Kleckner                                DAG.getValueType(VA.getValVT()));
178726a73082SReid Kleckner       else if (VA.getLocInfo() == CCValAssign::ZExt)
178826a73082SReid Kleckner         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
178926a73082SReid Kleckner                                DAG.getValueType(VA.getValVT()));
179026a73082SReid Kleckner       else if (VA.getLocInfo() == CCValAssign::BCvt)
179126a73082SReid Kleckner         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
179226a73082SReid Kleckner 
179326a73082SReid Kleckner       if (VA.isExtInLoc()) {
179426a73082SReid Kleckner         // Handle MMX values passed in XMM regs.
179526a73082SReid Kleckner         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
179626a73082SReid Kleckner           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
179726a73082SReid Kleckner         else if (VA.getValVT().isVector() &&
179826a73082SReid Kleckner                  VA.getValVT().getScalarType() == MVT::i1 &&
179926a73082SReid Kleckner                  ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
180026a73082SReid Kleckner                   (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
180126a73082SReid Kleckner           // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
180226a73082SReid Kleckner           ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
180326a73082SReid Kleckner         } else
180426a73082SReid Kleckner           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
180526a73082SReid Kleckner       }
180626a73082SReid Kleckner     } else {
180726a73082SReid Kleckner       assert(VA.isMemLoc());
180826a73082SReid Kleckner       ArgValue =
180926a73082SReid Kleckner           LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
181026a73082SReid Kleckner     }
181126a73082SReid Kleckner 
181226a73082SReid Kleckner     // If value is passed via pointer - do a load.
181326a73082SReid Kleckner     if (VA.getLocInfo() == CCValAssign::Indirect &&
181426a73082SReid Kleckner         !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
181526a73082SReid Kleckner       ArgValue =
181626a73082SReid Kleckner           DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
181726a73082SReid Kleckner     }
181826a73082SReid Kleckner 
181926a73082SReid Kleckner     InVals.push_back(ArgValue);
182026a73082SReid Kleckner   }
182126a73082SReid Kleckner 
182226a73082SReid Kleckner   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
182326a73082SReid Kleckner     if (Ins[I].Flags.isSwiftAsync()) {
182426a73082SReid Kleckner       auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1825dd70aef0SAlex Lorenz       if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
182626a73082SReid Kleckner         X86FI->setHasSwiftAsyncContext(true);
182726a73082SReid Kleckner       else {
1828dd70aef0SAlex Lorenz         int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1829dd70aef0SAlex Lorenz         int FI =
1830dd70aef0SAlex Lorenz             MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
183126a73082SReid Kleckner         X86FI->setSwiftAsyncContextFrameIdx(FI);
1832dd70aef0SAlex Lorenz         SDValue St = DAG.getStore(
1833dd70aef0SAlex Lorenz             DAG.getEntryNode(), dl, InVals[I],
1834dd70aef0SAlex Lorenz             DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
183526a73082SReid Kleckner             MachinePointerInfo::getFixedStack(MF, FI));
183626a73082SReid Kleckner         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
183726a73082SReid Kleckner       }
183826a73082SReid Kleckner     }
183926a73082SReid Kleckner 
184026a73082SReid Kleckner     // The Swift calling convention does not require copying the sret argument
184126a73082SReid Kleckner     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
184226a73082SReid Kleckner     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
184326a73082SReid Kleckner       continue;
184426a73082SReid Kleckner 
184526a73082SReid Kleckner     // All x86 ABIs require that for returning structs by value we copy the
184626a73082SReid Kleckner     // sret argument into %rax/%eax (depending on ABI) for the return. Save
184726a73082SReid Kleckner     // the argument into a virtual register so that we can access it from the
184826a73082SReid Kleckner     // return points.
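    // Illustrative sketch (not in the original source): for IR such as
    //   define void @f(ptr sret(%struct.S) %out)
    // the incoming %out pointer is saved in SRetReturnReg below so the return
    // lowering can copy it back into %rax/%eax as the ABI requires.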
184926a73082SReid Kleckner     if (Ins[I].Flags.isSRet()) {
185026a73082SReid Kleckner       assert(!FuncInfo->getSRetReturnReg() &&
185126a73082SReid Kleckner              "SRet return has already been set");
185226a73082SReid Kleckner       MVT PtrTy = getPointerTy(DAG.getDataLayout());
185326a73082SReid Kleckner       Register Reg =
185426a73082SReid Kleckner           MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
185526a73082SReid Kleckner       FuncInfo->setSRetReturnReg(Reg);
185626a73082SReid Kleckner       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
185726a73082SReid Kleckner       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
185826a73082SReid Kleckner       break;
185926a73082SReid Kleckner     }
186026a73082SReid Kleckner   }
186126a73082SReid Kleckner 
186226a73082SReid Kleckner   unsigned StackSize = CCInfo.getStackSize();
186326a73082SReid Kleckner   // Align stack specially for tail calls.
186426a73082SReid Kleckner   if (shouldGuaranteeTCO(CallConv,
186526a73082SReid Kleckner                          MF.getTarget().Options.GuaranteedTailCallOpt))
186626a73082SReid Kleckner     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
186726a73082SReid Kleckner 
186826a73082SReid Kleckner   if (IsVarArg)
186926a73082SReid Kleckner     VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
187026a73082SReid Kleckner         .lowerVarArgsParameters(Chain, StackSize);
187126a73082SReid Kleckner 
187226a73082SReid Kleckner   // Some CCs need callee pop.
187326a73082SReid Kleckner   if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
187426a73082SReid Kleckner                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
187526a73082SReid Kleckner     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
187626a73082SReid Kleckner   } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
187726a73082SReid Kleckner     // X86 interrupts must pop the error code (and the alignment padding) if
187826a73082SReid Kleckner     // present.
187926a73082SReid Kleckner     FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
188026a73082SReid Kleckner   } else {
188126a73082SReid Kleckner     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
188226a73082SReid Kleckner     // If this is an sret function, the return should pop the hidden pointer.
188326a73082SReid Kleckner     if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
188426a73082SReid Kleckner       FuncInfo->setBytesToPopOnReturn(4);
188526a73082SReid Kleckner   }
188626a73082SReid Kleckner 
188726a73082SReid Kleckner   if (!Is64Bit) {
188826a73082SReid Kleckner     // RegSaveFrameIndex is X86-64 only.
188926a73082SReid Kleckner     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
189026a73082SReid Kleckner   }
189126a73082SReid Kleckner 
189226a73082SReid Kleckner   FuncInfo->setArgumentStackSize(StackSize);
189326a73082SReid Kleckner 
189426a73082SReid Kleckner   if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
189526a73082SReid Kleckner     EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
189626a73082SReid Kleckner     if (Personality == EHPersonality::CoreCLR) {
189726a73082SReid Kleckner       assert(Is64Bit);
189826a73082SReid Kleckner       // TODO: Add a mechanism to frame lowering that will allow us to indicate
189926a73082SReid Kleckner       // that we'd prefer this slot be allocated towards the bottom of the frame
190026a73082SReid Kleckner       // (i.e. near the stack pointer after allocating the frame).  Every
190126a73082SReid Kleckner       // funclet needs a copy of this slot in its (mostly empty) frame, and the
190226a73082SReid Kleckner       // offset from the bottom of this and each funclet's frame must be the
190326a73082SReid Kleckner       // same, so the size of funclets' (mostly empty) frames is dictated by
190426a73082SReid Kleckner       // how far this slot is from the bottom (since they allocate just enough
190526a73082SReid Kleckner       // space to accommodate holding this slot at the correct offset).
190626a73082SReid Kleckner       int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
190726a73082SReid Kleckner       EHInfo->PSPSymFrameIdx = PSPSymFI;
190826a73082SReid Kleckner     }
190926a73082SReid Kleckner   }
191026a73082SReid Kleckner 
191126a73082SReid Kleckner   if (shouldDisableArgRegFromCSR(CallConv) ||
191226a73082SReid Kleckner       F.hasFnAttribute("no_caller_saved_registers")) {
191326a73082SReid Kleckner     MachineRegisterInfo &MRI = MF.getRegInfo();
1914c503758aSCraig Topper     for (std::pair<MCRegister, Register> Pair : MRI.liveins())
191526a73082SReid Kleckner       MRI.disableCalleeSavedRegister(Pair.first);
191626a73082SReid Kleckner   }
191726a73082SReid Kleckner 
1918c166a43cSweiguozhi   if (CallingConv::PreserveNone == CallConv)
1919c166a43cSweiguozhi     for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1920c166a43cSweiguozhi       if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1921c166a43cSweiguozhi           Ins[I].Flags.isSwiftError()) {
1922c166a43cSweiguozhi         errorUnsupported(DAG, dl,
1923c166a43cSweiguozhi                          "Swift attributes can't be used with preserve_none");
1924c166a43cSweiguozhi         break;
1925c166a43cSweiguozhi       }
1926c166a43cSweiguozhi     }
1927c166a43cSweiguozhi 
192826a73082SReid Kleckner   return Chain;
192926a73082SReid Kleckner }
193026a73082SReid Kleckner 
193126a73082SReid Kleckner SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
193226a73082SReid Kleckner                                             SDValue Arg, const SDLoc &dl,
193326a73082SReid Kleckner                                             SelectionDAG &DAG,
193426a73082SReid Kleckner                                             const CCValAssign &VA,
193526a73082SReid Kleckner                                             ISD::ArgFlagsTy Flags,
193626a73082SReid Kleckner                                             bool isByVal) const {
193726a73082SReid Kleckner   unsigned LocMemOffset = VA.getLocMemOffset();
193826a73082SReid Kleckner   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
193926a73082SReid Kleckner   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
194026a73082SReid Kleckner                        StackPtr, PtrOff);
194126a73082SReid Kleckner   if (isByVal)
194226a73082SReid Kleckner     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
194326a73082SReid Kleckner 
194426a73082SReid Kleckner   MaybeAlign Alignment;
194526a73082SReid Kleckner   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
194626a73082SReid Kleckner       Arg.getSimpleValueType() != MVT::f80)
194726a73082SReid Kleckner     Alignment = MaybeAlign(4);
194826a73082SReid Kleckner   return DAG.getStore(
194926a73082SReid Kleckner       Chain, dl, Arg, PtrOff,
195026a73082SReid Kleckner       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
195126a73082SReid Kleckner       Alignment);
195226a73082SReid Kleckner }
195326a73082SReid Kleckner 
195426a73082SReid Kleckner /// Emit a load of the return address if tail call
195526a73082SReid Kleckner /// optimization is performed and it is required.
195626a73082SReid Kleckner SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
195726a73082SReid Kleckner     SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
195826a73082SReid Kleckner     bool Is64Bit, int FPDiff, const SDLoc &dl) const {
195926a73082SReid Kleckner   // Adjust the Return address stack slot.
196026a73082SReid Kleckner   EVT VT = getPointerTy(DAG.getDataLayout());
196126a73082SReid Kleckner   OutRetAddr = getReturnAddressFrameIndex(DAG);
196226a73082SReid Kleckner 
196326a73082SReid Kleckner   // Load the "old" Return address.
196426a73082SReid Kleckner   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
196526a73082SReid Kleckner   return SDValue(OutRetAddr.getNode(), 1);
196626a73082SReid Kleckner }
196726a73082SReid Kleckner 
196826a73082SReid Kleckner /// Emit a store of the return address if tail call
196926a73082SReid Kleckner /// optimization is performed and it is required (FPDiff!=0).
197026a73082SReid Kleckner static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
197126a73082SReid Kleckner                                         SDValue Chain, SDValue RetAddrFrIdx,
197226a73082SReid Kleckner                                         EVT PtrVT, unsigned SlotSize,
197326a73082SReid Kleckner                                         int FPDiff, const SDLoc &dl) {
197426a73082SReid Kleckner   // Store the return address to the appropriate stack slot.
197526a73082SReid Kleckner   if (!FPDiff) return Chain;
197626a73082SReid Kleckner   // Calculate the new stack slot for the return address.
197726a73082SReid Kleckner   int NewReturnAddrFI =
197826a73082SReid Kleckner     MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
197926a73082SReid Kleckner                                          false);
198026a73082SReid Kleckner   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
198126a73082SReid Kleckner   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
198226a73082SReid Kleckner                        MachinePointerInfo::getFixedStack(
198326a73082SReid Kleckner                            DAG.getMachineFunction(), NewReturnAddrFI));
198426a73082SReid Kleckner   return Chain;
198526a73082SReid Kleckner }
198626a73082SReid Kleckner 
198726a73082SReid Kleckner /// Returns a vector_shuffle mask for a movs{s|d} or movd
198826a73082SReid Kleckner /// operation of the specified width.
198926a73082SReid Kleckner SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
199026a73082SReid Kleckner                                    SDValue V1, SDValue V2) const {
199126a73082SReid Kleckner   unsigned NumElems = VT.getVectorNumElements();
199226a73082SReid Kleckner   SmallVector<int, 8> Mask;
199326a73082SReid Kleckner   Mask.push_back(NumElems);
199426a73082SReid Kleckner   for (unsigned i = 1; i != NumElems; ++i)
199526a73082SReid Kleckner     Mask.push_back(i);
199626a73082SReid Kleckner   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
199726a73082SReid Kleckner }
199826a73082SReid Kleckner 
199926a73082SReid Kleckner SDValue
200026a73082SReid Kleckner X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
200126a73082SReid Kleckner                              SmallVectorImpl<SDValue> &InVals) const {
200226a73082SReid Kleckner   SelectionDAG &DAG                     = CLI.DAG;
200326a73082SReid Kleckner   SDLoc &dl                             = CLI.DL;
200426a73082SReid Kleckner   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
200526a73082SReid Kleckner   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
200626a73082SReid Kleckner   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
200726a73082SReid Kleckner   SDValue Chain                         = CLI.Chain;
200826a73082SReid Kleckner   SDValue Callee                        = CLI.Callee;
200926a73082SReid Kleckner   CallingConv::ID CallConv              = CLI.CallConv;
201026a73082SReid Kleckner   bool &isTailCall                      = CLI.IsTailCall;
201126a73082SReid Kleckner   bool isVarArg                         = CLI.IsVarArg;
201226a73082SReid Kleckner   const auto *CB                        = CLI.CB;
201326a73082SReid Kleckner 
201426a73082SReid Kleckner   MachineFunction &MF = DAG.getMachineFunction();
201526a73082SReid Kleckner   bool Is64Bit        = Subtarget.is64Bit();
201626a73082SReid Kleckner   bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
201726a73082SReid Kleckner   bool IsSibcall      = false;
201826a73082SReid Kleckner   bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
201926a73082SReid Kleckner       CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
202026a73082SReid Kleckner   bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
202126a73082SReid Kleckner   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
202226a73082SReid Kleckner   bool HasNCSR = (CB && isa<CallInst>(CB) &&
202326a73082SReid Kleckner                   CB->hasFnAttr("no_caller_saved_registers"));
202426a73082SReid Kleckner   bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
202526a73082SReid Kleckner   bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
202626a73082SReid Kleckner   bool IsCFICall = IsIndirectCall && CLI.CFIType;
20270f0cfcffSMatt Arsenault   const Module *M = MF.getFunction().getParent();
202826a73082SReid Kleckner   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
202926a73082SReid Kleckner 
203026a73082SReid Kleckner   MachineFunction::CallSiteInfo CSInfo;
203126a73082SReid Kleckner   if (CallConv == CallingConv::X86_INTR)
203226a73082SReid Kleckner     report_fatal_error("X86 interrupts may not be called directly");
203326a73082SReid Kleckner 
2034385faf9cSReid Kleckner   // Analyze operands of the call, assigning locations to each operand.
2035385faf9cSReid Kleckner   SmallVector<CCValAssign, 16> ArgLocs;
2036385faf9cSReid Kleckner   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2037385faf9cSReid Kleckner 
2038385faf9cSReid Kleckner   // Allocate shadow area for Win64.
2039385faf9cSReid Kleckner   if (IsWin64)
2040385faf9cSReid Kleckner     CCInfo.AllocateStack(32, Align(8));
2041385faf9cSReid Kleckner 
2042385faf9cSReid Kleckner   CCInfo.AnalyzeArguments(Outs, CC_X86);
2043385faf9cSReid Kleckner 
2044385faf9cSReid Kleckner   // In the vectorcall calling convention, a second pass is required for the HVA
2045385faf9cSReid Kleckner   // types.
2046385faf9cSReid Kleckner   if (CallingConv::X86_VectorCall == CallConv) {
2047385faf9cSReid Kleckner     CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2048385faf9cSReid Kleckner   }
2049385faf9cSReid Kleckner 
205026a73082SReid Kleckner   bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
205126a73082SReid Kleckner   if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
205226a73082SReid Kleckner     // If we are using a GOT, disable tail calls to external symbols with
205326a73082SReid Kleckner     // default visibility. Tail calling such a symbol requires using a GOT
205426a73082SReid Kleckner     // relocation, which forces early binding of the symbol. This breaks code
205526a73082SReid Kleckner     // that requires lazy function symbol resolution. Using musttail or
205626a73082SReid Kleckner     // GuaranteedTailCallOpt will override this.
205726a73082SReid Kleckner     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
205826a73082SReid Kleckner     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
205926a73082SReid Kleckner                G->getGlobal()->hasDefaultVisibility()))
206026a73082SReid Kleckner       isTailCall = false;
206126a73082SReid Kleckner   }
206226a73082SReid Kleckner 
206326a73082SReid Kleckner   if (isTailCall && !IsMustTail) {
206426a73082SReid Kleckner     // Check if it's really possible to do a tail call.
2065385faf9cSReid Kleckner     isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2066385faf9cSReid Kleckner                                                    IsCalleePopSRet);
206726a73082SReid Kleckner 
206826a73082SReid Kleckner     // Sibcalls are automatically detected tailcalls which do not require
206926a73082SReid Kleckner     // ABI changes.
207026a73082SReid Kleckner     if (!IsGuaranteeTCO && isTailCall)
207126a73082SReid Kleckner       IsSibcall = true;
207226a73082SReid Kleckner 
207326a73082SReid Kleckner     if (isTailCall)
207426a73082SReid Kleckner       ++NumTailCalls;
207526a73082SReid Kleckner   }
207626a73082SReid Kleckner 
207726a73082SReid Kleckner   if (IsMustTail && !isTailCall)
207826a73082SReid Kleckner     report_fatal_error("failed to perform tail call elimination on a call "
207926a73082SReid Kleckner                        "site marked musttail");
208026a73082SReid Kleckner 
208126a73082SReid Kleckner   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
208226a73082SReid Kleckner          "Var args not supported with calling convention fastcc, ghc or hipe");
208326a73082SReid Kleckner 
208426a73082SReid Kleckner   // Get a count of how many bytes are to be pushed on the stack.
208526a73082SReid Kleckner   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
208626a73082SReid Kleckner   if (IsSibcall)
208726a73082SReid Kleckner     // This is a sibcall. The memory operands are already available in the
208826a73082SReid Kleckner     // caller's own caller's stack.
208926a73082SReid Kleckner     NumBytes = 0;
209026a73082SReid Kleckner   else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
209126a73082SReid Kleckner     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
209226a73082SReid Kleckner 
209326a73082SReid Kleckner   int FPDiff = 0;
209426a73082SReid Kleckner   if (isTailCall &&
209526a73082SReid Kleckner       shouldGuaranteeTCO(CallConv,
209626a73082SReid Kleckner                          MF.getTarget().Options.GuaranteedTailCallOpt)) {
209726a73082SReid Kleckner     // Lower arguments at fp - stackoffset + fpdiff.
209826a73082SReid Kleckner     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
209926a73082SReid Kleckner 
210026a73082SReid Kleckner     FPDiff = NumBytesCallerPushed - NumBytes;
210126a73082SReid Kleckner 
210226a73082SReid Kleckner     // Set the delta of movement of the return address stack slot, but only
210326a73082SReid Kleckner     // if the delta is greater than the previous delta.
210426a73082SReid Kleckner     if (FPDiff < X86Info->getTCReturnAddrDelta())
210526a73082SReid Kleckner       X86Info->setTCReturnAddrDelta(FPDiff);
210626a73082SReid Kleckner   }
210726a73082SReid Kleckner 
210826a73082SReid Kleckner   unsigned NumBytesToPush = NumBytes;
210926a73082SReid Kleckner   unsigned NumBytesToPop = NumBytes;
211026a73082SReid Kleckner 
211126a73082SReid Kleckner   // If we have an inalloca argument, all stack space has already been allocated
211226a73082SReid Kleckner   // for us and is right at the top of the stack.  We don't support multiple
211326a73082SReid Kleckner   // arguments passed in memory when using inalloca.
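  // Illustrative sketch (not in the original source): in 32-bit MSVC-style
  // code, a by-value argument with a non-trivial copy constructor is marked
  // 'inalloca'; its memory was carved out by an earlier stack adjustment, so
  // NumBytesToPush is forced to 0 below.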
211426a73082SReid Kleckner   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
211526a73082SReid Kleckner     NumBytesToPush = 0;
211626a73082SReid Kleckner     if (!ArgLocs.back().isMemLoc())
211726a73082SReid Kleckner       report_fatal_error("cannot use inalloca attribute on a register "
211826a73082SReid Kleckner                          "parameter");
211926a73082SReid Kleckner     if (ArgLocs.back().getLocMemOffset() != 0)
212026a73082SReid Kleckner       report_fatal_error("any parameter with the inalloca attribute must be "
212126a73082SReid Kleckner                          "the only memory argument");
212226a73082SReid Kleckner   } else if (CLI.IsPreallocated) {
212326a73082SReid Kleckner     assert(ArgLocs.back().isMemLoc() &&
212426a73082SReid Kleckner            "cannot use preallocated attribute on a register "
212526a73082SReid Kleckner            "parameter");
212626a73082SReid Kleckner     SmallVector<size_t, 4> PreallocatedOffsets;
212726a73082SReid Kleckner     for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
212826a73082SReid Kleckner       if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
212926a73082SReid Kleckner         PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
213026a73082SReid Kleckner       }
213126a73082SReid Kleckner     }
213226a73082SReid Kleckner     auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
213326a73082SReid Kleckner     size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
213426a73082SReid Kleckner     MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
213526a73082SReid Kleckner     MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
213626a73082SReid Kleckner     NumBytesToPush = 0;
213726a73082SReid Kleckner   }
213826a73082SReid Kleckner 
213926a73082SReid Kleckner   if (!IsSibcall && !IsMustTail)
214026a73082SReid Kleckner     Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
214126a73082SReid Kleckner                                  NumBytes - NumBytesToPush, dl);
214226a73082SReid Kleckner 
214326a73082SReid Kleckner   SDValue RetAddrFrIdx;
214426a73082SReid Kleckner   // Load return address for tail calls.
214526a73082SReid Kleckner   if (isTailCall && FPDiff)
214626a73082SReid Kleckner     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
214726a73082SReid Kleckner                                     Is64Bit, FPDiff, dl);
214826a73082SReid Kleckner 
214926a73082SReid Kleckner   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
215026a73082SReid Kleckner   SmallVector<SDValue, 8> MemOpChains;
215126a73082SReid Kleckner   SDValue StackPtr;
215226a73082SReid Kleckner 
215326a73082SReid Kleckner   // The next loop assumes that the locations are in the same order as the
215426a73082SReid Kleckner   // input arguments.
215526a73082SReid Kleckner   assert(isSortedByValueNo(ArgLocs) &&
215626a73082SReid Kleckner          "Argument Location list must be sorted before lowering");
215726a73082SReid Kleckner 
215826a73082SReid Kleckner   // Walk the register/memloc assignments, inserting copies/loads.  In the case
215926a73082SReid Kleckner   // of tail call optimization, arguments are handled later.
216026a73082SReid Kleckner   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
216126a73082SReid Kleckner   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
216226a73082SReid Kleckner        ++I, ++OutIndex) {
216326a73082SReid Kleckner     assert(OutIndex < Outs.size() && "Invalid Out index");
216426a73082SReid Kleckner     // Skip inalloca/preallocated arguments, they have already been written.
216526a73082SReid Kleckner     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
216626a73082SReid Kleckner     if (Flags.isInAlloca() || Flags.isPreallocated())
216726a73082SReid Kleckner       continue;
216826a73082SReid Kleckner 
216926a73082SReid Kleckner     CCValAssign &VA = ArgLocs[I];
217026a73082SReid Kleckner     EVT RegVT = VA.getLocVT();
217126a73082SReid Kleckner     SDValue Arg = OutVals[OutIndex];
217226a73082SReid Kleckner     bool isByVal = Flags.isByVal();
217326a73082SReid Kleckner 
217426a73082SReid Kleckner     // Promote the value if needed.
217526a73082SReid Kleckner     switch (VA.getLocInfo()) {
217626a73082SReid Kleckner     default: llvm_unreachable("Unknown loc info!");
217726a73082SReid Kleckner     case CCValAssign::Full: break;
217826a73082SReid Kleckner     case CCValAssign::SExt:
217926a73082SReid Kleckner       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
218026a73082SReid Kleckner       break;
218126a73082SReid Kleckner     case CCValAssign::ZExt:
218226a73082SReid Kleckner       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
218326a73082SReid Kleckner       break;
218426a73082SReid Kleckner     case CCValAssign::AExt:
218526a73082SReid Kleckner       if (Arg.getValueType().isVector() &&
218626a73082SReid Kleckner           Arg.getValueType().getVectorElementType() == MVT::i1)
218726a73082SReid Kleckner         Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
218826a73082SReid Kleckner       else if (RegVT.is128BitVector()) {
218926a73082SReid Kleckner         // Special case: passing MMX values in XMM registers.
219026a73082SReid Kleckner         Arg = DAG.getBitcast(MVT::i64, Arg);
219126a73082SReid Kleckner         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
219226a73082SReid Kleckner         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
219326a73082SReid Kleckner       } else
219426a73082SReid Kleckner         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
219526a73082SReid Kleckner       break;
219626a73082SReid Kleckner     case CCValAssign::BCvt:
219726a73082SReid Kleckner       Arg = DAG.getBitcast(RegVT, Arg);
219826a73082SReid Kleckner       break;
219926a73082SReid Kleckner     case CCValAssign::Indirect: {
220026a73082SReid Kleckner       if (isByVal) {
220126a73082SReid Kleckner         // Memcpy the argument to a temporary stack slot to prevent
220226a73082SReid Kleckner         // the caller from seeing any modifications the callee may make
220326a73082SReid Kleckner         // as guaranteed by the `byval` attribute.
220426a73082SReid Kleckner         int FrameIdx = MF.getFrameInfo().CreateStackObject(
220526a73082SReid Kleckner             Flags.getByValSize(),
220626a73082SReid Kleckner             std::max(Align(16), Flags.getNonZeroByValAlign()), false);
220726a73082SReid Kleckner         SDValue StackSlot =
220826a73082SReid Kleckner             DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
220926a73082SReid Kleckner         Chain =
221026a73082SReid Kleckner             CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
221126a73082SReid Kleckner         // From now on treat this as a regular pointer
221226a73082SReid Kleckner         Arg = StackSlot;
221326a73082SReid Kleckner         isByVal = false;
221426a73082SReid Kleckner       } else {
221526a73082SReid Kleckner         // Store the argument.
221626a73082SReid Kleckner         SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
221726a73082SReid Kleckner         int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
221826a73082SReid Kleckner         Chain = DAG.getStore(
221926a73082SReid Kleckner             Chain, dl, Arg, SpillSlot,
222026a73082SReid Kleckner             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
222126a73082SReid Kleckner         Arg = SpillSlot;
222226a73082SReid Kleckner       }
222326a73082SReid Kleckner       break;
222426a73082SReid Kleckner     }
222526a73082SReid Kleckner     }
222626a73082SReid Kleckner 
222726a73082SReid Kleckner     if (VA.needsCustom()) {
222826a73082SReid Kleckner       assert(VA.getValVT() == MVT::v64i1 &&
222926a73082SReid Kleckner              "Currently the only custom case is when we split v64i1 to 2 regs");
223026a73082SReid Kleckner       // Split v64i1 value into two registers
223126a73082SReid Kleckner       Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
223226a73082SReid Kleckner     } else if (VA.isRegLoc()) {
223326a73082SReid Kleckner       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
223426a73082SReid Kleckner       const TargetOptions &Options = DAG.getTarget().Options;
223526a73082SReid Kleckner       if (Options.EmitCallSiteInfo)
2236212b1a84SPrabhuk         CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
223726a73082SReid Kleckner       if (isVarArg && IsWin64) {
223826a73082SReid Kleckner         // The Win64 ABI requires an argument XMM register to be copied to the
223926a73082SReid Kleckner         // corresponding shadow register if the callee is a varargs function.
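        // Illustrative sketch (not in the original source): a double assigned
        // to XMM1 above would additionally be copied into RDX here, so a
        // varargs callee can find it through the integer register save area.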
224026a73082SReid Kleckner         Register ShadowReg;
224126a73082SReid Kleckner         switch (VA.getLocReg()) {
224226a73082SReid Kleckner         case X86::XMM0: ShadowReg = X86::RCX; break;
224326a73082SReid Kleckner         case X86::XMM1: ShadowReg = X86::RDX; break;
224426a73082SReid Kleckner         case X86::XMM2: ShadowReg = X86::R8; break;
224526a73082SReid Kleckner         case X86::XMM3: ShadowReg = X86::R9; break;
224626a73082SReid Kleckner         }
224726a73082SReid Kleckner         if (ShadowReg)
224826a73082SReid Kleckner           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
224926a73082SReid Kleckner       }
225026a73082SReid Kleckner     } else if (!IsSibcall && (!isTailCall || isByVal)) {
225126a73082SReid Kleckner       assert(VA.isMemLoc());
225226a73082SReid Kleckner       if (!StackPtr.getNode())
225326a73082SReid Kleckner         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
225426a73082SReid Kleckner                                       getPointerTy(DAG.getDataLayout()));
225526a73082SReid Kleckner       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
225626a73082SReid Kleckner                                              dl, DAG, VA, Flags, isByVal));
225726a73082SReid Kleckner     }
225826a73082SReid Kleckner   }
225926a73082SReid Kleckner 
226026a73082SReid Kleckner   if (!MemOpChains.empty())
226126a73082SReid Kleckner     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
226226a73082SReid Kleckner 
226326a73082SReid Kleckner   if (Subtarget.isPICStyleGOT()) {
226426a73082SReid Kleckner     // ELF / PIC requires the GOT pointer in the EBX register before function
226526a73082SReid Kleckner     // calls made via the PLT (except for regcall).
226626a73082SReid Kleckner     if (!isTailCall) {
226726a73082SReid Kleckner       // An indirect call with the RegCall calling convention may use up all
226826a73082SReid Kleckner       // the general-purpose registers, so it is not suitable to bind the EBX
226926a73082SReid Kleckner       // register to the GOT address; just let the register allocator handle it.
227026a73082SReid Kleckner       if (CallConv != CallingConv::X86_RegCall)
227126a73082SReid Kleckner         RegsToPass.push_back(std::make_pair(
227226a73082SReid Kleckner           Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
227326a73082SReid Kleckner                                           getPointerTy(DAG.getDataLayout()))));
227426a73082SReid Kleckner     } else {
227526a73082SReid Kleckner       // If we are tail calling and generating PIC/GOT style code load the
227626a73082SReid Kleckner       // If we are tail calling and generating PIC/GOT style code, load the
227726a73082SReid Kleckner       // address of the callee into ECX. The value in ECX is used as the target of
227826a73082SReid Kleckner       // for tail calls on PIC/GOT architectures. Normally we would just put the
227926a73082SReid Kleckner       // address of GOT into ebx and then call target@PLT. But for tail calls
228026a73082SReid Kleckner       // ebx would be restored (since ebx is callee saved) before jumping to the
228126a73082SReid Kleckner       // target@PLT.
228226a73082SReid Kleckner 
228326a73082SReid Kleckner       // Note: The actual moving to ECX is done further down.
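      // Illustrative sketch (not in the original source): instead of relying on
      //   jmp target@PLT        ; needs EBX = GOT base, but EBX was restored
      // the callee address is computed into a scratch register first and the
      // tail call is emitted as an indirect jump through that register.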
228426a73082SReid Kleckner       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
228526a73082SReid Kleckner       if (G && !G->getGlobal()->hasLocalLinkage() &&
228626a73082SReid Kleckner           G->getGlobal()->hasDefaultVisibility())
228726a73082SReid Kleckner         Callee = LowerGlobalAddress(Callee, DAG);
228826a73082SReid Kleckner       else if (isa<ExternalSymbolSDNode>(Callee))
228926a73082SReid Kleckner         Callee = LowerExternalSymbol(Callee, DAG);
229026a73082SReid Kleckner     }
229126a73082SReid Kleckner   }
229226a73082SReid Kleckner 
229326a73082SReid Kleckner   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
229426a73082SReid Kleckner       (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
229526a73082SReid Kleckner     // From AMD64 ABI document:
229626a73082SReid Kleckner     // For calls that may call functions that use varargs or stdargs
229726a73082SReid Kleckner     // (prototype-less calls or calls to functions containing ellipsis (...) in
229826a73082SReid Kleckner     // the declaration) %al is used as a hidden argument to specify the number
229926a73082SReid Kleckner     // of SSE registers used. The contents of %al do not need to match exactly
230026a73082SReid Kleckner     // the number of registers, but must be an upper bound on the number of SSE
230126a73082SReid Kleckner     // registers used and must be in the range 0 - 8 inclusive.
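    // Illustrative sketch (not in the original source): for a call such as
    //   printf("%f\n", x);   // one value passed in an XMM register
    // the code below ends up emitting roughly
    //   movb $1, %al
    //   callq printf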
230226a73082SReid Kleckner 
230326a73082SReid Kleckner     // Count the number of XMM registers allocated.
230426a73082SReid Kleckner     static const MCPhysReg XMMArgRegs[] = {
230526a73082SReid Kleckner       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
230626a73082SReid Kleckner       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
230726a73082SReid Kleckner     };
230826a73082SReid Kleckner     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
230926a73082SReid Kleckner     assert((Subtarget.hasSSE1() || !NumXMMRegs)
231026a73082SReid Kleckner            && "SSE registers cannot be used when SSE is disabled");
231126a73082SReid Kleckner     RegsToPass.push_back(std::make_pair(Register(X86::AL),
231226a73082SReid Kleckner                                         DAG.getConstant(NumXMMRegs, dl,
231326a73082SReid Kleckner                                                         MVT::i8)));
231426a73082SReid Kleckner   }
231526a73082SReid Kleckner 
231626a73082SReid Kleckner   if (isVarArg && IsMustTail) {
231726a73082SReid Kleckner     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
231826a73082SReid Kleckner     for (const auto &F : Forwards) {
231926a73082SReid Kleckner       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
232026a73082SReid Kleckner       RegsToPass.push_back(std::make_pair(F.PReg, Val));
232126a73082SReid Kleckner     }
232226a73082SReid Kleckner   }
232326a73082SReid Kleckner 
232426a73082SReid Kleckner   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
232526a73082SReid Kleckner   // don't need this because the eligibility check rejects calls that require
232626a73082SReid Kleckner   // shuffling arguments passed in memory.
232726a73082SReid Kleckner   if (!IsSibcall && isTailCall) {
232826a73082SReid Kleckner     // Force all the incoming stack arguments to be loaded from the stack
232926a73082SReid Kleckner     // before any new outgoing arguments are stored to the stack, because the
233026a73082SReid Kleckner     // outgoing stack slots may alias the incoming argument stack slots, and
233126a73082SReid Kleckner     // the alias isn't otherwise explicit. This is slightly more conservative
233226a73082SReid Kleckner     // than necessary, because it means that each store effectively depends
233326a73082SReid Kleckner     // on every argument instead of just those arguments it would clobber.
233426a73082SReid Kleckner     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
233526a73082SReid Kleckner 
233626a73082SReid Kleckner     SmallVector<SDValue, 8> MemOpChains2;
233726a73082SReid Kleckner     SDValue FIN;
233826a73082SReid Kleckner     int FI = 0;
233926a73082SReid Kleckner     for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
234026a73082SReid Kleckner          ++I, ++OutsIndex) {
234126a73082SReid Kleckner       CCValAssign &VA = ArgLocs[I];
234226a73082SReid Kleckner 
234326a73082SReid Kleckner       if (VA.isRegLoc()) {
234426a73082SReid Kleckner         if (VA.needsCustom()) {
234526a73082SReid Kleckner           assert((CallConv == CallingConv::X86_RegCall) &&
234626a73082SReid Kleckner                  "Expecting custom case only in regcall calling convention");
234726a73082SReid Kleckner           // This means that we are in the special case where one argument was
234826a73082SReid Kleckner           // passed through two register locations; skip the next location.
234926a73082SReid Kleckner           ++I;
235026a73082SReid Kleckner         }
235126a73082SReid Kleckner 
235226a73082SReid Kleckner         continue;
235326a73082SReid Kleckner       }
235426a73082SReid Kleckner 
235526a73082SReid Kleckner       assert(VA.isMemLoc());
235626a73082SReid Kleckner       SDValue Arg = OutVals[OutsIndex];
235726a73082SReid Kleckner       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
235826a73082SReid Kleckner       // Skip inalloca/preallocated arguments.  They don't require any work.
235926a73082SReid Kleckner       if (Flags.isInAlloca() || Flags.isPreallocated())
236026a73082SReid Kleckner         continue;
236126a73082SReid Kleckner       // Create frame index.
236226a73082SReid Kleckner       int32_t Offset = VA.getLocMemOffset()+FPDiff;
236326a73082SReid Kleckner       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
236426a73082SReid Kleckner       FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
236526a73082SReid Kleckner       FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
236626a73082SReid Kleckner 
236726a73082SReid Kleckner       if (Flags.isByVal()) {
236826a73082SReid Kleckner         // Copy relative to framepointer.
236926a73082SReid Kleckner         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
237026a73082SReid Kleckner         if (!StackPtr.getNode())
237126a73082SReid Kleckner           StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
237226a73082SReid Kleckner                                         getPointerTy(DAG.getDataLayout()));
237326a73082SReid Kleckner         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
237426a73082SReid Kleckner                              StackPtr, Source);
237526a73082SReid Kleckner 
237626a73082SReid Kleckner         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
237726a73082SReid Kleckner                                                          ArgChain,
237826a73082SReid Kleckner                                                          Flags, DAG, dl));
237926a73082SReid Kleckner       } else {
238026a73082SReid Kleckner         // Store relative to framepointer.
238126a73082SReid Kleckner         MemOpChains2.push_back(DAG.getStore(
238226a73082SReid Kleckner             ArgChain, dl, Arg, FIN,
238326a73082SReid Kleckner             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
238426a73082SReid Kleckner       }
238526a73082SReid Kleckner     }
238626a73082SReid Kleckner 
238726a73082SReid Kleckner     if (!MemOpChains2.empty())
238826a73082SReid Kleckner       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
238926a73082SReid Kleckner 
239026a73082SReid Kleckner     // Store the return address to the appropriate stack slot.
239126a73082SReid Kleckner     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
239226a73082SReid Kleckner                                      getPointerTy(DAG.getDataLayout()),
239326a73082SReid Kleckner                                      RegInfo->getSlotSize(), FPDiff, dl);
239426a73082SReid Kleckner   }
239526a73082SReid Kleckner 
239626a73082SReid Kleckner   // Build a sequence of copy-to-reg nodes chained together with token chain
239726a73082SReid Kleckner   // and glue operands which copy the outgoing args into registers.
239826a73082SReid Kleckner   SDValue InGlue;
239926a73082SReid Kleckner   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
240026a73082SReid Kleckner     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
240126a73082SReid Kleckner                              RegsToPass[i].second, InGlue);
240226a73082SReid Kleckner     InGlue = Chain.getValue(1);
240326a73082SReid Kleckner   }
240426a73082SReid Kleckner 
240526a73082SReid Kleckner   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
240626a73082SReid Kleckner     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
240726a73082SReid Kleckner     // In the 64-bit large code model, we have to make all calls
240826a73082SReid Kleckner     // through a register, since the call instruction's 32-bit
240926a73082SReid Kleckner     // pc-relative offset may not be large enough to hold the whole
241026a73082SReid Kleckner     // address.
241126a73082SReid Kleckner   } else if (Callee->getOpcode() == ISD::GlobalAddress ||
241226a73082SReid Kleckner              Callee->getOpcode() == ISD::ExternalSymbol) {
241326a73082SReid Kleckner     // Lower direct calls to global addresses and external symbols. Setting
241426a73082SReid Kleckner     // ForCall to true here has the effect of removing WrapperRIP when possible
241526a73082SReid Kleckner     // to allow direct calls to be selected without first materializing the
241626a73082SReid Kleckner     // address into a register.
241726a73082SReid Kleckner     Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
241826a73082SReid Kleckner   } else if (Subtarget.isTarget64BitILP32() &&
241926a73082SReid Kleckner              Callee.getValueType() == MVT::i32) {
242026a73082SReid Kleckner     // Zero-extend the 32-bit Callee address into a 64-bit one according to the x32 ABI.
242126a73082SReid Kleckner     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
242226a73082SReid Kleckner   }
242326a73082SReid Kleckner 
242426a73082SReid Kleckner   SmallVector<SDValue, 8> Ops;
242526a73082SReid Kleckner 
242626a73082SReid Kleckner   if (!IsSibcall && isTailCall && !IsMustTail) {
242726a73082SReid Kleckner     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
242826a73082SReid Kleckner     InGlue = Chain.getValue(1);
242926a73082SReid Kleckner   }
243026a73082SReid Kleckner 
243126a73082SReid Kleckner   Ops.push_back(Chain);
243226a73082SReid Kleckner   Ops.push_back(Callee);
243326a73082SReid Kleckner 
243426a73082SReid Kleckner   if (isTailCall)
2435ce0cc8e9SCraig Topper     Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
243626a73082SReid Kleckner 
243726a73082SReid Kleckner   // Add argument registers to the end of the list so that they are known live
243826a73082SReid Kleckner   // into the call.
243926a73082SReid Kleckner   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
244026a73082SReid Kleckner     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
244126a73082SReid Kleckner                                   RegsToPass[i].second.getValueType()));
244226a73082SReid Kleckner 
244326a73082SReid Kleckner   // Add a register mask operand representing the call-preserved registers.
244426a73082SReid Kleckner   const uint32_t *Mask = [&]() {
244526a73082SReid Kleckner     auto AdaptedCC = CallConv;
244626a73082SReid Kleckner     // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
244726a73082SReid Kleckner     // use X86_INTR calling convention because it has the same CSR mask
244826a73082SReid Kleckner     // (same preserved registers).
244926a73082SReid Kleckner     if (HasNCSR)
245026a73082SReid Kleckner       AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
245126a73082SReid Kleckner     // If NoCalleeSavedRegisters is requested, then use GHC since it happens
245226a73082SReid Kleckner     // to use the CSR_NoRegs_RegMask.
245326a73082SReid Kleckner     if (CB && CB->hasFnAttr("no_callee_saved_registers"))
245426a73082SReid Kleckner       AdaptedCC = (CallingConv::ID)CallingConv::GHC;
245526a73082SReid Kleckner     return RegInfo->getCallPreservedMask(MF, AdaptedCC);
245626a73082SReid Kleckner   }();
245726a73082SReid Kleckner   assert(Mask && "Missing call preserved mask for calling convention");
245826a73082SReid Kleckner 
24592c12c1e7Sweiguozhi   if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
24607e5fe697Sweiguozhi     X86Info->setFPClobberedByCall(true);
24612c12c1e7Sweiguozhi     if (CLI.CB && isa<InvokeInst>(CLI.CB))
24622c12c1e7Sweiguozhi       X86Info->setFPClobberedByInvoke(true);
24632c12c1e7Sweiguozhi   }
24642c12c1e7Sweiguozhi   if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
24657e5fe697Sweiguozhi     X86Info->setBPClobberedByCall(true);
24662c12c1e7Sweiguozhi     if (CLI.CB && isa<InvokeInst>(CLI.CB))
24672c12c1e7Sweiguozhi       X86Info->setBPClobberedByInvoke(true);
24682c12c1e7Sweiguozhi   }
24697e5fe697Sweiguozhi 
247026a73082SReid Kleckner   // If this is an invoke in a 32-bit function using a funclet-based
247126a73082SReid Kleckner   // personality, assume the function clobbers all registers. If an exception
247226a73082SReid Kleckner   // is thrown, the runtime will not restore CSRs.
247326a73082SReid Kleckner   // FIXME: Model this more precisely so that we can register allocate across
247426a73082SReid Kleckner   // the normal edge and spill and fill across the exceptional edge.
247526a73082SReid Kleckner   if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
247626a73082SReid Kleckner     const Function &CallerFn = MF.getFunction();
247726a73082SReid Kleckner     EHPersonality Pers =
247826a73082SReid Kleckner         CallerFn.hasPersonalityFn()
247926a73082SReid Kleckner             ? classifyEHPersonality(CallerFn.getPersonalityFn())
248026a73082SReid Kleckner             : EHPersonality::Unknown;
248126a73082SReid Kleckner     if (isFuncletEHPersonality(Pers))
248226a73082SReid Kleckner       Mask = RegInfo->getNoPreservedMask();
248326a73082SReid Kleckner   }
248426a73082SReid Kleckner 
248526a73082SReid Kleckner   // Define a new register mask from the existing mask.
248626a73082SReid Kleckner   uint32_t *RegMask = nullptr;
248726a73082SReid Kleckner 
248826a73082SReid Kleckner   // In some calling conventions we need to remove the used physical registers
248926a73082SReid Kleckner   // from the reg mask. Create a new RegMask for such calling conventions.
249026a73082SReid Kleckner   // RegMask for calling conventions that disable only return registers (e.g.
249126a73082SReid Kleckner   // preserve_most) will be modified later in LowerCallResult.
249226a73082SReid Kleckner   bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
249326a73082SReid Kleckner   if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
249426a73082SReid Kleckner     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
249526a73082SReid Kleckner 
249626a73082SReid Kleckner     // Allocate a new Reg Mask and copy Mask.
249726a73082SReid Kleckner     RegMask = MF.allocateRegMask();
249826a73082SReid Kleckner     unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
249926a73082SReid Kleckner     memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
250026a73082SReid Kleckner 
250126a73082SReid Kleckner     // Make sure all sub registers of the argument registers are reset
250226a73082SReid Kleckner     // in the RegMask.
250326a73082SReid Kleckner     if (ShouldDisableArgRegs) {
250426a73082SReid Kleckner       for (auto const &RegPair : RegsToPass)
250526a73082SReid Kleckner         for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
250626a73082SReid Kleckner           RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
250726a73082SReid Kleckner     }
250826a73082SReid Kleckner 
250926a73082SReid Kleckner     // Create the RegMask Operand according to our updated mask.
251026a73082SReid Kleckner     Ops.push_back(DAG.getRegisterMask(RegMask));
251126a73082SReid Kleckner   } else {
251226a73082SReid Kleckner     // Create the RegMask Operand according to the static mask.
251326a73082SReid Kleckner     Ops.push_back(DAG.getRegisterMask(Mask));
251426a73082SReid Kleckner   }
251526a73082SReid Kleckner 
251626a73082SReid Kleckner   if (InGlue.getNode())
251726a73082SReid Kleckner     Ops.push_back(InGlue);
251826a73082SReid Kleckner 
251926a73082SReid Kleckner   if (isTailCall) {
252026a73082SReid Kleckner     // We used to do:
252126a73082SReid Kleckner     //// If this is the first return lowered for this function, add the regs
252226a73082SReid Kleckner     //// to the liveout set for the function.
252326a73082SReid Kleckner     // This isn't right, although it's probably harmless on x86; liveouts
252426a73082SReid Kleckner     // should be computed from returns, not tail calls.  Consider a void
252526a73082SReid Kleckner     // function making a tail call to a function returning int.
252626a73082SReid Kleckner     MF.getFrameInfo().setHasTailCall();
2527baf59be8SSergei Barannikov     SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
252826a73082SReid Kleckner 
252926a73082SReid Kleckner     if (IsCFICall)
253026a73082SReid Kleckner       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
253126a73082SReid Kleckner 
253226a73082SReid Kleckner     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
253326a73082SReid Kleckner     DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
253426a73082SReid Kleckner     return Ret;
253526a73082SReid Kleckner   }
253626a73082SReid Kleckner 
2537baf59be8SSergei Barannikov   // Returns a chain & a glue for retval copy to use.
2538baf59be8SSergei Barannikov   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
253926a73082SReid Kleckner   if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
254026a73082SReid Kleckner     Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
254126a73082SReid Kleckner   } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
254226a73082SReid Kleckner     // Calls with a "clang.arc.attachedcall" bundle are special. They should be
254326a73082SReid Kleckner     // expanded to the call, directly followed by a special marker sequence and
254426a73082SReid Kleckner     // a call to an ObjC library function. Use the CALL_RVMARKER node to do that.
254526a73082SReid Kleckner     assert(!isTailCall &&
254626a73082SReid Kleckner            "tail calls cannot be marked with clang.arc.attachedcall");
254726a73082SReid Kleckner     assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
254826a73082SReid Kleckner 
254926a73082SReid Kleckner     // Add a target global address for the retainRV/claimRV runtime function
255026a73082SReid Kleckner     // just before the call target.
255126a73082SReid Kleckner     Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
255226a73082SReid Kleckner     auto PtrVT = getPointerTy(DAG.getDataLayout());
255326a73082SReid Kleckner     auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
255426a73082SReid Kleckner     Ops.insert(Ops.begin() + 1, GA);
255526a73082SReid Kleckner     Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
255626a73082SReid Kleckner   } else {
255726a73082SReid Kleckner     Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
255826a73082SReid Kleckner   }
255926a73082SReid Kleckner 
256026a73082SReid Kleckner   if (IsCFICall)
256126a73082SReid Kleckner     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
256226a73082SReid Kleckner 
256326a73082SReid Kleckner   InGlue = Chain.getValue(1);
256426a73082SReid Kleckner   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
256526a73082SReid Kleckner   DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
256626a73082SReid Kleckner 
256726a73082SReid Kleckner   // Save heapallocsite metadata.
256826a73082SReid Kleckner   if (CLI.CB)
256926a73082SReid Kleckner     if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
257026a73082SReid Kleckner       DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
257126a73082SReid Kleckner 
257226a73082SReid Kleckner   // Create the CALLSEQ_END node.
257326a73082SReid Kleckner   unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
257426a73082SReid Kleckner   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
257526a73082SReid Kleckner                        DAG.getTarget().Options.GuaranteedTailCallOpt))
257626a73082SReid Kleckner     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
257726a73082SReid Kleckner   else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
257826a73082SReid Kleckner     // If this call passes a struct-return pointer, the callee
257926a73082SReid Kleckner     // pops that struct pointer.
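    // (On 32-bit targets the sret pointer occupies a single 4-byte stack slot,
    // hence the constant 4 below.)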
258026a73082SReid Kleckner     NumBytesForCalleeToPop = 4;
258126a73082SReid Kleckner 
258226a73082SReid Kleckner   // Returns a glue for retval copy to use.
258326a73082SReid Kleckner   if (!IsSibcall) {
258426a73082SReid Kleckner     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
258526a73082SReid Kleckner                                InGlue, dl);
258626a73082SReid Kleckner     InGlue = Chain.getValue(1);
258726a73082SReid Kleckner   }
258826a73082SReid Kleckner 
2589c166a43cSweiguozhi   if (CallingConv::PreserveNone == CallConv)
2590c166a43cSweiguozhi     for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2591c166a43cSweiguozhi       if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2592c166a43cSweiguozhi           Outs[I].Flags.isSwiftError()) {
2593c166a43cSweiguozhi         errorUnsupported(DAG, dl,
2594c166a43cSweiguozhi                          "Swift attributes can't be used with preserve_none");
2595c166a43cSweiguozhi         break;
2596c166a43cSweiguozhi       }
2597c166a43cSweiguozhi     }
2598c166a43cSweiguozhi 
259926a73082SReid Kleckner   // Handle result values, copying them out of physregs into vregs that we
260026a73082SReid Kleckner   // return.
260126a73082SReid Kleckner   return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
260226a73082SReid Kleckner                          InVals, RegMask);
260326a73082SReid Kleckner }
260426a73082SReid Kleckner 
260526a73082SReid Kleckner //===----------------------------------------------------------------------===//
260626a73082SReid Kleckner //                Fast Calling Convention (tail call) implementation
260726a73082SReid Kleckner //===----------------------------------------------------------------------===//
260826a73082SReid Kleckner 
260926a73082SReid Kleckner //  Like stdcall, the callee cleans up the arguments, except that ECX is
261026a73082SReid Kleckner //  reserved for storing the address of the tail-called function. Only 2
261126a73082SReid Kleckner //  registers are free for argument passing (inreg). Tail call optimization is
261226a73082SReid Kleckner //  performed provided:
261326a73082SReid Kleckner //                * tailcallopt is enabled
261426a73082SReid Kleckner //                * caller/callee are fastcc
261526a73082SReid Kleckner //  On x86-64 with GOT-style position-independent code, only local (within
261626a73082SReid Kleckner //  module) calls are supported at the moment. To keep the stack aligned
261726a73082SReid Kleckner //  according to the platform ABI, GetAlignedArgumentStackSize ensures that
261826a73082SReid Kleckner //  the argument delta is always a multiple of the stack alignment. (Dynamic
261926a73082SReid Kleckner //  linkers such as Darwin's dyld need this.)
262026a73082SReid Kleckner //  If a tail-called callee has more arguments than the caller, the caller
262126a73082SReid Kleckner //  needs to make sure there is room to move the RETADDR to. This is done by
262226a73082SReid Kleckner //  reserving an area the size of the argument delta right after the original
262326a73082SReid Kleckner //  RETADDR, but before the saved frame pointer or the spilled registers,
262426a73082SReid Kleckner //  e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
262526a73082SReid Kleckner //  stack layout:
262626a73082SReid Kleckner //    arg1
262726a73082SReid Kleckner //    arg2
262826a73082SReid Kleckner //    RETADDR
262926a73082SReid Kleckner //    [ new RETADDR
263026a73082SReid Kleckner //      move area ]
263126a73082SReid Kleckner //    (possible EBP)
263226a73082SReid Kleckner //    ESI
263326a73082SReid Kleckner //    EDI
263426a73082SReid Kleckner //    local1 ..
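//  (Illustrative numbers: with 4-byte slots, the callee above needs 16 bytes
//  of argument space while the caller's frame provided only 8, so an 8-byte
//  move area is reserved right after the original RETADDR for the relocated
//  return address.)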
263526a73082SReid Kleckner 
263626a73082SReid Kleckner /// Align the argument stack size, e.g. to 16n + 12 for a 16-byte alignment
263726a73082SReid Kleckner /// requirement, so the stack stays aligned once the return address is pushed.
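/// For example, on a 32-bit target (SlotSize == 4) with a 16-byte stack
/// alignment, a StackSize of 48 yields alignTo(48 + 4, 16) - 4 == 60, i.e.
/// 16n + 12.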
263826a73082SReid Kleckner unsigned
263926a73082SReid Kleckner X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
264026a73082SReid Kleckner                                                SelectionDAG &DAG) const {
264126a73082SReid Kleckner   const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
264226a73082SReid Kleckner   const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
264326a73082SReid Kleckner   assert(StackSize % SlotSize == 0 &&
264426a73082SReid Kleckner          "StackSize must be a multiple of SlotSize");
264526a73082SReid Kleckner   return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
264626a73082SReid Kleckner }
264726a73082SReid Kleckner 
264826a73082SReid Kleckner /// Return true if the given stack call argument is already available at the
264926a73082SReid Kleckner /// same relative position in the caller's incoming argument stack.
265026a73082SReid Kleckner static
265126a73082SReid Kleckner bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
265226a73082SReid Kleckner                          MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
265326a73082SReid Kleckner                          const X86InstrInfo *TII, const CCValAssign &VA) {
265426a73082SReid Kleckner   unsigned Bytes = Arg.getValueSizeInBits() / 8;
265526a73082SReid Kleckner 
265626a73082SReid Kleckner   for (;;) {
265726a73082SReid Kleckner     // Look through nodes that don't alter the bits of the incoming value.
265826a73082SReid Kleckner     unsigned Op = Arg.getOpcode();
2659b61b2426SSimon Pilgrim     if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2660b61b2426SSimon Pilgrim         Op == ISD::AssertZext) {
266126a73082SReid Kleckner       Arg = Arg.getOperand(0);
266226a73082SReid Kleckner       continue;
266326a73082SReid Kleckner     }
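    // A truncate of an AssertZext back to the asserted type also leaves the
    // bits unchanged; this pattern typically appears when a small integer
    // argument was promoted to a wider type and is passed straight through.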
266426a73082SReid Kleckner     if (Op == ISD::TRUNCATE) {
266526a73082SReid Kleckner       const SDValue &TruncInput = Arg.getOperand(0);
266626a73082SReid Kleckner       if (TruncInput.getOpcode() == ISD::AssertZext &&
266726a73082SReid Kleckner           cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
266826a73082SReid Kleckner               Arg.getValueType()) {
266926a73082SReid Kleckner         Arg = TruncInput.getOperand(0);
267026a73082SReid Kleckner         continue;
267126a73082SReid Kleckner       }
267226a73082SReid Kleckner     }
267326a73082SReid Kleckner     break;
267426a73082SReid Kleckner   }
267526a73082SReid Kleckner 
267626a73082SReid Kleckner   int FI = INT_MAX;
267726a73082SReid Kleckner   if (Arg.getOpcode() == ISD::CopyFromReg) {
267826a73082SReid Kleckner     Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
267926a73082SReid Kleckner     if (!VR.isVirtual())
268026a73082SReid Kleckner       return false;
268126a73082SReid Kleckner     MachineInstr *Def = MRI->getVRegDef(VR);
268226a73082SReid Kleckner     if (!Def)
268326a73082SReid Kleckner       return false;
268426a73082SReid Kleckner     if (!Flags.isByVal()) {
268526a73082SReid Kleckner       if (!TII->isLoadFromStackSlot(*Def, FI))
268626a73082SReid Kleckner         return false;
268726a73082SReid Kleckner     } else {
268826a73082SReid Kleckner       unsigned Opcode = Def->getOpcode();
268926a73082SReid Kleckner       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
269026a73082SReid Kleckner            Opcode == X86::LEA64_32r) &&
269126a73082SReid Kleckner           Def->getOperand(1).isFI()) {
269226a73082SReid Kleckner         FI = Def->getOperand(1).getIndex();
269326a73082SReid Kleckner         Bytes = Flags.getByValSize();
269426a73082SReid Kleckner       } else
269526a73082SReid Kleckner         return false;
269626a73082SReid Kleckner     }
269726a73082SReid Kleckner   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
269826a73082SReid Kleckner     if (Flags.isByVal())
269926a73082SReid Kleckner       // ByVal argument is passed in as a pointer but it's now being
270026a73082SReid Kleckner       // dereferenced. e.g.
270126a73082SReid Kleckner       // define @foo(%struct.X* %A) {
270226a73082SReid Kleckner       //   tail call @bar(%struct.X* byval %A)
270326a73082SReid Kleckner       // }
270426a73082SReid Kleckner       return false;
270526a73082SReid Kleckner     SDValue Ptr = Ld->getBasePtr();
270626a73082SReid Kleckner     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
270726a73082SReid Kleckner     if (!FINode)
270826a73082SReid Kleckner       return false;
270926a73082SReid Kleckner     FI = FINode->getIndex();
271026a73082SReid Kleckner   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
271126a73082SReid Kleckner     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
271226a73082SReid Kleckner     FI = FINode->getIndex();
271326a73082SReid Kleckner     Bytes = Flags.getByValSize();
271426a73082SReid Kleckner   } else
271526a73082SReid Kleckner     return false;
271626a73082SReid Kleckner 
271726a73082SReid Kleckner   assert(FI != INT_MAX);
271826a73082SReid Kleckner   if (!MFI.isFixedObjectIndex(FI))
271926a73082SReid Kleckner     return false;
272026a73082SReid Kleckner 
272126a73082SReid Kleckner   if (Offset != MFI.getObjectOffset(FI))
272226a73082SReid Kleckner     return false;
272326a73082SReid Kleckner 
272426a73082SReid Kleckner   // If this is not byval, check that the argument stack object is immutable.
272526a73082SReid Kleckner   // inalloca and argument copy elision can create mutable argument stack
272626a73082SReid Kleckner   // objects. Byval objects can be mutated, but a byval call intends to pass the
272726a73082SReid Kleckner   // mutated memory.
272826a73082SReid Kleckner   if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
272926a73082SReid Kleckner     return false;
273026a73082SReid Kleckner 
273126a73082SReid Kleckner   if (VA.getLocVT().getFixedSizeInBits() >
273226a73082SReid Kleckner       Arg.getValueSizeInBits().getFixedValue()) {
273326a73082SReid Kleckner     // If the argument location is wider than the argument type, check that any
273426a73082SReid Kleckner     // extension flags match.
273526a73082SReid Kleckner     if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
273626a73082SReid Kleckner         Flags.isSExt() != MFI.isObjectSExt(FI)) {
273726a73082SReid Kleckner       return false;
273826a73082SReid Kleckner     }
273926a73082SReid Kleckner   }
274026a73082SReid Kleckner 
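  // Finally, the incoming stack object must be exactly the size of the value
  // being passed so that reusing it in place covers the whole outgoing slot.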
274126a73082SReid Kleckner   return Bytes == MFI.getObjectSize(FI);
274226a73082SReid Kleckner }
274326a73082SReid Kleckner 
274426a73082SReid Kleckner /// Check whether the call is eligible for tail call optimization. Targets
274526a73082SReid Kleckner /// that want to do tail call optimization should implement this function.
2746385faf9cSReid Kleckner /// Note that the x86 backend does not check musttail calls for eligibility! The
2747385faf9cSReid Kleckner /// rest of x86 tail call lowering must be prepared to forward arguments of any
2748385faf9cSReid Kleckner /// type.
274926a73082SReid Kleckner bool X86TargetLowering::IsEligibleForTailCallOptimization(
2750385faf9cSReid Kleckner     TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2751385faf9cSReid Kleckner     SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2752385faf9cSReid Kleckner   SelectionDAG &DAG = CLI.DAG;
2753385faf9cSReid Kleckner   const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2754385faf9cSReid Kleckner   const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2755385faf9cSReid Kleckner   const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2756385faf9cSReid Kleckner   SDValue Callee = CLI.Callee;
2757385faf9cSReid Kleckner   CallingConv::ID CalleeCC = CLI.CallConv;
2758385faf9cSReid Kleckner   bool isVarArg = CLI.IsVarArg;
2759385faf9cSReid Kleckner 
276026a73082SReid Kleckner   if (!mayTailCallThisCC(CalleeCC))
276126a73082SReid Kleckner     return false;
276226a73082SReid Kleckner 
276326a73082SReid Kleckner   // If -tailcallopt is specified, make fastcc functions tail-callable.
276426a73082SReid Kleckner   MachineFunction &MF = DAG.getMachineFunction();
276526a73082SReid Kleckner   const Function &CallerF = MF.getFunction();
276626a73082SReid Kleckner 
276726a73082SReid Kleckner   // If the function return type is x86_fp80 and the callee return type is not,
276826a73082SReid Kleckner   // then the FP_EXTEND of the call result is not a nop. It's not safe to
276926a73082SReid Kleckner   // perform a tailcall optimization here.
2770385faf9cSReid Kleckner   if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
277126a73082SReid Kleckner     return false;
277226a73082SReid Kleckner 
277326a73082SReid Kleckner   CallingConv::ID CallerCC = CallerF.getCallingConv();
277426a73082SReid Kleckner   bool CCMatch = CallerCC == CalleeCC;
277526a73082SReid Kleckner   bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
277626a73082SReid Kleckner   bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
277726a73082SReid Kleckner   bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
277826a73082SReid Kleckner       CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
277926a73082SReid Kleckner 
278026a73082SReid Kleckner   // Win64 functions have extra shadow space for argument homing. Don't do the
278126a73082SReid Kleckner   // sibcall if the caller and callee have mismatched expectations for this
278226a73082SReid Kleckner   // space.
278326a73082SReid Kleckner   if (IsCalleeWin64 != IsCallerWin64)
278426a73082SReid Kleckner     return false;
278526a73082SReid Kleckner 
278626a73082SReid Kleckner   if (IsGuaranteeTCO) {
278726a73082SReid Kleckner     if (canGuaranteeTCO(CalleeCC) && CCMatch)
278826a73082SReid Kleckner       return true;
278926a73082SReid Kleckner     return false;
279026a73082SReid Kleckner   }
279126a73082SReid Kleckner 
279226a73082SReid Kleckner   // Look for obvious safe cases to perform tail call optimization that do not
279326a73082SReid Kleckner   // require ABI changes. This is what gcc calls sibcall.
279426a73082SReid Kleckner 
279526a73082SReid Kleckner   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
279626a73082SReid Kleckner   // emit a special epilogue.
279726a73082SReid Kleckner   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
279826a73082SReid Kleckner   if (RegInfo->hasStackRealignment(MF))
279926a73082SReid Kleckner     return false;
280026a73082SReid Kleckner 
280126a73082SReid Kleckner   // Also avoid sibcall optimization if we're an sret return fn and the callee
280226a73082SReid Kleckner   // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
280326a73082SReid Kleckner   // insufficient.
280426a73082SReid Kleckner   if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
280526a73082SReid Kleckner     // For a compatible tail call the callee must return our sret pointer. So it
280626a73082SReid Kleckner     // must (a) be an sret function itself and (b) be passed our sret pointer as
280726a73082SReid Kleckner     // its sret argument. Condition (b) is harder to determine.
280826a73082SReid Kleckner     return false;
280926a73082SReid Kleckner   } else if (IsCalleePopSRet)
281026a73082SReid Kleckner     // The callee pops an sret, so we cannot tail-call, as our caller doesn't
281126a73082SReid Kleckner     // expect that.
281226a73082SReid Kleckner     return false;
281326a73082SReid Kleckner 
281426a73082SReid Kleckner   // Do not sibcall optimize vararg calls unless all arguments are passed via
281526a73082SReid Kleckner   // registers.
281626a73082SReid Kleckner   LLVMContext &C = *DAG.getContext();
281726a73082SReid Kleckner   if (isVarArg && !Outs.empty()) {
281826a73082SReid Kleckner     // Optimizing for varargs on Win64 is unlikely to be safe without
281926a73082SReid Kleckner     // additional testing.
282026a73082SReid Kleckner     if (IsCalleeWin64 || IsCallerWin64)
282126a73082SReid Kleckner       return false;
282226a73082SReid Kleckner 
282326a73082SReid Kleckner     for (const auto &VA : ArgLocs)
282426a73082SReid Kleckner       if (!VA.isRegLoc())
282526a73082SReid Kleckner         return false;
282626a73082SReid Kleckner   }
282726a73082SReid Kleckner 
282826a73082SReid Kleckner   // If the call result is in ST0 / ST1, it needs to be popped off the x87
282926a73082SReid Kleckner   // stack.  Therefore, if it's not used by the call it is not safe to optimize
283026a73082SReid Kleckner   // this into a sibcall.
283126a73082SReid Kleckner   bool Unused = false;
283226a73082SReid Kleckner   for (const auto &In : Ins) {
283326a73082SReid Kleckner     if (!In.Used) {
283426a73082SReid Kleckner       Unused = true;
283526a73082SReid Kleckner       break;
283626a73082SReid Kleckner     }
283726a73082SReid Kleckner   }
283826a73082SReid Kleckner   if (Unused) {
283926a73082SReid Kleckner     SmallVector<CCValAssign, 16> RVLocs;
2840385faf9cSReid Kleckner     CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2841385faf9cSReid Kleckner     RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
284226a73082SReid Kleckner     for (const auto &VA : RVLocs) {
284326a73082SReid Kleckner       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
284426a73082SReid Kleckner         return false;
284526a73082SReid Kleckner     }
284626a73082SReid Kleckner   }
284726a73082SReid Kleckner 
284826a73082SReid Kleckner   // Check that the call results are passed in the same way.
284926a73082SReid Kleckner   if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
285026a73082SReid Kleckner                                   RetCC_X86, RetCC_X86))
285126a73082SReid Kleckner     return false;
285226a73082SReid Kleckner   // The callee has to preserve all registers the caller needs to preserve.
285326a73082SReid Kleckner   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
285426a73082SReid Kleckner   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
285526a73082SReid Kleckner   if (!CCMatch) {
285626a73082SReid Kleckner     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
285726a73082SReid Kleckner     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
285826a73082SReid Kleckner       return false;
285926a73082SReid Kleckner   }
286026a73082SReid Kleckner 
2861d3a8363bSAntonio Frighetto   // The caller's stack frame cannot be replaced by the tail callee's if the
2862d3a8363bSAntonio Frighetto   // caller is required to preserve all registers. Conservatively prevent tail
2863d3a8363bSAntonio Frighetto   // call optimization even if, hypothetically, all the registers were used for
2864d3a8363bSAntonio Frighetto   // passing formal parameters or returning values.
2865d3a8363bSAntonio Frighetto   if (CallerF.hasFnAttribute("no_caller_saved_registers"))
2866d3a8363bSAntonio Frighetto     return false;
2867d3a8363bSAntonio Frighetto 
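  // Bytes of outgoing arguments that would be passed on the stack by this call.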
2868385faf9cSReid Kleckner   unsigned StackArgsSize = CCInfo.getStackSize();
286926a73082SReid Kleckner 
287026a73082SReid Kleckner   // If the callee takes no arguments then go on to check the results of the
287126a73082SReid Kleckner   // call.
287226a73082SReid Kleckner   if (!Outs.empty()) {
2873385faf9cSReid Kleckner     if (StackArgsSize > 0) {
287426a73082SReid Kleckner       // Check if the arguments are already laid out in the right way as
287526a73082SReid Kleckner       // the caller's fixed stack objects.
287626a73082SReid Kleckner       MachineFrameInfo &MFI = MF.getFrameInfo();
287726a73082SReid Kleckner       const MachineRegisterInfo *MRI = &MF.getRegInfo();
287826a73082SReid Kleckner       const X86InstrInfo *TII = Subtarget.getInstrInfo();
287926a73082SReid Kleckner       for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
288026a73082SReid Kleckner         const CCValAssign &VA = ArgLocs[I];
288126a73082SReid Kleckner         SDValue Arg = OutVals[I];
288226a73082SReid Kleckner         ISD::ArgFlagsTy Flags = Outs[I].Flags;
288326a73082SReid Kleckner         if (VA.getLocInfo() == CCValAssign::Indirect)
288426a73082SReid Kleckner           return false;
288526a73082SReid Kleckner         if (!VA.isRegLoc()) {
288626a73082SReid Kleckner           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
288726a73082SReid Kleckner                                    TII, VA))
288826a73082SReid Kleckner             return false;
288926a73082SReid Kleckner         }
289026a73082SReid Kleckner       }
289126a73082SReid Kleckner     }
289226a73082SReid Kleckner 
289326a73082SReid Kleckner     bool PositionIndependent = isPositionIndependent();
289426a73082SReid Kleckner     // If the tailcall address may be in a register, then make sure it's
289526a73082SReid Kleckner     // possible to register allocate for it. In 32-bit, the call address can
289626a73082SReid Kleckner     // only target EAX, EDX, or ECX since the tail call must be scheduled after
289726a73082SReid Kleckner     // callee-saved registers are restored. These happen to be the same
289826a73082SReid Kleckner     // registers used to pass 'inreg' arguments so watch out for those.
289926a73082SReid Kleckner     if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
290026a73082SReid Kleckner                                   !isa<ExternalSymbolSDNode>(Callee)) ||
290126a73082SReid Kleckner                                  PositionIndependent)) {
290226a73082SReid Kleckner       unsigned NumInRegs = 0;
290326a73082SReid Kleckner       // In PIC we need an extra register to formulate the address computation
290426a73082SReid Kleckner       // for the callee.
290526a73082SReid Kleckner       unsigned MaxInRegs = PositionIndependent ? 2 : 3;
290626a73082SReid Kleckner 
290726a73082SReid Kleckner       for (const auto &VA : ArgLocs) {
290826a73082SReid Kleckner         if (!VA.isRegLoc())
290926a73082SReid Kleckner           continue;
291026a73082SReid Kleckner         Register Reg = VA.getLocReg();
291126a73082SReid Kleckner         switch (Reg) {
291226a73082SReid Kleckner         default: break;
291326a73082SReid Kleckner         case X86::EAX: case X86::EDX: case X86::ECX:
291426a73082SReid Kleckner           if (++NumInRegs == MaxInRegs)
291526a73082SReid Kleckner             return false;
291626a73082SReid Kleckner           break;
291726a73082SReid Kleckner         }
291826a73082SReid Kleckner       }
291926a73082SReid Kleckner     }
292026a73082SReid Kleckner 
292126a73082SReid Kleckner     const MachineRegisterInfo &MRI = MF.getRegInfo();
292226a73082SReid Kleckner     if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
292326a73082SReid Kleckner       return false;
292426a73082SReid Kleckner   }
292526a73082SReid Kleckner 
292626a73082SReid Kleckner   bool CalleeWillPop =
292726a73082SReid Kleckner       X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
292826a73082SReid Kleckner                        MF.getTarget().Options.GuaranteedTailCallOpt);
292926a73082SReid Kleckner 
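  // For example, a 32-bit stdcall caller that pops 8 bytes of its own
  // arguments on return can only tail call a callee that likewise pops exactly
  // 8 bytes of stack arguments.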
293026a73082SReid Kleckner   if (unsigned BytesToPop =
293126a73082SReid Kleckner           MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
293226a73082SReid Kleckner     // If we have bytes to pop, the callee must pop them.
293326a73082SReid Kleckner     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
293426a73082SReid Kleckner     if (!CalleePopMatches)
293526a73082SReid Kleckner       return false;
293626a73082SReid Kleckner   } else if (CalleeWillPop && StackArgsSize > 0) {
293726a73082SReid Kleckner     // If we don't have bytes to pop, make sure the callee doesn't pop any.
293826a73082SReid Kleckner     return false;
293926a73082SReid Kleckner   }
294026a73082SReid Kleckner 
294126a73082SReid Kleckner   return true;
294226a73082SReid Kleckner }
294326a73082SReid Kleckner 
294426a73082SReid Kleckner /// Determines whether the callee is required to pop its own arguments.
294526a73082SReid Kleckner /// Callee pop is necessary to support tail calls.
294626a73082SReid Kleckner bool X86::isCalleePop(CallingConv::ID CallingConv,
294726a73082SReid Kleckner                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
294826a73082SReid Kleckner   // If GuaranteeTCO is true, we force some calls to be callee pop so that we
294926a73082SReid Kleckner   // can guarantee TCO.
295026a73082SReid Kleckner   if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
295126a73082SReid Kleckner     return true;
295226a73082SReid Kleckner 
295326a73082SReid Kleckner   switch (CallingConv) {
295426a73082SReid Kleckner   default:
295526a73082SReid Kleckner     return false;
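  // The conventions below are callee-pop only in 32-bit mode; their 64-bit
  // counterparts use the regular caller-cleanup stack protocol.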
295626a73082SReid Kleckner   case CallingConv::X86_StdCall:
295726a73082SReid Kleckner   case CallingConv::X86_FastCall:
295826a73082SReid Kleckner   case CallingConv::X86_ThisCall:
295926a73082SReid Kleckner   case CallingConv::X86_VectorCall:
296026a73082SReid Kleckner     return !is64Bit;
296126a73082SReid Kleckner   }
296226a73082SReid Kleckner }