//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to DAG nodes.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"

#define DEBUG_TYPE "x86-isel"

using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
  switch (CC) {
  default:
    return false;
  case CallingConv::X86_RegCall:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
    return true;
  }
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
  return CC == CallingConv::X86_RegCall;
}

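/// Maps a vXi1 mask type with \p NumElts elements to the register type and
/// register count used to pass or return it under calling convention \p CC.
/// For example, under the default C convention a v16i1 value travels in a
/// single v16i8 register, while a v64i1 value on a BWI target that prefers
/// 256-bit vectors is split across two v32i8 registers.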
static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
  // convention is one that uses k registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};
  // v32i1 passes in ymm unless we have BWI and the calling convention is
  // regcall.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};
  // Split v64i1 vectors if we don't have v64i8 available.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}

MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return RegisterVT;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return MVT::v8f16;
  }

  // We use more GPRs for f64 and f80 on 32-bit targets when x87 is disabled.
  if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
      !Subtarget.hasX87())
    return MVT::i32;

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

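/// Mirrors getRegisterTypeForCallingConv: for example, on a 32-bit target
/// without x87 an f64 value occupies two i32 registers and an f80 occupies
/// three.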
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return NumRegisters;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return 1;
  }

  // We have to split f64 into 2 registers and f80 into 3 registers on 32-bit
  // targets if x87 is disabled.
  if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
    if (VT == MVT::f64)
      return 2;
    if (VT == MVT::f80)
      return 3;
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

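/// Applies the same mask-splitting rules as handleMaskRegisterForCallingConv;
/// for example, with AVX512 a v7i1 value is decomposed into seven i1
/// intermediates, each of which is passed in an i8 register.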
unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // Split v64i1 vectors if we don't have v64i8 available.
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  // Split vNbf16 vectors according to vNf16.
  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}

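/// For example, with AVX512 a setcc on v16i32 yields a v16i1 mask; without
/// AVX512 it yields v16i32, i.e. the operand type with integer elements.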
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext& Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Figure out what this type will be legalized to.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
      // vXi16/vXi8.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  return VT.changeVectorElementTypeToInteger();
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}

/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
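/// For example, on 32-bit x86 with SSE1 a struct containing a v4f32 member is
/// placed at a 16-byte boundary, while a struct of plain ints stays at 4.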
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  if (Subtarget.is64Bit()) {
    // Return the max of 8 and the ABI alignment of the type.
    Align TyAlign = DL.getABITypeAlign(Ty);
    if (TyAlign > 8)
      return TyAlign.value();
    return 8;
  }

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment.value();
}

/// Returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
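/// For example, a 64-byte zero memset on a typical AVX2 target is lowered
/// with v32i8 stores.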
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          Subtarget.useLight256BitInstructions()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      // If we have SSE1 registers we should be able to use them.
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if source is string constant. It's
      // better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return Subtarget.hasSSE1();
  if (VT == MVT::f64)
    return Subtarget.hasSSE2();
  return true;
}

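/// Returns true if \p Alignment covers an access of \p SizeInBits bits; e.g.
/// isBitAligned(Align(16), 128) is true but isBitAligned(Align(4), 128) is
/// not.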
static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
  return (8 * Alignment.value()) % SizeInBits == 0;
}

bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
  if (isBitAligned(Alignment, VT.getSizeInBits()))
    return true;
  switch (VT.getSizeInBits()) {
  default:
    // 8-byte and under are always assumed to be fast.
    return true;
  case 128:
    return !Subtarget.isUnalignedMem16Slow();
  case 256:
    return !Subtarget.isUnalignedMem32Slow();
    // TODO: What about AVX-512 (512-bit) accesses?
  }
}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  // NonTemporal vector memory ops must be aligned.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }
  // Misaligned accesses of any size are always allowed.
  return true;
}

bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
                                           const DataLayout &DL, EVT VT,
                                           unsigned AddrSpace, Align Alignment,
                                           MachineMemOperand::Flags Flags,
                                           unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                       /*Fast=*/nullptr))
      return true;
    // NonTemporal vector memory ops are special, and must be aligned.
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
      return false;
    switch (VT.getSizeInBits()) {
    case 128:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
        return true;
      return false;
    case 256:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
        return true;
      return false;
    case 512:
      if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
        return true;
      return false;
    default:
      return false; // Don't have NonTemporal vector memory ops of this size.
    }
  }
  return true;
}

/// Return the entry encoding for a jump table in the
/// current function.  The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;
  if (isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Large)
    return MachineJumpTableInfo::EK_LabelDifference64;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}

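/// Handles the one X86-specific case of an ABI register copy of a bf16 value
/// in an f32 part: the value is bitcast to i16, any-extended to i32, and
/// bitcast to f32 (joinRegisterPartsIntoValue below inverts the sequence).
/// Everything else falls back to the generic splitting logic.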
bool X86TargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
    unsigned ValueBits = ValueVT.getSizeInBits();
    unsigned PartBits = PartVT.getSizeInBits();
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    Parts[0] = Val;
    return true;
  }
  return false;
}

SDValue X86TargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
    unsigned ValueBits = ValueVT.getSizeInBits();
    unsigned PartBits = PartVT.getSizeInBits();
    SDValue Val = Parts[0];

    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }
  return SDValue();
}

bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

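/// Marks the leading integer arguments of a libcall as in-register so that
/// 32-bit C / stdcall libcalls honor the module's -mregparm setting. For
/// example, with -mregparm=3 an i64 argument consumes two of the three
/// available registers.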
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {

  // Only relabel X86-32 for C / Stdcall CCs.
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Mark the first N integer arguments as being passed in registers.
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}

const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid, MCContext &Ctx) const {
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}

/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    // This doesn't have SDLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  return Table;
}

/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  // X86-64 uses RIP relative addressing based on the jump table label.
  if (Subtarget.isPICStyleRIPRel() ||
      (Subtarget.is64Bit() &&
       getTargetMachine().getCodeModel() == CodeModel::Large))
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  // Otherwise, the reference is relative to the PIC base.
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}

std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}

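/// Returns the LLVM address space used for segment-relative TLS accesses:
/// 256 selects %gs and 257 selects %fs, so the kernel code model uses %gs
/// even on x86_64 while ordinary 64-bit code uses %fs; i386 always uses %gs.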
unsigned X86TargetLowering::getAddressSpace() const {
  if (Subtarget.is64Bit())
    return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
  return 256;
}

static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}

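/// Builds an inttoptr constant addressing \p Offset within the segment
/// selected by \p AddressSpace; e.g. SegmentOffset(IRB, 0x28, 257) yields the
/// address %fs:0x28.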
static Constant* SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
}

Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // glibc, bionic, and Fuchsia have a special slot for the stack guard in
  // tcbhead_t; use it instead of the usual global variable (see
  // sysdeps/{i386,x86_64}/nptl/tls.h)
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    unsigned AddressSpace = getAddressSpace();

    // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
    if (Subtarget.isTargetFuchsia())
      return SegmentOffset(IRB, 0x10, AddressSpace);

    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Users may customize the guard base register and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If -stack-protector-guard-offset was not set, the default is %fs:0x28
    // on x86_64 (%gs:0x28 under the kernel code model) and %gs:0x14 on i386.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

    StringRef GuardReg = M->getStackProtectorGuardReg();
    if (GuardReg == "fs")
      AddressSpace = X86AS::FS;
    else if (GuardReg == "gs")
      AddressSpace = X86AS::GS;

    // Use the named guard symbol if the user specified one.
    StringRef GuardSymb = M->getStackProtectorGuardSymbol();
    if (!GuardSymb.empty()) {
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
                                       : Type::getInt32Ty(M->getContext());
        GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
                                nullptr, GuardSymb, nullptr,
                                GlobalValue::NotThreadLocal, AddressSpace);
        if (!Subtarget.isTargetDarwin())
          GV->setDSOLocal(M->getDirectAccessExternalData());
      }
      return GV;
    }

    return SegmentOffset(IRB, Offset, AddressSpace);
  }
  return TargetLowering::getIRStackGuard(IRB);
}

void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // The MSVC CRT provides functionality for stack protection.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    // The MSVC CRT has a global variable holding the security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));

    // The MSVC CRT has a function to validate the security cookie.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // glibc, bionic, and Fuchsia have a special slot for the stack guard.
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}

Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
  // The MSVC CRT has a global variable holding the security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getGlobalVariable("__security_cookie");
  }
  return TargetLowering::getSDagStackGuard(M);
}

Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
  // The MSVC CRT has a function to validate the security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getFunction("__security_check_cookie");
  }
  return TargetLowering::getSSPStackGuardCheck(M);
}

Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget.isTargetAndroid()) {
    // %fs:0x48 on x86_64 (%gs:0x48 under the kernel code model); %gs:0x24 on
    // i386.
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia is similar.
  if (Subtarget.isTargetFuchsia()) {
    // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool X86TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}

const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}

ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
  // FIXME: We should def X86::FPCW for x87 as well, but doing so currently
  // perturbs a large number of lit tests in ways we did not expect.
  static const MCPhysReg RCRegs[] = {X86::MXCSR};
  return RCRegs;
}

/// Lowers mask values (v*i1) to the local register values
/// \returns DAG node after lowering to register type
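/// For example, a v16i1 value destined for an i32 location is bitcast to i16
/// and then any-extended to i32.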
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &DL, SelectionDAG &DAG) {
  EVT ValVT = ValArg.getValueType();

  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Two-stage lowering might be required
    // bitcast:   v8i1 -> i8 / v16i1 -> i16
    // anyextend: i8   -> i32 / i16   -> i32
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // One-stage lowering is required
    // bitcast:   v32i1 -> i32 / v64i1 -> i64
    return DAG.getBitcast(ValLoc, ValArg);
  }

  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
}

/// Breaks a v64i1 value into two registers and adds the new nodes to the DAG
static void Passv64i1ArgInRegs(
    const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // Before splitting the value we cast it to i64
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Splitting the value into two i32 types
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);

  // Attach the two i32 types into corresponding registers
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}

764*5f757f3fSDimitry Andric 
765*5f757f3fSDimitry Andric SDValue
766*5f757f3fSDimitry Andric X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
767*5f757f3fSDimitry Andric                                bool isVarArg,
768*5f757f3fSDimitry Andric                                const SmallVectorImpl<ISD::OutputArg> &Outs,
769*5f757f3fSDimitry Andric                                const SmallVectorImpl<SDValue> &OutVals,
770*5f757f3fSDimitry Andric                                const SDLoc &dl, SelectionDAG &DAG) const {
771*5f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
772*5f757f3fSDimitry Andric   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
773*5f757f3fSDimitry Andric 
774*5f757f3fSDimitry Andric   // In some cases we need to disable registers from the default CSR list.
775*5f757f3fSDimitry Andric   // For example, when they are used as return registers (preserve_* and X86's
776*5f757f3fSDimitry Andric   // regcall) or for argument passing (X86's regcall).
777*5f757f3fSDimitry Andric   bool ShouldDisableCalleeSavedRegister =
778*5f757f3fSDimitry Andric       shouldDisableRetRegFromCSR(CallConv) ||
779*5f757f3fSDimitry Andric       MF.getFunction().hasFnAttribute("no_caller_saved_registers");
780*5f757f3fSDimitry Andric 
781*5f757f3fSDimitry Andric   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
782*5f757f3fSDimitry Andric     report_fatal_error("X86 interrupts may not return any value");
783*5f757f3fSDimitry Andric 
784*5f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> RVLocs;
785*5f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
786*5f757f3fSDimitry Andric   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
787*5f757f3fSDimitry Andric 
788*5f757f3fSDimitry Andric   SmallVector<std::pair<Register, SDValue>, 4> RetVals;
789*5f757f3fSDimitry Andric   for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
790*5f757f3fSDimitry Andric        ++I, ++OutsIndex) {
791*5f757f3fSDimitry Andric     CCValAssign &VA = RVLocs[I];
792*5f757f3fSDimitry Andric     assert(VA.isRegLoc() && "Can only return in registers!");
793*5f757f3fSDimitry Andric 
794*5f757f3fSDimitry Andric     // Add the register to the CalleeSaveDisableRegs list.
795*5f757f3fSDimitry Andric     if (ShouldDisableCalleeSavedRegister)
796*5f757f3fSDimitry Andric       MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
797*5f757f3fSDimitry Andric 
798*5f757f3fSDimitry Andric     SDValue ValToCopy = OutVals[OutsIndex];
799*5f757f3fSDimitry Andric     EVT ValVT = ValToCopy.getValueType();
800*5f757f3fSDimitry Andric 
801*5f757f3fSDimitry Andric     // Promote values to the appropriate types.
802*5f757f3fSDimitry Andric     if (VA.getLocInfo() == CCValAssign::SExt)
803*5f757f3fSDimitry Andric       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
804*5f757f3fSDimitry Andric     else if (VA.getLocInfo() == CCValAssign::ZExt)
805*5f757f3fSDimitry Andric       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
806*5f757f3fSDimitry Andric     else if (VA.getLocInfo() == CCValAssign::AExt) {
807*5f757f3fSDimitry Andric       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
808*5f757f3fSDimitry Andric         ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
809*5f757f3fSDimitry Andric       else
810*5f757f3fSDimitry Andric         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
811*5f757f3fSDimitry Andric     }
812*5f757f3fSDimitry Andric     else if (VA.getLocInfo() == CCValAssign::BCvt)
813*5f757f3fSDimitry Andric       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
814*5f757f3fSDimitry Andric 
815*5f757f3fSDimitry Andric     assert(VA.getLocInfo() != CCValAssign::FPExt &&
816*5f757f3fSDimitry Andric            "Unexpected FP-extend for return value.");
817*5f757f3fSDimitry Andric 
818*5f757f3fSDimitry Andric     // Report an error if we have attempted to return a value via an XMM
819*5f757f3fSDimitry Andric     // register and SSE was disabled.
820*5f757f3fSDimitry Andric     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
821*5f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
822*5f757f3fSDimitry Andric       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
823*5f757f3fSDimitry Andric     } else if (!Subtarget.hasSSE2() &&
824*5f757f3fSDimitry Andric                X86::FR64XRegClass.contains(VA.getLocReg()) &&
825*5f757f3fSDimitry Andric                ValVT == MVT::f64) {
826*5f757f3fSDimitry Andric       // When returning a double via an XMM register, report an error if SSE2 is
827*5f757f3fSDimitry Andric       // not enabled.
828*5f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
829*5f757f3fSDimitry Andric       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
830*5f757f3fSDimitry Andric     }
831*5f757f3fSDimitry Andric 
832*5f757f3fSDimitry Andric     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
833*5f757f3fSDimitry Andric     // the RET instruction and handled by the FP Stackifier.
834*5f757f3fSDimitry Andric     if (VA.getLocReg() == X86::FP0 ||
835*5f757f3fSDimitry Andric         VA.getLocReg() == X86::FP1) {
836*5f757f3fSDimitry Andric       // If this is a copy from an xmm register to ST(0), use an FPExtend to
837*5f757f3fSDimitry Andric       // change the value to the FP stack register class.
838*5f757f3fSDimitry Andric       if (isScalarFPTypeInSSEReg(VA.getValVT()))
839*5f757f3fSDimitry Andric         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
840*5f757f3fSDimitry Andric       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
841*5f757f3fSDimitry Andric       // Don't emit a copytoreg.
842*5f757f3fSDimitry Andric       continue;
843*5f757f3fSDimitry Andric     }
844*5f757f3fSDimitry Andric 
845*5f757f3fSDimitry Andric     // 64-bit vector (MMX) values are returned in XMM0 / XMM1, except for v1i64,
846*5f757f3fSDimitry Andric     // which is returned in RAX / RDX.
847*5f757f3fSDimitry Andric     if (Subtarget.is64Bit()) {
848*5f757f3fSDimitry Andric       if (ValVT == MVT::x86mmx) {
849*5f757f3fSDimitry Andric         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
850*5f757f3fSDimitry Andric           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
851*5f757f3fSDimitry Andric           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
852*5f757f3fSDimitry Andric                                   ValToCopy);
853*5f757f3fSDimitry Andric           // If we don't have SSE2 available, convert to v4f32 so the generated
854*5f757f3fSDimitry Andric           // register is legal.
855*5f757f3fSDimitry Andric           if (!Subtarget.hasSSE2())
856*5f757f3fSDimitry Andric             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
857*5f757f3fSDimitry Andric         }
858*5f757f3fSDimitry Andric       }
859*5f757f3fSDimitry Andric     }
860*5f757f3fSDimitry Andric 
861*5f757f3fSDimitry Andric     if (VA.needsCustom()) {
862*5f757f3fSDimitry Andric       assert(VA.getValVT() == MVT::v64i1 &&
863*5f757f3fSDimitry Andric              "Currently the only custom case is when we split v64i1 to 2 regs");
864*5f757f3fSDimitry Andric 
865*5f757f3fSDimitry Andric       Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
866*5f757f3fSDimitry Andric                          Subtarget);
867*5f757f3fSDimitry Andric 
868*5f757f3fSDimitry Andric       // Add the second register to the CalleeSaveDisableRegs list.
869*5f757f3fSDimitry Andric       if (ShouldDisableCalleeSavedRegister)
870*5f757f3fSDimitry Andric         MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
871*5f757f3fSDimitry Andric     } else {
872*5f757f3fSDimitry Andric       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
873*5f757f3fSDimitry Andric     }
874*5f757f3fSDimitry Andric   }
875*5f757f3fSDimitry Andric 
876*5f757f3fSDimitry Andric   SDValue Glue;
877*5f757f3fSDimitry Andric   SmallVector<SDValue, 6> RetOps;
878*5f757f3fSDimitry Andric   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
879*5f757f3fSDimitry Andric   // Operand #1 = Bytes To Pop
880*5f757f3fSDimitry Andric   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
881*5f757f3fSDimitry Andric                    MVT::i32));
882*5f757f3fSDimitry Andric 
883*5f757f3fSDimitry Andric   // Copy the result values into the output registers.
884*5f757f3fSDimitry Andric   for (auto &RetVal : RetVals) {
885*5f757f3fSDimitry Andric     if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
886*5f757f3fSDimitry Andric       RetOps.push_back(RetVal.second);
887*5f757f3fSDimitry Andric       continue; // Don't emit a copytoreg.
888*5f757f3fSDimitry Andric     }
889*5f757f3fSDimitry Andric 
890*5f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
891*5f757f3fSDimitry Andric     Glue = Chain.getValue(1);
892*5f757f3fSDimitry Andric     RetOps.push_back(
893*5f757f3fSDimitry Andric         DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
894*5f757f3fSDimitry Andric   }
895*5f757f3fSDimitry Andric 
896*5f757f3fSDimitry Andric   // The Swift calling convention does not require us to copy the sret argument
897*5f757f3fSDimitry Andric   // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
898*5f757f3fSDimitry Andric 
899*5f757f3fSDimitry Andric   // All x86 ABIs require that, when returning structs by value, we copy
900*5f757f3fSDimitry Andric   // the sret argument into %rax/%eax (depending on the ABI) for the return.
901*5f757f3fSDimitry Andric   // We saved the argument into a virtual register in the entry block,
902*5f757f3fSDimitry Andric   // so now we copy the value out and into %rax/%eax.
903*5f757f3fSDimitry Andric   //
904*5f757f3fSDimitry Andric   // Checking Function.hasStructRetAttr() here is insufficient because the IR
905*5f757f3fSDimitry Andric   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
906*5f757f3fSDimitry Andric   // false, then an sret argument may be implicitly inserted in the SelDAG. In
907*5f757f3fSDimitry Andric   // either case FuncInfo->setSRetReturnReg() will have been called.
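  //
  // For illustration (a sketch; the exact lowering depends on the target
  // ABI), a C function such as
  //
  //   struct S { int a[4]; };
  //   struct S make(void);
  //
  // receives a hidden sret pointer argument, and this block copies that
  // pointer back into %rax/%eax before the return.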
908*5f757f3fSDimitry Andric   if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
909*5f757f3fSDimitry Andric     // When we have both sret and another return value, we should use the
910*5f757f3fSDimitry Andric     // original Chain stored in RetOps[0], instead of the current Chain updated
911*5f757f3fSDimitry Andric     // in the above loop. If we only have sret, RetOps[0] equals Chain.
912*5f757f3fSDimitry Andric 
913*5f757f3fSDimitry Andric     // For the case of sret and another return value, we have
914*5f757f3fSDimitry Andric     //   Chain_0 at the function entry
915*5f757f3fSDimitry Andric     //   Chain_1 = getCopyToReg(Chain_0) in the above loop
916*5f757f3fSDimitry Andric     // If we use Chain_1 in getCopyFromReg, we will have
917*5f757f3fSDimitry Andric     //   Val = getCopyFromReg(Chain_1)
918*5f757f3fSDimitry Andric     //   Chain_2 = getCopyToReg(Chain_1, Val) from below
919*5f757f3fSDimitry Andric 
920*5f757f3fSDimitry Andric     // getCopyToReg(Chain_0) will be glued together with
921*5f757f3fSDimitry Andric     // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
922*5f757f3fSDimitry Andric     // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
923*5f757f3fSDimitry Andric     //   Data dependency from Unit B to Unit A due to usage of Val in
924*5f757f3fSDimitry Andric     //     getCopyToReg(Chain_1, Val)
925*5f757f3fSDimitry Andric     //   Chain dependency from Unit A to Unit B
926*5f757f3fSDimitry Andric 
927*5f757f3fSDimitry Andric     // So here, we use RetOps[0] (i.e., Chain_0) for getCopyFromReg.
928*5f757f3fSDimitry Andric     SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
929*5f757f3fSDimitry Andric                                      getPointerTy(MF.getDataLayout()));
930*5f757f3fSDimitry Andric 
931*5f757f3fSDimitry Andric     Register RetValReg =
932*5f757f3fSDimitry Andric         (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? X86::RAX
933*5f757f3fSDimitry Andric                                                                  : X86::EAX;
934*5f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
935*5f757f3fSDimitry Andric     Glue = Chain.getValue(1);
936*5f757f3fSDimitry Andric 
937*5f757f3fSDimitry Andric     // RAX/EAX now acts like a return value.
938*5f757f3fSDimitry Andric     RetOps.push_back(
939*5f757f3fSDimitry Andric         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
940*5f757f3fSDimitry Andric 
941*5f757f3fSDimitry Andric     // Add the returned register to the CalleeSaveDisableRegs list. Don't do
942*5f757f3fSDimitry Andric     // this, however, for preserve_most/preserve_all, to minimize the number of
943*5f757f3fSDimitry Andric     // callee-saved registers for these CCs.
944*5f757f3fSDimitry Andric     if (ShouldDisableCalleeSavedRegister &&
945*5f757f3fSDimitry Andric         CallConv != CallingConv::PreserveAll &&
946*5f757f3fSDimitry Andric         CallConv != CallingConv::PreserveMost)
947*5f757f3fSDimitry Andric       MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
948*5f757f3fSDimitry Andric   }
949*5f757f3fSDimitry Andric 
950*5f757f3fSDimitry Andric   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
951*5f757f3fSDimitry Andric   const MCPhysReg *I =
952*5f757f3fSDimitry Andric       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
953*5f757f3fSDimitry Andric   if (I) {
954*5f757f3fSDimitry Andric     for (; *I; ++I) {
955*5f757f3fSDimitry Andric       if (X86::GR64RegClass.contains(*I))
956*5f757f3fSDimitry Andric         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
957*5f757f3fSDimitry Andric       else
958*5f757f3fSDimitry Andric         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
959*5f757f3fSDimitry Andric     }
960*5f757f3fSDimitry Andric   }
961*5f757f3fSDimitry Andric 
962*5f757f3fSDimitry Andric   RetOps[0] = Chain;  // Update chain.
963*5f757f3fSDimitry Andric 
964*5f757f3fSDimitry Andric   // Add the glue if we have it.
965*5f757f3fSDimitry Andric   if (Glue.getNode())
966*5f757f3fSDimitry Andric     RetOps.push_back(Glue);
967*5f757f3fSDimitry Andric 
968*5f757f3fSDimitry Andric   X86ISD::NodeType opcode = X86ISD::RET_GLUE;
969*5f757f3fSDimitry Andric   if (CallConv == CallingConv::X86_INTR)
970*5f757f3fSDimitry Andric     opcode = X86ISD::IRET;
971*5f757f3fSDimitry Andric   return DAG.getNode(opcode, dl, MVT::Other, RetOps);
972*5f757f3fSDimitry Andric }
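// For illustration (a sketch): an interrupt handler declared with
// __attribute__((interrupt)) uses CallingConv::X86_INTR, so LowerReturn
// above returns an X86ISD::IRET node (lowered to iret) instead of the
// ordinary return.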
973*5f757f3fSDimitry Andric 
974*5f757f3fSDimitry Andric bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
975*5f757f3fSDimitry Andric   if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
976*5f757f3fSDimitry Andric     return false;
977*5f757f3fSDimitry Andric 
978*5f757f3fSDimitry Andric   SDValue TCChain = Chain;
979*5f757f3fSDimitry Andric   SDNode *Copy = *N->use_begin();
980*5f757f3fSDimitry Andric   if (Copy->getOpcode() == ISD::CopyToReg) {
981*5f757f3fSDimitry Andric     // If the copy has a glue operand, we conservatively assume it isn't safe to
982*5f757f3fSDimitry Andric     // perform a tail call.
983*5f757f3fSDimitry Andric     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
984*5f757f3fSDimitry Andric       return false;
985*5f757f3fSDimitry Andric     TCChain = Copy->getOperand(0);
986*5f757f3fSDimitry Andric   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
987*5f757f3fSDimitry Andric     return false;
988*5f757f3fSDimitry Andric 
989*5f757f3fSDimitry Andric   bool HasRet = false;
990*5f757f3fSDimitry Andric   for (const SDNode *U : Copy->uses()) {
991*5f757f3fSDimitry Andric     if (U->getOpcode() != X86ISD::RET_GLUE)
992*5f757f3fSDimitry Andric       return false;
993*5f757f3fSDimitry Andric     // If we are returning more than one value, we can definitely
994*5f757f3fSDimitry Andric     // not make a tail call; see PR19530.
995*5f757f3fSDimitry Andric     if (U->getNumOperands() > 4)
996*5f757f3fSDimitry Andric       return false;
997*5f757f3fSDimitry Andric     if (U->getNumOperands() == 4 &&
998*5f757f3fSDimitry Andric         U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
999*5f757f3fSDimitry Andric       return false;
1000*5f757f3fSDimitry Andric     HasRet = true;
1001*5f757f3fSDimitry Andric   }
1002*5f757f3fSDimitry Andric 
1003*5f757f3fSDimitry Andric   if (!HasRet)
1004*5f757f3fSDimitry Andric     return false;
1005*5f757f3fSDimitry Andric 
1006*5f757f3fSDimitry Andric   Chain = TCChain;
1007*5f757f3fSDimitry Andric   return true;
1008*5f757f3fSDimitry Andric }
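// For illustration (a sketch): isUsedByReturnOnly recognizes DAG shapes like
//
//   Chain' = CopyToReg(Chain, %eax, N)
//   RET_GLUE(Chain', %eax)
//
// where N (e.g. a libcall result) feeds the return and nothing else, so the
// node producing N may be folded into a tail call.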
1009*5f757f3fSDimitry Andric 
1010*5f757f3fSDimitry Andric EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
1011*5f757f3fSDimitry Andric                                            ISD::NodeType ExtendKind) const {
1012*5f757f3fSDimitry Andric   MVT ReturnMVT = MVT::i32;
1013*5f757f3fSDimitry Andric 
1014*5f757f3fSDimitry Andric   bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
1015*5f757f3fSDimitry Andric   if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
1016*5f757f3fSDimitry Andric     // The ABI does not require i1, i8 or i16 to be extended.
1017*5f757f3fSDimitry Andric     //
1018*5f757f3fSDimitry Andric     // On Darwin, there is code in the wild relying on Clang's old behaviour of
1019*5f757f3fSDimitry Andric     // always extending i8/i16 return values, so keep doing that for now.
1020*5f757f3fSDimitry Andric     // (PR26665).
1021*5f757f3fSDimitry Andric     ReturnMVT = MVT::i8;
1022*5f757f3fSDimitry Andric   }
1023*5f757f3fSDimitry Andric 
1024*5f757f3fSDimitry Andric   EVT MinVT = getRegisterType(Context, ReturnMVT);
1025*5f757f3fSDimitry Andric   return VT.bitsLT(MinVT) ? MinVT : VT;
1026*5f757f3fSDimitry Andric }
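// For illustration (a sketch): for a `char f(void)` return type (i8), the
// non-Darwin path above picks ReturnMVT = i8, so the value travels in %al
// with no implicit extension; on Darwin, MinVT stays i32 and the i8 result
// is promoted, preserving Clang's old always-extend behaviour (PR26665).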
1027*5f757f3fSDimitry Andric 
1028*5f757f3fSDimitry Andric /// Reads two 32 bit registers and creates a 64 bit mask value.
1029*5f757f3fSDimitry Andric /// \param VA The current 32 bit value that needs to be assigned.
1030*5f757f3fSDimitry Andric /// \param NextVA The next 32 bit value that needs to be assigned.
1031*5f757f3fSDimitry Andric /// \param Root The parent DAG node.
1032*5f757f3fSDimitry Andric /// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
1033*5f757f3fSDimitry Andric ///                        for glue purposes. If the DAG is already using a
1034*5f757f3fSDimitry Andric ///                        physical register instead of a virtual one, we
1035*5f757f3fSDimitry Andric ///                        should glue our new SDValue to the InGlue SDValue.
1036*5f757f3fSDimitry Andric /// \return a new 64 bit wide SDValue.
1037*5f757f3fSDimitry Andric static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1038*5f757f3fSDimitry Andric                                 SDValue &Root, SelectionDAG &DAG,
1039*5f757f3fSDimitry Andric                                 const SDLoc &DL, const X86Subtarget &Subtarget,
1040*5f757f3fSDimitry Andric                                 SDValue *InGlue = nullptr) {
1041*5f757f3fSDimitry Andric   assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1042*5f757f3fSDimitry Andric   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1043*5f757f3fSDimitry Andric   assert(VA.getValVT() == MVT::v64i1 &&
1044*5f757f3fSDimitry Andric          "Expecting first location of 64 bit width type");
1045*5f757f3fSDimitry Andric   assert(NextVA.getValVT() == VA.getValVT() &&
1046*5f757f3fSDimitry Andric          "The locations should have the same type");
1047*5f757f3fSDimitry Andric   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1048*5f757f3fSDimitry Andric          "The values should reside in two registers");
1049*5f757f3fSDimitry Andric 
1050*5f757f3fSDimitry Andric   SDValue Lo, Hi;
1051*5f757f3fSDimitry Andric   SDValue ArgValueLo, ArgValueHi;
1052*5f757f3fSDimitry Andric 
1053*5f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
1054*5f757f3fSDimitry Andric   const TargetRegisterClass *RC = &X86::GR32RegClass;
1055*5f757f3fSDimitry Andric 
1056*5f757f3fSDimitry Andric   // Read a 32 bit value from the registers.
1057*5f757f3fSDimitry Andric   if (nullptr == InGlue) {
1058*5f757f3fSDimitry Andric     // When no physical register is present,
1059*5f757f3fSDimitry Andric     // create an intermediate virtual register.
1060*5f757f3fSDimitry Andric     Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1061*5f757f3fSDimitry Andric     ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1062*5f757f3fSDimitry Andric     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1063*5f757f3fSDimitry Andric     ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1064*5f757f3fSDimitry Andric   } else {
1065*5f757f3fSDimitry Andric     // When a physical register is available read the value from it and glue
1066*5f757f3fSDimitry Andric     // the reads together.
1067*5f757f3fSDimitry Andric     ArgValueLo =
1068*5f757f3fSDimitry Andric       DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1069*5f757f3fSDimitry Andric     *InGlue = ArgValueLo.getValue(2);
1070*5f757f3fSDimitry Andric     ArgValueHi =
1071*5f757f3fSDimitry Andric       DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1072*5f757f3fSDimitry Andric     *InGlue = ArgValueHi.getValue(2);
1073*5f757f3fSDimitry Andric   }
1074*5f757f3fSDimitry Andric 
1075*5f757f3fSDimitry Andric   // Convert the lower i32 value into a v32i1 mask.
1076*5f757f3fSDimitry Andric   Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1077*5f757f3fSDimitry Andric 
1078*5f757f3fSDimitry Andric   // Convert the upper i32 value into a v32i1 mask.
1079*5f757f3fSDimitry Andric   Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1080*5f757f3fSDimitry Andric 
1081*5f757f3fSDimitry Andric   // Concatenate the two values together.
1082*5f757f3fSDimitry Andric   return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1083*5f757f3fSDimitry Andric }
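// Conceptually (a sketch; the actual registers depend on the calling
// convention), reassembling a v64i1 value from two 32 bit locations looks
// like:
//
//   Lo  = bitcast i32 <first reg>  to v32i1
//   Hi  = bitcast i32 <second reg> to v32i1
//   Val = concat_vectors Lo, Hi    ; v64i1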
1084*5f757f3fSDimitry Andric 
1085*5f757f3fSDimitry Andric /// The function will lower a register of various sizes (8/16/32/64)
1086*5f757f3fSDimitry Andric /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
1087*5f757f3fSDimitry Andric /// \returns a DAG node containing the operand after lowering it to a mask type.
1088*5f757f3fSDimitry Andric static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1089*5f757f3fSDimitry Andric                                const EVT &ValLoc, const SDLoc &DL,
1090*5f757f3fSDimitry Andric                                SelectionDAG &DAG) {
1091*5f757f3fSDimitry Andric   SDValue ValReturned = ValArg;
1092*5f757f3fSDimitry Andric 
1093*5f757f3fSDimitry Andric   if (ValVT == MVT::v1i1)
1094*5f757f3fSDimitry Andric     return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1095*5f757f3fSDimitry Andric 
1096*5f757f3fSDimitry Andric   if (ValVT == MVT::v64i1) {
1097*5f757f3fSDimitry Andric     // On 32 bit machines, this case is handled by getv64i1Argument.
1098*5f757f3fSDimitry Andric     assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1099*5f757f3fSDimitry Andric     // On 64 bit machines, there is no need to truncate the value; only bitcast it.
1100*5f757f3fSDimitry Andric   } else {
1101*5f757f3fSDimitry Andric     MVT MaskLenVT;
1102*5f757f3fSDimitry Andric     switch (ValVT.getSimpleVT().SimpleTy) {
1103*5f757f3fSDimitry Andric     case MVT::v8i1:
1104*5f757f3fSDimitry Andric       MaskLenVT = MVT::i8;
1105*5f757f3fSDimitry Andric       break;
1106*5f757f3fSDimitry Andric     case MVT::v16i1:
1107*5f757f3fSDimitry Andric       MaskLenVT = MVT::i16;
1108*5f757f3fSDimitry Andric       break;
1109*5f757f3fSDimitry Andric     case MVT::v32i1:
1110*5f757f3fSDimitry Andric       MaskLenVT = MVT::i32;
1111*5f757f3fSDimitry Andric       break;
1112*5f757f3fSDimitry Andric     default:
1113*5f757f3fSDimitry Andric       llvm_unreachable("Expecting a vector of i1 types");
1114*5f757f3fSDimitry Andric     }
1115*5f757f3fSDimitry Andric 
1116*5f757f3fSDimitry Andric     ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1117*5f757f3fSDimitry Andric   }
1118*5f757f3fSDimitry Andric   return DAG.getBitcast(ValVT, ValReturned);
1119*5f757f3fSDimitry Andric }
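// For example (a sketch): a v16i1 value returned in an i32 location is
// recovered as
//
//   Val = bitcast (trunc i32 %loc to i16) to v16i1
//
// while a v64i1 value in an i64 location needs only the final bitcast.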
1120*5f757f3fSDimitry Andric 
1121*5f757f3fSDimitry Andric /// Lower the result values of a call into the
1122*5f757f3fSDimitry Andric /// appropriate copies out of appropriate physical registers.
1123*5f757f3fSDimitry Andric ///
1124*5f757f3fSDimitry Andric SDValue X86TargetLowering::LowerCallResult(
1125*5f757f3fSDimitry Andric     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1126*5f757f3fSDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1127*5f757f3fSDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1128*5f757f3fSDimitry Andric     uint32_t *RegMask) const {
1129*5f757f3fSDimitry Andric 
1130*5f757f3fSDimitry Andric   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1131*5f757f3fSDimitry Andric   // Assign locations to each value returned by this call.
1132*5f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> RVLocs;
1133*5f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1134*5f757f3fSDimitry Andric                  *DAG.getContext());
1135*5f757f3fSDimitry Andric   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1136*5f757f3fSDimitry Andric 
1137*5f757f3fSDimitry Andric   // Copy all of the result registers out of their specified physreg.
1138*5f757f3fSDimitry Andric   for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1139*5f757f3fSDimitry Andric        ++I, ++InsIndex) {
1140*5f757f3fSDimitry Andric     CCValAssign &VA = RVLocs[I];
1141*5f757f3fSDimitry Andric     EVT CopyVT = VA.getLocVT();
1142*5f757f3fSDimitry Andric 
1143*5f757f3fSDimitry Andric     // In some calling conventions we need to remove the used registers
1144*5f757f3fSDimitry Andric     // from the register mask.
1145*5f757f3fSDimitry Andric     if (RegMask) {
1146*5f757f3fSDimitry Andric       for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1147*5f757f3fSDimitry Andric         RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1148*5f757f3fSDimitry Andric     }
1149*5f757f3fSDimitry Andric 
1150*5f757f3fSDimitry Andric     // Report an error if there was an attempt to return FP values via XMM
1151*5f757f3fSDimitry Andric     // registers.
1152*5f757f3fSDimitry Andric     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1153*5f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1154*5f757f3fSDimitry Andric       if (VA.getLocReg() == X86::XMM1)
1155*5f757f3fSDimitry Andric         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1156*5f757f3fSDimitry Andric       else
1157*5f757f3fSDimitry Andric         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1158*5f757f3fSDimitry Andric     } else if (!Subtarget.hasSSE2() &&
1159*5f757f3fSDimitry Andric                X86::FR64XRegClass.contains(VA.getLocReg()) &&
1160*5f757f3fSDimitry Andric                CopyVT == MVT::f64) {
1161*5f757f3fSDimitry Andric       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1162*5f757f3fSDimitry Andric       if (VA.getLocReg() == X86::XMM1)
1163*5f757f3fSDimitry Andric         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1164*5f757f3fSDimitry Andric       else
1165*5f757f3fSDimitry Andric         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1166*5f757f3fSDimitry Andric     }
1167*5f757f3fSDimitry Andric 
1168*5f757f3fSDimitry Andric     // If we prefer to use the value in xmm registers, copy it out as f80 and
1169*5f757f3fSDimitry Andric     // use a truncate to move it from fp stack reg to xmm reg.
1170*5f757f3fSDimitry Andric     bool RoundAfterCopy = false;
1171*5f757f3fSDimitry Andric     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1172*5f757f3fSDimitry Andric         isScalarFPTypeInSSEReg(VA.getValVT())) {
1173*5f757f3fSDimitry Andric       if (!Subtarget.hasX87())
1174*5f757f3fSDimitry Andric         report_fatal_error("X87 register return with X87 disabled");
1175*5f757f3fSDimitry Andric       CopyVT = MVT::f80;
1176*5f757f3fSDimitry Andric       RoundAfterCopy = (CopyVT != VA.getLocVT());
1177*5f757f3fSDimitry Andric     }
1178*5f757f3fSDimitry Andric 
1179*5f757f3fSDimitry Andric     SDValue Val;
1180*5f757f3fSDimitry Andric     if (VA.needsCustom()) {
1181*5f757f3fSDimitry Andric       assert(VA.getValVT() == MVT::v64i1 &&
1182*5f757f3fSDimitry Andric              "Currently the only custom case is when we split v64i1 to 2 regs");
1183*5f757f3fSDimitry Andric       Val =
1184*5f757f3fSDimitry Andric           getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1185*5f757f3fSDimitry Andric     } else {
1186*5f757f3fSDimitry Andric       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1187*5f757f3fSDimitry Andric                   .getValue(1);
1188*5f757f3fSDimitry Andric       Val = Chain.getValue(0);
1189*5f757f3fSDimitry Andric       InGlue = Chain.getValue(2);
1190*5f757f3fSDimitry Andric     }
1191*5f757f3fSDimitry Andric 
1192*5f757f3fSDimitry Andric     if (RoundAfterCopy)
1193*5f757f3fSDimitry Andric       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1194*5f757f3fSDimitry Andric                         // This truncation won't change the value.
1195*5f757f3fSDimitry Andric                         DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1196*5f757f3fSDimitry Andric 
1197*5f757f3fSDimitry Andric     if (VA.isExtInLoc()) {
1198*5f757f3fSDimitry Andric       if (VA.getValVT().isVector() &&
1199*5f757f3fSDimitry Andric           VA.getValVT().getScalarType() == MVT::i1 &&
1200*5f757f3fSDimitry Andric           ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1201*5f757f3fSDimitry Andric            (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1202*5f757f3fSDimitry Andric         // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8.
1203*5f757f3fSDimitry Andric         Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1204*5f757f3fSDimitry Andric       } else
1205*5f757f3fSDimitry Andric         Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1206*5f757f3fSDimitry Andric     }
1207*5f757f3fSDimitry Andric 
1208*5f757f3fSDimitry Andric     if (VA.getLocInfo() == CCValAssign::BCvt)
1209*5f757f3fSDimitry Andric       Val = DAG.getBitcast(VA.getValVT(), Val);
1210*5f757f3fSDimitry Andric 
1211*5f757f3fSDimitry Andric     InVals.push_back(Val);
1212*5f757f3fSDimitry Andric   }
1213*5f757f3fSDimitry Andric 
1214*5f757f3fSDimitry Andric   return Chain;
1215*5f757f3fSDimitry Andric }
1216*5f757f3fSDimitry Andric 
1217*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
1218*5f757f3fSDimitry Andric //                C & StdCall & Fast Calling Convention implementation
1219*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
1220*5f757f3fSDimitry Andric //  The StdCall calling convention is the standard for many Windows API
1221*5f757f3fSDimitry Andric //  routines. It differs from the C calling convention only slightly: the
1222*5f757f3fSDimitry Andric //  callee cleans up the stack rather than the caller, and symbols are
1223*5f757f3fSDimitry Andric //  decorated in some fancy way :) It doesn't support any vector arguments.
1224*5f757f3fSDimitry Andric //  For info on the fast calling convention see the Fast Calling Convention
1225*5f757f3fSDimitry Andric //  (tail call) implementation, LowerX86_32FastCCCallTo.
1226*5f757f3fSDimitry Andric 
1227*5f757f3fSDimitry Andric /// Determines whether Args, either a set of outgoing arguments to a call, or a
1228*5f757f3fSDimitry Andric /// set of incoming args of a call, contains an sret pointer that the callee
1229*5f757f3fSDimitry Andric /// pops.
1230*5f757f3fSDimitry Andric template <typename T>
1231*5f757f3fSDimitry Andric static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1232*5f757f3fSDimitry Andric                              const X86Subtarget &Subtarget) {
1233*5f757f3fSDimitry Andric   // Not C++20 (yet), so no concepts available.
1234*5f757f3fSDimitry Andric   static_assert(std::is_same_v<T, ISD::OutputArg> ||
1235*5f757f3fSDimitry Andric                     std::is_same_v<T, ISD::InputArg>,
1236*5f757f3fSDimitry Andric                 "requires ISD::OutputArg or ISD::InputArg");
1237*5f757f3fSDimitry Andric 
1238*5f757f3fSDimitry Andric   // Only 32-bit targets pop the sret.  It's a 64-bit world these days, so
1239*5f757f3fSDimitry Andric   // early-out for most compilations.
1240*5f757f3fSDimitry Andric   if (!Subtarget.is32Bit())
1241*5f757f3fSDimitry Andric     return false;
1242*5f757f3fSDimitry Andric 
1243*5f757f3fSDimitry Andric   if (Args.empty())
1244*5f757f3fSDimitry Andric     return false;
1245*5f757f3fSDimitry Andric 
1246*5f757f3fSDimitry Andric   // Most calls do not have an sret argument; check the arg next.
1247*5f757f3fSDimitry Andric   const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1248*5f757f3fSDimitry Andric   if (!Flags.isSRet() || Flags.isInReg())
1249*5f757f3fSDimitry Andric     return false;
1250*5f757f3fSDimitry Andric 
1251*5f757f3fSDimitry Andric   // The MSVC ABI does not pop the sret.
1252*5f757f3fSDimitry Andric   if (Subtarget.getTargetTriple().isOSMSVCRT())
1253*5f757f3fSDimitry Andric     return false;
1254*5f757f3fSDimitry Andric 
1255*5f757f3fSDimitry Andric   // MCUs don't pop the sret.
1256*5f757f3fSDimitry Andric   if (Subtarget.isTargetMCU())
1257*5f757f3fSDimitry Andric     return false;
1258*5f757f3fSDimitry Andric 
1259*5f757f3fSDimitry Andric   // Otherwise, the callee pops the sret argument.
1260*5f757f3fSDimitry Andric   return true;
1261*5f757f3fSDimitry Andric }
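// For illustration (a sketch): on a 32-bit non-MSVC, non-MCU target, a call
// to `struct S f(void)` passes the sret pointer as the first stack argument
// and the callee pops those 4 bytes (e.g. with `ret $4`), so the caller must
// not also adjust %esp for them.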
1262*5f757f3fSDimitry Andric 
1263*5f757f3fSDimitry Andric /// Make a copy of an aggregate at the address specified by "Src" to the
1264*5f757f3fSDimitry Andric /// address "Dst" with size and alignment information specified by the
1265*5f757f3fSDimitry Andric /// parameter attribute. The copy will be passed as a byval function parameter.
1266*5f757f3fSDimitry Andric static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1267*5f757f3fSDimitry Andric                                          SDValue Chain, ISD::ArgFlagsTy Flags,
1268*5f757f3fSDimitry Andric                                          SelectionDAG &DAG, const SDLoc &dl) {
1269*5f757f3fSDimitry Andric   SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1270*5f757f3fSDimitry Andric 
1271*5f757f3fSDimitry Andric   return DAG.getMemcpy(
1272*5f757f3fSDimitry Andric       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1273*5f757f3fSDimitry Andric       /*isVolatile*/ false, /*AlwaysInline=*/true,
1274*5f757f3fSDimitry Andric       /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
1275*5f757f3fSDimitry Andric }
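// For illustration (a sketch): for a callee `void g(struct Big b)` where `b`
// is marked byval, the call site uses this helper to emit a memcpy of
// sizeof(struct Big) bytes from the caller's copy into the outgoing argument
// area, honoring the byval alignment.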
1276*5f757f3fSDimitry Andric 
1277*5f757f3fSDimitry Andric /// Return true if the calling convention is one that we can guarantee TCO for.
1278*5f757f3fSDimitry Andric static bool canGuaranteeTCO(CallingConv::ID CC) {
1279*5f757f3fSDimitry Andric   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1280*5f757f3fSDimitry Andric           CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1281*5f757f3fSDimitry Andric           CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1282*5f757f3fSDimitry Andric }
1283*5f757f3fSDimitry Andric 
1284*5f757f3fSDimitry Andric /// Return true if we might ever do TCO for calls with this calling convention.
1285*5f757f3fSDimitry Andric static bool mayTailCallThisCC(CallingConv::ID CC) {
1286*5f757f3fSDimitry Andric   switch (CC) {
1287*5f757f3fSDimitry Andric   // C calling conventions:
1288*5f757f3fSDimitry Andric   case CallingConv::C:
1289*5f757f3fSDimitry Andric   case CallingConv::Win64:
1290*5f757f3fSDimitry Andric   case CallingConv::X86_64_SysV:
1291*5f757f3fSDimitry Andric   // Callee pop conventions:
1292*5f757f3fSDimitry Andric   case CallingConv::X86_ThisCall:
1293*5f757f3fSDimitry Andric   case CallingConv::X86_StdCall:
1294*5f757f3fSDimitry Andric   case CallingConv::X86_VectorCall:
1295*5f757f3fSDimitry Andric   case CallingConv::X86_FastCall:
1296*5f757f3fSDimitry Andric   // Swift:
1297*5f757f3fSDimitry Andric   case CallingConv::Swift:
1298*5f757f3fSDimitry Andric     return true;
1299*5f757f3fSDimitry Andric   default:
1300*5f757f3fSDimitry Andric     return canGuaranteeTCO(CC);
1301*5f757f3fSDimitry Andric   }
1302*5f757f3fSDimitry Andric }
1303*5f757f3fSDimitry Andric 
1304*5f757f3fSDimitry Andric /// Return true if the function is being made into a tailcall target by
1305*5f757f3fSDimitry Andric /// changing its ABI.
1306*5f757f3fSDimitry Andric static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1307*5f757f3fSDimitry Andric   return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1308*5f757f3fSDimitry Andric          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1309*5f757f3fSDimitry Andric }
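// For illustration (a sketch): an IR call such as
//
//   %r = musttail call tailcc i32 @callee(i32 %x)
//   ret i32 %r
//
// uses CallingConv::Tail and is therefore guaranteed TCO regardless of
// -tailcallopt, whereas a plain fastcc call only gets the guarantee when
// GuaranteedTailCallOpt is enabled.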
1310*5f757f3fSDimitry Andric 
1311*5f757f3fSDimitry Andric bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1312*5f757f3fSDimitry Andric   if (!CI->isTailCall())
1313*5f757f3fSDimitry Andric     return false;
1314*5f757f3fSDimitry Andric 
1315*5f757f3fSDimitry Andric   CallingConv::ID CalleeCC = CI->getCallingConv();
1316*5f757f3fSDimitry Andric   if (!mayTailCallThisCC(CalleeCC))
1317*5f757f3fSDimitry Andric     return false;
1318*5f757f3fSDimitry Andric 
1319*5f757f3fSDimitry Andric   return true;
1320*5f757f3fSDimitry Andric }
1321*5f757f3fSDimitry Andric 
1322*5f757f3fSDimitry Andric SDValue
1323*5f757f3fSDimitry Andric X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1324*5f757f3fSDimitry Andric                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1325*5f757f3fSDimitry Andric                                     const SDLoc &dl, SelectionDAG &DAG,
1326*5f757f3fSDimitry Andric                                     const CCValAssign &VA,
1327*5f757f3fSDimitry Andric                                     MachineFrameInfo &MFI, unsigned i) const {
1328*5f757f3fSDimitry Andric   // Create the nodes corresponding to a load from this parameter slot.
1329*5f757f3fSDimitry Andric   ISD::ArgFlagsTy Flags = Ins[i].Flags;
1330*5f757f3fSDimitry Andric   bool AlwaysUseMutable = shouldGuaranteeTCO(
1331*5f757f3fSDimitry Andric       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1332*5f757f3fSDimitry Andric   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1333*5f757f3fSDimitry Andric   EVT ValVT;
1334*5f757f3fSDimitry Andric   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1335*5f757f3fSDimitry Andric 
1336*5f757f3fSDimitry Andric   // If the value is passed by pointer, we have the address passed instead of
1337*5f757f3fSDimitry Andric   // the value itself. No need to extend if the mask value and location share
1338*5f757f3fSDimitry Andric   // the same absolute size.
1339*5f757f3fSDimitry Andric   bool ExtendedInMem =
1340*5f757f3fSDimitry Andric       VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1341*5f757f3fSDimitry Andric       VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1342*5f757f3fSDimitry Andric 
1343*5f757f3fSDimitry Andric   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1344*5f757f3fSDimitry Andric     ValVT = VA.getLocVT();
1345*5f757f3fSDimitry Andric   else
1346*5f757f3fSDimitry Andric     ValVT = VA.getValVT();
1347*5f757f3fSDimitry Andric 
1348*5f757f3fSDimitry Andric   // FIXME: For now, all byval parameter objects are marked mutable. This can be
1349*5f757f3fSDimitry Andric   // changed with more analysis.
1350*5f757f3fSDimitry Andric   // In case of tail call optimization, mark all arguments mutable, since they
1351*5f757f3fSDimitry Andric   // could be overwritten by the lowering of arguments in case of a tail call.
1352*5f757f3fSDimitry Andric   if (Flags.isByVal()) {
1353*5f757f3fSDimitry Andric     unsigned Bytes = Flags.getByValSize();
1354*5f757f3fSDimitry Andric     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1355*5f757f3fSDimitry Andric 
1356*5f757f3fSDimitry Andric     // FIXME: For now, all byval parameter objects are marked as aliasing. This
1357*5f757f3fSDimitry Andric     // can be improved with deeper analysis.
1358*5f757f3fSDimitry Andric     int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1359*5f757f3fSDimitry Andric                                    /*isAliased=*/true);
1360*5f757f3fSDimitry Andric     return DAG.getFrameIndex(FI, PtrVT);
1361*5f757f3fSDimitry Andric   }
1362*5f757f3fSDimitry Andric 
1363*5f757f3fSDimitry Andric   EVT ArgVT = Ins[i].ArgVT;
1364*5f757f3fSDimitry Andric 
1365*5f757f3fSDimitry Andric   // If this is a vector that has been split into multiple parts, don't elide
1366*5f757f3fSDimitry Andric   // the copy. The layout on the stack may not match the packed in-memory
1367*5f757f3fSDimitry Andric   // layout.
1368*5f757f3fSDimitry Andric   bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1369*5f757f3fSDimitry Andric 
1370*5f757f3fSDimitry Andric   // This is an argument in memory. We might be able to perform copy elision:
1371*5f757f3fSDimitry Andric   // if the argument is passed directly in memory without any extension, then
1372*5f757f3fSDimitry Andric   // we can elide the copy. Large vector types, for example, may instead be
1373*5f757f3fSDimitry Andric   // passed indirectly by pointer.
1374*5f757f3fSDimitry Andric   if (Flags.isCopyElisionCandidate() &&
1375*5f757f3fSDimitry Andric       VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1376*5f757f3fSDimitry Andric       !ScalarizedVector) {
1377*5f757f3fSDimitry Andric     SDValue PartAddr;
1378*5f757f3fSDimitry Andric     if (Ins[i].PartOffset == 0) {
1379*5f757f3fSDimitry Andric       // If this is a one-part value or the first part of a multi-part value,
1380*5f757f3fSDimitry Andric       // create a stack object for the entire argument value type and return a
1381*5f757f3fSDimitry Andric       // load from our portion of it. This assumes that if the first part of an
1382*5f757f3fSDimitry Andric       // argument is in memory, the rest will also be in memory.
1383*5f757f3fSDimitry Andric       int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1384*5f757f3fSDimitry Andric                                      /*IsImmutable=*/false);
1385*5f757f3fSDimitry Andric       PartAddr = DAG.getFrameIndex(FI, PtrVT);
1386*5f757f3fSDimitry Andric       return DAG.getLoad(
1387*5f757f3fSDimitry Andric           ValVT, dl, Chain, PartAddr,
1388*5f757f3fSDimitry Andric           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1389*5f757f3fSDimitry Andric     }
1390*5f757f3fSDimitry Andric 
1391*5f757f3fSDimitry Andric     // This is not the first piece of an argument in memory. See if there is
1392*5f757f3fSDimitry Andric     // already a fixed stack object including this offset. If so, assume it
1393*5f757f3fSDimitry Andric     // was created by the PartOffset == 0 branch above and create a load from
1394*5f757f3fSDimitry Andric     // the appropriate offset into it.
1395*5f757f3fSDimitry Andric     int64_t PartBegin = VA.getLocMemOffset();
1396*5f757f3fSDimitry Andric     int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1397*5f757f3fSDimitry Andric     int FI = MFI.getObjectIndexBegin();
1398*5f757f3fSDimitry Andric     for (; MFI.isFixedObjectIndex(FI); ++FI) {
1399*5f757f3fSDimitry Andric       int64_t ObjBegin = MFI.getObjectOffset(FI);
1400*5f757f3fSDimitry Andric       int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1401*5f757f3fSDimitry Andric       if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1402*5f757f3fSDimitry Andric         break;
1403*5f757f3fSDimitry Andric     }
1404*5f757f3fSDimitry Andric     if (MFI.isFixedObjectIndex(FI)) {
1405*5f757f3fSDimitry Andric       SDValue Addr =
1406*5f757f3fSDimitry Andric           DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1407*5f757f3fSDimitry Andric                       DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1408*5f757f3fSDimitry Andric       return DAG.getLoad(ValVT, dl, Chain, Addr,
1409*5f757f3fSDimitry Andric                          MachinePointerInfo::getFixedStack(
1410*5f757f3fSDimitry Andric                              DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1411*5f757f3fSDimitry Andric     }
1412*5f757f3fSDimitry Andric   }
1413*5f757f3fSDimitry Andric 
1414*5f757f3fSDimitry Andric   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1415*5f757f3fSDimitry Andric                                  VA.getLocMemOffset(), isImmutable);
1416*5f757f3fSDimitry Andric 
1417*5f757f3fSDimitry Andric   // Set SExt or ZExt flag.
1418*5f757f3fSDimitry Andric   if (VA.getLocInfo() == CCValAssign::ZExt) {
1419*5f757f3fSDimitry Andric     MFI.setObjectZExt(FI, true);
1420*5f757f3fSDimitry Andric   } else if (VA.getLocInfo() == CCValAssign::SExt) {
1421*5f757f3fSDimitry Andric     MFI.setObjectSExt(FI, true);
1422*5f757f3fSDimitry Andric   }
1423*5f757f3fSDimitry Andric 
1424*5f757f3fSDimitry Andric   MaybeAlign Alignment;
1425*5f757f3fSDimitry Andric   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1426*5f757f3fSDimitry Andric       ValVT != MVT::f80)
1427*5f757f3fSDimitry Andric     Alignment = MaybeAlign(4);
1428*5f757f3fSDimitry Andric   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1429*5f757f3fSDimitry Andric   SDValue Val = DAG.getLoad(
1430*5f757f3fSDimitry Andric       ValVT, dl, Chain, FIN,
1431*5f757f3fSDimitry Andric       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1432*5f757f3fSDimitry Andric       Alignment);
1433*5f757f3fSDimitry Andric   return ExtendedInMem
1434*5f757f3fSDimitry Andric              ? (VA.getValVT().isVector()
1435*5f757f3fSDimitry Andric                     ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1436*5f757f3fSDimitry Andric                     : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1437*5f757f3fSDimitry Andric              : Val;
1438*5f757f3fSDimitry Andric }
1439*5f757f3fSDimitry Andric 
1440*5f757f3fSDimitry Andric // FIXME: Get this from tablegen.
1441*5f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1442*5f757f3fSDimitry Andric                                                 const X86Subtarget &Subtarget) {
1443*5f757f3fSDimitry Andric   assert(Subtarget.is64Bit());
1444*5f757f3fSDimitry Andric 
1445*5f757f3fSDimitry Andric   if (Subtarget.isCallingConvWin64(CallConv)) {
1446*5f757f3fSDimitry Andric     static const MCPhysReg GPR64ArgRegsWin64[] = {
1447*5f757f3fSDimitry Andric       X86::RCX, X86::RDX, X86::R8,  X86::R9
1448*5f757f3fSDimitry Andric     };
1449*5f757f3fSDimitry Andric     return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1450*5f757f3fSDimitry Andric   }
1451*5f757f3fSDimitry Andric 
1452*5f757f3fSDimitry Andric   static const MCPhysReg GPR64ArgRegs64Bit[] = {
1453*5f757f3fSDimitry Andric     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1454*5f757f3fSDimitry Andric   };
1455*5f757f3fSDimitry Andric   return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1456*5f757f3fSDimitry Andric }
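// For illustration (a sketch): for `long f(long a, long b)`, the SysV order
// above places a in RDI and b in RSI, while the Win64 order places a in RCX
// and b in RDX.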
1457*5f757f3fSDimitry Andric 
1458*5f757f3fSDimitry Andric // FIXME: Get this from tablegen.
1459*5f757f3fSDimitry Andric static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1460*5f757f3fSDimitry Andric                                                 CallingConv::ID CallConv,
1461*5f757f3fSDimitry Andric                                                 const X86Subtarget &Subtarget) {
1462*5f757f3fSDimitry Andric   assert(Subtarget.is64Bit());
1463*5f757f3fSDimitry Andric   if (Subtarget.isCallingConvWin64(CallConv)) {
1464*5f757f3fSDimitry Andric     // The XMM registers which might contain var arg parameters are shadowed
1465*5f757f3fSDimitry Andric     // in their paired GPR.  So we only need to save the GPRs to their home
1466*5f757f3fSDimitry Andric     // slots.
1467*5f757f3fSDimitry Andric     // TODO: __vectorcall will change this.
1468*5f757f3fSDimitry Andric     return std::nullopt;
1469*5f757f3fSDimitry Andric   }
1470*5f757f3fSDimitry Andric 
1471*5f757f3fSDimitry Andric   bool isSoftFloat = Subtarget.useSoftFloat();
1472*5f757f3fSDimitry Andric   if (isSoftFloat || !Subtarget.hasSSE1())
1473*5f757f3fSDimitry Andric     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1474*5f757f3fSDimitry Andric     // registers.
1475*5f757f3fSDimitry Andric     return std::nullopt;
1476*5f757f3fSDimitry Andric 
1477*5f757f3fSDimitry Andric   static const MCPhysReg XMMArgRegs64Bit[] = {
1478*5f757f3fSDimitry Andric     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1479*5f757f3fSDimitry Andric     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1480*5f757f3fSDimitry Andric   };
1481*5f757f3fSDimitry Andric   return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1482*5f757f3fSDimitry Andric }
1483*5f757f3fSDimitry Andric 
1484*5f757f3fSDimitry Andric #ifndef NDEBUG
1485*5f757f3fSDimitry Andric static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1486*5f757f3fSDimitry Andric   return llvm::is_sorted(
1487*5f757f3fSDimitry Andric       ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1488*5f757f3fSDimitry Andric         return A.getValNo() < B.getValNo();
1489*5f757f3fSDimitry Andric       });
1490*5f757f3fSDimitry Andric }
1491*5f757f3fSDimitry Andric #endif
1492*5f757f3fSDimitry Andric 
1493*5f757f3fSDimitry Andric namespace {
1494*5f757f3fSDimitry Andric /// This is a helper class for lowering variable argument parameters.
1495*5f757f3fSDimitry Andric class VarArgsLoweringHelper {
1496*5f757f3fSDimitry Andric public:
1497*5f757f3fSDimitry Andric   VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1498*5f757f3fSDimitry Andric                         SelectionDAG &DAG, const X86Subtarget &Subtarget,
1499*5f757f3fSDimitry Andric                         CallingConv::ID CallConv, CCState &CCInfo)
1500*5f757f3fSDimitry Andric       : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1501*5f757f3fSDimitry Andric         TheMachineFunction(DAG.getMachineFunction()),
1502*5f757f3fSDimitry Andric         TheFunction(TheMachineFunction.getFunction()),
1503*5f757f3fSDimitry Andric         FrameInfo(TheMachineFunction.getFrameInfo()),
1504*5f757f3fSDimitry Andric         FrameLowering(*Subtarget.getFrameLowering()),
1505*5f757f3fSDimitry Andric         TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1506*5f757f3fSDimitry Andric         CCInfo(CCInfo) {}
1507*5f757f3fSDimitry Andric 
1508*5f757f3fSDimitry Andric   // Lower variable argument parameters.
1509*5f757f3fSDimitry Andric   void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1510*5f757f3fSDimitry Andric 
1511*5f757f3fSDimitry Andric private:
1512*5f757f3fSDimitry Andric   void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1513*5f757f3fSDimitry Andric 
1514*5f757f3fSDimitry Andric   void forwardMustTailParameters(SDValue &Chain);
1515*5f757f3fSDimitry Andric 
1516*5f757f3fSDimitry Andric   bool is64Bit() const { return Subtarget.is64Bit(); }
1517*5f757f3fSDimitry Andric   bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1518*5f757f3fSDimitry Andric 
1519*5f757f3fSDimitry Andric   X86MachineFunctionInfo *FuncInfo;
1520*5f757f3fSDimitry Andric   const SDLoc &DL;
1521*5f757f3fSDimitry Andric   SelectionDAG &DAG;
1522*5f757f3fSDimitry Andric   const X86Subtarget &Subtarget;
1523*5f757f3fSDimitry Andric   MachineFunction &TheMachineFunction;
1524*5f757f3fSDimitry Andric   const Function &TheFunction;
1525*5f757f3fSDimitry Andric   MachineFrameInfo &FrameInfo;
1526*5f757f3fSDimitry Andric   const TargetFrameLowering &FrameLowering;
1527*5f757f3fSDimitry Andric   const TargetLowering &TargLowering;
1528*5f757f3fSDimitry Andric   CallingConv::ID CallConv;
1529*5f757f3fSDimitry Andric   CCState &CCInfo;
1530*5f757f3fSDimitry Andric };
1531*5f757f3fSDimitry Andric } // namespace
1532*5f757f3fSDimitry Andric 
1533*5f757f3fSDimitry Andric void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1534*5f757f3fSDimitry Andric     SDValue &Chain, unsigned StackSize) {
1535*5f757f3fSDimitry Andric   // If the function takes a variable number of arguments, make a frame index
1536*5f757f3fSDimitry Andric   // for the start of the first vararg value, for use in expanding
1537*5f757f3fSDimitry Andric   // llvm.va_start. We can skip this if there are no va_start calls.
1538*5f757f3fSDimitry Andric   if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1539*5f757f3fSDimitry Andric                     CallConv != CallingConv::X86_ThisCall)) {
1540*5f757f3fSDimitry Andric     FuncInfo->setVarArgsFrameIndex(
1541*5f757f3fSDimitry Andric         FrameInfo.CreateFixedObject(1, StackSize, true));
1542*5f757f3fSDimitry Andric   }
1543*5f757f3fSDimitry Andric 
1544*5f757f3fSDimitry Andric   // 64-bit calling conventions support varargs and register parameters, so we
1545*5f757f3fSDimitry Andric   // have to do extra work to spill them in the prologue.
1546*5f757f3fSDimitry Andric   if (is64Bit()) {
1547*5f757f3fSDimitry Andric     // Find the first unallocated argument register in each register class.
1548*5f757f3fSDimitry Andric     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1549*5f757f3fSDimitry Andric     ArrayRef<MCPhysReg> ArgXMMs =
1550*5f757f3fSDimitry Andric         get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1551*5f757f3fSDimitry Andric     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1552*5f757f3fSDimitry Andric     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1553*5f757f3fSDimitry Andric 
1554*5f757f3fSDimitry Andric     assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1555*5f757f3fSDimitry Andric            "SSE register cannot be used when SSE is disabled!");
1556*5f757f3fSDimitry Andric 
1557*5f757f3fSDimitry Andric     if (isWin64()) {
1558*5f757f3fSDimitry Andric       // Get to the caller-allocated home save location.  Add 8 to account
1559*5f757f3fSDimitry Andric       // for the return address.
1560*5f757f3fSDimitry Andric       int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1561*5f757f3fSDimitry Andric       FuncInfo->setRegSaveFrameIndex(
1562*5f757f3fSDimitry Andric           FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1563*5f757f3fSDimitry Andric       // Fix up the vararg frame index to point into the shadow area (4 x i64).
1564*5f757f3fSDimitry Andric       if (NumIntRegs < 4)
1565*5f757f3fSDimitry Andric         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1566*5f757f3fSDimitry Andric     } else {
1567*5f757f3fSDimitry Andric       // For X86-64, if there are vararg parameters that are passed via
1568*5f757f3fSDimitry Andric       // registers, then we must store them to their spots on the stack so
1569*5f757f3fSDimitry Andric       // they may be loaded by dereferencing the result of va_arg.
1570*5f757f3fSDimitry Andric       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1571*5f757f3fSDimitry Andric       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1572*5f757f3fSDimitry Andric       FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1573*5f757f3fSDimitry Andric           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1574*5f757f3fSDimitry Andric     }
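    // For reference (SysV x86-64, a sketch): the register save area created
    // above holds 6 * 8 = 48 bytes of GPR slots followed by 8 * 16 = 128
    // bytes of XMM slots (176 bytes total); va_arg walks it via the
    // gp_offset and fp_offset fields of the va_list.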
1575*5f757f3fSDimitry Andric 
1576*5f757f3fSDimitry Andric     SmallVector<SDValue, 6>
1577*5f757f3fSDimitry Andric         LiveGPRs; // list of SDValues for GPRs holding live input values
1578*5f757f3fSDimitry Andric     SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValues for XMM registers
1579*5f757f3fSDimitry Andric                                          // holding live input values
1580*5f757f3fSDimitry Andric     SDValue ALVal; // if applicable, keeps the SDValue for the %al register
1581*5f757f3fSDimitry Andric 
1582*5f757f3fSDimitry Andric     // Gather all the live-in physical registers.
1583*5f757f3fSDimitry Andric     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1584*5f757f3fSDimitry Andric       Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1585*5f757f3fSDimitry Andric       LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1586*5f757f3fSDimitry Andric     }
1587*5f757f3fSDimitry Andric     const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1588*5f757f3fSDimitry Andric     if (!AvailableXmms.empty()) {
1589*5f757f3fSDimitry Andric       Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1590*5f757f3fSDimitry Andric       ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1591*5f757f3fSDimitry Andric       for (MCPhysReg Reg : AvailableXmms) {
1592*5f757f3fSDimitry Andric         // FastRegisterAllocator spills virtual registers at basic
1593*5f757f3fSDimitry Andric         // block boundaries. That leads to uses of xmm registers
1594*5f757f3fSDimitry Andric         // outside of the check for %al. Pass physical registers to
1595*5f757f3fSDimitry Andric         // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1596*5f757f3fSDimitry Andric         TheMachineFunction.getRegInfo().addLiveIn(Reg);
1597*5f757f3fSDimitry Andric         LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1598*5f757f3fSDimitry Andric       }
1599*5f757f3fSDimitry Andric     }
1600*5f757f3fSDimitry Andric 
1601*5f757f3fSDimitry Andric     // Store the integer parameter registers.
1602*5f757f3fSDimitry Andric     SmallVector<SDValue, 8> MemOps;
1603*5f757f3fSDimitry Andric     SDValue RSFIN =
1604*5f757f3fSDimitry Andric         DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1605*5f757f3fSDimitry Andric                           TargLowering.getPointerTy(DAG.getDataLayout()));
1606*5f757f3fSDimitry Andric     unsigned Offset = FuncInfo->getVarArgsGPOffset();
1607*5f757f3fSDimitry Andric     for (SDValue Val : LiveGPRs) {
1608*5f757f3fSDimitry Andric       SDValue FIN = DAG.getNode(ISD::ADD, DL,
1609*5f757f3fSDimitry Andric                                 TargLowering.getPointerTy(DAG.getDataLayout()),
1610*5f757f3fSDimitry Andric                                 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1611*5f757f3fSDimitry Andric       SDValue Store =
1612*5f757f3fSDimitry Andric           DAG.getStore(Val.getValue(1), DL, Val, FIN,
1613*5f757f3fSDimitry Andric                        MachinePointerInfo::getFixedStack(
1614*5f757f3fSDimitry Andric                            DAG.getMachineFunction(),
1615*5f757f3fSDimitry Andric                            FuncInfo->getRegSaveFrameIndex(), Offset));
1616*5f757f3fSDimitry Andric       MemOps.push_back(Store);
1617*5f757f3fSDimitry Andric       Offset += 8;
1618*5f757f3fSDimitry Andric     }
1619*5f757f3fSDimitry Andric 
1620*5f757f3fSDimitry Andric     // Now store the XMM (fp + vector) parameter registers.
1621*5f757f3fSDimitry Andric     if (!LiveXMMRegs.empty()) {
1622*5f757f3fSDimitry Andric       SmallVector<SDValue, 12> SaveXMMOps;
1623*5f757f3fSDimitry Andric       SaveXMMOps.push_back(Chain);
1624*5f757f3fSDimitry Andric       SaveXMMOps.push_back(ALVal);
1625*5f757f3fSDimitry Andric       SaveXMMOps.push_back(RSFIN);
1626*5f757f3fSDimitry Andric       SaveXMMOps.push_back(
1627*5f757f3fSDimitry Andric           DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1628*5f757f3fSDimitry Andric       llvm::append_range(SaveXMMOps, LiveXMMRegs);
1629*5f757f3fSDimitry Andric       MachineMemOperand *StoreMMO =
1630*5f757f3fSDimitry Andric           DAG.getMachineFunction().getMachineMemOperand(
1631*5f757f3fSDimitry Andric               MachinePointerInfo::getFixedStack(
1632*5f757f3fSDimitry Andric                   DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1633*5f757f3fSDimitry Andric                   Offset),
1634*5f757f3fSDimitry Andric               MachineMemOperand::MOStore, 128, Align(16));
1635*5f757f3fSDimitry Andric       MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1636*5f757f3fSDimitry Andric                                                DL, DAG.getVTList(MVT::Other),
1637*5f757f3fSDimitry Andric                                                SaveXMMOps, MVT::i8, StoreMMO));
1638*5f757f3fSDimitry Andric     }
1639*5f757f3fSDimitry Andric 
1640*5f757f3fSDimitry Andric     if (!MemOps.empty())
1641*5f757f3fSDimitry Andric       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1642*5f757f3fSDimitry Andric   }
1643*5f757f3fSDimitry Andric }
1644*5f757f3fSDimitry Andric 
1645*5f757f3fSDimitry Andric void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1646*5f757f3fSDimitry Andric   // Find the largest legal vector type.
1647*5f757f3fSDimitry Andric   MVT VecVT = MVT::Other;
1648*5f757f3fSDimitry Andric   // FIXME: Only some x86_32 calling conventions support AVX512.
1649*5f757f3fSDimitry Andric   if (Subtarget.useAVX512Regs() &&
1650*5f757f3fSDimitry Andric       (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1651*5f757f3fSDimitry Andric                      CallConv == CallingConv::Intel_OCL_BI)))
1652*5f757f3fSDimitry Andric     VecVT = MVT::v16f32;
1653*5f757f3fSDimitry Andric   else if (Subtarget.hasAVX())
1654*5f757f3fSDimitry Andric     VecVT = MVT::v8f32;
1655*5f757f3fSDimitry Andric   else if (Subtarget.hasSSE2())
1656*5f757f3fSDimitry Andric     VecVT = MVT::v4f32;
1657*5f757f3fSDimitry Andric 
1658*5f757f3fSDimitry Andric   // We forward some GPRs and some vector types.
1659*5f757f3fSDimitry Andric   SmallVector<MVT, 2> RegParmTypes;
1660*5f757f3fSDimitry Andric   MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1661*5f757f3fSDimitry Andric   RegParmTypes.push_back(IntVT);
1662*5f757f3fSDimitry Andric   if (VecVT != MVT::Other)
1663*5f757f3fSDimitry Andric     RegParmTypes.push_back(VecVT);
1664*5f757f3fSDimitry Andric 
1665*5f757f3fSDimitry Andric   // Compute the set of forwarded registers. The rest are scratch.
1666*5f757f3fSDimitry Andric   SmallVectorImpl<ForwardedRegister> &Forwards =
1667*5f757f3fSDimitry Andric       FuncInfo->getForwardedMustTailRegParms();
1668*5f757f3fSDimitry Andric   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1669*5f757f3fSDimitry Andric 
1670*5f757f3fSDimitry Andric   // Forward AL for SysV x86_64 targets, since it is used for varargs.
1671*5f757f3fSDimitry Andric   if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1672*5f757f3fSDimitry Andric     Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1673*5f757f3fSDimitry Andric     Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1674*5f757f3fSDimitry Andric   }
1675*5f757f3fSDimitry Andric 
1676*5f757f3fSDimitry Andric   // Copy all forwards from physical to virtual registers.
1677*5f757f3fSDimitry Andric   for (ForwardedRegister &FR : Forwards) {
1678*5f757f3fSDimitry Andric     // FIXME: Can we use a less constrained schedule?
1679*5f757f3fSDimitry Andric     SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1680*5f757f3fSDimitry Andric     FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1681*5f757f3fSDimitry Andric         TargLowering.getRegClassFor(FR.VT));
1682*5f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1683*5f757f3fSDimitry Andric   }
1684*5f757f3fSDimitry Andric }
1685*5f757f3fSDimitry Andric 
1686*5f757f3fSDimitry Andric void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1687*5f757f3fSDimitry Andric                                                    unsigned StackSize) {
1688*5f757f3fSDimitry Andric   // Set FrameIndex to the sentinel value 0xAAAAAAA to mark the unset state.
1689*5f757f3fSDimitry Andric   // If necessary, it will be set to the correct value later.
1690*5f757f3fSDimitry Andric   FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1691*5f757f3fSDimitry Andric   FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1692*5f757f3fSDimitry Andric 
1693*5f757f3fSDimitry Andric   if (FrameInfo.hasVAStart())
1694*5f757f3fSDimitry Andric     createVarArgAreaAndStoreRegisters(Chain, StackSize);
1695*5f757f3fSDimitry Andric 
1696*5f757f3fSDimitry Andric   if (FrameInfo.hasMustTailInVarArgFunc())
1697*5f757f3fSDimitry Andric     forwardMustTailParameters(Chain);
1698*5f757f3fSDimitry Andric }
1699*5f757f3fSDimitry Andric 
1700*5f757f3fSDimitry Andric SDValue X86TargetLowering::LowerFormalArguments(
1701*5f757f3fSDimitry Andric     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1702*5f757f3fSDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1703*5f757f3fSDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1704*5f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
1705*5f757f3fSDimitry Andric   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1706*5f757f3fSDimitry Andric 
1707*5f757f3fSDimitry Andric   const Function &F = MF.getFunction();
1708*5f757f3fSDimitry Andric   if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1709*5f757f3fSDimitry Andric       F.getName() == "main")
1710*5f757f3fSDimitry Andric     FuncInfo->setForceFramePointer(true);
1711*5f757f3fSDimitry Andric 
1712*5f757f3fSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
1713*5f757f3fSDimitry Andric   bool Is64Bit = Subtarget.is64Bit();
1714*5f757f3fSDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1715*5f757f3fSDimitry Andric 
1716*5f757f3fSDimitry Andric   assert(
1717*5f757f3fSDimitry Andric       !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1718*5f757f3fSDimitry Andric       "Var args not supported with calling conventions regcall, fastcc, ghc or hipe");
1719*5f757f3fSDimitry Andric 
1720*5f757f3fSDimitry Andric   // Assign locations to all of the incoming arguments.
1721*5f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> ArgLocs;
1722*5f757f3fSDimitry Andric   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1723*5f757f3fSDimitry Andric 
1724*5f757f3fSDimitry Andric   // Allocate shadow area for Win64.
1725*5f757f3fSDimitry Andric   if (IsWin64)
1726*5f757f3fSDimitry Andric     CCInfo.AllocateStack(32, Align(8));
1727*5f757f3fSDimitry Andric 
1728*5f757f3fSDimitry Andric   CCInfo.AnalyzeArguments(Ins, CC_X86);
1729*5f757f3fSDimitry Andric 
1730*5f757f3fSDimitry Andric   // In vectorcall calling convention a second pass is required for the HVA
1731*5f757f3fSDimitry Andric   // types.
1732*5f757f3fSDimitry Andric   if (CallingConv::X86_VectorCall == CallConv) {
1733*5f757f3fSDimitry Andric     CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1734*5f757f3fSDimitry Andric   }
1735*5f757f3fSDimitry Andric 
1736*5f757f3fSDimitry Andric   // The next loop assumes that the locations are in the same order as the
1737*5f757f3fSDimitry Andric   // input arguments.
1738*5f757f3fSDimitry Andric   assert(isSortedByValueNo(ArgLocs) &&
1739*5f757f3fSDimitry Andric          "Argument Location list must be sorted before lowering");
1740*5f757f3fSDimitry Andric 
1741*5f757f3fSDimitry Andric   SDValue ArgValue;
1742*5f757f3fSDimitry Andric   for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1743*5f757f3fSDimitry Andric        ++I, ++InsIndex) {
1744*5f757f3fSDimitry Andric     assert(InsIndex < Ins.size() && "Invalid Ins index");
1745*5f757f3fSDimitry Andric     CCValAssign &VA = ArgLocs[I];
1746*5f757f3fSDimitry Andric 
1747*5f757f3fSDimitry Andric     if (VA.isRegLoc()) {
1748*5f757f3fSDimitry Andric       EVT RegVT = VA.getLocVT();
1749*5f757f3fSDimitry Andric       if (VA.needsCustom()) {
1750*5f757f3fSDimitry Andric         assert(
1751*5f757f3fSDimitry Andric             VA.getValVT() == MVT::v64i1 &&
1752*5f757f3fSDimitry Andric             "Currently the only custom case is when we split v64i1 to 2 regs");
1753*5f757f3fSDimitry Andric 
1754*5f757f3fSDimitry Andric         // In the regcall calling convention, v64i1 values that are
1755*5f757f3fSDimitry Andric         // compiled for a 32-bit arch are split up into two registers.
1756*5f757f3fSDimitry Andric         ArgValue =
1757*5f757f3fSDimitry Andric             getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1758*5f757f3fSDimitry Andric       } else {
1759*5f757f3fSDimitry Andric         const TargetRegisterClass *RC;
1760*5f757f3fSDimitry Andric         if (RegVT == MVT::i8)
1761*5f757f3fSDimitry Andric           RC = &X86::GR8RegClass;
1762*5f757f3fSDimitry Andric         else if (RegVT == MVT::i16)
1763*5f757f3fSDimitry Andric           RC = &X86::GR16RegClass;
1764*5f757f3fSDimitry Andric         else if (RegVT == MVT::i32)
1765*5f757f3fSDimitry Andric           RC = &X86::GR32RegClass;
1766*5f757f3fSDimitry Andric         else if (Is64Bit && RegVT == MVT::i64)
1767*5f757f3fSDimitry Andric           RC = &X86::GR64RegClass;
1768*5f757f3fSDimitry Andric         else if (RegVT == MVT::f16)
1769*5f757f3fSDimitry Andric           RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1770*5f757f3fSDimitry Andric         else if (RegVT == MVT::f32)
1771*5f757f3fSDimitry Andric           RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1772*5f757f3fSDimitry Andric         else if (RegVT == MVT::f64)
1773*5f757f3fSDimitry Andric           RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1774*5f757f3fSDimitry Andric         else if (RegVT == MVT::f80)
1775*5f757f3fSDimitry Andric           RC = &X86::RFP80RegClass;
1776*5f757f3fSDimitry Andric         else if (RegVT == MVT::f128)
1777*5f757f3fSDimitry Andric           RC = &X86::VR128RegClass;
1778*5f757f3fSDimitry Andric         else if (RegVT.is512BitVector())
1779*5f757f3fSDimitry Andric           RC = &X86::VR512RegClass;
1780*5f757f3fSDimitry Andric         else if (RegVT.is256BitVector())
1781*5f757f3fSDimitry Andric           RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1782*5f757f3fSDimitry Andric         else if (RegVT.is128BitVector())
1783*5f757f3fSDimitry Andric           RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1784*5f757f3fSDimitry Andric         else if (RegVT == MVT::x86mmx)
1785*5f757f3fSDimitry Andric           RC = &X86::VR64RegClass;
1786*5f757f3fSDimitry Andric         else if (RegVT == MVT::v1i1)
1787*5f757f3fSDimitry Andric           RC = &X86::VK1RegClass;
1788*5f757f3fSDimitry Andric         else if (RegVT == MVT::v8i1)
1789*5f757f3fSDimitry Andric           RC = &X86::VK8RegClass;
1790*5f757f3fSDimitry Andric         else if (RegVT == MVT::v16i1)
1791*5f757f3fSDimitry Andric           RC = &X86::VK16RegClass;
1792*5f757f3fSDimitry Andric         else if (RegVT == MVT::v32i1)
1793*5f757f3fSDimitry Andric           RC = &X86::VK32RegClass;
1794*5f757f3fSDimitry Andric         else if (RegVT == MVT::v64i1)
1795*5f757f3fSDimitry Andric           RC = &X86::VK64RegClass;
1796*5f757f3fSDimitry Andric         else
1797*5f757f3fSDimitry Andric           llvm_unreachable("Unknown argument type!");
1798*5f757f3fSDimitry Andric 
1799*5f757f3fSDimitry Andric         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1800*5f757f3fSDimitry Andric         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1801*5f757f3fSDimitry Andric       }
1802*5f757f3fSDimitry Andric 
1803*5f757f3fSDimitry Andric       // If this is an 8- or 16-bit value, it is really passed promoted to 32
1804*5f757f3fSDimitry Andric       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
1805*5f757f3fSDimitry Andric       // right size.
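                                   // For example (illustrative): a zeroext i8 argument arriving in a
                                   // 32-bit register becomes roughly (truncate (AssertZext i32:%copy,
                                   // i8)), so later combines know the upper 24 bits are already zero.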
1806*5f757f3fSDimitry Andric       if (VA.getLocInfo() == CCValAssign::SExt)
1807*5f757f3fSDimitry Andric         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1808*5f757f3fSDimitry Andric                                DAG.getValueType(VA.getValVT()));
1809*5f757f3fSDimitry Andric       else if (VA.getLocInfo() == CCValAssign::ZExt)
1810*5f757f3fSDimitry Andric         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1811*5f757f3fSDimitry Andric                                DAG.getValueType(VA.getValVT()));
1812*5f757f3fSDimitry Andric       else if (VA.getLocInfo() == CCValAssign::BCvt)
1813*5f757f3fSDimitry Andric         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1814*5f757f3fSDimitry Andric 
1815*5f757f3fSDimitry Andric       if (VA.isExtInLoc()) {
1816*5f757f3fSDimitry Andric         // Handle MMX values passed in XMM regs.
1817*5f757f3fSDimitry Andric         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1818*5f757f3fSDimitry Andric           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1819*5f757f3fSDimitry Andric         else if (VA.getValVT().isVector() &&
1820*5f757f3fSDimitry Andric                  VA.getValVT().getScalarType() == MVT::i1 &&
1821*5f757f3fSDimitry Andric                  ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1822*5f757f3fSDimitry Andric                   (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1823*5f757f3fSDimitry Andric           // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1824*5f757f3fSDimitry Andric           ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1825*5f757f3fSDimitry Andric         } else
1826*5f757f3fSDimitry Andric           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1827*5f757f3fSDimitry Andric       }
1828*5f757f3fSDimitry Andric     } else {
1829*5f757f3fSDimitry Andric       assert(VA.isMemLoc());
1830*5f757f3fSDimitry Andric       ArgValue =
1831*5f757f3fSDimitry Andric           LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1832*5f757f3fSDimitry Andric     }
1833*5f757f3fSDimitry Andric 
1834*5f757f3fSDimitry Andric     // If value is passed via pointer - do a load.
1835*5f757f3fSDimitry Andric     if (VA.getLocInfo() == CCValAssign::Indirect &&
1836*5f757f3fSDimitry Andric         !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1837*5f757f3fSDimitry Andric       ArgValue =
1838*5f757f3fSDimitry Andric           DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1839*5f757f3fSDimitry Andric     }
1840*5f757f3fSDimitry Andric 
1841*5f757f3fSDimitry Andric     InVals.push_back(ArgValue);
1842*5f757f3fSDimitry Andric   }
1843*5f757f3fSDimitry Andric 
1844*5f757f3fSDimitry Andric   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1845*5f757f3fSDimitry Andric     if (Ins[I].Flags.isSwiftAsync()) {
1846*5f757f3fSDimitry Andric       auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1847*5f757f3fSDimitry Andric       if (Subtarget.is64Bit())
1848*5f757f3fSDimitry Andric         X86FI->setHasSwiftAsyncContext(true);
1849*5f757f3fSDimitry Andric       else {
1850*5f757f3fSDimitry Andric         int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
1851*5f757f3fSDimitry Andric         X86FI->setSwiftAsyncContextFrameIdx(FI);
1852*5f757f3fSDimitry Andric         SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
1853*5f757f3fSDimitry Andric                                   DAG.getFrameIndex(FI, MVT::i32),
1854*5f757f3fSDimitry Andric                                   MachinePointerInfo::getFixedStack(MF, FI));
1855*5f757f3fSDimitry Andric         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1856*5f757f3fSDimitry Andric       }
1857*5f757f3fSDimitry Andric     }
1858*5f757f3fSDimitry Andric 
1859*5f757f3fSDimitry Andric     // The Swift calling convention does not require us to copy the sret
1860*5f757f3fSDimitry Andric     // argument into %rax/%eax for the return, so SRetReturnReg is not set.
1861*5f757f3fSDimitry Andric     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1862*5f757f3fSDimitry Andric       continue;
1863*5f757f3fSDimitry Andric 
1864*5f757f3fSDimitry Andric     // All x86 ABIs require that for returning structs by value we copy the
1865*5f757f3fSDimitry Andric     // sret argument into %rax/%eax (depending on ABI) for the return. Save
1866*5f757f3fSDimitry Andric     // the argument into a virtual register so that we can access it from the
1867*5f757f3fSDimitry Andric     // return points.
1868*5f757f3fSDimitry Andric     if (Ins[I].Flags.isSRet()) {
1869*5f757f3fSDimitry Andric       assert(!FuncInfo->getSRetReturnReg() &&
1870*5f757f3fSDimitry Andric              "SRet return has already been set");
1871*5f757f3fSDimitry Andric       MVT PtrTy = getPointerTy(DAG.getDataLayout());
1872*5f757f3fSDimitry Andric       Register Reg =
1873*5f757f3fSDimitry Andric           MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1874*5f757f3fSDimitry Andric       FuncInfo->setSRetReturnReg(Reg);
1875*5f757f3fSDimitry Andric       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1876*5f757f3fSDimitry Andric       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1877*5f757f3fSDimitry Andric       break;
1878*5f757f3fSDimitry Andric     }
1879*5f757f3fSDimitry Andric   }
1880*5f757f3fSDimitry Andric 
1881*5f757f3fSDimitry Andric   unsigned StackSize = CCInfo.getStackSize();
1882*5f757f3fSDimitry Andric   // Align stack specially for tail calls.
1883*5f757f3fSDimitry Andric   if (shouldGuaranteeTCO(CallConv,
1884*5f757f3fSDimitry Andric                          MF.getTarget().Options.GuaranteedTailCallOpt))
1885*5f757f3fSDimitry Andric     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1886*5f757f3fSDimitry Andric 
1887*5f757f3fSDimitry Andric   if (IsVarArg)
1888*5f757f3fSDimitry Andric     VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1889*5f757f3fSDimitry Andric         .lowerVarArgsParameters(Chain, StackSize);
1890*5f757f3fSDimitry Andric 
1891*5f757f3fSDimitry Andric   // Some CCs need callee pop.
1892*5f757f3fSDimitry Andric   if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1893*5f757f3fSDimitry Andric                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
1894*5f757f3fSDimitry Andric     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1895*5f757f3fSDimitry Andric   } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1896*5f757f3fSDimitry Andric     // X86 interrupts must pop the error code (and the alignment padding) if
1897*5f757f3fSDimitry Andric     // present.
1898*5f757f3fSDimitry Andric     FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1899*5f757f3fSDimitry Andric   } else {
1900*5f757f3fSDimitry Andric     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1901*5f757f3fSDimitry Andric     // If this is an sret function, the return should pop the hidden pointer.
1902*5f757f3fSDimitry Andric     if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1903*5f757f3fSDimitry Andric       FuncInfo->setBytesToPopOnReturn(4);
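                                 // Illustrative example (not from this file): on 32-bit targets where
                                 // hasCalleePopSRet holds, a C function such as
                                 //   struct Big g(void);
                                 // returns with 'ret $4', popping only the hidden sret pointer; the
                                 // caller remains responsible for its ordinary argument bytes.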
1904*5f757f3fSDimitry Andric   }
1905*5f757f3fSDimitry Andric 
1906*5f757f3fSDimitry Andric   if (!Is64Bit) {
1907*5f757f3fSDimitry Andric     // RegSaveFrameIndex is X86-64 only.
1908*5f757f3fSDimitry Andric     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1909*5f757f3fSDimitry Andric   }
1910*5f757f3fSDimitry Andric 
1911*5f757f3fSDimitry Andric   FuncInfo->setArgumentStackSize(StackSize);
1912*5f757f3fSDimitry Andric 
1913*5f757f3fSDimitry Andric   if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1914*5f757f3fSDimitry Andric     EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1915*5f757f3fSDimitry Andric     if (Personality == EHPersonality::CoreCLR) {
1916*5f757f3fSDimitry Andric       assert(Is64Bit);
1917*5f757f3fSDimitry Andric       // TODO: Add a mechanism to frame lowering that will allow us to indicate
1918*5f757f3fSDimitry Andric       // that we'd prefer this slot be allocated towards the bottom of the frame
1919*5f757f3fSDimitry Andric       // (i.e. near the stack pointer after allocating the frame).  Every
1920*5f757f3fSDimitry Andric       // funclet needs a copy of this slot in its (mostly empty) frame, and the
1921*5f757f3fSDimitry Andric       // offset from the bottom of this and each funclet's frame must be the
1922*5f757f3fSDimitry Andric       // same, so the size of funclets' (mostly empty) frames is dictated by
1923*5f757f3fSDimitry Andric       // how far this slot is from the bottom (since they allocate just enough
1924*5f757f3fSDimitry Andric       // space to accommodate holding this slot at the correct offset).
1925*5f757f3fSDimitry Andric       int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1926*5f757f3fSDimitry Andric       EHInfo->PSPSymFrameIdx = PSPSymFI;
1927*5f757f3fSDimitry Andric     }
1928*5f757f3fSDimitry Andric   }
1929*5f757f3fSDimitry Andric 
1930*5f757f3fSDimitry Andric   if (shouldDisableArgRegFromCSR(CallConv) ||
1931*5f757f3fSDimitry Andric       F.hasFnAttribute("no_caller_saved_registers")) {
1932*5f757f3fSDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
1933*5f757f3fSDimitry Andric     for (std::pair<Register, Register> Pair : MRI.liveins())
1934*5f757f3fSDimitry Andric       MRI.disableCalleeSavedRegister(Pair.first);
1935*5f757f3fSDimitry Andric   }
1936*5f757f3fSDimitry Andric 
1937*5f757f3fSDimitry Andric   return Chain;
1938*5f757f3fSDimitry Andric }
1939*5f757f3fSDimitry Andric 
1940*5f757f3fSDimitry Andric SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1941*5f757f3fSDimitry Andric                                             SDValue Arg, const SDLoc &dl,
1942*5f757f3fSDimitry Andric                                             SelectionDAG &DAG,
1943*5f757f3fSDimitry Andric                                             const CCValAssign &VA,
1944*5f757f3fSDimitry Andric                                             ISD::ArgFlagsTy Flags,
1945*5f757f3fSDimitry Andric                                             bool isByVal) const {
1946*5f757f3fSDimitry Andric   unsigned LocMemOffset = VA.getLocMemOffset();
1947*5f757f3fSDimitry Andric   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1948*5f757f3fSDimitry Andric   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1949*5f757f3fSDimitry Andric                        StackPtr, PtrOff);
1950*5f757f3fSDimitry Andric   if (isByVal)
1951*5f757f3fSDimitry Andric     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1952*5f757f3fSDimitry Andric 
1953*5f757f3fSDimitry Andric   MaybeAlign Alignment;
1954*5f757f3fSDimitry Andric   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1955*5f757f3fSDimitry Andric       Arg.getSimpleValueType() != MVT::f80)
1956*5f757f3fSDimitry Andric     Alignment = MaybeAlign(4);
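                               // Rationale sketch (assumption about the ABI motivation): 32-bit MSVC
                               // only guarantees 4-byte alignment for outgoing stack arguments, so
                               // e.g. a double may sit at esp+4; recording MaybeAlign(4) keeps the
                               // store's memory operand from claiming the type's natural (higher)
                               // alignment.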
1957*5f757f3fSDimitry Andric   return DAG.getStore(
1958*5f757f3fSDimitry Andric       Chain, dl, Arg, PtrOff,
1959*5f757f3fSDimitry Andric       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1960*5f757f3fSDimitry Andric       Alignment);
1961*5f757f3fSDimitry Andric }
1962*5f757f3fSDimitry Andric 
1963*5f757f3fSDimitry Andric /// Emit a load of the return address if tail call
1964*5f757f3fSDimitry Andric /// optimization is performed and it is required.
1965*5f757f3fSDimitry Andric SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1966*5f757f3fSDimitry Andric     SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1967*5f757f3fSDimitry Andric     bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1968*5f757f3fSDimitry Andric   // Adjust the Return address stack slot.
1969*5f757f3fSDimitry Andric   EVT VT = getPointerTy(DAG.getDataLayout());
1970*5f757f3fSDimitry Andric   OutRetAddr = getReturnAddressFrameIndex(DAG);
1971*5f757f3fSDimitry Andric 
1972*5f757f3fSDimitry Andric   // Load the "old" Return address.
1973*5f757f3fSDimitry Andric   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1974*5f757f3fSDimitry Andric   return SDValue(OutRetAddr.getNode(), 1);
1975*5f757f3fSDimitry Andric }
1976*5f757f3fSDimitry Andric 
1977*5f757f3fSDimitry Andric /// Emit a store of the return address if tail call
1978*5f757f3fSDimitry Andric /// optimization is performed and it is required (FPDiff!=0).
1979*5f757f3fSDimitry Andric static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1980*5f757f3fSDimitry Andric                                         SDValue Chain, SDValue RetAddrFrIdx,
1981*5f757f3fSDimitry Andric                                         EVT PtrVT, unsigned SlotSize,
1982*5f757f3fSDimitry Andric                                         int FPDiff, const SDLoc &dl) {
1983*5f757f3fSDimitry Andric   // Store the return address to the appropriate stack slot.
1984*5f757f3fSDimitry Andric   if (!FPDiff) return Chain;
1985*5f757f3fSDimitry Andric   // Calculate the new stack slot for the return address.
1986*5f757f3fSDimitry Andric   int NewReturnAddrFI =
1987*5f757f3fSDimitry Andric     MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1988*5f757f3fSDimitry Andric                                          false);
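                               // Worked example (illustrative figures): if this function pops 8 bytes
                               // of its own arguments on return but the tail callee needs 24 bytes,
                               // then FPDiff = 8 - 24 = -16; with SlotSize = 8 the return address is
                               // re-stored at fixed offset FPDiff - SlotSize = -24, i.e. 16 bytes
                               // below its old slot.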
1989*5f757f3fSDimitry Andric   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1990*5f757f3fSDimitry Andric   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1991*5f757f3fSDimitry Andric                        MachinePointerInfo::getFixedStack(
1992*5f757f3fSDimitry Andric                            DAG.getMachineFunction(), NewReturnAddrFI));
1993*5f757f3fSDimitry Andric   return Chain;
1994*5f757f3fSDimitry Andric }
1995*5f757f3fSDimitry Andric 
1996*5f757f3fSDimitry Andric /// Returns a vector_shuffle mask for a movs{s|d} or movd
1997*5f757f3fSDimitry Andric /// operation of the specified width.
1998*5f757f3fSDimitry Andric SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1999*5f757f3fSDimitry Andric                                    SDValue V1, SDValue V2) const {
2000*5f757f3fSDimitry Andric   unsigned NumElems = VT.getVectorNumElements();
2001*5f757f3fSDimitry Andric   SmallVector<int, 8> Mask;
2002*5f757f3fSDimitry Andric   Mask.push_back(NumElems);
2003*5f757f3fSDimitry Andric   for (unsigned i = 1; i != NumElems; ++i)
2004*5f757f3fSDimitry Andric     Mask.push_back(i);
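                               // Example (illustrative): for VT = v4f32 the mask is <4, 1, 2, 3>,
                               // taking element 0 from V2 and elements 1-3 from V1 (the MOVSS
                               // pattern); for v2f64 it is <2, 1>, the MOVSD pattern.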
2005*5f757f3fSDimitry Andric   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2006*5f757f3fSDimitry Andric }
2007*5f757f3fSDimitry Andric 
2008*5f757f3fSDimitry Andric SDValue
2009*5f757f3fSDimitry Andric X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2010*5f757f3fSDimitry Andric                              SmallVectorImpl<SDValue> &InVals) const {
2011*5f757f3fSDimitry Andric   SelectionDAG &DAG                     = CLI.DAG;
2012*5f757f3fSDimitry Andric   SDLoc &dl                             = CLI.DL;
2013*5f757f3fSDimitry Andric   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2014*5f757f3fSDimitry Andric   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
2015*5f757f3fSDimitry Andric   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
2016*5f757f3fSDimitry Andric   SDValue Chain                         = CLI.Chain;
2017*5f757f3fSDimitry Andric   SDValue Callee                        = CLI.Callee;
2018*5f757f3fSDimitry Andric   CallingConv::ID CallConv              = CLI.CallConv;
2019*5f757f3fSDimitry Andric   bool &isTailCall                      = CLI.IsTailCall;
2020*5f757f3fSDimitry Andric   bool isVarArg                         = CLI.IsVarArg;
2021*5f757f3fSDimitry Andric   const auto *CB                        = CLI.CB;
2022*5f757f3fSDimitry Andric 
2023*5f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
2024*5f757f3fSDimitry Andric   bool Is64Bit        = Subtarget.is64Bit();
2025*5f757f3fSDimitry Andric   bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
2026*5f757f3fSDimitry Andric   bool IsSibcall      = false;
2027*5f757f3fSDimitry Andric   bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2028*5f757f3fSDimitry Andric       CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2029*5f757f3fSDimitry Andric   bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2030*5f757f3fSDimitry Andric   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2031*5f757f3fSDimitry Andric   bool HasNCSR = (CB && isa<CallInst>(CB) &&
2032*5f757f3fSDimitry Andric                   CB->hasFnAttr("no_caller_saved_registers"));
2033*5f757f3fSDimitry Andric   bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2034*5f757f3fSDimitry Andric   bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2035*5f757f3fSDimitry Andric   bool IsCFICall = IsIndirectCall && CLI.CFIType;
2036*5f757f3fSDimitry Andric   const Module *M = MF.getMMI().getModule();
2037*5f757f3fSDimitry Andric   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2038*5f757f3fSDimitry Andric 
2039*5f757f3fSDimitry Andric   MachineFunction::CallSiteInfo CSInfo;
2040*5f757f3fSDimitry Andric   if (CallConv == CallingConv::X86_INTR)
2041*5f757f3fSDimitry Andric     report_fatal_error("X86 interrupts may not be called directly");
2042*5f757f3fSDimitry Andric 
2043*5f757f3fSDimitry Andric   bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2044*5f757f3fSDimitry Andric   if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2045*5f757f3fSDimitry Andric     // If we are using a GOT, disable tail calls to external symbols with
2046*5f757f3fSDimitry Andric     // default visibility. Tail calling such a symbol requires using a GOT
2047*5f757f3fSDimitry Andric     // relocation, which forces early binding of the symbol. This breaks code
2048*5f757f3fSDimitry Andric     // that requires lazy function symbol resolution. Using musttail or
2049*5f757f3fSDimitry Andric     // GuaranteedTailCallOpt will override this.
2050*5f757f3fSDimitry Andric     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2051*5f757f3fSDimitry Andric     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2052*5f757f3fSDimitry Andric                G->getGlobal()->hasDefaultVisibility()))
2053*5f757f3fSDimitry Andric       isTailCall = false;
2054*5f757f3fSDimitry Andric   }
2055*5f757f3fSDimitry Andric 
2056*5f757f3fSDimitry Andric   if (isTailCall && !IsMustTail) {
2057*5f757f3fSDimitry Andric     // Check if it's really possible to do a tail call.
2058*5f757f3fSDimitry Andric     isTailCall = IsEligibleForTailCallOptimization(
2059*5f757f3fSDimitry Andric         Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
2060*5f757f3fSDimitry Andric         Ins, DAG);
2061*5f757f3fSDimitry Andric 
2062*5f757f3fSDimitry Andric     // Sibcalls are automatically detected tailcalls which do not require
2063*5f757f3fSDimitry Andric     // ABI changes.
2064*5f757f3fSDimitry Andric     if (!IsGuaranteeTCO && isTailCall)
2065*5f757f3fSDimitry Andric       IsSibcall = true;
2066*5f757f3fSDimitry Andric 
2067*5f757f3fSDimitry Andric     if (isTailCall)
2068*5f757f3fSDimitry Andric       ++NumTailCalls;
2069*5f757f3fSDimitry Andric   }
2070*5f757f3fSDimitry Andric 
2071*5f757f3fSDimitry Andric   if (IsMustTail && !isTailCall)
2072*5f757f3fSDimitry Andric     report_fatal_error("failed to perform tail call elimination on a call "
2073*5f757f3fSDimitry Andric                        "site marked musttail");
2074*5f757f3fSDimitry Andric 
2075*5f757f3fSDimitry Andric   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2076*5f757f3fSDimitry Andric          "Var args not supported with calling conventions fastcc, ghc or hipe");
2077*5f757f3fSDimitry Andric 
2078*5f757f3fSDimitry Andric   // Analyze operands of the call, assigning locations to each operand.
2079*5f757f3fSDimitry Andric   SmallVector<CCValAssign, 16> ArgLocs;
2080*5f757f3fSDimitry Andric   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2081*5f757f3fSDimitry Andric 
2082*5f757f3fSDimitry Andric   // Allocate shadow area for Win64.
2083*5f757f3fSDimitry Andric   if (IsWin64)
2084*5f757f3fSDimitry Andric     CCInfo.AllocateStack(32, Align(8));
2085*5f757f3fSDimitry Andric 
2086*5f757f3fSDimitry Andric   CCInfo.AnalyzeArguments(Outs, CC_X86);
2087*5f757f3fSDimitry Andric 
2088*5f757f3fSDimitry Andric   // In vectorcall calling convention a second pass is required for the HVA
2089*5f757f3fSDimitry Andric   // types.
2090*5f757f3fSDimitry Andric   if (CallingConv::X86_VectorCall == CallConv) {
2091*5f757f3fSDimitry Andric     CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2092*5f757f3fSDimitry Andric   }
2093*5f757f3fSDimitry Andric 
2094*5f757f3fSDimitry Andric   // Get a count of how many bytes are to be pushed on the stack.
2095*5f757f3fSDimitry Andric   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2096*5f757f3fSDimitry Andric   if (IsSibcall)
2097*5f757f3fSDimitry Andric     // This is a sibcall. The memory operands are already available in the
2098*5f757f3fSDimitry Andric     // caller's own stack.
2099*5f757f3fSDimitry Andric     NumBytes = 0;
2100*5f757f3fSDimitry Andric   else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2101*5f757f3fSDimitry Andric     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2102*5f757f3fSDimitry Andric 
2103*5f757f3fSDimitry Andric   int FPDiff = 0;
2104*5f757f3fSDimitry Andric   if (isTailCall &&
2105*5f757f3fSDimitry Andric       shouldGuaranteeTCO(CallConv,
2106*5f757f3fSDimitry Andric                          MF.getTarget().Options.GuaranteedTailCallOpt)) {
2107*5f757f3fSDimitry Andric     // Lower arguments at fp - stackoffset + fpdiff.
2108*5f757f3fSDimitry Andric     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2109*5f757f3fSDimitry Andric 
2110*5f757f3fSDimitry Andric     FPDiff = NumBytesCallerPushed - NumBytes;
2111*5f757f3fSDimitry Andric 
2112*5f757f3fSDimitry Andric     // Set the delta of movement of the return address stack slot.
2113*5f757f3fSDimitry Andric     // Only update it when the new delta is lower than the previous one.
2114*5f757f3fSDimitry Andric     if (FPDiff < X86Info->getTCReturnAddrDelta())
2115*5f757f3fSDimitry Andric       X86Info->setTCReturnAddrDelta(FPDiff);
2116*5f757f3fSDimitry Andric   }
2117*5f757f3fSDimitry Andric 
2118*5f757f3fSDimitry Andric   unsigned NumBytesToPush = NumBytes;
2119*5f757f3fSDimitry Andric   unsigned NumBytesToPop = NumBytes;
2120*5f757f3fSDimitry Andric 
2121*5f757f3fSDimitry Andric   // If we have an inalloca argument, all stack space has already been allocated
2122*5f757f3fSDimitry Andric   // for us and is right at the top of the stack.  We don't support multiple
2123*5f757f3fSDimitry Andric   // arguments passed in memory when using inalloca.
2124*5f757f3fSDimitry Andric   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2125*5f757f3fSDimitry Andric     NumBytesToPush = 0;
2126*5f757f3fSDimitry Andric     if (!ArgLocs.back().isMemLoc())
2127*5f757f3fSDimitry Andric       report_fatal_error("cannot use inalloca attribute on a register "
2128*5f757f3fSDimitry Andric                          "parameter");
2129*5f757f3fSDimitry Andric     if (ArgLocs.back().getLocMemOffset() != 0)
2130*5f757f3fSDimitry Andric       report_fatal_error("any parameter with the inalloca attribute must be "
2131*5f757f3fSDimitry Andric                          "the only memory argument");
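                                 // Illustrative example (assumed, not from this file): 32-bit MSVC
                                 // C++ such as
                                 //   struct S { S(const S &); int x; };
                                 //   void f(S);
                                 //   f(S{1});
                                 // passes S via inalloca: the caller materializes the whole outgoing
                                 // argument block with an 'alloca inalloca' and constructs S in
                                 // place, which is why NumBytesToPush was set to 0 above.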
2132*5f757f3fSDimitry Andric   } else if (CLI.IsPreallocated) {
2133*5f757f3fSDimitry Andric     assert(ArgLocs.back().isMemLoc() &&
2134*5f757f3fSDimitry Andric            "cannot use preallocated attribute on a register "
2135*5f757f3fSDimitry Andric            "parameter");
2136*5f757f3fSDimitry Andric     SmallVector<size_t, 4> PreallocatedOffsets;
2137*5f757f3fSDimitry Andric     for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2138*5f757f3fSDimitry Andric       if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2139*5f757f3fSDimitry Andric         PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2140*5f757f3fSDimitry Andric       }
2141*5f757f3fSDimitry Andric     }
2142*5f757f3fSDimitry Andric     auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2143*5f757f3fSDimitry Andric     size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2144*5f757f3fSDimitry Andric     MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2145*5f757f3fSDimitry Andric     MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2146*5f757f3fSDimitry Andric     NumBytesToPush = 0;
2147*5f757f3fSDimitry Andric   }
2148*5f757f3fSDimitry Andric 
2149*5f757f3fSDimitry Andric   if (!IsSibcall && !IsMustTail)
2150*5f757f3fSDimitry Andric     Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2151*5f757f3fSDimitry Andric                                  NumBytes - NumBytesToPush, dl);
2152*5f757f3fSDimitry Andric 
2153*5f757f3fSDimitry Andric   SDValue RetAddrFrIdx;
2154*5f757f3fSDimitry Andric   // Load return address for tail calls.
2155*5f757f3fSDimitry Andric   if (isTailCall && FPDiff)
2156*5f757f3fSDimitry Andric     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2157*5f757f3fSDimitry Andric                                     Is64Bit, FPDiff, dl);
2158*5f757f3fSDimitry Andric 
2159*5f757f3fSDimitry Andric   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2160*5f757f3fSDimitry Andric   SmallVector<SDValue, 8> MemOpChains;
2161*5f757f3fSDimitry Andric   SDValue StackPtr;
2162*5f757f3fSDimitry Andric 
2163*5f757f3fSDimitry Andric   // The next loop assumes that the locations are in the same order as the
2164*5f757f3fSDimitry Andric   // input arguments.
2165*5f757f3fSDimitry Andric   assert(isSortedByValueNo(ArgLocs) &&
2166*5f757f3fSDimitry Andric          "Argument Location list must be sorted before lowering");
2167*5f757f3fSDimitry Andric 
2168*5f757f3fSDimitry Andric   // Walk the register/memloc assignments, inserting copies/loads.  In the case
2169*5f757f3fSDimitry Andric   // of tail call optimization, arguments are handled later.
2170*5f757f3fSDimitry Andric   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2171*5f757f3fSDimitry Andric   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2172*5f757f3fSDimitry Andric        ++I, ++OutIndex) {
2173*5f757f3fSDimitry Andric     assert(OutIndex < Outs.size() && "Invalid Out index");
2174*5f757f3fSDimitry Andric     // Skip inalloca/preallocated arguments, they have already been written.
2175*5f757f3fSDimitry Andric     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2176*5f757f3fSDimitry Andric     if (Flags.isInAlloca() || Flags.isPreallocated())
2177*5f757f3fSDimitry Andric       continue;
2178*5f757f3fSDimitry Andric 
2179*5f757f3fSDimitry Andric     CCValAssign &VA = ArgLocs[I];
2180*5f757f3fSDimitry Andric     EVT RegVT = VA.getLocVT();
2181*5f757f3fSDimitry Andric     SDValue Arg = OutVals[OutIndex];
2182*5f757f3fSDimitry Andric     bool isByVal = Flags.isByVal();
2183*5f757f3fSDimitry Andric 
2184*5f757f3fSDimitry Andric     // Promote the value if needed.
2185*5f757f3fSDimitry Andric     switch (VA.getLocInfo()) {
2186*5f757f3fSDimitry Andric     default: llvm_unreachable("Unknown loc info!");
2187*5f757f3fSDimitry Andric     case CCValAssign::Full: break;
2188*5f757f3fSDimitry Andric     case CCValAssign::SExt:
2189*5f757f3fSDimitry Andric       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2190*5f757f3fSDimitry Andric       break;
2191*5f757f3fSDimitry Andric     case CCValAssign::ZExt:
2192*5f757f3fSDimitry Andric       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2193*5f757f3fSDimitry Andric       break;
2194*5f757f3fSDimitry Andric     case CCValAssign::AExt:
2195*5f757f3fSDimitry Andric       if (Arg.getValueType().isVector() &&
2196*5f757f3fSDimitry Andric           Arg.getValueType().getVectorElementType() == MVT::i1)
2197*5f757f3fSDimitry Andric         Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2198*5f757f3fSDimitry Andric       else if (RegVT.is128BitVector()) {
2199*5f757f3fSDimitry Andric         // Special case: passing MMX values in XMM registers.
2200*5f757f3fSDimitry Andric         Arg = DAG.getBitcast(MVT::i64, Arg);
2201*5f757f3fSDimitry Andric         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2202*5f757f3fSDimitry Andric         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2203*5f757f3fSDimitry Andric       } else
2204*5f757f3fSDimitry Andric         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2205*5f757f3fSDimitry Andric       break;
2206*5f757f3fSDimitry Andric     case CCValAssign::BCvt:
2207*5f757f3fSDimitry Andric       Arg = DAG.getBitcast(RegVT, Arg);
2208*5f757f3fSDimitry Andric       break;
2209*5f757f3fSDimitry Andric     case CCValAssign::Indirect: {
2210*5f757f3fSDimitry Andric       if (isByVal) {
2211*5f757f3fSDimitry Andric         // Memcpy the argument to a temporary stack slot to prevent
2212*5f757f3fSDimitry Andric         // the caller from seeing any modifications the callee may make
2213*5f757f3fSDimitry Andric         // as guaranteed by the `byval` attribute.
2214*5f757f3fSDimitry Andric         int FrameIdx = MF.getFrameInfo().CreateStackObject(
2215*5f757f3fSDimitry Andric             Flags.getByValSize(),
2216*5f757f3fSDimitry Andric             std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2217*5f757f3fSDimitry Andric         SDValue StackSlot =
2218*5f757f3fSDimitry Andric             DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2219*5f757f3fSDimitry Andric         Chain =
2220*5f757f3fSDimitry Andric             CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2221*5f757f3fSDimitry Andric         // From now on treat this as a regular pointer
2222*5f757f3fSDimitry Andric         Arg = StackSlot;
2223*5f757f3fSDimitry Andric         isByVal = false;
2224*5f757f3fSDimitry Andric       } else {
2225*5f757f3fSDimitry Andric         // Store the argument.
2226*5f757f3fSDimitry Andric         SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2227*5f757f3fSDimitry Andric         int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2228*5f757f3fSDimitry Andric         Chain = DAG.getStore(
2229*5f757f3fSDimitry Andric             Chain, dl, Arg, SpillSlot,
2230*5f757f3fSDimitry Andric             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2231*5f757f3fSDimitry Andric         Arg = SpillSlot;
2232*5f757f3fSDimitry Andric       }
2233*5f757f3fSDimitry Andric       break;
2234*5f757f3fSDimitry Andric     }
2235*5f757f3fSDimitry Andric     }
2236*5f757f3fSDimitry Andric 
2237*5f757f3fSDimitry Andric     if (VA.needsCustom()) {
2238*5f757f3fSDimitry Andric       assert(VA.getValVT() == MVT::v64i1 &&
2239*5f757f3fSDimitry Andric              "Currently the only custom case is when we split v64i1 to 2 regs");
2240*5f757f3fSDimitry Andric       // Split v64i1 value into two registers
2241*5f757f3fSDimitry Andric       Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2242*5f757f3fSDimitry Andric     } else if (VA.isRegLoc()) {
2243*5f757f3fSDimitry Andric       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2244*5f757f3fSDimitry Andric       const TargetOptions &Options = DAG.getTarget().Options;
2245*5f757f3fSDimitry Andric       if (Options.EmitCallSiteInfo)
2246*5f757f3fSDimitry Andric         CSInfo.emplace_back(VA.getLocReg(), I);
2247*5f757f3fSDimitry Andric       if (isVarArg && IsWin64) {
2248*5f757f3fSDimitry Andric         // The Win64 ABI requires an argument XMM reg to be copied to the
2249*5f757f3fSDimitry Andric         // corresponding shadow GPR if the callee is a varargs function.
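                                     // Illustrative example (not from this file): for printf("%f", d)
                                     // on Win64, d occupies the second argument slot and travels in
                                     // XMM1; it is duplicated into RDX so the callee's va_arg can
                                     // read it from the GPR home area.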
2250*5f757f3fSDimitry Andric         Register ShadowReg;
2251*5f757f3fSDimitry Andric         switch (VA.getLocReg()) {
2252*5f757f3fSDimitry Andric         case X86::XMM0: ShadowReg = X86::RCX; break;
2253*5f757f3fSDimitry Andric         case X86::XMM1: ShadowReg = X86::RDX; break;
2254*5f757f3fSDimitry Andric         case X86::XMM2: ShadowReg = X86::R8; break;
2255*5f757f3fSDimitry Andric         case X86::XMM3: ShadowReg = X86::R9; break;
2256*5f757f3fSDimitry Andric         }
2257*5f757f3fSDimitry Andric         if (ShadowReg)
2258*5f757f3fSDimitry Andric           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2259*5f757f3fSDimitry Andric       }
2260*5f757f3fSDimitry Andric     } else if (!IsSibcall && (!isTailCall || isByVal)) {
2261*5f757f3fSDimitry Andric       assert(VA.isMemLoc());
2262*5f757f3fSDimitry Andric       if (!StackPtr.getNode())
2263*5f757f3fSDimitry Andric         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2264*5f757f3fSDimitry Andric                                       getPointerTy(DAG.getDataLayout()));
2265*5f757f3fSDimitry Andric       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2266*5f757f3fSDimitry Andric                                              dl, DAG, VA, Flags, isByVal));
2267*5f757f3fSDimitry Andric     }
2268*5f757f3fSDimitry Andric   }
2269*5f757f3fSDimitry Andric 
2270*5f757f3fSDimitry Andric   if (!MemOpChains.empty())
2271*5f757f3fSDimitry Andric     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2272*5f757f3fSDimitry Andric 
2273*5f757f3fSDimitry Andric   if (Subtarget.isPICStyleGOT()) {
2274*5f757f3fSDimitry Andric     // ELF / PIC requires the GOT pointer to be in the EBX register before
2275*5f757f3fSDimitry Andric     // function calls made via the PLT (except for regcall).
2276*5f757f3fSDimitry Andric     if (!isTailCall) {
2277*5f757f3fSDimitry Andric       // An indirect call with the RegCall calling convention may use up all
2278*5f757f3fSDimitry Andric       // the general registers, so it is not suitable to bind the EBX register
2279*5f757f3fSDimitry Andric       // to the GOT address; just let the register allocator handle it.
2280*5f757f3fSDimitry Andric       if (CallConv != CallingConv::X86_RegCall)
2281*5f757f3fSDimitry Andric         RegsToPass.push_back(std::make_pair(
2282*5f757f3fSDimitry Andric           Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2283*5f757f3fSDimitry Andric                                           getPointerTy(DAG.getDataLayout()))));
2284*5f757f3fSDimitry Andric     } else {
2285*5f757f3fSDimitry Andric       // If we are tail calling and generating PIC/GOT style code load the
2286*5f757f3fSDimitry Andric       // address of the callee into ECX. The value in ecx is used as target of
2287*5f757f3fSDimitry Andric       // the tail jump. This is done to circumvent the ebx/callee-saved problem
2288*5f757f3fSDimitry Andric       // for tail calls on PIC/GOT architectures. Normally we would just put the
2289*5f757f3fSDimitry Andric       // address of the GOT into ebx and then call target@PLT. But for tail calls
2290*5f757f3fSDimitry Andric       // ebx would be restored (since ebx is callee saved) before jumping to the
2291*5f757f3fSDimitry Andric       // target@PLT.
2292*5f757f3fSDimitry Andric 
2293*5f757f3fSDimitry Andric       // Note: The actual moving to ECX is done further down.
2294*5f757f3fSDimitry Andric       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2295*5f757f3fSDimitry Andric       if (G && !G->getGlobal()->hasLocalLinkage() &&
2296*5f757f3fSDimitry Andric           G->getGlobal()->hasDefaultVisibility())
2297*5f757f3fSDimitry Andric         Callee = LowerGlobalAddress(Callee, DAG);
2298*5f757f3fSDimitry Andric       else if (isa<ExternalSymbolSDNode>(Callee))
2299*5f757f3fSDimitry Andric         Callee = LowerExternalSymbol(Callee, DAG);
2300*5f757f3fSDimitry Andric     }
2301*5f757f3fSDimitry Andric   }
2302*5f757f3fSDimitry Andric 
2303*5f757f3fSDimitry Andric   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2304*5f757f3fSDimitry Andric       (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2305*5f757f3fSDimitry Andric     // From AMD64 ABI document:
2306*5f757f3fSDimitry Andric     // For calls that may call functions that use varargs or stdargs
2307*5f757f3fSDimitry Andric     // (prototype-less calls or calls to functions containing ellipsis (...) in
2308*5f757f3fSDimitry Andric     // the declaration) %al is used as a hidden argument to specify the number
2309*5f757f3fSDimitry Andric     // of SSE registers used. The contents of %al do not need to match exactly
2310*5f757f3fSDimitry Andric     // the number of registers, but must be an upper bound on the number of
2311*5f757f3fSDimitry Andric     // SSE registers used, and must be in the range 0 - 8 inclusive.
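                                 // Illustrative example (not from this file):
                                 //   printf("%g %g", x, y);   // two doubles, in XMM0 and XMM1
                                 // emits 'movb $2, %al' before the call. An over-estimate such as 8
                                 // is also ABI-correct; it merely makes the callee's va_start
                                 // prologue save XMM registers it will never read.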
2312*5f757f3fSDimitry Andric 
2313*5f757f3fSDimitry Andric     // Count the number of XMM registers allocated.
2314*5f757f3fSDimitry Andric     static const MCPhysReg XMMArgRegs[] = {
2315*5f757f3fSDimitry Andric       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2316*5f757f3fSDimitry Andric       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2317*5f757f3fSDimitry Andric     };
2318*5f757f3fSDimitry Andric     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2319*5f757f3fSDimitry Andric     assert((Subtarget.hasSSE1() || !NumXMMRegs)
2320*5f757f3fSDimitry Andric            && "SSE registers cannot be used when SSE is disabled");
2321*5f757f3fSDimitry Andric     RegsToPass.push_back(std::make_pair(Register(X86::AL),
2322*5f757f3fSDimitry Andric                                         DAG.getConstant(NumXMMRegs, dl,
2323*5f757f3fSDimitry Andric                                                         MVT::i8)));
2324*5f757f3fSDimitry Andric   }
2325*5f757f3fSDimitry Andric 
2326*5f757f3fSDimitry Andric   if (isVarArg && IsMustTail) {
2327*5f757f3fSDimitry Andric     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2328*5f757f3fSDimitry Andric     for (const auto &F : Forwards) {
2329*5f757f3fSDimitry Andric       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2330*5f757f3fSDimitry Andric       RegsToPass.push_back(std::make_pair(F.PReg, Val));
2331*5f757f3fSDimitry Andric     }
2332*5f757f3fSDimitry Andric   }
2333*5f757f3fSDimitry Andric 
2334*5f757f3fSDimitry Andric   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
2335*5f757f3fSDimitry Andric   // don't need this because the eligibility check rejects calls that require
2336*5f757f3fSDimitry Andric   // shuffling arguments passed in memory.
2337*5f757f3fSDimitry Andric   if (!IsSibcall && isTailCall) {
2338*5f757f3fSDimitry Andric     // Force all the incoming stack arguments to be loaded from the stack
2339*5f757f3fSDimitry Andric     // before any new outgoing arguments are stored to the stack, because the
2340*5f757f3fSDimitry Andric     // outgoing stack slots may alias the incoming argument stack slots, and
2341*5f757f3fSDimitry Andric     // the alias isn't otherwise explicit. This is slightly more conservative
2342*5f757f3fSDimitry Andric     // than necessary, because it means that each store effectively depends
2343*5f757f3fSDimitry Andric     // on every argument instead of just those arguments it would clobber.
2344*5f757f3fSDimitry Andric     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2345*5f757f3fSDimitry Andric 
2346*5f757f3fSDimitry Andric     SmallVector<SDValue, 8> MemOpChains2;
2347*5f757f3fSDimitry Andric     SDValue FIN;
2348*5f757f3fSDimitry Andric     int FI = 0;
2349*5f757f3fSDimitry Andric     for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2350*5f757f3fSDimitry Andric          ++I, ++OutsIndex) {
2351*5f757f3fSDimitry Andric       CCValAssign &VA = ArgLocs[I];
2352*5f757f3fSDimitry Andric 
2353*5f757f3fSDimitry Andric       if (VA.isRegLoc()) {
2354*5f757f3fSDimitry Andric         if (VA.needsCustom()) {
2355*5f757f3fSDimitry Andric           assert((CallConv == CallingConv::X86_RegCall) &&
2356*5f757f3fSDimitry Andric                  "Expecting custom case only in regcall calling convention");
2357*5f757f3fSDimitry Andric           // This means that we are in a special case where one argument was
2358*5f757f3fSDimitry Andric           // passed through two register locations; skip the next location.
2359*5f757f3fSDimitry Andric           ++I;
2360*5f757f3fSDimitry Andric         }
2361*5f757f3fSDimitry Andric 
2362*5f757f3fSDimitry Andric         continue;
2363*5f757f3fSDimitry Andric       }
2364*5f757f3fSDimitry Andric 
2365*5f757f3fSDimitry Andric       assert(VA.isMemLoc());
2366*5f757f3fSDimitry Andric       SDValue Arg = OutVals[OutsIndex];
2367*5f757f3fSDimitry Andric       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2368*5f757f3fSDimitry Andric       // Skip inalloca/preallocated arguments.  They don't require any work.
2369*5f757f3fSDimitry Andric       if (Flags.isInAlloca() || Flags.isPreallocated())
2370*5f757f3fSDimitry Andric         continue;
2371*5f757f3fSDimitry Andric       // Create frame index.
2372*5f757f3fSDimitry Andric       int32_t Offset = VA.getLocMemOffset()+FPDiff;
2373*5f757f3fSDimitry Andric       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2374*5f757f3fSDimitry Andric       FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2375*5f757f3fSDimitry Andric       FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2376*5f757f3fSDimitry Andric 
2377*5f757f3fSDimitry Andric       if (Flags.isByVal()) {
2378*5f757f3fSDimitry Andric         // Copy relative to framepointer.
2379*5f757f3fSDimitry Andric         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2380*5f757f3fSDimitry Andric         if (!StackPtr.getNode())
2381*5f757f3fSDimitry Andric           StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2382*5f757f3fSDimitry Andric                                         getPointerTy(DAG.getDataLayout()));
2383*5f757f3fSDimitry Andric         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2384*5f757f3fSDimitry Andric                              StackPtr, Source);
2385*5f757f3fSDimitry Andric 
2386*5f757f3fSDimitry Andric         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2387*5f757f3fSDimitry Andric                                                          ArgChain,
2388*5f757f3fSDimitry Andric                                                          Flags, DAG, dl));
2389*5f757f3fSDimitry Andric       } else {
2390*5f757f3fSDimitry Andric         // Store relative to framepointer.
2391*5f757f3fSDimitry Andric         MemOpChains2.push_back(DAG.getStore(
2392*5f757f3fSDimitry Andric             ArgChain, dl, Arg, FIN,
2393*5f757f3fSDimitry Andric             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2394*5f757f3fSDimitry Andric       }
2395*5f757f3fSDimitry Andric     }
2396*5f757f3fSDimitry Andric 
2397*5f757f3fSDimitry Andric     if (!MemOpChains2.empty())
2398*5f757f3fSDimitry Andric       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2399*5f757f3fSDimitry Andric 
2400*5f757f3fSDimitry Andric     // Store the return address to the appropriate stack slot.
2401*5f757f3fSDimitry Andric     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2402*5f757f3fSDimitry Andric                                      getPointerTy(DAG.getDataLayout()),
2403*5f757f3fSDimitry Andric                                      RegInfo->getSlotSize(), FPDiff, dl);
2404*5f757f3fSDimitry Andric   }
2405*5f757f3fSDimitry Andric 
2406*5f757f3fSDimitry Andric   // Build a sequence of copy-to-reg nodes chained together with token chain
2407*5f757f3fSDimitry Andric   // and glue operands which copy the outgoing args into registers.
2408*5f757f3fSDimitry Andric   SDValue InGlue;
2409*5f757f3fSDimitry Andric   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2410*5f757f3fSDimitry Andric     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2411*5f757f3fSDimitry Andric                              RegsToPass[i].second, InGlue);
2412*5f757f3fSDimitry Andric     InGlue = Chain.getValue(1);
2413*5f757f3fSDimitry Andric   }
2414*5f757f3fSDimitry Andric 
2415*5f757f3fSDimitry Andric   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2416*5f757f3fSDimitry Andric     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2417*5f757f3fSDimitry Andric     // In the 64-bit large code model, we have to make all calls
2418*5f757f3fSDimitry Andric     // through a register, since the call instruction's 32-bit
2419*5f757f3fSDimitry Andric     // pc-relative offset may not be large enough to hold the whole
2420*5f757f3fSDimitry Andric     // address.
2421*5f757f3fSDimitry Andric   } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2422*5f757f3fSDimitry Andric              Callee->getOpcode() == ISD::ExternalSymbol) {
2423*5f757f3fSDimitry Andric     // Lower direct calls to global addresses and external symbols. Setting
2424*5f757f3fSDimitry Andric     // ForCall to true here has the effect of removing WrapperRIP when possible
2425*5f757f3fSDimitry Andric     // to allow direct calls to be selected without first materializing the
2426*5f757f3fSDimitry Andric     // address into a register.
2427*5f757f3fSDimitry Andric     Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2428*5f757f3fSDimitry Andric   } else if (Subtarget.isTarget64BitILP32() &&
2429*5f757f3fSDimitry Andric              Callee.getValueType() == MVT::i32) {
2430*5f757f3fSDimitry Andric     // Zero-extend the 32-bit Callee address to 64 bits, as the x32 ABI requires.
2431*5f757f3fSDimitry Andric     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2432*5f757f3fSDimitry Andric   }
2433*5f757f3fSDimitry Andric 
2434*5f757f3fSDimitry Andric   // Returns a chain & a glue for retval copy to use.
2435*5f757f3fSDimitry Andric   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2436*5f757f3fSDimitry Andric   SmallVector<SDValue, 8> Ops;
2437*5f757f3fSDimitry Andric 
2438*5f757f3fSDimitry Andric   if (!IsSibcall && isTailCall && !IsMustTail) {
2439*5f757f3fSDimitry Andric     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2440*5f757f3fSDimitry Andric     InGlue = Chain.getValue(1);
2441*5f757f3fSDimitry Andric   }
2442*5f757f3fSDimitry Andric 
2443*5f757f3fSDimitry Andric   Ops.push_back(Chain);
2444*5f757f3fSDimitry Andric   Ops.push_back(Callee);
2445*5f757f3fSDimitry Andric 
2446*5f757f3fSDimitry Andric   if (isTailCall)
2447*5f757f3fSDimitry Andric     Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2448*5f757f3fSDimitry Andric 
2449*5f757f3fSDimitry Andric   // Add argument registers to the end of the list so that they are known live
2450*5f757f3fSDimitry Andric   // into the call.
2451*5f757f3fSDimitry Andric   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2452*5f757f3fSDimitry Andric     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2453*5f757f3fSDimitry Andric                                   RegsToPass[i].second.getValueType()));
2454*5f757f3fSDimitry Andric 
2455*5f757f3fSDimitry Andric   // Add a register mask operand representing the call-preserved registers.
2456*5f757f3fSDimitry Andric   const uint32_t *Mask = [&]() {
2457*5f757f3fSDimitry Andric     auto AdaptedCC = CallConv;
2458*5f757f3fSDimitry Andric     // If HasNCSR is set (the NoCallerSavedRegisters attribute is present), use
2459*5f757f3fSDimitry Andric     // the X86_INTR calling convention, because it has the same CSR mask (same
2460*5f757f3fSDimitry Andric     // preserved registers).
2461*5f757f3fSDimitry Andric     if (HasNCSR)
2462*5f757f3fSDimitry Andric       AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2463*5f757f3fSDimitry Andric     // If NoCalleeSavedRegisters is requested, then use GHC, since it happens
2464*5f757f3fSDimitry Andric     // to use the CSR_NoRegs_RegMask.
2465*5f757f3fSDimitry Andric     if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2466*5f757f3fSDimitry Andric       AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2467*5f757f3fSDimitry Andric     return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2468*5f757f3fSDimitry Andric   }();
2469*5f757f3fSDimitry Andric   assert(Mask && "Missing call preserved mask for calling convention");
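                               // For example (illustrative IR), a call site carrying the string
                               // attribute checked above:
                               //   call void @f() #0
                               //   attributes #0 = { "no_callee_saved_registers" }
                               // takes the GHC path and therefore gets CSR_NoRegs_RegMask, i.e. the
                               // call is treated as preserving no registers at all.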
2470*5f757f3fSDimitry Andric 
2471*5f757f3fSDimitry Andric   // If this is an invoke in a 32-bit function using a funclet-based
2472*5f757f3fSDimitry Andric   // personality, assume the function clobbers all registers. If an exception
2473*5f757f3fSDimitry Andric   // is thrown, the runtime will not restore CSRs.
2474*5f757f3fSDimitry Andric   // FIXME: Model this more precisely so that we can register allocate across
2475*5f757f3fSDimitry Andric   // the normal edge and spill and fill across the exceptional edge.
2476*5f757f3fSDimitry Andric   if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2477*5f757f3fSDimitry Andric     const Function &CallerFn = MF.getFunction();
2478*5f757f3fSDimitry Andric     EHPersonality Pers =
2479*5f757f3fSDimitry Andric         CallerFn.hasPersonalityFn()
2480*5f757f3fSDimitry Andric             ? classifyEHPersonality(CallerFn.getPersonalityFn())
2481*5f757f3fSDimitry Andric             : EHPersonality::Unknown;
2482*5f757f3fSDimitry Andric     if (isFuncletEHPersonality(Pers))
2483*5f757f3fSDimitry Andric       Mask = RegInfo->getNoPreservedMask();
2484*5f757f3fSDimitry Andric   }
2485*5f757f3fSDimitry Andric 
2486*5f757f3fSDimitry Andric   // Define a new register mask from the existing mask.
2487*5f757f3fSDimitry Andric   uint32_t *RegMask = nullptr;
2488*5f757f3fSDimitry Andric 
2489*5f757f3fSDimitry Andric   // In some calling conventions we need to remove the used physical registers
2490*5f757f3fSDimitry Andric   // from the reg mask. Create a new RegMask for such calling conventions.
2491*5f757f3fSDimitry Andric   // RegMask for calling conventions that disable only return registers (e.g.
2492*5f757f3fSDimitry Andric   // preserve_most) will be modified later in LowerCallResult.
2493*5f757f3fSDimitry Andric   bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2494*5f757f3fSDimitry Andric   if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2495*5f757f3fSDimitry Andric     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2496*5f757f3fSDimitry Andric 
2497*5f757f3fSDimitry Andric     // Allocate a new Reg Mask and copy Mask.
2498*5f757f3fSDimitry Andric     RegMask = MF.allocateRegMask();
2499*5f757f3fSDimitry Andric     unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2500*5f757f3fSDimitry Andric     memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2501*5f757f3fSDimitry Andric 
2502*5f757f3fSDimitry Andric     // Make sure all sub registers of the argument registers are reset
2503*5f757f3fSDimitry Andric     // in the RegMask.
2504*5f757f3fSDimitry Andric     if (ShouldDisableArgRegs) {
2505*5f757f3fSDimitry Andric       for (auto const &RegPair : RegsToPass)
2506*5f757f3fSDimitry Andric         for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2507*5f757f3fSDimitry Andric           RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2508*5f757f3fSDimitry Andric     }
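                                 // The register mask is an array of 32-bit words with one bit per
                                 // physical register; a set bit means "preserved across the call". The
                                 // loop above clears bit SubReg % 32 of word SubReg / 32, so when e.g.
                                 // RDI carries an argument, RDI and its sub-registers EDI, DI and DIL
                                 // all become clobbered (the register choice here is illustrative).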
2509*5f757f3fSDimitry Andric 
2510*5f757f3fSDimitry Andric     // Create the RegMask Operand according to our updated mask.
2511*5f757f3fSDimitry Andric     Ops.push_back(DAG.getRegisterMask(RegMask));
2512*5f757f3fSDimitry Andric   } else {
2513*5f757f3fSDimitry Andric     // Create the RegMask Operand according to the static mask.
2514*5f757f3fSDimitry Andric     Ops.push_back(DAG.getRegisterMask(Mask));
2515*5f757f3fSDimitry Andric   }
2516*5f757f3fSDimitry Andric 
2517*5f757f3fSDimitry Andric   if (InGlue.getNode())
2518*5f757f3fSDimitry Andric     Ops.push_back(InGlue);
2519*5f757f3fSDimitry Andric 
2520*5f757f3fSDimitry Andric   if (isTailCall) {
2521*5f757f3fSDimitry Andric     // We used to do:
2522*5f757f3fSDimitry Andric     //// If this is the first return lowered for this function, add the regs
2523*5f757f3fSDimitry Andric     //// to the liveout set for the function.
2524*5f757f3fSDimitry Andric     // This isn't right, although it's probably harmless on x86; liveouts
2525*5f757f3fSDimitry Andric     // should be computed from returns, not tail calls. Consider a void
2526*5f757f3fSDimitry Andric     // function making a tail call to a function returning int.
2527*5f757f3fSDimitry Andric     MF.getFrameInfo().setHasTailCall();
2528*5f757f3fSDimitry Andric     SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
2529*5f757f3fSDimitry Andric 
2530*5f757f3fSDimitry Andric     if (IsCFICall)
2531*5f757f3fSDimitry Andric       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2532*5f757f3fSDimitry Andric 
2533*5f757f3fSDimitry Andric     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2534*5f757f3fSDimitry Andric     DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2535*5f757f3fSDimitry Andric     return Ret;
2536*5f757f3fSDimitry Andric   }
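                               // Illustrative outcome (the actual assembly depends on the target and
                               // flags): the TC_RETURN node above ultimately becomes a 'jmp callee'
                               // that reuses the caller's frame, whereas the non-tail paths below emit
                               // a regular 'call callee'.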
2537*5f757f3fSDimitry Andric 
2538*5f757f3fSDimitry Andric   if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2539*5f757f3fSDimitry Andric     Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2540*5f757f3fSDimitry Andric   } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2541*5f757f3fSDimitry Andric     // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2542*5f757f3fSDimitry Andric     // expanded to the call, directly followed by a special marker sequence and
2543*5f757f3fSDimitry Andric     // a call to an ObjC library function. Use the CALL_RVMARKER node to do that.
2544*5f757f3fSDimitry Andric     assert(!isTailCall &&
2545*5f757f3fSDimitry Andric            "tail calls cannot be marked with clang.arc.attachedcall");
2546*5f757f3fSDimitry Andric     assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2547*5f757f3fSDimitry Andric 
2548*5f757f3fSDimitry Andric     // Add a target global address for the retainRV/claimRV runtime function
2549*5f757f3fSDimitry Andric     // just before the call target.
2550*5f757f3fSDimitry Andric     Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2551*5f757f3fSDimitry Andric     auto PtrVT = getPointerTy(DAG.getDataLayout());
2552*5f757f3fSDimitry Andric     auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2553*5f757f3fSDimitry Andric     Ops.insert(Ops.begin() + 1, GA);
2554*5f757f3fSDimitry Andric     Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2555*5f757f3fSDimitry Andric   } else {
2556*5f757f3fSDimitry Andric     Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2557*5f757f3fSDimitry Andric   }
2558*5f757f3fSDimitry Andric 
2559*5f757f3fSDimitry Andric   if (IsCFICall)
2560*5f757f3fSDimitry Andric     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2561*5f757f3fSDimitry Andric 
2562*5f757f3fSDimitry Andric   InGlue = Chain.getValue(1);
2563*5f757f3fSDimitry Andric   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2564*5f757f3fSDimitry Andric   DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2565*5f757f3fSDimitry Andric 
2566*5f757f3fSDimitry Andric   // Save heapallocsite metadata.
2567*5f757f3fSDimitry Andric   if (CLI.CB)
2568*5f757f3fSDimitry Andric     if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2569*5f757f3fSDimitry Andric       DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
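                               // (Illustrative note: this metadata ends up in the debug info, e.g. in
                               // CodeView heap-alloc-site records, associating the call's return
                               // address with the allocated type so debuggers can show typed heap
                               // allocations.)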
2570*5f757f3fSDimitry Andric 
2571*5f757f3fSDimitry Andric   // Create the CALLSEQ_END node.
2572*5f757f3fSDimitry Andric   unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2573*5f757f3fSDimitry Andric   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2574*5f757f3fSDimitry Andric                        DAG.getTarget().Options.GuaranteedTailCallOpt))
2575*5f757f3fSDimitry Andric     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
2576*5f757f3fSDimitry Andric   else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2577*5f757f3fSDimitry Andric     // If this call passes a struct-return pointer, the callee
2578*5f757f3fSDimitry Andric     // pops that struct pointer.
2579*5f757f3fSDimitry Andric     NumBytesForCalleeToPop = 4;
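                               // For example (illustrative): a 32-bit __stdcall callee taking two i32
                               // arguments returns with 'retl $8', popping its own 8 bytes of stack
                               // arguments, so NumBytesForCalleeToPop is 8 for such a call.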
2580*5f757f3fSDimitry Andric 
2581*5f757f3fSDimitry Andric   // Returns a glue for retval copy to use.
2582*5f757f3fSDimitry Andric   if (!IsSibcall) {
2583*5f757f3fSDimitry Andric     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2584*5f757f3fSDimitry Andric                                InGlue, dl);
2585*5f757f3fSDimitry Andric     InGlue = Chain.getValue(1);
2586*5f757f3fSDimitry Andric   }
2587*5f757f3fSDimitry Andric 
2588*5f757f3fSDimitry Andric   // Handle result values, copying them out of physregs into vregs that we
2589*5f757f3fSDimitry Andric   // return.
2590*5f757f3fSDimitry Andric   return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2591*5f757f3fSDimitry Andric                          InVals, RegMask);
2592*5f757f3fSDimitry Andric }
2593*5f757f3fSDimitry Andric 
2594*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
2595*5f757f3fSDimitry Andric //                Fast Calling Convention (tail call) implementation
2596*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
2597*5f757f3fSDimitry Andric 
2598*5f757f3fSDimitry Andric //  Like stdcall, the callee cleans up the arguments, except that ECX is
2599*5f757f3fSDimitry Andric //  reserved for storing the address of the tail-called function. Only 2
2600*5f757f3fSDimitry Andric //  registers are free for argument passing (inreg). Tail call optimization
2601*5f757f3fSDimitry Andric //  is performed provided:
2602*5f757f3fSDimitry Andric //                * tailcallopt is enabled
2603*5f757f3fSDimitry Andric //                * caller/callee are fastcc
2604*5f757f3fSDimitry Andric //  On the X86_64 architecture with GOT-style position-independent code, only
2605*5f757f3fSDimitry Andric //  local (within-module) calls are supported at the moment.
2606*5f757f3fSDimitry Andric //  To keep the stack aligned according to the platform ABI, the function
2607*5f757f3fSDimitry Andric //  GetAlignedArgumentStackSize ensures that the argument delta is always a
2608*5f757f3fSDimitry Andric //  multiple of the stack alignment. (Dynamic linkers need this - Darwin's
2609*5f757f3fSDimitry Andric //  dyld, for example.) If a tail-called callee has more arguments than the
2610*5f757f3fSDimitry Andric //  caller, the caller must make sure that there is room to move the RETADDR
2611*5f757f3fSDimitry Andric //  to. This is achieved by reserving an area the size of the argument delta
2612*5f757f3fSDimitry Andric //  right after the original RETADDR, but before the saved frame pointer or
2613*5f757f3fSDimitry Andric //  the spilled registers, e.g. caller(arg1, arg2) calls
2614*5f757f3fSDimitry Andric //  callee(arg1, arg2, arg3, arg4). Stack layout:
2615*5f757f3fSDimitry Andric //    arg1
2616*5f757f3fSDimitry Andric //    arg2
2617*5f757f3fSDimitry Andric //    RETADDR
2618*5f757f3fSDimitry Andric //    [ new RETADDR
2619*5f757f3fSDimitry Andric //      move area ]
2620*5f757f3fSDimitry Andric //    (possible EBP)
2621*5f757f3fSDimitry Andric //    ESI
2622*5f757f3fSDimitry Andric //    EDI
2623*5f757f3fSDimitry Andric //    local1 ..
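                             //  Worked example (hypothetical sizes): if the caller reserved 8 bytes of
                             //  argument stack and the callee needs 24, the argument delta is 16, so a
                             //  16-byte move area is reserved right after the original RETADDR (as in
                             //  the diagram above) and the return address is moved into it before the
                             //  jump to the callee.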
2624*5f757f3fSDimitry Andric 
2625*5f757f3fSDimitry Andric /// Align the stack size so that, together with the return-address slot, it
2626*5f757f3fSDimitry Andric /// meets the alignment requirement, e.g. 16n + 12 for 16-byte alignment.
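                             /// For example, with StackSize = 20, SlotSize = 4 and a 16-byte stack
                             /// alignment: alignTo(20 + 4, 16) - 4 == 28, i.e. of the form 16n + 12,
                             /// so that pushing the 4-byte return address yields a 16-byte-aligned
                             /// total.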
2627*5f757f3fSDimitry Andric unsigned
2628*5f757f3fSDimitry Andric X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2629*5f757f3fSDimitry Andric                                                SelectionDAG &DAG) const {
2630*5f757f3fSDimitry Andric   const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2631*5f757f3fSDimitry Andric   const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2632*5f757f3fSDimitry Andric   assert(StackSize % SlotSize == 0 &&
2633*5f757f3fSDimitry Andric          "StackSize must be a multiple of SlotSize");
2634*5f757f3fSDimitry Andric   return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2635*5f757f3fSDimitry Andric }
2636*5f757f3fSDimitry Andric 
2637*5f757f3fSDimitry Andric /// Return true if the given stack call argument is already available at the
2638*5f757f3fSDimitry Andric /// same (relative) position in the caller's incoming argument stack.
2639*5f757f3fSDimitry Andric static
2640*5f757f3fSDimitry Andric bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2641*5f757f3fSDimitry Andric                          MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2642*5f757f3fSDimitry Andric                          const X86InstrInfo *TII, const CCValAssign &VA) {
2643*5f757f3fSDimitry Andric   unsigned Bytes = Arg.getValueSizeInBits() / 8;
2644*5f757f3fSDimitry Andric 
2645*5f757f3fSDimitry Andric   for (;;) {
2646*5f757f3fSDimitry Andric     // Look through nodes that don't alter the bits of the incoming value.
2647*5f757f3fSDimitry Andric     unsigned Op = Arg.getOpcode();
2648*5f757f3fSDimitry Andric     if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2649*5f757f3fSDimitry Andric         Op == ISD::AssertZext) {
2650*5f757f3fSDimitry Andric       Arg = Arg.getOperand(0);
2651*5f757f3fSDimitry Andric       continue;
2652*5f757f3fSDimitry Andric     }
2653*5f757f3fSDimitry Andric     if (Op == ISD::TRUNCATE) {
2654*5f757f3fSDimitry Andric       const SDValue &TruncInput = Arg.getOperand(0);
2655*5f757f3fSDimitry Andric       if (TruncInput.getOpcode() == ISD::AssertZext &&
2656*5f757f3fSDimitry Andric           cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2657*5f757f3fSDimitry Andric               Arg.getValueType()) {
2658*5f757f3fSDimitry Andric         Arg = TruncInput.getOperand(0);
2659*5f757f3fSDimitry Andric         continue;
2660*5f757f3fSDimitry Andric       }
2661*5f757f3fSDimitry Andric     }
2662*5f757f3fSDimitry Andric     break;
2663*5f757f3fSDimitry Andric   }
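                               // For instance (illustrative), an i8 argument may reach here as
                               //   (truncate (AssertZext (CopyFromReg ...), i8))
                               // and the loop above strips the truncate/AssertZext pair so that the
                               // underlying CopyFromReg can be matched against the caller's incoming
                               // stack slot below.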
2664*5f757f3fSDimitry Andric 
2665*5f757f3fSDimitry Andric   int FI = INT_MAX;
2666*5f757f3fSDimitry Andric   if (Arg.getOpcode() == ISD::CopyFromReg) {
2667*5f757f3fSDimitry Andric     Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2668*5f757f3fSDimitry Andric     if (!VR.isVirtual())
2669*5f757f3fSDimitry Andric       return false;
2670*5f757f3fSDimitry Andric     MachineInstr *Def = MRI->getVRegDef(VR);
2671*5f757f3fSDimitry Andric     if (!Def)
2672*5f757f3fSDimitry Andric       return false;
2673*5f757f3fSDimitry Andric     if (!Flags.isByVal()) {
2674*5f757f3fSDimitry Andric       if (!TII->isLoadFromStackSlot(*Def, FI))
2675*5f757f3fSDimitry Andric         return false;
2676*5f757f3fSDimitry Andric     } else {
2677*5f757f3fSDimitry Andric       unsigned Opcode = Def->getOpcode();
2678*5f757f3fSDimitry Andric       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2679*5f757f3fSDimitry Andric            Opcode == X86::LEA64_32r) &&
2680*5f757f3fSDimitry Andric           Def->getOperand(1).isFI()) {
2681*5f757f3fSDimitry Andric         FI = Def->getOperand(1).getIndex();
2682*5f757f3fSDimitry Andric         Bytes = Flags.getByValSize();
2683*5f757f3fSDimitry Andric       } else
2684*5f757f3fSDimitry Andric         return false;
2685*5f757f3fSDimitry Andric     }
2686*5f757f3fSDimitry Andric   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2687*5f757f3fSDimitry Andric     if (Flags.isByVal())
2688*5f757f3fSDimitry Andric       // ByVal argument is passed in as a pointer but it's now being
2689*5f757f3fSDimitry Andric       // dereferenced. e.g.
2690*5f757f3fSDimitry Andric       // define @foo(%struct.X* %A) {
2691*5f757f3fSDimitry Andric       //   tail call @bar(%struct.X* byval %A)
2692*5f757f3fSDimitry Andric       // }
2693*5f757f3fSDimitry Andric       return false;
2694*5f757f3fSDimitry Andric     SDValue Ptr = Ld->getBasePtr();
2695*5f757f3fSDimitry Andric     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2696*5f757f3fSDimitry Andric     if (!FINode)
2697*5f757f3fSDimitry Andric       return false;
2698*5f757f3fSDimitry Andric     FI = FINode->getIndex();
2699*5f757f3fSDimitry Andric   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2700*5f757f3fSDimitry Andric     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2701*5f757f3fSDimitry Andric     FI = FINode->getIndex();
2702*5f757f3fSDimitry Andric     Bytes = Flags.getByValSize();
2703*5f757f3fSDimitry Andric   } else
2704*5f757f3fSDimitry Andric     return false;
2705*5f757f3fSDimitry Andric 
2706*5f757f3fSDimitry Andric   assert(FI != INT_MAX);
2707*5f757f3fSDimitry Andric   if (!MFI.isFixedObjectIndex(FI))
2708*5f757f3fSDimitry Andric     return false;
2709*5f757f3fSDimitry Andric 
2710*5f757f3fSDimitry Andric   if (Offset != MFI.getObjectOffset(FI))
2711*5f757f3fSDimitry Andric     return false;
2712*5f757f3fSDimitry Andric 
2713*5f757f3fSDimitry Andric   // If this is not byval, check that the argument stack object is immutable.
2714*5f757f3fSDimitry Andric   // inalloca and argument copy elision can create mutable argument stack
2715*5f757f3fSDimitry Andric   // objects. Byval objects can be mutated, but a byval call intends to pass the
2716*5f757f3fSDimitry Andric   // mutated memory.
2717*5f757f3fSDimitry Andric   if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2718*5f757f3fSDimitry Andric     return false;
2719*5f757f3fSDimitry Andric 
2720*5f757f3fSDimitry Andric   if (VA.getLocVT().getFixedSizeInBits() >
2721*5f757f3fSDimitry Andric       Arg.getValueSizeInBits().getFixedValue()) {
2722*5f757f3fSDimitry Andric     // If the argument location is wider than the argument type, check that any
2723*5f757f3fSDimitry Andric     // extension flags match.
2724*5f757f3fSDimitry Andric     if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2725*5f757f3fSDimitry Andric         Flags.isSExt() != MFI.isObjectSExt(FI)) {
2726*5f757f3fSDimitry Andric       return false;
2727*5f757f3fSDimitry Andric     }
2728*5f757f3fSDimitry Andric   }
2729*5f757f3fSDimitry Andric 
2730*5f757f3fSDimitry Andric   return Bytes == MFI.getObjectSize(FI);
2731*5f757f3fSDimitry Andric }
2732*5f757f3fSDimitry Andric 
2733*5f757f3fSDimitry Andric /// Check whether the call is eligible for tail call optimization. Targets
2734*5f757f3fSDimitry Andric /// that want to do tail call optimization should implement this function.
2735*5f757f3fSDimitry Andric bool X86TargetLowering::IsEligibleForTailCallOptimization(
2736*5f757f3fSDimitry Andric     SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
2737*5f757f3fSDimitry Andric     bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
2738*5f757f3fSDimitry Andric     const SmallVectorImpl<SDValue> &OutVals,
2739*5f757f3fSDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
2740*5f757f3fSDimitry Andric   if (!mayTailCallThisCC(CalleeCC))
2741*5f757f3fSDimitry Andric     return false;
2742*5f757f3fSDimitry Andric 
2743*5f757f3fSDimitry Andric   // If -tailcallopt is specified, make fastcc functions tail-callable.
2744*5f757f3fSDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
2745*5f757f3fSDimitry Andric   const Function &CallerF = MF.getFunction();
2746*5f757f3fSDimitry Andric 
2747*5f757f3fSDimitry Andric   // If the function return type is x86_fp80 and the callee return type is not,
2748*5f757f3fSDimitry Andric   // then the FP_EXTEND of the call result is not a nop. It's not safe to
2749*5f757f3fSDimitry Andric   // perform a tailcall optimization here.
2750*5f757f3fSDimitry Andric   if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
2751*5f757f3fSDimitry Andric     return false;
2752*5f757f3fSDimitry Andric 
2753*5f757f3fSDimitry Andric   CallingConv::ID CallerCC = CallerF.getCallingConv();
2754*5f757f3fSDimitry Andric   bool CCMatch = CallerCC == CalleeCC;
2755*5f757f3fSDimitry Andric   bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2756*5f757f3fSDimitry Andric   bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2757*5f757f3fSDimitry Andric   bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2758*5f757f3fSDimitry Andric       CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2759*5f757f3fSDimitry Andric 
2760*5f757f3fSDimitry Andric   // Win64 functions have extra shadow space for argument homing. Don't do the
2761*5f757f3fSDimitry Andric   // sibcall if the caller and callee have mismatched expectations for this
2762*5f757f3fSDimitry Andric   // space.
2763*5f757f3fSDimitry Andric   if (IsCalleeWin64 != IsCallerWin64)
2764*5f757f3fSDimitry Andric     return false;
2765*5f757f3fSDimitry Andric 
2766*5f757f3fSDimitry Andric   if (IsGuaranteeTCO) {
2767*5f757f3fSDimitry Andric     if (canGuaranteeTCO(CalleeCC) && CCMatch)
2768*5f757f3fSDimitry Andric       return true;
2769*5f757f3fSDimitry Andric     return false;
2770*5f757f3fSDimitry Andric   }
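                               // For example (illustrative IR), under -tailcallopt:
                               //   define fastcc void @caller() {
                               //     tail call fastcc void @callee()
                               //     ret void
                               //   }
                               // hits the guaranteed-TCO path above because caller and callee agree on
                               // fastcc; a fastcc caller tail-calling a ccc callee would return false.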
2771*5f757f3fSDimitry Andric 
2772*5f757f3fSDimitry Andric   // Look for obvious safe cases to perform tail call optimization that do not
2773*5f757f3fSDimitry Andric   // require ABI changes. This is what gcc calls sibcall.
2774*5f757f3fSDimitry Andric 
2775*5f757f3fSDimitry Andric   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2776*5f757f3fSDimitry Andric   // emit a special epilogue.
2777*5f757f3fSDimitry Andric   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2778*5f757f3fSDimitry Andric   if (RegInfo->hasStackRealignment(MF))
2779*5f757f3fSDimitry Andric     return false;
2780*5f757f3fSDimitry Andric 
2781*5f757f3fSDimitry Andric   // Also avoid sibcall optimization if the caller returns an sret pointer and
2782*5f757f3fSDimitry Andric   // the callee is incompatible. See the comment in LowerReturn about why
2783*5f757f3fSDimitry Andric   // hasStructRetAttr is insufficient.
2784*5f757f3fSDimitry Andric   if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2785*5f757f3fSDimitry Andric     // For a compatible tail call the callee must return our sret pointer. So
2786*5f757f3fSDimitry Andric     // it needs (a) to be an sret function itself and (b) to be passed our sret
2787*5f757f3fSDimitry Andric     // as its sret. Condition (b) is harder to determine.
2788*5f757f3fSDimitry Andric     return false;
2789*5f757f3fSDimitry Andric   } else if (IsCalleePopSRet)
2790*5f757f3fSDimitry Andric     // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2791*5f757f3fSDimitry Andric     // expect that.
2792*5f757f3fSDimitry Andric     return false;
2793*5f757f3fSDimitry Andric 
2794*5f757f3fSDimitry Andric   // Do not sibcall optimize vararg calls unless all arguments are passed via
2795*5f757f3fSDimitry Andric   // registers.
2796*5f757f3fSDimitry Andric   LLVMContext &C = *DAG.getContext();
2797*5f757f3fSDimitry Andric   if (isVarArg && !Outs.empty()) {
2798*5f757f3fSDimitry Andric     // Optimizing for varargs on Win64 is unlikely to be safe without
2799*5f757f3fSDimitry Andric     // additional testing.
2800*5f757f3fSDimitry Andric     if (IsCalleeWin64 || IsCallerWin64)
2801*5f757f3fSDimitry Andric       return false;
2802*5f757f3fSDimitry Andric 
2803*5f757f3fSDimitry Andric     SmallVector<CCValAssign, 16> ArgLocs;
2804*5f757f3fSDimitry Andric     CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2805*5f757f3fSDimitry Andric     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2806*5f757f3fSDimitry Andric     for (const auto &VA : ArgLocs)
2807*5f757f3fSDimitry Andric       if (!VA.isRegLoc())
2808*5f757f3fSDimitry Andric         return false;
2809*5f757f3fSDimitry Andric   }
2810*5f757f3fSDimitry Andric 
2811*5f757f3fSDimitry Andric   // If the call result is in ST0 / ST1, it needs to be popped off the x87
2812*5f757f3fSDimitry Andric   // stack. Therefore, if the result is unused at this call site, it is not
2813*5f757f3fSDimitry Andric   // safe to optimize this into a sibcall.
2814*5f757f3fSDimitry Andric   bool Unused = false;
2815*5f757f3fSDimitry Andric   for (const auto &In : Ins) {
2816*5f757f3fSDimitry Andric     if (!In.Used) {
2817*5f757f3fSDimitry Andric       Unused = true;
2818*5f757f3fSDimitry Andric       break;
2819*5f757f3fSDimitry Andric     }
2820*5f757f3fSDimitry Andric   }
2821*5f757f3fSDimitry Andric   if (Unused) {
2822*5f757f3fSDimitry Andric     SmallVector<CCValAssign, 16> RVLocs;
2823*5f757f3fSDimitry Andric     CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
2824*5f757f3fSDimitry Andric     CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2825*5f757f3fSDimitry Andric     for (const auto &VA : RVLocs) {
2826*5f757f3fSDimitry Andric       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2827*5f757f3fSDimitry Andric         return false;
2828*5f757f3fSDimitry Andric     }
2829*5f757f3fSDimitry Andric   }
2830*5f757f3fSDimitry Andric 
2831*5f757f3fSDimitry Andric   // Check that the call results are passed in the same way.
2832*5f757f3fSDimitry Andric   if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2833*5f757f3fSDimitry Andric                                   RetCC_X86, RetCC_X86))
2834*5f757f3fSDimitry Andric     return false;
2835*5f757f3fSDimitry Andric   // The callee has to preserve all registers the caller needs to preserve.
2836*5f757f3fSDimitry Andric   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2837*5f757f3fSDimitry Andric   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2838*5f757f3fSDimitry Andric   if (!CCMatch) {
2839*5f757f3fSDimitry Andric     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2840*5f757f3fSDimitry Andric     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2841*5f757f3fSDimitry Andric       return false;
2842*5f757f3fSDimitry Andric   }
2843*5f757f3fSDimitry Andric 
2844*5f757f3fSDimitry Andric   unsigned StackArgsSize = 0;
2845*5f757f3fSDimitry Andric 
2846*5f757f3fSDimitry Andric   // If the callee takes no arguments then go on to check the results of the
2847*5f757f3fSDimitry Andric   // call.
2848*5f757f3fSDimitry Andric   if (!Outs.empty()) {
2849*5f757f3fSDimitry Andric     // Check if stack adjustment is needed. For now, do not do this if any
2850*5f757f3fSDimitry Andric     // argument is passed on the stack.
2851*5f757f3fSDimitry Andric     SmallVector<CCValAssign, 16> ArgLocs;
2852*5f757f3fSDimitry Andric     CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2853*5f757f3fSDimitry Andric 
2854*5f757f3fSDimitry Andric     // Allocate shadow area for Win64
2855*5f757f3fSDimitry Andric     if (IsCalleeWin64)
2856*5f757f3fSDimitry Andric       CCInfo.AllocateStack(32, Align(8));
2857*5f757f3fSDimitry Andric 
2858*5f757f3fSDimitry Andric     CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2859*5f757f3fSDimitry Andric     StackArgsSize = CCInfo.getStackSize();
2860*5f757f3fSDimitry Andric 
2861*5f757f3fSDimitry Andric     if (CCInfo.getStackSize()) {
2862*5f757f3fSDimitry Andric       // Check whether the arguments are already laid out in the same way as
2863*5f757f3fSDimitry Andric       // the caller's fixed stack objects.
2864*5f757f3fSDimitry Andric       MachineFrameInfo &MFI = MF.getFrameInfo();
2865*5f757f3fSDimitry Andric       const MachineRegisterInfo *MRI = &MF.getRegInfo();
2866*5f757f3fSDimitry Andric       const X86InstrInfo *TII = Subtarget.getInstrInfo();
2867*5f757f3fSDimitry Andric       for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2868*5f757f3fSDimitry Andric         const CCValAssign &VA = ArgLocs[I];
2869*5f757f3fSDimitry Andric         SDValue Arg = OutVals[I];
2870*5f757f3fSDimitry Andric         ISD::ArgFlagsTy Flags = Outs[I].Flags;
2871*5f757f3fSDimitry Andric         if (VA.getLocInfo() == CCValAssign::Indirect)
2872*5f757f3fSDimitry Andric           return false;
2873*5f757f3fSDimitry Andric         if (!VA.isRegLoc()) {
2874*5f757f3fSDimitry Andric           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2875*5f757f3fSDimitry Andric                                    TII, VA))
2876*5f757f3fSDimitry Andric             return false;
2877*5f757f3fSDimitry Andric         }
2878*5f757f3fSDimitry Andric       }
2879*5f757f3fSDimitry Andric     }
2880*5f757f3fSDimitry Andric 
2881*5f757f3fSDimitry Andric     bool PositionIndependent = isPositionIndependent();
2882*5f757f3fSDimitry Andric     // If the tailcall address may be in a register, then make sure it's
2883*5f757f3fSDimitry Andric     // possible to register allocate for it. In 32-bit, the call address can
2884*5f757f3fSDimitry Andric     // only target EAX, EDX, or ECX since the tail call must be scheduled after
2885*5f757f3fSDimitry Andric     // callee-saved registers are restored. These happen to be the same
2886*5f757f3fSDimitry Andric     // registers used to pass 'inreg' arguments so watch out for those.
2887*5f757f3fSDimitry Andric     if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2888*5f757f3fSDimitry Andric                                   !isa<ExternalSymbolSDNode>(Callee)) ||
2889*5f757f3fSDimitry Andric                                  PositionIndependent)) {
2890*5f757f3fSDimitry Andric       unsigned NumInRegs = 0;
2891*5f757f3fSDimitry Andric       // In PIC we need an extra register to formulate the address computation
2892*5f757f3fSDimitry Andric       // for the callee.
2893*5f757f3fSDimitry Andric       unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2894*5f757f3fSDimitry Andric 
2895*5f757f3fSDimitry Andric       for (const auto &VA : ArgLocs) {
2896*5f757f3fSDimitry Andric         if (!VA.isRegLoc())
2897*5f757f3fSDimitry Andric           continue;
2898*5f757f3fSDimitry Andric         Register Reg = VA.getLocReg();
2899*5f757f3fSDimitry Andric         switch (Reg) {
2900*5f757f3fSDimitry Andric         default: break;
2901*5f757f3fSDimitry Andric         case X86::EAX: case X86::EDX: case X86::ECX:
2902*5f757f3fSDimitry Andric           if (++NumInRegs == MaxInRegs)
2903*5f757f3fSDimitry Andric             return false;
2904*5f757f3fSDimitry Andric           break;
2905*5f757f3fSDimitry Andric         }
2906*5f757f3fSDimitry Andric       }
2907*5f757f3fSDimitry Andric     }
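                                 // E.g. (illustrative): in non-PIC 32-bit code at most two of
                                 // EAX/EDX/ECX may carry 'inreg' arguments here; with PIC only one,
                                 // since an extra register is needed to materialize the callee's
                                 // address.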
2908*5f757f3fSDimitry Andric 
2909*5f757f3fSDimitry Andric     const MachineRegisterInfo &MRI = MF.getRegInfo();
2910*5f757f3fSDimitry Andric     if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2911*5f757f3fSDimitry Andric       return false;
2912*5f757f3fSDimitry Andric   }
2913*5f757f3fSDimitry Andric 
2914*5f757f3fSDimitry Andric   bool CalleeWillPop =
2915*5f757f3fSDimitry Andric       X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2916*5f757f3fSDimitry Andric                        MF.getTarget().Options.GuaranteedTailCallOpt);
2917*5f757f3fSDimitry Andric 
2918*5f757f3fSDimitry Andric   if (unsigned BytesToPop =
2919*5f757f3fSDimitry Andric           MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
2920*5f757f3fSDimitry Andric     // If we have bytes to pop, the callee must pop them.
2921*5f757f3fSDimitry Andric     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2922*5f757f3fSDimitry Andric     if (!CalleePopMatches)
2923*5f757f3fSDimitry Andric       return false;
2924*5f757f3fSDimitry Andric   } else if (CalleeWillPop && StackArgsSize > 0) {
2925*5f757f3fSDimitry Andric     // If we don't have bytes to pop, make sure the callee doesn't pop any.
2926*5f757f3fSDimitry Andric     return false;
2927*5f757f3fSDimitry Andric   }
2928*5f757f3fSDimitry Andric 
2929*5f757f3fSDimitry Andric   return true;
2930*5f757f3fSDimitry Andric }
2931*5f757f3fSDimitry Andric 
2932*5f757f3fSDimitry Andric /// Determines whether the callee is required to pop its own arguments.
2933*5f757f3fSDimitry Andric /// Callee pop is necessary to support tail calls.
2934*5f757f3fSDimitry Andric bool X86::isCalleePop(CallingConv::ID CallingConv,
2935*5f757f3fSDimitry Andric                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2936*5f757f3fSDimitry Andric   // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2937*5f757f3fSDimitry Andric   // can guarantee TCO.
2938*5f757f3fSDimitry Andric   if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2939*5f757f3fSDimitry Andric     return true;
2940*5f757f3fSDimitry Andric 
2941*5f757f3fSDimitry Andric   switch (CallingConv) {
2942*5f757f3fSDimitry Andric   default:
2943*5f757f3fSDimitry Andric     return false;
2944*5f757f3fSDimitry Andric   case CallingConv::X86_StdCall:
2945*5f757f3fSDimitry Andric   case CallingConv::X86_FastCall:
2946*5f757f3fSDimitry Andric   case CallingConv::X86_ThisCall:
2947*5f757f3fSDimitry Andric   case CallingConv::X86_VectorCall:
2948*5f757f3fSDimitry Andric     return !is64Bit;
2949*5f757f3fSDimitry Andric   }
2950*5f757f3fSDimitry Andric }