1 //===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file implements the lowering of LLVM calls to DAG nodes.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86CallingConv.h"
16 #include "X86FrameLowering.h"
17 #include "X86ISelLowering.h"
18 #include "X86InstrBuilder.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86TargetMachine.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/ObjCARCUtil.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/WinEHFuncInfo.h"
26 #include "llvm/IR/DiagnosticInfo.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/Module.h"
29 
30 #define DEBUG_TYPE "x86-isel"
31 
32 using namespace llvm;
33 
34 STATISTIC(NumTailCalls, "Number of tail calls");
35 
36 /// Call this when the user attempts to do something unsupported, like
37 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38 /// report_fatal_error, so calling code should attempt to recover without
39 /// crashing.
40 static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41                              const char *Msg) {
42   MachineFunction &MF = DAG.getMachineFunction();
43   DAG.getContext()->diagnose(
44       DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
45 }
46 
47 /// Returns true if a CC can dynamically exclude a register from the list of
48 /// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
49 /// the return registers.
50 static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
51   switch (CC) {
52   default:
53     return false;
54   case CallingConv::X86_RegCall:
55   case CallingConv::PreserveMost:
56   case CallingConv::PreserveAll:
57     return true;
58   }
59 }
60 
61 /// Returns true if a CC can dynamically exclude a register from the list of
62 /// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
63 /// the parameters.
64 static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
65   return CC == CallingConv::X86_RegCall;
66 }
67 
68 static std::pair<MVT, unsigned>
69 handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
70                                  const X86Subtarget &Subtarget) {
71   // v2i1/v4i1 always pass in xmm registers. v8i1/v16i1 also pass in xmm
72   // registers unless the calling convention is one that uses k registers
73   // (regcall, Intel OCL BI).
73   if (NumElts == 2)
74     return {MVT::v2i64, 1};
75   if (NumElts == 4)
76     return {MVT::v4i32, 1};
77   if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
78       CC != CallingConv::Intel_OCL_BI)
79     return {MVT::v8i16, 1};
80   if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
81       CC != CallingConv::Intel_OCL_BI)
82     return {MVT::v16i8, 1};
83   // v32i1 passes in ymm unless we have BWI and the calling convention is
84   // regcall.
85   if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86     return {MVT::v32i8, 1};
87   // Split v64i1 vectors if we don't have v64i8 available.
88   if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89     if (Subtarget.useAVX512Regs())
90       return {MVT::v64i8, 1};
91     return {MVT::v32i8, 2};
92   }
93 
94   // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95   if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96       NumElts > 64)
97     return {MVT::i8, NumElts};
98 
99   return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
100 }
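// For example, with AVX512 but no BWI under the default C calling convention,
// a v64i1 value does not get a single wide register here: the function returns
// {MVT::i8, 64}, so the mask is broken into 64 i8 scalars, while a v16i1 value
// is passed as a single v16i8 in an XMM register.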
101 
102 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
103                                                      CallingConv::ID CC,
104                                                      EVT VT) const {
105   if (VT.isVector()) {
106     if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107       unsigned NumElts = VT.getVectorNumElements();
108 
109       MVT RegisterVT;
110       unsigned NumRegisters;
111       std::tie(RegisterVT, NumRegisters) =
112           handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113       if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114         return RegisterVT;
115     }
116 
117     if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118       return MVT::v8f16;
119   }
120 
121   // We will use more GPRs for f64 and f80 in 32-bit mode when x87 is disabled.
122   if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123       !Subtarget.hasX87())
124     return MVT::i32;
125 
126   if (isTypeLegal(MVT::f16)) {
127     if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
128       return getRegisterTypeForCallingConv(
129           Context, CC, VT.changeVectorElementType(MVT::f16));
130 
131     if (VT == MVT::bf16)
132       return MVT::f16;
133   }
134 
135   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
136 }
137 
138 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
139                                                           CallingConv::ID CC,
140                                                           EVT VT) const {
141   if (VT.isVector()) {
142     if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
143       unsigned NumElts = VT.getVectorNumElements();
144 
145       MVT RegisterVT;
146       unsigned NumRegisters;
147       std::tie(RegisterVT, NumRegisters) =
148           handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
149       if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
150         return NumRegisters;
151     }
152 
153     if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
154       return 1;
155   }
156 
157   // We have to split f64 into 2 registers and f80 into 3 registers in 32-bit
158   // mode if x87 is disabled.
159   if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
160     if (VT == MVT::f64)
161       return 2;
162     if (VT == MVT::f80)
163       return 3;
164   }
165 
166   if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
167       isTypeLegal(MVT::f16))
168     return getNumRegistersForCallingConv(Context, CC,
169                                          VT.changeVectorElementType(MVT::f16));
170 
171   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
172 }
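// For example, on a 32-bit target without x87 an f64 value is therefore passed
// or returned as two i32 parts and an f80 as three, using the i32 register
// type chosen by getRegisterTypeForCallingConv above.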
173 
174 unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
175     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
176     unsigned &NumIntermediates, MVT &RegisterVT) const {
177   // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
178   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
179       Subtarget.hasAVX512() &&
180       (!isPowerOf2_32(VT.getVectorNumElements()) ||
181        (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
182        VT.getVectorNumElements() > 64)) {
183     RegisterVT = MVT::i8;
184     IntermediateVT = MVT::i1;
185     NumIntermediates = VT.getVectorNumElements();
186     return NumIntermediates;
187   }
188 
189   // Split v64i1 vectors if we don't have v64i8 available.
190   if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
191       CC != CallingConv::X86_RegCall) {
192     RegisterVT = MVT::v32i8;
193     IntermediateVT = MVT::v32i1;
194     NumIntermediates = 2;
195     return 2;
196   }
197 
198   // Split vNbf16 vectors according to vNf16.
199   if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
200       isTypeLegal(MVT::f16))
201     VT = VT.changeVectorElementType(MVT::f16);
202 
203   return TargetLowering::getVectorTypeBreakdownForCallingConv(
204       Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
205 }
206 
207 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
208                                           LLVMContext& Context,
209                                           EVT VT) const {
210   if (!VT.isVector())
211     return MVT::i8;
212 
213   if (Subtarget.hasAVX512()) {
214     // Figure out what this type will be legalized to.
215     EVT LegalVT = VT;
216     while (getTypeAction(Context, LegalVT) != TypeLegal)
217       LegalVT = getTypeToTransformTo(Context, LegalVT);
218 
219     // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
220     if (LegalVT.getSimpleVT().is512BitVector())
221       return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
222 
223     if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
224       // If we legalized to less than a 512-bit vector, then we will use a vXi1
225       // compare for vXi32/vXi64 for sure. If we have BWI we will also support
226       // vXi16/vXi8.
227       MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
228       if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
229         return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
230     }
231   }
232 
233   return VT.changeVectorElementTypeToInteger();
234 }
235 
236 bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
237     Type *Ty, CallingConv::ID CallConv, bool isVarArg,
238     const DataLayout &DL) const {
239   // An i128 split into i64 halves needs to be allocated to two consecutive
240   // registers, or spilled to the stack as a whole.
241   return Ty->isIntegerTy(128);
242 }
243 
244 /// Helper for getByValTypeAlignment to determine
245 /// the desired ByVal argument alignment.
246 static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
247   if (MaxAlign == 16)
248     return;
249   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
250     if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
251       MaxAlign = Align(16);
252   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
253     Align EltAlign;
254     getMaxByValAlign(ATy->getElementType(), EltAlign);
255     if (EltAlign > MaxAlign)
256       MaxAlign = EltAlign;
257   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
258     for (auto *EltTy : STy->elements()) {
259       Align EltAlign;
260       getMaxByValAlign(EltTy, EltAlign);
261       if (EltAlign > MaxAlign)
262         MaxAlign = EltAlign;
263       if (MaxAlign == 16)
264         break;
265     }
266   }
267 }
268 
269 /// Return the desired alignment for ByVal aggregate
270 /// function arguments in the caller parameter area. For X86, aggregates
271 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
272 /// are at 4-byte boundaries.
273 Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
274                                                const DataLayout &DL) const {
275   if (Subtarget.is64Bit())
276     return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
277 
278   Align Alignment(4);
279   if (Subtarget.hasSSE1())
280     getMaxByValAlign(Ty, Alignment);
281   return Alignment;
282 }
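// For example, on a 32-bit target with SSE1 a byval struct containing a
// 128-bit vector member gets 16-byte alignment from getMaxByValAlign, while a
// struct of plain integers keeps the default 4-byte byval alignment.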
283 
284 /// It returns EVT::Other if the type should be determined using generic
285 /// target-independent logic.
286 /// For vector ops we check that the overall size isn't larger than our
287 /// preferred vector width.
288 EVT X86TargetLowering::getOptimalMemOpType(
289     const MemOp &Op, const AttributeList &FuncAttributes) const {
290   if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
291     if (Op.size() >= 16 &&
292         (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
293       // FIXME: Check if unaligned 64-byte accesses are slow.
294       if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
295           (Subtarget.getPreferVectorWidth() >= 512)) {
296         return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
297       }
298       // FIXME: Check if unaligned 32-byte accesses are slow.
299       if (Op.size() >= 32 && Subtarget.hasAVX() &&
300           Subtarget.useLight256BitInstructions()) {
301         // Although this isn't a well-supported type for AVX1, we'll let
302         // legalization and shuffle lowering produce the optimal codegen. If we
303         // choose an optimal type with a vector element larger than a byte,
304         // getMemsetStores() may create an intermediate splat (using an integer
305         // multiply) before we splat as a vector.
306         return MVT::v32i8;
307       }
308       if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
309         return MVT::v16i8;
310       // TODO: Can SSE1 handle a byte vector?
311       // If we have SSE1 registers we should be able to use them.
312       if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
313           (Subtarget.getPreferVectorWidth() >= 128))
314         return MVT::v4f32;
315     } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
316                Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
317       // Do not use f64 to lower memcpy if source is string constant. It's
318       // better to use i32 to avoid the loads.
319       // Also, do not use f64 to lower memset unless this is a memset of zeros.
320       // The gymnastics of splatting a byte value into an XMM register and then
321       // only using 8-byte stores (because this is a CPU with slow unaligned
322       // 16-byte accesses) makes that a loser.
323       return MVT::f64;
324     }
325   }
326   // This is a compromise. If we reach here, unaligned accesses may be slow on
327   // this target. However, creating smaller, aligned accesses could be even
328   // slower and would certainly be a lot more code.
329   if (Subtarget.is64Bit() && Op.size() >= 8)
330     return MVT::i64;
331   return MVT::i32;
332 }
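// For example, a 64-byte inlined memset on an AVX-512 target that prefers
// 512-bit vectors is lowered with v64i8 (or v16i32 without BWI) stores, a
// plain SSE2 target gets v16i8, and targets without vector support fall back
// to scalar i64/i32 stores.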
333 
334 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
335   if (VT == MVT::f32)
336     return Subtarget.hasSSE1();
337   if (VT == MVT::f64)
338     return Subtarget.hasSSE2();
339   return true;
340 }
341 
342 static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
343   return (8 * Alignment.value()) % SizeInBits == 0;
344 }
345 
346 bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
347   if (isBitAligned(Alignment, VT.getSizeInBits()))
348     return true;
349   switch (VT.getSizeInBits()) {
350   default:
351     // 8-byte and under are always assumed to be fast.
352     return true;
353   case 128:
354     return !Subtarget.isUnalignedMem16Slow();
355   case 256:
356     return !Subtarget.isUnalignedMem32Slow();
357     // TODO: What about AVX-512 (512-bit) accesses?
358   }
359 }
360 
361 bool X86TargetLowering::allowsMisalignedMemoryAccesses(
362     EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
363     unsigned *Fast) const {
364   if (Fast)
365     *Fast = isMemoryAccessFast(VT, Alignment);
366   // NonTemporal vector memory ops must be aligned.
367   if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
368     // NT loads can only be vector aligned, so if it's less aligned than the
369     // minimum vector size (which we can split the vector down to), we might as
370     // well use a regular unaligned vector load.
371     // We don't have any NT loads pre-SSE41.
372     if (!!(Flags & MachineMemOperand::MOLoad))
373       return (Alignment < 16 || !Subtarget.hasSSE41());
374     return false;
375   }
376   // Misaligned accesses of any size are always allowed.
377   return true;
378 }
379 
380 bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
381                                            const DataLayout &DL, EVT VT,
382                                            unsigned AddrSpace, Align Alignment,
383                                            MachineMemOperand::Flags Flags,
384                                            unsigned *Fast) const {
385   if (Fast)
386     *Fast = isMemoryAccessFast(VT, Alignment);
387   if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
388     if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
389                                        /*Fast=*/nullptr))
390       return true;
391     // NonTemporal vector memory ops are special, and must be aligned.
392     if (!isBitAligned(Alignment, VT.getSizeInBits()))
393       return false;
394     switch (VT.getSizeInBits()) {
395     case 128:
396       if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
397         return true;
398       if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
399         return true;
400       return false;
401     case 256:
402       if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
403         return true;
404       if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
405         return true;
406       return false;
407     case 512:
408       if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
409         return true;
410       return false;
411     default:
412       return false; // Don't have NonTemporal vector memory ops of this size.
413     }
414   }
415   return true;
416 }
417 
418 /// Return the entry encoding for a jump table in the
419 /// current function.  The returned value is a member of the
420 /// MachineJumpTableInfo::JTEntryKind enum.
421 unsigned X86TargetLowering::getJumpTableEncoding() const {
422   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
423   // symbol.
424   if (isPositionIndependent() && Subtarget.isPICStyleGOT())
425     return MachineJumpTableInfo::EK_Custom32;
426   if (isPositionIndependent() &&
427       getTargetMachine().getCodeModel() == CodeModel::Large &&
428       !Subtarget.isTargetCOFF())
429     return MachineJumpTableInfo::EK_LabelDifference64;
430 
431   // Otherwise, use the normal jump table encoding heuristics.
432   return TargetLowering::getJumpTableEncoding();
433 }
434 
435 bool X86TargetLowering::useSoftFloat() const {
436   return Subtarget.useSoftFloat();
437 }
438 
439 void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
440                                               ArgListTy &Args) const {
441 
442   // Only relabel X86-32 for C / Stdcall CCs.
443   if (Subtarget.is64Bit())
444     return;
445   if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
446     return;
447   unsigned ParamRegs = 0;
448   if (auto *M = MF->getFunction().getParent())
449     ParamRegs = M->getNumberRegisterParameters();
450 
451   // Mark the first N integer/pointer arguments as being passed in registers.
452   for (auto &Arg : Args) {
453     Type *T = Arg.Ty;
454     if (T->isIntOrPtrTy())
455       if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
456         unsigned numRegs = 1;
457         if (MF->getDataLayout().getTypeAllocSize(T) > 4)
458           numRegs = 2;
459         if (ParamRegs < numRegs)
460           return;
461         ParamRegs -= numRegs;
462         Arg.IsInReg = true;
463       }
464   }
465 }
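// For example, if the module was built with -mregparm=3, the first integer or
// pointer libcall arguments are tagged IsInReg here (an i64 argument consumes
// two of the available parameter registers), so the 32-bit C/StdCall lowering
// passes them in registers instead of on the stack.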
466 
467 const MCExpr *
468 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
469                                              const MachineBasicBlock *MBB,
470                                              unsigned uid, MCContext &Ctx) const {
471   assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
472   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
473   // entries.
474   return MCSymbolRefExpr::create(MBB->getSymbol(),
475                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
476 }
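// Each such entry is emitted as a 32-bit @GOTOFF value, i.e. the basic block
// address expressed relative to the GOT base, matching the EK_Custom32
// encoding selected in getJumpTableEncoding().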
477 
478 /// Returns relocation base for the given PIC jumptable.
479 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
480                                                     SelectionDAG &DAG) const {
481   if (!Subtarget.is64Bit())
482     // This doesn't have SDLoc associated with it, but is not really the
483     // same as a Register.
484     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
485                        getPointerTy(DAG.getDataLayout()));
486   return Table;
487 }
488 
489 /// This returns the relocation base for the given PIC jumptable,
490 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
491 const MCExpr *X86TargetLowering::
492 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
493                              MCContext &Ctx) const {
494   // X86-64 uses RIP relative addressing based on the jump table label.
495   if (Subtarget.isPICStyleRIPRel() ||
496       (Subtarget.is64Bit() &&
497        getTargetMachine().getCodeModel() == CodeModel::Large))
498     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
499 
500   // Otherwise, the reference is relative to the PIC base.
501   return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
502 }
503 
504 std::pair<const TargetRegisterClass *, uint8_t>
505 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
506                                            MVT VT) const {
507   const TargetRegisterClass *RRC = nullptr;
508   uint8_t Cost = 1;
509   switch (VT.SimpleTy) {
510   default:
511     return TargetLowering::findRepresentativeClass(TRI, VT);
512   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
513     RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
514     break;
515   case MVT::x86mmx:
516     RRC = &X86::VR64RegClass;
517     break;
518   case MVT::f32: case MVT::f64:
519   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
520   case MVT::v4f32: case MVT::v2f64:
521   case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
522   case MVT::v8f32: case MVT::v4f64:
523   case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
524   case MVT::v16f32: case MVT::v8f64:
525     RRC = &X86::VR128XRegClass;
526     break;
527   }
528   return std::make_pair(RRC, Cost);
529 }
530 
531 unsigned X86TargetLowering::getAddressSpace() const {
532   if (Subtarget.is64Bit())
533     return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
534                                                                     : X86AS::FS;
535   return X86AS::GS;
536 }
537 
538 static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
539   return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
540          (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
541 }
542 
543 static Constant* SegmentOffset(IRBuilderBase &IRB,
544                                int Offset, unsigned AddressSpace) {
545   return ConstantExpr::getIntToPtr(
546       ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
547       IRB.getPtrTy(AddressSpace));
548 }
549 
550 Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
551   // glibc, bionic, and Fuchsia have a special slot for the stack guard in
552   // tcbhead_t; use it instead of the usual global variable (see
553   // sysdeps/{i386,x86_64}/nptl/tls.h)
554   if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
555     unsigned AddressSpace = getAddressSpace();
556 
557     // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
558     if (Subtarget.isTargetFuchsia())
559       return SegmentOffset(IRB, 0x10, AddressSpace);
560 
561     Module *M = IRB.GetInsertBlock()->getParent()->getParent();
562     // Note that some users may customize the base register and offset.
563     int Offset = M->getStackProtectorGuardOffset();
564     // If -stack-protector-guard-offset was not given, the default is
565     // %fs:0x28, unless we're using a Kernel code model, in which case
566     // it's %gs:0x28.  %gs:0x14 on i386.
567     if (Offset == INT_MAX)
568       Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
569 
570     StringRef GuardReg = M->getStackProtectorGuardReg();
571     if (GuardReg == "fs")
572       AddressSpace = X86AS::FS;
573     else if (GuardReg == "gs")
574       AddressSpace = X86AS::GS;
575 
576     // Use the symbol guard if the user specified one.
577     StringRef GuardSymb = M->getStackProtectorGuardSymbol();
578     if (!GuardSymb.empty()) {
579       GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
580       if (!GV) {
581         Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
582                                        : Type::getInt32Ty(M->getContext());
583         GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
584                                 nullptr, GuardSymb, nullptr,
585                                 GlobalValue::NotThreadLocal, AddressSpace);
586         if (!Subtarget.isTargetDarwin())
587           GV->setDSOLocal(M->getDirectAccessExternalData());
588       }
589       return GV;
590     }
591 
592     return SegmentOffset(IRB, Offset, AddressSpace);
593   }
594   return TargetLowering::getIRStackGuard(IRB);
595 }
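// For example, on x86-64 Linux/glibc the stack guard load produced from this
// becomes an access of %fs:0x28, and on i386 it is %gs:0x14, unless the module
// overrides the guard register, offset or symbol as handled above.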
596 
597 void X86TargetLowering::insertSSPDeclarations(Module &M) const {
598   // The MSVC CRT provides functionality for stack protection.
599   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
600       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
601     // The MSVC CRT has a global variable holding the security cookie.
602     M.getOrInsertGlobal("__security_cookie",
603                         PointerType::getUnqual(M.getContext()));
604 
605     // The MSVC CRT has a function to validate the security cookie.
606     FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
607         "__security_check_cookie", Type::getVoidTy(M.getContext()),
608         PointerType::getUnqual(M.getContext()));
609     if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
610       F->setCallingConv(CallingConv::X86_FastCall);
611       F->addParamAttr(0, Attribute::AttrKind::InReg);
612     }
613     return;
614   }
615 
616   StringRef GuardMode = M.getStackProtectorGuard();
617 
618   // glibc, bionic, and Fuchsia have a special slot for the stack guard.
619   if ((GuardMode == "tls" || GuardMode.empty()) &&
620       hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
621     return;
622   TargetLowering::insertSSPDeclarations(M);
623 }
624 
625 Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
626   // The MSVC CRT has a global variable holding the security cookie.
627   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
628       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
629     return M.getGlobalVariable("__security_cookie");
630   }
631   return TargetLowering::getSDagStackGuard(M);
632 }
633 
634 Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
635   // The MSVC CRT has a function to validate the security cookie.
636   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
637       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
638     return M.getFunction("__security_check_cookie");
639   }
640   return TargetLowering::getSSPStackGuardCheck(M);
641 }
642 
643 Value *
644 X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
645   // Android provides a fixed TLS slot for the SafeStack pointer. See the
646   // definition of TLS_SLOT_SAFESTACK in
647   // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
648   if (Subtarget.isTargetAndroid()) {
649     // %fs:0x48, unless we're using a Kernel code model, in which case it's
650     // %gs:0x48.  %gs:0x24 on i386.
651     int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
652     return SegmentOffset(IRB, Offset, getAddressSpace());
653   }
654 
655   // Fuchsia is similar.
656   if (Subtarget.isTargetFuchsia()) {
657     // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
658     return SegmentOffset(IRB, 0x18, getAddressSpace());
659   }
660 
661   return TargetLowering::getSafeStackPointerLocation(IRB);
662 }
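// For example, on Android x86-64 the unsafe stack pointer therefore lives at
// %fs:0x48, and on Fuchsia at the <zircon/tls.h> ZX_TLS_UNSAFE_SP_OFFSET slot.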
663 
664 //===----------------------------------------------------------------------===//
665 //               Return Value Calling Convention Implementation
666 //===----------------------------------------------------------------------===//
667 
668 bool X86TargetLowering::CanLowerReturn(
669     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
670     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
671     const Type *RetTy) const {
672   SmallVector<CCValAssign, 16> RVLocs;
673   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
674   return CCInfo.CheckReturn(Outs, RetCC_X86);
675 }
676 
677 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
678   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
679   return ScratchRegs;
680 }
681 
682 ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
683   static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
684   return RCRegs;
685 }
686 
687 /// Lowers mask values (v*i1) to local register values.
688 /// \returns the DAG node after lowering to the register type.
689 static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
690                                const SDLoc &DL, SelectionDAG &DAG) {
691   EVT ValVT = ValArg.getValueType();
692 
693   if (ValVT == MVT::v1i1)
694     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
695                        DAG.getIntPtrConstant(0, DL));
696 
697   if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
698       (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
699     // Two stage lowering might be required
700     // bitcast:   v8i1 -> i8 / v16i1 -> i16
701     // anyextend: i8   -> i32 / i16   -> i32
702     EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
703     SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
704     if (ValLoc == MVT::i32)
705       ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
706     return ValToCopy;
707   }
708 
709   if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
710       (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
711     // One stage lowering is required
712     // bitcast:   v32i1 -> i32 / v64i1 -> i64
713     return DAG.getBitcast(ValLoc, ValArg);
714   }
715 
716   return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
717 }
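// For example, a v16i1 mask being returned in an i32 location is first bitcast
// to i16 and then any-extended to i32, while a v32i1 mask headed for an i32
// location only needs the single bitcast.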
718 
719 /// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
720 static void Passv64i1ArgInRegs(
721     const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
722     SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
723     CCValAssign &NextVA, const X86Subtarget &Subtarget) {
724   assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
725   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
726   assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
727   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
728          "The value should reside in two registers");
729 
730   // Before splitting the value we cast it to i64
731   Arg = DAG.getBitcast(MVT::i64, Arg);
732 
733   // Split the value into two i32 halves.
734   SDValue Lo, Hi;
735   std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
736 
737   // Assign the two i32 halves to the corresponding registers.
738   RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
739   RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
740 }
741 
742 SDValue
743 X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
744                                bool isVarArg,
745                                const SmallVectorImpl<ISD::OutputArg> &Outs,
746                                const SmallVectorImpl<SDValue> &OutVals,
747                                const SDLoc &dl, SelectionDAG &DAG) const {
748   MachineFunction &MF = DAG.getMachineFunction();
749   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
750 
751   // In some cases we need to disable registers from the default CSR list.
752   // For example, when they are used as return registers (preserve_* and X86's
753   // regcall) or for argument passing (X86's regcall).
754   bool ShouldDisableCalleeSavedRegister =
755       shouldDisableRetRegFromCSR(CallConv) ||
756       MF.getFunction().hasFnAttribute("no_caller_saved_registers");
757 
758   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
759     report_fatal_error("X86 interrupts may not return any value");
760 
761   SmallVector<CCValAssign, 16> RVLocs;
762   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
763   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
764 
765   SmallVector<std::pair<Register, SDValue>, 4> RetVals;
766   for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
767        ++I, ++OutsIndex) {
768     CCValAssign &VA = RVLocs[I];
769     assert(VA.isRegLoc() && "Can only return in registers!");
770 
771     // Add the register to the CalleeSaveDisableRegs list.
772     if (ShouldDisableCalleeSavedRegister)
773       MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
774 
775     SDValue ValToCopy = OutVals[OutsIndex];
776     EVT ValVT = ValToCopy.getValueType();
777 
778     // Promote values to the appropriate types.
779     if (VA.getLocInfo() == CCValAssign::SExt)
780       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
781     else if (VA.getLocInfo() == CCValAssign::ZExt)
782       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
783     else if (VA.getLocInfo() == CCValAssign::AExt) {
784       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
785         ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
786       else
787         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
788     }
789     else if (VA.getLocInfo() == CCValAssign::BCvt)
790       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
791 
792     assert(VA.getLocInfo() != CCValAssign::FPExt &&
793            "Unexpected FP-extend for return value.");
794 
795     // Report an error if we have attempted to return a value via an XMM
796     // register and SSE was disabled.
797     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
798       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
799       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
800     } else if (!Subtarget.hasSSE2() &&
801                X86::FR64XRegClass.contains(VA.getLocReg()) &&
802                ValVT == MVT::f64) {
803       // When returning a double via an XMM register, report an error if SSE2 is
804       // not enabled.
805       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
806       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
807     }
808 
809     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
810     // the RET instruction and handled by the FP Stackifier.
811     if (VA.getLocReg() == X86::FP0 ||
812         VA.getLocReg() == X86::FP1) {
813       // If this is a copy from an xmm register to ST(0), use an FPExtend to
814       // change the value to the FP stack register class.
815       if (isScalarFPTypeInSSEReg(VA.getValVT()))
816         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
817       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
818       // Don't emit a copytoreg.
819       continue;
820     }
821 
822     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
823     // which is returned in RAX / RDX.
824     if (Subtarget.is64Bit()) {
825       if (ValVT == MVT::x86mmx) {
826         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
827           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
828           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
829                                   ValToCopy);
830           // If we don't have SSE2 available, convert to v4f32 so the generated
831           // register is legal.
832           if (!Subtarget.hasSSE2())
833             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
834         }
835       }
836     }
837 
838     if (VA.needsCustom()) {
839       assert(VA.getValVT() == MVT::v64i1 &&
840              "Currently the only custom case is when we split v64i1 to 2 regs");
841 
842       Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
843                          Subtarget);
844 
845       // Add the second register to the CalleeSaveDisableRegs list.
846       if (ShouldDisableCalleeSavedRegister)
847         MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
848     } else {
849       RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
850     }
851   }
852 
853   SDValue Glue;
854   SmallVector<SDValue, 6> RetOps;
855   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
856   // Operand #1 = Bytes To Pop
857   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
858                    MVT::i32));
859 
860   // Copy the result values into the output registers.
861   for (auto &RetVal : RetVals) {
862     if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
863       RetOps.push_back(RetVal.second);
864       continue; // Don't emit a copytoreg.
865     }
866 
867     Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
868     Glue = Chain.getValue(1);
869     RetOps.push_back(
870         DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
871   }
872 
873   // The Swift calling convention does not require us to copy the sret argument
874   // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
875 
876   // All x86 ABIs require that for returning structs by value we copy
877   // the sret argument into %rax/%eax (depending on ABI) for the return.
878   // We saved the argument into a virtual register in the entry block,
879   // so now we copy the value out and into %rax/%eax.
880   //
881   // Checking Function.hasStructRetAttr() here is insufficient because the IR
882   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
883   // false, then an sret argument may be implicitly inserted in the SelDAG. In
884   // either case FuncInfo->setSRetReturnReg() will have been called.
885   if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
886     // When we have both sret and another return value, we should use the
887     // original Chain stored in RetOps[0], instead of the current Chain updated
888     // in the above loop. If we only have sret, RetOps[0] equals to Chain.
889 
890     // For the case of sret and another return value, we have
891     //   Chain_0 at the function entry
892     //   Chain_1 = getCopyToReg(Chain_0) in the above loop
893     // If we use Chain_1 in getCopyFromReg, we will have
894     //   Val = getCopyFromReg(Chain_1)
895     //   Chain_2 = getCopyToReg(Chain_1, Val) from below
896 
897     // getCopyToReg(Chain_0) will be glued together with
898     // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
899     // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
900     //   Data dependency from Unit B to Unit A due to usage of Val in
901     //     getCopyToReg(Chain_1, Val)
902     //   Chain dependency from Unit A to Unit B
903 
904     // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
905     SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
906                                      getPointerTy(MF.getDataLayout()));
907 
908     Register RetValReg
909         = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
910           X86::RAX : X86::EAX;
911     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
912     Glue = Chain.getValue(1);
913 
914     // RAX/EAX now acts like a return value.
915     RetOps.push_back(
916         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
917 
918     // Add the returned register to the CalleeSaveDisableRegs list. Don't do
919     // this however for preserve_most/preserve_all to minimize the number of
920     // callee-saved registers for these CCs.
921     if (ShouldDisableCalleeSavedRegister &&
922         CallConv != CallingConv::PreserveAll &&
923         CallConv != CallingConv::PreserveMost)
924       MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
925   }
926 
927   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
928   const MCPhysReg *I =
929       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
930   if (I) {
931     for (; *I; ++I) {
932       if (X86::GR64RegClass.contains(*I))
933         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
934       else
935         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
936     }
937   }
938 
939   RetOps[0] = Chain;  // Update chain.
940 
941   // Add the glue if we have it.
942   if (Glue.getNode())
943     RetOps.push_back(Glue);
944 
945   X86ISD::NodeType opcode = X86ISD::RET_GLUE;
946   if (CallConv == CallingConv::X86_INTR)
947     opcode = X86ISD::IRET;
948   return DAG.getNode(opcode, dl, MVT::Other, RetOps);
949 }
950 
951 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
952   if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
953     return false;
954 
955   SDValue TCChain = Chain;
956   SDNode *Copy = *N->user_begin();
957   if (Copy->getOpcode() == ISD::CopyToReg) {
958     // If the copy has a glue operand, we conservatively assume it isn't safe to
959     // perform a tail call.
960     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
961       return false;
962     TCChain = Copy->getOperand(0);
963   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
964     return false;
965 
966   bool HasRet = false;
967   for (const SDNode *U : Copy->users()) {
968     if (U->getOpcode() != X86ISD::RET_GLUE)
969       return false;
970     // If we are returning more than one value, we can definitely
971     // not make a tail call; see PR19530.
972     if (U->getNumOperands() > 4)
973       return false;
974     if (U->getNumOperands() == 4 &&
975         U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
976       return false;
977     HasRet = true;
978   }
979 
980   if (!HasRet)
981     return false;
982 
983   Chain = TCChain;
984   return true;
985 }
986 
987 EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
988                                            ISD::NodeType ExtendKind) const {
989   MVT ReturnMVT = MVT::i32;
990 
991   bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
992   if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
993     // The ABI does not require i1, i8 or i16 to be extended.
994     //
995     // On Darwin, there is code in the wild relying on Clang's old behaviour of
996     // always extending i8/i16 return values, so keep doing that for now.
997     // (PR26665).
998     ReturnMVT = MVT::i8;
999   }
1000 
1001   EVT MinVT = getRegisterType(Context, ReturnMVT);
1002   return VT.bitsLT(MinVT) ? MinVT : VT;
1003 }
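// For example, an i8 return value keeps its i8 register type on non-Darwin
// targets, but on Darwin it is widened to i32 to preserve Clang's historical
// behaviour of extending small return values (PR26665).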
1004 
1005 /// Reads two 32 bit registers and creates a 64 bit mask value.
1006 /// \param VA The current 32 bit value that needs to be assigned.
1007 /// \param NextVA The next 32 bit value that needs to be assigned.
1008 /// \param Root The parent DAG node.
1009 /// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
1010 ///                        for glue purposes. If the DAG is already using a
1011 ///                        physical register instead of a virtual one, we
1012 ///                        should glue our new SDValue to the InGlue SDValue.
1013 /// \returns a new 64 bit SDValue.
1014 static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1015                                 SDValue &Root, SelectionDAG &DAG,
1016                                 const SDLoc &DL, const X86Subtarget &Subtarget,
1017                                 SDValue *InGlue = nullptr) {
1018   assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1019   assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1020   assert(VA.getValVT() == MVT::v64i1 &&
1021          "Expecting first location of 64 bit width type");
1022   assert(NextVA.getValVT() == VA.getValVT() &&
1023          "The locations should have the same type");
1024   assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1025          "The values should reside in two registers");
1026 
1027   SDValue Lo, Hi;
1028   SDValue ArgValueLo, ArgValueHi;
1029 
1030   MachineFunction &MF = DAG.getMachineFunction();
1031   const TargetRegisterClass *RC = &X86::GR32RegClass;
1032 
1033   // Read a 32 bit value from the registers.
1034   if (nullptr == InGlue) {
1035     // When no physical register is present,
1036     // create an intermediate virtual register.
1037     Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1038     ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1039     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1040     ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1041   } else {
1042     // When a physical register is available read the value from it and glue
1043     // the reads together.
1044     ArgValueLo =
1045       DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1046     *InGlue = ArgValueLo.getValue(2);
1047     ArgValueHi =
1048       DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1049     *InGlue = ArgValueHi.getValue(2);
1050   }
1051 
1052   // Convert the i32 type into v32i1 type.
1053   Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1054 
1055   // Convert the i32 type into v32i1 type.
1056   Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1057 
1058   // Concatenate the two values together.
1059   return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1060 }
1061 
1062 /// Lowers a register of various sizes (8/16/32/64 bit)
1063 /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
1064 /// \returns a DAG node containing the operand after lowering to the mask type.
1065 static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1066                                const EVT &ValLoc, const SDLoc &DL,
1067                                SelectionDAG &DAG) {
1068   SDValue ValReturned = ValArg;
1069 
1070   if (ValVT == MVT::v1i1)
1071     return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1072 
1073   if (ValVT == MVT::v64i1) {
1074     // On a 32 bit target this case is handled by getv64i1Argument.
1075     assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1076     // On a 64 bit target there is no need to truncate the value, only bitcast it.
1077   } else {
1078     MVT MaskLenVT;
1079     switch (ValVT.getSimpleVT().SimpleTy) {
1080     case MVT::v8i1:
1081       MaskLenVT = MVT::i8;
1082       break;
1083     case MVT::v16i1:
1084       MaskLenVT = MVT::i16;
1085       break;
1086     case MVT::v32i1:
1087       MaskLenVT = MVT::i32;
1088       break;
1089     default:
1090       llvm_unreachable("Expecting a vector of i1 types");
1091     }
1092 
1093     ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1094   }
1095   return DAG.getBitcast(ValVT, ValReturned);
1096 }
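// For example, a v8i1 value that arrives in an i32 location is truncated to i8
// and then bitcast back to v8i1; a v64i1 value in an i64 location only needs
// the final bitcast.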
1097 
1098 /// Lower the result values of a call into the
1099 /// appropriate copies out of the physical registers.
1100 ///
1101 SDValue X86TargetLowering::LowerCallResult(
1102     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1103     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1104     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1105     uint32_t *RegMask) const {
1106 
1107   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1108   // Assign locations to each value returned by this call.
1109   SmallVector<CCValAssign, 16> RVLocs;
1110   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1111                  *DAG.getContext());
1112   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1113 
1114   // Copy all of the result registers out of their specified physreg.
1115   for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1116        ++I, ++InsIndex) {
1117     CCValAssign &VA = RVLocs[I];
1118     EVT CopyVT = VA.getLocVT();
1119 
1120     // In some calling conventions we need to remove the used registers
1121     // from the register mask.
1122     if (RegMask) {
1123       for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1124         RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1125     }
1126 
1127     // Report an error if there was an attempt to return FP values via XMM
1128     // registers.
1129     if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1130       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1131       if (VA.getLocReg() == X86::XMM1)
1132         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1133       else
1134         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135     } else if (!Subtarget.hasSSE2() &&
1136                X86::FR64XRegClass.contains(VA.getLocReg()) &&
1137                CopyVT == MVT::f64) {
1138       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1139       if (VA.getLocReg() == X86::XMM1)
1140         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1141       else
1142         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1143     }
1144 
1145     // If we prefer to use the value in xmm registers, copy it out as f80 and
1146     // use a truncate to move it from fp stack reg to xmm reg.
1147     bool RoundAfterCopy = false;
1148     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1149         isScalarFPTypeInSSEReg(VA.getValVT())) {
1150       if (!Subtarget.hasX87())
1151         report_fatal_error("X87 register return with X87 disabled");
1152       CopyVT = MVT::f80;
1153       RoundAfterCopy = (CopyVT != VA.getLocVT());
1154     }
1155 
1156     SDValue Val;
1157     if (VA.needsCustom()) {
1158       assert(VA.getValVT() == MVT::v64i1 &&
1159              "Currently the only custom case is when we split v64i1 to 2 regs");
1160       Val =
1161           getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1162     } else {
1163       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1164                   .getValue(1);
1165       Val = Chain.getValue(0);
1166       InGlue = Chain.getValue(2);
1167     }
1168 
1169     if (RoundAfterCopy)
1170       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1171                         // This truncation won't change the value.
1172                         DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1173 
1174     if (VA.isExtInLoc()) {
1175       if (VA.getValVT().isVector() &&
1176           VA.getValVT().getScalarType() == MVT::i1 &&
1177           ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1178            (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1179         // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1180         Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1181       } else
1182         Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1183     }
1184 
1185     if (VA.getLocInfo() == CCValAssign::BCvt)
1186       Val = DAG.getBitcast(VA.getValVT(), Val);
1187 
1188     InVals.push_back(Val);
1189   }
1190 
1191   return Chain;
1192 }
1193 
1194 //===----------------------------------------------------------------------===//
1195 //                C & StdCall & Fast Calling Convention implementation
1196 //===----------------------------------------------------------------------===//
1197 //  The StdCall calling convention is the standard for many Windows API
1198 //  routines. It differs from the C calling convention just a little: the
1199 //  callee cleans up the stack, not the caller, and symbols are also
1200 //  decorated in some fancy way :) It doesn't support any vector arguments.
1201 //  For info on the fast calling convention see the Fast Calling Convention
1202 //  (tail call) implementation, LowerX86_32FastCCCallTo.
1203 
1204 /// Determines whether Args, either a set of outgoing arguments to a call, or a
1205 /// set of incoming args of a call, contains an sret pointer that the callee
1206 /// pops.
1207 template <typename T>
1208 static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1209                              const X86Subtarget &Subtarget) {
1210   // Not C++20 (yet), so no concepts available.
1211   static_assert(std::is_same_v<T, ISD::OutputArg> ||
1212                     std::is_same_v<T, ISD::InputArg>,
1213                 "requires ISD::OutputArg or ISD::InputArg");
1214 
1215   // Only 32-bit targets pop the sret.  It's a 64-bit world these days, so
1216   // early-out for most compilations.
1217   if (!Subtarget.is32Bit())
1218     return false;
1219 
1220   if (Args.empty())
1221     return false;
1222 
1223   // Most calls do not have an sret argument, check the arg next.
1224   const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1225   if (!Flags.isSRet() || Flags.isInReg())
1226     return false;
1227 
1228   // The MSVC ABI does not pop the sret.
1229   if (Subtarget.getTargetTriple().isOSMSVCRT())
1230     return false;
1231 
1232   // MCUs don't pop the sret either.
1233   if (Subtarget.isTargetMCU())
1234     return false;
1235 
1236   // Otherwise, the callee pops the sret argument.
1237   return true;
1238 }
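// In practice this matches the i386 System V convention, where a function
// returning a struct through a hidden sret pointer pops that pointer itself
// (e.g. with a "ret $4"), whereas MSVC and MCU targets leave it to the caller.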
1239 
1240 /// Make a copy of an aggregate at address specified by "Src" to address
1241 /// "Dst" with size and alignment information specified by the specific
1242 /// parameter attribute. The copy will be passed as a byval function parameter.
1243 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1244                                          SDValue Chain, ISD::ArgFlagsTy Flags,
1245                                          SelectionDAG &DAG, const SDLoc &dl) {
1246   SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1247 
1248   return DAG.getMemcpy(
1249       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1250       /*isVolatile*/ false, /*AlwaysInline=*/true,
1251       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1252 }
1253 
1254 /// Return true if the calling convention is one that we can guarantee TCO for.
1255 static bool canGuaranteeTCO(CallingConv::ID CC) {
1256   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1257           CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1258           CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1259 }
1260 
1261 /// Return true if we might ever do TCO for calls with this calling convention.
1262 static bool mayTailCallThisCC(CallingConv::ID CC) {
1263   switch (CC) {
1264   // C calling conventions:
1265   case CallingConv::C:
1266   case CallingConv::Win64:
1267   case CallingConv::X86_64_SysV:
1268   case CallingConv::PreserveNone:
1269   // Callee pop conventions:
1270   case CallingConv::X86_ThisCall:
1271   case CallingConv::X86_StdCall:
1272   case CallingConv::X86_VectorCall:
1273   case CallingConv::X86_FastCall:
1274   // Swift:
1275   case CallingConv::Swift:
1276     return true;
1277   default:
1278     return canGuaranteeTCO(CC);
1279   }
1280 }
1281 
1282 /// Return true if the function is being made into a tailcall target by
1283 /// changing its ABI.
1284 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1285   return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1286          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1287 }
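// For example, a call using CallingConv::Tail (the IR "tailcc" convention) or
// SwiftTail is always treated as requiring guaranteed TCO, whereas a plain
// fastcc call only gets the ABI-changing treatment under -tailcallopt
// (GuaranteedTailCallOpt).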
1288 
1289 bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1290   if (!CI->isTailCall())
1291     return false;
1292 
1293   CallingConv::ID CalleeCC = CI->getCallingConv();
1294   if (!mayTailCallThisCC(CalleeCC))
1295     return false;
1296 
1297   return true;
1298 }
1299 
1300 SDValue
1301 X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1302                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1303                                     const SDLoc &dl, SelectionDAG &DAG,
1304                                     const CCValAssign &VA,
1305                                     MachineFrameInfo &MFI, unsigned i) const {
1306   // Create the nodes corresponding to a load from this parameter slot.
1307   ISD::ArgFlagsTy Flags = Ins[i].Flags;
1308   bool AlwaysUseMutable = shouldGuaranteeTCO(
1309       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1310   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1311   EVT ValVT;
1312   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1313 
1314   // If the value is passed by pointer, the address is passed instead of the
1315   // value itself. No need to extend if the mask value and the location share
1316   // the same absolute size.
1317   bool ExtendedInMem =
1318       VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1319       VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1320 
1321   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1322     ValVT = VA.getLocVT();
1323   else
1324     ValVT = VA.getValVT();
1325 
1326   // FIXME: For now, all byval parameter objects are marked mutable. This can be
1327   // changed with more analysis.
1328   // In the case of tail call optimization, mark all arguments mutable, since
1329   // they could be overwritten when the arguments of a tail call are lowered.
1330   if (Flags.isByVal()) {
1331     unsigned Bytes = Flags.getByValSize();
1332     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1333 
1334     // FIXME: For now, all byval parameter objects are marked as aliasing. This
1335     // can be improved with deeper analysis.
1336     int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1337                                    /*isAliased=*/true);
1338     return DAG.getFrameIndex(FI, PtrVT);
1339   }
1340 
1341   EVT ArgVT = Ins[i].ArgVT;
1342 
1343   // If this is a vector that has been split into multiple parts, don't elide
1344   // the copy. The layout on the stack may not match the packed in-memory
1345   // layout.
1346   bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1347 
1348   // This is an argument in memory. We might be able to perform copy elision.
1349   // If the argument is passed directly in memory without any extension, then we
1350   // can perform copy elision. Large vector types, for example, may be passed
1351   // indirectly by pointer.
1352   if (Flags.isCopyElisionCandidate() &&
1353       VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1354       !ScalarizedVector) {
1355     SDValue PartAddr;
1356     if (Ins[i].PartOffset == 0) {
1357       // If this is a one-part value or the first part of a multi-part value,
1358       // create a stack object for the entire argument value type and return a
1359       // load from our portion of it. This assumes that if the first part of an
1360       // argument is in memory, the rest will also be in memory.
1361       int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1362                                      /*IsImmutable=*/false);
1363       PartAddr = DAG.getFrameIndex(FI, PtrVT);
1364       return DAG.getLoad(
1365           ValVT, dl, Chain, PartAddr,
1366           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1367     }
1368 
1369     // This is not the first piece of an argument in memory. See if there is
1370     // already a fixed stack object including this offset. If so, assume it
1371     // was created by the PartOffset == 0 branch above and create a load from
1372     // the appropriate offset into it.
1373     int64_t PartBegin = VA.getLocMemOffset();
1374     int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1375     int FI = MFI.getObjectIndexBegin();
1376     for (; MFI.isFixedObjectIndex(FI); ++FI) {
1377       int64_t ObjBegin = MFI.getObjectOffset(FI);
1378       int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1379       if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1380         break;
1381     }
1382     if (MFI.isFixedObjectIndex(FI)) {
1383       SDValue Addr =
1384           DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1385                       DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1386       return DAG.getLoad(ValVT, dl, Chain, Addr,
1387                          MachinePointerInfo::getFixedStack(
1388                              DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1389     }
1390   }
1391 
1392   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1393                                  VA.getLocMemOffset(), isImmutable);
1394 
1395   // Set SExt or ZExt flag.
1396   if (VA.getLocInfo() == CCValAssign::ZExt) {
1397     MFI.setObjectZExt(FI, true);
1398   } else if (VA.getLocInfo() == CCValAssign::SExt) {
1399     MFI.setObjectSExt(FI, true);
1400   }
1401 
1402   MaybeAlign Alignment;
1403   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1404       ValVT != MVT::f80)
1405     Alignment = MaybeAlign(4);
1406   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1407   SDValue Val = DAG.getLoad(
1408       ValVT, dl, Chain, FIN,
1409       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1410       Alignment);
1411   return ExtendedInMem
1412              ? (VA.getValVT().isVector()
1413                     ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1414                     : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1415              : Val;
1416 }
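// Copy-elision sketch (illustrative): when the elision candidate flag is set
// and the value is neither extended nor passed indirectly, the fixed stack
// object created above doubles as the argument's in-memory home, so the
// frontend's usual "load the incoming value, store it into a local alloca"
// pair can be dropped; parts with PartOffset != 0 reuse the object created
// for the first part.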
1417 
1418 // FIXME: Get this from tablegen.
1419 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1420                                                 const X86Subtarget &Subtarget) {
1421   assert(Subtarget.is64Bit());
1422 
1423   if (Subtarget.isCallingConvWin64(CallConv)) {
1424     static const MCPhysReg GPR64ArgRegsWin64[] = {
1425       X86::RCX, X86::RDX, X86::R8,  X86::R9
1426     };
1427     return GPR64ArgRegsWin64;
1428   }
1429 
1430   static const MCPhysReg GPR64ArgRegs64Bit[] = {
1431     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1432   };
1433   return GPR64ArgRegs64Bit;
1434 }
1435 
1436 // FIXME: Get this from tablegen.
1437 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1438                                                 CallingConv::ID CallConv,
1439                                                 const X86Subtarget &Subtarget) {
1440   assert(Subtarget.is64Bit());
1441   if (Subtarget.isCallingConvWin64(CallConv)) {
1442     // The XMM registers which might contain var arg parameters are shadowed
1443     // in their paired GPRs.  So we only need to save the GPRs to their home
1444     // slots.
1445     // TODO: __vectorcall will change this.
1446     return {};
1447   }
1448 
1449   bool isSoftFloat = Subtarget.useSoftFloat();
1450   if (isSoftFloat || !Subtarget.hasSSE1())
1451     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1452     // registers.
1453     return {};
1454 
1455   static const MCPhysReg XMMArgRegs64Bit[] = {
1456     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1457     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1458   };
1459   return XMMArgRegs64Bit;
1460 }
1461 
1462 #ifndef NDEBUG
1463 static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1464   return llvm::is_sorted(
1465       ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1466         return A.getValNo() < B.getValNo();
1467       });
1468 }
1469 #endif
1470 
1471 namespace {
1472 /// This is a helper class for lowering variable-argument parameters.
1473 class VarArgsLoweringHelper {
1474 public:
1475   VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1476                         SelectionDAG &DAG, const X86Subtarget &Subtarget,
1477                         CallingConv::ID CallConv, CCState &CCInfo)
1478       : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1479         TheMachineFunction(DAG.getMachineFunction()),
1480         TheFunction(TheMachineFunction.getFunction()),
1481         FrameInfo(TheMachineFunction.getFrameInfo()),
1482         FrameLowering(*Subtarget.getFrameLowering()),
1483         TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1484         CCInfo(CCInfo) {}
1485 
1486   // Lower variable-argument parameters.
1487   void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1488 
1489 private:
1490   void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1491 
1492   void forwardMustTailParameters(SDValue &Chain);
1493 
1494   bool is64Bit() const { return Subtarget.is64Bit(); }
1495   bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1496 
1497   X86MachineFunctionInfo *FuncInfo;
1498   const SDLoc &DL;
1499   SelectionDAG &DAG;
1500   const X86Subtarget &Subtarget;
1501   MachineFunction &TheMachineFunction;
1502   const Function &TheFunction;
1503   MachineFrameInfo &FrameInfo;
1504   const TargetFrameLowering &FrameLowering;
1505   const TargetLowering &TargLowering;
1506   CallingConv::ID CallConv;
1507   CCState &CCInfo;
1508 };
1509 } // namespace
1510 
1511 void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1512     SDValue &Chain, unsigned StackSize) {
1513   // If the function takes a variable number of arguments, make a frame index
1514   // for the start of the first vararg value... for expansion of llvm.va_start.
1515   // We can skip this if there are no va_start calls.
1516   if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1517                     CallConv != CallingConv::X86_ThisCall)) {
1518     FuncInfo->setVarArgsFrameIndex(
1519         FrameInfo.CreateFixedObject(1, StackSize, true));
1520   }
1521 
1522   // 64-bit calling conventions support varargs and register parameters, so we
1523   // have to do extra work to spill them in the prologue.
1524   if (is64Bit()) {
1525     // Find the first unallocated argument register in each register set.
1526     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1527     ArrayRef<MCPhysReg> ArgXMMs =
1528         get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1529     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1530     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1531 
1532     assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1533            "SSE register cannot be used when SSE is disabled!");
1534 
1535     if (isWin64()) {
1536       // Get to the caller-allocated home save location.  Add 8 to account
1537       // for the return address.
1538       int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1539       FuncInfo->setRegSaveFrameIndex(
1540           FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1541       // Fixup to set vararg frame on shadow area (4 x i64).
1542       if (NumIntRegs < 4)
1543         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1544     } else {
1545       // For X86-64, if there are vararg parameters that are passed via
1546       // registers, then we must store them to their spots on the stack so
1547       // they may be loaded by dereferencing the result of va_arg.
1548       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1549       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1550       FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1551           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1552     }
1553 
1554     SmallVector<SDValue, 6>
1555         LiveGPRs; // SDValues for GPR registers holding live input values
1556     SmallVector<SDValue, 8> LiveXMMRegs; // SDValues for XMM registers
1557                                          // holding live input values
1558     SDValue ALVal; // if applicable, holds the SDValue for the %al register
1559 
1560     // Gather all the live in physical registers.
1561     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1562       Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1563       LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1564     }
1565     const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1566     if (!AvailableXmms.empty()) {
1567       Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1568       ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1569       for (MCPhysReg Reg : AvailableXmms) {
1570         // FastRegisterAllocator spills virtual registers at basic
1571         // block boundaries. That leads to uses of XMM registers
1572         // outside of the check for %al. Pass physical registers to
1573         // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1574         TheMachineFunction.getRegInfo().addLiveIn(Reg);
1575         LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1576       }
1577     }
1578 
1579     // Store the integer parameter registers.
1580     SmallVector<SDValue, 8> MemOps;
1581     SDValue RSFIN =
1582         DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1583                           TargLowering.getPointerTy(DAG.getDataLayout()));
1584     unsigned Offset = FuncInfo->getVarArgsGPOffset();
1585     for (SDValue Val : LiveGPRs) {
1586       SDValue FIN = DAG.getNode(ISD::ADD, DL,
1587                                 TargLowering.getPointerTy(DAG.getDataLayout()),
1588                                 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1589       SDValue Store =
1590           DAG.getStore(Val.getValue(1), DL, Val, FIN,
1591                        MachinePointerInfo::getFixedStack(
1592                            DAG.getMachineFunction(),
1593                            FuncInfo->getRegSaveFrameIndex(), Offset));
1594       MemOps.push_back(Store);
1595       Offset += 8;
1596     }
1597 
1598     // Now store the XMM (fp + vector) parameter registers.
1599     if (!LiveXMMRegs.empty()) {
1600       SmallVector<SDValue, 12> SaveXMMOps;
1601       SaveXMMOps.push_back(Chain);
1602       SaveXMMOps.push_back(ALVal);
1603       SaveXMMOps.push_back(RSFIN);
1604       SaveXMMOps.push_back(
1605           DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1606       llvm::append_range(SaveXMMOps, LiveXMMRegs);
1607       MachineMemOperand *StoreMMO =
1608           DAG.getMachineFunction().getMachineMemOperand(
1609               MachinePointerInfo::getFixedStack(
1610                   DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1611                   Offset),
1612               MachineMemOperand::MOStore, 128, Align(16));
1613       MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1614                                                DL, DAG.getVTList(MVT::Other),
1615                                                SaveXMMOps, MVT::i8, StoreMMO));
1616     }
1617 
1618     if (!MemOps.empty())
1619       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1620   }
1621 }
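// Illustrative layout of the SysV x86-64 register save area built above,
// assuming no argument registers were consumed by named parameters:
//   bytes   0..47   RDI, RSI, RDX, RCX, R8, R9   (8 bytes each)
//   bytes  48..175  XMM0..XMM7                   (16 bytes each)
// VarArgsGPOffset and VarArgsFPOffset are where va_arg starts reading
// integer and floating-point variadic arguments, respectively.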
1622 
1623 void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1624   // Find the largest legal vector type.
1625   MVT VecVT = MVT::Other;
1626   // FIXME: Only some x86_32 calling conventions support AVX512.
1627   if (Subtarget.useAVX512Regs() &&
1628       (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1629                      CallConv == CallingConv::Intel_OCL_BI)))
1630     VecVT = MVT::v16f32;
1631   else if (Subtarget.hasAVX())
1632     VecVT = MVT::v8f32;
1633   else if (Subtarget.hasSSE2())
1634     VecVT = MVT::v4f32;
1635 
1636   // We forward some GPRs and some vector types.
1637   SmallVector<MVT, 2> RegParmTypes;
1638   MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1639   RegParmTypes.push_back(IntVT);
1640   if (VecVT != MVT::Other)
1641     RegParmTypes.push_back(VecVT);
1642 
1643   // Compute the set of forwarded registers. The rest are scratch.
1644   SmallVectorImpl<ForwardedRegister> &Forwards =
1645       FuncInfo->getForwardedMustTailRegParms();
1646   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1647 
1648   // Forward AL for SysV x86_64 targets, since it is used for varargs.
1649   if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1650     Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1651     Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1652   }
1653 
1654   // Copy all forwards from physical to virtual registers.
1655   for (ForwardedRegister &FR : Forwards) {
1656     // FIXME: Can we use a less constrained schedule?
1657     SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1658     FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1659         TargLowering.getRegClassFor(FR.VT));
1660     Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1661   }
1662 }
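// Sketch of the effect: in a perfect-forwarding thunk (a varargs function
// whose body is essentially a single musttail call), every register that
// could carry a variadic argument (the GPRs, the vector registers chosen
// above, and %al on SysV x86-64) is copied into a virtual register here and
// copied back just before the musttail call, so the forwarded callee sees
// the caller's original register state.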
1663 
1664 void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1665                                                    unsigned StackSize) {
1666   // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
1667   // If necessary, it will be set to the correct value later.
1668   FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1669   FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1670 
1671   if (FrameInfo.hasVAStart())
1672     createVarArgAreaAndStoreRegisters(Chain, StackSize);
1673 
1674   if (FrameInfo.hasMustTailInVarArgFunc())
1675     forwardMustTailParameters(Chain);
1676 }
1677 
1678 SDValue X86TargetLowering::LowerFormalArguments(
1679     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1680     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1681     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1682   MachineFunction &MF = DAG.getMachineFunction();
1683   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1684 
1685   const Function &F = MF.getFunction();
1686   if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1687       F.getName() == "main")
1688     FuncInfo->setForceFramePointer(true);
1689 
1690   MachineFrameInfo &MFI = MF.getFrameInfo();
1691   bool Is64Bit = Subtarget.is64Bit();
1692   bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1693 
1694   assert(
1695       !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1696       "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1697 
1698   // Assign locations to all of the incoming arguments.
1699   SmallVector<CCValAssign, 16> ArgLocs;
1700   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1701 
1702   // Allocate shadow area for Win64.
1703   if (IsWin64)
1704     CCInfo.AllocateStack(32, Align(8));
1705 
1706   CCInfo.AnalyzeArguments(Ins, CC_X86);
1707 
1708   // In vectorcall calling convention a second pass is required for the HVA
1709   // types.
1710   if (CallingConv::X86_VectorCall == CallConv) {
1711     CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1712   }
1713 
1714   // The next loop assumes that the locations are in the same order as the
1715   // input arguments.
1716   assert(isSortedByValueNo(ArgLocs) &&
1717          "Argument Location list must be sorted before lowering");
1718 
1719   SDValue ArgValue;
1720   for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1721        ++I, ++InsIndex) {
1722     assert(InsIndex < Ins.size() && "Invalid Ins index");
1723     CCValAssign &VA = ArgLocs[I];
1724 
1725     if (VA.isRegLoc()) {
1726       EVT RegVT = VA.getLocVT();
1727       if (VA.needsCustom()) {
1728         assert(
1729             VA.getValVT() == MVT::v64i1 &&
1730             "Currently the only custom case is when we split v64i1 to 2 regs");
1731 
1732         // v64i1 values, in regcall calling convention, that are
1733         // compiled to 32 bit arch, are split up into two registers.
1734         ArgValue =
1735             getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1736       } else {
1737         const TargetRegisterClass *RC;
1738         if (RegVT == MVT::i8)
1739           RC = &X86::GR8RegClass;
1740         else if (RegVT == MVT::i16)
1741           RC = &X86::GR16RegClass;
1742         else if (RegVT == MVT::i32)
1743           RC = &X86::GR32RegClass;
1744         else if (Is64Bit && RegVT == MVT::i64)
1745           RC = &X86::GR64RegClass;
1746         else if (RegVT == MVT::f16)
1747           RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1748         else if (RegVT == MVT::f32)
1749           RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1750         else if (RegVT == MVT::f64)
1751           RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1752         else if (RegVT == MVT::f80)
1753           RC = &X86::RFP80RegClass;
1754         else if (RegVT == MVT::f128)
1755           RC = &X86::VR128RegClass;
1756         else if (RegVT.is512BitVector())
1757           RC = &X86::VR512RegClass;
1758         else if (RegVT.is256BitVector())
1759           RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1760         else if (RegVT.is128BitVector())
1761           RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1762         else if (RegVT == MVT::x86mmx)
1763           RC = &X86::VR64RegClass;
1764         else if (RegVT == MVT::v1i1)
1765           RC = &X86::VK1RegClass;
1766         else if (RegVT == MVT::v8i1)
1767           RC = &X86::VK8RegClass;
1768         else if (RegVT == MVT::v16i1)
1769           RC = &X86::VK16RegClass;
1770         else if (RegVT == MVT::v32i1)
1771           RC = &X86::VK32RegClass;
1772         else if (RegVT == MVT::v64i1)
1773           RC = &X86::VK64RegClass;
1774         else
1775           llvm_unreachable("Unknown argument type!");
1776 
1777         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1778         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1779       }
1780 
1781       // If this is an 8 or 16-bit value, it is really passed promoted to 32
1782       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
1783       // right size.
1784       if (VA.getLocInfo() == CCValAssign::SExt)
1785         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1786                                DAG.getValueType(VA.getValVT()));
1787       else if (VA.getLocInfo() == CCValAssign::ZExt)
1788         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1789                                DAG.getValueType(VA.getValVT()));
1790       else if (VA.getLocInfo() == CCValAssign::BCvt)
1791         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1792 
1793       if (VA.isExtInLoc()) {
1794         // Handle MMX values passed in XMM regs.
1795         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1796           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1797         else if (VA.getValVT().isVector() &&
1798                  VA.getValVT().getScalarType() == MVT::i1 &&
1799                  ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1800                   (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1801           // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1802           ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1803         } else
1804           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1805       }
1806     } else {
1807       assert(VA.isMemLoc());
1808       ArgValue =
1809           LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1810     }
1811 
1812     // If the value is passed via pointer, do a load.
1813     if (VA.getLocInfo() == CCValAssign::Indirect &&
1814         !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1815       ArgValue =
1816           DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1817     }
1818 
1819     InVals.push_back(ArgValue);
1820   }
1821 
1822   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1823     if (Ins[I].Flags.isSwiftAsync()) {
1824       auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1825       if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1826         X86FI->setHasSwiftAsyncContext(true);
1827       else {
1828         int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1829         int FI =
1830             MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1831         X86FI->setSwiftAsyncContextFrameIdx(FI);
1832         SDValue St = DAG.getStore(
1833             DAG.getEntryNode(), dl, InVals[I],
1834             DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1835             MachinePointerInfo::getFixedStack(MF, FI));
1836         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1837       }
1838     }
1839 
1840     // The Swift calling convention does not require that we copy the sret
1841     // argument into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1842     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1843       continue;
1844 
1845     // All x86 ABIs require that for returning structs by value we copy the
1846     // sret argument into %rax/%eax (depending on ABI) for the return. Save
1847     // the argument into a virtual register so that we can access it from the
1848     // return points.
1849     if (Ins[I].Flags.isSRet()) {
1850       assert(!FuncInfo->getSRetReturnReg() &&
1851              "SRet return has already been set");
1852       MVT PtrTy = getPointerTy(DAG.getDataLayout());
1853       Register Reg =
1854           MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1855       FuncInfo->setSRetReturnReg(Reg);
1856       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1857       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1858       break;
1859     }
1860   }
1861 
1862   unsigned StackSize = CCInfo.getStackSize();
1863   // Align stack specially for tail calls.
1864   if (shouldGuaranteeTCO(CallConv,
1865                          MF.getTarget().Options.GuaranteedTailCallOpt))
1866     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1867 
1868   if (IsVarArg)
1869     VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1870         .lowerVarArgsParameters(Chain, StackSize);
1871 
1872   // Some CCs need callee pop.
1873   if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1874                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
1875     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1876   } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1877     // X86 interrupts must pop the error code (and the alignment padding) if
1878     // present.
1879     FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1880   } else {
1881     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1882     // If this is an sret function, the return should pop the hidden pointer.
1883     if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1884       FuncInfo->setBytesToPopOnReturn(4);
1885   }
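  // For example, a 32-bit stdcall function taking two ints returns with
  // "ret $8" (BytesToPopOnReturn == 8), an x86 interrupt handler with an
  // error code pops 4 bytes (16 on x86-64), and a plain cdecl sret function
  // (where hasCalleePopSRet holds) pops only the 4-byte hidden pointer.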
1886 
1887   if (!Is64Bit) {
1888     // RegSaveFrameIndex is X86-64 only.
1889     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1890   }
1891 
1892   FuncInfo->setArgumentStackSize(StackSize);
1893 
1894   if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1895     EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1896     if (Personality == EHPersonality::CoreCLR) {
1897       assert(Is64Bit);
1898       // TODO: Add a mechanism to frame lowering that will allow us to indicate
1899       // that we'd prefer this slot be allocated towards the bottom of the frame
1900       // (i.e. near the stack pointer after allocating the frame).  Every
1901       // funclet needs a copy of this slot in its (mostly empty) frame, and the
1902       // offset from the bottom of this and each funclet's frame must be the
1903       // same, so the size of funclets' (mostly empty) frames is dictated by
1904       // how far this slot is from the bottom (since they allocate just enough
1905       // space to accommodate holding this slot at the correct offset).
1906       int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1907       EHInfo->PSPSymFrameIdx = PSPSymFI;
1908     }
1909   }
1910 
1911   if (shouldDisableArgRegFromCSR(CallConv) ||
1912       F.hasFnAttribute("no_caller_saved_registers")) {
1913     MachineRegisterInfo &MRI = MF.getRegInfo();
1914     for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1915       MRI.disableCalleeSavedRegister(Pair.first);
1916   }
1917 
1918   if (CallingConv::PreserveNone == CallConv)
1919     for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1920       if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1921           Ins[I].Flags.isSwiftError()) {
1922         errorUnsupported(DAG, dl,
1923                          "Swift attributes can't be used with preserve_none");
1924         break;
1925       }
1926     }
1927 
1928   return Chain;
1929 }
1930 
1931 SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1932                                             SDValue Arg, const SDLoc &dl,
1933                                             SelectionDAG &DAG,
1934                                             const CCValAssign &VA,
1935                                             ISD::ArgFlagsTy Flags,
1936                                             bool isByVal) const {
1937   unsigned LocMemOffset = VA.getLocMemOffset();
1938   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1939   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1940                        StackPtr, PtrOff);
1941   if (isByVal)
1942     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1943 
1944   MaybeAlign Alignment;
1945   if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1946       Arg.getSimpleValueType() != MVT::f80)
1947     Alignment = MaybeAlign(4);
1948   return DAG.getStore(
1949       Chain, dl, Arg, PtrOff,
1950       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1951       Alignment);
1952 }
1953 
1954 /// Emit a load of the return address if tail call
1955 /// optimization is performed and it is required.
1956 SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1957     SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1958     bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1959   // Adjust the Return address stack slot.
1960   EVT VT = getPointerTy(DAG.getDataLayout());
1961   OutRetAddr = getReturnAddressFrameIndex(DAG);
1962 
1963   // Load the "old" Return address.
1964   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1965   return SDValue(OutRetAddr.getNode(), 1);
1966 }
1967 
1968 /// Emit a store of the return address if tail call
1969 /// optimization is performed and it is required (FPDiff!=0).
1970 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1971                                         SDValue Chain, SDValue RetAddrFrIdx,
1972                                         EVT PtrVT, unsigned SlotSize,
1973                                         int FPDiff, const SDLoc &dl) {
1974   // Store the return address to the appropriate stack slot.
1975   if (!FPDiff) return Chain;
1976   // Calculate the new stack slot for the return address.
1977   int NewReturnAddrFI =
1978     MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1979                                          false);
1980   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1981   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1982                        MachinePointerInfo::getFixedStack(
1983                            DAG.getMachineFunction(), NewReturnAddrFI));
1984   return Chain;
1985 }
1986 
1987 /// Returns a vector_shuffle mask for a movs{s|d} or movd
1988 /// operation of the specified width.
1989 SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1990                                    SDValue V1, SDValue V2) const {
1991   unsigned NumElems = VT.getVectorNumElements();
1992   SmallVector<int, 8> Mask;
1993   Mask.push_back(NumElems);
1994   for (unsigned i = 1; i != NumElems; ++i)
1995     Mask.push_back(i);
1996   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1997 }
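// E.g. for MVT::v4f32 this builds the shuffle mask <4, 1, 2, 3>: element 0
// comes from V2 and elements 1-3 come from V1, which is exactly the
// MOVSS/MOVSD "replace the low element" pattern.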
1998 
1999 SDValue
2000 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2001                              SmallVectorImpl<SDValue> &InVals) const {
2002   SelectionDAG &DAG                     = CLI.DAG;
2003   SDLoc &dl                             = CLI.DL;
2004   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2005   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
2006   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
2007   SDValue Chain                         = CLI.Chain;
2008   SDValue Callee                        = CLI.Callee;
2009   CallingConv::ID CallConv              = CLI.CallConv;
2010   bool &isTailCall                      = CLI.IsTailCall;
2011   bool isVarArg                         = CLI.IsVarArg;
2012   const auto *CB                        = CLI.CB;
2013 
2014   MachineFunction &MF = DAG.getMachineFunction();
2015   bool Is64Bit        = Subtarget.is64Bit();
2016   bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
2017   bool IsSibcall      = false;
2018   bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2019       CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2020   bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2021   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2022   bool HasNCSR = (CB && isa<CallInst>(CB) &&
2023                   CB->hasFnAttr("no_caller_saved_registers"));
2024   bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2025   bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2026   bool IsCFICall = IsIndirectCall && CLI.CFIType;
2027   const Module *M = MF.getFunction().getParent();
2028   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2029 
2030   MachineFunction::CallSiteInfo CSInfo;
2031   if (CallConv == CallingConv::X86_INTR)
2032     report_fatal_error("X86 interrupts may not be called directly");
2033 
2034   // Analyze operands of the call, assigning locations to each operand.
2035   SmallVector<CCValAssign, 16> ArgLocs;
2036   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2037 
2038   // Allocate shadow area for Win64.
2039   if (IsWin64)
2040     CCInfo.AllocateStack(32, Align(8));
2041 
2042   CCInfo.AnalyzeArguments(Outs, CC_X86);
2043 
2044   // In vectorcall calling convention a second pass is required for the HVA
2045   // types.
2046   if (CallingConv::X86_VectorCall == CallConv) {
2047     CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2048   }
2049 
2050   bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2051   if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2052     // If we are using a GOT, disable tail calls to external symbols with
2053     // default visibility. Tail calling such a symbol requires using a GOT
2054     // relocation, which forces early binding of the symbol. This breaks code
2055     // that require lazy function symbol resolution. Using musttail or
2056     // GuaranteedTailCallOpt will override this.
2057     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2058     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2059                G->getGlobal()->hasDefaultVisibility()))
2060       isTailCall = false;
2061   }
2062 
2063   if (isTailCall && !IsMustTail) {
2064     // Check if it's really possible to do a tail call.
2065     isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2066                                                    IsCalleePopSRet);
2067 
2068     // Sibcalls are automatically detected tailcalls which do not require
2069     // ABI changes.
2070     if (!IsGuaranteeTCO && isTailCall)
2071       IsSibcall = true;
2072 
2073     if (isTailCall)
2074       ++NumTailCalls;
2075   }
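  // At this point three flavours of tail call are distinguished: sibcalls
  // (opportunistic tail calls that need no ABI change and leave the incoming
  // stack untouched), guaranteed tail calls (tailcc/swifttailcc or
  // -tailcallopt, which may rewrite the argument area), and musttail calls,
  // which bypass the eligibility check and must succeed.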
2076 
2077   if (IsMustTail && !isTailCall)
2078     report_fatal_error("failed to perform tail call elimination on a call "
2079                        "site marked musttail");
2080 
2081   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2082          "Var args not supported with calling convention fastcc, ghc or hipe");
2083 
2084   // Get a count of how many bytes are to be pushed on the stack.
2085   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2086   if (IsSibcall)
2087     // This is a sibcall. The memory operands are already available in the
2088     // caller's own caller's stack.
2089     NumBytes = 0;
2090   else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2091     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2092 
2093   int FPDiff = 0;
2094   if (isTailCall &&
2095       shouldGuaranteeTCO(CallConv,
2096                          MF.getTarget().Options.GuaranteedTailCallOpt)) {
2097     // Lower arguments at fp - stackoffset + fpdiff.
2098     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2099 
2100     FPDiff = NumBytesCallerPushed - NumBytes;
2101 
2102     // Set the delta of movement of the return address stack slot, but only
2103     // set it if the delta is greater than the previous delta.
2104     if (FPDiff < X86Info->getTCReturnAddrDelta())
2105       X86Info->setTCReturnAddrDelta(FPDiff);
2106   }
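  // FPDiff is the difference between the bytes the caller pops for its own
  // incoming arguments and the bytes this tail call needs; whenever it is
  // nonzero the return address has to be reloaded and re-stored at the
  // shifted slot (see EmitTailCallLoadRetAddr / EmitTailCallStoreRetAddr).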
2107 
2108   unsigned NumBytesToPush = NumBytes;
2109   unsigned NumBytesToPop = NumBytes;
2110 
2111   // If we have an inalloca argument, all stack space has already been allocated
2112   // for us and is right at the top of the stack.  We don't support multiple
2113   // arguments passed in memory when using inalloca.
2114   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2115     NumBytesToPush = 0;
2116     if (!ArgLocs.back().isMemLoc())
2117       report_fatal_error("cannot use inalloca attribute on a register "
2118                          "parameter");
2119     if (ArgLocs.back().getLocMemOffset() != 0)
2120       report_fatal_error("any parameter with the inalloca attribute must be "
2121                          "the only memory argument");
2122   } else if (CLI.IsPreallocated) {
2123     assert(ArgLocs.back().isMemLoc() &&
2124            "cannot use preallocated attribute on a register "
2125            "parameter");
2126     SmallVector<size_t, 4> PreallocatedOffsets;
2127     for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2128       if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2129         PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2130       }
2131     }
2132     auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2133     size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2134     MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2135     MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2136     NumBytesToPush = 0;
2137   }
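  // E.g. 32-bit MSVC C++ passes objects with non-trivial copy constructors
  // via inalloca: the caller has already carved out the entire argument block
  // with a single stack adjustment, so nothing extra is pushed here
  // (NumBytesToPush == 0) and only the amount to pop is tracked.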
2138 
2139   if (!IsSibcall && !IsMustTail)
2140     Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2141                                  NumBytes - NumBytesToPush, dl);
2142 
2143   SDValue RetAddrFrIdx;
2144   // Load return address for tail calls.
2145   if (isTailCall && FPDiff)
2146     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2147                                     Is64Bit, FPDiff, dl);
2148 
2149   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2150   SmallVector<SDValue, 8> MemOpChains;
2151   SDValue StackPtr;
2152 
2153   // The next loop assumes that the locations are in the same order as the
2154   // input arguments.
2155   assert(isSortedByValueNo(ArgLocs) &&
2156          "Argument Location list must be sorted before lowering");
2157 
2158   // Walk the register/memloc assignments, inserting copies/loads.  In the case
2159   // of tail call optimization, arguments are handled later.
2160   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2161   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2162        ++I, ++OutIndex) {
2163     assert(OutIndex < Outs.size() && "Invalid Out index");
2164     // Skip inalloca/preallocated arguments, they have already been written.
2165     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2166     if (Flags.isInAlloca() || Flags.isPreallocated())
2167       continue;
2168 
2169     CCValAssign &VA = ArgLocs[I];
2170     EVT RegVT = VA.getLocVT();
2171     SDValue Arg = OutVals[OutIndex];
2172     bool isByVal = Flags.isByVal();
2173 
2174     // Promote the value if needed.
2175     switch (VA.getLocInfo()) {
2176     default: llvm_unreachable("Unknown loc info!");
2177     case CCValAssign::Full: break;
2178     case CCValAssign::SExt:
2179       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2180       break;
2181     case CCValAssign::ZExt:
2182       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2183       break;
2184     case CCValAssign::AExt:
2185       if (Arg.getValueType().isVector() &&
2186           Arg.getValueType().getVectorElementType() == MVT::i1)
2187         Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2188       else if (RegVT.is128BitVector()) {
2189         // Special case: passing MMX values in XMM registers.
2190         Arg = DAG.getBitcast(MVT::i64, Arg);
2191         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2192         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2193       } else
2194         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2195       break;
2196     case CCValAssign::BCvt:
2197       Arg = DAG.getBitcast(RegVT, Arg);
2198       break;
2199     case CCValAssign::Indirect: {
2200       if (isByVal) {
2201         // Memcpy the argument to a temporary stack slot to prevent
2202         // the caller from seeing any modifications the callee may make
2203         // as guaranteed by the `byval` attribute.
2204         int FrameIdx = MF.getFrameInfo().CreateStackObject(
2205             Flags.getByValSize(),
2206             std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2207         SDValue StackSlot =
2208             DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2209         Chain =
2210             CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2211         // From now on treat this as a regular pointer
2212         Arg = StackSlot;
2213         isByVal = false;
2214       } else {
2215         // Store the argument.
2216         SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2217         int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2218         Chain = DAG.getStore(
2219             Chain, dl, Arg, SpillSlot,
2220             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2221         Arg = SpillSlot;
2222       }
2223       break;
2224     }
2225     }
2226 
2227     if (VA.needsCustom()) {
2228       assert(VA.getValVT() == MVT::v64i1 &&
2229              "Currently the only custom case is when we split v64i1 to 2 regs");
2230       // Split v64i1 value into two registers
2231       Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2232     } else if (VA.isRegLoc()) {
2233       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2234       const TargetOptions &Options = DAG.getTarget().Options;
2235       if (Options.EmitCallSiteInfo)
2236         CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2237       if (isVarArg && IsWin64) {
2238         // The Win64 ABI requires an argument XMM reg to be copied to the
2239         // corresponding shadow GPR if the callee is a varargs function.
2240         Register ShadowReg;
2241         switch (VA.getLocReg()) {
2242         case X86::XMM0: ShadowReg = X86::RCX; break;
2243         case X86::XMM1: ShadowReg = X86::RDX; break;
2244         case X86::XMM2: ShadowReg = X86::R8; break;
2245         case X86::XMM3: ShadowReg = X86::R9; break;
2246         }
2247         if (ShadowReg)
2248           RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2249       }
2250     } else if (!IsSibcall && (!isTailCall || isByVal)) {
2251       assert(VA.isMemLoc());
2252       if (!StackPtr.getNode())
2253         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2254                                       getPointerTy(DAG.getDataLayout()));
2255       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2256                                              dl, DAG, VA, Flags, isByVal));
2257     }
2258   }
2259 
2260   if (!MemOpChains.empty())
2261     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2262 
2263   if (Subtarget.isPICStyleGOT()) {
2264     // ELF / PIC requires the GOT pointer to be in the EBX register before
2265     // function calls via the PLT (except for regcall).
2266     if (!isTailCall) {
2267       // An indirect call with the RegCall calling convention may use up all
2268       // the general registers, so it is not suitable to bind the EBX register
2269       // to the GOT address; just let the register allocator handle it.
2270       if (CallConv != CallingConv::X86_RegCall)
2271         RegsToPass.push_back(std::make_pair(
2272           Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2273                                           getPointerTy(DAG.getDataLayout()))));
2274     } else {
2275       // If we are tail calling and generating PIC/GOT style code load the
2276       // address of the callee into ECX. The value in ecx is used as target of
2277       // the tail jump. This is done to circumvent the ebx/callee-saved problem
2278       // for tail calls on PIC/GOT architectures. Normally we would just put the
2279       // address of GOT into ebx and then call target@PLT. But for tail calls
2280       // ebx would be restored (since ebx is callee saved) before jumping to the
2281       // target@PLT.
2282 
2283       // Note: The actual moving to ECX is done further down.
2284       GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2285       if (G && !G->getGlobal()->hasLocalLinkage() &&
2286           G->getGlobal()->hasDefaultVisibility())
2287         Callee = LowerGlobalAddress(Callee, DAG);
2288       else if (isa<ExternalSymbolSDNode>(Callee))
2289         Callee = LowerExternalSymbol(Callee, DAG);
2290     }
2291   }
2292 
2293   if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2294       (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2295     // From AMD64 ABI document:
2296     // For calls that may call functions that use varargs or stdargs
2297     // (prototype-less calls or calls to functions containing ellipsis (...) in
2298     // the declaration) %al is used as a hidden argument to specify the number
2299     // of SSE registers used. The contents of %al do not need to match exactly
2300     // the number of registers, but must be an upper bound on the number of SSE
2301     // registers used and is in the range 0 - 8 inclusive.
2302 
2303     // Count the number of XMM registers allocated.
2304     static const MCPhysReg XMMArgRegs[] = {
2305       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2306       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2307     };
2308     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2309     assert((Subtarget.hasSSE1() || !NumXMMRegs)
2310            && "SSE registers cannot be used when SSE is disabled");
2311     RegsToPass.push_back(std::make_pair(Register(X86::AL),
2312                                         DAG.getConstant(NumXMMRegs, dl,
2313                                                         MVT::i8)));
2314   }
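  // For example, a call such as printf("%f\n", x) with SSE enabled allocates
  // one XMM register for the variadic double, so the code above arranges for
  // the constant 1 to be copied into %al (typically visible as
  // "movl $1, %eax" in the final assembly) right before the call.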
2315 
2316   if (isVarArg && IsMustTail) {
2317     const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2318     for (const auto &F : Forwards) {
2319       SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2320       RegsToPass.push_back(std::make_pair(F.PReg, Val));
2321     }
2322   }
2323 
2324   // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
2325   // don't need this because the eligibility check rejects calls that require
2326   // shuffling arguments passed in memory.
2327   if (!IsSibcall && isTailCall) {
2328     // Force all the incoming stack arguments to be loaded from the stack
2329     // before any new outgoing arguments are stored to the stack, because the
2330     // outgoing stack slots may alias the incoming argument stack slots, and
2331     // the alias isn't otherwise explicit. This is slightly more conservative
2332     // than necessary, because it means that each store effectively depends
2333     // on every argument instead of just those arguments it would clobber.
2334     SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2335 
2336     SmallVector<SDValue, 8> MemOpChains2;
2337     SDValue FIN;
2338     int FI = 0;
2339     for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2340          ++I, ++OutsIndex) {
2341       CCValAssign &VA = ArgLocs[I];
2342 
2343       if (VA.isRegLoc()) {
2344         if (VA.needsCustom()) {
2345           assert((CallConv == CallingConv::X86_RegCall) &&
2346                  "Expecting custom case only in regcall calling convention");
2347           // This means that we are in a special case where one argument was
2348           // passed through two register locations; skip the next location.
2349           ++I;
2350         }
2351 
2352         continue;
2353       }
2354 
2355       assert(VA.isMemLoc());
2356       SDValue Arg = OutVals[OutsIndex];
2357       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2358       // Skip inalloca/preallocated arguments.  They don't require any work.
2359       if (Flags.isInAlloca() || Flags.isPreallocated())
2360         continue;
2361       // Create frame index.
2362       int32_t Offset = VA.getLocMemOffset()+FPDiff;
2363       uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2364       FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2365       FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2366 
2367       if (Flags.isByVal()) {
2368         // Copy relative to framepointer.
2369         SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2370         if (!StackPtr.getNode())
2371           StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2372                                         getPointerTy(DAG.getDataLayout()));
2373         Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2374                              StackPtr, Source);
2375 
2376         MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2377                                                          ArgChain,
2378                                                          Flags, DAG, dl));
2379       } else {
2380         // Store relative to framepointer.
2381         MemOpChains2.push_back(DAG.getStore(
2382             ArgChain, dl, Arg, FIN,
2383             MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2384       }
2385     }
2386 
2387     if (!MemOpChains2.empty())
2388       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2389 
2390     // Store the return address to the appropriate stack slot.
2391     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2392                                      getPointerTy(DAG.getDataLayout()),
2393                                      RegInfo->getSlotSize(), FPDiff, dl);
2394   }
2395 
2396   // Build a sequence of copy-to-reg nodes chained together with token chain
2397   // and glue operands which copy the outgoing args into registers.
2398   SDValue InGlue;
2399   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2400     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2401                              RegsToPass[i].second, InGlue);
2402     InGlue = Chain.getValue(1);
2403   }
2404 
2405   if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2406     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2407     // In the 64-bit large code model, we have to make all calls
2408     // through a register, since the call instruction's 32-bit
2409     // pc-relative offset may not be large enough to hold the whole
2410     // address.
2411   } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2412              Callee->getOpcode() == ISD::ExternalSymbol) {
2413     // Lower direct calls to global addresses and external symbols. Setting
2414     // ForCall to true here has the effect of removing WrapperRIP when possible
2415     // to allow direct calls to be selected without first materializing the
2416     // address into a register.
2417     Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2418   } else if (Subtarget.isTarget64BitILP32() &&
2419              Callee.getValueType() == MVT::i32) {
2420     // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2421     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2422   }
2423 
2424   SmallVector<SDValue, 8> Ops;
2425 
2426   if (!IsSibcall && isTailCall && !IsMustTail) {
2427     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2428     InGlue = Chain.getValue(1);
2429   }
2430 
2431   Ops.push_back(Chain);
2432   Ops.push_back(Callee);
2433 
2434   if (isTailCall)
2435     Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2436 
2437   // Add argument registers to the end of the list so that they are known live
2438   // into the call.
2439   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2440     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2441                                   RegsToPass[i].second.getValueType()));
2442 
2443   // Add a register mask operand representing the call-preserved registers.
2444   const uint32_t *Mask = [&]() {
2445     auto AdaptedCC = CallConv;
2446     // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2447     // use X86_INTR calling convention because it has the same CSR mask
2448     // (same preserved registers).
2449     if (HasNCSR)
2450       AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2451     // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2452     // to use the CSR_NoRegs_RegMask.
2453     if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2454       AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2455     return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2456   }();
2457   assert(Mask && "Missing call preserved mask for calling convention");
2458 
2459   if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2460     X86Info->setFPClobberedByCall(true);
2461     if (CLI.CB && isa<InvokeInst>(CLI.CB))
2462       X86Info->setFPClobberedByInvoke(true);
2463   }
2464   if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2465     X86Info->setBPClobberedByCall(true);
2466     if (CLI.CB && isa<InvokeInst>(CLI.CB))
2467       X86Info->setBPClobberedByInvoke(true);
2468   }
2469 
2470   // If this is an invoke in a 32-bit function using a funclet-based
2471   // personality, assume the function clobbers all registers. If an exception
2472   // is thrown, the runtime will not restore CSRs.
2473   // FIXME: Model this more precisely so that we can register allocate across
2474   // the normal edge and spill and fill across the exceptional edge.
2475   if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2476     const Function &CallerFn = MF.getFunction();
2477     EHPersonality Pers =
2478         CallerFn.hasPersonalityFn()
2479             ? classifyEHPersonality(CallerFn.getPersonalityFn())
2480             : EHPersonality::Unknown;
2481     if (isFuncletEHPersonality(Pers))
2482       Mask = RegInfo->getNoPreservedMask();
2483   }
2484 
2485   // Define a new register mask from the existing mask.
2486   uint32_t *RegMask = nullptr;
2487 
2488   // In some calling conventions we need to remove the used physical registers
2489   // from the reg mask. Create a new RegMask for such calling conventions.
2490   // RegMask for calling conventions that disable only return registers (e.g.
2491   // preserve_most) will be modified later in LowerCallResult.
2492   bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2493   if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2494     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2495 
2496     // Allocate a new Reg Mask and copy Mask.
2497     RegMask = MF.allocateRegMask();
2498     unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2499     memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2500 
2501     // Make sure all subregisters of the argument registers are cleared
2502     // in the RegMask.
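    // A set bit in a register mask means the register is preserved across the
    // call, so clearing the bits for the argument registers and their
    // subregisters marks them as clobbered by this call.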
2503     if (ShouldDisableArgRegs) {
2504       for (auto const &RegPair : RegsToPass)
2505         for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2506           RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2507     }
2508 
2509     // Create the RegMask Operand according to our updated mask.
2510     Ops.push_back(DAG.getRegisterMask(RegMask));
2511   } else {
2512     // Create the RegMask Operand according to the static mask.
2513     Ops.push_back(DAG.getRegisterMask(Mask));
2514   }
2515 
2516   if (InGlue.getNode())
2517     Ops.push_back(InGlue);
2518 
2519   if (isTailCall) {
2520     // We used to do:
2521     //// If this is the first return lowered for this function, add the regs
2522     //// to the liveout set for the function.
2523     // This isn't right, although it's probably harmless on x86; liveouts
2524     // should be computed from returns, not tail calls. Consider a void
2525     // function making a tail call to a function returning int.
2526     MF.getFrameInfo().setHasTailCall();
2527     SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
2528 
2529     if (IsCFICall)
2530       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2531 
2532     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2533     DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2534     return Ret;
2535   }
2536 
2537   // Returns a chain & a glue for retval copy to use.
2538   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
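  // When CET indirect branch tracking is enabled, an indirect call marked
  // nocf_check is emitted as NT_CALL so it carries the NOTRACK prefix and the
  // target is not required to begin with an ENDBR instruction.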
2539   if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2540     Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2541   } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2542     // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2543     // expanded to the call, directly followed by a special marker sequence and
2544     // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2545     assert(!isTailCall &&
2546            "tail calls cannot be marked with clang.arc.attachedcall");
2547     assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2548     assert(Is64Bit && "clang.arc.attachedcall is only supported in 64-bit mode");
2549     // Add a target global address for the retainRV/claimRV runtime function
2550     // just before the call target.
2551     Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2552     auto PtrVT = getPointerTy(DAG.getDataLayout());
2553     auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2554     Ops.insert(Ops.begin() + 1, GA);
2555     Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2556   } else {
2557     Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2558   }
2559 
2560   if (IsCFICall)
2561     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2562 
2563   InGlue = Chain.getValue(1);
2564   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2565   DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2566 
2567   // Save heapallocsite metadata.
2568   if (CLI.CB)
2569     if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2570       DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2571 
2572   // Create the CALLSEQ_END node.
2573   unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2574   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2575                        DAG.getTarget().Options.GuaranteedTailCallOpt))
2576     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
2577   else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2578     // If this call passes a struct-return pointer, the callee
2579     // pops that struct pointer.
2580     NumBytesForCalleeToPop = 4;
2581 
2582   // Returns a glue for retval copy to use.
2583   if (!IsSibcall) {
2584     Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2585                                InGlue, dl);
2586     InGlue = Chain.getValue(1);
2587   }
2588 
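  // The Swift self/async/error parameters need dedicated registers that the
  // preserve_none convention does not set aside, so the combination is
  // diagnosed as unsupported rather than silently miscompiled.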
2589   if (CallingConv::PreserveNone == CallConv)
2590     for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2591       if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2592           Outs[I].Flags.isSwiftError()) {
2593         errorUnsupported(DAG, dl,
2594                          "Swift attributes can't be used with preserve_none");
2595         break;
2596       }
2597     }
2598 
2599   // Handle result values, copying them out of physregs into vregs that we
2600   // return.
2601   return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2602                          InVals, RegMask);
2603 }
2604 
2605 //===----------------------------------------------------------------------===//
2606 //                Fast Calling Convention (tail call) implementation
2607 //===----------------------------------------------------------------------===//
2608 
2609 //  Like the stdcall convention, the callee cleans up the arguments, except
2610 //  that ECX is reserved for storing the address of the tail-called function.
2611 //  Only 2 registers are free for argument passing (inreg). Tail call
2612 //  optimization is performed provided:
2613 //                * tailcallopt is enabled
2614 //                * caller/callee are fastcc
2615 //  On the X86_64 architecture with GOT-style position-independent code, only
2616 //  local (within module) calls are supported at the moment.
2617 //  To keep the stack aligned according to the platform ABI, the function
2618 //  GetAlignedArgumentStackSize ensures that the argument delta is always a
2619 //  multiple of the stack alignment. (Dynamic linkers need this - Darwin's
2620 //  dyld, for example.) If the tail-called callee has more arguments than the
2621 //  caller, the caller needs to make sure that there is room to move the
2622 //  RETADDR to. This is achieved by reserving an area the size of the argument
2623 //  delta right after the original RETADDR, but before the saved frame pointer
2624 //  or the spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
2625 //  stack layout:
2626 //    arg1
2627 //    arg2
2628 //    RETADDR
2629 //    [ new RETADDR
2630 //      move area ]
2631 //    (possible EBP)
2632 //    ESI
2633 //    EDI
2634 //    local1 ..
2635 
2636 /// Round the stack size up so that, together with the return-address slot, it
2637 /// satisfies the target stack alignment, e.g. 16n + 12 for a 16-byte alignment.
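/// For instance, with a 4-byte slot size: alignTo(20 + 4, 16) - 4 = 28 = 16 + 12.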
2638 unsigned
2639 X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2640                                                SelectionDAG &DAG) const {
2641   const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2642   const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2643   assert(StackSize % SlotSize == 0 &&
2644          "StackSize must be a multiple of SlotSize");
2645   return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2646 }
2647 
2648 /// Return true if the given stack call argument is already available at the
2649 /// same (relative) position in the caller's incoming argument stack.
2650 static
2651 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2652                          MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2653                          const X86InstrInfo *TII, const CCValAssign &VA) {
2654   unsigned Bytes = Arg.getValueSizeInBits() / 8;
2655 
2656   for (;;) {
2657     // Look through nodes that don't alter the bits of the incoming value.
2658     unsigned Op = Arg.getOpcode();
2659     if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2660         Op == ISD::AssertZext) {
2661       Arg = Arg.getOperand(0);
2662       continue;
2663     }
2664     if (Op == ISD::TRUNCATE) {
2665       const SDValue &TruncInput = Arg.getOperand(0);
2666       if (TruncInput.getOpcode() == ISD::AssertZext &&
2667           cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2668               Arg.getValueType()) {
2669         Arg = TruncInput.getOperand(0);
2670         continue;
2671       }
2672     }
2673     break;
2674   }
2675 
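  // The argument can be reused in place only if it ultimately comes from a
  // fixed stack object; determine the frame index it is loaded from (or, for
  // byval arguments, the frame index it points to).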
2676   int FI = INT_MAX;
2677   if (Arg.getOpcode() == ISD::CopyFromReg) {
2678     Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2679     if (!VR.isVirtual())
2680       return false;
2681     MachineInstr *Def = MRI->getVRegDef(VR);
2682     if (!Def)
2683       return false;
2684     if (!Flags.isByVal()) {
2685       if (!TII->isLoadFromStackSlot(*Def, FI))
2686         return false;
2687     } else {
2688       unsigned Opcode = Def->getOpcode();
2689       if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2690            Opcode == X86::LEA64_32r) &&
2691           Def->getOperand(1).isFI()) {
2692         FI = Def->getOperand(1).getIndex();
2693         Bytes = Flags.getByValSize();
2694       } else
2695         return false;
2696     }
2697   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2698     if (Flags.isByVal())
2699       // ByVal argument is passed in as a pointer but it's now being
2700       // dereferenced. e.g.
2701       // define @foo(%struct.X* %A) {
2702       //   tail call @bar(%struct.X* byval %A)
2703       // }
2704       return false;
2705     SDValue Ptr = Ld->getBasePtr();
2706     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2707     if (!FINode)
2708       return false;
2709     FI = FINode->getIndex();
2710   } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2711     FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2712     FI = FINode->getIndex();
2713     Bytes = Flags.getByValSize();
2714   } else
2715     return false;
2716 
2717   assert(FI != INT_MAX);
2718   if (!MFI.isFixedObjectIndex(FI))
2719     return false;
2720 
2721   if (Offset != MFI.getObjectOffset(FI))
2722     return false;
2723 
2724   // If this is not byval, check that the argument stack object is immutable.
2725   // inalloca and argument copy elision can create mutable argument stack
2726   // objects. Byval objects can be mutated, but a byval call intends to pass the
2727   // mutated memory.
2728   if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2729     return false;
2730 
2731   if (VA.getLocVT().getFixedSizeInBits() >
2732       Arg.getValueSizeInBits().getFixedValue()) {
2733     // If the argument location is wider than the argument type, check that any
2734     // extension flags match.
2735     if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2736         Flags.isSExt() != MFI.isObjectSExt(FI)) {
2737       return false;
2738     }
2739   }
2740 
2741   return Bytes == MFI.getObjectSize(FI);
2742 }
2743 
2744 /// Check whether the call is eligible for tail call optimization. Targets
2745 /// that want to do tail call optimization should implement this function.
2746 /// Note that the x86 backend does not check musttail calls for eligibility! The
2747 /// rest of x86 tail call lowering must be prepared to forward arguments of any
2748 /// type.
2749 bool X86TargetLowering::IsEligibleForTailCallOptimization(
2750     TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2751     SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2752   SelectionDAG &DAG = CLI.DAG;
2753   const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2754   const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2755   const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2756   SDValue Callee = CLI.Callee;
2757   CallingConv::ID CalleeCC = CLI.CallConv;
2758   bool isVarArg = CLI.IsVarArg;
2759 
2760   if (!mayTailCallThisCC(CalleeCC))
2761     return false;
2762 
2763   // If -tailcallopt is specified, make fastcc functions tail-callable.
2764   MachineFunction &MF = DAG.getMachineFunction();
2765   const Function &CallerF = MF.getFunction();
2766 
2767   // If the function return type is x86_fp80 and the callee return type is not,
2768   // then the FP_EXTEND of the call result is not a nop. It's not safe to
2769   // perform a tailcall optimization here.
2770   if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2771     return false;
2772 
2773   CallingConv::ID CallerCC = CallerF.getCallingConv();
2774   bool CCMatch = CallerCC == CalleeCC;
2775   bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2776   bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2777   bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2778       CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2779 
2780   // Win64 functions have extra shadow space for argument homing. Don't do the
2781   // sibcall if the caller and callee have mismatched expectations for this
2782   // space.
2783   if (IsCalleeWin64 != IsCallerWin64)
2784     return false;
2785 
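  // Under guaranteed TCO, eligibility is purely a matter of calling convention:
  // the conventions must match and must support guaranteed TCO. The sibcall
  // heuristics below are not consulted in that mode.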
2786   if (IsGuaranteeTCO) {
2787     if (canGuaranteeTCO(CalleeCC) && CCMatch)
2788       return true;
2789     return false;
2790   }
2791 
2792   // Look for obvious safe cases to perform tail call optimization that do not
2793   // require ABI changes. This is what gcc calls sibcall.
2794 
2795   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2796   // emit a special epilogue.
2797   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2798   if (RegInfo->hasStackRealignment(MF))
2799     return false;
2800 
2801   // Also avoid sibcall optimization if we are an sret-returning function and
2802   // the callee is incompatible. See the comment in LowerReturn about why
2803   // hasStructRetAttr is insufficient.
2804   if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2805     // For a compatible tail call the callee must return our sret pointer. So it
2806     // needs to be (a) an sret function itself and (b) we pass our sret as its
2807     // sret. Condition #b is harder to determine.
2808     return false;
2809   } else if (IsCalleePopSRet)
2810     // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2811     // expect that.
2812     return false;
2813 
2814   // Do not sibcall optimize vararg calls unless all arguments are passed via
2815   // registers.
2816   LLVMContext &C = *DAG.getContext();
2817   if (isVarArg && !Outs.empty()) {
2818     // Optimizing for varargs on Win64 is unlikely to be safe without
2819     // additional testing.
2820     if (IsCalleeWin64 || IsCallerWin64)
2821       return false;
2822 
2823     for (const auto &VA : ArgLocs)
2824       if (!VA.isRegLoc())
2825         return false;
2826   }
2827 
2828   // If the call result is in ST0 / ST1, it needs to be popped off the x87
2829   // stack. Therefore, if the result is not used, it is not safe to optimize
2830   // this into a sibcall.
2831   bool Unused = false;
2832   for (const auto &In : Ins) {
2833     if (!In.Used) {
2834       Unused = true;
2835       break;
2836     }
2837   }
2838   if (Unused) {
2839     SmallVector<CCValAssign, 16> RVLocs;
2840     CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2841     RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2842     for (const auto &VA : RVLocs) {
2843       if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2844         return false;
2845     }
2846   }
2847 
2848   // Check that the call results are passed in the same way.
2849   if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2850                                   RetCC_X86, RetCC_X86))
2851     return false;
2852   // The callee has to preserve all registers the caller needs to preserve.
2853   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2854   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2855   if (!CCMatch) {
2856     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2857     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2858       return false;
2859   }
2860 
2861   // The stack frame of the caller cannot be replaced by the tail-callee one's
2862   // if the function is required to preserve all the registers. Conservatively
2863   // prevent tail optimization even if hypothetically all the registers are used
2864   // for passing formal parameters or returning values.
2865   if (CallerF.hasFnAttribute("no_caller_saved_registers"))
2866     return false;
2867 
2868   unsigned StackArgsSize = CCInfo.getStackSize();
2869 
2870   // If the call passes no arguments, skip the argument checks below and go
2871   // straight to the callee-pop checks.
2872   if (!Outs.empty()) {
2873     if (StackArgsSize > 0) {
2874       // Check if the arguments are already laid out in the right way as
2875       // the caller's fixed stack objects.
2876       MachineFrameInfo &MFI = MF.getFrameInfo();
2877       const MachineRegisterInfo *MRI = &MF.getRegInfo();
2878       const X86InstrInfo *TII = Subtarget.getInstrInfo();
2879       for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2880         const CCValAssign &VA = ArgLocs[I];
2881         SDValue Arg = OutVals[I];
2882         ISD::ArgFlagsTy Flags = Outs[I].Flags;
2883         if (VA.getLocInfo() == CCValAssign::Indirect)
2884           return false;
2885         if (!VA.isRegLoc()) {
2886           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2887                                    TII, VA))
2888             return false;
2889         }
2890       }
2891     }
2892 
2893     bool PositionIndependent = isPositionIndependent();
2894     // If the tailcall address may be in a register, then make sure it's
2895     // possible to register allocate for it. In 32-bit, the call address can
2896     // only target EAX, EDX, or ECX since the tail call must be scheduled after
2897     // callee-saved registers are restored. These happen to be the same
2898     // registers used to pass 'inreg' arguments so watch out for those.
2899     if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2900                                   !isa<ExternalSymbolSDNode>(Callee)) ||
2901                                  PositionIndependent)) {
2902       unsigned NumInRegs = 0;
2903       // In PIC we need an extra register to formulate the address computation
2904       // for the callee.
2905       unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2906 
2907       for (const auto &VA : ArgLocs) {
2908         if (!VA.isRegLoc())
2909           continue;
2910         Register Reg = VA.getLocReg();
2911         switch (Reg) {
2912         default: break;
2913         case X86::EAX: case X86::EDX: case X86::ECX:
2914           if (++NumInRegs == MaxInRegs)
2915             return false;
2916           break;
2917         }
2918       }
2919     }
2920 
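    // Arguments passed in registers that are callee-saved in the caller must be
    // the caller's own incoming values for those registers, since the epilogue
    // restores callee-saved registers before the tail-call jump.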
2921     const MachineRegisterInfo &MRI = MF.getRegInfo();
2922     if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2923       return false;
2924   }
2925 
2926   bool CalleeWillPop =
2927       X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2928                        MF.getTarget().Options.GuaranteedTailCallOpt);
2929 
2930   if (unsigned BytesToPop =
2931           MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
2932     // If we have bytes to pop, the callee must pop them.
2933     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2934     if (!CalleePopMatches)
2935       return false;
2936   } else if (CalleeWillPop && StackArgsSize > 0) {
2937     // If we don't have bytes to pop, make sure the callee doesn't pop any.
2938     return false;
2939   }
2940 
2941   return true;
2942 }
2943 
2944 /// Determines whether the callee is required to pop its own arguments.
2945 /// Callee pop is necessary to support tail calls.
2946 bool X86::isCalleePop(CallingConv::ID CallingConv,
2947                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2948   // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2949   // can guarantee TCO.
2950   if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2951     return true;
2952 
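  // The stdcall-style conventions below imply callee cleanup only in 32-bit
  // mode; in 64-bit mode the caller cleans up the stack arguments.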
2953   switch (CallingConv) {
2954   default:
2955     return false;
2956   case CallingConv::X86_StdCall:
2957   case CallingConv::X86_FastCall:
2958   case CallingConv::X86_ThisCall:
2959   case CallingConv::X86_VectorCall:
2960     return !is64Bit;
2961   }
2962 }
2963