//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to DAG nodes.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "x86-isel"

using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
  switch (CC) {
  default:
    return false;
  case CallingConv::X86_RegCall:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
    return true;
  }
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
  return CC == CallingConv::X86_RegCall;
}

static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
  // convention is one that uses k registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};
  // v32i1 passes in ymm unless we have BWI and the calling convention is
  // regcall.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};
  // Split v64i1 vectors if we don't have v64i8 available.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}

MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return RegisterVT;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return MVT::v8f16;
  }

  // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
  if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
      !Subtarget.hasX87())
    return MVT::i32;

  if (isTypeLegal(MVT::f16)) {
    if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
      return getRegisterTypeForCallingConv(
          Context, CC, VT.changeVectorElementType(MVT::f16));

    if (VT == MVT::bf16)
      return MVT::f16;
  }

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return NumRegisters;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return 1;
  }

  // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
  // x87 is disabled.
  if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
    if (VT == MVT::f64)
      return 2;
    if (VT == MVT::f80)
      return 3;
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
      isTypeLegal(MVT::f16))
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // Split v64i1 vectors if we don't have v64i8 available.
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  // Split vNbf16 vectors according to vNf16.
  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
      isTypeLegal(MVT::f16))
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}

EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext &Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Figure out what this type will be legalized to.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
      // vXi16/vXi8.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  return VT.changeVectorElementTypeToInteger();
}

bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
    const DataLayout &DL) const {
  // i128 split into i64 needs to be allocated to two consecutive registers,
  // or spilled to the stack as a whole.
  return Ty->isIntegerTy(128);
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}

/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                               const DataLayout &DL) const {
  if (Subtarget.is64Bit())
    return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment;
}

/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          Subtarget.useLight256BitInstructions()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      // If we have SSE1 registers we should be able to use them.
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if source is string constant. It's
      // better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return Subtarget.hasSSE1();
  if (VT == MVT::f64)
    return Subtarget.hasSSE2();
  return true;
}

static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
  return (8 * Alignment.value()) % SizeInBits == 0;
}

bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
  if (isBitAligned(Alignment, VT.getSizeInBits()))
    return true;
  switch (VT.getSizeInBits()) {
  default:
    // 8-byte and under are always assumed to be fast.
    return true;
  case 128:
    return !Subtarget.isUnalignedMem16Slow();
  case 256:
    return !Subtarget.isUnalignedMem32Slow();
    // TODO: What about AVX-512 (512-bit) accesses?
  }
}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  // NonTemporal vector memory ops must be aligned.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }
  // Misaligned accesses of any size are always allowed.
  return true;
}

bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
                                           const DataLayout &DL, EVT VT,
                                           unsigned AddrSpace, Align Alignment,
                                           MachineMemOperand::Flags Flags,
                                           unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                       /*Fast=*/nullptr))
      return true;
    // NonTemporal vector memory ops are special, and must be aligned.
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
      return false;
    switch (VT.getSizeInBits()) {
    case 128:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
        return true;
      return false;
    case 256:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
        return true;
      return false;
    case 512:
      if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
        return true;
      return false;
    default:
      return false; // Don't have NonTemporal vector memory ops of this size.
    }
  }
  return true;
}

/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;
  if (isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Large &&
      !Subtarget.isTargetCOFF())
    return MachineJumpTableInfo::EK_LabelDifference64;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}

bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {

  // Only relabel X86-32 for C / Stdcall CCs.
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Mark the first N int arguments as having reg
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}

const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid, MCContext &Ctx) const {
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}

/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    // This doesn't have SDLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  return Table;
}

/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  // X86-64 uses RIP relative addressing based on the jump table label.
  if (Subtarget.isPICStyleRIPRel() ||
      (Subtarget.is64Bit() &&
       getTargetMachine().getCodeModel() == CodeModel::Large))
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  // Otherwise, the reference is relative to the PIC base.
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}

std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}

unsigned X86TargetLowering::getAddressSpace() const {
  if (Subtarget.is64Bit())
    return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
                                                                    : X86AS::FS;
  return X86AS::GS;
}

static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}

static Constant *SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
}

Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // glibc, bionic, and Fuchsia have a special slot for the stack guard in
  // tcbhead_t; use it instead of the usual global variable (see
  // sysdeps/{i386,x86_64}/nptl/tls.h)
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    unsigned AddressSpace = getAddressSpace();

    // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
    if (Subtarget.isTargetFuchsia())
      return SegmentOffset(IRB, 0x10, AddressSpace);

    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base reg and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If we don't set -stack-protector-guard-offset value:
    // %fs:0x28, unless we're using a Kernel code model, in which case
    // it's %gs:0x28. gs:0x14 on i386.
    if (Offset == INT_MAX)
      Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

    StringRef GuardReg = M->getStackProtectorGuardReg();
    if (GuardReg == "fs")
      AddressSpace = X86AS::FS;
    else if (GuardReg == "gs")
      AddressSpace = X86AS::GS;

    // Use the symbol guard if the user specified one.
    StringRef GuardSymb = M->getStackProtectorGuardSymbol();
    if (!GuardSymb.empty()) {
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
                                       : Type::getInt32Ty(M->getContext());
        GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
                                nullptr, GuardSymb, nullptr,
                                GlobalValue::NotThreadLocal, AddressSpace);
        if (!Subtarget.isTargetDarwin())
          GV->setDSOLocal(M->getDirectAccessExternalData());
      }
      return GV;
    }

    return SegmentOffset(IRB, Offset, AddressSpace);
  }
  return TargetLowering::getIRStackGuard(IRB);
}

void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // MSVC CRT provides functionalities for stack protection.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    // MSVC CRT has a global variable holding security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));

    // MSVC CRT has a function to validate security cookie.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // glibc, bionic, and Fuchsia have a special slot for the stack guard.
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}

Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
  // MSVC CRT has a global variable holding security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getGlobalVariable("__security_cookie");
  }
  return TargetLowering::getSDagStackGuard(M);
}

Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
  // MSVC CRT has a function to validate security cookie.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    return M.getFunction("__security_check_cookie");
  }
  return TargetLowering::getSSPStackGuardCheck(M);
}

Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget.isTargetAndroid()) {
    // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
    // %gs:0x24 on i386
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia is similar.
  if (Subtarget.isTargetFuchsia()) {
    // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool X86TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}

const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}

ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
  return RCRegs;
}

/// Lowers mask values (v*i1) to the local register values
/// \returns DAG node after lowering to register type
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &DL, SelectionDAG &DAG) {
  EVT ValVT = ValArg.getValueType();

  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Two stage lowering might be required
    // bitcast:   v8i1 -> i8 / v16i1 -> i16
    // anyextend: i8   -> i32 / i16   -> i32
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // One stage lowering is required
    // bitcast: v32i1 -> i32 / v64i1 -> i64
    return DAG.getBitcast(ValLoc, ValArg);
  }

  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
}

/// Breaks v64i1 value into two registers and adds the new node to the DAG
static void Passv64i1ArgInRegs(
    const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // Before splitting the value we cast it to i64
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Splitting the value into two i32 types
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);

  // Attach the two i32 types into corresponding registers
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}

SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // In some cases we need to disable registers from the default CSR list.
  // For example, when they are used as return registers (preserve_* and X86's
  // regcall) or for argument passing (X86's regcall).
  bool ShouldDisableCalleeSavedRegister =
      shouldDisableRetRegFromCSR(CallConv) ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");

  if (CallConv == CallingConv::X86_INTR && !Outs.empty())
    report_fatal_error("X86 interrupts may not return any value");

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);

  SmallVector<std::pair<Register, SDValue>, 4> RetVals;
  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++OutsIndex) {
    CCValAssign &VA = RVLocs[I];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Add the register to the CalleeSaveDisableRegs list.
    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());

    SDValue ValToCopy = OutVals[OutsIndex];
    EVT ValVT = ValToCopy.getValueType();

    // Promote values to the appropriate types.
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    } else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);

    assert(VA.getLocInfo() != CCValAssign::FPExt &&
           "Unexpected FP-extend for return value.");

    // Report an error if we have attempted to return a value via an XMM
    // register and SSE was disabled.
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      // When returning a double via an XMM register, report an error if SSE2 is
      // not enabled.
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    }

    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
    // the RET instruction and handled by the FP Stackifier.
    if (VA.getLocReg() == X86::FP0 ||
        VA.getLocReg() == X86::FP1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
      // Don't emit a copytoreg.
      continue;
    }

    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
    // which is returned in RAX / RDX.
    if (Subtarget.is64Bit()) {
      if (ValVT == MVT::x86mmx) {
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          // If we don't have SSE2 available, convert to v4f32 so the generated
          // register is legal.
          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
        }
      }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");

      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);

      // Add the second register to the CalleeSaveDisableRegs list.
      if (ShouldDisableCalleeSavedRegister)
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
    } else {
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
    }
  }

  SDValue Glue;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));

  // Copy the result values into the output registers.
  for (auto &RetVal : RetVals) {
    if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
      RetOps.push_back(RetVal.second);
      continue; // Don't emit a copytoreg.
    }

    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.

  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  //
  // Checking Function.hasStructRetAttr() here is insufficient because the IR
  // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
  // false, then an sret argument may be implicitly inserted in the SelDAG. In
  // either case FuncInfo->setSRetReturnReg() will have been called.
  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
    // When we have both sret and another return value, we should use the
    // original Chain stored in RetOps[0], instead of the current Chain updated
    // in the above loop. If we only have sret, RetOps[0] equals to Chain.

    // For the case of sret and another return value, we have
    //   Chain_0 at the function entry
    //   Chain_1 = getCopyToReg(Chain_0) in the above loop
    // If we use Chain_1 in getCopyFromReg, we will have
    //   Val = getCopyFromReg(Chain_1)
    //   Chain_2 = getCopyToReg(Chain_1, Val) from below

    // getCopyToReg(Chain_0) will be glued together with
    // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
    // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
    //   Data dependency from Unit B to Unit A due to usage of Val in
    //     getCopyToReg(Chain_1, Val)
    //   Chain dependency from Unit A to Unit B

    // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    Register RetValReg
        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
          X86::RAX : X86::EAX;
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
    Glue = Chain.getValue(1);

    // RAX/EAX now acts like a return value.
    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));

    // Add the returned register to the CalleeSaveDisableRegs list. Don't do
    // this however for preserve_most/preserve_all to minimize the number of
    // callee-saved registers for these CCs.
    if (ShouldDisableCalleeSavedRegister &&
        CallConv != CallingConv::PreserveAll &&
        CallConv != CallingConv::PreserveMost)
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
  }

  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (X86::GR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
94226a73082SReid Kleckner if (Glue.getNode()) 94326a73082SReid Kleckner RetOps.push_back(Glue); 94426a73082SReid Kleckner 94526a73082SReid Kleckner X86ISD::NodeType opcode = X86ISD::RET_GLUE; 94626a73082SReid Kleckner if (CallConv == CallingConv::X86_INTR) 94726a73082SReid Kleckner opcode = X86ISD::IRET; 94826a73082SReid Kleckner return DAG.getNode(opcode, dl, MVT::Other, RetOps); 94926a73082SReid Kleckner } 95026a73082SReid Kleckner 95126a73082SReid Kleckner bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 95226a73082SReid Kleckner if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) 95326a73082SReid Kleckner return false; 95426a73082SReid Kleckner 95526a73082SReid Kleckner SDValue TCChain = Chain; 956bd261eccSCraig Topper SDNode *Copy = *N->user_begin(); 95726a73082SReid Kleckner if (Copy->getOpcode() == ISD::CopyToReg) { 95826a73082SReid Kleckner // If the copy has a glue operand, we conservatively assume it isn't safe to 95926a73082SReid Kleckner // perform a tail call. 96026a73082SReid Kleckner if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 96126a73082SReid Kleckner return false; 96226a73082SReid Kleckner TCChain = Copy->getOperand(0); 96326a73082SReid Kleckner } else if (Copy->getOpcode() != ISD::FP_EXTEND) 96426a73082SReid Kleckner return false; 96526a73082SReid Kleckner 96626a73082SReid Kleckner bool HasRet = false; 967104ad925SCraig Topper for (const SDNode *U : Copy->users()) { 96826a73082SReid Kleckner if (U->getOpcode() != X86ISD::RET_GLUE) 96926a73082SReid Kleckner return false; 97026a73082SReid Kleckner // If we are returning more than one value, we can definitely 97126a73082SReid Kleckner // not make a tail call see PR19530 97226a73082SReid Kleckner if (U->getNumOperands() > 4) 97326a73082SReid Kleckner return false; 97426a73082SReid Kleckner if (U->getNumOperands() == 4 && 97526a73082SReid Kleckner U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue) 97626a73082SReid Kleckner return false; 97726a73082SReid Kleckner HasRet = true; 97826a73082SReid Kleckner } 97926a73082SReid Kleckner 98026a73082SReid Kleckner if (!HasRet) 98126a73082SReid Kleckner return false; 98226a73082SReid Kleckner 98326a73082SReid Kleckner Chain = TCChain; 98426a73082SReid Kleckner return true; 98526a73082SReid Kleckner } 98626a73082SReid Kleckner 98726a73082SReid Kleckner EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, 98826a73082SReid Kleckner ISD::NodeType ExtendKind) const { 98926a73082SReid Kleckner MVT ReturnMVT = MVT::i32; 99026a73082SReid Kleckner 99126a73082SReid Kleckner bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); 99226a73082SReid Kleckner if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { 99326a73082SReid Kleckner // The ABI does not require i1, i8 or i16 to be extended. 99426a73082SReid Kleckner // 99526a73082SReid Kleckner // On Darwin, there is code in the wild relying on Clang's old behaviour of 99626a73082SReid Kleckner // always extending i8/i16 return values, so keep doing that for now. 99726a73082SReid Kleckner // (PR26665). 99826a73082SReid Kleckner ReturnMVT = MVT::i8; 99926a73082SReid Kleckner } 100026a73082SReid Kleckner 100126a73082SReid Kleckner EVT MinVT = getRegisterType(Context, ReturnMVT); 100226a73082SReid Kleckner return VT.bitsLT(MinVT) ? MinVT : VT; 100326a73082SReid Kleckner } 100426a73082SReid Kleckner 100526a73082SReid Kleckner /// Reads two 32 bit registers and creates a 64 bit mask value. 
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InGlue Represents an SDValue in the parent DAG node for
///                        glue purposes. In case the DAG is already using a
///                        physical register instead of a virtual one, we
///                        should glue our new SDValue to the InGlue SDValue.
/// \return a new 64-bit wide SDValue.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
                                SDValue &Root, SelectionDAG &DAG,
                                const SDLoc &DL, const X86Subtarget &Subtarget,
                                SDValue *InGlue = nullptr) {
  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(VA.getValVT() == MVT::v64i1 &&
         "Expecting first location of 64 bit width type");
  assert(NextVA.getValVT() == VA.getValVT() &&
         "The locations should have the same type");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The values should reside in two registers");

  SDValue Lo, Hi;
  SDValue ArgValueLo, ArgValueHi;

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetRegisterClass *RC = &X86::GR32RegClass;

  // Read a 32 bit value from the registers.
  if (nullptr == InGlue) {
    // When no physical register is present,
    // create an intermediate virtual register.
    Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
    ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
  } else {
    // When a physical register is available, read the value from it and glue
    // the reads together.
    ArgValueLo =
        DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
    *InGlue = ArgValueLo.getValue(2);
    ArgValueHi =
        DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
    *InGlue = ArgValueHi.getValue(2);
  }

  // Convert the low i32 into a v32i1 type.
  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);

  // Convert the high i32 into a v32i1 type.
  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);

  // Concatenate the two values together.
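  // Illustrative sketch of the lowering performed here (32-bit RegCall, a
  // v64i1 value split across two 32-bit register locations):
  //
  //   i32 (VA)      --bitcast-->  v32i1 (Lo)  \
  //                                             concat_vectors --> v64i1
  //   i32 (NextVA)  --bitcast-->  v32i1 (Hi)  /
  //
  // The first location supplies mask bits 0..31 and the second location
  // supplies bits 32..63.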
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
}

/// Lower a register of various sizes (8/16/32/64 bits) to a mask value of
/// the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node that contains the operand after lowering to a mask
/// type.
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
                               const EVT &ValLoc, const SDLoc &DL,
                               SelectionDAG &DAG) {
  SDValue ValReturned = ValArg;

  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);

  if (ValVT == MVT::v64i1) {
    // On a 32-bit machine this case is handled by getv64i1Argument.
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On a 64-bit machine there is no need to truncate the value; the bitcast
    // below is enough.
  } else {
    MVT MaskLenVT;
    switch (ValVT.getSimpleVT().SimpleTy) {
    case MVT::v8i1:
      MaskLenVT = MVT::i8;
      break;
    case MVT::v16i1:
      MaskLenVT = MVT::i16;
      break;
    case MVT::v32i1:
      MaskLenVT = MVT::i32;
      break;
    default:
      llvm_unreachable("Expecting a vector of i1 types");
    }

    ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
  }
  return DAG.getBitcast(ValVT, ValReturned);
}

/// Lower the result values of a call into the appropriate copies out of the
/// physical registers they were returned in.
SDValue X86TargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    uint32_t *RegMask) const {

  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);

  // Copy all of the result registers out of their specified physreg.
111526a73082SReid Kleckner for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; 111626a73082SReid Kleckner ++I, ++InsIndex) { 111726a73082SReid Kleckner CCValAssign &VA = RVLocs[I]; 111826a73082SReid Kleckner EVT CopyVT = VA.getLocVT(); 111926a73082SReid Kleckner 112026a73082SReid Kleckner // In some calling conventions we need to remove the used registers 112126a73082SReid Kleckner // from the register mask. 112226a73082SReid Kleckner if (RegMask) { 112326a73082SReid Kleckner for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg())) 112426a73082SReid Kleckner RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); 112526a73082SReid Kleckner } 112626a73082SReid Kleckner 112726a73082SReid Kleckner // Report an error if there was an attempt to return FP values via XMM 112826a73082SReid Kleckner // registers. 112926a73082SReid Kleckner if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) { 113026a73082SReid Kleckner errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); 113126a73082SReid Kleckner if (VA.getLocReg() == X86::XMM1) 113226a73082SReid Kleckner VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts. 113326a73082SReid Kleckner else 113426a73082SReid Kleckner VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 113526a73082SReid Kleckner } else if (!Subtarget.hasSSE2() && 113626a73082SReid Kleckner X86::FR64XRegClass.contains(VA.getLocReg()) && 113726a73082SReid Kleckner CopyVT == MVT::f64) { 113826a73082SReid Kleckner errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); 113926a73082SReid Kleckner if (VA.getLocReg() == X86::XMM1) 114026a73082SReid Kleckner VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts. 114126a73082SReid Kleckner else 114226a73082SReid Kleckner VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 114326a73082SReid Kleckner } 114426a73082SReid Kleckner 114526a73082SReid Kleckner // If we prefer to use the value in xmm registers, copy it out as f80 and 114626a73082SReid Kleckner // use a truncate to move it from fp stack reg to xmm reg. 
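    // (For instance, an f64 returned in %st(0) by the callee is copied out as
    // f80 and rounded back to f64 below; since f80 can represent every f64
    // exactly, the round trip does not change the value.)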
114726a73082SReid Kleckner bool RoundAfterCopy = false; 114826a73082SReid Kleckner if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 114926a73082SReid Kleckner isScalarFPTypeInSSEReg(VA.getValVT())) { 115026a73082SReid Kleckner if (!Subtarget.hasX87()) 115126a73082SReid Kleckner report_fatal_error("X87 register return with X87 disabled"); 115226a73082SReid Kleckner CopyVT = MVT::f80; 115326a73082SReid Kleckner RoundAfterCopy = (CopyVT != VA.getLocVT()); 115426a73082SReid Kleckner } 115526a73082SReid Kleckner 115626a73082SReid Kleckner SDValue Val; 115726a73082SReid Kleckner if (VA.needsCustom()) { 115826a73082SReid Kleckner assert(VA.getValVT() == MVT::v64i1 && 115926a73082SReid Kleckner "Currently the only custom case is when we split v64i1 to 2 regs"); 116026a73082SReid Kleckner Val = 116126a73082SReid Kleckner getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue); 116226a73082SReid Kleckner } else { 116326a73082SReid Kleckner Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue) 116426a73082SReid Kleckner .getValue(1); 116526a73082SReid Kleckner Val = Chain.getValue(0); 116626a73082SReid Kleckner InGlue = Chain.getValue(2); 116726a73082SReid Kleckner } 116826a73082SReid Kleckner 116926a73082SReid Kleckner if (RoundAfterCopy) 117026a73082SReid Kleckner Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, 117126a73082SReid Kleckner // This truncation won't change the value. 117226a73082SReid Kleckner DAG.getIntPtrConstant(1, dl, /*isTarget=*/true)); 117326a73082SReid Kleckner 117426a73082SReid Kleckner if (VA.isExtInLoc()) { 117526a73082SReid Kleckner if (VA.getValVT().isVector() && 117626a73082SReid Kleckner VA.getValVT().getScalarType() == MVT::i1 && 117726a73082SReid Kleckner ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || 117826a73082SReid Kleckner (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { 117926a73082SReid Kleckner // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 118026a73082SReid Kleckner Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); 118126a73082SReid Kleckner } else 118226a73082SReid Kleckner Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 118326a73082SReid Kleckner } 118426a73082SReid Kleckner 118526a73082SReid Kleckner if (VA.getLocInfo() == CCValAssign::BCvt) 118626a73082SReid Kleckner Val = DAG.getBitcast(VA.getValVT(), Val); 118726a73082SReid Kleckner 118826a73082SReid Kleckner InVals.push_back(Val); 118926a73082SReid Kleckner } 119026a73082SReid Kleckner 119126a73082SReid Kleckner return Chain; 119226a73082SReid Kleckner } 119326a73082SReid Kleckner 119426a73082SReid Kleckner //===----------------------------------------------------------------------===// 119526a73082SReid Kleckner // C & StdCall & Fast Calling Convention implementation 119626a73082SReid Kleckner //===----------------------------------------------------------------------===// 119726a73082SReid Kleckner // StdCall calling convention seems to be standard for many Windows' API 119826a73082SReid Kleckner // routines and around. It differs from C calling convention just a little: 119926a73082SReid Kleckner // callee should clean up the stack, not caller. Symbols should be also 120026a73082SReid Kleckner // decorated in some fancy way :) It doesn't support any vector arguments. 120126a73082SReid Kleckner // For info on fast calling convention see Fast Calling Convention (tail call) 120226a73082SReid Kleckner // implementation LowerX86_32FastCCCallTo. 
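//
// Illustrative example (not referenced directly by the code below): on a
// 32-bit Windows target,
//   int __stdcall f(int a, int b);
// is decorated as _f@8 and returns with `ret 8`, i.e. the callee pops its own
// eight bytes of arguments, unlike the default C convention where the caller
// adjusts the stack after the call.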

/// Determines whether Args, either a set of outgoing arguments to a call, or a
/// set of incoming args of a call, contains an sret pointer that the callee
/// pops.
template <typename T>
static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
                             const X86Subtarget &Subtarget) {
  // Not C++20 (yet), so no concepts available.
  static_assert(std::is_same_v<T, ISD::OutputArg> ||
                    std::is_same_v<T, ISD::InputArg>,
                "requires ISD::OutputArg or ISD::InputArg");

  // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
  // for most compilations.
  if (!Subtarget.is32Bit())
    return false;

  if (Args.empty())
    return false;

  // Most calls do not have an sret argument; check the first arg next.
  const ISD::ArgFlagsTy &Flags = Args[0].Flags;
  if (!Flags.isSRet() || Flags.isInReg())
    return false;

  // The MSVC ABI does not pop the sret.
  if (Subtarget.getTargetTriple().isOSMSVCRT())
    return false;

  // MCUs don't pop the sret either.
  if (Subtarget.isTargetMCU())
    return false;

  // Otherwise the callee pops the sret argument.
  return true;
}

/// Make a copy of an aggregate at the address specified by "Src" to the
/// address "Dst" with size and alignment information specified by the
/// specific parameter attribute. The copy will be passed as a byval function
/// parameter.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);

  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile*/ false, /*AlwaysInline=*/true,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}

/// Return true if the calling convention is one that we can guarantee TCO for.
125526a73082SReid Kleckner static bool canGuaranteeTCO(CallingConv::ID CC) { 125626a73082SReid Kleckner return (CC == CallingConv::Fast || CC == CallingConv::GHC || 125726a73082SReid Kleckner CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || 125826a73082SReid Kleckner CC == CallingConv::Tail || CC == CallingConv::SwiftTail); 125926a73082SReid Kleckner } 126026a73082SReid Kleckner 126126a73082SReid Kleckner /// Return true if we might ever do TCO for calls with this calling convention. 126226a73082SReid Kleckner static bool mayTailCallThisCC(CallingConv::ID CC) { 126326a73082SReid Kleckner switch (CC) { 126426a73082SReid Kleckner // C calling conventions: 126526a73082SReid Kleckner case CallingConv::C: 126626a73082SReid Kleckner case CallingConv::Win64: 126726a73082SReid Kleckner case CallingConv::X86_64_SysV: 1268c166a43cSweiguozhi case CallingConv::PreserveNone: 126926a73082SReid Kleckner // Callee pop conventions: 127026a73082SReid Kleckner case CallingConv::X86_ThisCall: 127126a73082SReid Kleckner case CallingConv::X86_StdCall: 127226a73082SReid Kleckner case CallingConv::X86_VectorCall: 127326a73082SReid Kleckner case CallingConv::X86_FastCall: 127426a73082SReid Kleckner // Swift: 127526a73082SReid Kleckner case CallingConv::Swift: 127626a73082SReid Kleckner return true; 127726a73082SReid Kleckner default: 127826a73082SReid Kleckner return canGuaranteeTCO(CC); 127926a73082SReid Kleckner } 128026a73082SReid Kleckner } 128126a73082SReid Kleckner 128226a73082SReid Kleckner /// Return true if the function is being made into a tailcall target by 128326a73082SReid Kleckner /// changing its ABI. 128426a73082SReid Kleckner static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { 128526a73082SReid Kleckner return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || 128626a73082SReid Kleckner CC == CallingConv::Tail || CC == CallingConv::SwiftTail; 128726a73082SReid Kleckner } 128826a73082SReid Kleckner 128926a73082SReid Kleckner bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 129026a73082SReid Kleckner if (!CI->isTailCall()) 129126a73082SReid Kleckner return false; 129226a73082SReid Kleckner 129326a73082SReid Kleckner CallingConv::ID CalleeCC = CI->getCallingConv(); 129426a73082SReid Kleckner if (!mayTailCallThisCC(CalleeCC)) 129526a73082SReid Kleckner return false; 129626a73082SReid Kleckner 129726a73082SReid Kleckner return true; 129826a73082SReid Kleckner } 129926a73082SReid Kleckner 130026a73082SReid Kleckner SDValue 130126a73082SReid Kleckner X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 130226a73082SReid Kleckner const SmallVectorImpl<ISD::InputArg> &Ins, 130326a73082SReid Kleckner const SDLoc &dl, SelectionDAG &DAG, 130426a73082SReid Kleckner const CCValAssign &VA, 130526a73082SReid Kleckner MachineFrameInfo &MFI, unsigned i) const { 130626a73082SReid Kleckner // Create the nodes corresponding to a load from this parameter slot. 
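  // Depending on the argument's flags this produces either a frame index (for
  // byval arguments), a load that reuses the incoming fixed stack object when
  // copy elision is possible, or a load from a newly created fixed object,
  // possibly followed by a truncate for values that were extended in memory.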
  ISD::ArgFlagsTy Flags = Ins[i].Flags;
  bool AlwaysUseMutable = shouldGuaranteeTCO(
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  EVT ValVT;
  MVT PtrVT = getPointerTy(DAG.getDataLayout());

  // If the value is passed by pointer, we have the address passed instead of
  // the value itself. There is no need to extend if the mask value and the
  // location share the same absolute size.
  bool ExtendedInMem =
      VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
      VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();

  if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
    ValVT = VA.getLocVT();
  else
    ValVT = VA.getValVT();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
  if (Flags.isByVal()) {
    unsigned Bytes = Flags.getByValSize();
    if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.

    // FIXME: For now, all byval parameter objects are marked as aliasing. This
    // can be improved with deeper analysis.
    int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
                                   /*isAliased=*/true);
    return DAG.getFrameIndex(FI, PtrVT);
  }

  EVT ArgVT = Ins[i].ArgVT;

  // If this is a vector that has been split into multiple parts, don't elide
  // the copy. The layout on the stack may not match the packed in-memory
  // layout.
  bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();

  // This is an argument in memory. We might be able to perform copy elision:
  // if the argument is passed directly in memory without any extension, we can
  // load it straight from its incoming stack slot. Large vector types, for
  // example, may instead be passed indirectly by pointer.
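  // (For example, an i32 passed at [esp+4] on a 32-bit target can be loaded
  // directly from its incoming fixed stack object instead of first being
  // copied into a separate slot; the checks below exclude the cases where
  // extension, indirection or vector splitting makes the in-memory layout
  // differ from the value type.)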
135226a73082SReid Kleckner if (Flags.isCopyElisionCandidate() && 135326a73082SReid Kleckner VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && 135426a73082SReid Kleckner !ScalarizedVector) { 135526a73082SReid Kleckner SDValue PartAddr; 135626a73082SReid Kleckner if (Ins[i].PartOffset == 0) { 135726a73082SReid Kleckner // If this is a one-part value or the first part of a multi-part value, 135826a73082SReid Kleckner // create a stack object for the entire argument value type and return a 135926a73082SReid Kleckner // load from our portion of it. This assumes that if the first part of an 136026a73082SReid Kleckner // argument is in memory, the rest will also be in memory. 136126a73082SReid Kleckner int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), 136226a73082SReid Kleckner /*IsImmutable=*/false); 136326a73082SReid Kleckner PartAddr = DAG.getFrameIndex(FI, PtrVT); 136426a73082SReid Kleckner return DAG.getLoad( 136526a73082SReid Kleckner ValVT, dl, Chain, PartAddr, 136626a73082SReid Kleckner MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); 136726a73082SReid Kleckner } 136826a73082SReid Kleckner 136926a73082SReid Kleckner // This is not the first piece of an argument in memory. See if there is 137026a73082SReid Kleckner // already a fixed stack object including this offset. If so, assume it 137126a73082SReid Kleckner // was created by the PartOffset == 0 branch above and create a load from 137226a73082SReid Kleckner // the appropriate offset into it. 137326a73082SReid Kleckner int64_t PartBegin = VA.getLocMemOffset(); 137426a73082SReid Kleckner int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; 137526a73082SReid Kleckner int FI = MFI.getObjectIndexBegin(); 137626a73082SReid Kleckner for (; MFI.isFixedObjectIndex(FI); ++FI) { 137726a73082SReid Kleckner int64_t ObjBegin = MFI.getObjectOffset(FI); 137826a73082SReid Kleckner int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); 137926a73082SReid Kleckner if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) 138026a73082SReid Kleckner break; 138126a73082SReid Kleckner } 138226a73082SReid Kleckner if (MFI.isFixedObjectIndex(FI)) { 138326a73082SReid Kleckner SDValue Addr = 138426a73082SReid Kleckner DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), 138526a73082SReid Kleckner DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); 138626a73082SReid Kleckner return DAG.getLoad(ValVT, dl, Chain, Addr, 138726a73082SReid Kleckner MachinePointerInfo::getFixedStack( 138826a73082SReid Kleckner DAG.getMachineFunction(), FI, Ins[i].PartOffset)); 138926a73082SReid Kleckner } 139026a73082SReid Kleckner } 139126a73082SReid Kleckner 139226a73082SReid Kleckner int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 139326a73082SReid Kleckner VA.getLocMemOffset(), isImmutable); 139426a73082SReid Kleckner 139526a73082SReid Kleckner // Set SExt or ZExt flag. 
139626a73082SReid Kleckner if (VA.getLocInfo() == CCValAssign::ZExt) { 139726a73082SReid Kleckner MFI.setObjectZExt(FI, true); 139826a73082SReid Kleckner } else if (VA.getLocInfo() == CCValAssign::SExt) { 139926a73082SReid Kleckner MFI.setObjectSExt(FI, true); 140026a73082SReid Kleckner } 140126a73082SReid Kleckner 140226a73082SReid Kleckner MaybeAlign Alignment; 140326a73082SReid Kleckner if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && 140426a73082SReid Kleckner ValVT != MVT::f80) 140526a73082SReid Kleckner Alignment = MaybeAlign(4); 140626a73082SReid Kleckner SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 140726a73082SReid Kleckner SDValue Val = DAG.getLoad( 140826a73082SReid Kleckner ValVT, dl, Chain, FIN, 140926a73082SReid Kleckner MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), 141026a73082SReid Kleckner Alignment); 141126a73082SReid Kleckner return ExtendedInMem 141226a73082SReid Kleckner ? (VA.getValVT().isVector() 141326a73082SReid Kleckner ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) 141426a73082SReid Kleckner : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) 141526a73082SReid Kleckner : Val; 141626a73082SReid Kleckner } 141726a73082SReid Kleckner 141826a73082SReid Kleckner // FIXME: Get this from tablegen. 141926a73082SReid Kleckner static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, 142026a73082SReid Kleckner const X86Subtarget &Subtarget) { 142126a73082SReid Kleckner assert(Subtarget.is64Bit()); 142226a73082SReid Kleckner 142326a73082SReid Kleckner if (Subtarget.isCallingConvWin64(CallConv)) { 142426a73082SReid Kleckner static const MCPhysReg GPR64ArgRegsWin64[] = { 142526a73082SReid Kleckner X86::RCX, X86::RDX, X86::R8, X86::R9 142626a73082SReid Kleckner }; 142713d09dfaSCraig Topper return GPR64ArgRegsWin64; 142826a73082SReid Kleckner } 142926a73082SReid Kleckner 143026a73082SReid Kleckner static const MCPhysReg GPR64ArgRegs64Bit[] = { 143126a73082SReid Kleckner X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 143226a73082SReid Kleckner }; 143313d09dfaSCraig Topper return GPR64ArgRegs64Bit; 143426a73082SReid Kleckner } 143526a73082SReid Kleckner 143626a73082SReid Kleckner // FIXME: Get this from tablegen. 143726a73082SReid Kleckner static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, 143826a73082SReid Kleckner CallingConv::ID CallConv, 143926a73082SReid Kleckner const X86Subtarget &Subtarget) { 144026a73082SReid Kleckner assert(Subtarget.is64Bit()); 144126a73082SReid Kleckner if (Subtarget.isCallingConvWin64(CallConv)) { 144226a73082SReid Kleckner // The XMM registers which might contain var arg parameters are shadowed 144326a73082SReid Kleckner // in their paired GPR. So we only need to save the GPR to their home 144426a73082SReid Kleckner // slots. 144526a73082SReid Kleckner // TODO: __vectorcall will change this. 1446e03f4271SJay Foad return {}; 144726a73082SReid Kleckner } 144826a73082SReid Kleckner 144926a73082SReid Kleckner bool isSoftFloat = Subtarget.useSoftFloat(); 145026a73082SReid Kleckner if (isSoftFloat || !Subtarget.hasSSE1()) 145126a73082SReid Kleckner // Kernel mode asks for SSE to be disabled, so there are no XMM argument 145226a73082SReid Kleckner // registers. 
1453e03f4271SJay Foad return {}; 145426a73082SReid Kleckner 145526a73082SReid Kleckner static const MCPhysReg XMMArgRegs64Bit[] = { 145626a73082SReid Kleckner X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 145726a73082SReid Kleckner X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 145826a73082SReid Kleckner }; 145913d09dfaSCraig Topper return XMMArgRegs64Bit; 146026a73082SReid Kleckner } 146126a73082SReid Kleckner 146226a73082SReid Kleckner #ifndef NDEBUG 146326a73082SReid Kleckner static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { 146426a73082SReid Kleckner return llvm::is_sorted( 146526a73082SReid Kleckner ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { 146626a73082SReid Kleckner return A.getValNo() < B.getValNo(); 146726a73082SReid Kleckner }); 146826a73082SReid Kleckner } 146926a73082SReid Kleckner #endif 147026a73082SReid Kleckner 147126a73082SReid Kleckner namespace { 147226a73082SReid Kleckner /// This is a helper class for lowering variable arguments parameters. 147326a73082SReid Kleckner class VarArgsLoweringHelper { 147426a73082SReid Kleckner public: 147526a73082SReid Kleckner VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc, 147626a73082SReid Kleckner SelectionDAG &DAG, const X86Subtarget &Subtarget, 147726a73082SReid Kleckner CallingConv::ID CallConv, CCState &CCInfo) 147826a73082SReid Kleckner : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget), 147926a73082SReid Kleckner TheMachineFunction(DAG.getMachineFunction()), 148026a73082SReid Kleckner TheFunction(TheMachineFunction.getFunction()), 148126a73082SReid Kleckner FrameInfo(TheMachineFunction.getFrameInfo()), 148226a73082SReid Kleckner FrameLowering(*Subtarget.getFrameLowering()), 148326a73082SReid Kleckner TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv), 148426a73082SReid Kleckner CCInfo(CCInfo) {} 148526a73082SReid Kleckner 148626a73082SReid Kleckner // Lower variable arguments parameters. 
148726a73082SReid Kleckner void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize); 148826a73082SReid Kleckner 148926a73082SReid Kleckner private: 149026a73082SReid Kleckner void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize); 149126a73082SReid Kleckner 149226a73082SReid Kleckner void forwardMustTailParameters(SDValue &Chain); 149326a73082SReid Kleckner 149426a73082SReid Kleckner bool is64Bit() const { return Subtarget.is64Bit(); } 149526a73082SReid Kleckner bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); } 149626a73082SReid Kleckner 149726a73082SReid Kleckner X86MachineFunctionInfo *FuncInfo; 149826a73082SReid Kleckner const SDLoc &DL; 149926a73082SReid Kleckner SelectionDAG &DAG; 150026a73082SReid Kleckner const X86Subtarget &Subtarget; 150126a73082SReid Kleckner MachineFunction &TheMachineFunction; 150226a73082SReid Kleckner const Function &TheFunction; 150326a73082SReid Kleckner MachineFrameInfo &FrameInfo; 150426a73082SReid Kleckner const TargetFrameLowering &FrameLowering; 150526a73082SReid Kleckner const TargetLowering &TargLowering; 150626a73082SReid Kleckner CallingConv::ID CallConv; 150726a73082SReid Kleckner CCState &CCInfo; 150826a73082SReid Kleckner }; 150926a73082SReid Kleckner } // namespace 151026a73082SReid Kleckner 151126a73082SReid Kleckner void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( 151226a73082SReid Kleckner SDValue &Chain, unsigned StackSize) { 151326a73082SReid Kleckner // If the function takes variable number of arguments, make a frame index for 151426a73082SReid Kleckner // the start of the first vararg value... for expansion of llvm.va_start. We 151526a73082SReid Kleckner // can skip this if there are no va_start calls. 151626a73082SReid Kleckner if (is64Bit() || (CallConv != CallingConv::X86_FastCall && 151726a73082SReid Kleckner CallConv != CallingConv::X86_ThisCall)) { 151826a73082SReid Kleckner FuncInfo->setVarArgsFrameIndex( 151926a73082SReid Kleckner FrameInfo.CreateFixedObject(1, StackSize, true)); 152026a73082SReid Kleckner } 152126a73082SReid Kleckner 152226a73082SReid Kleckner // 64-bit calling conventions support varargs and register parameters, so we 152326a73082SReid Kleckner // have to do extra work to spill them in the prologue. 152426a73082SReid Kleckner if (is64Bit()) { 152526a73082SReid Kleckner // Find the first unallocated argument registers. 152626a73082SReid Kleckner ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); 152726a73082SReid Kleckner ArrayRef<MCPhysReg> ArgXMMs = 152826a73082SReid Kleckner get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget); 152926a73082SReid Kleckner unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); 153026a73082SReid Kleckner unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); 153126a73082SReid Kleckner 153226a73082SReid Kleckner assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && 153326a73082SReid Kleckner "SSE register cannot be used when SSE is disabled!"); 153426a73082SReid Kleckner 153526a73082SReid Kleckner if (isWin64()) { 153626a73082SReid Kleckner // Get to the caller-allocated home save location. Add 8 to account 153726a73082SReid Kleckner // for the return address. 153826a73082SReid Kleckner int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8; 153926a73082SReid Kleckner FuncInfo->setRegSaveFrameIndex( 154026a73082SReid Kleckner FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); 154126a73082SReid Kleckner // Fixup to set vararg frame on shadow area (4 x i64). 
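      // Illustration (standard Win64 stack layout at function entry): the
      // caller always allocates a 32-byte home area directly above the return
      // address, so RCX, RDX, R8 and R9 can be spilled to [RSP+8], [RSP+16],
      // [RSP+24] and [RSP+32] respectively. If fewer than four integer
      // registers are taken by named parameters, the varargs area therefore
      // starts inside this home area, contiguous with any stack-passed
      // arguments.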
      if (NumIntRegs < 4)
        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
    } else {
      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so
      // they may be loaded by va_arg.
      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
      FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
      FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
          ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
    }

    SmallVector<SDValue, 6>
        LiveGPRs; // SDValues for the GPR registers holding live input values.
    SmallVector<SDValue, 8> LiveXMMRegs; // SDValues for the XMM registers
                                         // holding live input values.
    SDValue ALVal; // If applicable, holds the SDValue for the %al register.

    // Gather all the live-in physical registers.
    for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
      Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
      LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
    }
    const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
    if (!AvailableXmms.empty()) {
      Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
      ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
      for (MCPhysReg Reg : AvailableXmms) {
        // FastRegisterAllocator spills virtual registers at basic block
        // boundaries. That leads to uses of XMM registers outside of the
        // check on %al. Pass physical registers to VASTART_SAVE_XMM_REGS to
        // avoid unnecessary spilling.
        TheMachineFunction.getRegInfo().addLiveIn(Reg);
        LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
      }
    }

    // Store the integer parameter registers.
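    // (For reference: on SysV x86-64 the register save area created above is
    // 6 * 8 = 48 bytes of GPRs followed by 8 * 16 = 128 bytes of XMM
    // registers, 176 bytes in total. VarArgsGPOffset and VarArgsFPOffset are
    // the offsets at which va_arg starts consuming it.)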
158026a73082SReid Kleckner SmallVector<SDValue, 8> MemOps; 158126a73082SReid Kleckner SDValue RSFIN = 158226a73082SReid Kleckner DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), 158326a73082SReid Kleckner TargLowering.getPointerTy(DAG.getDataLayout())); 158426a73082SReid Kleckner unsigned Offset = FuncInfo->getVarArgsGPOffset(); 158526a73082SReid Kleckner for (SDValue Val : LiveGPRs) { 158626a73082SReid Kleckner SDValue FIN = DAG.getNode(ISD::ADD, DL, 158726a73082SReid Kleckner TargLowering.getPointerTy(DAG.getDataLayout()), 158826a73082SReid Kleckner RSFIN, DAG.getIntPtrConstant(Offset, DL)); 158926a73082SReid Kleckner SDValue Store = 159026a73082SReid Kleckner DAG.getStore(Val.getValue(1), DL, Val, FIN, 159126a73082SReid Kleckner MachinePointerInfo::getFixedStack( 159226a73082SReid Kleckner DAG.getMachineFunction(), 159326a73082SReid Kleckner FuncInfo->getRegSaveFrameIndex(), Offset)); 159426a73082SReid Kleckner MemOps.push_back(Store); 159526a73082SReid Kleckner Offset += 8; 159626a73082SReid Kleckner } 159726a73082SReid Kleckner 159826a73082SReid Kleckner // Now store the XMM (fp + vector) parameter registers. 159926a73082SReid Kleckner if (!LiveXMMRegs.empty()) { 160026a73082SReid Kleckner SmallVector<SDValue, 12> SaveXMMOps; 160126a73082SReid Kleckner SaveXMMOps.push_back(Chain); 160226a73082SReid Kleckner SaveXMMOps.push_back(ALVal); 160326a73082SReid Kleckner SaveXMMOps.push_back(RSFIN); 160426a73082SReid Kleckner SaveXMMOps.push_back( 160526a73082SReid Kleckner DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32)); 160626a73082SReid Kleckner llvm::append_range(SaveXMMOps, LiveXMMRegs); 160726a73082SReid Kleckner MachineMemOperand *StoreMMO = 160826a73082SReid Kleckner DAG.getMachineFunction().getMachineMemOperand( 160926a73082SReid Kleckner MachinePointerInfo::getFixedStack( 161026a73082SReid Kleckner DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), 161126a73082SReid Kleckner Offset), 161226a73082SReid Kleckner MachineMemOperand::MOStore, 128, Align(16)); 161326a73082SReid Kleckner MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS, 161426a73082SReid Kleckner DL, DAG.getVTList(MVT::Other), 161526a73082SReid Kleckner SaveXMMOps, MVT::i8, StoreMMO)); 161626a73082SReid Kleckner } 161726a73082SReid Kleckner 161826a73082SReid Kleckner if (!MemOps.empty()) 161926a73082SReid Kleckner Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 162026a73082SReid Kleckner } 162126a73082SReid Kleckner } 162226a73082SReid Kleckner 162326a73082SReid Kleckner void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { 162426a73082SReid Kleckner // Find the largest legal vector type. 162526a73082SReid Kleckner MVT VecVT = MVT::Other; 162626a73082SReid Kleckner // FIXME: Only some x86_32 calling conventions support AVX512. 162726a73082SReid Kleckner if (Subtarget.useAVX512Regs() && 162826a73082SReid Kleckner (is64Bit() || (CallConv == CallingConv::X86_VectorCall || 162926a73082SReid Kleckner CallConv == CallingConv::Intel_OCL_BI))) 163026a73082SReid Kleckner VecVT = MVT::v16f32; 163126a73082SReid Kleckner else if (Subtarget.hasAVX()) 163226a73082SReid Kleckner VecVT = MVT::v8f32; 163326a73082SReid Kleckner else if (Subtarget.hasSSE2()) 163426a73082SReid Kleckner VecVT = MVT::v4f32; 163526a73082SReid Kleckner 163626a73082SReid Kleckner // We forward some GPRs and some vector types. 163726a73082SReid Kleckner SmallVector<MVT, 2> RegParmTypes; 163826a73082SReid Kleckner MVT IntVT = is64Bit() ? 
MVT::i64 : MVT::i32; 163926a73082SReid Kleckner RegParmTypes.push_back(IntVT); 164026a73082SReid Kleckner if (VecVT != MVT::Other) 164126a73082SReid Kleckner RegParmTypes.push_back(VecVT); 164226a73082SReid Kleckner 164326a73082SReid Kleckner // Compute the set of forwarded registers. The rest are scratch. 164426a73082SReid Kleckner SmallVectorImpl<ForwardedRegister> &Forwards = 164526a73082SReid Kleckner FuncInfo->getForwardedMustTailRegParms(); 164626a73082SReid Kleckner CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); 164726a73082SReid Kleckner 164826a73082SReid Kleckner // Forward AL for SysV x86_64 targets, since it is used for varargs. 164926a73082SReid Kleckner if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) { 165026a73082SReid Kleckner Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass); 165126a73082SReid Kleckner Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); 165226a73082SReid Kleckner } 165326a73082SReid Kleckner 165426a73082SReid Kleckner // Copy all forwards from physical to virtual registers. 165526a73082SReid Kleckner for (ForwardedRegister &FR : Forwards) { 165626a73082SReid Kleckner // FIXME: Can we use a less constrained schedule? 165726a73082SReid Kleckner SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT); 165826a73082SReid Kleckner FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister( 165926a73082SReid Kleckner TargLowering.getRegClassFor(FR.VT)); 166026a73082SReid Kleckner Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal); 166126a73082SReid Kleckner } 166226a73082SReid Kleckner } 166326a73082SReid Kleckner 166426a73082SReid Kleckner void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain, 166526a73082SReid Kleckner unsigned StackSize) { 166626a73082SReid Kleckner // Set FrameIndex to the 0xAAAAAAA value to mark unset state. 166726a73082SReid Kleckner // If necessary, it would be set into the correct value later. 
166826a73082SReid Kleckner FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); 166926a73082SReid Kleckner FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 167026a73082SReid Kleckner 167126a73082SReid Kleckner if (FrameInfo.hasVAStart()) 167226a73082SReid Kleckner createVarArgAreaAndStoreRegisters(Chain, StackSize); 167326a73082SReid Kleckner 167426a73082SReid Kleckner if (FrameInfo.hasMustTailInVarArgFunc()) 167526a73082SReid Kleckner forwardMustTailParameters(Chain); 167626a73082SReid Kleckner } 167726a73082SReid Kleckner 167826a73082SReid Kleckner SDValue X86TargetLowering::LowerFormalArguments( 167926a73082SReid Kleckner SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 168026a73082SReid Kleckner const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 168126a73082SReid Kleckner SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 168226a73082SReid Kleckner MachineFunction &MF = DAG.getMachineFunction(); 168326a73082SReid Kleckner X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 168426a73082SReid Kleckner 168526a73082SReid Kleckner const Function &F = MF.getFunction(); 168626a73082SReid Kleckner if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && 168726a73082SReid Kleckner F.getName() == "main") 168826a73082SReid Kleckner FuncInfo->setForceFramePointer(true); 168926a73082SReid Kleckner 169026a73082SReid Kleckner MachineFrameInfo &MFI = MF.getFrameInfo(); 169126a73082SReid Kleckner bool Is64Bit = Subtarget.is64Bit(); 169226a73082SReid Kleckner bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 169326a73082SReid Kleckner 169426a73082SReid Kleckner assert( 169526a73082SReid Kleckner !(IsVarArg && canGuaranteeTCO(CallConv)) && 169626a73082SReid Kleckner "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); 169726a73082SReid Kleckner 169826a73082SReid Kleckner // Assign locations to all of the incoming arguments. 169926a73082SReid Kleckner SmallVector<CCValAssign, 16> ArgLocs; 170026a73082SReid Kleckner CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 170126a73082SReid Kleckner 170226a73082SReid Kleckner // Allocate shadow area for Win64. 170326a73082SReid Kleckner if (IsWin64) 170426a73082SReid Kleckner CCInfo.AllocateStack(32, Align(8)); 170526a73082SReid Kleckner 170626a73082SReid Kleckner CCInfo.AnalyzeArguments(Ins, CC_X86); 170726a73082SReid Kleckner 170826a73082SReid Kleckner // In vectorcall calling convention a second pass is required for the HVA 170926a73082SReid Kleckner // types. 171026a73082SReid Kleckner if (CallingConv::X86_VectorCall == CallConv) { 171126a73082SReid Kleckner CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); 171226a73082SReid Kleckner } 171326a73082SReid Kleckner 171426a73082SReid Kleckner // The next loop assumes that the locations are in the same order of the 171526a73082SReid Kleckner // input arguments. 
171626a73082SReid Kleckner assert(isSortedByValueNo(ArgLocs) && 171726a73082SReid Kleckner "Argument Location list must be sorted before lowering"); 171826a73082SReid Kleckner 171926a73082SReid Kleckner SDValue ArgValue; 172026a73082SReid Kleckner for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; 172126a73082SReid Kleckner ++I, ++InsIndex) { 172226a73082SReid Kleckner assert(InsIndex < Ins.size() && "Invalid Ins index"); 172326a73082SReid Kleckner CCValAssign &VA = ArgLocs[I]; 172426a73082SReid Kleckner 172526a73082SReid Kleckner if (VA.isRegLoc()) { 172626a73082SReid Kleckner EVT RegVT = VA.getLocVT(); 172726a73082SReid Kleckner if (VA.needsCustom()) { 172826a73082SReid Kleckner assert( 172926a73082SReid Kleckner VA.getValVT() == MVT::v64i1 && 173026a73082SReid Kleckner "Currently the only custom case is when we split v64i1 to 2 regs"); 173126a73082SReid Kleckner 173226a73082SReid Kleckner // v64i1 values, in regcall calling convention, that are 173326a73082SReid Kleckner // compiled to 32 bit arch, are split up into two registers. 173426a73082SReid Kleckner ArgValue = 173526a73082SReid Kleckner getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); 173626a73082SReid Kleckner } else { 173726a73082SReid Kleckner const TargetRegisterClass *RC; 173826a73082SReid Kleckner if (RegVT == MVT::i8) 173926a73082SReid Kleckner RC = &X86::GR8RegClass; 174026a73082SReid Kleckner else if (RegVT == MVT::i16) 174126a73082SReid Kleckner RC = &X86::GR16RegClass; 174226a73082SReid Kleckner else if (RegVT == MVT::i32) 174326a73082SReid Kleckner RC = &X86::GR32RegClass; 174426a73082SReid Kleckner else if (Is64Bit && RegVT == MVT::i64) 174526a73082SReid Kleckner RC = &X86::GR64RegClass; 174626a73082SReid Kleckner else if (RegVT == MVT::f16) 174726a73082SReid Kleckner RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; 174826a73082SReid Kleckner else if (RegVT == MVT::f32) 174926a73082SReid Kleckner RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; 175026a73082SReid Kleckner else if (RegVT == MVT::f64) 175126a73082SReid Kleckner RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; 175226a73082SReid Kleckner else if (RegVT == MVT::f80) 175326a73082SReid Kleckner RC = &X86::RFP80RegClass; 175426a73082SReid Kleckner else if (RegVT == MVT::f128) 175526a73082SReid Kleckner RC = &X86::VR128RegClass; 175626a73082SReid Kleckner else if (RegVT.is512BitVector()) 175726a73082SReid Kleckner RC = &X86::VR512RegClass; 175826a73082SReid Kleckner else if (RegVT.is256BitVector()) 175926a73082SReid Kleckner RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; 176026a73082SReid Kleckner else if (RegVT.is128BitVector()) 176126a73082SReid Kleckner RC = Subtarget.hasVLX() ? 
&X86::VR128XRegClass : &X86::VR128RegClass; 176226a73082SReid Kleckner else if (RegVT == MVT::x86mmx) 176326a73082SReid Kleckner RC = &X86::VR64RegClass; 176426a73082SReid Kleckner else if (RegVT == MVT::v1i1) 176526a73082SReid Kleckner RC = &X86::VK1RegClass; 176626a73082SReid Kleckner else if (RegVT == MVT::v8i1) 176726a73082SReid Kleckner RC = &X86::VK8RegClass; 176826a73082SReid Kleckner else if (RegVT == MVT::v16i1) 176926a73082SReid Kleckner RC = &X86::VK16RegClass; 177026a73082SReid Kleckner else if (RegVT == MVT::v32i1) 177126a73082SReid Kleckner RC = &X86::VK32RegClass; 177226a73082SReid Kleckner else if (RegVT == MVT::v64i1) 177326a73082SReid Kleckner RC = &X86::VK64RegClass; 177426a73082SReid Kleckner else 177526a73082SReid Kleckner llvm_unreachable("Unknown argument type!"); 177626a73082SReid Kleckner 177726a73082SReid Kleckner Register Reg = MF.addLiveIn(VA.getLocReg(), RC); 177826a73082SReid Kleckner ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 177926a73082SReid Kleckner } 178026a73082SReid Kleckner 178126a73082SReid Kleckner // If this is an 8 or 16-bit value, it is really passed promoted to 32 178226a73082SReid Kleckner // bits. Insert an assert[sz]ext to capture this, then truncate to the 178326a73082SReid Kleckner // right size. 178426a73082SReid Kleckner if (VA.getLocInfo() == CCValAssign::SExt) 178526a73082SReid Kleckner ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 178626a73082SReid Kleckner DAG.getValueType(VA.getValVT())); 178726a73082SReid Kleckner else if (VA.getLocInfo() == CCValAssign::ZExt) 178826a73082SReid Kleckner ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 178926a73082SReid Kleckner DAG.getValueType(VA.getValVT())); 179026a73082SReid Kleckner else if (VA.getLocInfo() == CCValAssign::BCvt) 179126a73082SReid Kleckner ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); 179226a73082SReid Kleckner 179326a73082SReid Kleckner if (VA.isExtInLoc()) { 179426a73082SReid Kleckner // Handle MMX values passed in XMM regs. 179526a73082SReid Kleckner if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) 179626a73082SReid Kleckner ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); 179726a73082SReid Kleckner else if (VA.getValVT().isVector() && 179826a73082SReid Kleckner VA.getValVT().getScalarType() == MVT::i1 && 179926a73082SReid Kleckner ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || 180026a73082SReid Kleckner (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { 180126a73082SReid Kleckner // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 180226a73082SReid Kleckner ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); 180326a73082SReid Kleckner } else 180426a73082SReid Kleckner ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 180526a73082SReid Kleckner } 180626a73082SReid Kleckner } else { 180726a73082SReid Kleckner assert(VA.isMemLoc()); 180826a73082SReid Kleckner ArgValue = 180926a73082SReid Kleckner LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); 181026a73082SReid Kleckner } 181126a73082SReid Kleckner 181226a73082SReid Kleckner // If value is passed via pointer - do a load. 
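    // (For example, on Win64 vector arguments wider than 64 bits are passed
    // by pointer rather than by value, so in that case the incoming register
    // or stack slot holds an address and the real value is loaded here.)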
181326a73082SReid Kleckner if (VA.getLocInfo() == CCValAssign::Indirect && 181426a73082SReid Kleckner !(Ins[I].Flags.isByVal() && VA.isRegLoc())) { 181526a73082SReid Kleckner ArgValue = 181626a73082SReid Kleckner DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); 181726a73082SReid Kleckner } 181826a73082SReid Kleckner 181926a73082SReid Kleckner InVals.push_back(ArgValue); 182026a73082SReid Kleckner } 182126a73082SReid Kleckner 182226a73082SReid Kleckner for (unsigned I = 0, E = Ins.size(); I != E; ++I) { 182326a73082SReid Kleckner if (Ins[I].Flags.isSwiftAsync()) { 182426a73082SReid Kleckner auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1825dd70aef0SAlex Lorenz if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) 182626a73082SReid Kleckner X86FI->setHasSwiftAsyncContext(true); 182726a73082SReid Kleckner else { 1828dd70aef0SAlex Lorenz int PtrSize = Subtarget.is64Bit() ? 8 : 4; 1829dd70aef0SAlex Lorenz int FI = 1830dd70aef0SAlex Lorenz MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false); 183126a73082SReid Kleckner X86FI->setSwiftAsyncContextFrameIdx(FI); 1832dd70aef0SAlex Lorenz SDValue St = DAG.getStore( 1833dd70aef0SAlex Lorenz DAG.getEntryNode(), dl, InVals[I], 1834dd70aef0SAlex Lorenz DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32), 183526a73082SReid Kleckner MachinePointerInfo::getFixedStack(MF, FI)); 183626a73082SReid Kleckner Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain); 183726a73082SReid Kleckner } 183826a73082SReid Kleckner } 183926a73082SReid Kleckner 184026a73082SReid Kleckner // Swift calling convention does not require we copy the sret argument 184126a73082SReid Kleckner // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. 184226a73082SReid Kleckner if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) 184326a73082SReid Kleckner continue; 184426a73082SReid Kleckner 184526a73082SReid Kleckner // All x86 ABIs require that for returning structs by value we copy the 184626a73082SReid Kleckner // sret argument into %rax/%eax (depending on ABI) for the return. Save 184726a73082SReid Kleckner // the argument into a virtual register so that we can access it from the 184826a73082SReid Kleckner // return points. 184926a73082SReid Kleckner if (Ins[I].Flags.isSRet()) { 185026a73082SReid Kleckner assert(!FuncInfo->getSRetReturnReg() && 185126a73082SReid Kleckner "SRet return has already been set"); 185226a73082SReid Kleckner MVT PtrTy = getPointerTy(DAG.getDataLayout()); 185326a73082SReid Kleckner Register Reg = 185426a73082SReid Kleckner MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); 185526a73082SReid Kleckner FuncInfo->setSRetReturnReg(Reg); 185626a73082SReid Kleckner SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); 185726a73082SReid Kleckner Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); 185826a73082SReid Kleckner break; 185926a73082SReid Kleckner } 186026a73082SReid Kleckner } 186126a73082SReid Kleckner 186226a73082SReid Kleckner unsigned StackSize = CCInfo.getStackSize(); 186326a73082SReid Kleckner // Align stack specially for tail calls. 
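  // (Under GuaranteedTailCallOpt the callee pops its own arguments, so the
  // incoming argument area is padded up to keep the stack suitably aligned
  // across such calls; see GetAlignedArgumentStackSize.)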
186426a73082SReid Kleckner if (shouldGuaranteeTCO(CallConv, 186526a73082SReid Kleckner MF.getTarget().Options.GuaranteedTailCallOpt)) 186626a73082SReid Kleckner StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 186726a73082SReid Kleckner 186826a73082SReid Kleckner if (IsVarArg) 186926a73082SReid Kleckner VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) 187026a73082SReid Kleckner .lowerVarArgsParameters(Chain, StackSize); 187126a73082SReid Kleckner 187226a73082SReid Kleckner // Some CCs need callee pop. 187326a73082SReid Kleckner if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg, 187426a73082SReid Kleckner MF.getTarget().Options.GuaranteedTailCallOpt)) { 187526a73082SReid Kleckner FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. 187626a73082SReid Kleckner } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { 187726a73082SReid Kleckner // X86 interrupts must pop the error code (and the alignment padding) if 187826a73082SReid Kleckner // present. 187926a73082SReid Kleckner FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); 188026a73082SReid Kleckner } else { 188126a73082SReid Kleckner FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. 188226a73082SReid Kleckner // If this is an sret function, the return should pop the hidden pointer. 188326a73082SReid Kleckner if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget)) 188426a73082SReid Kleckner FuncInfo->setBytesToPopOnReturn(4); 188526a73082SReid Kleckner } 188626a73082SReid Kleckner 188726a73082SReid Kleckner if (!Is64Bit) { 188826a73082SReid Kleckner // RegSaveFrameIndex is X86-64 only. 188926a73082SReid Kleckner FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 189026a73082SReid Kleckner } 189126a73082SReid Kleckner 189226a73082SReid Kleckner FuncInfo->setArgumentStackSize(StackSize); 189326a73082SReid Kleckner 189426a73082SReid Kleckner if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { 189526a73082SReid Kleckner EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); 189626a73082SReid Kleckner if (Personality == EHPersonality::CoreCLR) { 189726a73082SReid Kleckner assert(Is64Bit); 189826a73082SReid Kleckner // TODO: Add a mechanism to frame lowering that will allow us to indicate 189926a73082SReid Kleckner // that we'd prefer this slot be allocated towards the bottom of the frame 190026a73082SReid Kleckner // (i.e. near the stack pointer after allocating the frame). Every 190126a73082SReid Kleckner // funclet needs a copy of this slot in its (mostly empty) frame, and the 190226a73082SReid Kleckner // offset from the bottom of this and each funclet's frame must be the 190326a73082SReid Kleckner // same, so the size of funclets' (mostly empty) frames is dictated by 190426a73082SReid Kleckner // how far this slot is from the bottom (since they allocate just enough 190526a73082SReid Kleckner // space to accommodate holding this slot at the correct offset). 
190626a73082SReid Kleckner int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false); 190726a73082SReid Kleckner EHInfo->PSPSymFrameIdx = PSPSymFI; 190826a73082SReid Kleckner } 190926a73082SReid Kleckner } 191026a73082SReid Kleckner 191126a73082SReid Kleckner if (shouldDisableArgRegFromCSR(CallConv) || 191226a73082SReid Kleckner F.hasFnAttribute("no_caller_saved_registers")) { 191326a73082SReid Kleckner MachineRegisterInfo &MRI = MF.getRegInfo(); 1914c503758aSCraig Topper for (std::pair<MCRegister, Register> Pair : MRI.liveins()) 191526a73082SReid Kleckner MRI.disableCalleeSavedRegister(Pair.first); 191626a73082SReid Kleckner } 191726a73082SReid Kleckner 1918c166a43cSweiguozhi if (CallingConv::PreserveNone == CallConv) 1919c166a43cSweiguozhi for (unsigned I = 0, E = Ins.size(); I != E; ++I) { 1920c166a43cSweiguozhi if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() || 1921c166a43cSweiguozhi Ins[I].Flags.isSwiftError()) { 1922c166a43cSweiguozhi errorUnsupported(DAG, dl, 1923c166a43cSweiguozhi "Swift attributes can't be used with preserve_none"); 1924c166a43cSweiguozhi break; 1925c166a43cSweiguozhi } 1926c166a43cSweiguozhi } 1927c166a43cSweiguozhi 192826a73082SReid Kleckner return Chain; 192926a73082SReid Kleckner } 193026a73082SReid Kleckner 193126a73082SReid Kleckner SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, 193226a73082SReid Kleckner SDValue Arg, const SDLoc &dl, 193326a73082SReid Kleckner SelectionDAG &DAG, 193426a73082SReid Kleckner const CCValAssign &VA, 193526a73082SReid Kleckner ISD::ArgFlagsTy Flags, 193626a73082SReid Kleckner bool isByVal) const { 193726a73082SReid Kleckner unsigned LocMemOffset = VA.getLocMemOffset(); 193826a73082SReid Kleckner SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 193926a73082SReid Kleckner PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 194026a73082SReid Kleckner StackPtr, PtrOff); 194126a73082SReid Kleckner if (isByVal) 194226a73082SReid Kleckner return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 194326a73082SReid Kleckner 194426a73082SReid Kleckner MaybeAlign Alignment; 194526a73082SReid Kleckner if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && 194626a73082SReid Kleckner Arg.getSimpleValueType() != MVT::f80) 194726a73082SReid Kleckner Alignment = MaybeAlign(4); 194826a73082SReid Kleckner return DAG.getStore( 194926a73082SReid Kleckner Chain, dl, Arg, PtrOff, 195026a73082SReid Kleckner MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), 195126a73082SReid Kleckner Alignment); 195226a73082SReid Kleckner } 195326a73082SReid Kleckner 195426a73082SReid Kleckner /// Emit a load of return address if tail call 195526a73082SReid Kleckner /// optimization is performed and it is required. 195626a73082SReid Kleckner SDValue X86TargetLowering::EmitTailCallLoadRetAddr( 195726a73082SReid Kleckner SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, 195826a73082SReid Kleckner bool Is64Bit, int FPDiff, const SDLoc &dl) const { 195926a73082SReid Kleckner // Adjust the Return address stack slot. 196026a73082SReid Kleckner EVT VT = getPointerTy(DAG.getDataLayout()); 196126a73082SReid Kleckner OutRetAddr = getReturnAddressFrameIndex(DAG); 196226a73082SReid Kleckner 196326a73082SReid Kleckner // Load the "old" Return address. 
196426a73082SReid Kleckner OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); 196526a73082SReid Kleckner return SDValue(OutRetAddr.getNode(), 1); 196626a73082SReid Kleckner } 196726a73082SReid Kleckner 196826a73082SReid Kleckner /// Emit a store of the return address if tail call 196926a73082SReid Kleckner /// optimization is performed and it is required (FPDiff!=0). 197026a73082SReid Kleckner static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, 197126a73082SReid Kleckner SDValue Chain, SDValue RetAddrFrIdx, 197226a73082SReid Kleckner EVT PtrVT, unsigned SlotSize, 197326a73082SReid Kleckner int FPDiff, const SDLoc &dl) { 197426a73082SReid Kleckner // Store the return address to the appropriate stack slot. 197526a73082SReid Kleckner if (!FPDiff) return Chain; 197626a73082SReid Kleckner // Calculate the new stack slot for the return address. 197726a73082SReid Kleckner int NewReturnAddrFI = 197826a73082SReid Kleckner MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, 197926a73082SReid Kleckner false); 198026a73082SReid Kleckner SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); 198126a73082SReid Kleckner Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, 198226a73082SReid Kleckner MachinePointerInfo::getFixedStack( 198326a73082SReid Kleckner DAG.getMachineFunction(), NewReturnAddrFI)); 198426a73082SReid Kleckner return Chain; 198526a73082SReid Kleckner } 198626a73082SReid Kleckner 198726a73082SReid Kleckner /// Returns a vector_shuffle mask for an movs{s|d}, movd 198826a73082SReid Kleckner /// operation of specified width. 198926a73082SReid Kleckner SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, 199026a73082SReid Kleckner SDValue V1, SDValue V2) const { 199126a73082SReid Kleckner unsigned NumElems = VT.getVectorNumElements(); 199226a73082SReid Kleckner SmallVector<int, 8> Mask; 199326a73082SReid Kleckner Mask.push_back(NumElems); 199426a73082SReid Kleckner for (unsigned i = 1; i != NumElems; ++i) 199526a73082SReid Kleckner Mask.push_back(i); 199626a73082SReid Kleckner return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); 199726a73082SReid Kleckner } 199826a73082SReid Kleckner 199926a73082SReid Kleckner SDValue 200026a73082SReid Kleckner X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 200126a73082SReid Kleckner SmallVectorImpl<SDValue> &InVals) const { 200226a73082SReid Kleckner SelectionDAG &DAG = CLI.DAG; 200326a73082SReid Kleckner SDLoc &dl = CLI.DL; 200426a73082SReid Kleckner SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 200526a73082SReid Kleckner SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 200626a73082SReid Kleckner SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 200726a73082SReid Kleckner SDValue Chain = CLI.Chain; 200826a73082SReid Kleckner SDValue Callee = CLI.Callee; 200926a73082SReid Kleckner CallingConv::ID CallConv = CLI.CallConv; 201026a73082SReid Kleckner bool &isTailCall = CLI.IsTailCall; 201126a73082SReid Kleckner bool isVarArg = CLI.IsVarArg; 201226a73082SReid Kleckner const auto *CB = CLI.CB; 201326a73082SReid Kleckner 201426a73082SReid Kleckner MachineFunction &MF = DAG.getMachineFunction(); 201526a73082SReid Kleckner bool Is64Bit = Subtarget.is64Bit(); 201626a73082SReid Kleckner bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 201726a73082SReid Kleckner bool IsSibcall = false; 201826a73082SReid Kleckner bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || 201926a73082SReid Kleckner CallConv == 
CallingConv::Tail || CallConv == CallingConv::SwiftTail; 202026a73082SReid Kleckner bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget); 202126a73082SReid Kleckner X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); 202226a73082SReid Kleckner bool HasNCSR = (CB && isa<CallInst>(CB) && 202326a73082SReid Kleckner CB->hasFnAttr("no_caller_saved_registers")); 202426a73082SReid Kleckner bool HasNoCfCheck = (CB && CB->doesNoCfCheck()); 202526a73082SReid Kleckner bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall()); 202626a73082SReid Kleckner bool IsCFICall = IsIndirectCall && CLI.CFIType; 20270f0cfcffSMatt Arsenault const Module *M = MF.getFunction().getParent(); 202826a73082SReid Kleckner Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); 202926a73082SReid Kleckner 203026a73082SReid Kleckner MachineFunction::CallSiteInfo CSInfo; 203126a73082SReid Kleckner if (CallConv == CallingConv::X86_INTR) 203226a73082SReid Kleckner report_fatal_error("X86 interrupts may not be called directly"); 203326a73082SReid Kleckner 2034385faf9cSReid Kleckner // Analyze operands of the call, assigning locations to each operand. 2035385faf9cSReid Kleckner SmallVector<CCValAssign, 16> ArgLocs; 2036385faf9cSReid Kleckner CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 2037385faf9cSReid Kleckner 2038385faf9cSReid Kleckner // Allocate shadow area for Win64. 2039385faf9cSReid Kleckner if (IsWin64) 2040385faf9cSReid Kleckner CCInfo.AllocateStack(32, Align(8)); 2041385faf9cSReid Kleckner 2042385faf9cSReid Kleckner CCInfo.AnalyzeArguments(Outs, CC_X86); 2043385faf9cSReid Kleckner 2044385faf9cSReid Kleckner // In vectorcall calling convention a second pass is required for the HVA 2045385faf9cSReid Kleckner // types. 2046385faf9cSReid Kleckner if (CallingConv::X86_VectorCall == CallConv) { 2047385faf9cSReid Kleckner CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); 2048385faf9cSReid Kleckner } 2049385faf9cSReid Kleckner 205026a73082SReid Kleckner bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); 205126a73082SReid Kleckner if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { 205226a73082SReid Kleckner // If we are using a GOT, disable tail calls to external symbols with 205326a73082SReid Kleckner // default visibility. Tail calling such a symbol requires using a GOT 205426a73082SReid Kleckner // relocation, which forces early binding of the symbol. This breaks code 205526a73082SReid Kleckner // that require lazy function symbol resolution. Using musttail or 205626a73082SReid Kleckner // GuaranteedTailCallOpt will override this. 205726a73082SReid Kleckner GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 205826a73082SReid Kleckner if (!G || (!G->getGlobal()->hasLocalLinkage() && 205926a73082SReid Kleckner G->getGlobal()->hasDefaultVisibility())) 206026a73082SReid Kleckner isTailCall = false; 206126a73082SReid Kleckner } 206226a73082SReid Kleckner 206326a73082SReid Kleckner if (isTailCall && !IsMustTail) { 206426a73082SReid Kleckner // Check if it's really possible to do a tail call. 2065385faf9cSReid Kleckner isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, 2066385faf9cSReid Kleckner IsCalleePopSRet); 206726a73082SReid Kleckner 206826a73082SReid Kleckner // Sibcalls are automatically detected tailcalls which do not require 206926a73082SReid Kleckner // ABI changes. 
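// (Illustrative note, not part of the upstream comment.) Concretely: under
// GuaranteedTailCallOpt the caller may have to grow or shift its argument
// area (see the FPDiff handling below), while a sibcall reuses the caller's
// incoming frame unchanged, which is why it needs no ABI adjustments.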
207026a73082SReid Kleckner if (!IsGuaranteeTCO && isTailCall) 207126a73082SReid Kleckner IsSibcall = true; 207226a73082SReid Kleckner 207326a73082SReid Kleckner if (isTailCall) 207426a73082SReid Kleckner ++NumTailCalls; 207526a73082SReid Kleckner } 207626a73082SReid Kleckner 207726a73082SReid Kleckner if (IsMustTail && !isTailCall) 207826a73082SReid Kleckner report_fatal_error("failed to perform tail call elimination on a call " 207926a73082SReid Kleckner "site marked musttail"); 208026a73082SReid Kleckner 208126a73082SReid Kleckner assert(!(isVarArg && canGuaranteeTCO(CallConv)) && 208226a73082SReid Kleckner "Var args not supported with calling convention fastcc, ghc or hipe"); 208326a73082SReid Kleckner 208426a73082SReid Kleckner // Get a count of how many bytes are to be pushed on the stack. 208526a73082SReid Kleckner unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 208626a73082SReid Kleckner if (IsSibcall) 208726a73082SReid Kleckner // This is a sibcall. The memory operands are available in caller's 208826a73082SReid Kleckner // own caller's stack. 208926a73082SReid Kleckner NumBytes = 0; 209026a73082SReid Kleckner else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) 209126a73082SReid Kleckner NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 209226a73082SReid Kleckner 209326a73082SReid Kleckner int FPDiff = 0; 209426a73082SReid Kleckner if (isTailCall && 209526a73082SReid Kleckner shouldGuaranteeTCO(CallConv, 209626a73082SReid Kleckner MF.getTarget().Options.GuaranteedTailCallOpt)) { 209726a73082SReid Kleckner // Lower arguments at fp - stackoffset + fpdiff. 209826a73082SReid Kleckner unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); 209926a73082SReid Kleckner 210026a73082SReid Kleckner FPDiff = NumBytesCallerPushed - NumBytes; 210126a73082SReid Kleckner 210226a73082SReid Kleckner // Set the delta of movement of the returnaddr stackslot. 210326a73082SReid Kleckner // But only set if delta is greater than previous delta. 210426a73082SReid Kleckner if (FPDiff < X86Info->getTCReturnAddrDelta()) 210526a73082SReid Kleckner X86Info->setTCReturnAddrDelta(FPDiff); 210626a73082SReid Kleckner } 210726a73082SReid Kleckner 210826a73082SReid Kleckner unsigned NumBytesToPush = NumBytes; 210926a73082SReid Kleckner unsigned NumBytesToPop = NumBytes; 211026a73082SReid Kleckner 211126a73082SReid Kleckner // If we have an inalloca argument, all stack space has already been allocated 211226a73082SReid Kleckner // for us and be right at the top of the stack. We don't support multiple 211326a73082SReid Kleckner // arguments passed in memory when using inalloca. 
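// Rough IR sketch of the inalloca case (illustrative only, not from this
// file): the front end allocates the outgoing argument memory itself, e.g.
//   %argmem = alloca inalloca <{ %struct.S }>, align 4
//   ...store the argument fields into %argmem...
//   call void @f(ptr inalloca(<{ %struct.S }>) %argmem)
// so the lowering below pushes no additional bytes (NumBytesToPush = 0).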
211426a73082SReid Kleckner if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
211526a73082SReid Kleckner NumBytesToPush = 0;
211626a73082SReid Kleckner if (!ArgLocs.back().isMemLoc())
211726a73082SReid Kleckner report_fatal_error("cannot use inalloca attribute on a register "
211826a73082SReid Kleckner "parameter");
211926a73082SReid Kleckner if (ArgLocs.back().getLocMemOffset() != 0)
212026a73082SReid Kleckner report_fatal_error("any parameter with the inalloca attribute must be "
212126a73082SReid Kleckner "the only memory argument");
212226a73082SReid Kleckner } else if (CLI.IsPreallocated) {
212326a73082SReid Kleckner assert(ArgLocs.back().isMemLoc() &&
212426a73082SReid Kleckner "cannot use preallocated attribute on a register "
212526a73082SReid Kleckner "parameter");
212626a73082SReid Kleckner SmallVector<size_t, 4> PreallocatedOffsets;
212726a73082SReid Kleckner for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
212826a73082SReid Kleckner if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
212926a73082SReid Kleckner PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
213026a73082SReid Kleckner }
213126a73082SReid Kleckner }
213226a73082SReid Kleckner auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
213326a73082SReid Kleckner size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
213426a73082SReid Kleckner MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
213526a73082SReid Kleckner MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
213626a73082SReid Kleckner NumBytesToPush = 0;
213726a73082SReid Kleckner }
213826a73082SReid Kleckner 
213926a73082SReid Kleckner if (!IsSibcall && !IsMustTail)
214026a73082SReid Kleckner Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
214126a73082SReid Kleckner NumBytes - NumBytesToPush, dl);
214226a73082SReid Kleckner 
214326a73082SReid Kleckner SDValue RetAddrFrIdx;
214426a73082SReid Kleckner // Load return address for tail calls.
214526a73082SReid Kleckner if (isTailCall && FPDiff)
214626a73082SReid Kleckner Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
214726a73082SReid Kleckner Is64Bit, FPDiff, dl);
214826a73082SReid Kleckner 
214926a73082SReid Kleckner SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
215026a73082SReid Kleckner SmallVector<SDValue, 8> MemOpChains;
215126a73082SReid Kleckner SDValue StackPtr;
215226a73082SReid Kleckner 
215326a73082SReid Kleckner // The next loop assumes that the locations are in the same order as the
215426a73082SReid Kleckner // input arguments.
215526a73082SReid Kleckner assert(isSortedByValueNo(ArgLocs) &&
215626a73082SReid Kleckner "Argument Location list must be sorted before lowering");
215726a73082SReid Kleckner 
215826a73082SReid Kleckner // Walk the register/memloc assignments, inserting copies/loads. In the case
215926a73082SReid Kleckner // of tail call optimization, arguments are handled later.
216026a73082SReid Kleckner const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
216126a73082SReid Kleckner for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
216226a73082SReid Kleckner ++I, ++OutIndex) {
216326a73082SReid Kleckner assert(OutIndex < Outs.size() && "Invalid Out index");
216426a73082SReid Kleckner // Skip inalloca/preallocated arguments; they have already been written.
216526a73082SReid Kleckner ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; 216626a73082SReid Kleckner if (Flags.isInAlloca() || Flags.isPreallocated()) 216726a73082SReid Kleckner continue; 216826a73082SReid Kleckner 216926a73082SReid Kleckner CCValAssign &VA = ArgLocs[I]; 217026a73082SReid Kleckner EVT RegVT = VA.getLocVT(); 217126a73082SReid Kleckner SDValue Arg = OutVals[OutIndex]; 217226a73082SReid Kleckner bool isByVal = Flags.isByVal(); 217326a73082SReid Kleckner 217426a73082SReid Kleckner // Promote the value if needed. 217526a73082SReid Kleckner switch (VA.getLocInfo()) { 217626a73082SReid Kleckner default: llvm_unreachable("Unknown loc info!"); 217726a73082SReid Kleckner case CCValAssign::Full: break; 217826a73082SReid Kleckner case CCValAssign::SExt: 217926a73082SReid Kleckner Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 218026a73082SReid Kleckner break; 218126a73082SReid Kleckner case CCValAssign::ZExt: 218226a73082SReid Kleckner Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); 218326a73082SReid Kleckner break; 218426a73082SReid Kleckner case CCValAssign::AExt: 218526a73082SReid Kleckner if (Arg.getValueType().isVector() && 218626a73082SReid Kleckner Arg.getValueType().getVectorElementType() == MVT::i1) 218726a73082SReid Kleckner Arg = lowerMasksToReg(Arg, RegVT, dl, DAG); 218826a73082SReid Kleckner else if (RegVT.is128BitVector()) { 218926a73082SReid Kleckner // Special case: passing MMX values in XMM registers. 219026a73082SReid Kleckner Arg = DAG.getBitcast(MVT::i64, Arg); 219126a73082SReid Kleckner Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); 219226a73082SReid Kleckner Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); 219326a73082SReid Kleckner } else 219426a73082SReid Kleckner Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); 219526a73082SReid Kleckner break; 219626a73082SReid Kleckner case CCValAssign::BCvt: 219726a73082SReid Kleckner Arg = DAG.getBitcast(RegVT, Arg); 219826a73082SReid Kleckner break; 219926a73082SReid Kleckner case CCValAssign::Indirect: { 220026a73082SReid Kleckner if (isByVal) { 220126a73082SReid Kleckner // Memcpy the argument to a temporary stack slot to prevent 220226a73082SReid Kleckner // the caller from seeing any modifications the callee may make 220326a73082SReid Kleckner // as guaranteed by the `byval` attribute. 220426a73082SReid Kleckner int FrameIdx = MF.getFrameInfo().CreateStackObject( 220526a73082SReid Kleckner Flags.getByValSize(), 220626a73082SReid Kleckner std::max(Align(16), Flags.getNonZeroByValAlign()), false); 220726a73082SReid Kleckner SDValue StackSlot = 220826a73082SReid Kleckner DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout())); 220926a73082SReid Kleckner Chain = 221026a73082SReid Kleckner CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl); 221126a73082SReid Kleckner // From now on treat this as a regular pointer 221226a73082SReid Kleckner Arg = StackSlot; 221326a73082SReid Kleckner isByVal = false; 221426a73082SReid Kleckner } else { 221526a73082SReid Kleckner // Store the argument. 
221626a73082SReid Kleckner SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
221726a73082SReid Kleckner int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
221826a73082SReid Kleckner Chain = DAG.getStore(
221926a73082SReid Kleckner Chain, dl, Arg, SpillSlot,
222026a73082SReid Kleckner MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
222126a73082SReid Kleckner Arg = SpillSlot;
222226a73082SReid Kleckner }
222326a73082SReid Kleckner break;
222426a73082SReid Kleckner }
222526a73082SReid Kleckner }
222626a73082SReid Kleckner 
222726a73082SReid Kleckner if (VA.needsCustom()) {
222826a73082SReid Kleckner assert(VA.getValVT() == MVT::v64i1 &&
222926a73082SReid Kleckner "Currently the only custom case is when we split v64i1 to 2 regs");
223026a73082SReid Kleckner // Split v64i1 value into two registers
223126a73082SReid Kleckner Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
223226a73082SReid Kleckner } else if (VA.isRegLoc()) {
223326a73082SReid Kleckner RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
223426a73082SReid Kleckner const TargetOptions &Options = DAG.getTarget().Options;
223526a73082SReid Kleckner if (Options.EmitCallSiteInfo)
2236212b1a84SPrabhuk CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
223726a73082SReid Kleckner if (isVarArg && IsWin64) {
223826a73082SReid Kleckner // The Win64 ABI requires an argument XMM reg to be copied to the
223926a73082SReid Kleckner // corresponding shadow reg if the callee is a varargs function.
224026a73082SReid Kleckner Register ShadowReg;
224126a73082SReid Kleckner switch (VA.getLocReg()) {
224226a73082SReid Kleckner case X86::XMM0: ShadowReg = X86::RCX; break;
224326a73082SReid Kleckner case X86::XMM1: ShadowReg = X86::RDX; break;
224426a73082SReid Kleckner case X86::XMM2: ShadowReg = X86::R8; break;
224526a73082SReid Kleckner case X86::XMM3: ShadowReg = X86::R9; break;
224626a73082SReid Kleckner }
224726a73082SReid Kleckner if (ShadowReg)
224826a73082SReid Kleckner RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
224926a73082SReid Kleckner }
225026a73082SReid Kleckner } else if (!IsSibcall && (!isTailCall || isByVal)) {
225126a73082SReid Kleckner assert(VA.isMemLoc());
225226a73082SReid Kleckner if (!StackPtr.getNode())
225326a73082SReid Kleckner StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
225426a73082SReid Kleckner getPointerTy(DAG.getDataLayout()));
225526a73082SReid Kleckner MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
225626a73082SReid Kleckner dl, DAG, VA, Flags, isByVal));
225726a73082SReid Kleckner }
225826a73082SReid Kleckner }
225926a73082SReid Kleckner 
226026a73082SReid Kleckner if (!MemOpChains.empty())
226126a73082SReid Kleckner Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
226226a73082SReid Kleckner 
226326a73082SReid Kleckner if (Subtarget.isPICStyleGOT()) {
226426a73082SReid Kleckner // ELF / PIC requires the GOT pointer to be in the EBX register before
226526a73082SReid Kleckner // function calls made via the PLT (except for regcall).
226626a73082SReid Kleckner if (!isTailCall) {
226726a73082SReid Kleckner // An indirect call with the RegCall calling convention may use up all the
226826a73082SReid Kleckner // general registers, so it is not suitable to bind the EBX register for
226926a73082SReid Kleckner // the GOT address; just let the register allocator handle it.
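// (Illustrative note.) On i386 ELF/PIC the PLT stub addresses the GOT through
// %ebx, so a call such as "calll foo@PLT" is only correct if %ebx holds the
// GOT base; that is what binding X86::EBX below arranges for non-RegCall
// conventions.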
227026a73082SReid Kleckner if (CallConv != CallingConv::X86_RegCall)
227126a73082SReid Kleckner RegsToPass.push_back(std::make_pair(
227226a73082SReid Kleckner Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
227326a73082SReid Kleckner getPointerTy(DAG.getDataLayout()))));
227426a73082SReid Kleckner } else {
227526a73082SReid Kleckner // If we are tail calling and generating PIC/GOT style code, load the
227626a73082SReid Kleckner // address of the callee into ECX. The value in ecx is used as the target of
227726a73082SReid Kleckner // the tail jump. This is done to circumvent the ebx/callee-saved problem
227826a73082SReid Kleckner // for tail calls on PIC/GOT architectures. Normally we would just put the
227926a73082SReid Kleckner // address of GOT into ebx and then call target@PLT. But for tail calls
228026a73082SReid Kleckner // ebx would be restored (since ebx is callee saved) before jumping to the
228126a73082SReid Kleckner // target@PLT.
228226a73082SReid Kleckner 
228326a73082SReid Kleckner // Note: The actual moving to ECX is done further down.
228426a73082SReid Kleckner GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
228526a73082SReid Kleckner if (G && !G->getGlobal()->hasLocalLinkage() &&
228626a73082SReid Kleckner G->getGlobal()->hasDefaultVisibility())
228726a73082SReid Kleckner Callee = LowerGlobalAddress(Callee, DAG);
228826a73082SReid Kleckner else if (isa<ExternalSymbolSDNode>(Callee))
228926a73082SReid Kleckner Callee = LowerExternalSymbol(Callee, DAG);
229026a73082SReid Kleckner }
229126a73082SReid Kleckner }
229226a73082SReid Kleckner 
229326a73082SReid Kleckner if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
229426a73082SReid Kleckner (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
229526a73082SReid Kleckner // From AMD64 ABI document:
229626a73082SReid Kleckner // For calls that may call functions that use varargs or stdargs
229726a73082SReid Kleckner // (prototype-less calls or calls to functions containing ellipsis (...) in
229826a73082SReid Kleckner // the declaration) %al is used as a hidden argument to specify the number
229926a73082SReid Kleckner // of SSE registers used. The contents of %al do not need to match exactly
230026a73082SReid Kleckner // the number of registers, but must be an upper bound on the number of SSE
230126a73082SReid Kleckner // registers used and is in the range 0 - 8 inclusive.
230226a73082SReid Kleckner 
230326a73082SReid Kleckner // Count the number of XMM registers allocated.
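// Worked example (illustrative, not part of the upstream comment): for a
// SysV x86-64 call like printf("%f\n", 1.0) one FP vararg travels in %xmm0,
// so the caller may emit
//   movl $1, %eax    ; upper bound on the XMM registers used
//   call printf@PLT
// before the call; any value from 1 to 8 would also satisfy the ABI here.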
230426a73082SReid Kleckner static const MCPhysReg XMMArgRegs[] = { 230526a73082SReid Kleckner X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 230626a73082SReid Kleckner X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 230726a73082SReid Kleckner }; 230826a73082SReid Kleckner unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); 230926a73082SReid Kleckner assert((Subtarget.hasSSE1() || !NumXMMRegs) 231026a73082SReid Kleckner && "SSE registers cannot be used when SSE is disabled"); 231126a73082SReid Kleckner RegsToPass.push_back(std::make_pair(Register(X86::AL), 231226a73082SReid Kleckner DAG.getConstant(NumXMMRegs, dl, 231326a73082SReid Kleckner MVT::i8))); 231426a73082SReid Kleckner } 231526a73082SReid Kleckner 231626a73082SReid Kleckner if (isVarArg && IsMustTail) { 231726a73082SReid Kleckner const auto &Forwards = X86Info->getForwardedMustTailRegParms(); 231826a73082SReid Kleckner for (const auto &F : Forwards) { 231926a73082SReid Kleckner SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); 232026a73082SReid Kleckner RegsToPass.push_back(std::make_pair(F.PReg, Val)); 232126a73082SReid Kleckner } 232226a73082SReid Kleckner } 232326a73082SReid Kleckner 232426a73082SReid Kleckner // For tail calls lower the arguments to the 'real' stack slots. Sibcalls 232526a73082SReid Kleckner // don't need this because the eligibility check rejects calls that require 232626a73082SReid Kleckner // shuffling arguments passed in memory. 232726a73082SReid Kleckner if (!IsSibcall && isTailCall) { 232826a73082SReid Kleckner // Force all the incoming stack arguments to be loaded from the stack 232926a73082SReid Kleckner // before any new outgoing arguments are stored to the stack, because the 233026a73082SReid Kleckner // outgoing stack slots may alias the incoming argument stack slots, and 233126a73082SReid Kleckner // the alias isn't otherwise explicit. This is slightly more conservative 233226a73082SReid Kleckner // than necessary, because it means that each store effectively depends 233326a73082SReid Kleckner // on every argument instead of just those arguments it would clobber. 233426a73082SReid Kleckner SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); 233526a73082SReid Kleckner 233626a73082SReid Kleckner SmallVector<SDValue, 8> MemOpChains2; 233726a73082SReid Kleckner SDValue FIN; 233826a73082SReid Kleckner int FI = 0; 233926a73082SReid Kleckner for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; 234026a73082SReid Kleckner ++I, ++OutsIndex) { 234126a73082SReid Kleckner CCValAssign &VA = ArgLocs[I]; 234226a73082SReid Kleckner 234326a73082SReid Kleckner if (VA.isRegLoc()) { 234426a73082SReid Kleckner if (VA.needsCustom()) { 234526a73082SReid Kleckner assert((CallConv == CallingConv::X86_RegCall) && 234626a73082SReid Kleckner "Expecting custom case only in regcall calling convention"); 234726a73082SReid Kleckner // This means that we are in special case where one argument was 234826a73082SReid Kleckner // passed through two register locations - Skip the next location 234926a73082SReid Kleckner ++I; 235026a73082SReid Kleckner } 235126a73082SReid Kleckner 235226a73082SReid Kleckner continue; 235326a73082SReid Kleckner } 235426a73082SReid Kleckner 235526a73082SReid Kleckner assert(VA.isMemLoc()); 235626a73082SReid Kleckner SDValue Arg = OutVals[OutsIndex]; 235726a73082SReid Kleckner ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; 235826a73082SReid Kleckner // Skip inalloca/preallocated arguments. They don't require any work. 
235926a73082SReid Kleckner if (Flags.isInAlloca() || Flags.isPreallocated()) 236026a73082SReid Kleckner continue; 236126a73082SReid Kleckner // Create frame index. 236226a73082SReid Kleckner int32_t Offset = VA.getLocMemOffset()+FPDiff; 236326a73082SReid Kleckner uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; 236426a73082SReid Kleckner FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 236526a73082SReid Kleckner FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 236626a73082SReid Kleckner 236726a73082SReid Kleckner if (Flags.isByVal()) { 236826a73082SReid Kleckner // Copy relative to framepointer. 236926a73082SReid Kleckner SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); 237026a73082SReid Kleckner if (!StackPtr.getNode()) 237126a73082SReid Kleckner StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 237226a73082SReid Kleckner getPointerTy(DAG.getDataLayout())); 237326a73082SReid Kleckner Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 237426a73082SReid Kleckner StackPtr, Source); 237526a73082SReid Kleckner 237626a73082SReid Kleckner MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, 237726a73082SReid Kleckner ArgChain, 237826a73082SReid Kleckner Flags, DAG, dl)); 237926a73082SReid Kleckner } else { 238026a73082SReid Kleckner // Store relative to framepointer. 238126a73082SReid Kleckner MemOpChains2.push_back(DAG.getStore( 238226a73082SReid Kleckner ArgChain, dl, Arg, FIN, 238326a73082SReid Kleckner MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 238426a73082SReid Kleckner } 238526a73082SReid Kleckner } 238626a73082SReid Kleckner 238726a73082SReid Kleckner if (!MemOpChains2.empty()) 238826a73082SReid Kleckner Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 238926a73082SReid Kleckner 239026a73082SReid Kleckner // Store the return address to the appropriate stack slot. 239126a73082SReid Kleckner Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, 239226a73082SReid Kleckner getPointerTy(DAG.getDataLayout()), 239326a73082SReid Kleckner RegInfo->getSlotSize(), FPDiff, dl); 239426a73082SReid Kleckner } 239526a73082SReid Kleckner 239626a73082SReid Kleckner // Build a sequence of copy-to-reg nodes chained together with token chain 239726a73082SReid Kleckner // and glue operands which copy the outgoing args into registers. 239826a73082SReid Kleckner SDValue InGlue; 239926a73082SReid Kleckner for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 240026a73082SReid Kleckner Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 240126a73082SReid Kleckner RegsToPass[i].second, InGlue); 240226a73082SReid Kleckner InGlue = Chain.getValue(1); 240326a73082SReid Kleckner } 240426a73082SReid Kleckner 240526a73082SReid Kleckner if (DAG.getTarget().getCodeModel() == CodeModel::Large) { 240626a73082SReid Kleckner assert(Is64Bit && "Large code model is only legal in 64-bit mode."); 240726a73082SReid Kleckner // In the 64-bit large code model, we have to make all calls 240826a73082SReid Kleckner // through a register, since the call instruction's 32-bit 240926a73082SReid Kleckner // pc-relative offset may not be large enough to hold the whole 241026a73082SReid Kleckner // address. 241126a73082SReid Kleckner } else if (Callee->getOpcode() == ISD::GlobalAddress || 241226a73082SReid Kleckner Callee->getOpcode() == ISD::ExternalSymbol) { 241326a73082SReid Kleckner // Lower direct calls to global addresses and external symbols. 
Setting
241426a73082SReid Kleckner // ForCall to true here has the effect of removing WrapperRIP when possible
241526a73082SReid Kleckner // to allow direct calls to be selected without first materializing the
241626a73082SReid Kleckner // address into a register.
241726a73082SReid Kleckner Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
241826a73082SReid Kleckner } else if (Subtarget.isTarget64BitILP32() &&
241926a73082SReid Kleckner Callee.getValueType() == MVT::i32) {
242026a73082SReid Kleckner // Zero-extend the 32-bit Callee address to 64 bits according to the x32 ABI
242126a73082SReid Kleckner Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
242226a73082SReid Kleckner }
242326a73082SReid Kleckner 
242426a73082SReid Kleckner SmallVector<SDValue, 8> Ops;
242526a73082SReid Kleckner 
242626a73082SReid Kleckner if (!IsSibcall && isTailCall && !IsMustTail) {
242726a73082SReid Kleckner Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
242826a73082SReid Kleckner InGlue = Chain.getValue(1);
242926a73082SReid Kleckner }
243026a73082SReid Kleckner 
243126a73082SReid Kleckner Ops.push_back(Chain);
243226a73082SReid Kleckner Ops.push_back(Callee);
243326a73082SReid Kleckner 
243426a73082SReid Kleckner if (isTailCall)
2435ce0cc8e9SCraig Topper Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
243626a73082SReid Kleckner 
243726a73082SReid Kleckner // Add argument registers to the end of the list so that they are known live
243826a73082SReid Kleckner // into the call.
243926a73082SReid Kleckner for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
244026a73082SReid Kleckner Ops.push_back(DAG.getRegister(RegsToPass[i].first,
244126a73082SReid Kleckner RegsToPass[i].second.getValueType()));
244226a73082SReid Kleckner 
244326a73082SReid Kleckner // Add a register mask operand representing the call-preserved registers.
244426a73082SReid Kleckner const uint32_t *Mask = [&]() {
244526a73082SReid Kleckner auto AdaptedCC = CallConv;
244626a73082SReid Kleckner // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
244726a73082SReid Kleckner // use X86_INTR calling convention because it has the same CSR mask
244826a73082SReid Kleckner // (same preserved registers).
244926a73082SReid Kleckner if (HasNCSR)
245026a73082SReid Kleckner AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
245126a73082SReid Kleckner // If NoCalleeSavedRegisters is requested, then use GHC since it happens
245226a73082SReid Kleckner // to use the CSR_NoRegs_RegMask.
245326a73082SReid Kleckner if (CB && CB->hasFnAttr("no_callee_saved_registers")) 245426a73082SReid Kleckner AdaptedCC = (CallingConv::ID)CallingConv::GHC; 245526a73082SReid Kleckner return RegInfo->getCallPreservedMask(MF, AdaptedCC); 245626a73082SReid Kleckner }(); 245726a73082SReid Kleckner assert(Mask && "Missing call preserved mask for calling convention"); 245826a73082SReid Kleckner 24592c12c1e7Sweiguozhi if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) { 24607e5fe697Sweiguozhi X86Info->setFPClobberedByCall(true); 24612c12c1e7Sweiguozhi if (CLI.CB && isa<InvokeInst>(CLI.CB)) 24622c12c1e7Sweiguozhi X86Info->setFPClobberedByInvoke(true); 24632c12c1e7Sweiguozhi } 24642c12c1e7Sweiguozhi if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) { 24657e5fe697Sweiguozhi X86Info->setBPClobberedByCall(true); 24662c12c1e7Sweiguozhi if (CLI.CB && isa<InvokeInst>(CLI.CB)) 24672c12c1e7Sweiguozhi X86Info->setBPClobberedByInvoke(true); 24682c12c1e7Sweiguozhi } 24697e5fe697Sweiguozhi 247026a73082SReid Kleckner // If this is an invoke in a 32-bit function using a funclet-based 247126a73082SReid Kleckner // personality, assume the function clobbers all registers. If an exception 247226a73082SReid Kleckner // is thrown, the runtime will not restore CSRs. 247326a73082SReid Kleckner // FIXME: Model this more precisely so that we can register allocate across 247426a73082SReid Kleckner // the normal edge and spill and fill across the exceptional edge. 247526a73082SReid Kleckner if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) { 247626a73082SReid Kleckner const Function &CallerFn = MF.getFunction(); 247726a73082SReid Kleckner EHPersonality Pers = 247826a73082SReid Kleckner CallerFn.hasPersonalityFn() 247926a73082SReid Kleckner ? classifyEHPersonality(CallerFn.getPersonalityFn()) 248026a73082SReid Kleckner : EHPersonality::Unknown; 248126a73082SReid Kleckner if (isFuncletEHPersonality(Pers)) 248226a73082SReid Kleckner Mask = RegInfo->getNoPreservedMask(); 248326a73082SReid Kleckner } 248426a73082SReid Kleckner 248526a73082SReid Kleckner // Define a new register mask from the existing mask. 248626a73082SReid Kleckner uint32_t *RegMask = nullptr; 248726a73082SReid Kleckner 248826a73082SReid Kleckner // In some calling conventions we need to remove the used physical registers 248926a73082SReid Kleckner // from the reg mask. Create a new RegMask for such calling conventions. 249026a73082SReid Kleckner // RegMask for calling conventions that disable only return registers (e.g. 249126a73082SReid Kleckner // preserve_most) will be modified later in LowerCallResult. 249226a73082SReid Kleckner bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR; 249326a73082SReid Kleckner if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) { 249426a73082SReid Kleckner const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 249526a73082SReid Kleckner 249626a73082SReid Kleckner // Allocate a new Reg Mask and copy Mask. 249726a73082SReid Kleckner RegMask = MF.allocateRegMask(); 249826a73082SReid Kleckner unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); 249926a73082SReid Kleckner memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize); 250026a73082SReid Kleckner 250126a73082SReid Kleckner // Make sure all sub registers of the argument registers are reset 250226a73082SReid Kleckner // in the RegMask. 
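// (Illustrative note.) In a register mask a set bit means "preserved across
// the call", so clearing bit (Reg % 32) of word (Reg / 32) below marks Reg as
// clobbered; subregs_inclusive makes sure that, e.g., clearing ECX also
// clears CX, CL and CH.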
250326a73082SReid Kleckner if (ShouldDisableArgRegs) { 250426a73082SReid Kleckner for (auto const &RegPair : RegsToPass) 250526a73082SReid Kleckner for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first)) 250626a73082SReid Kleckner RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); 250726a73082SReid Kleckner } 250826a73082SReid Kleckner 250926a73082SReid Kleckner // Create the RegMask Operand according to our updated mask. 251026a73082SReid Kleckner Ops.push_back(DAG.getRegisterMask(RegMask)); 251126a73082SReid Kleckner } else { 251226a73082SReid Kleckner // Create the RegMask Operand according to the static mask. 251326a73082SReid Kleckner Ops.push_back(DAG.getRegisterMask(Mask)); 251426a73082SReid Kleckner } 251526a73082SReid Kleckner 251626a73082SReid Kleckner if (InGlue.getNode()) 251726a73082SReid Kleckner Ops.push_back(InGlue); 251826a73082SReid Kleckner 251926a73082SReid Kleckner if (isTailCall) { 252026a73082SReid Kleckner // We used to do: 252126a73082SReid Kleckner //// If this is the first return lowered for this function, add the regs 252226a73082SReid Kleckner //// to the liveout set for the function. 252326a73082SReid Kleckner // This isn't right, although it's probably harmless on x86; liveouts 252426a73082SReid Kleckner // should be computed from returns not tail calls. Consider a void 252526a73082SReid Kleckner // function making a tail call to a function returning int. 252626a73082SReid Kleckner MF.getFrameInfo().setHasTailCall(); 2527baf59be8SSergei Barannikov SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops); 252826a73082SReid Kleckner 252926a73082SReid Kleckner if (IsCFICall) 253026a73082SReid Kleckner Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 253126a73082SReid Kleckner 253226a73082SReid Kleckner DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 253326a73082SReid Kleckner DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); 253426a73082SReid Kleckner return Ret; 253526a73082SReid Kleckner } 253626a73082SReid Kleckner 2537baf59be8SSergei Barannikov // Returns a chain & a glue for retval copy to use. 2538baf59be8SSergei Barannikov SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 253926a73082SReid Kleckner if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) { 254026a73082SReid Kleckner Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); 254126a73082SReid Kleckner } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { 254226a73082SReid Kleckner // Calls with a "clang.arc.attachedcall" bundle are special. They should be 254326a73082SReid Kleckner // expanded to the call, directly followed by a special marker sequence and 254426a73082SReid Kleckner // a call to a ObjC library function. Use the CALL_RVMARKER to do that. 254526a73082SReid Kleckner assert(!isTailCall && 254626a73082SReid Kleckner "tail calls cannot be marked with clang.arc.attachedcall"); 254726a73082SReid Kleckner assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"); 254826a73082SReid Kleckner 254926a73082SReid Kleckner // Add a target global address for the retainRV/claimRV runtime function 255026a73082SReid Kleckner // just before the call target. 
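// Rough shape of such a call in IR (illustrative only):
//   %r = call ptr @foo() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
// The bundle's runtime function becomes the extra target-global-address
// operand inserted just before the callee operand below.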
255126a73082SReid Kleckner Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB); 255226a73082SReid Kleckner auto PtrVT = getPointerTy(DAG.getDataLayout()); 255326a73082SReid Kleckner auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT); 255426a73082SReid Kleckner Ops.insert(Ops.begin() + 1, GA); 255526a73082SReid Kleckner Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops); 255626a73082SReid Kleckner } else { 255726a73082SReid Kleckner Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); 255826a73082SReid Kleckner } 255926a73082SReid Kleckner 256026a73082SReid Kleckner if (IsCFICall) 256126a73082SReid Kleckner Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 256226a73082SReid Kleckner 256326a73082SReid Kleckner InGlue = Chain.getValue(1); 256426a73082SReid Kleckner DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 256526a73082SReid Kleckner DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); 256626a73082SReid Kleckner 256726a73082SReid Kleckner // Save heapallocsite metadata. 256826a73082SReid Kleckner if (CLI.CB) 256926a73082SReid Kleckner if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite")) 257026a73082SReid Kleckner DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); 257126a73082SReid Kleckner 257226a73082SReid Kleckner // Create the CALLSEQ_END node. 257326a73082SReid Kleckner unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing. 257426a73082SReid Kleckner if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 257526a73082SReid Kleckner DAG.getTarget().Options.GuaranteedTailCallOpt)) 257626a73082SReid Kleckner NumBytesForCalleeToPop = NumBytes; // Callee pops everything 257726a73082SReid Kleckner else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet) 257826a73082SReid Kleckner // If this call passes a struct-return pointer, the callee 257926a73082SReid Kleckner // pops that struct pointer. 258026a73082SReid Kleckner NumBytesForCalleeToPop = 4; 258126a73082SReid Kleckner 258226a73082SReid Kleckner // Returns a glue for retval copy to use. 258326a73082SReid Kleckner if (!IsSibcall) { 258426a73082SReid Kleckner Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop, 258526a73082SReid Kleckner InGlue, dl); 258626a73082SReid Kleckner InGlue = Chain.getValue(1); 258726a73082SReid Kleckner } 258826a73082SReid Kleckner 2589c166a43cSweiguozhi if (CallingConv::PreserveNone == CallConv) 2590c166a43cSweiguozhi for (unsigned I = 0, E = Outs.size(); I != E; ++I) { 2591c166a43cSweiguozhi if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() || 2592c166a43cSweiguozhi Outs[I].Flags.isSwiftError()) { 2593c166a43cSweiguozhi errorUnsupported(DAG, dl, 2594c166a43cSweiguozhi "Swift attributes can't be used with preserve_none"); 2595c166a43cSweiguozhi break; 2596c166a43cSweiguozhi } 2597c166a43cSweiguozhi } 2598c166a43cSweiguozhi 259926a73082SReid Kleckner // Handle result values, copying them out of physregs into vregs that we 260026a73082SReid Kleckner // return. 
260126a73082SReid Kleckner return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG, 260226a73082SReid Kleckner InVals, RegMask); 260326a73082SReid Kleckner } 260426a73082SReid Kleckner 260526a73082SReid Kleckner //===----------------------------------------------------------------------===// 260626a73082SReid Kleckner // Fast Calling Convention (tail call) implementation 260726a73082SReid Kleckner //===----------------------------------------------------------------------===// 260826a73082SReid Kleckner 260926a73082SReid Kleckner // Like std call, callee cleans arguments, convention except that ECX is 261026a73082SReid Kleckner // reserved for storing the tail called function address. Only 2 registers are 261126a73082SReid Kleckner // free for argument passing (inreg). Tail call optimization is performed 261226a73082SReid Kleckner // provided: 261326a73082SReid Kleckner // * tailcallopt is enabled 261426a73082SReid Kleckner // * caller/callee are fastcc 261526a73082SReid Kleckner // On X86_64 architecture with GOT-style position independent code only local 261626a73082SReid Kleckner // (within module) calls are supported at the moment. 261726a73082SReid Kleckner // To keep the stack aligned according to platform abi the function 261826a73082SReid Kleckner // GetAlignedArgumentStackSize ensures that argument delta is always multiples 261926a73082SReid Kleckner // of stack alignment. (Dynamic linkers need this - Darwin's dyld for example) 262026a73082SReid Kleckner // If a tail called function callee has more arguments than the caller the 262126a73082SReid Kleckner // caller needs to make sure that there is room to move the RETADDR to. This is 262226a73082SReid Kleckner // achieved by reserving an area the size of the argument delta right after the 262326a73082SReid Kleckner // original RETADDR, but before the saved framepointer or the spilled registers 262426a73082SReid Kleckner // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4) 262526a73082SReid Kleckner // stack layout: 262626a73082SReid Kleckner // arg1 262726a73082SReid Kleckner // arg2 262826a73082SReid Kleckner // RETADDR 262926a73082SReid Kleckner // [ new RETADDR 263026a73082SReid Kleckner // move area ] 263126a73082SReid Kleckner // (possible EBP) 263226a73082SReid Kleckner // ESI 263326a73082SReid Kleckner // EDI 263426a73082SReid Kleckner // local1 .. 263526a73082SReid Kleckner 263626a73082SReid Kleckner /// Make the stack size align e.g 16n + 12 aligned for a 16-byte align 263726a73082SReid Kleckner /// requirement. 263826a73082SReid Kleckner unsigned 263926a73082SReid Kleckner X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, 264026a73082SReid Kleckner SelectionDAG &DAG) const { 264126a73082SReid Kleckner const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign(); 264226a73082SReid Kleckner const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); 264326a73082SReid Kleckner assert(StackSize % SlotSize == 0 && 264426a73082SReid Kleckner "StackSize must be a multiple of SlotSize"); 264526a73082SReid Kleckner return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize; 264626a73082SReid Kleckner } 264726a73082SReid Kleckner 264826a73082SReid Kleckner /// Return true if the given stack call argument is already available in the 264926a73082SReid Kleckner /// same position (relatively) of the caller's incoming argument stack. 
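/// For example (illustrative only), in 32-bit code such as
///   int caller(int a, int b) { return callee(a, b); }
/// the outgoing arguments of the sibcall occupy exactly the fixed stack slots
/// in which a and b arrived, so no stores are needed and the check succeeds.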
265026a73082SReid Kleckner static 265126a73082SReid Kleckner bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 265226a73082SReid Kleckner MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, 265326a73082SReid Kleckner const X86InstrInfo *TII, const CCValAssign &VA) { 265426a73082SReid Kleckner unsigned Bytes = Arg.getValueSizeInBits() / 8; 265526a73082SReid Kleckner 265626a73082SReid Kleckner for (;;) { 265726a73082SReid Kleckner // Look through nodes that don't alter the bits of the incoming value. 265826a73082SReid Kleckner unsigned Op = Arg.getOpcode(); 2659b61b2426SSimon Pilgrim if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || 2660b61b2426SSimon Pilgrim Op == ISD::AssertZext) { 266126a73082SReid Kleckner Arg = Arg.getOperand(0); 266226a73082SReid Kleckner continue; 266326a73082SReid Kleckner } 266426a73082SReid Kleckner if (Op == ISD::TRUNCATE) { 266526a73082SReid Kleckner const SDValue &TruncInput = Arg.getOperand(0); 266626a73082SReid Kleckner if (TruncInput.getOpcode() == ISD::AssertZext && 266726a73082SReid Kleckner cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == 266826a73082SReid Kleckner Arg.getValueType()) { 266926a73082SReid Kleckner Arg = TruncInput.getOperand(0); 267026a73082SReid Kleckner continue; 267126a73082SReid Kleckner } 267226a73082SReid Kleckner } 267326a73082SReid Kleckner break; 267426a73082SReid Kleckner } 267526a73082SReid Kleckner 267626a73082SReid Kleckner int FI = INT_MAX; 267726a73082SReid Kleckner if (Arg.getOpcode() == ISD::CopyFromReg) { 267826a73082SReid Kleckner Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 267926a73082SReid Kleckner if (!VR.isVirtual()) 268026a73082SReid Kleckner return false; 268126a73082SReid Kleckner MachineInstr *Def = MRI->getVRegDef(VR); 268226a73082SReid Kleckner if (!Def) 268326a73082SReid Kleckner return false; 268426a73082SReid Kleckner if (!Flags.isByVal()) { 268526a73082SReid Kleckner if (!TII->isLoadFromStackSlot(*Def, FI)) 268626a73082SReid Kleckner return false; 268726a73082SReid Kleckner } else { 268826a73082SReid Kleckner unsigned Opcode = Def->getOpcode(); 268926a73082SReid Kleckner if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || 269026a73082SReid Kleckner Opcode == X86::LEA64_32r) && 269126a73082SReid Kleckner Def->getOperand(1).isFI()) { 269226a73082SReid Kleckner FI = Def->getOperand(1).getIndex(); 269326a73082SReid Kleckner Bytes = Flags.getByValSize(); 269426a73082SReid Kleckner } else 269526a73082SReid Kleckner return false; 269626a73082SReid Kleckner } 269726a73082SReid Kleckner } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 269826a73082SReid Kleckner if (Flags.isByVal()) 269926a73082SReid Kleckner // ByVal argument is passed in as a pointer but it's now being 270026a73082SReid Kleckner // dereferenced. e.g. 
270126a73082SReid Kleckner // define @foo(%struct.X* %A) { 270226a73082SReid Kleckner // tail call @bar(%struct.X* byval %A) 270326a73082SReid Kleckner // } 270426a73082SReid Kleckner return false; 270526a73082SReid Kleckner SDValue Ptr = Ld->getBasePtr(); 270626a73082SReid Kleckner FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 270726a73082SReid Kleckner if (!FINode) 270826a73082SReid Kleckner return false; 270926a73082SReid Kleckner FI = FINode->getIndex(); 271026a73082SReid Kleckner } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { 271126a73082SReid Kleckner FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); 271226a73082SReid Kleckner FI = FINode->getIndex(); 271326a73082SReid Kleckner Bytes = Flags.getByValSize(); 271426a73082SReid Kleckner } else 271526a73082SReid Kleckner return false; 271626a73082SReid Kleckner 271726a73082SReid Kleckner assert(FI != INT_MAX); 271826a73082SReid Kleckner if (!MFI.isFixedObjectIndex(FI)) 271926a73082SReid Kleckner return false; 272026a73082SReid Kleckner 272126a73082SReid Kleckner if (Offset != MFI.getObjectOffset(FI)) 272226a73082SReid Kleckner return false; 272326a73082SReid Kleckner 272426a73082SReid Kleckner // If this is not byval, check that the argument stack object is immutable. 272526a73082SReid Kleckner // inalloca and argument copy elision can create mutable argument stack 272626a73082SReid Kleckner // objects. Byval objects can be mutated, but a byval call intends to pass the 272726a73082SReid Kleckner // mutated memory. 272826a73082SReid Kleckner if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) 272926a73082SReid Kleckner return false; 273026a73082SReid Kleckner 273126a73082SReid Kleckner if (VA.getLocVT().getFixedSizeInBits() > 273226a73082SReid Kleckner Arg.getValueSizeInBits().getFixedValue()) { 273326a73082SReid Kleckner // If the argument location is wider than the argument type, check that any 273426a73082SReid Kleckner // extension flags match. 273526a73082SReid Kleckner if (Flags.isZExt() != MFI.isObjectZExt(FI) || 273626a73082SReid Kleckner Flags.isSExt() != MFI.isObjectSExt(FI)) { 273726a73082SReid Kleckner return false; 273826a73082SReid Kleckner } 273926a73082SReid Kleckner } 274026a73082SReid Kleckner 274126a73082SReid Kleckner return Bytes == MFI.getObjectSize(FI); 274226a73082SReid Kleckner } 274326a73082SReid Kleckner 274426a73082SReid Kleckner /// Check whether the call is eligible for tail call optimization. Targets 274526a73082SReid Kleckner /// that want to do tail call optimization should implement this function. 2746385faf9cSReid Kleckner /// Note that the x86 backend does not check musttail calls for eligibility! The 2747385faf9cSReid Kleckner /// rest of x86 tail call lowering must be prepared to forward arguments of any 2748385faf9cSReid Kleckner /// type. 
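/// For reference (illustrative only), a musttail call site looks like
///   %r = musttail call i32 @callee(i32 %x)
///   ret i32 %r
/// and must always be lowered as a tail call, so it deliberately bypasses the
/// checks in this function.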
274926a73082SReid Kleckner bool X86TargetLowering::IsEligibleForTailCallOptimization( 2750385faf9cSReid Kleckner TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, 2751385faf9cSReid Kleckner SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const { 2752385faf9cSReid Kleckner SelectionDAG &DAG = CLI.DAG; 2753385faf9cSReid Kleckner const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2754385faf9cSReid Kleckner const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2755385faf9cSReid Kleckner const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2756385faf9cSReid Kleckner SDValue Callee = CLI.Callee; 2757385faf9cSReid Kleckner CallingConv::ID CalleeCC = CLI.CallConv; 2758385faf9cSReid Kleckner bool isVarArg = CLI.IsVarArg; 2759385faf9cSReid Kleckner 276026a73082SReid Kleckner if (!mayTailCallThisCC(CalleeCC)) 276126a73082SReid Kleckner return false; 276226a73082SReid Kleckner 276326a73082SReid Kleckner // If -tailcallopt is specified, make fastcc functions tail-callable. 276426a73082SReid Kleckner MachineFunction &MF = DAG.getMachineFunction(); 276526a73082SReid Kleckner const Function &CallerF = MF.getFunction(); 276626a73082SReid Kleckner 276726a73082SReid Kleckner // If the function return type is x86_fp80 and the callee return type is not, 276826a73082SReid Kleckner // then the FP_EXTEND of the call result is not a nop. It's not safe to 276926a73082SReid Kleckner // perform a tailcall optimization here. 2770385faf9cSReid Kleckner if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty()) 277126a73082SReid Kleckner return false; 277226a73082SReid Kleckner 277326a73082SReid Kleckner CallingConv::ID CallerCC = CallerF.getCallingConv(); 277426a73082SReid Kleckner bool CCMatch = CallerCC == CalleeCC; 277526a73082SReid Kleckner bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); 277626a73082SReid Kleckner bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); 277726a73082SReid Kleckner bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || 277826a73082SReid Kleckner CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; 277926a73082SReid Kleckner 278026a73082SReid Kleckner // Win64 functions have extra shadow space for argument homing. Don't do the 278126a73082SReid Kleckner // sibcall if the caller and callee have mismatched expectations for this 278226a73082SReid Kleckner // space. 278326a73082SReid Kleckner if (IsCalleeWin64 != IsCallerWin64) 278426a73082SReid Kleckner return false; 278526a73082SReid Kleckner 278626a73082SReid Kleckner if (IsGuaranteeTCO) { 278726a73082SReid Kleckner if (canGuaranteeTCO(CalleeCC) && CCMatch) 278826a73082SReid Kleckner return true; 278926a73082SReid Kleckner return false; 279026a73082SReid Kleckner } 279126a73082SReid Kleckner 279226a73082SReid Kleckner // Look for obvious safe cases to perform tail call optimization that do not 279326a73082SReid Kleckner // require ABI changes. This is what gcc calls sibcall. 279426a73082SReid Kleckner 279526a73082SReid Kleckner // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to 279626a73082SReid Kleckner // emit a special epilogue. 279726a73082SReid Kleckner const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 279826a73082SReid Kleckner if (RegInfo->hasStackRealignment(MF)) 279926a73082SReid Kleckner return false; 280026a73082SReid Kleckner 280126a73082SReid Kleckner // Also avoid sibcall optimization if we're an sret return fn and the callee 280226a73082SReid Kleckner // is incompatible. 
  // Also avoid sibcall optimization if we're an sret-returning function and the
  // callee is incompatible. See comment in LowerReturn about why
  // hasStructRetAttr is insufficient.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So it
    // needs to be (a) an sret function itself and (b) we pass our sret as its
    // sret. Condition #b is harder to determine.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the call it is not safe to optimize
  // this into a sibcall.
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
    RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_X86,
                                  RetCC_X86))
    return false;
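
  // Illustrative example (hypothetical IR, not from this file; @retld/@caller
  // are made-up names): an x86_fp80 result comes back in ST0, and if the caller
  // ignores it something still has to pop the x87 stack, so the checks above
  // refuse to form a sibcall:
  //
  //   declare x86_fp80 @retld()
  //   define void @caller() {
  //     %unused = tail call x86_fp80 @retld()
  //     ret void
  //   }
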
  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // The caller's stack frame cannot be replaced by the tail callee's if the
  // caller is required to preserve all registers. Conservatively prevent
  // tail-call optimization even if, hypothetically, all registers were used
  // for passing formal parameters or returning values.
  if (CallerF.hasFnAttribute("no_caller_saved_registers"))
    return false;
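
  // Illustrative example (hypothetical IR, not from this file; @caller/@plain
  // are made-up names): on x86-64 SysV a preserve_mostcc caller promises its
  // own caller far more callee-saved registers than a plain C-convention callee
  // will preserve, so the regmask subset check above rejects the sibcall:
  //
  //   declare void @plain()
  //   define preserve_mostcc void @caller() {
  //     tail call void @plain()
  //     ret void
  //   }
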
  unsigned StackArgsSize = CCInfo.getStackSize();

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    if (StackArgsSize > 0) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
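
// Illustrative example (hypothetical IR, not from this file; @f is a made-up
// name): under 32-bit stdcall the callee pops its own stack arguments, so
// isCalleePop returns true and the callee's epilogue returns with something
// like `retl $8` for
//
//   define x86_stdcallcc void @f(i32 %a, i32 %b) {
//     ret void
//   }
//
// which is why callee-pop conventions have to line up with the BytesToPop and
// StackArgsSize checks above before a tail call is allowed.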