//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

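// FastISel is LLVM's fast-path instruction selector, used primarily at -O0.
// Each X86Select*/X86FastEmit* routine below either emits machine code for a
// single IR instruction or returns false, in which case selection of that
// instruction falls back to the (slower, more capable) SelectionDAG path.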
class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that vreg is being
  /// provided by the specified load instruction. If possible, try to fold the
  /// load as an operand to the instruction, returning true on success.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &DL);

  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);
  bool X86SelectUIToFP(const Instruction *I);
  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

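  // The fastMaterialize* hooks above are what FastISel calls when a constant,
  // a static alloca, or a floating-point zero needs to be placed in a
  // register; the X86Materialize* helpers split the constant case by kind.
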
  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
           (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             unsigned Op1, unsigned Op2, unsigned Op3);
};

} // end anonymous namespace.

static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ: CC = 8;          break;
  case CmpInst::FCMP_ONE: CC = 12;         break;
  }

  return std::make_pair(CC, NeedSwap);
}

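// For example, FCMP_OGT has no direct SSE immediate: "a > b" is encoded as
// "b < a" (CC 1, LT) with NeedSwap requesting that the operands be reversed.
// CC values 8 (EQ_UQ, for FCMP_UEQ) and 12 (NEQ_OQ, for FCMP_ONE) only exist
// in the extended AVX comparison-predicate encoding.
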
/// Adds a complex addressing mode to the given machine instr builder.
/// Note, this will constrain the index register. If it's not possible to
/// constrain the given index register, then a new one will be created. The
/// IndexReg field of the addressing mode will be updated to match in this case.
const MachineInstrBuilder &
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
                            X86AddressMode &AM) {
  // First constrain the index register. It needs to be a GR64_NOSP.
  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
                                         MIB->getNumOperands() +
                                         X86::AddrIndexReg);
  return ::addFullAddress(MIB, AM);
}

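// (The index is constrained to GR64_NOSP above because the x86 SIB byte has
// no encoding for RSP as an index register.)
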
/// Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
    cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way.
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  // Make sure no potentially EFLAGS-clobbering phi moves can be inserted in
  // between.
  auto HasPhis = [](const BasicBlock *Succ) { return !Succ->phis().empty(); };
  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
    return false;

  // Make sure there are no potentially EFLAGS-clobbering constant
  // materializations in between.
  if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
    return false;

  CC = TmpCC;
  return true;
}

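// As an illustration, the fold above accepts an IR shape like the following
// (names are hypothetical):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %normal
// The user (here the branch) can then test the EFLAGS set by the ADD
// directly via X86::COND_O instead of materializing %ovf in a register.
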
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !Subtarget->hasSSE2())
    return false;
  if (VT == MVT::f32 && !Subtarget->hasSSE1())
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address, either a pre-computed base/index/displacement form or a
/// GlobalAddress, is provided in AM. Return true and set ResultReg if
/// selection is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
  if (VT == MVT::i1)
    VT = MVT::i8;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    break;
  case MVT::f32:
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
                    : X86::LD_Fp32m;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
                    : X86::LD_Fp64m;
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    break;
  case MVT::v2f64:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the load isn't masked.
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
    break;
  }

  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

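// For illustration, an aligned (16-byte) v4f32 load selects MOVAPSrm on plain
// SSE, VMOVAPSrm on AVX, and VMOVAPSZ128rm when AVX-512 VL is available; the
// unaligned and non-temporal paths choose the MOVUPS/MOVNTDQA variants
// instead (see the switch above).
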
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address, either a pre-computed base pointer plus displacement
/// or a GlobalAddress, is provided in AM. Return true if selection is
/// possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    Register AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(X86::AND8ri), AndResult)
      .addReg(ValReg).addImm(1);
    ValReg = AndResult;
    [[fallthrough]]; // handle i1 as i8.
  }
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    if (HasSSE1) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX512 ? X86::VMOVSSZmr :
              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    if (HasSSE2) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX512 ? X86::VMOVSDZmr :
              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::x86mmx:
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
    break;
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ128mr :
              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ128mr :
              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
    else
      Opc = X86::VMOVUPSZmr;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (Aligned) {
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
    } else
      Opc = X86::VMOVUPDZmr;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the store isn't masked.
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
    else
      Opc = X86::VMOVDQU64Zmr;
    break;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Some of the instructions in the previous switch use FR128 instead
  // of FR32 for ValReg. Make sure the register we feed the instruction
  // matches its register class constraints.
  // Note: Copying from FR32 to FR128 is fine; they are the same registers
  // behind the scenes, which is why this never triggered any bugs before.
  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
  addFullAddress(MIB, AM).addReg(ValReg);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

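// Note that in the vector cases above the non-temporal (MOVNT*) opcodes are
// chosen only on the Aligned paths: these stores require aligned addresses,
// so an unaligned non-temporal store degrades to an ordinary unaligned store.
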
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:
      Signed = false;
      [[fallthrough]]; // Handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  Register ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
}

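// For example, "store i64 -1, ptr %p" folds the immediate into a single
// MOV64mi32, since -1 is representable as a sign-extended 32-bit value,
// whereas a store of i64 0x100000000 fails the isInt<32> check above and is
// materialized into a register first.
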
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small &&
        TM.getCodeModel() != CodeModel::Medium)
      return false;

    // Can't handle large objects yet.
    if (TM.isLargeGlobalValue(GV))
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // Can't handle !absolute_symbol references yet.
    if (GV->isAbsoluteSymbolRef())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can.  Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub.  If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
      Register LoadReg;
      if (I != LocalValueMap.end() && I->second) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC = &X86::GR64RegClass;
        } else {
          Opc = X86::MOV32rm;
          RC = &X86::GR32RegClass;
        }

        if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
            GVFlags == X86II::MO_GOTPCREL_NORELAX)
          StubAM.Base.Reg = X86::RIP;

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading the GV stub multiple times in the same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

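// In the stub case above, a PIC reference to a global thus becomes a two-step
// sequence: first a pointer load (e.g. a MOV64rm from [RIP + foo@GOTPCREL],
// with "foo" standing in for the global), whose result then serves as the
// base register of the final address with AM.GV cleared.
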
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();

    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = GTI.getSequentialElementStride(DL);
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(PtrVT, Op);
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (const Value *I : reverse(GEPs))
      if (handleConstantAddresses(I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

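// As an illustration, a GEP such as "getelementptr i32, ptr %p, i64 %i"
// (hypothetical names) folds into the single addressing mode
// [%p + %i*4 + Disp]: base register, index register scaled by 4, and a 32-bit
// displacement, with no separate address arithmetic emitted.
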
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistently set across basic blocks, even if
  // different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse could not be used accurately if we
  // allowed references to values across basic blocks whereas they are not
  // alive across basic blocks initially.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
10380b57cec5SDimitry Andric     if (InMBB &&
10390b57cec5SDimitry Andric         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
10400b57cec5SDimitry Andric             TLI.getPointerTy(DL))
10410b57cec5SDimitry Andric       return X86SelectCallAddress(U->getOperand(0), AM);
10420b57cec5SDimitry Andric     break;
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric   case Instruction::PtrToInt:
10450b57cec5SDimitry Andric     // Look past no-op ptrtoints if the operand is in the same BB.
10460b57cec5SDimitry Andric     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
10470b57cec5SDimitry Andric       return X86SelectCallAddress(U->getOperand(0), AM);
10480b57cec5SDimitry Andric     break;
10490b57cec5SDimitry Andric   }
10500b57cec5SDimitry Andric 
10510b57cec5SDimitry Andric   // Handle constant address.
10520b57cec5SDimitry Andric   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
10530b57cec5SDimitry Andric     // Can't handle alternate code models yet.
10545f757f3fSDimitry Andric     if (TM.getCodeModel() != CodeModel::Small &&
10555f757f3fSDimitry Andric         TM.getCodeModel() != CodeModel::Medium)
10560b57cec5SDimitry Andric       return false;
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric     // RIP-relative addresses can't have additional register operands.
10590b57cec5SDimitry Andric     if (Subtarget->isPICStyleRIPRel() &&
10600b57cec5SDimitry Andric         (AM.Base.Reg != 0 || AM.IndexReg != 0))
10610b57cec5SDimitry Andric       return false;
10620b57cec5SDimitry Andric 
10630b57cec5SDimitry Andric     // Can't handle TLS.
10640b57cec5SDimitry Andric     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
10650b57cec5SDimitry Andric       if (GVar->isThreadLocal())
10660b57cec5SDimitry Andric         return false;
10670b57cec5SDimitry Andric 
10680b57cec5SDimitry Andric     // Okay, we've committed to selecting this global. Set up the basic address.
10690b57cec5SDimitry Andric     AM.GV = GV;
10700b57cec5SDimitry Andric 
10710b57cec5SDimitry Andric     // Return a direct reference to the global. FastISel can handle calls to
10720b57cec5SDimitry Andric     // functions that require loads, such as dllimport and nonlazybind
10730b57cec5SDimitry Andric     // functions.
10740b57cec5SDimitry Andric     if (Subtarget->isPICStyleRIPRel()) {
10750b57cec5SDimitry Andric       // Use rip-relative addressing if we can. Above we verified that the
10760b57cec5SDimitry Andric       // base and index registers are unused.
10770b57cec5SDimitry Andric       assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
10780b57cec5SDimitry Andric       AM.Base.Reg = X86::RIP;
10790b57cec5SDimitry Andric     } else {
10800b57cec5SDimitry Andric       AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
10810b57cec5SDimitry Andric     }
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric     return true;
10840b57cec5SDimitry Andric   }
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric   // If all else fails, try to materialize the value in a register.
10870b57cec5SDimitry Andric   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1088e8d8bef9SDimitry Andric     auto GetCallRegForValue = [this](const Value *V) {
1089e8d8bef9SDimitry Andric       Register Reg = getRegForValue(V);
1090e8d8bef9SDimitry Andric 
1091e8d8bef9SDimitry Andric       // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
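      // Illustrative sketch only: on an ILP32-on-64 (x32) target, the code
      // below would widen a 32-bit pointer register roughly as
      //   movl %eax, %ecx                    ; MOV32rr copy (registers invented)
      //   ; SUBREG_TO_REG 0, %ecx, sub_32bit ; 64-bit vreg, high bits zero
      // so the call target ends up in a full 64-bit register.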
1092e8d8bef9SDimitry Andric if (Reg && Subtarget->isTarget64BitILP32()) { 1093e8d8bef9SDimitry Andric Register CopyReg = createResultReg(&X86::GR32RegClass); 1094bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32rr), 1095e8d8bef9SDimitry Andric CopyReg) 1096e8d8bef9SDimitry Andric .addReg(Reg); 1097e8d8bef9SDimitry Andric 1098e8d8bef9SDimitry Andric Register ExtReg = createResultReg(&X86::GR64RegClass); 1099bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1100e8d8bef9SDimitry Andric TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg) 1101e8d8bef9SDimitry Andric .addImm(0) 1102e8d8bef9SDimitry Andric .addReg(CopyReg) 1103e8d8bef9SDimitry Andric .addImm(X86::sub_32bit); 1104e8d8bef9SDimitry Andric Reg = ExtReg; 1105e8d8bef9SDimitry Andric } 1106e8d8bef9SDimitry Andric 1107e8d8bef9SDimitry Andric return Reg; 1108e8d8bef9SDimitry Andric }; 1109e8d8bef9SDimitry Andric 11100b57cec5SDimitry Andric if (AM.Base.Reg == 0) { 1111e8d8bef9SDimitry Andric AM.Base.Reg = GetCallRegForValue(V); 11120b57cec5SDimitry Andric return AM.Base.Reg != 0; 11130b57cec5SDimitry Andric } 11140b57cec5SDimitry Andric if (AM.IndexReg == 0) { 11150b57cec5SDimitry Andric assert(AM.Scale == 1 && "Scale with no index!"); 1116e8d8bef9SDimitry Andric AM.IndexReg = GetCallRegForValue(V); 11170b57cec5SDimitry Andric return AM.IndexReg != 0; 11180b57cec5SDimitry Andric } 11190b57cec5SDimitry Andric } 11200b57cec5SDimitry Andric 11210b57cec5SDimitry Andric return false; 11220b57cec5SDimitry Andric } 11230b57cec5SDimitry Andric 11240b57cec5SDimitry Andric 11250b57cec5SDimitry Andric /// X86SelectStore - Select and emit code to implement store instructions. 11260b57cec5SDimitry Andric bool X86FastISel::X86SelectStore(const Instruction *I) { 11270b57cec5SDimitry Andric // Atomic stores need special handling. 11280b57cec5SDimitry Andric const StoreInst *S = cast<StoreInst>(I); 11290b57cec5SDimitry Andric 11300b57cec5SDimitry Andric if (S->isAtomic()) 11310b57cec5SDimitry Andric return false; 11320b57cec5SDimitry Andric 11330b57cec5SDimitry Andric const Value *PtrV = I->getOperand(1); 11340b57cec5SDimitry Andric if (TLI.supportSwiftError()) { 11350b57cec5SDimitry Andric // Swifterror values can come from either a function parameter with 11360b57cec5SDimitry Andric // swifterror attribute or an alloca with swifterror attribute. 
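    // For example (illustrative IR only), either of these shapes bails out
    // of this fast path:
    //   define void @f(ptr swifterror %err)   ; swifterror parameter
    //   %err = alloca swifterror ptr          ; swifterror alloca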
11370b57cec5SDimitry Andric     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
11380b57cec5SDimitry Andric       if (Arg->hasSwiftErrorAttr())
11390b57cec5SDimitry Andric         return false;
11400b57cec5SDimitry Andric     }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
11430b57cec5SDimitry Andric       if (Alloca->isSwiftError())
11440b57cec5SDimitry Andric         return false;
11450b57cec5SDimitry Andric     }
11460b57cec5SDimitry Andric   }
11470b57cec5SDimitry Andric 
11480b57cec5SDimitry Andric   const Value *Val = S->getValueOperand();
11490b57cec5SDimitry Andric   const Value *Ptr = S->getPointerOperand();
11500b57cec5SDimitry Andric 
11510b57cec5SDimitry Andric   MVT VT;
11520b57cec5SDimitry Andric   if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
11530b57cec5SDimitry Andric     return false;
11540b57cec5SDimitry Andric 
11555ffd83dbSDimitry Andric   Align Alignment = S->getAlign();
11565ffd83dbSDimitry Andric   Align ABIAlignment = DL.getABITypeAlign(Val->getType());
11570b57cec5SDimitry Andric   bool Aligned = Alignment >= ABIAlignment;
11580b57cec5SDimitry Andric 
11590b57cec5SDimitry Andric   X86AddressMode AM;
11600b57cec5SDimitry Andric   if (!X86SelectAddress(Ptr, AM))
11610b57cec5SDimitry Andric     return false;
11620b57cec5SDimitry Andric 
11630b57cec5SDimitry Andric   return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
11640b57cec5SDimitry Andric }
11650b57cec5SDimitry Andric 
11660b57cec5SDimitry Andric /// X86SelectRet - Select and emit code to implement ret instructions.
11670b57cec5SDimitry Andric bool X86FastISel::X86SelectRet(const Instruction *I) {
11680b57cec5SDimitry Andric   const ReturnInst *Ret = cast<ReturnInst>(I);
11690b57cec5SDimitry Andric   const Function &F = *I->getParent()->getParent();
11700b57cec5SDimitry Andric   const X86MachineFunctionInfo *X86MFInfo =
11710b57cec5SDimitry Andric       FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
11720b57cec5SDimitry Andric 
11730b57cec5SDimitry Andric   if (!FuncInfo.CanLowerReturn)
11740b57cec5SDimitry Andric     return false;
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric   if (TLI.supportSwiftError() &&
11770b57cec5SDimitry Andric       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
11780b57cec5SDimitry Andric     return false;
11790b57cec5SDimitry Andric 
11800b57cec5SDimitry Andric   if (TLI.supportSplitCSR(FuncInfo.MF))
11810b57cec5SDimitry Andric     return false;
11820b57cec5SDimitry Andric 
11830b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
11840b57cec5SDimitry Andric   if (CC != CallingConv::C &&
11850b57cec5SDimitry Andric       CC != CallingConv::Fast &&
11868bcb0991SDimitry Andric       CC != CallingConv::Tail &&
1187fe6060f1SDimitry Andric       CC != CallingConv::SwiftTail &&
11880b57cec5SDimitry Andric       CC != CallingConv::X86_FastCall &&
11890b57cec5SDimitry Andric       CC != CallingConv::X86_StdCall &&
11900b57cec5SDimitry Andric       CC != CallingConv::X86_ThisCall &&
11910b57cec5SDimitry Andric       CC != CallingConv::X86_64_SysV &&
11920b57cec5SDimitry Andric       CC != CallingConv::Win64)
11930b57cec5SDimitry Andric     return false;
11940b57cec5SDimitry Andric 
11950b57cec5SDimitry Andric   // Don't handle popping bytes if they don't fit the ret's immediate.
11960b57cec5SDimitry Andric   if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
11970b57cec5SDimitry Andric     return false;
11980b57cec5SDimitry Andric 
11990b57cec5SDimitry Andric   // fastcc with -tailcallopt is intended to provide a guaranteed
12000b57cec5SDimitry Andric   // tail call optimization. FastISel doesn't know how to do that.
12018bcb0991SDimitry Andric if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || 1202fe6060f1SDimitry Andric CC == CallingConv::Tail || CC == CallingConv::SwiftTail) 12030b57cec5SDimitry Andric return false; 12040b57cec5SDimitry Andric 12050b57cec5SDimitry Andric // Let SDISel handle vararg functions. 12060b57cec5SDimitry Andric if (F.isVarArg()) 12070b57cec5SDimitry Andric return false; 12080b57cec5SDimitry Andric 12090b57cec5SDimitry Andric // Build a list of return value registers. 12100b57cec5SDimitry Andric SmallVector<unsigned, 4> RetRegs; 12110b57cec5SDimitry Andric 12120b57cec5SDimitry Andric if (Ret->getNumOperands() > 0) { 12130b57cec5SDimitry Andric SmallVector<ISD::OutputArg, 4> Outs; 12140b57cec5SDimitry Andric GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric // Analyze operands of the call, assigning locations to each operand. 12170b57cec5SDimitry Andric SmallVector<CCValAssign, 16> ValLocs; 12180b57cec5SDimitry Andric CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 12190b57cec5SDimitry Andric CCInfo.AnalyzeReturn(Outs, RetCC_X86); 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric const Value *RV = Ret->getOperand(0); 12225ffd83dbSDimitry Andric Register Reg = getRegForValue(RV); 12230b57cec5SDimitry Andric if (Reg == 0) 12240b57cec5SDimitry Andric return false; 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric // Only handle a single return value for now. 12270b57cec5SDimitry Andric if (ValLocs.size() != 1) 12280b57cec5SDimitry Andric return false; 12290b57cec5SDimitry Andric 12300b57cec5SDimitry Andric CCValAssign &VA = ValLocs[0]; 12310b57cec5SDimitry Andric 12320b57cec5SDimitry Andric // Don't bother handling odd stuff for now. 12330b57cec5SDimitry Andric if (VA.getLocInfo() != CCValAssign::Full) 12340b57cec5SDimitry Andric return false; 12350b57cec5SDimitry Andric // Only handle register returns for now. 12360b57cec5SDimitry Andric if (!VA.isRegLoc()) 12370b57cec5SDimitry Andric return false; 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric // The calling-convention tables for x87 returns don't tell 12400b57cec5SDimitry Andric // the whole story. 12410b57cec5SDimitry Andric if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) 12420b57cec5SDimitry Andric return false; 12430b57cec5SDimitry Andric 12440b57cec5SDimitry Andric unsigned SrcReg = Reg + VA.getValNo(); 12450b57cec5SDimitry Andric EVT SrcVT = TLI.getValueType(DL, RV->getType()); 12460b57cec5SDimitry Andric EVT DstVT = VA.getValVT(); 12470b57cec5SDimitry Andric // Special handling for extended integers. 12480b57cec5SDimitry Andric if (SrcVT != DstVT) { 12490b57cec5SDimitry Andric if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16) 12500b57cec5SDimitry Andric return false; 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 12530b57cec5SDimitry Andric return false; 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric if (SrcVT == MVT::i1) { 12560b57cec5SDimitry Andric if (Outs[0].Flags.isSExt()) 12570b57cec5SDimitry Andric return false; 1258fe6060f1SDimitry Andric SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg); 12590b57cec5SDimitry Andric SrcVT = MVT::i8; 12600b57cec5SDimitry Andric } 12610fca6ea1SDimitry Andric if (SrcVT != DstVT) { 12620fca6ea1SDimitry Andric unsigned Op = 12630fca6ea1SDimitry Andric Outs[0].Flags.isZExt() ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; 12640fca6ea1SDimitry Andric SrcReg = 12650fca6ea1SDimitry Andric fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg); 12660fca6ea1SDimitry Andric } 12670b57cec5SDimitry Andric } 12680b57cec5SDimitry Andric 12690b57cec5SDimitry Andric // Make the copy. 12708bcb0991SDimitry Andric Register DstReg = VA.getLocReg(); 12710b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); 12720b57cec5SDimitry Andric // Avoid a cross-class copy. This is very unlikely. 12730b57cec5SDimitry Andric if (!SrcRC->contains(DstReg)) 12740b57cec5SDimitry Andric return false; 1275bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 12760b57cec5SDimitry Andric TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); 12770b57cec5SDimitry Andric 12780b57cec5SDimitry Andric // Add register to return instruction. 12790b57cec5SDimitry Andric RetRegs.push_back(VA.getLocReg()); 12800b57cec5SDimitry Andric } 12810b57cec5SDimitry Andric 12820b57cec5SDimitry Andric // Swift calling convention does not require we copy the sret argument 12830b57cec5SDimitry Andric // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric // All x86 ABIs require that for returning structs by value we copy 12860b57cec5SDimitry Andric // the sret argument into %rax/%eax (depending on ABI) for the return. 12870b57cec5SDimitry Andric // We saved the argument into a virtual register in the entry block, 12880b57cec5SDimitry Andric // so now we copy the value out and into %rax/%eax. 1289fe6060f1SDimitry Andric if (F.hasStructRetAttr() && CC != CallingConv::Swift && 1290fe6060f1SDimitry Andric CC != CallingConv::SwiftTail) { 12915ffd83dbSDimitry Andric Register Reg = X86MFInfo->getSRetReturnReg(); 12920b57cec5SDimitry Andric assert(Reg && 12930b57cec5SDimitry Andric "SRetReturnReg should have been set in LowerFormalArguments()!"); 12940b57cec5SDimitry Andric unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; 1295bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 12960b57cec5SDimitry Andric TII.get(TargetOpcode::COPY), RetReg).addReg(Reg); 12970b57cec5SDimitry Andric RetRegs.push_back(RetReg); 12980b57cec5SDimitry Andric } 12990b57cec5SDimitry Andric 13000b57cec5SDimitry Andric // Now emit the RET. 13010b57cec5SDimitry Andric MachineInstrBuilder MIB; 13020b57cec5SDimitry Andric if (X86MFInfo->getBytesToPopOnReturn()) { 1303bdd1243dSDimitry Andric MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1304349cc55cSDimitry Andric TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32)) 13050b57cec5SDimitry Andric .addImm(X86MFInfo->getBytesToPopOnReturn()); 13060b57cec5SDimitry Andric } else { 1307bdd1243dSDimitry Andric MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1308349cc55cSDimitry Andric TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32)); 13090b57cec5SDimitry Andric } 1310cb14a3feSDimitry Andric for (unsigned Reg : RetRegs) 1311cb14a3feSDimitry Andric MIB.addReg(Reg, RegState::Implicit); 13120b57cec5SDimitry Andric return true; 13130b57cec5SDimitry Andric } 13140b57cec5SDimitry Andric 13150b57cec5SDimitry Andric /// X86SelectLoad - Select and emit code to implement load instructions. 
13160b57cec5SDimitry Andric /// 13170b57cec5SDimitry Andric bool X86FastISel::X86SelectLoad(const Instruction *I) { 13180b57cec5SDimitry Andric const LoadInst *LI = cast<LoadInst>(I); 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric // Atomic loads need special handling. 13210b57cec5SDimitry Andric if (LI->isAtomic()) 13220b57cec5SDimitry Andric return false; 13230b57cec5SDimitry Andric 13240b57cec5SDimitry Andric const Value *SV = I->getOperand(0); 13250b57cec5SDimitry Andric if (TLI.supportSwiftError()) { 13260b57cec5SDimitry Andric // Swifterror values can come from either a function parameter with 13270b57cec5SDimitry Andric // swifterror attribute or an alloca with swifterror attribute. 13280b57cec5SDimitry Andric if (const Argument *Arg = dyn_cast<Argument>(SV)) { 13290b57cec5SDimitry Andric if (Arg->hasSwiftErrorAttr()) 13300b57cec5SDimitry Andric return false; 13310b57cec5SDimitry Andric } 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 13340b57cec5SDimitry Andric if (Alloca->isSwiftError()) 13350b57cec5SDimitry Andric return false; 13360b57cec5SDimitry Andric } 13370b57cec5SDimitry Andric } 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric MVT VT; 13400b57cec5SDimitry Andric if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true)) 13410b57cec5SDimitry Andric return false; 13420b57cec5SDimitry Andric 13430b57cec5SDimitry Andric const Value *Ptr = LI->getPointerOperand(); 13440b57cec5SDimitry Andric 13450b57cec5SDimitry Andric X86AddressMode AM; 13460b57cec5SDimitry Andric if (!X86SelectAddress(Ptr, AM)) 13470b57cec5SDimitry Andric return false; 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric unsigned ResultReg = 0; 13500b57cec5SDimitry Andric if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg, 13515ffd83dbSDimitry Andric LI->getAlign().value())) 13520b57cec5SDimitry Andric return false; 13530b57cec5SDimitry Andric 13540b57cec5SDimitry Andric updateValueMap(I, ResultReg); 13550b57cec5SDimitry Andric return true; 13560b57cec5SDimitry Andric } 13570b57cec5SDimitry Andric 13580b57cec5SDimitry Andric static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { 13590b57cec5SDimitry Andric bool HasAVX512 = Subtarget->hasAVX512(); 13600b57cec5SDimitry Andric bool HasAVX = Subtarget->hasAVX(); 136181ad6265SDimitry Andric bool HasSSE1 = Subtarget->hasSSE1(); 136281ad6265SDimitry Andric bool HasSSE2 = Subtarget->hasSSE2(); 13630b57cec5SDimitry Andric 13640b57cec5SDimitry Andric switch (VT.getSimpleVT().SimpleTy) { 13650b57cec5SDimitry Andric default: return 0; 13660b57cec5SDimitry Andric case MVT::i8: return X86::CMP8rr; 13670b57cec5SDimitry Andric case MVT::i16: return X86::CMP16rr; 13680b57cec5SDimitry Andric case MVT::i32: return X86::CMP32rr; 13690b57cec5SDimitry Andric case MVT::i64: return X86::CMP64rr; 13700b57cec5SDimitry Andric case MVT::f32: 137181ad6265SDimitry Andric return HasAVX512 ? X86::VUCOMISSZrr 137281ad6265SDimitry Andric : HasAVX ? X86::VUCOMISSrr 137381ad6265SDimitry Andric : HasSSE1 ? X86::UCOMISSrr 13740b57cec5SDimitry Andric : 0; 13750b57cec5SDimitry Andric case MVT::f64: 137681ad6265SDimitry Andric return HasAVX512 ? X86::VUCOMISDZrr 137781ad6265SDimitry Andric : HasAVX ? X86::VUCOMISDrr 137881ad6265SDimitry Andric : HasSSE2 ? 
X86::UCOMISDrr
13790b57cec5SDimitry Andric                               : 0;
13800b57cec5SDimitry Andric   }
13810b57cec5SDimitry Andric }
13820b57cec5SDimitry Andric 
13830b57cec5SDimitry Andric /// If the RHS of the comparison is a constant integer, return an opcode that
13840b57cec5SDimitry Andric /// compares against the immediate (e.g. CMP32ri); otherwise return 0.
13850b57cec5SDimitry Andric static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
13860b57cec5SDimitry Andric   switch (VT.getSimpleVT().SimpleTy) {
13870b57cec5SDimitry Andric   // Otherwise, we can't fold the immediate into this comparison.
13880b57cec5SDimitry Andric   default:
13890b57cec5SDimitry Andric     return 0;
13900b57cec5SDimitry Andric   case MVT::i8:
13910b57cec5SDimitry Andric     return X86::CMP8ri;
13920b57cec5SDimitry Andric   case MVT::i16:
13930b57cec5SDimitry Andric     return X86::CMP16ri;
13940b57cec5SDimitry Andric   case MVT::i32:
13950b57cec5SDimitry Andric     return X86::CMP32ri;
13960b57cec5SDimitry Andric   case MVT::i64:
13970b57cec5SDimitry Andric     // 64-bit comparisons are only valid if the immediate fits in a 32-bit
13980b57cec5SDimitry Andric     // sign-extended field.
139906c3fb27SDimitry Andric     return isInt<32>(RHSC->getSExtValue()) ? X86::CMP64ri32 : 0;
14000b57cec5SDimitry Andric   }
14010b57cec5SDimitry Andric }
14020b57cec5SDimitry Andric 
14030b57cec5SDimitry Andric bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1404bdd1243dSDimitry Andric                                      const DebugLoc &CurMIMD) {
14055ffd83dbSDimitry Andric   Register Op0Reg = getRegForValue(Op0);
14060b57cec5SDimitry Andric   if (Op0Reg == 0) return false;
14070b57cec5SDimitry Andric 
14080b57cec5SDimitry Andric   // Handle 'null' like i32/i64 0.
14090b57cec5SDimitry Andric   if (isa<ConstantPointerNull>(Op1))
14100b57cec5SDimitry Andric     Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
14110b57cec5SDimitry Andric 
14120b57cec5SDimitry Andric   // We have two options: compare with register or immediate. If the RHS of
14130b57cec5SDimitry Andric   // the compare is an immediate that we can fold into this compare, use
14140b57cec5SDimitry Andric   // CMPri, otherwise use CMPrr.
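  // For example, "icmp eq i32 %x, 42" can fold the constant (sketch, AT&T
  // syntax, registers invented):
  //   cmpl $42, %eax        ; CMP32ri
  // while "icmp eq i32 %x, %y" needs both operands in registers:
  //   cmpl %ecx, %eax       ; CMP32rr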
14150b57cec5SDimitry Andric   if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
14160b57cec5SDimitry Andric     if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1417bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareImmOpc))
14180b57cec5SDimitry Andric           .addReg(Op0Reg)
14190b57cec5SDimitry Andric           .addImm(Op1C->getSExtValue());
14200b57cec5SDimitry Andric       return true;
14210b57cec5SDimitry Andric     }
14220b57cec5SDimitry Andric   }
14230b57cec5SDimitry Andric 
14240b57cec5SDimitry Andric   unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
14250b57cec5SDimitry Andric   if (CompareOpc == 0) return false;
14260b57cec5SDimitry Andric 
14275ffd83dbSDimitry Andric   Register Op1Reg = getRegForValue(Op1);
14280b57cec5SDimitry Andric   if (Op1Reg == 0) return false;
1429bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareOpc))
14300b57cec5SDimitry Andric       .addReg(Op0Reg)
14310b57cec5SDimitry Andric       .addReg(Op1Reg);
14320b57cec5SDimitry Andric 
14330b57cec5SDimitry Andric   return true;
14340b57cec5SDimitry Andric }
14350b57cec5SDimitry Andric 
14360b57cec5SDimitry Andric bool X86FastISel::X86SelectCmp(const Instruction *I) {
14370b57cec5SDimitry Andric   const CmpInst *CI = cast<CmpInst>(I);
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric   MVT VT;
14400b57cec5SDimitry Andric   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
14410b57cec5SDimitry Andric     return false;
14420b57cec5SDimitry Andric 
1443fe6060f1SDimitry Andric   // The code below only works for scalars.
1444fe6060f1SDimitry Andric   if (VT.isVector())
1445fe6060f1SDimitry Andric     return false;
1446fe6060f1SDimitry Andric 
14470b57cec5SDimitry Andric   // Try to optimize or fold the cmp.
14480b57cec5SDimitry Andric   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
14490b57cec5SDimitry Andric   unsigned ResultReg = 0;
14500b57cec5SDimitry Andric   switch (Predicate) {
14510b57cec5SDimitry Andric   default: break;
14520b57cec5SDimitry Andric   case CmpInst::FCMP_FALSE: {
14530b57cec5SDimitry Andric     ResultReg = createResultReg(&X86::GR32RegClass);
1454bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32r0),
14550b57cec5SDimitry Andric             ResultReg);
1456fe6060f1SDimitry Andric     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
14570b57cec5SDimitry Andric     if (!ResultReg)
14580b57cec5SDimitry Andric       return false;
14590b57cec5SDimitry Andric     break;
14600b57cec5SDimitry Andric   }
14610b57cec5SDimitry Andric   case CmpInst::FCMP_TRUE: {
14620b57cec5SDimitry Andric     ResultReg = createResultReg(&X86::GR8RegClass);
1463bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
14640b57cec5SDimitry Andric             ResultReg).addImm(1);
14650b57cec5SDimitry Andric     break;
14660b57cec5SDimitry Andric   }
14670b57cec5SDimitry Andric   }
14680b57cec5SDimitry Andric 
14690b57cec5SDimitry Andric   if (ResultReg) {
14700b57cec5SDimitry Andric     updateValueMap(I, ResultReg);
14710b57cec5SDimitry Andric     return true;
14720b57cec5SDimitry Andric   }
14730b57cec5SDimitry Andric 
14740b57cec5SDimitry Andric   const Value *LHS = CI->getOperand(0);
14750b57cec5SDimitry Andric   const Value *RHS = CI->getOperand(1);
14760b57cec5SDimitry Andric 
14770b57cec5SDimitry Andric   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
14780b57cec5SDimitry Andric   // We don't have to materialize a zero constant for this case and can just
14790b57cec5SDimitry Andric   // use %x again on the RHS.
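  // With that substitution, "fcmp ord float %x, 0.0" becomes a self-compare;
  // roughly: ucomiss %xmm0, %xmm0, which sets PF exactly when %x is NaN, so
  // no zero constant has to be materialized.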
14800b57cec5SDimitry Andric if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { 14810b57cec5SDimitry Andric const auto *RHSC = dyn_cast<ConstantFP>(RHS); 14820b57cec5SDimitry Andric if (RHSC && RHSC->isNullValue()) 14830b57cec5SDimitry Andric RHS = LHS; 14840b57cec5SDimitry Andric } 14850b57cec5SDimitry Andric 14860b57cec5SDimitry Andric // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 14870b57cec5SDimitry Andric static const uint16_t SETFOpcTable[2][3] = { 14880b57cec5SDimitry Andric { X86::COND_E, X86::COND_NP, X86::AND8rr }, 14890b57cec5SDimitry Andric { X86::COND_NE, X86::COND_P, X86::OR8rr } 14900b57cec5SDimitry Andric }; 14910b57cec5SDimitry Andric const uint16_t *SETFOpc = nullptr; 14920b57cec5SDimitry Andric switch (Predicate) { 14930b57cec5SDimitry Andric default: break; 14940b57cec5SDimitry Andric case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; 14950b57cec5SDimitry Andric case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; 14960b57cec5SDimitry Andric } 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric ResultReg = createResultReg(&X86::GR8RegClass); 14990b57cec5SDimitry Andric if (SETFOpc) { 15000b57cec5SDimitry Andric if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) 15010b57cec5SDimitry Andric return false; 15020b57cec5SDimitry Andric 15035ffd83dbSDimitry Andric Register FlagReg1 = createResultReg(&X86::GR8RegClass); 15045ffd83dbSDimitry Andric Register FlagReg2 = createResultReg(&X86::GR8RegClass); 1505bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr), 15060b57cec5SDimitry Andric FlagReg1).addImm(SETFOpc[0]); 1507bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr), 15080b57cec5SDimitry Andric FlagReg2).addImm(SETFOpc[1]); 1509bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(SETFOpc[2]), 15100b57cec5SDimitry Andric ResultReg).addReg(FlagReg1).addReg(FlagReg2); 15110b57cec5SDimitry Andric updateValueMap(I, ResultReg); 15120b57cec5SDimitry Andric return true; 15130b57cec5SDimitry Andric } 15140b57cec5SDimitry Andric 15150b57cec5SDimitry Andric X86::CondCode CC; 15160b57cec5SDimitry Andric bool SwapArgs; 15170b57cec5SDimitry Andric std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); 15180b57cec5SDimitry Andric assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 15190b57cec5SDimitry Andric 15200b57cec5SDimitry Andric if (SwapArgs) 15210b57cec5SDimitry Andric std::swap(LHS, RHS); 15220b57cec5SDimitry Andric 15230b57cec5SDimitry Andric // Emit a compare of LHS/RHS. 
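  // For reference, the SETFOpc path above turns "fcmp oeq float %a, %b"
  // into roughly (registers invented):
  //   ucomiss %xmm1, %xmm0
  //   sete    %al          ; COND_E: equal
  //   setnp   %cl          ; COND_NP: ordered
  //   andb    %cl, %al     ; AND8rr: equal AND ordered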
15240b57cec5SDimitry Andric if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) 15250b57cec5SDimitry Andric return false; 15260b57cec5SDimitry Andric 1527bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr), 15280b57cec5SDimitry Andric ResultReg).addImm(CC); 15290b57cec5SDimitry Andric updateValueMap(I, ResultReg); 15300b57cec5SDimitry Andric return true; 15310b57cec5SDimitry Andric } 15320b57cec5SDimitry Andric 15330b57cec5SDimitry Andric bool X86FastISel::X86SelectZExt(const Instruction *I) { 15340b57cec5SDimitry Andric EVT DstVT = TLI.getValueType(DL, I->getType()); 15350b57cec5SDimitry Andric if (!TLI.isTypeLegal(DstVT)) 15360b57cec5SDimitry Andric return false; 15370b57cec5SDimitry Andric 15385ffd83dbSDimitry Andric Register ResultReg = getRegForValue(I->getOperand(0)); 15390b57cec5SDimitry Andric if (ResultReg == 0) 15400b57cec5SDimitry Andric return false; 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric // Handle zero-extension from i1 to i8, which is common. 15430b57cec5SDimitry Andric MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); 15440b57cec5SDimitry Andric if (SrcVT == MVT::i1) { 15450b57cec5SDimitry Andric // Set the high bits to zero. 1546fe6060f1SDimitry Andric ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg); 15470b57cec5SDimitry Andric SrcVT = MVT::i8; 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric if (ResultReg == 0) 15500b57cec5SDimitry Andric return false; 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric 15530b57cec5SDimitry Andric if (DstVT == MVT::i64) { 15540b57cec5SDimitry Andric // Handle extension to 64-bits via sub-register shenanigans. 15550b57cec5SDimitry Andric unsigned MovInst; 15560b57cec5SDimitry Andric 15570b57cec5SDimitry Andric switch (SrcVT.SimpleTy) { 15580b57cec5SDimitry Andric case MVT::i8: MovInst = X86::MOVZX32rr8; break; 15590b57cec5SDimitry Andric case MVT::i16: MovInst = X86::MOVZX32rr16; break; 15600b57cec5SDimitry Andric case MVT::i32: MovInst = X86::MOV32rr; break; 15610b57cec5SDimitry Andric default: llvm_unreachable("Unexpected zext to i64 source type"); 15620b57cec5SDimitry Andric } 15630b57cec5SDimitry Andric 15645ffd83dbSDimitry Andric Register Result32 = createResultReg(&X86::GR32RegClass); 1565bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovInst), Result32) 15660b57cec5SDimitry Andric .addReg(ResultReg); 15670b57cec5SDimitry Andric 15680b57cec5SDimitry Andric ResultReg = createResultReg(&X86::GR64RegClass); 1569bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::SUBREG_TO_REG), 15700b57cec5SDimitry Andric ResultReg) 15710b57cec5SDimitry Andric .addImm(0).addReg(Result32).addImm(X86::sub_32bit); 15720b57cec5SDimitry Andric } else if (DstVT == MVT::i16) { 15730b57cec5SDimitry Andric // i8->i16 doesn't exist in the autogenerated isel table. Need to zero 15740b57cec5SDimitry Andric // extend to 32-bits and then extract down to 16-bits. 
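    // Sketch of the resulting code (registers invented):
    //   movzbl %al, %eax   ; MOVZX32rr8: zero-extend i8 to i32
    // after which the i16 result is just the sub_16bit subreg (%ax).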
15755ffd83dbSDimitry Andric Register Result32 = createResultReg(&X86::GR32RegClass); 1576bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVZX32rr8), 15770b57cec5SDimitry Andric Result32).addReg(ResultReg); 15780b57cec5SDimitry Andric 1579fe6060f1SDimitry Andric ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit); 15800b57cec5SDimitry Andric } else if (DstVT != MVT::i8) { 15810b57cec5SDimitry Andric ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, 1582fe6060f1SDimitry Andric ResultReg); 15830b57cec5SDimitry Andric if (ResultReg == 0) 15840b57cec5SDimitry Andric return false; 15850b57cec5SDimitry Andric } 15860b57cec5SDimitry Andric 15870b57cec5SDimitry Andric updateValueMap(I, ResultReg); 15880b57cec5SDimitry Andric return true; 15890b57cec5SDimitry Andric } 15900b57cec5SDimitry Andric 15910b57cec5SDimitry Andric bool X86FastISel::X86SelectSExt(const Instruction *I) { 15920b57cec5SDimitry Andric EVT DstVT = TLI.getValueType(DL, I->getType()); 15930b57cec5SDimitry Andric if (!TLI.isTypeLegal(DstVT)) 15940b57cec5SDimitry Andric return false; 15950b57cec5SDimitry Andric 15965ffd83dbSDimitry Andric Register ResultReg = getRegForValue(I->getOperand(0)); 15970b57cec5SDimitry Andric if (ResultReg == 0) 15980b57cec5SDimitry Andric return false; 15990b57cec5SDimitry Andric 16000b57cec5SDimitry Andric // Handle sign-extension from i1 to i8. 16010b57cec5SDimitry Andric MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); 16020b57cec5SDimitry Andric if (SrcVT == MVT::i1) { 16030b57cec5SDimitry Andric // Set the high bits to zero. 1604fe6060f1SDimitry Andric Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg); 16050b57cec5SDimitry Andric if (ZExtReg == 0) 16060b57cec5SDimitry Andric return false; 16070b57cec5SDimitry Andric 16080b57cec5SDimitry Andric // Negate the result to make an 8-bit sign extended value. 16090b57cec5SDimitry Andric ResultReg = createResultReg(&X86::GR8RegClass); 1610bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::NEG8r), 16110b57cec5SDimitry Andric ResultReg).addReg(ZExtReg); 16120b57cec5SDimitry Andric 16130b57cec5SDimitry Andric SrcVT = MVT::i8; 16140b57cec5SDimitry Andric } 16150b57cec5SDimitry Andric 16160b57cec5SDimitry Andric if (DstVT == MVT::i16) { 16170b57cec5SDimitry Andric // i8->i16 doesn't exist in the autogenerated isel table. Need to sign 16180b57cec5SDimitry Andric // extend to 32-bits and then extract down to 16-bits. 
16195ffd83dbSDimitry Andric Register Result32 = createResultReg(&X86::GR32RegClass); 1620bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVSX32rr8), 16210b57cec5SDimitry Andric Result32).addReg(ResultReg); 16220b57cec5SDimitry Andric 1623fe6060f1SDimitry Andric ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit); 16240b57cec5SDimitry Andric } else if (DstVT != MVT::i8) { 16250b57cec5SDimitry Andric ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND, 1626fe6060f1SDimitry Andric ResultReg); 16270b57cec5SDimitry Andric if (ResultReg == 0) 16280b57cec5SDimitry Andric return false; 16290b57cec5SDimitry Andric } 16300b57cec5SDimitry Andric 16310b57cec5SDimitry Andric updateValueMap(I, ResultReg); 16320b57cec5SDimitry Andric return true; 16330b57cec5SDimitry Andric } 16340b57cec5SDimitry Andric 16350b57cec5SDimitry Andric bool X86FastISel::X86SelectBranch(const Instruction *I) { 16360b57cec5SDimitry Andric // Unconditional branches are selected by tablegen-generated code. 16370b57cec5SDimitry Andric // Handle a conditional branch. 16380b57cec5SDimitry Andric const BranchInst *BI = cast<BranchInst>(I); 16390b57cec5SDimitry Andric MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 16400b57cec5SDimitry Andric MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 16410b57cec5SDimitry Andric 16420b57cec5SDimitry Andric // Fold the common case of a conditional branch with a comparison 16430b57cec5SDimitry Andric // in the same block (values defined on other blocks may not have 16440b57cec5SDimitry Andric // initialized registers). 16450b57cec5SDimitry Andric X86::CondCode CC; 16460b57cec5SDimitry Andric if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 16470b57cec5SDimitry Andric if (CI->hasOneUse() && CI->getParent() == I->getParent()) { 16480b57cec5SDimitry Andric EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType()); 16490b57cec5SDimitry Andric 16500b57cec5SDimitry Andric // Try to optimize or fold the cmp. 16510b57cec5SDimitry Andric CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 16520b57cec5SDimitry Andric switch (Predicate) { 16530b57cec5SDimitry Andric default: break; 1654bdd1243dSDimitry Andric case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, MIMD.getDL()); return true; 1655bdd1243dSDimitry Andric case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, MIMD.getDL()); return true; 16560b57cec5SDimitry Andric } 16570b57cec5SDimitry Andric 16580b57cec5SDimitry Andric const Value *CmpLHS = CI->getOperand(0); 16590b57cec5SDimitry Andric const Value *CmpRHS = CI->getOperand(1); 16600b57cec5SDimitry Andric 16610b57cec5SDimitry Andric // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 16620b57cec5SDimitry Andric // 0.0. 16630b57cec5SDimitry Andric // We don't have to materialize a zero constant for this case and can just 16640b57cec5SDimitry Andric // use %x again on the RHS. 16650b57cec5SDimitry Andric if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { 16660b57cec5SDimitry Andric const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); 16670b57cec5SDimitry Andric if (CmpRHSC && CmpRHSC->isNullValue()) 16680b57cec5SDimitry Andric CmpRHS = CmpLHS; 16690b57cec5SDimitry Andric } 16700b57cec5SDimitry Andric 16710b57cec5SDimitry Andric // Try to take advantage of fallthrough opportunities. 
16720b57cec5SDimitry Andric       if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
16730b57cec5SDimitry Andric         std::swap(TrueMBB, FalseMBB);
16740b57cec5SDimitry Andric         Predicate = CmpInst::getInversePredicate(Predicate);
16750b57cec5SDimitry Andric       }
16760b57cec5SDimitry Andric 
16770b57cec5SDimitry Andric       // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
16780b57cec5SDimitry Andric       // code check. Instead two branch instructions are required to check all
16790b57cec5SDimitry Andric       // the flags. First we change the predicate to a supported condition code,
16800b57cec5SDimitry Andric       // which will be the first branch. Later on we will emit the second
16810b57cec5SDimitry Andric       // branch.
16820b57cec5SDimitry Andric       bool NeedExtraBranch = false;
16830b57cec5SDimitry Andric       switch (Predicate) {
16840b57cec5SDimitry Andric       default: break;
16850b57cec5SDimitry Andric       case CmpInst::FCMP_OEQ:
16860b57cec5SDimitry Andric         std::swap(TrueMBB, FalseMBB);
1687bdd1243dSDimitry Andric         [[fallthrough]];
16880b57cec5SDimitry Andric       case CmpInst::FCMP_UNE:
16890b57cec5SDimitry Andric         NeedExtraBranch = true;
16900b57cec5SDimitry Andric         Predicate = CmpInst::FCMP_ONE;
16910b57cec5SDimitry Andric         break;
16920b57cec5SDimitry Andric       }
16930b57cec5SDimitry Andric 
16940b57cec5SDimitry Andric       bool SwapArgs;
16950b57cec5SDimitry Andric       std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
16960b57cec5SDimitry Andric       assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
16970b57cec5SDimitry Andric 
16980b57cec5SDimitry Andric       if (SwapArgs)
16990b57cec5SDimitry Andric         std::swap(CmpLHS, CmpRHS);
17000b57cec5SDimitry Andric 
17010b57cec5SDimitry Andric       // Emit a compare of the LHS and RHS, setting the flags.
17020b57cec5SDimitry Andric       if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
17030b57cec5SDimitry Andric         return false;
17040b57cec5SDimitry Andric 
1705bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
17060b57cec5SDimitry Andric           .addMBB(TrueMBB).addImm(CC);
17070b57cec5SDimitry Andric 
17080b57cec5SDimitry Andric       // X86 requires a second branch to handle UNE (and OEQ, which is mapped
17090b57cec5SDimitry Andric       // to UNE above).
17100b57cec5SDimitry Andric       if (NeedExtraBranch) {
1711bdd1243dSDimitry Andric         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
17120b57cec5SDimitry Andric             .addMBB(TrueMBB).addImm(X86::COND_P);
17130b57cec5SDimitry Andric       }
17140b57cec5SDimitry Andric 
17150b57cec5SDimitry Andric       finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
17160b57cec5SDimitry Andric       return true;
17170b57cec5SDimitry Andric     }
17180b57cec5SDimitry Andric   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
17190b57cec5SDimitry Andric     // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
17200b57cec5SDimitry Andric     // typically happen for _Bool and C++ bools.
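    // A sketch of what this path emits for the example above (registers and
    // labels invented):
    //   testl $1, %eax     ; TEST32ri: check only the low bit
    //   jne   .LBB0_2      ; JCC_1, COND_NE (COND_E if successors are swapped)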
17210b57cec5SDimitry Andric MVT SourceVT; 17220b57cec5SDimitry Andric if (TI->hasOneUse() && TI->getParent() == I->getParent() && 17230b57cec5SDimitry Andric isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) { 17240b57cec5SDimitry Andric unsigned TestOpc = 0; 17250b57cec5SDimitry Andric switch (SourceVT.SimpleTy) { 17260b57cec5SDimitry Andric default: break; 17270b57cec5SDimitry Andric case MVT::i8: TestOpc = X86::TEST8ri; break; 17280b57cec5SDimitry Andric case MVT::i16: TestOpc = X86::TEST16ri; break; 17290b57cec5SDimitry Andric case MVT::i32: TestOpc = X86::TEST32ri; break; 17300b57cec5SDimitry Andric case MVT::i64: TestOpc = X86::TEST64ri32; break; 17310b57cec5SDimitry Andric } 17320b57cec5SDimitry Andric if (TestOpc) { 17335ffd83dbSDimitry Andric Register OpReg = getRegForValue(TI->getOperand(0)); 17340b57cec5SDimitry Andric if (OpReg == 0) return false; 17350b57cec5SDimitry Andric 1736bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TestOpc)) 17370b57cec5SDimitry Andric .addReg(OpReg).addImm(1); 17380b57cec5SDimitry Andric 17390b57cec5SDimitry Andric unsigned JmpCond = X86::COND_NE; 17400b57cec5SDimitry Andric if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { 17410b57cec5SDimitry Andric std::swap(TrueMBB, FalseMBB); 17420b57cec5SDimitry Andric JmpCond = X86::COND_E; 17430b57cec5SDimitry Andric } 17440b57cec5SDimitry Andric 1745bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1)) 17460b57cec5SDimitry Andric .addMBB(TrueMBB).addImm(JmpCond); 17470b57cec5SDimitry Andric 17480b57cec5SDimitry Andric finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); 17490b57cec5SDimitry Andric return true; 17500b57cec5SDimitry Andric } 17510b57cec5SDimitry Andric } 17520b57cec5SDimitry Andric } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) { 17530b57cec5SDimitry Andric // Fake request the condition, otherwise the intrinsic might be completely 17540b57cec5SDimitry Andric // optimized away. 17555ffd83dbSDimitry Andric Register TmpReg = getRegForValue(BI->getCondition()); 17560b57cec5SDimitry Andric if (TmpReg == 0) 17570b57cec5SDimitry Andric return false; 17580b57cec5SDimitry Andric 1759bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1)) 17600b57cec5SDimitry Andric .addMBB(TrueMBB).addImm(CC); 17610b57cec5SDimitry Andric finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); 17620b57cec5SDimitry Andric return true; 17630b57cec5SDimitry Andric } 17640b57cec5SDimitry Andric 17650b57cec5SDimitry Andric // Otherwise do a clumsy setcc and re-test it. 17660b57cec5SDimitry Andric // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used 17670b57cec5SDimitry Andric // in an explicit cast, so make sure to handle that correctly. 
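  // The fallback below therefore boils down to (sketch):
  //   testb $1, %al        ; TEST8ri against the low bit only
  //   jne   <true-block>   ; JCC_1 with COND_NE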
17685ffd83dbSDimitry Andric Register OpReg = getRegForValue(BI->getCondition()); 17690b57cec5SDimitry Andric if (OpReg == 0) return false; 17700b57cec5SDimitry Andric 17710b57cec5SDimitry Andric // In case OpReg is a K register, COPY to a GPR 17720b57cec5SDimitry Andric if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) { 17730b57cec5SDimitry Andric unsigned KOpReg = OpReg; 17740b57cec5SDimitry Andric OpReg = createResultReg(&X86::GR32RegClass); 1775bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 17760b57cec5SDimitry Andric TII.get(TargetOpcode::COPY), OpReg) 17770b57cec5SDimitry Andric .addReg(KOpReg); 1778fe6060f1SDimitry Andric OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit); 17790b57cec5SDimitry Andric } 1780bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri)) 17810b57cec5SDimitry Andric .addReg(OpReg) 17820b57cec5SDimitry Andric .addImm(1); 1783bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1)) 17840b57cec5SDimitry Andric .addMBB(TrueMBB).addImm(X86::COND_NE); 17850b57cec5SDimitry Andric finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); 17860b57cec5SDimitry Andric return true; 17870b57cec5SDimitry Andric } 17880b57cec5SDimitry Andric 17890b57cec5SDimitry Andric bool X86FastISel::X86SelectShift(const Instruction *I) { 17900b57cec5SDimitry Andric unsigned CReg = 0, OpReg = 0; 17910b57cec5SDimitry Andric const TargetRegisterClass *RC = nullptr; 17920b57cec5SDimitry Andric if (I->getType()->isIntegerTy(8)) { 17930b57cec5SDimitry Andric CReg = X86::CL; 17940b57cec5SDimitry Andric RC = &X86::GR8RegClass; 17950b57cec5SDimitry Andric switch (I->getOpcode()) { 17960b57cec5SDimitry Andric case Instruction::LShr: OpReg = X86::SHR8rCL; break; 17970b57cec5SDimitry Andric case Instruction::AShr: OpReg = X86::SAR8rCL; break; 17980b57cec5SDimitry Andric case Instruction::Shl: OpReg = X86::SHL8rCL; break; 17990b57cec5SDimitry Andric default: return false; 18000b57cec5SDimitry Andric } 18010b57cec5SDimitry Andric } else if (I->getType()->isIntegerTy(16)) { 18020b57cec5SDimitry Andric CReg = X86::CX; 18030b57cec5SDimitry Andric RC = &X86::GR16RegClass; 18040b57cec5SDimitry Andric switch (I->getOpcode()) { 18050b57cec5SDimitry Andric default: llvm_unreachable("Unexpected shift opcode"); 18060b57cec5SDimitry Andric case Instruction::LShr: OpReg = X86::SHR16rCL; break; 18070b57cec5SDimitry Andric case Instruction::AShr: OpReg = X86::SAR16rCL; break; 18080b57cec5SDimitry Andric case Instruction::Shl: OpReg = X86::SHL16rCL; break; 18090b57cec5SDimitry Andric } 18100b57cec5SDimitry Andric } else if (I->getType()->isIntegerTy(32)) { 18110b57cec5SDimitry Andric CReg = X86::ECX; 18120b57cec5SDimitry Andric RC = &X86::GR32RegClass; 18130b57cec5SDimitry Andric switch (I->getOpcode()) { 18140b57cec5SDimitry Andric default: llvm_unreachable("Unexpected shift opcode"); 18150b57cec5SDimitry Andric case Instruction::LShr: OpReg = X86::SHR32rCL; break; 18160b57cec5SDimitry Andric case Instruction::AShr: OpReg = X86::SAR32rCL; break; 18170b57cec5SDimitry Andric case Instruction::Shl: OpReg = X86::SHL32rCL; break; 18180b57cec5SDimitry Andric } 18190b57cec5SDimitry Andric } else if (I->getType()->isIntegerTy(64)) { 18200b57cec5SDimitry Andric CReg = X86::RCX; 18210b57cec5SDimitry Andric RC = &X86::GR64RegClass; 18220b57cec5SDimitry Andric switch (I->getOpcode()) { 18230b57cec5SDimitry Andric default: llvm_unreachable("Unexpected shift opcode"); 18240b57cec5SDimitry Andric case Instruction::LShr: 
OpReg = X86::SHR64rCL; break; 18250b57cec5SDimitry Andric case Instruction::AShr: OpReg = X86::SAR64rCL; break; 18260b57cec5SDimitry Andric case Instruction::Shl: OpReg = X86::SHL64rCL; break; 18270b57cec5SDimitry Andric } 18280b57cec5SDimitry Andric } else { 18290b57cec5SDimitry Andric return false; 18300b57cec5SDimitry Andric } 18310b57cec5SDimitry Andric 18320b57cec5SDimitry Andric MVT VT; 18330b57cec5SDimitry Andric if (!isTypeLegal(I->getType(), VT)) 18340b57cec5SDimitry Andric return false; 18350b57cec5SDimitry Andric 18365ffd83dbSDimitry Andric Register Op0Reg = getRegForValue(I->getOperand(0)); 18370b57cec5SDimitry Andric if (Op0Reg == 0) return false; 18380b57cec5SDimitry Andric 18395ffd83dbSDimitry Andric Register Op1Reg = getRegForValue(I->getOperand(1)); 18400b57cec5SDimitry Andric if (Op1Reg == 0) return false; 1841bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 18420b57cec5SDimitry Andric CReg).addReg(Op1Reg); 18430b57cec5SDimitry Andric 18440b57cec5SDimitry Andric // The shift instruction uses X86::CL. If we defined a super-register 18450b57cec5SDimitry Andric // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. 18460b57cec5SDimitry Andric if (CReg != X86::CL) 1847bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 18480b57cec5SDimitry Andric TII.get(TargetOpcode::KILL), X86::CL) 18490b57cec5SDimitry Andric .addReg(CReg, RegState::Kill); 18500b57cec5SDimitry Andric 18515ffd83dbSDimitry Andric Register ResultReg = createResultReg(RC); 1852bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(OpReg), ResultReg) 18530b57cec5SDimitry Andric .addReg(Op0Reg); 18540b57cec5SDimitry Andric updateValueMap(I, ResultReg); 18550b57cec5SDimitry Andric return true; 18560b57cec5SDimitry Andric } 18570b57cec5SDimitry Andric 18580b57cec5SDimitry Andric bool X86FastISel::X86SelectDivRem(const Instruction *I) { 18590b57cec5SDimitry Andric const static unsigned NumTypes = 4; // i8, i16, i32, i64 18600b57cec5SDimitry Andric const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem 18610b57cec5SDimitry Andric const static bool S = true; // IsSigned 18620b57cec5SDimitry Andric const static bool U = false; // !IsSigned 18630b57cec5SDimitry Andric const static unsigned Copy = TargetOpcode::COPY; 18640b57cec5SDimitry Andric // For the X86 DIV/IDIV instruction, in most cases the dividend 18650b57cec5SDimitry Andric // (numerator) must be in a specific register pair highreg:lowreg, 18660b57cec5SDimitry Andric // producing the quotient in lowreg and the remainder in highreg. 18670b57cec5SDimitry Andric // For most data types, to set up the instruction, the dividend is 18680b57cec5SDimitry Andric // copied into lowreg, and lowreg is sign-extended or zero-extended 18690b57cec5SDimitry Andric // into highreg. The exception is i8, where the dividend is defined 18700b57cec5SDimitry Andric // as a single register rather than a register pair, and we 18710b57cec5SDimitry Andric // therefore directly sign-extend or zero-extend the dividend into 18720b57cec5SDimitry Andric // lowreg, instead of copying, and ignore the highreg. 18730b57cec5SDimitry Andric const static struct DivRemEntry { 18740b57cec5SDimitry Andric // The following portion depends only on the data type. 
18750b57cec5SDimitry Andric const TargetRegisterClass *RC; 18760b57cec5SDimitry Andric unsigned LowInReg; // low part of the register pair 18770b57cec5SDimitry Andric unsigned HighInReg; // high part of the register pair 18780b57cec5SDimitry Andric // The following portion depends on both the data type and the operation. 18790b57cec5SDimitry Andric struct DivRemResult { 18800b57cec5SDimitry Andric unsigned OpDivRem; // The specific DIV/IDIV opcode to use. 18810b57cec5SDimitry Andric unsigned OpSignExtend; // Opcode for sign-extending lowreg into 18820b57cec5SDimitry Andric // highreg, or copying a zero into highreg. 18830b57cec5SDimitry Andric unsigned OpCopy; // Opcode for copying dividend into lowreg, or 18840b57cec5SDimitry Andric // zero/sign-extending into lowreg for i8. 18850b57cec5SDimitry Andric unsigned DivRemResultReg; // Register containing the desired result. 18860b57cec5SDimitry Andric bool IsOpSigned; // Whether to use signed or unsigned form. 18870b57cec5SDimitry Andric } ResultTable[NumOps]; 18880b57cec5SDimitry Andric } OpTable[NumTypes] = { 18890b57cec5SDimitry Andric { &X86::GR8RegClass, X86::AX, 0, { 18900b57cec5SDimitry Andric { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv 18910b57cec5SDimitry Andric { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem 18920b57cec5SDimitry Andric { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv 18930b57cec5SDimitry Andric { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem 18940b57cec5SDimitry Andric } 18950b57cec5SDimitry Andric }, // i8 18960b57cec5SDimitry Andric { &X86::GR16RegClass, X86::AX, X86::DX, { 18970b57cec5SDimitry Andric { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv 18980b57cec5SDimitry Andric { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem 18990b57cec5SDimitry Andric { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv 19000b57cec5SDimitry Andric { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem 19010b57cec5SDimitry Andric } 19020b57cec5SDimitry Andric }, // i16 19030b57cec5SDimitry Andric { &X86::GR32RegClass, X86::EAX, X86::EDX, { 19040b57cec5SDimitry Andric { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv 19050b57cec5SDimitry Andric { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem 19060b57cec5SDimitry Andric { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv 19070b57cec5SDimitry Andric { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem 19080b57cec5SDimitry Andric } 19090b57cec5SDimitry Andric }, // i32 19100b57cec5SDimitry Andric { &X86::GR64RegClass, X86::RAX, X86::RDX, { 19110b57cec5SDimitry Andric { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv 19120b57cec5SDimitry Andric { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem 19130b57cec5SDimitry Andric { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv 19140b57cec5SDimitry Andric { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem 19150b57cec5SDimitry Andric } 19160b57cec5SDimitry Andric }, // i64 19170b57cec5SDimitry Andric }; 19180b57cec5SDimitry Andric 19190b57cec5SDimitry Andric MVT VT; 19200b57cec5SDimitry Andric if (!isTypeLegal(I->getType(), VT)) 19210b57cec5SDimitry Andric return false; 19220b57cec5SDimitry Andric 19230b57cec5SDimitry Andric unsigned TypeIndex, OpIndex; 19240b57cec5SDimitry Andric switch (VT.SimpleTy) { 19250b57cec5SDimitry Andric default: return false; 19260b57cec5SDimitry Andric case MVT::i8: TypeIndex = 0; break; 19270b57cec5SDimitry Andric case MVT::i16: TypeIndex = 1; break; 19280b57cec5SDimitry Andric case MVT::i32: TypeIndex = 2; break; 
19290b57cec5SDimitry Andric case MVT::i64: TypeIndex = 3; 19300b57cec5SDimitry Andric if (!Subtarget->is64Bit()) 19310b57cec5SDimitry Andric return false; 19320b57cec5SDimitry Andric break; 19330b57cec5SDimitry Andric } 19340b57cec5SDimitry Andric 19350b57cec5SDimitry Andric switch (I->getOpcode()) { 19360b57cec5SDimitry Andric default: llvm_unreachable("Unexpected div/rem opcode"); 19370b57cec5SDimitry Andric case Instruction::SDiv: OpIndex = 0; break; 19380b57cec5SDimitry Andric case Instruction::SRem: OpIndex = 1; break; 19390b57cec5SDimitry Andric case Instruction::UDiv: OpIndex = 2; break; 19400b57cec5SDimitry Andric case Instruction::URem: OpIndex = 3; break; 19410b57cec5SDimitry Andric } 19420b57cec5SDimitry Andric 19430b57cec5SDimitry Andric const DivRemEntry &TypeEntry = OpTable[TypeIndex]; 19440b57cec5SDimitry Andric const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; 19455ffd83dbSDimitry Andric Register Op0Reg = getRegForValue(I->getOperand(0)); 19460b57cec5SDimitry Andric if (Op0Reg == 0) 19470b57cec5SDimitry Andric return false; 19485ffd83dbSDimitry Andric Register Op1Reg = getRegForValue(I->getOperand(1)); 19490b57cec5SDimitry Andric if (Op1Reg == 0) 19500b57cec5SDimitry Andric return false; 19510b57cec5SDimitry Andric 19520b57cec5SDimitry Andric // Move op0 into low-order input register. 1953bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 19540b57cec5SDimitry Andric TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); 19550b57cec5SDimitry Andric // Zero-extend or sign-extend into high-order input register. 19560b57cec5SDimitry Andric if (OpEntry.OpSignExtend) { 19570b57cec5SDimitry Andric if (OpEntry.IsOpSigned) 1958bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 19590b57cec5SDimitry Andric TII.get(OpEntry.OpSignExtend)); 19600b57cec5SDimitry Andric else { 19615ffd83dbSDimitry Andric Register Zero32 = createResultReg(&X86::GR32RegClass); 1962bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 19630b57cec5SDimitry Andric TII.get(X86::MOV32r0), Zero32); 19640b57cec5SDimitry Andric 19650b57cec5SDimitry Andric // Copy the zero into the appropriate sub/super/identical physical 19660b57cec5SDimitry Andric // register. Unfortunately the operations needed are not uniform enough 19670b57cec5SDimitry Andric // to fit neatly into the table above. 19680b57cec5SDimitry Andric if (VT == MVT::i16) { 1969bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 19700b57cec5SDimitry Andric TII.get(Copy), TypeEntry.HighInReg) 19710b57cec5SDimitry Andric .addReg(Zero32, 0, X86::sub_16bit); 19720b57cec5SDimitry Andric } else if (VT == MVT::i32) { 1973bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 19740b57cec5SDimitry Andric TII.get(Copy), TypeEntry.HighInReg) 19750b57cec5SDimitry Andric .addReg(Zero32); 19760b57cec5SDimitry Andric } else if (VT == MVT::i64) { 1977bdd1243dSDimitry Andric BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 19780b57cec5SDimitry Andric TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) 19790b57cec5SDimitry Andric .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); 19800b57cec5SDimitry Andric } 19810b57cec5SDimitry Andric } 19820b57cec5SDimitry Andric } 19830b57cec5SDimitry Andric // Generate the DIV/IDIV instruction. 
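  // For a 32-bit sdiv, for instance, the complete sequence built here is
  // roughly (AT&T syntax, registers invented):
  //   movl %edi, %eax      ; copy dividend into EAX
  //   cltd                 ; CDQ: sign-extend EAX into EDX
  //   idivl %esi           ; IDIV32r: quotient in EAX, remainder in EDX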
1984bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
19850b57cec5SDimitry Andric           TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
19860b57cec5SDimitry Andric   // For i8 remainder, we can't reference ah directly, as we'll end
19870b57cec5SDimitry Andric   // up with bogus copies like %r9b = COPY %ah. Reference ax
19880b57cec5SDimitry Andric   // instead to prevent AH references in a REX-prefixed instruction.
19890b57cec5SDimitry Andric   //
19900b57cec5SDimitry Andric   // The current assumption of the fast register allocator is that isel
19910b57cec5SDimitry Andric   // won't generate explicit references to the GR8_NOREX registers. If
19920b57cec5SDimitry Andric   // the allocator and/or the backend get enhanced to be more robust in
19930b57cec5SDimitry Andric   // that regard, this can be, and should be, removed.
19940b57cec5SDimitry Andric   unsigned ResultReg = 0;
19950b57cec5SDimitry Andric   if ((I->getOpcode() == Instruction::SRem ||
19960b57cec5SDimitry Andric        I->getOpcode() == Instruction::URem) &&
19970b57cec5SDimitry Andric       OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
19985ffd83dbSDimitry Andric     Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
19995ffd83dbSDimitry Andric     Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2000bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
20010b57cec5SDimitry Andric             TII.get(Copy), SourceSuperReg).addReg(X86::AX);
20020b57cec5SDimitry Andric 
20030b57cec5SDimitry Andric     // Shift AX right by 8 bits instead of using AH.
2004bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SHR16ri),
20050b57cec5SDimitry Andric             ResultSuperReg).addReg(SourceSuperReg).addImm(8);
20060b57cec5SDimitry Andric 
20070b57cec5SDimitry Andric     // Now reference the 8-bit subreg of the result.
20080b57cec5SDimitry Andric     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2009fe6060f1SDimitry Andric                                            X86::sub_8bit);
20100b57cec5SDimitry Andric   }
20110b57cec5SDimitry Andric   // Copy the result out of the physreg if we haven't already.
20120b57cec5SDimitry Andric   if (!ResultReg) {
20130b57cec5SDimitry Andric     ResultReg = createResultReg(TypeEntry.RC);
2014bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Copy), ResultReg)
20150b57cec5SDimitry Andric         .addReg(OpEntry.DivRemResultReg);
20160b57cec5SDimitry Andric   }
20170b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
20180b57cec5SDimitry Andric 
20190b57cec5SDimitry Andric   return true;
20200b57cec5SDimitry Andric }
20210b57cec5SDimitry Andric 
20220b57cec5SDimitry Andric /// Emit a conditional move instruction (if they are supported) to lower
20230b57cec5SDimitry Andric /// the select.
20240b57cec5SDimitry Andric bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
20250b57cec5SDimitry Andric   // Check if the subtarget supports these instructions.
202681ad6265SDimitry Andric   if (!Subtarget->canUseCMOV())
20270b57cec5SDimitry Andric     return false;
20280b57cec5SDimitry Andric 
20290b57cec5SDimitry Andric   // FIXME: Add support for i8.
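  // As an illustration, "select i1 %c, i32 %a, i32 %b" is intended to become
  // roughly (registers invented):
  //   testb  $1, %cl       ; does the condition's low bit hold?
  //   cmovel %esi, %eax    ; when ZF is set (%c == 0), take the false value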
  if (RetVT < MVT::i16 || RetVT > MVT::i64)
    return false;

  const Value *Cond = I->getOperand(0);
  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  bool NeedTest = true;
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
    static const uint16_t SETFOpcTable[2][3] = {
      { X86::COND_NP, X86::COND_E, X86::TEST8rr },
      { X86::COND_P, X86::COND_NE, X86::OR8rr }
    };
    const uint16_t *SETFOpc = nullptr;
    switch (Predicate) {
    default: break;
    case CmpInst::FCMP_OEQ:
      SETFOpc = &SETFOpcTable[0][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    case CmpInst::FCMP_UNE:
      SETFOpc = &SETFOpcTable[1][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    }

    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);
    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    // Emit a compare of the LHS and RHS, setting the flags.
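    // Illustrative sketch (not from the original source): FCMP_OEQ needs two
    // flag reads because "ordered and equal" is ZF == 1 && PF == 0, roughly
    //   ucomiss %xmm1, %xmm0
    //   setnp %al            ; not parity, i.e. ordered
    //   sete  %cl            ; equal
    //   test  %cl, %al       ; nonzero iff both hold; select on COND_NE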
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;

    if (SETFOpc) {
      Register FlagReg1 = createResultReg(&X86::GR8RegClass);
      Register FlagReg2 = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
              FlagReg1).addImm(SETFOpc[0]);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
              FlagReg2).addImm(SETFOpc[1]);
      auto const &II = TII.get(SETFOpc[2]);
      if (II.getNumDefs()) {
        Register TmpReg = createResultReg(&X86::GR8RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, TmpReg)
            .addReg(FlagReg2).addReg(FlagReg1);
      } else {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
            .addReg(FlagReg2).addReg(FlagReg1);
      }
    }
    NeedTest = false;
  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
    // Fake-request the condition; otherwise the intrinsic might be completely
    // optimized away.
    Register TmpReg = getRegForValue(Cond);
    if (TmpReg == 0)
      return false;

    NeedTest = false;
  }

  if (NeedTest) {
    // Selects operate on i1; however, CondReg is 8 bits wide and may contain
    // garbage. Indeed, only the least significant bit is supposed to be
    // accurate. If we read more than the lsb, we may see non-zero values
    // where the lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
    // the select. This is achieved by performing TEST against 1.
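    // Illustrative (not from the original source): the truncation-by-TEST
    // emitted below is simply, e.g.,
    //   test $1, %cl    ; ZF is set iff the low bit of %cl is clear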
    Register CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;

    // In case CondReg is a K register, COPY to a GPR.
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg);
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
        .addReg(CondReg)
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register RHSReg = getRegForValue(RHS);
  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC) / 8, false,
                                    Subtarget->hasNDD());
  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
  return true;
}

/// Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
  if (!CI || (CI->getParent() != I->getParent()))
    return false;

  if (I->getType() != CI->getOperand(0)->getType() ||
      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
    return false;

  const Value *CmpLHS = CI->getOperand(0);
  const Value *CmpRHS = CI->getOperand(1);
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
    if (CmpRHSC && CmpRHSC->isNullValue())
      CmpRHS = CmpLHS;
  }

  unsigned CC;
  bool NeedSwap;
  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
  if (CC > 7 && !Subtarget->hasAVX())
    return false;

  if (NeedSwap)
    std::swap(CmpLHS, CmpRHS);

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register LHSReg = getRegForValue(LHS);
  Register RHSReg = getRegForValue(RHS);
  Register CmpLHSReg = getRegForValue(CmpLHS);
  Register CmpRHSReg = getRegForValue(CmpRHS);
  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  unsigned ResultReg;

  if (Subtarget->hasAVX512()) {
    // If we have AVX512 we can use a mask compare and masked movss/sd.
    const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
    const TargetRegisterClass *VK1 = &X86::VK1RegClass;

    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSZrri : X86::VCMPSDZrri;
    Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
                                       CC);

    // Need an IMPLICIT_DEF for the input that is used to generate the upper
    // bits of the result register since it's not based on any of the inputs.
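    // Illustrative sketch (not from the original source; AT&T operand order
    // simplified): for 'select (fcmp olt f32 %a, %b), f32 %x, f32 %y' this
    // path emits roughly
    //   vcmpltss %xmm_b, %xmm_a, %k1           ; k1 = (a < b)
    //   vmovss %xmm_x, ..., %xmm_dst {%k1}     ; x where k1 set, else y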
    Register ImplicitDefReg = createResultReg(VR128X);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    // Place RHSReg in the passthru of the masked movss/sd operation and put
    // LHSReg in the input. The mask input comes from the compare.
    unsigned MovOpcode =
      (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
                                        ImplicitDefReg, LHSReg);

    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);

  } else if (Subtarget->hasAVX()) {
    const TargetRegisterClass *VR128 = &X86::VR128RegClass;

    // If we have AVX, create 1 blendv instead of 3 logic instructions.
    // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
    // uses XMM0 as the selection register. That may need just as many
    // instructions as the AND/ANDN/OR sequence due to register moves, so
    // don't bother.
    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri;
    unsigned BlendOpcode =
      (RetVT == MVT::f32) ? X86::VBLENDVPSrrr : X86::VBLENDVPDrrr;

    Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
                                       CC);
    Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
                                          CmpReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
  } else {
    // Choose the SSE instruction sequence based on data type (float or
    // double).
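    // Illustrative sketch (not from the original source): without AVX the
    // select is simulated branchlessly through an all-ones/all-zeros mask,
    // e.g. for 'select (fcmp olt %a, %b), %x, %y' roughly
    //   cmpltss %xmm_b, %xmm_a   ; mask = (a < b) ? 0xFFFFFFFF : 0
    //   andps   mask, x          ; keep x where the mask is set
    //   andnps  mask, y          ; keep y where the mask is clear
    //   orps    ...              ; combine the two halves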
    static const uint16_t OpcTable[2][4] = {
      { X86::CMPSSrri, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
      { X86::CMPSDrri, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
    };

    const uint16_t *Opc = nullptr;
    switch (RetVT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = &OpcTable[0][0]; break;
    case MVT::f64: Opc = &OpcTable[1][0]; break;
    }

    const TargetRegisterClass *VR128 = &X86::VR128RegClass;
    Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
    Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
    Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
    Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
  }
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  // These are pseudo CMOV instructions and will be later expanded into
  // control-flow.
  unsigned Opc;
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::i8:  Opc = X86::CMOV_GR8;  break;
  case MVT::i16: Opc = X86::CMOV_GR16; break;
  case MVT::i32: Opc = X86::CMOV_GR32; break;
  case MVT::f16:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
  case MVT::f32:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
  case MVT::f64:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
  }

  const Value *Cond = I->getOperand(0);
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
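  // Illustrative (not from the original source): after FastISel, a CMOV_*
  // pseudo is expanded into an explicit branch diamond, roughly
  //   test $1, %cond
  //   jne  .Ltrue     ; fall through picks the false value
  //   ...             ; the two values merge in the join block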
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
    if (CC > X86::LAST_VALID_COND)
      return false;

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);

    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;
  } else {
    Register CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;

    // In case CondReg is a K register, COPY to a GPR.
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg);
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
        .addReg(CondReg)
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register LHSReg = getRegForValue(LHS);
  Register RHSReg = getRegForValue(RHS);
  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);

  Register ResultReg =
      fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSelect(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Check if we can fold the select.
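  // Illustrative (not from the original source): after predicate
  // optimization, a select like
  //   %r = select (fcmp true ...), i32 %a, i32 %b
  // needs no conditional code at all; it degenerates into a plain copy of
  // %a, which is exactly what the block below emits.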
  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    const Value *Opnd = nullptr;
    switch (Predicate) {
    default: break;
    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
    }
    // No need for a select anymore - this is an unconditional move.
    if (Opnd) {
      Register OpReg = getRegForValue(Opnd);
      if (OpReg == 0)
        return false;
      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(OpReg);
      updateValueMap(I, ResultReg);
      return true;
    }
  }

  // First try to use real conditional move instructions.
  if (X86FastEmitCMoveSelect(RetVT, I))
    return true;

  // Try to use a sequence of SSE instructions to simulate a conditional move.
  if (X86FastEmitSSESelect(RetVT, I))
    return true;

  // Fall back to pseudo conditional move instructions, which will be later
  // converted to control-flow.
  if (X86FastEmitPseudoSelect(RetVT, I))
    return true;

  return false;
}

// Common code for X86SelectSIToFP and X86SelectUIToFP.
bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
  // The target-independent selection algorithm in FastISel already knows how
  // to select a SINT_TO_FP if the target is SSE but not AVX.
  // Early exit if the subtarget doesn't have AVX.
  // Unsigned conversion requires AVX512.
  bool HasAVX512 = Subtarget->hasAVX512();
  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
    return false;

  // TODO: We could sign extend narrower types.
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  // Select integer to float/double conversion.
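  // Illustrative (not from the original source): 'sitofp i64 %n to double'
  // with AVX becomes roughly
  //   vcvtsi2sdq %rax, %xmm0, %xmm0
  // where the first xmm source only supplies the untouched upper bits of the
  // destination (hence the IMPLICIT_DEF emitted below).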
  Register OpReg = getRegForValue(I->getOperand(0));
  if (OpReg == 0)
    return false;

  unsigned Opcode;

  static const uint16_t SCvtOpc[2][2][2] = {
    { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
      { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
    { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
      { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
  };
  static const uint16_t UCvtOpc[2][2] = {
    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
  };
  bool Is64Bit = SrcVT == MVT::i64;

  if (I->getType()->isDoubleTy()) {
    // s/uitofp int -> double
    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
  } else if (I->getType()->isFloatTy()) {
    // s/uitofp int -> float
    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
  } else
    return false;

  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
  Register ImplicitDefReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned*/true);
}

bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned*/false);
}

// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
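// Illustrative (not from the original source): the helper below emits a
// single scalar conversion, e.g. 'fpext float %f to double' with AVX is
// roughly
//   vcvtss2sd %xmm1, %xmm0, %xmm0
// again threading an IMPLICIT_DEF through the destination's upper bits.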
bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
                                          unsigned TargetOpc,
                                          const TargetRegisterClass *RC) {
  assert((I->getOpcode() == Instruction::FPExt ||
          I->getOpcode() == Instruction::FPTrunc) &&
         "Instruction must be an FPExt or FPTrunc!");
  bool HasAVX = Subtarget->hasAVX();

  Register OpReg = getRegForValue(I->getOperand(0));
  if (OpReg == 0)
    return false;

  unsigned ImplicitDefReg;
  if (HasAVX) {
    ImplicitDefReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  }

  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpc),
                ResultReg);

  if (HasAVX)
    MIB.addReg(ImplicitDefReg);

  MIB.addReg(OpReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
      I->getOperand(0)->getType()->isFloatTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
    // fpext from float to double.
    unsigned Opc =
        HasAVX512 ? X86::VCVTSS2SDZrr
                  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
  if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
      I->getOperand(0)->getType()->isDoubleTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
    // fptrunc from double to float.
    unsigned Opc =
        HasAVX512 ? X86::VCVTSD2SSZrr
                  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
  }

  return false;
}

bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(DL, I->getType());

  // This code only handles truncation to byte.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    return false;
  if (!TLI.isTypeLegal(SrcVT))
    return false;

  Register InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand. Halt "fast" selection and bail.
    return false;

  if (SrcVT == MVT::i8) {
    // Truncate from i8 to i1; no code needed.
    updateValueMap(I, InputReg);
    return true;
  }

  // Issue an extract_subreg.
  Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
                                                  X86::sub_8bit);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::IsMemcpySmall(uint64_t Len) {
  return Len <= (Subtarget->is64Bit() ? 32 : 16);
}

bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
                                     X86AddressMode SrcAM, uint64_t Len) {

  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemcpySmall(Len))
    return false;

  bool i64Legal = Subtarget->is64Bit();

  // We don't care about alignment here since we just emit integer accesses.
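  // Illustrative (not from the original source): on x86-64, Len == 13 is
  // copied as one i64, one i32 and one i8 access, i.e. roughly
  //   mov 0(%src), %rax  ;  mov %rax, 0(%dst)
  //   mov 8(%src), %ecx  ;  mov %ecx, 8(%dst)
  //   mov 12(%src), %dl  ;  mov %dl, 12(%dst)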
  while (Len) {
    MVT VT;
    if (Len >= 8 && i64Legal)
      VT = MVT::i64;
    else if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else
      VT = MVT::i8;

    unsigned Reg;
    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
    RV &= X86FastEmitStore(VT, Reg, DestAM);
    assert(RV && "Failed to emit load or store??");
    (void)RV;

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    DestAM.Disp += Size;
    SrcAM.Disp += Size;
  }

  return true;
}

bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16: {
    if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
      return false;

    const Value *Op = II->getArgOperand(0);
    Register InputReg = getRegForValue(Op);
    if (InputReg == 0)
      return false;

    // F16C only allows converting from float to half and from half to float.
    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
    if (IsFloatToHalf) {
      if (!Op->getType()->isFloatTy())
        return false;
    } else {
      if (!II->getType()->isFloatTy())
        return false;
    }

    unsigned ResultReg = 0;
    const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
    if (IsFloatToHalf) {
      // 'InputReg' is implicitly promoted from register class FR32 to
      // register class VR128 by method 'constrainOperandRegClass' which is
      // directly called by 'fastEmitInst_ri'.
      // Instruction VCVTPS2PHrr takes an extra immediate operand which is
      // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
      // It's consistent with the other FP instructions, which are usually
      // controlled by MXCSR.
      unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
                                         : X86::VCVTPS2PHrr;
      InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);

      // Move the lower 32-bits of ResultReg to another register of class
      // GR32.
      Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
                                   : X86::VMOVPDI2DIrr;
      ResultReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
          .addReg(InputReg, RegState::Kill);

      // The result value is in the lower 16-bits of ResultReg.
      unsigned RegIdx = X86::sub_16bit;
      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
    } else {
      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
      // Explicitly zero-extend the input to 32-bit.
      InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);

      // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
      InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
                            InputReg);

      unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
                                         : X86::VCVTPH2PSrr;
      InputReg = fastEmitInst_r(Opc, RC, InputReg);

      // The result value is in the lower 32-bits of ResultReg.
      // Emit an explicit copy from register class VR128 to register class
      // FR32.
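      // Illustrative (not from the original source): the half-to-float path
      // above corresponds roughly to
      //   movzwl %ax, %eax        ; zero-extend the i16 bit pattern
      //   vmovd  %eax, %xmm0      ; scalar -> vector
      //   vcvtph2ps %xmm0, %xmm0  ; convert the low half element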
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(InputReg, RegState::Kill);
    }

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::frameaddress: {
    MachineFunction *MF = FuncInfo.MF;
    if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Opc;
    const TargetRegisterClass *RC = nullptr;

    switch (VT.SimpleTy) {
    default: llvm_unreachable("Invalid result type for frameaddress.");
    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
    }

    // This needs to be set before we call getPtrSizedFrameRegister, otherwise
    // we get the wrong frame register.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
    assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
            (FrameReg == X86::EBP && VT == MVT::i32)) &&
           "Invalid Frame Register!");

    // Always make a copy of the frame register to a vreg first, so that we
    // never directly reference the frame register (the TwoAddressInstruction-
    // Pass doesn't like that).
    Register SrcReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);

    // Now recursively load from the frame address.
    // movq (%rbp), %rax
    // movq (%rax), %rax
    // movq (%rax), %rax
    // ...
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      Register DestReg = createResultReg(RC);
      addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                           TII.get(Opc), DestReg), SrcReg);
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy: {
    const MemCpyInst *MCI = cast<MemCpyInst>(II);
    // Don't handle volatile or variable length memcpys.
    if (MCI->isVolatile())
      return false;

    if (isa<ConstantInt>(MCI->getLength())) {
      // Small memcpy's are common enough that we want to do them
      // without a call if possible.
      uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
      if (IsMemcpySmall(Len)) {
        X86AddressMode DestAM, SrcAM;
        if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
            !X86SelectAddress(MCI->getRawSource(), SrcAM))
          return false;
        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
        return true;
      }
    }

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memcpy", II->arg_size() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);

    if (MSI->isVolatile())
      return false;

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memset", II->arg_size() - 1);
  }
  case Intrinsic::stackprotector: {
    // Emit code to store the stack guard onto the stack.
    EVT PtrTy = TLI.getPointerTy(DL);

    const Value *Op1 = II->getArgOperand(0); // The guard's value.
    const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));

    MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);

    // Grab the frame index.
    X86AddressMode AM;
    if (!X86SelectAddress(Slot, AM)) return false;
    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
           "Expected inlined-at fields to agree");
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II), AM)
        .addImm(0)
        .addMetadata(DI->getVariable())
        .addMetadata(DI->getExpression());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sqrt: {
    if (!Subtarget->hasSSE1())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
    // is not generated by FastISel yet.
    // FIXME: Update this code once tablegen can handle it.
    static const uint16_t SqrtOpc[3][2] = {
        { X86::SQRTSSr,   X86::SQRTSDr },
        { X86::VSQRTSSr,  X86::VSQRTSDr },
        { X86::VSQRTSSZr, X86::VSQRTSDZr },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
    }

    const Value *SrcVal = II->getArgOperand(0);
    Register SrcReg = getRegForValue(SrcVal);

    if (SrcReg == 0)
      return false;

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned ImplicitDefReg = 0;
    if (AVXLevel > 0) {
      ImplicitDefReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
    }

    Register ResultReg = createResultReg(RC);
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                  ResultReg);

    if (ImplicitDefReg)
      MIB.addReg(ImplicitDefReg);

    MIB.addReg(SrcReg);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics
    // into add/sub/mul followed by either seto or setb.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);
    assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
           Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
           "Overflow value expected to be an i1");

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT < MVT::i8 || VT > MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    // Canonicalize immediate to the RHS.
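    // Illustrative (not from the original source): for
    // 'call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)' the code
    // below produces roughly
    //   add  %b_reg, %a_reg   ; first result (the sum)
    //   seto %cl              ; second result (the overflow bit)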
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
      std::swap(LHS, RHS);

    unsigned BaseOpc, CondCode;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
    case Intrinsic::uadd_with_overflow:
      BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
    case Intrinsic::ssub_with_overflow:
      BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
    case Intrinsic::usub_with_overflow:
      BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
    case Intrinsic::smul_with_overflow:
      BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
    case Intrinsic::umul_with_overflow:
      BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
    }

    Register LHSReg = getRegForValue(LHS);
    if (LHSReg == 0)
      return false;

    unsigned ResultReg = 0;
    // Check if we have an immediate version.
    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
      static const uint16_t Opc[2][4] = {
        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
      };

      if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
          CondCode == X86::COND_O) {
        // We can use INC/DEC.
        ResultReg = createResultReg(TLI.getRegClassFor(VT));
        bool IsDec = BaseOpc == ISD::SUB;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
            .addReg(LHSReg);
      } else
        ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
    }

    unsigned RHSReg;
    if (!ResultReg) {
      RHSReg = getRegForValue(RHS);
      if (RHSReg == 0)
        return false;
      ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
    }

    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
    // it manually.
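    // Illustrative (not from the original source): unsigned multiply uses the
    // one-operand form with an implicit accumulator, e.g. for i32 roughly
    //   mov  %lhs_reg, %eax
    //   mull %rhs_reg        ; EDX:EAX = EAX * r/m32, OF/CF set on overflow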
    if (BaseOpc == X86ISD::UMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
      static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
      // First copy the first operand into RAX, which is an implicit input to
      // the X86::MUL*r instruction.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
          .addReg(LHSReg);
      ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
                                 TLI.getRegClassFor(VT), RHSReg);
    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
      if (VT == MVT::i8) {
        // Copy the first operand into AL, which is an implicit input to the
        // X86::IMUL8r instruction.
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::COPY), X86::AL)
            .addReg(LHSReg);
        ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
      } else
        ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
                                    TLI.getRegClassFor(VT), LHSReg, RHSReg);
    }

    if (!ResultReg)
      return false;

    // Assign to a GPR since the overflow return value is lowered to a SETcc.
    Register ResultReg2 = createResultReg(&X86::GR8RegClass);
    assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
            ResultReg2).addImm(CondCode);

    updateValueMap(II, ResultReg, 2);
    return true;
  }
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    bool IsInputDouble;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::x86_sse_cvttss2si:
    case Intrinsic::x86_sse_cvttss2si64:
      if (!Subtarget->hasSSE1())
        return false;
      IsInputDouble = false;
      break;
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse2_cvttsd2si64:
      if (!Subtarget->hasSSE2())
        return false;
      IsInputDouble = true;
      break;
    }

    Type *RetTy = II->getCalledFunction()->getReturnType();
    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    static const uint16_t CvtOpc[3][2][2] = {
      { { X86::CVTTSS2SIrr,   X86::CVTTSS2SI64rr },
        { X86::CVTTSD2SIrr,   X86::CVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIrr,  X86::VCVTTSS2SI64rr },
        { X86::VCVTTSD2SIrr,  X86::VCVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
        { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected result type.");
    case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
    case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
    }

    // Check if we can fold insertelement instructions into the convert.
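    // Illustrative (not from the original source): given
    //   %v = insertelement <4 x float> undef, float %f, i32 0
    //   %r = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v)
    // the walk below lands on %f, so the convert can read %f's register
    // directly instead of materializing the vector.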
    const Value *Op = II->getArgOperand(0);
    while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
      const Value *Index = IE->getOperand(2);
      if (!isa<ConstantInt>(Index))
        break;
      unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();

      if (Idx == 0) {
        Op = IE->getOperand(1);
        break;
      }
      Op = IE->getOperand(0);
    }

    Register Reg = getRegForValue(Op);
    if (Reg == 0)
      return false;

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(Reg);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::x86_sse42_crc32_32_8:
  case Intrinsic::x86_sse42_crc32_32_16:
  case Intrinsic::x86_sse42_crc32_32_32:
  case Intrinsic::x86_sse42_crc32_64_64: {
    if (!Subtarget->hasCRC32())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Opc;
    const TargetRegisterClass *RC = nullptr;

    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
    case Intrinsic::x86_sse42_crc32_32_8:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
      RC = &X86::GR32RegClass;
      break;
    case Intrinsic::x86_sse42_crc32_32_16:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
      RC = &X86::GR32RegClass;
      break;
    case Intrinsic::x86_sse42_crc32_32_32:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
      RC = &X86::GR32RegClass;
      break;
    case Intrinsic::x86_sse42_crc32_64_64:
      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
      RC = &X86::GR64RegClass;
      break;
#undef GET_EGPR_IF_ENABLED
    }

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    Register LHSReg = getRegForValue(LHS);
    Register RHSReg = getRegForValue(RHS);
    if (!LHSReg || !RHSReg)
      return false;

    Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  }
}

bool X86FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  if (Subtarget->isCallingConvWin64(CC))
    return false;

  if (!Subtarget->is64Bit())
    return false;

  if (Subtarget->useSoftFloat())
    return false;

  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments and up to
  // 8 f32/f64 arguments.
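  // For illustration, a signature such as
  //   define i64 @f(i32 %a, i64 %b, double %c)
  // qualifies; under the SysV AMD64 convention %a, %b and %c arrive in EDI,
  // RSI and XMM0 respectively.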
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i32:
    case MVT::i64:
      ++GPRCnt;
      break;
    case MVT::f32:
    case MVT::f64:
      if (!Subtarget->hasSSE1())
        return false;
      ++FPRCnt;
      break;
    }

    if (GPRCnt > 6)
      return false;

    if (FPRCnt > 8)
      return false;
  }

  static const MCPhysReg GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const MCPhysReg GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
  };
  static const MCPhysReg XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned SrcReg;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
    case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
    case MVT::f32: [[fallthrough]];
    case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
    }
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}

static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
                                                  CallingConv::ID CC,
                                                  const CallBase *CB) {
  if (Subtarget->is64Bit())
    return 0;
  if (Subtarget->getTargetTriple().isOSMSVCRT())
    return 0;
  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
      CC == CallingConv::HiPE || CC == CallingConv::Tail ||
      CC == CallingConv::SwiftTail)
    return 0;

  if (CB)
    if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
        CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
      return 0;

  return 4;
}

bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  auto &OutVals       = CLI.OutVals;
  auto &OutFlags      = CLI.OutFlags;
  auto &OutRegs       = CLI.OutRegs;
  auto &Ins           = CLI.Ins;
  auto &InRegs        = CLI.InRegs;
  CallingConv::ID CC  = CLI.CallConv;
  bool &IsTailCall    = CLI.IsTailCall;
  bool IsVarArg       = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol    = CLI.Symbol;
  const auto *CB      = CLI.CB;

  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isCallingConvWin64(CC);

  // Call / invoke instructions with NoCfCheck attribute require special
  // handling.
  if (CB && CB->doesNoCfCheck())
    return false;

  // Functions with no_caller_saved_registers need special handling.
  if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
    return false;

  // Functions with no_callee_saved_registers need special handling.
  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
    return false;

  // Indirect calls with CFI checks need special handling.
  if (CB && CB->isIndirectCall() && CB->getOperandBundle(LLVMContext::OB_kcfi))
    return false;

  // Functions using thunks for indirect calls need to use SDISel.
  if (Subtarget->useIndirectThunkCalls())
    return false;

  // Only handle the calling conventions listed below for now.
  switch (CC) {
  default: return false;
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Tail:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_StdCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::Win64:
  case CallingConv::X86_64_SysV:
  case CallingConv::CFGuard_Check:
    break;
  }

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
      CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
    return false;

  // Don't know how to handle Win64 varargs yet. Nothing special is needed for
  // x86-32; special handling for x86-64 varargs is implemented below.
  if (IsVarArg && IsWin64)
    return false;

  // Don't know about inalloca yet.
  if (CLI.CB && CLI.CB->hasInAllocaArgument())
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isSwiftError() || Flag.isPreallocated())
      return false;

  SmallVector<MVT, 16> OutVTs;
  SmallVector<unsigned, 16> ArgRegs;

  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
  // instruction. This is safe because it is common to all FastISel supported
  // calling conventions on x86.
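  // E.g. an i8 constant 42 that would be passed sign- or zero-extended is
  // rewritten here as the i32 constant 42, so no extend has to be emitted.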
  for (int i = 0, e = OutVals.size(); i != e; ++i) {
    Value *&Val = OutVals[i];
    ISD::ArgFlagsTy Flags = OutFlags[i];
    if (auto *CI = dyn_cast<ConstantInt>(Val)) {
      if (CI->getBitWidth() < 32) {
        if (Flags.isSExt())
          Val = ConstantInt::get(CI->getContext(), CI->getValue().sext(32));
        else
          Val = ConstantInt::get(CI->getContext(), CI->getValue().zext(32));
      }
    }

    // Passing bools around ends up doing a trunc to i1 and passing it.
    // Codegen this as an argument + "and 1".
    MVT VT;
    auto *TI = dyn_cast<TruncInst>(Val);
    unsigned ResultReg;
    if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
        (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
      Value *PrevVal = TI->getOperand(0);
      ResultReg = getRegForValue(PrevVal);

      if (!ResultReg)
        return false;

      if (!isTypeLegal(PrevVal->getType(), VT))
        return false;

      ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
    } else {
      if (!isTypeLegal(Val->getType(), VT) ||
          (VT.isVector() && VT.getVectorElementType() == MVT::i1))
        return false;
      ResultReg = getRegForValue(Val);
    }

    if (!ResultReg)
      return false;

    ArgRegs.push_back(ResultReg);
    OutVTs.push_back(VT);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());

  // Allocate the shadow area for Win64.
  if (IsWin64)
    CCInfo.AllocateStack(32, Align(8));

  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
    .addImm(NumBytes).addImm(0).addImm(0);

  // Walk the register/memloc assignments, inserting copies/loads.
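  // Register-assigned values are copied into their physical argument
  // registers; memory-assigned values are stored at stack-pointer-relative
  // offsets inside the frame area reserved by CALLSEQ_START above.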
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  for (const CCValAssign &VA : ArgLocs) {
    const Value *ArgVal = OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    if (ArgVT == MVT::x86mmx)
      return false;

    unsigned ArgReg = ArgRegs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");

      if (ArgVT == MVT::i1)
        return false;

      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");

      // Handle zero-extension from i1 to i8, which is common.
      if (ArgVT == MVT::i1) {
        // Set the high bits to zero.
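        // Only bit 0 of an i1 register is defined; fastEmitZExtFromI1 masks
        // the value with "and 1" so the upper bits of the i8 become zero.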
        ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
        ArgVT = MVT::i8;

        if (ArgReg == 0)
          return false;
      }

      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                    ArgVT, ArgReg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                    ArgVT, ArgReg);

      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::BCvt: {
      ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
      assert(ArgReg && "Failed to emit a bitcast!");
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::VExt:
      // VExt has not been implemented, so this should be impossible to reach
      // for now. However, fall back to SelectionDAG isel once implemented.
      return false;
    case CCValAssign::AExtUpper:
    case CCValAssign::SExtUpper:
    case CCValAssign::ZExtUpper:
    case CCValAssign::FPExt:
    case CCValAssign::Trunc:
      llvm_unreachable("Unexpected loc info!");
    case CCValAssign::Indirect:
      // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
      // support this.
      return false;
    }

    if (VA.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      OutRegs.push_back(VA.getLocReg());
    } else {
      assert(VA.isMemLoc() && "Unknown value location!");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = RegInfo->getStackRegister();
      AM.Disp = LocMemOffset;
      ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
      Align Alignment = DL.getABITypeAlign(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
      if (Flags.isByVal()) {
        X86AddressMode SrcAM;
        SrcAM.Base.Reg = ArgReg;
        if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
          return false;
      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
        // If this is a really simple value, emit this with the Value* version
        // of X86FastEmitStore. If it isn't simple, we don't want to do this,
        // as it can cause us to reevaluate the argument.
        if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
          return false;
      } else {
        if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
          return false;
      }
    }
  }

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls made via the PLT.
  if (Subtarget->isPICStyleGOT()) {
    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
  }

  if (Is64Bit && IsVarArg && !IsWin64) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
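    // E.g. a call like printf("%f %f", a, b) with two double arguments in
    // XMM0/XMM1 gets "movb $2, %al" emitted below (illustrative example).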
    static const MCPhysReg XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
    assert((Subtarget->hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
            X86::AL).addImm(NumXMMRegs);
  }

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectCallAddress(Callee, CalleeAM))
    return false;

  unsigned CalleeOp = 0;
  const GlobalValue *GV = nullptr;
  if (CalleeAM.GV != nullptr) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
      .addReg(CalleeOp);
  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
    if (OpFlags == X86II::MO_PLT && !Is64Bit &&
        TM.getRelocationModel() == Reloc::Static && isa<Function>(GV) &&
        cast<Function>(GV)->isIntrinsic())
      OpFlags = X86II::MO_NO_FLAG;

    // This will be a direct call, or an indirect call through memory for
    // NonLazyBind calls or dllimport calls.
    bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
                    OpFlags == X86II::MO_GOTPCREL ||
                    OpFlags == X86II::MO_GOTPCREL_NORELAX ||
                    OpFlags == X86II::MO_COFFSTUB;
    unsigned CallOpc = NeedLoad
                       ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
                       : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc));
    if (NeedLoad)
      MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
    if (Symbol)
      MIB.addSym(Symbol, OpFlags);
    else
      MIB.addGlobalAddress(GV, 0, OpFlags);
    if (NeedLoad)
      MIB.addReg(0);
  }

  // Add a register mask operand representing the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX, RegState::Implicit);

  if (Is64Bit && IsVarArg && !IsWin64)
    MIB.addReg(X86::AL, RegState::Implicit);

  // Add implicit physical register uses to the call.
  for (auto Reg : OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Issue CALLSEQ_END
  unsigned NumBytesForCalleeToPop =
      X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
                       TM.Options.GuaranteedTailCallOpt)
          ? NumBytes // Callee pops everything.
          : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(NumBytesForCalleeToPop);

  // Now handle call return values.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
                    CLI.RetTy->getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);

  // Copy all of the result registers out of their specified physreg.
  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    EVT CopyVT = VA.getValVT();
    unsigned CopyReg = ResultReg + i;
    Register SrcReg = VA.getLocReg();

    // If this is x86-64 and SSE is disabled, we can't return FP values.
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
      report_fatal_error("SSE register return with SSE disabled");
    }

    // If we prefer to use the value in xmm registers, copy it out as f80 and
    // use a truncate to move it from fp stack reg to xmm reg.
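    // E.g. a float returned in ST(0) by a 32-bit callee is copied out as f80
    // here, spilled to a stack slot, and reloaded into an XMM register below.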
    if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      CopyVT = MVT::f80;
      CopyReg = createResultReg(&X86::RFP80RegClass);
    }

    // Copy out the result.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
    InRegs.push_back(VA.getLocReg());

    // Round the f80 to the right size, which also moves it to the appropriate
    // xmm register. This is accomplished by storing the f80 value in memory
    // and then loading it back.
    if (CopyVT != VA.getValVT()) {
      EVT ResVT = VA.getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                TII.get(Opc)), FI)
        .addReg(CopyReg);
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                TII.get(Opc), ResultReg + i), FI);
    }
  }

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();
  CLI.Call = MIB;

  return true;
}

bool
X86FastISel::fastSelectInstruction(const Instruction *I)  {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::SExt:
    return X86SelectSExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
    return X86SelectDivRem(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::SIToFP:
    return X86SelectSIToFP(I);
  case Instruction::UIToFP:
    return X86SelectUIToFP(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(DL, I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    Register Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    updateValueMap(I, Reg);
    return true;
  }
  case Instruction::BitCast: {
    // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
    if (!Subtarget->hasSSE2())
      return false;

    MVT SrcVT, DstVT;
    if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
        !isTypeLegal(I->getType(), DstVT))
      return false;

    // Only allow vectors that use xmm/ymm/zmm.
    if (!SrcVT.isVector() || !DstVT.isVector() ||
        SrcVT.getVectorElementType() == MVT::i1 ||
        DstVT.getVectorElementType() == MVT::i1)
      return false;

    Register Reg = getRegForValue(I->getOperand(0));
    if (!Reg)
      return false;

    // Emit a reg-reg copy so we don't propagate cached known bits information
    // with the wrong VT if we fall out of fast isel after selecting this.
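    // The bitcast itself needs no instruction: e.g. <4 x i32> -> <2 x i64>
    // merely reinterprets the same 128 bits, so a plain register copy is
    // enough.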
    const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
    Register ResultReg = createResultReg(DstClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);

    updateValueMap(I, ResultReg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  uint64_t Imm = CI->getZExtValue();
  if (Imm == 0) {
    Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type");
    case MVT::i1:
    case MVT::i8:
      return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
    case MVT::i16:
      return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
    case MVT::i32:
      return SrcReg;
    case MVT::i64: {
      Register ResultReg = createResultReg(&X86::GR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
      return ResultReg;
    }
    }
  }

  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type");
  case MVT::i1:
    VT = MVT::i8;
    [[fallthrough]];
  case MVT::i8:  Opc = X86::MOV8ri;  break;
  case MVT::i16: Opc = X86::MOV16ri; break;
  case MVT::i32: Opc = X86::MOV32ri; break;
  case MVT::i64: {
    if (isUInt<32>(Imm))
      Opc = X86::MOV32ri64;
    else if (isInt<32>(Imm))
      Opc = X86::MOV64ri32;
    else
      Opc = X86::MOV64ri;
    break;
  }
  }
  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}

unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  // Can't handle code models other than small, medium and large yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Medium &&
      CM != CodeModel::Large)
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
                    : X86::LD_Fp32m;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
                    : X86::LD_Fp64m;
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  else if (OpFlag == X86II::MO_GOTOFF)
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  else if (Subtarget->is64Bit() && TM.getCodeModel() != CodeModel::Large)
    PICBase = X86::RIP;

  // Create the load from the constant pool.
  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));

  // Large code model only applies to 64-bit mode.
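  // Under the large code model the constant pool may be out of RIP-relative
  // range, so the pool address is first materialized with MOV64ri and the
  // load below is issued through that register.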
  if (Subtarget->is64Bit() && CM == CodeModel::Large) {
    Register AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
            AddrReg)
      .addConstantPoolIndex(CPI, 0, OpFlag);
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                      TII.get(Opc), ResultReg);
    addRegReg(MIB, AddrReg, false, PICBase, false);
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getConstantPool(*FuncInfo.MF),
        MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
    MIB->addMemOperand(*FuncInfo.MF, MMO);
    return ResultReg;
  }

  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                   TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
  return ResultReg;
}

unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
  // Can't handle large GlobalValues yet.
  if (TM.getCodeModel() != CodeModel::Small &&
      TM.getCodeModel() != CodeModel::Medium)
    return 0;
  if (TM.isLargeGlobalValue(GV))
    return 0;

  // Materialize addresses with LEA/MOV instructions.
  X86AddressMode AM;
  if (X86SelectAddress(GV, AM)) {
    // If the expression is just a basereg, then we're done, otherwise we need
    // to emit an LEA.
    if (AM.BaseType == X86AddressMode::RegBase &&
        AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
      return AM.Base.Reg;

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (TM.getRelocationModel() == Reloc::Static &&
        TLI.getPointerTy(DL) == MVT::i64) {
      // The displacement could be more than 32 bits away, so we need to use
      // an instruction with a 64-bit immediate.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
              ResultReg)
        .addGlobalAddress(GV);
    } else {
      unsigned Opc =
          TLI.getPointerTy(DL) == MVT::i32
              ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
              : X86::LEA64r;
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                             TII.get(Opc), ResultReg), AM);
    }
    return ResultReg;
  }
  return 0;
}

unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  if (const auto *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  if (const auto *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);
  if (isa<UndefValue>(C)) {
    unsigned Opc = 0;
    switch (VT.SimpleTy) {
    default:
      break;
    case MVT::f32:
      if (!Subtarget->hasSSE1())
        Opc = X86::LD_Fp032;
      break;
    case MVT::f64:
      if (!Subtarget->hasSSE2())
        Opc = X86::LD_Fp064;
      break;
    case MVT::f80:
      Opc = X86::LD_Fp080;
      break;
    }

    if (Opc) {
      Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
              ResultReg);
      return ResultReg;
    }
  }

  return 0;
}

unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but targetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and targetMaterializeAlloca.
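  // A static alloca thus only reaches the LEA below via the StaticAllocaMap
  // check; its address is computed with a single LEA off the frame index.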
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc =
      TLI.getPointerTy(DL) == MVT::i32
          ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
          : X86::LEA64r;
  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
  Register ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasAVX512 = Subtarget->hasAVX512();
  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f16:
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
    break;
  case MVT::f32:
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
          : HasSSE1 ? X86::FsFLD0SS
                    : X86::LD_Fp032;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
          : HasSSE2 ? X86::FsFLD0SD
                    : X86::LD_Fp064;
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
  return ResultReg;
}

bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo &)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result = XII.foldMemoryOperandImpl(
      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size,
      LI->getAlign(), /*AllowCommute=*/true);
  if (!Result)
    return false;

  // The index register could be in the wrong register class. Unfortunately,
  // foldMemoryOperandImpl could have commuted the instruction, so it's not
  // enough to just look at OpNo + the offset to the index reg. We actually
  // need to scan the instruction to find the index reg and see if it's in the
  // correct reg class.
  unsigned OperandNo = 0;
  for (MachineInstr::mop_iterator I = Result->operands_begin(),
       E = Result->operands_end(); I != E; ++I, ++OperandNo) {
    MachineOperand &MO = *I;
    if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
      continue;
    // Found the index reg, now try to rewrite it.
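    // constrainOperandRegClass returns a register that satisfies the class
    // required by the folded opcode's operand (possibly a fresh one); if it
    // differs from the current register, point the operand at it.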
    Register IndexReg = constrainOperandRegClass(Result->getDesc(),
                                                 MO.getReg(), OperandNo);
    if (IndexReg == MO.getReg())
      continue;
    MO.setReg(IndexReg);
  }

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
                                        const TargetRegisterClass *RC,
                                        unsigned Op0, unsigned Op1,
                                        unsigned Op2, unsigned Op3) {
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  Register ResultReg = createResultReg(RC);
  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);

  if (II.getNumDefs() >= 1)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
        .addReg(Op0)
        .addReg(Op1)
        .addReg(Op2)
        .addReg(Op3);
  else {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
        .addReg(Op0)
        .addReg(Op1)
        .addReg(Op2)
        .addReg(Op3);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
            ResultReg)
        .addReg(II.implicit_defs()[0]);
  }
  return ResultReg;
}

namespace llvm {
  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    return new X86FastISel(funcInfo, libInfo);
  }
}