xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp (revision 5deeebd8c6ca991269e72902a7a62cada57947f6)
10b57cec5SDimitry Andric //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines the X86-specific support for the FastISel class. Much
100b57cec5SDimitry Andric // of the target-specific code is generated by tablegen in the file
110b57cec5SDimitry Andric // X86GenFastISel.inc, which is #included here.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "X86.h"
160b57cec5SDimitry Andric #include "X86CallingConv.h"
170b57cec5SDimitry Andric #include "X86InstrBuilder.h"
180b57cec5SDimitry Andric #include "X86InstrInfo.h"
190b57cec5SDimitry Andric #include "X86MachineFunctionInfo.h"
200b57cec5SDimitry Andric #include "X86RegisterInfo.h"
210b57cec5SDimitry Andric #include "X86Subtarget.h"
220b57cec5SDimitry Andric #include "X86TargetMachine.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/FastISel.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/FunctionLoweringInfo.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
290b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
300b57cec5SDimitry Andric #include "llvm/IR/DebugInfo.h"
310b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
320b57cec5SDimitry Andric #include "llvm/IR/GetElementPtrTypeIterator.h"
330b57cec5SDimitry Andric #include "llvm/IR/GlobalAlias.h"
340b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h"
350b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
360b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
37480093f4SDimitry Andric #include "llvm/IR/IntrinsicsX86.h"
380b57cec5SDimitry Andric #include "llvm/IR/Operator.h"
390b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
400b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h"
410b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
420b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric namespace {
460b57cec5SDimitry Andric 
class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
  }

  /// Main FastISel hook: attempt to select \p I, returning true on success.
  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction.  If possible,
  /// try to fold the load as an operand to the instruction, returning true if
  /// possible.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  // Fast paths for lowering formal arguments, calls, and intrinsic calls
  // without going through SelectionDAG.
  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  /// Emit a compare of \p LHS / \p RHS of type \p VT; returns true if the
  /// compare could be emitted.
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &DL);

  /// Emit a load of type \p VT from the address \p AM into a fresh vreg
  /// returned via \p ResultReg.
  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  // Store emission: the first overload materializes \p Val itself, the
  // second stores an already-materialized vreg \p ValReg.
  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  /// Emit a zext/sext (per \p Opc) of \p Src from \p SrcVT to \p DstVT.
  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  // Compute an X86 addressing mode for a plain memory access vs. a call
  // target (call addresses have different constraints, e.g. no RIP-relative
  // tricks in some modes).
  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  // Per-opcode selection routines, dispatched from fastSelectInstruction.
  // Each returns true if it emitted machine code for \p I.
  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  // Three strategies for lowering IR 'select': integer CMOV, SSE
  // min/max/cmp+blend, and a pseudo that is expanded later.
  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  /// Shared helper for fpext/fptrunc with an explicit target opcode and
  /// destination register class.
  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);
  bool X86SelectUIToFP(const Instruction *I);
  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  /// Fold a constant pointer (global, constant expression, ...) into \p AM.
  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  // Constant materialization helpers; each returns the vreg holding the
  // materialized value, or 0 on failure.
  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
           (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  /// Return true if a memcpy of \p Len bytes is small enough to expand
  /// inline instead of calling the library routine.
  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  /// Try to fold the overflow flag of an XALU intrinsic feeding \p Cond
  /// into the user \p I; \p CC receives the condition code on success.
  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  /// Append a full X86 address (base/scale/index/disp/segment) to \p MIB,
  /// constraining the index register's class first.
  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  /// Emit an instruction taking four register operands (not covered by the
  /// tablegen'd fastEmitInst_* family).
  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             unsigned Op1, unsigned Op2, unsigned Op3);
};
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric } // end anonymous namespace.
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric static std::pair<unsigned, bool>
1750b57cec5SDimitry Andric getX86SSEConditionCode(CmpInst::Predicate Predicate) {
1760b57cec5SDimitry Andric   unsigned CC;
1770b57cec5SDimitry Andric   bool NeedSwap = false;
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric   // SSE Condition code mapping:
1800b57cec5SDimitry Andric   //  0 - EQ
1810b57cec5SDimitry Andric   //  1 - LT
1820b57cec5SDimitry Andric   //  2 - LE
1830b57cec5SDimitry Andric   //  3 - UNORD
1840b57cec5SDimitry Andric   //  4 - NEQ
1850b57cec5SDimitry Andric   //  5 - NLT
1860b57cec5SDimitry Andric   //  6 - NLE
1870b57cec5SDimitry Andric   //  7 - ORD
1880b57cec5SDimitry Andric   switch (Predicate) {
1890b57cec5SDimitry Andric   default: llvm_unreachable("Unexpected predicate");
1900b57cec5SDimitry Andric   case CmpInst::FCMP_OEQ: CC = 0;          break;
191bdd1243dSDimitry Andric   case CmpInst::FCMP_OGT: NeedSwap = true; [[fallthrough]];
1920b57cec5SDimitry Andric   case CmpInst::FCMP_OLT: CC = 1;          break;
193bdd1243dSDimitry Andric   case CmpInst::FCMP_OGE: NeedSwap = true; [[fallthrough]];
1940b57cec5SDimitry Andric   case CmpInst::FCMP_OLE: CC = 2;          break;
1950b57cec5SDimitry Andric   case CmpInst::FCMP_UNO: CC = 3;          break;
1960b57cec5SDimitry Andric   case CmpInst::FCMP_UNE: CC = 4;          break;
197bdd1243dSDimitry Andric   case CmpInst::FCMP_ULE: NeedSwap = true; [[fallthrough]];
1980b57cec5SDimitry Andric   case CmpInst::FCMP_UGE: CC = 5;          break;
199bdd1243dSDimitry Andric   case CmpInst::FCMP_ULT: NeedSwap = true; [[fallthrough]];
2000b57cec5SDimitry Andric   case CmpInst::FCMP_UGT: CC = 6;          break;
2010b57cec5SDimitry Andric   case CmpInst::FCMP_ORD: CC = 7;          break;
2020b57cec5SDimitry Andric   case CmpInst::FCMP_UEQ: CC = 8;          break;
2030b57cec5SDimitry Andric   case CmpInst::FCMP_ONE: CC = 12;         break;
2040b57cec5SDimitry Andric   }
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   return std::make_pair(CC, NeedSwap);
2070b57cec5SDimitry Andric }
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric /// Adds a complex addressing mode to the given machine instr builder.
2100b57cec5SDimitry Andric /// Note, this will constrain the index register.  If its not possible to
2110b57cec5SDimitry Andric /// constrain the given index register, then a new one will be created.  The
2120b57cec5SDimitry Andric /// IndexReg field of the addressing mode will be updated to match in this case.
2130b57cec5SDimitry Andric const MachineInstrBuilder &
2140b57cec5SDimitry Andric X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
2150b57cec5SDimitry Andric                             X86AddressMode &AM) {
2160b57cec5SDimitry Andric   // First constrain the index register.  It needs to be a GR64_NOSP.
2170b57cec5SDimitry Andric   AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
2180b57cec5SDimitry Andric                                          MIB->getNumOperands() +
2190b57cec5SDimitry Andric                                          X86::AddrIndexReg);
2200b57cec5SDimitry Andric   return ::addFullAddress(MIB, AM);
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric /// Check if it is possible to fold the condition from the XALU intrinsic
2240b57cec5SDimitry Andric /// into the user. The condition code will only be updated on success.
2250b57cec5SDimitry Andric bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
2260b57cec5SDimitry Andric                                        const Value *Cond) {
2270b57cec5SDimitry Andric   if (!isa<ExtractValueInst>(Cond))
2280b57cec5SDimitry Andric     return false;
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   const auto *EV = cast<ExtractValueInst>(Cond);
2310b57cec5SDimitry Andric   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2320b57cec5SDimitry Andric     return false;
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2350b57cec5SDimitry Andric   MVT RetVT;
2360b57cec5SDimitry Andric   const Function *Callee = II->getCalledFunction();
2370b57cec5SDimitry Andric   Type *RetTy =
2380b57cec5SDimitry Andric     cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2390b57cec5SDimitry Andric   if (!isTypeLegal(RetTy, RetVT))
2400b57cec5SDimitry Andric     return false;
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2430b57cec5SDimitry Andric     return false;
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   X86::CondCode TmpCC;
2460b57cec5SDimitry Andric   switch (II->getIntrinsicID()) {
2470b57cec5SDimitry Andric   default: return false;
2480b57cec5SDimitry Andric   case Intrinsic::sadd_with_overflow:
2490b57cec5SDimitry Andric   case Intrinsic::ssub_with_overflow:
2500b57cec5SDimitry Andric   case Intrinsic::smul_with_overflow:
2510b57cec5SDimitry Andric   case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
2520b57cec5SDimitry Andric   case Intrinsic::uadd_with_overflow:
2530b57cec5SDimitry Andric   case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
2540b57cec5SDimitry Andric   }
2550b57cec5SDimitry Andric 
2560b57cec5SDimitry Andric   // Check if both instructions are in the same basic block.
2570b57cec5SDimitry Andric   if (II->getParent() != I->getParent())
2580b57cec5SDimitry Andric     return false;
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric   // Make sure nothing is in the way
2610b57cec5SDimitry Andric   BasicBlock::const_iterator Start(I);
2620b57cec5SDimitry Andric   BasicBlock::const_iterator End(II);
2630b57cec5SDimitry Andric   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2640b57cec5SDimitry Andric     // We only expect extractvalue instructions between the intrinsic and the
2650b57cec5SDimitry Andric     // instruction to be selected.
2660b57cec5SDimitry Andric     if (!isa<ExtractValueInst>(Itr))
2670b57cec5SDimitry Andric       return false;
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric     // Check that the extractvalue operand comes from the intrinsic.
2700b57cec5SDimitry Andric     const auto *EVI = cast<ExtractValueInst>(Itr);
2710b57cec5SDimitry Andric     if (EVI->getAggregateOperand() != II)
2720b57cec5SDimitry Andric       return false;
2730b57cec5SDimitry Andric   }
2740b57cec5SDimitry Andric 
275d409305fSDimitry Andric   // Make sure no potentially eflags clobbering phi moves can be inserted in
276d409305fSDimitry Andric   // between.
277bdd1243dSDimitry Andric   auto HasPhis = [](const BasicBlock *Succ) { return !Succ->phis().empty(); };
278d409305fSDimitry Andric   if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
279d409305fSDimitry Andric     return false;
280d409305fSDimitry Andric 
28181ad6265SDimitry Andric   // Make sure there are no potentially eflags clobbering constant
28281ad6265SDimitry Andric   // materializations in between.
28381ad6265SDimitry Andric   if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
28481ad6265SDimitry Andric     return false;
28581ad6265SDimitry Andric 
2860b57cec5SDimitry Andric   CC = TmpCC;
2870b57cec5SDimitry Andric   return true;
2880b57cec5SDimitry Andric }
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
2910b57cec5SDimitry Andric   EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
2920b57cec5SDimitry Andric   if (evt == MVT::Other || !evt.isSimple())
2930b57cec5SDimitry Andric     // Unhandled type. Halt "fast" selection and bail.
2940b57cec5SDimitry Andric     return false;
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric   VT = evt.getSimpleVT();
2970b57cec5SDimitry Andric   // For now, require SSE/SSE2 for performing floating-point operations,
2980b57cec5SDimitry Andric   // since x87 requires additional work.
29981ad6265SDimitry Andric   if (VT == MVT::f64 && !Subtarget->hasSSE2())
3000b57cec5SDimitry Andric     return false;
30181ad6265SDimitry Andric   if (VT == MVT::f32 && !Subtarget->hasSSE1())
3020b57cec5SDimitry Andric     return false;
3030b57cec5SDimitry Andric   // Similarly, no f80 support yet.
3040b57cec5SDimitry Andric   if (VT == MVT::f80)
3050b57cec5SDimitry Andric     return false;
3060b57cec5SDimitry Andric   // We only handle legal types. For example, on x86-32 the instruction
3070b57cec5SDimitry Andric   // selector contains all of the 64-bit instructions from x86-64,
3080b57cec5SDimitry Andric   // under the assumption that i64 won't be used if the target doesn't
3090b57cec5SDimitry Andric   // support it.
3100b57cec5SDimitry Andric   return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
3110b57cec5SDimitry Andric }
3120b57cec5SDimitry Andric 
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  // Feature bits steer the opcode choice below: SSE vs. AVX (VEX) vs.
  // AVX-512 (EVEX) encodings, and whether non-temporal loads are available.
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
  if (VT == MVT::i1)
    VT = MVT::i8;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    break;
  case MVT::f32:
    // Scalar FP: prefer the newest available encoding; fall back to x87
    // when no SSE at all.
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
                    : X86::LD_Fp32m;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
                    : X86::LD_Fp64m;
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  // 128-bit vectors: non-temporal aligned loads use MOVNTDQA (SSE4.1+);
  // otherwise pick aligned vs. unaligned moves by the Alignment argument.
  // VLX variants use EVEX so the op is usable with the extended registers.
  case MVT::v4f32:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    break;
  case MVT::v2f64:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    break;
  // 256-bit vectors require AVX. 256-bit MOVNTDQA needs AVX2; with only
  // 16-byte alignment we bail so the load is split into two 128-bit
  // non-temporal loads elsewhere.
  case MVT::v8f32:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    break;
  // 512-bit vectors require AVX-512.
  case MVT::v16f32:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the load isn't masked.
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
    break;
  }

  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  // Build the load into a fresh vreg of the natural register class and
  // attach the memory operand (if any) so later passes see aliasing info.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric /// X86FastEmitStore - Emit a machine instruction to store a value Val of
4760b57cec5SDimitry Andric /// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
4770b57cec5SDimitry Andric /// and a displacement offset, or a GlobalAddress,
4780b57cec5SDimitry Andric /// i.e. V. Return true if it is possible.
/// Emit a machine store of ValReg (holding a value of type VT) to the address
/// described by AM. The opcode is chosen from VT and the available subtarget
/// features (SSE1/SSE2/SSE4A/AVX/AVX-512/VLX), preferring non-temporal
/// variants when the memory operand requests them and the address is
/// suitably aligned. Returns false if VT is unsupported (e.g. f80).
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Cache the subtarget feature bits consulted repeatedly below.
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    Register AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(X86::AND8ri), AndResult)
      .addReg(ValReg).addImm(1);
    ValReg = AndResult;
    [[fallthrough]]; // handle i1 as i8.
  }
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    // Non-temporal integer stores (MOVNTI) require SSE2.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    // Scalar float: prefer an SSE/AVX scalar move; without SSE1 fall back to
    // an x87 store.
    if (HasSSE1) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX512 ? X86::VMOVSSZmr :
              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    // Scalar double: SSE2 scalar move, otherwise x87.
    if (HasSSE2) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX512 ? X86::VMOVSDZmr :
              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::x86mmx:
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
    break;
  // 128-bit vector stores: non-temporal and aligned forms require an aligned
  // address; otherwise use the unaligned move.
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ128mr :
              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ128mr :
              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  // 256-bit vector stores require AVX (asserted); with VLX the EVEX-encoded
  // Z256 forms are used.
  case MVT::v8f32:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
    break;
  // 512-bit vector stores require AVX-512 (asserted).
  case MVT::v16f32:
    assert(HasAVX512);
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
    else
      Opc = X86::VMOVUPSZmr;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (Aligned) {
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
    } else
      Opc = X86::VMOVUPDZmr;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the store isn't masked.
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
    else
      Opc = X86::VMOVDQU64Zmr;
    break;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Some of the instructions in the previous switch use FR128 instead
  // of FR32 for ValReg. Make sure the register we feed the instruction
  // matches its register class constraints.
  // Note: This is fine to do a copy from FR32 to FR128, this is the
  // same registers behind the scene and actually why it did not trigger
  // any bugs before.
  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
  addFullAddress(MIB, AM).addReg(ValReg);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}
6510b57cec5SDimitry Andric 
6520b57cec5SDimitry Andric bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
6530b57cec5SDimitry Andric                                    X86AddressMode &AM,
6540b57cec5SDimitry Andric                                    MachineMemOperand *MMO, bool Aligned) {
6550b57cec5SDimitry Andric   // Handle 'null' like i32/i64 0.
6560b57cec5SDimitry Andric   if (isa<ConstantPointerNull>(Val))
6570b57cec5SDimitry Andric     Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric   // If this is a store of a simple constant, fold the constant into the store.
6600b57cec5SDimitry Andric   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
6610b57cec5SDimitry Andric     unsigned Opc = 0;
6620b57cec5SDimitry Andric     bool Signed = true;
6630b57cec5SDimitry Andric     switch (VT.getSimpleVT().SimpleTy) {
6640b57cec5SDimitry Andric     default: break;
6650b57cec5SDimitry Andric     case MVT::i1:
6660b57cec5SDimitry Andric       Signed = false;
667bdd1243dSDimitry Andric       [[fallthrough]]; // Handle as i8.
6680b57cec5SDimitry Andric     case MVT::i8:  Opc = X86::MOV8mi;  break;
6690b57cec5SDimitry Andric     case MVT::i16: Opc = X86::MOV16mi; break;
6700b57cec5SDimitry Andric     case MVT::i32: Opc = X86::MOV32mi; break;
6710b57cec5SDimitry Andric     case MVT::i64:
6720b57cec5SDimitry Andric       // Must be a 32-bit sign extended value.
6730b57cec5SDimitry Andric       if (isInt<32>(CI->getSExtValue()))
6740b57cec5SDimitry Andric         Opc = X86::MOV64mi32;
6750b57cec5SDimitry Andric       break;
6760b57cec5SDimitry Andric     }
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric     if (Opc) {
6790b57cec5SDimitry Andric       MachineInstrBuilder MIB =
680bdd1243dSDimitry Andric         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc));
6810b57cec5SDimitry Andric       addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
6820b57cec5SDimitry Andric                                             : CI->getZExtValue());
6830b57cec5SDimitry Andric       if (MMO)
6840b57cec5SDimitry Andric         MIB->addMemOperand(*FuncInfo.MF, MMO);
6850b57cec5SDimitry Andric       return true;
6860b57cec5SDimitry Andric     }
6870b57cec5SDimitry Andric   }
6880b57cec5SDimitry Andric 
6895ffd83dbSDimitry Andric   Register ValReg = getRegForValue(Val);
6900b57cec5SDimitry Andric   if (ValReg == 0)
6910b57cec5SDimitry Andric     return false;
6920b57cec5SDimitry Andric 
693fe6060f1SDimitry Andric   return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
6940b57cec5SDimitry Andric }
6950b57cec5SDimitry Andric 
6960b57cec5SDimitry Andric /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
6970b57cec5SDimitry Andric /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
6980b57cec5SDimitry Andric /// ISD::SIGN_EXTEND).
6990b57cec5SDimitry Andric bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
7000b57cec5SDimitry Andric                                     unsigned Src, EVT SrcVT,
7010b57cec5SDimitry Andric                                     unsigned &ResultReg) {
702fe6060f1SDimitry Andric   unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
7030b57cec5SDimitry Andric   if (RR == 0)
7040b57cec5SDimitry Andric     return false;
7050b57cec5SDimitry Andric 
7060b57cec5SDimitry Andric   ResultReg = RR;
7070b57cec5SDimitry Andric   return true;
7080b57cec5SDimitry Andric }
7090b57cec5SDimitry Andric 
/// Try to fold the constant value V into the addressing mode AM. Global
/// values get direct, RIP-relative, or GOT-stub-loaded addressing depending
/// on the code model and PIC style; any other value is materialized into the
/// free base or index register of AM. Returns false if V cannot be encoded.
bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small &&
        TM.getCodeModel() != CodeModel::Medium)
      return false;

    // Can't handle large objects yet.
    if (TM.isLargeGlobalValue(GV))
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // Can't handle !absolute_symbol references yet.
    if (GV->isAbsoluteSymbolRef())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can.  Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub.  If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
      Register LoadReg;
      if (I != LocalValueMap.end() && I->second) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        // The stub load is a local value: it is hoisted to the top of the
        // block so later uses in the same MBB can reuse LoadReg.
        SavePoint SaveInsertPt = enterLocalValueArea();

        // Load a pointer-sized value: 64-bit or 32-bit per the target.
        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC  = &X86::GR64RegClass;
        } else {
          Opc = X86::MOV32rm;
          RC  = &X86::GR32RegClass;
        }

        // GOTPCREL references are always RIP-relative.
        if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
            GVFlags == X86II::MO_GOTPCREL_NORELAX)
          StubAM.Base.Reg = X86::RIP;

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  // Prefer the base register; fall back to the index (implicit scale 1).
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
8250b57cec5SDimitry Andric 
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
/// Walks through bitcasts, no-op int<->ptr casts, constant adds, static
/// allocas, and chains of GEPs, folding as much as possible into the
/// base + scale*index + disp form of AM. Returns false if no supported
/// addressing mode can be built.
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  // GEPs folded so far; if the final base can't be matched, these are
  // retried one by one as plain constant addresses (see below).
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    // Static allocas have a fixed frame slot; use frame-index addressing.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    // Save the current state so it can be restored if folding this GEP
    // chain ultimately fails.
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();

    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Struct field: a compile-time constant byte offset.
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // A array/variable index is always of the form i*S where S is the
      // constant scale size.  See if we can push the scale into immediates.
      uint64_t S = GTI.getSequentialElementStride(DL);
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          // Only one dynamic index fits, and only with a hardware-encodable
          // scale of 1/2/4/8; RIP-relative addresses can't take an index.
          Scale = S;
          IndexReg = getRegForGEPIndex(PtrVT, Op);
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    // Retry the folded GEPs innermost-first as plain constant addresses.
    for (const Value *I : reverse(GEPs))
      if (handleConstantAddresses(I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}
9880b57cec5SDimitry Andric 
9890b57cec5SDimitry Andric /// X86SelectCallAddress - Attempt to fill in an address from the given value.
9900b57cec5SDimitry Andric ///
9910b57cec5SDimitry Andric bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
9920b57cec5SDimitry Andric   const User *U = nullptr;
9930b57cec5SDimitry Andric   unsigned Opcode = Instruction::UserOp1;
9940b57cec5SDimitry Andric   const Instruction *I = dyn_cast<Instruction>(V);
9950b57cec5SDimitry Andric   // Record if the value is defined in the same basic block.
9960b57cec5SDimitry Andric   //
9970b57cec5SDimitry Andric   // This information is crucial to know whether or not folding an
9980b57cec5SDimitry Andric   // operand is valid.
9990b57cec5SDimitry Andric   // Indeed, FastISel generates or reuses a virtual register for all
10000b57cec5SDimitry Andric   // operands of all instructions it selects. Obviously, the definition and
10010b57cec5SDimitry Andric   // its uses must use the same virtual register otherwise the produced
10020b57cec5SDimitry Andric   // code is incorrect.
10030b57cec5SDimitry Andric   // Before instruction selection, FunctionLoweringInfo::set sets the virtual
10040b57cec5SDimitry Andric   // registers for values that are alive across basic blocks. This ensures
10050b57cec5SDimitry Andric   // that the values are consistently set between across basic block, even
10060b57cec5SDimitry Andric   // if different instruction selection mechanisms are used (e.g., a mix of
10070b57cec5SDimitry Andric   // SDISel and FastISel).
10080b57cec5SDimitry Andric   // For values local to a basic block, the instruction selection process
10090b57cec5SDimitry Andric   // generates these virtual registers with whatever method is appropriate
10100b57cec5SDimitry Andric   // for its needs. In particular, FastISel and SDISel do not share the way
10110b57cec5SDimitry Andric   // local virtual registers are set.
10120b57cec5SDimitry Andric   // Therefore, this is impossible (or at least unsafe) to share values
10130b57cec5SDimitry Andric   // between basic blocks unless they use the same instruction selection
10140b57cec5SDimitry Andric   // method, which is not guarantee for X86.
10150b57cec5SDimitry Andric   // Moreover, things like hasOneUse could not be used accurately, if we
10160b57cec5SDimitry Andric   // allow to reference values across basic blocks whereas they are not
10170b57cec5SDimitry Andric   // alive across basic blocks initially.
10180b57cec5SDimitry Andric   bool InMBB = true;
10190b57cec5SDimitry Andric   if (I) {
10200b57cec5SDimitry Andric     Opcode = I->getOpcode();
10210b57cec5SDimitry Andric     U = I;
10220b57cec5SDimitry Andric     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
10230b57cec5SDimitry Andric   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
10240b57cec5SDimitry Andric     Opcode = C->getOpcode();
10250b57cec5SDimitry Andric     U = C;
10260b57cec5SDimitry Andric   }
10270b57cec5SDimitry Andric 
10280b57cec5SDimitry Andric   switch (Opcode) {
10290b57cec5SDimitry Andric   default: break;
10300b57cec5SDimitry Andric   case Instruction::BitCast:
10310b57cec5SDimitry Andric     // Look past bitcasts if its operand is in the same BB.
10320b57cec5SDimitry Andric     if (InMBB)
10330b57cec5SDimitry Andric       return X86SelectCallAddress(U->getOperand(0), AM);
10340b57cec5SDimitry Andric     break;
10350b57cec5SDimitry Andric 
10360b57cec5SDimitry Andric   case Instruction::IntToPtr:
10370b57cec5SDimitry Andric     // Look past no-op inttoptrs if its operand is in the same BB.
10380b57cec5SDimitry Andric     if (InMBB &&
10390b57cec5SDimitry Andric         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
10400b57cec5SDimitry Andric             TLI.getPointerTy(DL))
10410b57cec5SDimitry Andric       return X86SelectCallAddress(U->getOperand(0), AM);
10420b57cec5SDimitry Andric     break;
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric   case Instruction::PtrToInt:
10450b57cec5SDimitry Andric     // Look past no-op ptrtoints if its operand is in the same BB.
10460b57cec5SDimitry Andric     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
10470b57cec5SDimitry Andric       return X86SelectCallAddress(U->getOperand(0), AM);
10480b57cec5SDimitry Andric     break;
10490b57cec5SDimitry Andric   }
10500b57cec5SDimitry Andric 
10510b57cec5SDimitry Andric   // Handle constant address.
10520b57cec5SDimitry Andric   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
10530b57cec5SDimitry Andric     // Can't handle alternate code models yet.
10545f757f3fSDimitry Andric     if (TM.getCodeModel() != CodeModel::Small &&
10555f757f3fSDimitry Andric         TM.getCodeModel() != CodeModel::Medium)
10560b57cec5SDimitry Andric       return false;
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric     // RIP-relative addresses can't have additional register operands.
10590b57cec5SDimitry Andric     if (Subtarget->isPICStyleRIPRel() &&
10600b57cec5SDimitry Andric         (AM.Base.Reg != 0 || AM.IndexReg != 0))
10610b57cec5SDimitry Andric       return false;
10620b57cec5SDimitry Andric 
10630b57cec5SDimitry Andric     // Can't handle TLS.
10640b57cec5SDimitry Andric     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
10650b57cec5SDimitry Andric       if (GVar->isThreadLocal())
10660b57cec5SDimitry Andric         return false;
10670b57cec5SDimitry Andric 
10680b57cec5SDimitry Andric     // Okay, we've committed to selecting this global. Set up the basic address.
10690b57cec5SDimitry Andric     AM.GV = GV;
10700b57cec5SDimitry Andric 
10710b57cec5SDimitry Andric     // Return a direct reference to the global. Fastisel can handle calls to
10720b57cec5SDimitry Andric     // functions that require loads, such as dllimport and nonlazybind
10730b57cec5SDimitry Andric     // functions.
10740b57cec5SDimitry Andric     if (Subtarget->isPICStyleRIPRel()) {
10750b57cec5SDimitry Andric       // Use rip-relative addressing if we can.  Above we verified that the
10760b57cec5SDimitry Andric       // base and index registers are unused.
10770b57cec5SDimitry Andric       assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
10780b57cec5SDimitry Andric       AM.Base.Reg = X86::RIP;
10790b57cec5SDimitry Andric     } else {
10800b57cec5SDimitry Andric       AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
10810b57cec5SDimitry Andric     }
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric     return true;
10840b57cec5SDimitry Andric   }
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric   // If all else fails, try to materialize the value in a register.
10870b57cec5SDimitry Andric   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1088e8d8bef9SDimitry Andric     auto GetCallRegForValue = [this](const Value *V) {
1089e8d8bef9SDimitry Andric       Register Reg = getRegForValue(V);
1090e8d8bef9SDimitry Andric 
1091e8d8bef9SDimitry Andric       // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
1092e8d8bef9SDimitry Andric       if (Reg && Subtarget->isTarget64BitILP32()) {
1093e8d8bef9SDimitry Andric         Register CopyReg = createResultReg(&X86::GR32RegClass);
1094bdd1243dSDimitry Andric         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32rr),
1095e8d8bef9SDimitry Andric                 CopyReg)
1096e8d8bef9SDimitry Andric             .addReg(Reg);
1097e8d8bef9SDimitry Andric 
1098e8d8bef9SDimitry Andric         Register ExtReg = createResultReg(&X86::GR64RegClass);
1099bdd1243dSDimitry Andric         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1100e8d8bef9SDimitry Andric                 TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
1101e8d8bef9SDimitry Andric             .addImm(0)
1102e8d8bef9SDimitry Andric             .addReg(CopyReg)
1103e8d8bef9SDimitry Andric             .addImm(X86::sub_32bit);
1104e8d8bef9SDimitry Andric         Reg = ExtReg;
1105e8d8bef9SDimitry Andric       }
1106e8d8bef9SDimitry Andric 
1107e8d8bef9SDimitry Andric       return Reg;
1108e8d8bef9SDimitry Andric     };
1109e8d8bef9SDimitry Andric 
11100b57cec5SDimitry Andric     if (AM.Base.Reg == 0) {
1111e8d8bef9SDimitry Andric       AM.Base.Reg = GetCallRegForValue(V);
11120b57cec5SDimitry Andric       return AM.Base.Reg != 0;
11130b57cec5SDimitry Andric     }
11140b57cec5SDimitry Andric     if (AM.IndexReg == 0) {
11150b57cec5SDimitry Andric       assert(AM.Scale == 1 && "Scale with no index!");
1116e8d8bef9SDimitry Andric       AM.IndexReg = GetCallRegForValue(V);
11170b57cec5SDimitry Andric       return AM.IndexReg != 0;
11180b57cec5SDimitry Andric     }
11190b57cec5SDimitry Andric   }
11200b57cec5SDimitry Andric 
11210b57cec5SDimitry Andric   return false;
11220b57cec5SDimitry Andric }
11230b57cec5SDimitry Andric 
11240b57cec5SDimitry Andric 
11250b57cec5SDimitry Andric /// X86SelectStore - Select and emit code to implement store instructions.
11260b57cec5SDimitry Andric bool X86FastISel::X86SelectStore(const Instruction *I) {
11270b57cec5SDimitry Andric   // Atomic stores need special handling.
11280b57cec5SDimitry Andric   const StoreInst *S = cast<StoreInst>(I);
11290b57cec5SDimitry Andric 
11300b57cec5SDimitry Andric   if (S->isAtomic())
11310b57cec5SDimitry Andric     return false;
11320b57cec5SDimitry Andric 
11330b57cec5SDimitry Andric   const Value *PtrV = I->getOperand(1);
11340b57cec5SDimitry Andric   if (TLI.supportSwiftError()) {
11350b57cec5SDimitry Andric     // Swifterror values can come from either a function parameter with
11360b57cec5SDimitry Andric     // swifterror attribute or an alloca with swifterror attribute.
11370b57cec5SDimitry Andric     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
11380b57cec5SDimitry Andric       if (Arg->hasSwiftErrorAttr())
11390b57cec5SDimitry Andric         return false;
11400b57cec5SDimitry Andric     }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
11430b57cec5SDimitry Andric       if (Alloca->isSwiftError())
11440b57cec5SDimitry Andric         return false;
11450b57cec5SDimitry Andric     }
11460b57cec5SDimitry Andric   }
11470b57cec5SDimitry Andric 
11480b57cec5SDimitry Andric   const Value *Val = S->getValueOperand();
11490b57cec5SDimitry Andric   const Value *Ptr = S->getPointerOperand();
11500b57cec5SDimitry Andric 
11510b57cec5SDimitry Andric   MVT VT;
11520b57cec5SDimitry Andric   if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
11530b57cec5SDimitry Andric     return false;
11540b57cec5SDimitry Andric 
11555ffd83dbSDimitry Andric   Align Alignment = S->getAlign();
11565ffd83dbSDimitry Andric   Align ABIAlignment = DL.getABITypeAlign(Val->getType());
11570b57cec5SDimitry Andric   bool Aligned = Alignment >= ABIAlignment;
11580b57cec5SDimitry Andric 
11590b57cec5SDimitry Andric   X86AddressMode AM;
11600b57cec5SDimitry Andric   if (!X86SelectAddress(Ptr, AM))
11610b57cec5SDimitry Andric     return false;
11620b57cec5SDimitry Andric 
11630b57cec5SDimitry Andric   return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
11640b57cec5SDimitry Andric }
11650b57cec5SDimitry Andric 
11660b57cec5SDimitry Andric /// X86SelectRet - Select and emit code to implement ret instructions.
11670b57cec5SDimitry Andric bool X86FastISel::X86SelectRet(const Instruction *I) {
11680b57cec5SDimitry Andric   const ReturnInst *Ret = cast<ReturnInst>(I);
11690b57cec5SDimitry Andric   const Function &F = *I->getParent()->getParent();
11700b57cec5SDimitry Andric   const X86MachineFunctionInfo *X86MFInfo =
11710b57cec5SDimitry Andric       FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
11720b57cec5SDimitry Andric 
11730b57cec5SDimitry Andric   if (!FuncInfo.CanLowerReturn)
11740b57cec5SDimitry Andric     return false;
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric   if (TLI.supportSwiftError() &&
11770b57cec5SDimitry Andric       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
11780b57cec5SDimitry Andric     return false;
11790b57cec5SDimitry Andric 
11800b57cec5SDimitry Andric   if (TLI.supportSplitCSR(FuncInfo.MF))
11810b57cec5SDimitry Andric     return false;
11820b57cec5SDimitry Andric 
11830b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
11840b57cec5SDimitry Andric   if (CC != CallingConv::C &&
11850b57cec5SDimitry Andric       CC != CallingConv::Fast &&
11868bcb0991SDimitry Andric       CC != CallingConv::Tail &&
1187fe6060f1SDimitry Andric       CC != CallingConv::SwiftTail &&
11880b57cec5SDimitry Andric       CC != CallingConv::X86_FastCall &&
11890b57cec5SDimitry Andric       CC != CallingConv::X86_StdCall &&
11900b57cec5SDimitry Andric       CC != CallingConv::X86_ThisCall &&
11910b57cec5SDimitry Andric       CC != CallingConv::X86_64_SysV &&
11920b57cec5SDimitry Andric       CC != CallingConv::Win64)
11930b57cec5SDimitry Andric     return false;
11940b57cec5SDimitry Andric 
11950b57cec5SDimitry Andric   // Don't handle popping bytes if they don't fit the ret's immediate.
11960b57cec5SDimitry Andric   if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
11970b57cec5SDimitry Andric     return false;
11980b57cec5SDimitry Andric 
11990b57cec5SDimitry Andric   // fastcc with -tailcallopt is intended to provide a guaranteed
12000b57cec5SDimitry Andric   // tail call optimization. Fastisel doesn't know how to do that.
12018bcb0991SDimitry Andric   if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
1202fe6060f1SDimitry Andric       CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
12030b57cec5SDimitry Andric     return false;
12040b57cec5SDimitry Andric 
12050b57cec5SDimitry Andric   // Let SDISel handle vararg functions.
12060b57cec5SDimitry Andric   if (F.isVarArg())
12070b57cec5SDimitry Andric     return false;
12080b57cec5SDimitry Andric 
12090b57cec5SDimitry Andric   // Build a list of return value registers.
12100b57cec5SDimitry Andric   SmallVector<unsigned, 4> RetRegs;
12110b57cec5SDimitry Andric 
12120b57cec5SDimitry Andric   if (Ret->getNumOperands() > 0) {
12130b57cec5SDimitry Andric     SmallVector<ISD::OutputArg, 4> Outs;
12140b57cec5SDimitry Andric     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
12150b57cec5SDimitry Andric 
12160b57cec5SDimitry Andric     // Analyze operands of the call, assigning locations to each operand.
12170b57cec5SDimitry Andric     SmallVector<CCValAssign, 16> ValLocs;
12180b57cec5SDimitry Andric     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
12190b57cec5SDimitry Andric     CCInfo.AnalyzeReturn(Outs, RetCC_X86);
12200b57cec5SDimitry Andric 
12210b57cec5SDimitry Andric     const Value *RV = Ret->getOperand(0);
12225ffd83dbSDimitry Andric     Register Reg = getRegForValue(RV);
12230b57cec5SDimitry Andric     if (Reg == 0)
12240b57cec5SDimitry Andric       return false;
12250b57cec5SDimitry Andric 
12260b57cec5SDimitry Andric     // Only handle a single return value for now.
12270b57cec5SDimitry Andric     if (ValLocs.size() != 1)
12280b57cec5SDimitry Andric       return false;
12290b57cec5SDimitry Andric 
12300b57cec5SDimitry Andric     CCValAssign &VA = ValLocs[0];
12310b57cec5SDimitry Andric 
12320b57cec5SDimitry Andric     // Don't bother handling odd stuff for now.
12330b57cec5SDimitry Andric     if (VA.getLocInfo() != CCValAssign::Full)
12340b57cec5SDimitry Andric       return false;
12350b57cec5SDimitry Andric     // Only handle register returns for now.
12360b57cec5SDimitry Andric     if (!VA.isRegLoc())
12370b57cec5SDimitry Andric       return false;
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric     // The calling-convention tables for x87 returns don't tell
12400b57cec5SDimitry Andric     // the whole story.
12410b57cec5SDimitry Andric     if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
12420b57cec5SDimitry Andric       return false;
12430b57cec5SDimitry Andric 
12440b57cec5SDimitry Andric     unsigned SrcReg = Reg + VA.getValNo();
12450b57cec5SDimitry Andric     EVT SrcVT = TLI.getValueType(DL, RV->getType());
12460b57cec5SDimitry Andric     EVT DstVT = VA.getValVT();
12470b57cec5SDimitry Andric     // Special handling for extended integers.
12480b57cec5SDimitry Andric     if (SrcVT != DstVT) {
12490b57cec5SDimitry Andric       if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
12500b57cec5SDimitry Andric         return false;
12510b57cec5SDimitry Andric 
12520b57cec5SDimitry Andric       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
12530b57cec5SDimitry Andric         return false;
12540b57cec5SDimitry Andric 
12550b57cec5SDimitry Andric       if (SrcVT == MVT::i1) {
12560b57cec5SDimitry Andric         if (Outs[0].Flags.isSExt())
12570b57cec5SDimitry Andric           return false;
1258fe6060f1SDimitry Andric         SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
12590b57cec5SDimitry Andric         SrcVT = MVT::i8;
12600b57cec5SDimitry Andric       }
12610fca6ea1SDimitry Andric       if (SrcVT != DstVT) {
12620fca6ea1SDimitry Andric         unsigned Op =
12630fca6ea1SDimitry Andric             Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12640fca6ea1SDimitry Andric         SrcReg =
12650fca6ea1SDimitry Andric             fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
12660fca6ea1SDimitry Andric       }
12670b57cec5SDimitry Andric     }
12680b57cec5SDimitry Andric 
12690b57cec5SDimitry Andric     // Make the copy.
12708bcb0991SDimitry Andric     Register DstReg = VA.getLocReg();
12710b57cec5SDimitry Andric     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
12720b57cec5SDimitry Andric     // Avoid a cross-class copy. This is very unlikely.
12730b57cec5SDimitry Andric     if (!SrcRC->contains(DstReg))
12740b57cec5SDimitry Andric       return false;
1275bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
12760b57cec5SDimitry Andric             TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
12770b57cec5SDimitry Andric 
12780b57cec5SDimitry Andric     // Add register to return instruction.
12790b57cec5SDimitry Andric     RetRegs.push_back(VA.getLocReg());
12800b57cec5SDimitry Andric   }
12810b57cec5SDimitry Andric 
12820b57cec5SDimitry Andric   // Swift calling convention does not require we copy the sret argument
12830b57cec5SDimitry Andric   // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
12840b57cec5SDimitry Andric 
12850b57cec5SDimitry Andric   // All x86 ABIs require that for returning structs by value we copy
12860b57cec5SDimitry Andric   // the sret argument into %rax/%eax (depending on ABI) for the return.
12870b57cec5SDimitry Andric   // We saved the argument into a virtual register in the entry block,
12880b57cec5SDimitry Andric   // so now we copy the value out and into %rax/%eax.
1289fe6060f1SDimitry Andric   if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
1290fe6060f1SDimitry Andric       CC != CallingConv::SwiftTail) {
12915ffd83dbSDimitry Andric     Register Reg = X86MFInfo->getSRetReturnReg();
12920b57cec5SDimitry Andric     assert(Reg &&
12930b57cec5SDimitry Andric            "SRetReturnReg should have been set in LowerFormalArguments()!");
12940b57cec5SDimitry Andric     unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1295bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
12960b57cec5SDimitry Andric             TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
12970b57cec5SDimitry Andric     RetRegs.push_back(RetReg);
12980b57cec5SDimitry Andric   }
12990b57cec5SDimitry Andric 
13000b57cec5SDimitry Andric   // Now emit the RET.
13010b57cec5SDimitry Andric   MachineInstrBuilder MIB;
13020b57cec5SDimitry Andric   if (X86MFInfo->getBytesToPopOnReturn()) {
1303bdd1243dSDimitry Andric     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1304349cc55cSDimitry Andric                   TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
13050b57cec5SDimitry Andric               .addImm(X86MFInfo->getBytesToPopOnReturn());
13060b57cec5SDimitry Andric   } else {
1307bdd1243dSDimitry Andric     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1308349cc55cSDimitry Andric                   TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
13090b57cec5SDimitry Andric   }
1310cb14a3feSDimitry Andric   for (unsigned Reg : RetRegs)
1311cb14a3feSDimitry Andric     MIB.addReg(Reg, RegState::Implicit);
13120b57cec5SDimitry Andric   return true;
13130b57cec5SDimitry Andric }
13140b57cec5SDimitry Andric 
13150b57cec5SDimitry Andric /// X86SelectLoad - Select and emit code to implement load instructions.
13160b57cec5SDimitry Andric ///
13170b57cec5SDimitry Andric bool X86FastISel::X86SelectLoad(const Instruction *I) {
13180b57cec5SDimitry Andric   const LoadInst *LI = cast<LoadInst>(I);
13190b57cec5SDimitry Andric 
13200b57cec5SDimitry Andric   // Atomic loads need special handling.
13210b57cec5SDimitry Andric   if (LI->isAtomic())
13220b57cec5SDimitry Andric     return false;
13230b57cec5SDimitry Andric 
13240b57cec5SDimitry Andric   const Value *SV = I->getOperand(0);
13250b57cec5SDimitry Andric   if (TLI.supportSwiftError()) {
13260b57cec5SDimitry Andric     // Swifterror values can come from either a function parameter with
13270b57cec5SDimitry Andric     // swifterror attribute or an alloca with swifterror attribute.
13280b57cec5SDimitry Andric     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
13290b57cec5SDimitry Andric       if (Arg->hasSwiftErrorAttr())
13300b57cec5SDimitry Andric         return false;
13310b57cec5SDimitry Andric     }
13320b57cec5SDimitry Andric 
13330b57cec5SDimitry Andric     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
13340b57cec5SDimitry Andric       if (Alloca->isSwiftError())
13350b57cec5SDimitry Andric         return false;
13360b57cec5SDimitry Andric     }
13370b57cec5SDimitry Andric   }
13380b57cec5SDimitry Andric 
13390b57cec5SDimitry Andric   MVT VT;
13400b57cec5SDimitry Andric   if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
13410b57cec5SDimitry Andric     return false;
13420b57cec5SDimitry Andric 
13430b57cec5SDimitry Andric   const Value *Ptr = LI->getPointerOperand();
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric   X86AddressMode AM;
13460b57cec5SDimitry Andric   if (!X86SelectAddress(Ptr, AM))
13470b57cec5SDimitry Andric     return false;
13480b57cec5SDimitry Andric 
13490b57cec5SDimitry Andric   unsigned ResultReg = 0;
13500b57cec5SDimitry Andric   if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
13515ffd83dbSDimitry Andric                        LI->getAlign().value()))
13520b57cec5SDimitry Andric     return false;
13530b57cec5SDimitry Andric 
13540b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
13550b57cec5SDimitry Andric   return true;
13560b57cec5SDimitry Andric }
13570b57cec5SDimitry Andric 
13580b57cec5SDimitry Andric static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
13590b57cec5SDimitry Andric   bool HasAVX512 = Subtarget->hasAVX512();
13600b57cec5SDimitry Andric   bool HasAVX = Subtarget->hasAVX();
136181ad6265SDimitry Andric   bool HasSSE1 = Subtarget->hasSSE1();
136281ad6265SDimitry Andric   bool HasSSE2 = Subtarget->hasSSE2();
13630b57cec5SDimitry Andric 
13640b57cec5SDimitry Andric   switch (VT.getSimpleVT().SimpleTy) {
13650b57cec5SDimitry Andric   default:       return 0;
13660b57cec5SDimitry Andric   case MVT::i8:  return X86::CMP8rr;
13670b57cec5SDimitry Andric   case MVT::i16: return X86::CMP16rr;
13680b57cec5SDimitry Andric   case MVT::i32: return X86::CMP32rr;
13690b57cec5SDimitry Andric   case MVT::i64: return X86::CMP64rr;
13700b57cec5SDimitry Andric   case MVT::f32:
137181ad6265SDimitry Andric     return HasAVX512 ? X86::VUCOMISSZrr
137281ad6265SDimitry Andric            : HasAVX  ? X86::VUCOMISSrr
137381ad6265SDimitry Andric            : HasSSE1 ? X86::UCOMISSrr
13740b57cec5SDimitry Andric                      : 0;
13750b57cec5SDimitry Andric   case MVT::f64:
137681ad6265SDimitry Andric     return HasAVX512 ? X86::VUCOMISDZrr
137781ad6265SDimitry Andric            : HasAVX  ? X86::VUCOMISDrr
137881ad6265SDimitry Andric            : HasSSE2 ? X86::UCOMISDrr
13790b57cec5SDimitry Andric                      : 0;
13800b57cec5SDimitry Andric   }
13810b57cec5SDimitry Andric }
13820b57cec5SDimitry Andric 
13830b57cec5SDimitry Andric /// If we have a comparison with RHS as the RHS  of the comparison, return an
13840b57cec5SDimitry Andric /// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
13850b57cec5SDimitry Andric static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
13860b57cec5SDimitry Andric   switch (VT.getSimpleVT().SimpleTy) {
13870b57cec5SDimitry Andric   // Otherwise, we can't fold the immediate into this comparison.
13880b57cec5SDimitry Andric   default:
13890b57cec5SDimitry Andric     return 0;
13900b57cec5SDimitry Andric   case MVT::i8:
13910b57cec5SDimitry Andric     return X86::CMP8ri;
13920b57cec5SDimitry Andric   case MVT::i16:
13930b57cec5SDimitry Andric     return X86::CMP16ri;
13940b57cec5SDimitry Andric   case MVT::i32:
13950b57cec5SDimitry Andric     return X86::CMP32ri;
13960b57cec5SDimitry Andric   case MVT::i64:
13970b57cec5SDimitry Andric     // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
13980b57cec5SDimitry Andric     // field.
139906c3fb27SDimitry Andric     return isInt<32>(RHSC->getSExtValue()) ? X86::CMP64ri32 : 0;
14000b57cec5SDimitry Andric   }
14010b57cec5SDimitry Andric }
14020b57cec5SDimitry Andric 
14030b57cec5SDimitry Andric bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1404bdd1243dSDimitry Andric                                      const DebugLoc &CurMIMD) {
14055ffd83dbSDimitry Andric   Register Op0Reg = getRegForValue(Op0);
14060b57cec5SDimitry Andric   if (Op0Reg == 0) return false;
14070b57cec5SDimitry Andric 
14080b57cec5SDimitry Andric   // Handle 'null' like i32/i64 0.
14090b57cec5SDimitry Andric   if (isa<ConstantPointerNull>(Op1))
14100b57cec5SDimitry Andric     Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
14110b57cec5SDimitry Andric 
14120b57cec5SDimitry Andric   // We have two options: compare with register or immediate.  If the RHS of
14130b57cec5SDimitry Andric   // the compare is an immediate that we can fold into this compare, use
14140b57cec5SDimitry Andric   // CMPri, otherwise use CMPrr.
14150b57cec5SDimitry Andric   if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
14160b57cec5SDimitry Andric     if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1417bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareImmOpc))
14180b57cec5SDimitry Andric         .addReg(Op0Reg)
14190b57cec5SDimitry Andric         .addImm(Op1C->getSExtValue());
14200b57cec5SDimitry Andric       return true;
14210b57cec5SDimitry Andric     }
14220b57cec5SDimitry Andric   }
14230b57cec5SDimitry Andric 
14240b57cec5SDimitry Andric   unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
14250b57cec5SDimitry Andric   if (CompareOpc == 0) return false;
14260b57cec5SDimitry Andric 
14275ffd83dbSDimitry Andric   Register Op1Reg = getRegForValue(Op1);
14280b57cec5SDimitry Andric   if (Op1Reg == 0) return false;
1429bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareOpc))
14300b57cec5SDimitry Andric     .addReg(Op0Reg)
14310b57cec5SDimitry Andric     .addReg(Op1Reg);
14320b57cec5SDimitry Andric 
14330b57cec5SDimitry Andric   return true;
14340b57cec5SDimitry Andric }
14350b57cec5SDimitry Andric 
14360b57cec5SDimitry Andric bool X86FastISel::X86SelectCmp(const Instruction *I) {
14370b57cec5SDimitry Andric   const CmpInst *CI = cast<CmpInst>(I);
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric   MVT VT;
14400b57cec5SDimitry Andric   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
14410b57cec5SDimitry Andric     return false;
14420b57cec5SDimitry Andric 
1443fe6060f1SDimitry Andric   // Below code only works for scalars.
1444fe6060f1SDimitry Andric   if (VT.isVector())
1445fe6060f1SDimitry Andric     return false;
1446fe6060f1SDimitry Andric 
14470b57cec5SDimitry Andric   // Try to optimize or fold the cmp.
14480b57cec5SDimitry Andric   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
14490b57cec5SDimitry Andric   unsigned ResultReg = 0;
14500b57cec5SDimitry Andric   switch (Predicate) {
14510b57cec5SDimitry Andric   default: break;
14520b57cec5SDimitry Andric   case CmpInst::FCMP_FALSE: {
14530b57cec5SDimitry Andric     ResultReg = createResultReg(&X86::GR32RegClass);
1454bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32r0),
14550b57cec5SDimitry Andric             ResultReg);
1456fe6060f1SDimitry Andric     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
14570b57cec5SDimitry Andric     if (!ResultReg)
14580b57cec5SDimitry Andric       return false;
14590b57cec5SDimitry Andric     break;
14600b57cec5SDimitry Andric   }
14610b57cec5SDimitry Andric   case CmpInst::FCMP_TRUE: {
14620b57cec5SDimitry Andric     ResultReg = createResultReg(&X86::GR8RegClass);
1463bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
14640b57cec5SDimitry Andric             ResultReg).addImm(1);
14650b57cec5SDimitry Andric     break;
14660b57cec5SDimitry Andric   }
14670b57cec5SDimitry Andric   }
14680b57cec5SDimitry Andric 
14690b57cec5SDimitry Andric   if (ResultReg) {
14700b57cec5SDimitry Andric     updateValueMap(I, ResultReg);
14710b57cec5SDimitry Andric     return true;
14720b57cec5SDimitry Andric   }
14730b57cec5SDimitry Andric 
14740b57cec5SDimitry Andric   const Value *LHS = CI->getOperand(0);
14750b57cec5SDimitry Andric   const Value *RHS = CI->getOperand(1);
14760b57cec5SDimitry Andric 
14770b57cec5SDimitry Andric   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
14780b57cec5SDimitry Andric   // We don't have to materialize a zero constant for this case and can just use
14790b57cec5SDimitry Andric   // %x again on the RHS.
14800b57cec5SDimitry Andric   if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
14810b57cec5SDimitry Andric     const auto *RHSC = dyn_cast<ConstantFP>(RHS);
14820b57cec5SDimitry Andric     if (RHSC && RHSC->isNullValue())
14830b57cec5SDimitry Andric       RHS = LHS;
14840b57cec5SDimitry Andric   }
14850b57cec5SDimitry Andric 
14860b57cec5SDimitry Andric   // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
14870b57cec5SDimitry Andric   static const uint16_t SETFOpcTable[2][3] = {
14880b57cec5SDimitry Andric     { X86::COND_E,  X86::COND_NP, X86::AND8rr },
14890b57cec5SDimitry Andric     { X86::COND_NE, X86::COND_P,  X86::OR8rr  }
14900b57cec5SDimitry Andric   };
14910b57cec5SDimitry Andric   const uint16_t *SETFOpc = nullptr;
14920b57cec5SDimitry Andric   switch (Predicate) {
14930b57cec5SDimitry Andric   default: break;
14940b57cec5SDimitry Andric   case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
14950b57cec5SDimitry Andric   case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
14960b57cec5SDimitry Andric   }
14970b57cec5SDimitry Andric 
14980b57cec5SDimitry Andric   ResultReg = createResultReg(&X86::GR8RegClass);
14990b57cec5SDimitry Andric   if (SETFOpc) {
15000b57cec5SDimitry Andric     if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
15010b57cec5SDimitry Andric       return false;
15020b57cec5SDimitry Andric 
15035ffd83dbSDimitry Andric     Register FlagReg1 = createResultReg(&X86::GR8RegClass);
15045ffd83dbSDimitry Andric     Register FlagReg2 = createResultReg(&X86::GR8RegClass);
1505bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
15060b57cec5SDimitry Andric             FlagReg1).addImm(SETFOpc[0]);
1507bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
15080b57cec5SDimitry Andric             FlagReg2).addImm(SETFOpc[1]);
1509bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(SETFOpc[2]),
15100b57cec5SDimitry Andric             ResultReg).addReg(FlagReg1).addReg(FlagReg2);
15110b57cec5SDimitry Andric     updateValueMap(I, ResultReg);
15120b57cec5SDimitry Andric     return true;
15130b57cec5SDimitry Andric   }
15140b57cec5SDimitry Andric 
15150b57cec5SDimitry Andric   X86::CondCode CC;
15160b57cec5SDimitry Andric   bool SwapArgs;
15170b57cec5SDimitry Andric   std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
15180b57cec5SDimitry Andric   assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
15190b57cec5SDimitry Andric 
15200b57cec5SDimitry Andric   if (SwapArgs)
15210b57cec5SDimitry Andric     std::swap(LHS, RHS);
15220b57cec5SDimitry Andric 
15230b57cec5SDimitry Andric   // Emit a compare of LHS/RHS.
15240b57cec5SDimitry Andric   if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
15250b57cec5SDimitry Andric     return false;
15260b57cec5SDimitry Andric 
1527bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
15280b57cec5SDimitry Andric           ResultReg).addImm(CC);
15290b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
15300b57cec5SDimitry Andric   return true;
15310b57cec5SDimitry Andric }
15320b57cec5SDimitry Andric 
15330b57cec5SDimitry Andric bool X86FastISel::X86SelectZExt(const Instruction *I) {
15340b57cec5SDimitry Andric   EVT DstVT = TLI.getValueType(DL, I->getType());
15350b57cec5SDimitry Andric   if (!TLI.isTypeLegal(DstVT))
15360b57cec5SDimitry Andric     return false;
15370b57cec5SDimitry Andric 
15385ffd83dbSDimitry Andric   Register ResultReg = getRegForValue(I->getOperand(0));
15390b57cec5SDimitry Andric   if (ResultReg == 0)
15400b57cec5SDimitry Andric     return false;
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric   // Handle zero-extension from i1 to i8, which is common.
15430b57cec5SDimitry Andric   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
15440b57cec5SDimitry Andric   if (SrcVT == MVT::i1) {
15450b57cec5SDimitry Andric     // Set the high bits to zero.
1546fe6060f1SDimitry Andric     ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
15470b57cec5SDimitry Andric     SrcVT = MVT::i8;
15480b57cec5SDimitry Andric 
15490b57cec5SDimitry Andric     if (ResultReg == 0)
15500b57cec5SDimitry Andric       return false;
15510b57cec5SDimitry Andric   }
15520b57cec5SDimitry Andric 
15530b57cec5SDimitry Andric   if (DstVT == MVT::i64) {
15540b57cec5SDimitry Andric     // Handle extension to 64-bits via sub-register shenanigans.
15550b57cec5SDimitry Andric     unsigned MovInst;
15560b57cec5SDimitry Andric 
15570b57cec5SDimitry Andric     switch (SrcVT.SimpleTy) {
15580b57cec5SDimitry Andric     case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
15590b57cec5SDimitry Andric     case MVT::i16: MovInst = X86::MOVZX32rr16; break;
15600b57cec5SDimitry Andric     case MVT::i32: MovInst = X86::MOV32rr;     break;
15610b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected zext to i64 source type");
15620b57cec5SDimitry Andric     }
15630b57cec5SDimitry Andric 
15645ffd83dbSDimitry Andric     Register Result32 = createResultReg(&X86::GR32RegClass);
1565bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovInst), Result32)
15660b57cec5SDimitry Andric       .addReg(ResultReg);
15670b57cec5SDimitry Andric 
15680b57cec5SDimitry Andric     ResultReg = createResultReg(&X86::GR64RegClass);
1569bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::SUBREG_TO_REG),
15700b57cec5SDimitry Andric             ResultReg)
15710b57cec5SDimitry Andric       .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
15720b57cec5SDimitry Andric   } else if (DstVT == MVT::i16) {
15730b57cec5SDimitry Andric     // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
15740b57cec5SDimitry Andric     // extend to 32-bits and then extract down to 16-bits.
15755ffd83dbSDimitry Andric     Register Result32 = createResultReg(&X86::GR32RegClass);
1576bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVZX32rr8),
15770b57cec5SDimitry Andric             Result32).addReg(ResultReg);
15780b57cec5SDimitry Andric 
1579fe6060f1SDimitry Andric     ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
15800b57cec5SDimitry Andric   } else if (DstVT != MVT::i8) {
15810b57cec5SDimitry Andric     ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1582fe6060f1SDimitry Andric                            ResultReg);
15830b57cec5SDimitry Andric     if (ResultReg == 0)
15840b57cec5SDimitry Andric       return false;
15850b57cec5SDimitry Andric   }
15860b57cec5SDimitry Andric 
15870b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
15880b57cec5SDimitry Andric   return true;
15890b57cec5SDimitry Andric }
15900b57cec5SDimitry Andric 
15910b57cec5SDimitry Andric bool X86FastISel::X86SelectSExt(const Instruction *I) {
15920b57cec5SDimitry Andric   EVT DstVT = TLI.getValueType(DL, I->getType());
15930b57cec5SDimitry Andric   if (!TLI.isTypeLegal(DstVT))
15940b57cec5SDimitry Andric     return false;
15950b57cec5SDimitry Andric 
15965ffd83dbSDimitry Andric   Register ResultReg = getRegForValue(I->getOperand(0));
15970b57cec5SDimitry Andric   if (ResultReg == 0)
15980b57cec5SDimitry Andric     return false;
15990b57cec5SDimitry Andric 
16000b57cec5SDimitry Andric   // Handle sign-extension from i1 to i8.
16010b57cec5SDimitry Andric   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
16020b57cec5SDimitry Andric   if (SrcVT == MVT::i1) {
16030b57cec5SDimitry Andric     // Set the high bits to zero.
1604fe6060f1SDimitry Andric     Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
16050b57cec5SDimitry Andric     if (ZExtReg == 0)
16060b57cec5SDimitry Andric       return false;
16070b57cec5SDimitry Andric 
16080b57cec5SDimitry Andric     // Negate the result to make an 8-bit sign extended value.
16090b57cec5SDimitry Andric     ResultReg = createResultReg(&X86::GR8RegClass);
1610bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::NEG8r),
16110b57cec5SDimitry Andric             ResultReg).addReg(ZExtReg);
16120b57cec5SDimitry Andric 
16130b57cec5SDimitry Andric     SrcVT = MVT::i8;
16140b57cec5SDimitry Andric   }
16150b57cec5SDimitry Andric 
16160b57cec5SDimitry Andric   if (DstVT == MVT::i16) {
16170b57cec5SDimitry Andric     // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
16180b57cec5SDimitry Andric     // extend to 32-bits and then extract down to 16-bits.
16195ffd83dbSDimitry Andric     Register Result32 = createResultReg(&X86::GR32RegClass);
1620bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVSX32rr8),
16210b57cec5SDimitry Andric             Result32).addReg(ResultReg);
16220b57cec5SDimitry Andric 
1623fe6060f1SDimitry Andric     ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
16240b57cec5SDimitry Andric   } else if (DstVT != MVT::i8) {
16250b57cec5SDimitry Andric     ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1626fe6060f1SDimitry Andric                            ResultReg);
16270b57cec5SDimitry Andric     if (ResultReg == 0)
16280b57cec5SDimitry Andric       return false;
16290b57cec5SDimitry Andric   }
16300b57cec5SDimitry Andric 
16310b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
16320b57cec5SDimitry Andric   return true;
16330b57cec5SDimitry Andric }
16340b57cec5SDimitry Andric 
// Select a conditional branch. Tries, in order: folding a compare from the
// same block directly into flags + JCC, folding a bool-trunc into a TEST,
// folding an overflow-intrinsic condition, and finally a generic
// TEST8ri/JCC on the materialized i1 condition.
bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    // Only fold if the branch is the compare's sole user; otherwise the
    // compare must also be materialized as a value.
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      // Constant-foldable predicates become unconditional branches.
      case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, MIMD.getDL()); return true;
      case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, MIMD.getDL()); return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
      // 0.0.
      // We don't have to materialize a zero constant for this case and can just
      // use %x again on the RHS.
      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
        if (CmpRHSC && CmpRHSC->isNullValue())
          CmpRHS = CmpLHS;
      }

      // Try to take advantage of fallthrough opportunities.
      // If the true block is the layout successor, branch on the inverse
      // predicate so the true edge becomes a fallthrough.
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
      // code check. Instead two branch instructions are required to check all
      // the flags. First we change the predicate to a supported condition code,
      // which will be the first branch. Later one we will emit the second
      // branch.
      bool NeedExtraBranch = false;
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        [[fallthrough]];
      case CmpInst::FCMP_UNE:
        NeedExtraBranch = true;
        Predicate = CmpInst::FCMP_ONE;
        break;
      }

      bool SwapArgs;
      std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

      if (SwapArgs)
        std::swap(CmpLHS, CmpRHS);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
        .addMBB(TrueMBB).addImm(CC);

      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
      // to UNE above).
      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
          .addMBB(TrueMBB).addImm(X86::COND_P);
      }

      finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      // Pick a TEST immediate form matching the pre-trunc width; an
      // unsupported width leaves TestOpc at 0 and falls through below.
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri; break;
      case MVT::i16: TestOpc = X86::TEST16ri; break;
      case MVT::i32: TestOpc = X86::TEST32ri; break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        Register OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;

        // Test only bit 0, i.e. the value of the i1 before truncation.
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        // Invert the jump condition when the true block is the layout
        // successor so that edge becomes a fallthrough.
        unsigned JmpCond = X86::COND_NE;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpCond = X86::COND_E;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
          .addMBB(TrueMBB).addImm(JmpCond);

        finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
        return true;
      }
    }
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    // The condition comes from an overflow intrinsic; CC was filled in by
    // foldX86XALUIntrinsic above.
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    Register TmpReg = getRegForValue(BI->getCondition());
    if (TmpReg == 0)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
      .addMBB(TrueMBB).addImm(CC);
    finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
    return true;
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  Register OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  // In case OpReg is a K register, COPY to a GPR
  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
    unsigned KOpReg = OpReg;
    // Copy the mask into a GR32, then narrow to the i8 sub-register TEST8ri
    // expects.
    OpReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), OpReg)
        .addReg(KOpReg);
    OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
  }
  // Test bit 0 only: upper bits of the i8 are undefined for an i1 value.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
      .addReg(OpReg)
      .addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
    .addMBB(TrueMBB).addImm(X86::COND_NE);
  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
  return true;
}
17880b57cec5SDimitry Andric 
17890b57cec5SDimitry Andric bool X86FastISel::X86SelectShift(const Instruction *I) {
17900b57cec5SDimitry Andric   unsigned CReg = 0, OpReg = 0;
17910b57cec5SDimitry Andric   const TargetRegisterClass *RC = nullptr;
17920b57cec5SDimitry Andric   if (I->getType()->isIntegerTy(8)) {
17930b57cec5SDimitry Andric     CReg = X86::CL;
17940b57cec5SDimitry Andric     RC = &X86::GR8RegClass;
17950b57cec5SDimitry Andric     switch (I->getOpcode()) {
17960b57cec5SDimitry Andric     case Instruction::LShr: OpReg = X86::SHR8rCL; break;
17970b57cec5SDimitry Andric     case Instruction::AShr: OpReg = X86::SAR8rCL; break;
17980b57cec5SDimitry Andric     case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
17990b57cec5SDimitry Andric     default: return false;
18000b57cec5SDimitry Andric     }
18010b57cec5SDimitry Andric   } else if (I->getType()->isIntegerTy(16)) {
18020b57cec5SDimitry Andric     CReg = X86::CX;
18030b57cec5SDimitry Andric     RC = &X86::GR16RegClass;
18040b57cec5SDimitry Andric     switch (I->getOpcode()) {
18050b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected shift opcode");
18060b57cec5SDimitry Andric     case Instruction::LShr: OpReg = X86::SHR16rCL; break;
18070b57cec5SDimitry Andric     case Instruction::AShr: OpReg = X86::SAR16rCL; break;
18080b57cec5SDimitry Andric     case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
18090b57cec5SDimitry Andric     }
18100b57cec5SDimitry Andric   } else if (I->getType()->isIntegerTy(32)) {
18110b57cec5SDimitry Andric     CReg = X86::ECX;
18120b57cec5SDimitry Andric     RC = &X86::GR32RegClass;
18130b57cec5SDimitry Andric     switch (I->getOpcode()) {
18140b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected shift opcode");
18150b57cec5SDimitry Andric     case Instruction::LShr: OpReg = X86::SHR32rCL; break;
18160b57cec5SDimitry Andric     case Instruction::AShr: OpReg = X86::SAR32rCL; break;
18170b57cec5SDimitry Andric     case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
18180b57cec5SDimitry Andric     }
18190b57cec5SDimitry Andric   } else if (I->getType()->isIntegerTy(64)) {
18200b57cec5SDimitry Andric     CReg = X86::RCX;
18210b57cec5SDimitry Andric     RC = &X86::GR64RegClass;
18220b57cec5SDimitry Andric     switch (I->getOpcode()) {
18230b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected shift opcode");
18240b57cec5SDimitry Andric     case Instruction::LShr: OpReg = X86::SHR64rCL; break;
18250b57cec5SDimitry Andric     case Instruction::AShr: OpReg = X86::SAR64rCL; break;
18260b57cec5SDimitry Andric     case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
18270b57cec5SDimitry Andric     }
18280b57cec5SDimitry Andric   } else {
18290b57cec5SDimitry Andric     return false;
18300b57cec5SDimitry Andric   }
18310b57cec5SDimitry Andric 
18320b57cec5SDimitry Andric   MVT VT;
18330b57cec5SDimitry Andric   if (!isTypeLegal(I->getType(), VT))
18340b57cec5SDimitry Andric     return false;
18350b57cec5SDimitry Andric 
18365ffd83dbSDimitry Andric   Register Op0Reg = getRegForValue(I->getOperand(0));
18370b57cec5SDimitry Andric   if (Op0Reg == 0) return false;
18380b57cec5SDimitry Andric 
18395ffd83dbSDimitry Andric   Register Op1Reg = getRegForValue(I->getOperand(1));
18400b57cec5SDimitry Andric   if (Op1Reg == 0) return false;
1841bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
18420b57cec5SDimitry Andric           CReg).addReg(Op1Reg);
18430b57cec5SDimitry Andric 
18440b57cec5SDimitry Andric   // The shift instruction uses X86::CL. If we defined a super-register
18450b57cec5SDimitry Andric   // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
18460b57cec5SDimitry Andric   if (CReg != X86::CL)
1847bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
18480b57cec5SDimitry Andric             TII.get(TargetOpcode::KILL), X86::CL)
18490b57cec5SDimitry Andric       .addReg(CReg, RegState::Kill);
18500b57cec5SDimitry Andric 
18515ffd83dbSDimitry Andric   Register ResultReg = createResultReg(RC);
1852bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(OpReg), ResultReg)
18530b57cec5SDimitry Andric     .addReg(Op0Reg);
18540b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
18550b57cec5SDimitry Andric   return true;
18560b57cec5SDimitry Andric }
18570b57cec5SDimitry Andric 
// Select integer division/remainder via the x86 DIV/IDIV instructions,
// driven by a table indexed on value width and on the specific operation.
bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
  const static bool S = true;  // IsSigned
  const static bool U = false; // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 DIV/IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended or zero-extended
  // into highreg.  The exception is i8, where the dividend is defined
  // as a single register rather than a register pair, and we
  // therefore directly sign-extend or zero-extend the dividend into
  // lowreg, instead of copying, and ignore the highreg.
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    const TargetRegisterClass *RC;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
    unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
    unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                              // highreg, or copying a zero into highreg.
    unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                              // zero/sign-extending into lowreg for i8.
    unsigned DivRemResultReg; // Register containing the desired result.
    bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
    { &X86::GR8RegClass,  X86::AX,  0, {
        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
      }
    }, // i8
    { &X86::GR16RegClass, X86::AX,  X86::DX, {
        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
      }
    }, // i16
    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
      }
    }, // i32
    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
      }
    }, // i64
  };

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Map the value type to a row of OpTable; i64 is only valid in 64-bit mode.
  unsigned TypeIndex, OpIndex;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:  TypeIndex = 0; break;
  case MVT::i16: TypeIndex = 1; break;
  case MVT::i32: TypeIndex = 2; break;
  case MVT::i64: TypeIndex = 3;
    if (!Subtarget->is64Bit())
      return false;
    break;
  }

  // Map the IR opcode to a column of the ResultTable.
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected div/rem opcode");
  case Instruction::SDiv: OpIndex = 0; break;
  case Instruction::SRem: OpIndex = 1; break;
  case Instruction::UDiv: OpIndex = 2; break;
  case Instruction::URem: OpIndex = 3; break;
  }

  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0)
    return false;
  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0)
    return false;

  // Move op0 into low-order input register.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      // CWD/CDQ/CQO sign-extend the low register into the high one.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(OpEntry.OpSignExtend));
    else {
      // Unsigned case: materialize a 32-bit zero and place it in the high
      // input register.
      Register Zero32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(X86::MOV32r0), Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (VT == MVT::i16) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(Copy), TypeEntry.HighInReg)
          .addReg(Zero32, 0, X86::sub_16bit);
      } else if (VT == MVT::i32) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(Copy), TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (VT == MVT::i64) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
      }
    }
  }
  // Generate the DIV/IDIV instruction.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  unsigned ResultReg = 0;
  if ((I->getOpcode() == Instruction::SRem ||
       I->getOpcode() == Instruction::URem) &&
      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
    Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
    Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(Copy), SourceSuperReg).addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SHR16ri),
            ResultSuperReg).addReg(SourceSuperReg).addImm(8);

    // Now reference the 8-bit subreg of the result.
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
                                           X86::sub_8bit);
  }
  // Copy the result out of the physreg if we haven't already.
  if (!ResultReg) {
    ResultReg = createResultReg(TypeEntry.RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Copy), ResultReg)
        .addReg(OpEntry.DivRemResultReg);
  }
  updateValueMap(I, ResultReg);

  return true;
}
20210b57cec5SDimitry Andric 
/// Emit a conditional move instruction (if they are supported) to lower
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
  // Check if the subtarget supports these instructions.
  if (!Subtarget->canUseCMOV())
    return false;

  // FIXME: Add support for i8.
  if (RetVT < MVT::i16 || RetVT > MVT::i64)
    return false;

  const Value *Cond = I->getOperand(0);
  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  bool NeedTest = true;
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
    // Each row is { first SETcc condition, second SETcc condition, opcode
    // that combines the two flag bytes }.
    static const uint16_t SETFOpcTable[2][3] = {
      { X86::COND_NP, X86::COND_E,  X86::TEST8rr },
      { X86::COND_P,  X86::COND_NE, X86::OR8rr   }
    };
    const uint16_t *SETFOpc = nullptr;
    switch (Predicate) {
    default: break;
    case CmpInst::FCMP_OEQ:
      SETFOpc = &SETFOpcTable[0][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    case CmpInst::FCMP_UNE:
      SETFOpc = &SETFOpcTable[1][0];
      Predicate = CmpInst::ICMP_NE;
      break;
    }

    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);
    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    // Emit a compare of the LHS and RHS, setting the flags.
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;

    if (SETFOpc) {
      // Materialize both flag conditions into GR8 registers and combine them
      // with TEST/OR so the final CMOV only needs a single condition.
      Register FlagReg1 = createResultReg(&X86::GR8RegClass);
      Register FlagReg2 = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
              FlagReg1).addImm(SETFOpc[0]);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
              FlagReg2).addImm(SETFOpc[1]);
      auto const &II = TII.get(SETFOpc[2]);
      // TEST8rr defines no register; OR8rr does, so it needs a result reg.
      if (II.getNumDefs()) {
        Register TmpReg = createResultReg(&X86::GR8RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, TmpReg)
          .addReg(FlagReg2).addReg(FlagReg1);
      } else {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
          .addReg(FlagReg2).addReg(FlagReg1);
      }
    }
    NeedTest = false;
  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    Register TmpReg = getRegForValue(Cond);
    if (TmpReg == 0)
      return false;

    NeedTest = false;
  }

  if (NeedTest) {
    // Selects operate on i1, however, CondReg is 8 bits width and may contain
    // garbage. Indeed, only the less significant bit is supposed to be
    // accurate. If we read more than the lsb, we may see non-zero values
    // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
    // the select. This is achieved by performing TEST against 1.
    Register CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg);
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
        .addReg(CondReg)
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register RHSReg = getRegForValue(RHS);
  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC) / 8, false,
                                    Subtarget->hasNDD());
  // Emit the CMOV with operands (RHSReg, LHSReg, CC): RHS is the value for a
  // false condition, LHS for a true one, matching IR select semantics.
  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
  return true;
}
21440b57cec5SDimitry Andric 
21450b57cec5SDimitry Andric /// Emit SSE or AVX instructions to lower the select.
21460b57cec5SDimitry Andric ///
21470b57cec5SDimitry Andric /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
21480b57cec5SDimitry Andric /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
21490b57cec5SDimitry Andric /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
  if (!CI || (CI->getParent() != I->getParent()))
    return false;

  // The compare and the select must operate on the same scalar FP type, and
  // the SSE level required for that type must be available.
  if (I->getType() != CI->getOperand(0)->getType() ||
      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
    return false;

  const Value *CmpLHS = CI->getOperand(0);
  const Value *CmpRHS = CI->getOperand(1);
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just use
  // %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
    if (CmpRHSC && CmpRHSC->isNullValue())
      CmpRHS = CmpLHS;
  }

  unsigned CC;
  bool NeedSwap;
  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
  // Compare immediates above 7 are only available in the AVX encodings.
  if (CC > 7 && !Subtarget->hasAVX())
    return false;

  if (NeedSwap)
    std::swap(CmpLHS, CmpRHS);

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register LHSReg = getRegForValue(LHS);
  Register RHSReg = getRegForValue(RHS);
  Register CmpLHSReg = getRegForValue(CmpLHS);
  Register CmpRHSReg = getRegForValue(CmpRHS);
  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  unsigned ResultReg;

  if (Subtarget->hasAVX512()) {
    // If we have AVX512 we can use a mask compare and masked movss/sd.
    const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
    const TargetRegisterClass *VK1 = &X86::VK1RegClass;

    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSZrri : X86::VCMPSDZrri;
    Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
                                       CC);

    // Need an IMPLICIT_DEF for the input that is used to generate the upper
    // bits of the result register since it's not based on any of the inputs.
    Register ImplicitDefReg = createResultReg(VR128X);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    // RHSReg is the passthru of the masked movss/sd operation and LHS is the
    // input. The mask input comes from the compare.
    unsigned MovOpcode =
      (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
                                        ImplicitDefReg, LHSReg);

    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);

  } else if (Subtarget->hasAVX()) {
    const TargetRegisterClass *VR128 = &X86::VR128RegClass;

    // If we have AVX, create 1 blendv instead of 3 logic instructions.
    // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
    // uses XMM0 as the selection register. That may need just as many
    // instructions as the AND/ANDN/OR sequence due to register moves, so
    // don't bother.
    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri;
    unsigned BlendOpcode =
      (RetVT == MVT::f32) ? X86::VBLENDVPSrrr : X86::VBLENDVPDrrr;

    Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
                                       CC);
    Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
                                          CmpReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
  } else {
    // Choose the SSE instruction sequence based on data type (float or double).
    // Row layout: { CMPcc, AND, ANDN, OR } — the compare yields an all-ones or
    // all-zeros mask that the logic ops use to merge LHS and RHS.
    static const uint16_t OpcTable[2][4] = {
      { X86::CMPSSrri,  X86::ANDPSrr,  X86::ANDNPSrr,  X86::ORPSrr  },
      { X86::CMPSDrri,  X86::ANDPDrr,  X86::ANDNPDrr,  X86::ORPDrr  }
    };

    const uint16_t *Opc = nullptr;
    switch (RetVT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = &OpcTable[0][0]; break;
    case MVT::f64: Opc = &OpcTable[1][0]; break;
    }

    const TargetRegisterClass *VR128 = &X86::VR128RegClass;
    Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
    Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
    Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
    Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
  }
  updateValueMap(I, ResultReg);
  return true;
}
22710b57cec5SDimitry Andric 
bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  // These are pseudo CMOV instructions and will be later expanded into control-
  // flow.
  unsigned Opc;
  // Pick the pseudo-CMOV opcode matching the selected value's type.
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::i8:  Opc = X86::CMOV_GR8;   break;
  case MVT::i16: Opc = X86::CMOV_GR16;  break;
  case MVT::i32: Opc = X86::CMOV_GR32;  break;
  case MVT::f16:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
  case MVT::f32:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
  case MVT::f64:
    Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
  }

  const Value *Cond = I->getOperand(0);
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
    if (CC > X86::LAST_VALID_COND)
      return false;

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);

    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    // Emit a compare of the operands to set the flags for the CMOV.
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;
  } else {
    Register CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg);
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
    }
    // Only the low bit of the condition is meaningful; TEST against 1 so the
    // COND_NE select keys off that bit alone (see X86FastEmitCMoveSelect).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
        .addReg(CondReg)
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  Register LHSReg = getRegForValue(LHS);
  Register RHSReg = getRegForValue(RHS);
  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);

  // Emit the pseudo CMOV; operand order (RHSReg, LHSReg, CC) matches
  // X86FastEmitCMoveSelect.
  Register ResultReg =
    fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
  return true;
}
23450b57cec5SDimitry Andric 
23460b57cec5SDimitry Andric bool X86FastISel::X86SelectSelect(const Instruction *I) {
23470b57cec5SDimitry Andric   MVT RetVT;
23480b57cec5SDimitry Andric   if (!isTypeLegal(I->getType(), RetVT))
23490b57cec5SDimitry Andric     return false;
23500b57cec5SDimitry Andric 
23510b57cec5SDimitry Andric   // Check if we can fold the select.
23520b57cec5SDimitry Andric   if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
23530b57cec5SDimitry Andric     CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
23540b57cec5SDimitry Andric     const Value *Opnd = nullptr;
23550b57cec5SDimitry Andric     switch (Predicate) {
23560b57cec5SDimitry Andric     default:                              break;
23570b57cec5SDimitry Andric     case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
23580b57cec5SDimitry Andric     case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
23590b57cec5SDimitry Andric     }
23600b57cec5SDimitry Andric     // No need for a select anymore - this is an unconditional move.
23610b57cec5SDimitry Andric     if (Opnd) {
23625ffd83dbSDimitry Andric       Register OpReg = getRegForValue(Opnd);
23630b57cec5SDimitry Andric       if (OpReg == 0)
23640b57cec5SDimitry Andric         return false;
23650b57cec5SDimitry Andric       const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
23665ffd83dbSDimitry Andric       Register ResultReg = createResultReg(RC);
2367bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
23680b57cec5SDimitry Andric               TII.get(TargetOpcode::COPY), ResultReg)
2369fe6060f1SDimitry Andric         .addReg(OpReg);
23700b57cec5SDimitry Andric       updateValueMap(I, ResultReg);
23710b57cec5SDimitry Andric       return true;
23720b57cec5SDimitry Andric     }
23730b57cec5SDimitry Andric   }
23740b57cec5SDimitry Andric 
23750b57cec5SDimitry Andric   // First try to use real conditional move instructions.
23760b57cec5SDimitry Andric   if (X86FastEmitCMoveSelect(RetVT, I))
23770b57cec5SDimitry Andric     return true;
23780b57cec5SDimitry Andric 
23790b57cec5SDimitry Andric   // Try to use a sequence of SSE instructions to simulate a conditional move.
23800b57cec5SDimitry Andric   if (X86FastEmitSSESelect(RetVT, I))
23810b57cec5SDimitry Andric     return true;
23820b57cec5SDimitry Andric 
23830b57cec5SDimitry Andric   // Fall-back to pseudo conditional move instructions, which will be later
23840b57cec5SDimitry Andric   // converted to control-flow.
23850b57cec5SDimitry Andric   if (X86FastEmitPseudoSelect(RetVT, I))
23860b57cec5SDimitry Andric     return true;
23870b57cec5SDimitry Andric 
23880b57cec5SDimitry Andric   return false;
23890b57cec5SDimitry Andric }
23900b57cec5SDimitry Andric 
23910b57cec5SDimitry Andric // Common code for X86SelectSIToFP and X86SelectUIToFP.
23920b57cec5SDimitry Andric bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
23930b57cec5SDimitry Andric   // The target-independent selection algorithm in FastISel already knows how
23940b57cec5SDimitry Andric   // to select a SINT_TO_FP if the target is SSE but not AVX.
23950b57cec5SDimitry Andric   // Early exit if the subtarget doesn't have AVX.
23960b57cec5SDimitry Andric   // Unsigned conversion requires avx512.
23970b57cec5SDimitry Andric   bool HasAVX512 = Subtarget->hasAVX512();
23980b57cec5SDimitry Andric   if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
23990b57cec5SDimitry Andric     return false;
24000b57cec5SDimitry Andric 
24010b57cec5SDimitry Andric   // TODO: We could sign extend narrower types.
24025f757f3fSDimitry Andric   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
24030b57cec5SDimitry Andric   if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
24040b57cec5SDimitry Andric     return false;
24050b57cec5SDimitry Andric 
24060b57cec5SDimitry Andric   // Select integer to float/double conversion.
24075ffd83dbSDimitry Andric   Register OpReg = getRegForValue(I->getOperand(0));
24080b57cec5SDimitry Andric   if (OpReg == 0)
24090b57cec5SDimitry Andric     return false;
24100b57cec5SDimitry Andric 
24110b57cec5SDimitry Andric   unsigned Opcode;
24120b57cec5SDimitry Andric 
24130b57cec5SDimitry Andric   static const uint16_t SCvtOpc[2][2][2] = {
24140b57cec5SDimitry Andric     { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
24150b57cec5SDimitry Andric       { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
24160b57cec5SDimitry Andric     { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
24170b57cec5SDimitry Andric       { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
24180b57cec5SDimitry Andric   };
24190b57cec5SDimitry Andric   static const uint16_t UCvtOpc[2][2] = {
24200b57cec5SDimitry Andric     { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
24210b57cec5SDimitry Andric     { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
24220b57cec5SDimitry Andric   };
24230b57cec5SDimitry Andric   bool Is64Bit = SrcVT == MVT::i64;
24240b57cec5SDimitry Andric 
24250b57cec5SDimitry Andric   if (I->getType()->isDoubleTy()) {
24260b57cec5SDimitry Andric     // s/uitofp int -> double
24270b57cec5SDimitry Andric     Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
24280b57cec5SDimitry Andric   } else if (I->getType()->isFloatTy()) {
24290b57cec5SDimitry Andric     // s/uitofp int -> float
24300b57cec5SDimitry Andric     Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
24310b57cec5SDimitry Andric   } else
24320b57cec5SDimitry Andric     return false;
24330b57cec5SDimitry Andric 
24340b57cec5SDimitry Andric   MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
24350b57cec5SDimitry Andric   const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
24365ffd83dbSDimitry Andric   Register ImplicitDefReg = createResultReg(RC);
2437bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
24380b57cec5SDimitry Andric           TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2439fe6060f1SDimitry Andric   Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
24400b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
24410b57cec5SDimitry Andric   return true;
24420b57cec5SDimitry Andric }
24430b57cec5SDimitry Andric 
24440b57cec5SDimitry Andric bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
24450b57cec5SDimitry Andric   return X86SelectIntToFP(I, /*IsSigned*/true);
24460b57cec5SDimitry Andric }
24470b57cec5SDimitry Andric 
24480b57cec5SDimitry Andric bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
24490b57cec5SDimitry Andric   return X86SelectIntToFP(I, /*IsSigned*/false);
24500b57cec5SDimitry Andric }
24510b57cec5SDimitry Andric 
24520b57cec5SDimitry Andric // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
24530b57cec5SDimitry Andric bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
24540b57cec5SDimitry Andric                                           unsigned TargetOpc,
24550b57cec5SDimitry Andric                                           const TargetRegisterClass *RC) {
24560b57cec5SDimitry Andric   assert((I->getOpcode() == Instruction::FPExt ||
24570b57cec5SDimitry Andric           I->getOpcode() == Instruction::FPTrunc) &&
24580b57cec5SDimitry Andric          "Instruction must be an FPExt or FPTrunc!");
24590b57cec5SDimitry Andric   bool HasAVX = Subtarget->hasAVX();
24600b57cec5SDimitry Andric 
24615ffd83dbSDimitry Andric   Register OpReg = getRegForValue(I->getOperand(0));
24620b57cec5SDimitry Andric   if (OpReg == 0)
24630b57cec5SDimitry Andric     return false;
24640b57cec5SDimitry Andric 
24650b57cec5SDimitry Andric   unsigned ImplicitDefReg;
24660b57cec5SDimitry Andric   if (HasAVX) {
24670b57cec5SDimitry Andric     ImplicitDefReg = createResultReg(RC);
2468bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
24690b57cec5SDimitry Andric             TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
24700b57cec5SDimitry Andric 
24710b57cec5SDimitry Andric   }
24720b57cec5SDimitry Andric 
24735ffd83dbSDimitry Andric   Register ResultReg = createResultReg(RC);
24740b57cec5SDimitry Andric   MachineInstrBuilder MIB;
2475bdd1243dSDimitry Andric   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpc),
24760b57cec5SDimitry Andric                 ResultReg);
24770b57cec5SDimitry Andric 
24780b57cec5SDimitry Andric   if (HasAVX)
24790b57cec5SDimitry Andric     MIB.addReg(ImplicitDefReg);
24800b57cec5SDimitry Andric 
24810b57cec5SDimitry Andric   MIB.addReg(OpReg);
24820b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
24830b57cec5SDimitry Andric   return true;
24840b57cec5SDimitry Andric }
24850b57cec5SDimitry Andric 
24860b57cec5SDimitry Andric bool X86FastISel::X86SelectFPExt(const Instruction *I) {
248781ad6265SDimitry Andric   if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
24880b57cec5SDimitry Andric       I->getOperand(0)->getType()->isFloatTy()) {
24890b57cec5SDimitry Andric     bool HasAVX512 = Subtarget->hasAVX512();
24900b57cec5SDimitry Andric     // fpext from float to double.
24910b57cec5SDimitry Andric     unsigned Opc =
24920b57cec5SDimitry Andric         HasAVX512 ? X86::VCVTSS2SDZrr
24930b57cec5SDimitry Andric                   : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
24940b57cec5SDimitry Andric     return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
24950b57cec5SDimitry Andric   }
24960b57cec5SDimitry Andric 
24970b57cec5SDimitry Andric   return false;
24980b57cec5SDimitry Andric }
24990b57cec5SDimitry Andric 
25000b57cec5SDimitry Andric bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
250181ad6265SDimitry Andric   if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
25020b57cec5SDimitry Andric       I->getOperand(0)->getType()->isDoubleTy()) {
25030b57cec5SDimitry Andric     bool HasAVX512 = Subtarget->hasAVX512();
25040b57cec5SDimitry Andric     // fptrunc from double to float.
25050b57cec5SDimitry Andric     unsigned Opc =
25060b57cec5SDimitry Andric         HasAVX512 ? X86::VCVTSD2SSZrr
25070b57cec5SDimitry Andric                   : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
25080b57cec5SDimitry Andric     return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
25090b57cec5SDimitry Andric   }
25100b57cec5SDimitry Andric 
25110b57cec5SDimitry Andric   return false;
25120b57cec5SDimitry Andric }
25130b57cec5SDimitry Andric 
25140b57cec5SDimitry Andric bool X86FastISel::X86SelectTrunc(const Instruction *I) {
25150b57cec5SDimitry Andric   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
25160b57cec5SDimitry Andric   EVT DstVT = TLI.getValueType(DL, I->getType());
25170b57cec5SDimitry Andric 
25180b57cec5SDimitry Andric   // This code only handles truncation to byte.
25190b57cec5SDimitry Andric   if (DstVT != MVT::i8 && DstVT != MVT::i1)
25200b57cec5SDimitry Andric     return false;
25210b57cec5SDimitry Andric   if (!TLI.isTypeLegal(SrcVT))
25220b57cec5SDimitry Andric     return false;
25230b57cec5SDimitry Andric 
25245ffd83dbSDimitry Andric   Register InputReg = getRegForValue(I->getOperand(0));
25250b57cec5SDimitry Andric   if (!InputReg)
25260b57cec5SDimitry Andric     // Unhandled operand.  Halt "fast" selection and bail.
25270b57cec5SDimitry Andric     return false;
25280b57cec5SDimitry Andric 
25290b57cec5SDimitry Andric   if (SrcVT == MVT::i8) {
25300b57cec5SDimitry Andric     // Truncate from i8 to i1; no code needed.
25310b57cec5SDimitry Andric     updateValueMap(I, InputReg);
25320b57cec5SDimitry Andric     return true;
25330b57cec5SDimitry Andric   }
25340b57cec5SDimitry Andric 
25350b57cec5SDimitry Andric   // Issue an extract_subreg.
2536fe6060f1SDimitry Andric   Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
25370b57cec5SDimitry Andric                                                   X86::sub_8bit);
25380b57cec5SDimitry Andric   if (!ResultReg)
25390b57cec5SDimitry Andric     return false;
25400b57cec5SDimitry Andric 
25410b57cec5SDimitry Andric   updateValueMap(I, ResultReg);
25420b57cec5SDimitry Andric   return true;
25430b57cec5SDimitry Andric }
25440b57cec5SDimitry Andric 
25450b57cec5SDimitry Andric bool X86FastISel::IsMemcpySmall(uint64_t Len) {
25460b57cec5SDimitry Andric   return Len <= (Subtarget->is64Bit() ? 32 : 16);
25470b57cec5SDimitry Andric }
25480b57cec5SDimitry Andric 
25490b57cec5SDimitry Andric bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
25500b57cec5SDimitry Andric                                      X86AddressMode SrcAM, uint64_t Len) {
25510b57cec5SDimitry Andric 
25520b57cec5SDimitry Andric   // Make sure we don't bloat code by inlining very large memcpy's.
25530b57cec5SDimitry Andric   if (!IsMemcpySmall(Len))
25540b57cec5SDimitry Andric     return false;
25550b57cec5SDimitry Andric 
25560b57cec5SDimitry Andric   bool i64Legal = Subtarget->is64Bit();
25570b57cec5SDimitry Andric 
25580b57cec5SDimitry Andric   // We don't care about alignment here since we just emit integer accesses.
25590b57cec5SDimitry Andric   while (Len) {
25600b57cec5SDimitry Andric     MVT VT;
25610b57cec5SDimitry Andric     if (Len >= 8 && i64Legal)
25620b57cec5SDimitry Andric       VT = MVT::i64;
25630b57cec5SDimitry Andric     else if (Len >= 4)
25640b57cec5SDimitry Andric       VT = MVT::i32;
25650b57cec5SDimitry Andric     else if (Len >= 2)
25660b57cec5SDimitry Andric       VT = MVT::i16;
25670b57cec5SDimitry Andric     else
25680b57cec5SDimitry Andric       VT = MVT::i8;
25690b57cec5SDimitry Andric 
25700b57cec5SDimitry Andric     unsigned Reg;
25710b57cec5SDimitry Andric     bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2572fe6060f1SDimitry Andric     RV &= X86FastEmitStore(VT, Reg, DestAM);
25730b57cec5SDimitry Andric     assert(RV && "Failed to emit load or store??");
2574fe6060f1SDimitry Andric     (void)RV;
25750b57cec5SDimitry Andric 
25760b57cec5SDimitry Andric     unsigned Size = VT.getSizeInBits()/8;
25770b57cec5SDimitry Andric     Len -= Size;
25780b57cec5SDimitry Andric     DestAM.Disp += Size;
25790b57cec5SDimitry Andric     SrcAM.Disp += Size;
25800b57cec5SDimitry Andric   }
25810b57cec5SDimitry Andric 
25820b57cec5SDimitry Andric   return true;
25830b57cec5SDimitry Andric }
25840b57cec5SDimitry Andric 
25850b57cec5SDimitry Andric bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
25860b57cec5SDimitry Andric   // FIXME: Handle more intrinsics.
25870b57cec5SDimitry Andric   switch (II->getIntrinsicID()) {
25880b57cec5SDimitry Andric   default: return false;
25890b57cec5SDimitry Andric   case Intrinsic::convert_from_fp16:
25900b57cec5SDimitry Andric   case Intrinsic::convert_to_fp16: {
25910b57cec5SDimitry Andric     if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
25920b57cec5SDimitry Andric       return false;
25930b57cec5SDimitry Andric 
25940b57cec5SDimitry Andric     const Value *Op = II->getArgOperand(0);
25955ffd83dbSDimitry Andric     Register InputReg = getRegForValue(Op);
25960b57cec5SDimitry Andric     if (InputReg == 0)
25970b57cec5SDimitry Andric       return false;
25980b57cec5SDimitry Andric 
25990b57cec5SDimitry Andric     // F16C only allows converting from float to half and from half to float.
26000b57cec5SDimitry Andric     bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
26010b57cec5SDimitry Andric     if (IsFloatToHalf) {
26020b57cec5SDimitry Andric       if (!Op->getType()->isFloatTy())
26030b57cec5SDimitry Andric         return false;
26040b57cec5SDimitry Andric     } else {
26050b57cec5SDimitry Andric       if (!II->getType()->isFloatTy())
26060b57cec5SDimitry Andric         return false;
26070b57cec5SDimitry Andric     }
26080b57cec5SDimitry Andric 
26090b57cec5SDimitry Andric     unsigned ResultReg = 0;
26100b57cec5SDimitry Andric     const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
26110b57cec5SDimitry Andric     if (IsFloatToHalf) {
26120b57cec5SDimitry Andric       // 'InputReg' is implicitly promoted from register class FR32 to
26130b57cec5SDimitry Andric       // register class VR128 by method 'constrainOperandRegClass' which is
26140b57cec5SDimitry Andric       // directly called by 'fastEmitInst_ri'.
26150b57cec5SDimitry Andric       // Instruction VCVTPS2PHrr takes an extra immediate operand which is
26160b57cec5SDimitry Andric       // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
26170b57cec5SDimitry Andric       // It's consistent with the other FP instructions, which are usually
26180b57cec5SDimitry Andric       // controlled by MXCSR.
26195ffd83dbSDimitry Andric       unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
26205ffd83dbSDimitry Andric                                          : X86::VCVTPS2PHrr;
2621fe6060f1SDimitry Andric       InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
26220b57cec5SDimitry Andric 
26230b57cec5SDimitry Andric       // Move the lower 32-bits of ResultReg to another register of class GR32.
26245ffd83dbSDimitry Andric       Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
26255ffd83dbSDimitry Andric                                    : X86::VMOVPDI2DIrr;
26260b57cec5SDimitry Andric       ResultReg = createResultReg(&X86::GR32RegClass);
2627bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
26280b57cec5SDimitry Andric           .addReg(InputReg, RegState::Kill);
26290b57cec5SDimitry Andric 
26300b57cec5SDimitry Andric       // The result value is in the lower 16-bits of ResultReg.
26310b57cec5SDimitry Andric       unsigned RegIdx = X86::sub_16bit;
2632fe6060f1SDimitry Andric       ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
26330b57cec5SDimitry Andric     } else {
26340b57cec5SDimitry Andric       assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
26355ffd83dbSDimitry Andric       // Explicitly zero-extend the input to 32-bit.
2636fe6060f1SDimitry Andric       InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);
26370b57cec5SDimitry Andric 
26380b57cec5SDimitry Andric       // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
26390b57cec5SDimitry Andric       InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2640fe6060f1SDimitry Andric                             InputReg);
26410b57cec5SDimitry Andric 
26425ffd83dbSDimitry Andric       unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
26435ffd83dbSDimitry Andric                                          : X86::VCVTPH2PSrr;
2644fe6060f1SDimitry Andric       InputReg = fastEmitInst_r(Opc, RC, InputReg);
26450b57cec5SDimitry Andric 
26460b57cec5SDimitry Andric       // The result value is in the lower 32-bits of ResultReg.
26470b57cec5SDimitry Andric       // Emit an explicit copy from register class VR128 to register class FR32.
26485ffd83dbSDimitry Andric       ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2649bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
26500b57cec5SDimitry Andric               TII.get(TargetOpcode::COPY), ResultReg)
26510b57cec5SDimitry Andric           .addReg(InputReg, RegState::Kill);
26520b57cec5SDimitry Andric     }
26530b57cec5SDimitry Andric 
26540b57cec5SDimitry Andric     updateValueMap(II, ResultReg);
26550b57cec5SDimitry Andric     return true;
26560b57cec5SDimitry Andric   }
26570b57cec5SDimitry Andric   case Intrinsic::frameaddress: {
26580b57cec5SDimitry Andric     MachineFunction *MF = FuncInfo.MF;
26590b57cec5SDimitry Andric     if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
26600b57cec5SDimitry Andric       return false;
26610b57cec5SDimitry Andric 
26620b57cec5SDimitry Andric     Type *RetTy = II->getCalledFunction()->getReturnType();
26630b57cec5SDimitry Andric 
26640b57cec5SDimitry Andric     MVT VT;
26650b57cec5SDimitry Andric     if (!isTypeLegal(RetTy, VT))
26660b57cec5SDimitry Andric       return false;
26670b57cec5SDimitry Andric 
26680b57cec5SDimitry Andric     unsigned Opc;
26690b57cec5SDimitry Andric     const TargetRegisterClass *RC = nullptr;
26700b57cec5SDimitry Andric 
26710b57cec5SDimitry Andric     switch (VT.SimpleTy) {
26720b57cec5SDimitry Andric     default: llvm_unreachable("Invalid result type for frameaddress.");
26730b57cec5SDimitry Andric     case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
26740b57cec5SDimitry Andric     case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
26750b57cec5SDimitry Andric     }
26760b57cec5SDimitry Andric 
26770b57cec5SDimitry Andric     // This needs to be set before we call getPtrSizedFrameRegister, otherwise
26780b57cec5SDimitry Andric     // we get the wrong frame register.
26790b57cec5SDimitry Andric     MachineFrameInfo &MFI = MF->getFrameInfo();
26800b57cec5SDimitry Andric     MFI.setFrameAddressIsTaken(true);
26810b57cec5SDimitry Andric 
26820b57cec5SDimitry Andric     const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
26830b57cec5SDimitry Andric     unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
26840b57cec5SDimitry Andric     assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
26850b57cec5SDimitry Andric             (FrameReg == X86::EBP && VT == MVT::i32)) &&
26860b57cec5SDimitry Andric            "Invalid Frame Register!");
26870b57cec5SDimitry Andric 
26880b57cec5SDimitry Andric     // Always make a copy of the frame register to a vreg first, so that we
26890b57cec5SDimitry Andric     // never directly reference the frame register (the TwoAddressInstruction-
26900b57cec5SDimitry Andric     // Pass doesn't like that).
26915ffd83dbSDimitry Andric     Register SrcReg = createResultReg(RC);
2692bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
26930b57cec5SDimitry Andric             TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
26940b57cec5SDimitry Andric 
26950b57cec5SDimitry Andric     // Now recursively load from the frame address.
26960b57cec5SDimitry Andric     // movq (%rbp), %rax
26970b57cec5SDimitry Andric     // movq (%rax), %rax
26980b57cec5SDimitry Andric     // movq (%rax), %rax
26990b57cec5SDimitry Andric     // ...
27000b57cec5SDimitry Andric     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
27010b57cec5SDimitry Andric     while (Depth--) {
2702e8d8bef9SDimitry Andric       Register DestReg = createResultReg(RC);
2703bdd1243dSDimitry Andric       addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
27040b57cec5SDimitry Andric                            TII.get(Opc), DestReg), SrcReg);
27050b57cec5SDimitry Andric       SrcReg = DestReg;
27060b57cec5SDimitry Andric     }
27070b57cec5SDimitry Andric 
27080b57cec5SDimitry Andric     updateValueMap(II, SrcReg);
27090b57cec5SDimitry Andric     return true;
27100b57cec5SDimitry Andric   }
27110b57cec5SDimitry Andric   case Intrinsic::memcpy: {
27120b57cec5SDimitry Andric     const MemCpyInst *MCI = cast<MemCpyInst>(II);
27130b57cec5SDimitry Andric     // Don't handle volatile or variable length memcpys.
27140b57cec5SDimitry Andric     if (MCI->isVolatile())
27150b57cec5SDimitry Andric       return false;
27160b57cec5SDimitry Andric 
27170b57cec5SDimitry Andric     if (isa<ConstantInt>(MCI->getLength())) {
27180b57cec5SDimitry Andric       // Small memcpy's are common enough that we want to do them
27190b57cec5SDimitry Andric       // without a call if possible.
27200b57cec5SDimitry Andric       uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
27210b57cec5SDimitry Andric       if (IsMemcpySmall(Len)) {
27220b57cec5SDimitry Andric         X86AddressMode DestAM, SrcAM;
27230b57cec5SDimitry Andric         if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
27240b57cec5SDimitry Andric             !X86SelectAddress(MCI->getRawSource(), SrcAM))
27250b57cec5SDimitry Andric           return false;
27260b57cec5SDimitry Andric         TryEmitSmallMemcpy(DestAM, SrcAM, Len);
27270b57cec5SDimitry Andric         return true;
27280b57cec5SDimitry Andric       }
27290b57cec5SDimitry Andric     }
27300b57cec5SDimitry Andric 
27310b57cec5SDimitry Andric     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
27320b57cec5SDimitry Andric     if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
27330b57cec5SDimitry Andric       return false;
27340b57cec5SDimitry Andric 
27350b57cec5SDimitry Andric     if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
27360b57cec5SDimitry Andric       return false;
27370b57cec5SDimitry Andric 
2738349cc55cSDimitry Andric     return lowerCallTo(II, "memcpy", II->arg_size() - 1);
27390b57cec5SDimitry Andric   }
27400b57cec5SDimitry Andric   case Intrinsic::memset: {
27410b57cec5SDimitry Andric     const MemSetInst *MSI = cast<MemSetInst>(II);
27420b57cec5SDimitry Andric 
27430b57cec5SDimitry Andric     if (MSI->isVolatile())
27440b57cec5SDimitry Andric       return false;
27450b57cec5SDimitry Andric 
27460b57cec5SDimitry Andric     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
27470b57cec5SDimitry Andric     if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
27480b57cec5SDimitry Andric       return false;
27490b57cec5SDimitry Andric 
27500b57cec5SDimitry Andric     if (MSI->getDestAddressSpace() > 255)
27510b57cec5SDimitry Andric       return false;
27520b57cec5SDimitry Andric 
2753349cc55cSDimitry Andric     return lowerCallTo(II, "memset", II->arg_size() - 1);
27540b57cec5SDimitry Andric   }
27550b57cec5SDimitry Andric   case Intrinsic::stackprotector: {
27560b57cec5SDimitry Andric     // Emit code to store the stack guard onto the stack.
27570b57cec5SDimitry Andric     EVT PtrTy = TLI.getPointerTy(DL);
27580b57cec5SDimitry Andric 
27590b57cec5SDimitry Andric     const Value *Op1 = II->getArgOperand(0); // The guard's value.
27600b57cec5SDimitry Andric     const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
27610b57cec5SDimitry Andric 
27620b57cec5SDimitry Andric     MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
27630b57cec5SDimitry Andric 
27640b57cec5SDimitry Andric     // Grab the frame index.
27650b57cec5SDimitry Andric     X86AddressMode AM;
27660b57cec5SDimitry Andric     if (!X86SelectAddress(Slot, AM)) return false;
27670b57cec5SDimitry Andric     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
27680b57cec5SDimitry Andric     return true;
27690b57cec5SDimitry Andric   }
27700b57cec5SDimitry Andric   case Intrinsic::dbg_declare: {
27710b57cec5SDimitry Andric     const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
27720b57cec5SDimitry Andric     X86AddressMode AM;
27730b57cec5SDimitry Andric     assert(DI->getAddress() && "Null address should be checked earlier!");
27740b57cec5SDimitry Andric     if (!X86SelectAddress(DI->getAddress(), AM))
27750b57cec5SDimitry Andric       return false;
27760b57cec5SDimitry Andric     const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2777bdd1243dSDimitry Andric     assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
27780b57cec5SDimitry Andric            "Expected inlined-at fields to agree");
2779bdd1243dSDimitry Andric     addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II), AM)
27800b57cec5SDimitry Andric         .addImm(0)
27810b57cec5SDimitry Andric         .addMetadata(DI->getVariable())
27820b57cec5SDimitry Andric         .addMetadata(DI->getExpression());
27830b57cec5SDimitry Andric     return true;
27840b57cec5SDimitry Andric   }
27850b57cec5SDimitry Andric   case Intrinsic::trap: {
2786bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TRAP));
27870b57cec5SDimitry Andric     return true;
27880b57cec5SDimitry Andric   }
27890b57cec5SDimitry Andric   case Intrinsic::sqrt: {
27900b57cec5SDimitry Andric     if (!Subtarget->hasSSE1())
27910b57cec5SDimitry Andric       return false;
27920b57cec5SDimitry Andric 
27930b57cec5SDimitry Andric     Type *RetTy = II->getCalledFunction()->getReturnType();
27940b57cec5SDimitry Andric 
27950b57cec5SDimitry Andric     MVT VT;
27960b57cec5SDimitry Andric     if (!isTypeLegal(RetTy, VT))
27970b57cec5SDimitry Andric       return false;
27980b57cec5SDimitry Andric 
27990b57cec5SDimitry Andric     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
28000b57cec5SDimitry Andric     // is not generated by FastISel yet.
28010b57cec5SDimitry Andric     // FIXME: Update this code once tablegen can handle it.
28020b57cec5SDimitry Andric     static const uint16_t SqrtOpc[3][2] = {
28030b57cec5SDimitry Andric       { X86::SQRTSSr,   X86::SQRTSDr },
28040b57cec5SDimitry Andric       { X86::VSQRTSSr,  X86::VSQRTSDr },
28050b57cec5SDimitry Andric       { X86::VSQRTSSZr, X86::VSQRTSDZr },
28060b57cec5SDimitry Andric     };
28070b57cec5SDimitry Andric     unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
28080b57cec5SDimitry Andric                         Subtarget->hasAVX()    ? 1 :
28090b57cec5SDimitry Andric                                                  0;
28100b57cec5SDimitry Andric     unsigned Opc;
28110b57cec5SDimitry Andric     switch (VT.SimpleTy) {
28120b57cec5SDimitry Andric     default: return false;
28130b57cec5SDimitry Andric     case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
28140b57cec5SDimitry Andric     case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
28150b57cec5SDimitry Andric     }
28160b57cec5SDimitry Andric 
28170b57cec5SDimitry Andric     const Value *SrcVal = II->getArgOperand(0);
28185ffd83dbSDimitry Andric     Register SrcReg = getRegForValue(SrcVal);
28190b57cec5SDimitry Andric 
28200b57cec5SDimitry Andric     if (SrcReg == 0)
28210b57cec5SDimitry Andric       return false;
28220b57cec5SDimitry Andric 
28230b57cec5SDimitry Andric     const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
28240b57cec5SDimitry Andric     unsigned ImplicitDefReg = 0;
28250b57cec5SDimitry Andric     if (AVXLevel > 0) {
28260b57cec5SDimitry Andric       ImplicitDefReg = createResultReg(RC);
2827bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
28280b57cec5SDimitry Andric               TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
28290b57cec5SDimitry Andric     }
28300b57cec5SDimitry Andric 
28315ffd83dbSDimitry Andric     Register ResultReg = createResultReg(RC);
28320b57cec5SDimitry Andric     MachineInstrBuilder MIB;
2833bdd1243dSDimitry Andric     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
28340b57cec5SDimitry Andric                   ResultReg);
28350b57cec5SDimitry Andric 
28360b57cec5SDimitry Andric     if (ImplicitDefReg)
28370b57cec5SDimitry Andric       MIB.addReg(ImplicitDefReg);
28380b57cec5SDimitry Andric 
28390b57cec5SDimitry Andric     MIB.addReg(SrcReg);
28400b57cec5SDimitry Andric 
28410b57cec5SDimitry Andric     updateValueMap(II, ResultReg);
28420b57cec5SDimitry Andric     return true;
28430b57cec5SDimitry Andric   }
28440b57cec5SDimitry Andric   case Intrinsic::sadd_with_overflow:
28450b57cec5SDimitry Andric   case Intrinsic::uadd_with_overflow:
28460b57cec5SDimitry Andric   case Intrinsic::ssub_with_overflow:
28470b57cec5SDimitry Andric   case Intrinsic::usub_with_overflow:
28480b57cec5SDimitry Andric   case Intrinsic::smul_with_overflow:
28490b57cec5SDimitry Andric   case Intrinsic::umul_with_overflow: {
28500b57cec5SDimitry Andric     // This implements the basic lowering of the xalu with overflow intrinsics
28510b57cec5SDimitry Andric     // into add/sub/mul followed by either seto or setb.
28520b57cec5SDimitry Andric     const Function *Callee = II->getCalledFunction();
28530b57cec5SDimitry Andric     auto *Ty = cast<StructType>(Callee->getReturnType());
28540b57cec5SDimitry Andric     Type *RetTy = Ty->getTypeAtIndex(0U);
28550b57cec5SDimitry Andric     assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
28560b57cec5SDimitry Andric            Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
28570b57cec5SDimitry Andric            "Overflow value expected to be an i1");
28580b57cec5SDimitry Andric 
28590b57cec5SDimitry Andric     MVT VT;
28600b57cec5SDimitry Andric     if (!isTypeLegal(RetTy, VT))
28610b57cec5SDimitry Andric       return false;
28620b57cec5SDimitry Andric 
28630b57cec5SDimitry Andric     if (VT < MVT::i8 || VT > MVT::i64)
28640b57cec5SDimitry Andric       return false;
28650b57cec5SDimitry Andric 
28660b57cec5SDimitry Andric     const Value *LHS = II->getArgOperand(0);
28670b57cec5SDimitry Andric     const Value *RHS = II->getArgOperand(1);
28680b57cec5SDimitry Andric 
28690b57cec5SDimitry Andric     // Canonicalize immediate to the RHS.
2870e8d8bef9SDimitry Andric     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
28710b57cec5SDimitry Andric       std::swap(LHS, RHS);
28720b57cec5SDimitry Andric 
28730b57cec5SDimitry Andric     unsigned BaseOpc, CondCode;
28740b57cec5SDimitry Andric     switch (II->getIntrinsicID()) {
28750b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected intrinsic!");
28760b57cec5SDimitry Andric     case Intrinsic::sadd_with_overflow:
28770b57cec5SDimitry Andric       BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
28780b57cec5SDimitry Andric     case Intrinsic::uadd_with_overflow:
28790b57cec5SDimitry Andric       BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
28800b57cec5SDimitry Andric     case Intrinsic::ssub_with_overflow:
28810b57cec5SDimitry Andric       BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
28820b57cec5SDimitry Andric     case Intrinsic::usub_with_overflow:
28830b57cec5SDimitry Andric       BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
28840b57cec5SDimitry Andric     case Intrinsic::smul_with_overflow:
28850b57cec5SDimitry Andric       BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
28860b57cec5SDimitry Andric     case Intrinsic::umul_with_overflow:
28870b57cec5SDimitry Andric       BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
28880b57cec5SDimitry Andric     }
28890b57cec5SDimitry Andric 
28905ffd83dbSDimitry Andric     Register LHSReg = getRegForValue(LHS);
28910b57cec5SDimitry Andric     if (LHSReg == 0)
28920b57cec5SDimitry Andric       return false;
28930b57cec5SDimitry Andric 
28940b57cec5SDimitry Andric     unsigned ResultReg = 0;
28950b57cec5SDimitry Andric     // Check if we have an immediate version.
28960b57cec5SDimitry Andric     if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
28970b57cec5SDimitry Andric       static const uint16_t Opc[2][4] = {
28980b57cec5SDimitry Andric         { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
28990b57cec5SDimitry Andric         { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
29000b57cec5SDimitry Andric       };
29010b57cec5SDimitry Andric 
29020b57cec5SDimitry Andric       if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
29030b57cec5SDimitry Andric           CondCode == X86::COND_O) {
29040b57cec5SDimitry Andric         // We can use INC/DEC.
29050b57cec5SDimitry Andric         ResultReg = createResultReg(TLI.getRegClassFor(VT));
29060b57cec5SDimitry Andric         bool IsDec = BaseOpc == ISD::SUB;
2907bdd1243dSDimitry Andric         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
29080b57cec5SDimitry Andric                 TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2909fe6060f1SDimitry Andric           .addReg(LHSReg);
29100b57cec5SDimitry Andric       } else
2911fe6060f1SDimitry Andric         ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
29120b57cec5SDimitry Andric     }
29130b57cec5SDimitry Andric 
29140b57cec5SDimitry Andric     unsigned RHSReg;
29150b57cec5SDimitry Andric     if (!ResultReg) {
29160b57cec5SDimitry Andric       RHSReg = getRegForValue(RHS);
29170b57cec5SDimitry Andric       if (RHSReg == 0)
29180b57cec5SDimitry Andric         return false;
2919fe6060f1SDimitry Andric       ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
29200b57cec5SDimitry Andric     }
29210b57cec5SDimitry Andric 
29220b57cec5SDimitry Andric     // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
29230b57cec5SDimitry Andric     // it manually.
29240b57cec5SDimitry Andric     if (BaseOpc == X86ISD::UMUL && !ResultReg) {
29250b57cec5SDimitry Andric       static const uint16_t MULOpc[] =
29260b57cec5SDimitry Andric         { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
29270b57cec5SDimitry Andric       static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
29280b57cec5SDimitry Andric       // First copy the first operand into RAX, which is an implicit input to
29290b57cec5SDimitry Andric       // the X86::MUL*r instruction.
2930bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
29310b57cec5SDimitry Andric               TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2932fe6060f1SDimitry Andric         .addReg(LHSReg);
29330b57cec5SDimitry Andric       ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2934fe6060f1SDimitry Andric                                  TLI.getRegClassFor(VT), RHSReg);
29350b57cec5SDimitry Andric     } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
29360b57cec5SDimitry Andric       static const uint16_t MULOpc[] =
29370b57cec5SDimitry Andric         { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
29380b57cec5SDimitry Andric       if (VT == MVT::i8) {
29390b57cec5SDimitry Andric         // Copy the first operand into AL, which is an implicit input to the
29400b57cec5SDimitry Andric         // X86::IMUL8r instruction.
2941bdd1243dSDimitry Andric         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
29420b57cec5SDimitry Andric                TII.get(TargetOpcode::COPY), X86::AL)
2943fe6060f1SDimitry Andric           .addReg(LHSReg);
2944fe6060f1SDimitry Andric         ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
29450b57cec5SDimitry Andric       } else
29460b57cec5SDimitry Andric         ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2947fe6060f1SDimitry Andric                                     TLI.getRegClassFor(VT), LHSReg, RHSReg);
29480b57cec5SDimitry Andric     }
29490b57cec5SDimitry Andric 
29500b57cec5SDimitry Andric     if (!ResultReg)
29510b57cec5SDimitry Andric       return false;
29520b57cec5SDimitry Andric 
29530b57cec5SDimitry Andric     // Assign to a GPR since the overflow return value is lowered to a SETcc.
29545ffd83dbSDimitry Andric     Register ResultReg2 = createResultReg(&X86::GR8RegClass);
29550b57cec5SDimitry Andric     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2956bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
29570b57cec5SDimitry Andric             ResultReg2).addImm(CondCode);
29580b57cec5SDimitry Andric 
29590b57cec5SDimitry Andric     updateValueMap(II, ResultReg, 2);
29600b57cec5SDimitry Andric     return true;
29610b57cec5SDimitry Andric   }
29620b57cec5SDimitry Andric   case Intrinsic::x86_sse_cvttss2si:
29630b57cec5SDimitry Andric   case Intrinsic::x86_sse_cvttss2si64:
29640b57cec5SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si:
29650b57cec5SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si64: {
29660b57cec5SDimitry Andric     bool IsInputDouble;
29670b57cec5SDimitry Andric     switch (II->getIntrinsicID()) {
29680b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected intrinsic.");
29690b57cec5SDimitry Andric     case Intrinsic::x86_sse_cvttss2si:
29700b57cec5SDimitry Andric     case Intrinsic::x86_sse_cvttss2si64:
29710b57cec5SDimitry Andric       if (!Subtarget->hasSSE1())
29720b57cec5SDimitry Andric         return false;
29730b57cec5SDimitry Andric       IsInputDouble = false;
29740b57cec5SDimitry Andric       break;
29750b57cec5SDimitry Andric     case Intrinsic::x86_sse2_cvttsd2si:
29760b57cec5SDimitry Andric     case Intrinsic::x86_sse2_cvttsd2si64:
29770b57cec5SDimitry Andric       if (!Subtarget->hasSSE2())
29780b57cec5SDimitry Andric         return false;
29790b57cec5SDimitry Andric       IsInputDouble = true;
29800b57cec5SDimitry Andric       break;
29810b57cec5SDimitry Andric     }
29820b57cec5SDimitry Andric 
29830b57cec5SDimitry Andric     Type *RetTy = II->getCalledFunction()->getReturnType();
29840b57cec5SDimitry Andric     MVT VT;
29850b57cec5SDimitry Andric     if (!isTypeLegal(RetTy, VT))
29860b57cec5SDimitry Andric       return false;
29870b57cec5SDimitry Andric 
29880b57cec5SDimitry Andric     static const uint16_t CvtOpc[3][2][2] = {
29890b57cec5SDimitry Andric       { { X86::CVTTSS2SIrr,   X86::CVTTSS2SI64rr },
29900b57cec5SDimitry Andric         { X86::CVTTSD2SIrr,   X86::CVTTSD2SI64rr } },
29910b57cec5SDimitry Andric       { { X86::VCVTTSS2SIrr,  X86::VCVTTSS2SI64rr },
29920b57cec5SDimitry Andric         { X86::VCVTTSD2SIrr,  X86::VCVTTSD2SI64rr } },
29930b57cec5SDimitry Andric       { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
29940b57cec5SDimitry Andric         { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
29950b57cec5SDimitry Andric     };
29960b57cec5SDimitry Andric     unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
29970b57cec5SDimitry Andric                         Subtarget->hasAVX()    ? 1 :
29980b57cec5SDimitry Andric                                                  0;
29990b57cec5SDimitry Andric     unsigned Opc;
30000b57cec5SDimitry Andric     switch (VT.SimpleTy) {
30010b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected result type.");
30020b57cec5SDimitry Andric     case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
30030b57cec5SDimitry Andric     case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
30040b57cec5SDimitry Andric     }
30050b57cec5SDimitry Andric 
30060b57cec5SDimitry Andric     // Check if we can fold insertelement instructions into the convert.
30070b57cec5SDimitry Andric     const Value *Op = II->getArgOperand(0);
30080b57cec5SDimitry Andric     while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
30090b57cec5SDimitry Andric       const Value *Index = IE->getOperand(2);
30100b57cec5SDimitry Andric       if (!isa<ConstantInt>(Index))
30110b57cec5SDimitry Andric         break;
30120b57cec5SDimitry Andric       unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
30130b57cec5SDimitry Andric 
30140b57cec5SDimitry Andric       if (Idx == 0) {
30150b57cec5SDimitry Andric         Op = IE->getOperand(1);
30160b57cec5SDimitry Andric         break;
30170b57cec5SDimitry Andric       }
30180b57cec5SDimitry Andric       Op = IE->getOperand(0);
30190b57cec5SDimitry Andric     }
30200b57cec5SDimitry Andric 
30215ffd83dbSDimitry Andric     Register Reg = getRegForValue(Op);
30220b57cec5SDimitry Andric     if (Reg == 0)
30230b57cec5SDimitry Andric       return false;
30240b57cec5SDimitry Andric 
30255ffd83dbSDimitry Andric     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3026bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
30270b57cec5SDimitry Andric       .addReg(Reg);
30280b57cec5SDimitry Andric 
30290b57cec5SDimitry Andric     updateValueMap(II, ResultReg);
30300b57cec5SDimitry Andric     return true;
30310b57cec5SDimitry Andric   }
303206c3fb27SDimitry Andric   case Intrinsic::x86_sse42_crc32_32_8:
303306c3fb27SDimitry Andric   case Intrinsic::x86_sse42_crc32_32_16:
303406c3fb27SDimitry Andric   case Intrinsic::x86_sse42_crc32_32_32:
303506c3fb27SDimitry Andric   case Intrinsic::x86_sse42_crc32_64_64: {
303606c3fb27SDimitry Andric     if (!Subtarget->hasCRC32())
303706c3fb27SDimitry Andric       return false;
303806c3fb27SDimitry Andric 
303906c3fb27SDimitry Andric     Type *RetTy = II->getCalledFunction()->getReturnType();
304006c3fb27SDimitry Andric 
304106c3fb27SDimitry Andric     MVT VT;
304206c3fb27SDimitry Andric     if (!isTypeLegal(RetTy, VT))
304306c3fb27SDimitry Andric       return false;
304406c3fb27SDimitry Andric 
304506c3fb27SDimitry Andric     unsigned Opc;
304606c3fb27SDimitry Andric     const TargetRegisterClass *RC = nullptr;
304706c3fb27SDimitry Andric 
304806c3fb27SDimitry Andric     switch (II->getIntrinsicID()) {
304906c3fb27SDimitry Andric     default:
305006c3fb27SDimitry Andric       llvm_unreachable("Unexpected intrinsic.");
30511db9f3b2SDimitry Andric #define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
305206c3fb27SDimitry Andric     case Intrinsic::x86_sse42_crc32_32_8:
30531db9f3b2SDimitry Andric       Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
305406c3fb27SDimitry Andric       RC = &X86::GR32RegClass;
305506c3fb27SDimitry Andric       break;
305606c3fb27SDimitry Andric     case Intrinsic::x86_sse42_crc32_32_16:
30571db9f3b2SDimitry Andric       Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
305806c3fb27SDimitry Andric       RC = &X86::GR32RegClass;
305906c3fb27SDimitry Andric       break;
306006c3fb27SDimitry Andric     case Intrinsic::x86_sse42_crc32_32_32:
30611db9f3b2SDimitry Andric       Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
306206c3fb27SDimitry Andric       RC = &X86::GR32RegClass;
306306c3fb27SDimitry Andric       break;
306406c3fb27SDimitry Andric     case Intrinsic::x86_sse42_crc32_64_64:
30651db9f3b2SDimitry Andric       Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
306606c3fb27SDimitry Andric       RC = &X86::GR64RegClass;
306706c3fb27SDimitry Andric       break;
30681db9f3b2SDimitry Andric #undef GET_EGPR_IF_ENABLED
306906c3fb27SDimitry Andric     }
307006c3fb27SDimitry Andric 
307106c3fb27SDimitry Andric     const Value *LHS = II->getArgOperand(0);
307206c3fb27SDimitry Andric     const Value *RHS = II->getArgOperand(1);
307306c3fb27SDimitry Andric 
307406c3fb27SDimitry Andric     Register LHSReg = getRegForValue(LHS);
307506c3fb27SDimitry Andric     Register RHSReg = getRegForValue(RHS);
307606c3fb27SDimitry Andric     if (!LHSReg || !RHSReg)
307706c3fb27SDimitry Andric       return false;
307806c3fb27SDimitry Andric 
307906c3fb27SDimitry Andric     Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
308006c3fb27SDimitry Andric     if (!ResultReg)
308106c3fb27SDimitry Andric       return false;
308206c3fb27SDimitry Andric 
308306c3fb27SDimitry Andric     updateValueMap(II, ResultReg);
308406c3fb27SDimitry Andric     return true;
308506c3fb27SDimitry Andric   }
30860b57cec5SDimitry Andric   }
30870b57cec5SDimitry Andric }
30880b57cec5SDimitry Andric 
30890b57cec5SDimitry Andric bool X86FastISel::fastLowerArguments() {
30900b57cec5SDimitry Andric   if (!FuncInfo.CanLowerReturn)
30910b57cec5SDimitry Andric     return false;
30920b57cec5SDimitry Andric 
30930b57cec5SDimitry Andric   const Function *F = FuncInfo.Fn;
30940b57cec5SDimitry Andric   if (F->isVarArg())
30950b57cec5SDimitry Andric     return false;
30960b57cec5SDimitry Andric 
30970b57cec5SDimitry Andric   CallingConv::ID CC = F->getCallingConv();
30980b57cec5SDimitry Andric   if (CC != CallingConv::C)
30990b57cec5SDimitry Andric     return false;
31000b57cec5SDimitry Andric 
31010b57cec5SDimitry Andric   if (Subtarget->isCallingConvWin64(CC))
31020b57cec5SDimitry Andric     return false;
31030b57cec5SDimitry Andric 
31040b57cec5SDimitry Andric   if (!Subtarget->is64Bit())
31050b57cec5SDimitry Andric     return false;
31060b57cec5SDimitry Andric 
31070b57cec5SDimitry Andric   if (Subtarget->useSoftFloat())
31080b57cec5SDimitry Andric     return false;
31090b57cec5SDimitry Andric 
31100b57cec5SDimitry Andric   // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
31110b57cec5SDimitry Andric   unsigned GPRCnt = 0;
31120b57cec5SDimitry Andric   unsigned FPRCnt = 0;
31130b57cec5SDimitry Andric   for (auto const &Arg : F->args()) {
31140b57cec5SDimitry Andric     if (Arg.hasAttribute(Attribute::ByVal) ||
31150b57cec5SDimitry Andric         Arg.hasAttribute(Attribute::InReg) ||
31160b57cec5SDimitry Andric         Arg.hasAttribute(Attribute::StructRet) ||
31170b57cec5SDimitry Andric         Arg.hasAttribute(Attribute::SwiftSelf) ||
3118fe6060f1SDimitry Andric         Arg.hasAttribute(Attribute::SwiftAsync) ||
31190b57cec5SDimitry Andric         Arg.hasAttribute(Attribute::SwiftError) ||
31200b57cec5SDimitry Andric         Arg.hasAttribute(Attribute::Nest))
31210b57cec5SDimitry Andric       return false;
31220b57cec5SDimitry Andric 
31230b57cec5SDimitry Andric     Type *ArgTy = Arg.getType();
31240b57cec5SDimitry Andric     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
31250b57cec5SDimitry Andric       return false;
31260b57cec5SDimitry Andric 
31270b57cec5SDimitry Andric     EVT ArgVT = TLI.getValueType(DL, ArgTy);
31280b57cec5SDimitry Andric     if (!ArgVT.isSimple()) return false;
31290b57cec5SDimitry Andric     switch (ArgVT.getSimpleVT().SimpleTy) {
31300b57cec5SDimitry Andric     default: return false;
31310b57cec5SDimitry Andric     case MVT::i32:
31320b57cec5SDimitry Andric     case MVT::i64:
31330b57cec5SDimitry Andric       ++GPRCnt;
31340b57cec5SDimitry Andric       break;
31350b57cec5SDimitry Andric     case MVT::f32:
31360b57cec5SDimitry Andric     case MVT::f64:
31370b57cec5SDimitry Andric       if (!Subtarget->hasSSE1())
31380b57cec5SDimitry Andric         return false;
31390b57cec5SDimitry Andric       ++FPRCnt;
31400b57cec5SDimitry Andric       break;
31410b57cec5SDimitry Andric     }
31420b57cec5SDimitry Andric 
31430b57cec5SDimitry Andric     if (GPRCnt > 6)
31440b57cec5SDimitry Andric       return false;
31450b57cec5SDimitry Andric 
31460b57cec5SDimitry Andric     if (FPRCnt > 8)
31470b57cec5SDimitry Andric       return false;
31480b57cec5SDimitry Andric   }
31490b57cec5SDimitry Andric 
31500b57cec5SDimitry Andric   static const MCPhysReg GPR32ArgRegs[] = {
31510b57cec5SDimitry Andric     X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
31520b57cec5SDimitry Andric   };
31530b57cec5SDimitry Andric   static const MCPhysReg GPR64ArgRegs[] = {
31540b57cec5SDimitry Andric     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
31550b57cec5SDimitry Andric   };
31560b57cec5SDimitry Andric   static const MCPhysReg XMMArgRegs[] = {
31570b57cec5SDimitry Andric     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
31580b57cec5SDimitry Andric     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
31590b57cec5SDimitry Andric   };
31600b57cec5SDimitry Andric 
31610b57cec5SDimitry Andric   unsigned GPRIdx = 0;
31620b57cec5SDimitry Andric   unsigned FPRIdx = 0;
31630b57cec5SDimitry Andric   for (auto const &Arg : F->args()) {
31640b57cec5SDimitry Andric     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
31650b57cec5SDimitry Andric     const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
31660b57cec5SDimitry Andric     unsigned SrcReg;
31670b57cec5SDimitry Andric     switch (VT.SimpleTy) {
31680b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected value type.");
31690b57cec5SDimitry Andric     case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
31700b57cec5SDimitry Andric     case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3171bdd1243dSDimitry Andric     case MVT::f32: [[fallthrough]];
31720b57cec5SDimitry Andric     case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
31730b57cec5SDimitry Andric     }
31745ffd83dbSDimitry Andric     Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
31750b57cec5SDimitry Andric     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
31760b57cec5SDimitry Andric     // Without this, EmitLiveInCopies may eliminate the livein if its only
31770b57cec5SDimitry Andric     // use is a bitcast (which isn't turned into an instruction).
31785ffd83dbSDimitry Andric     Register ResultReg = createResultReg(RC);
3179bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
31800b57cec5SDimitry Andric             TII.get(TargetOpcode::COPY), ResultReg)
31810b57cec5SDimitry Andric       .addReg(DstReg, getKillRegState(true));
31820b57cec5SDimitry Andric     updateValueMap(&Arg, ResultReg);
31830b57cec5SDimitry Andric   }
31840b57cec5SDimitry Andric   return true;
31850b57cec5SDimitry Andric }
31860b57cec5SDimitry Andric 
31870b57cec5SDimitry Andric static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
31880b57cec5SDimitry Andric                                                   CallingConv::ID CC,
31895ffd83dbSDimitry Andric                                                   const CallBase *CB) {
31900b57cec5SDimitry Andric   if (Subtarget->is64Bit())
31910b57cec5SDimitry Andric     return 0;
31920b57cec5SDimitry Andric   if (Subtarget->getTargetTriple().isOSMSVCRT())
31930b57cec5SDimitry Andric     return 0;
31940b57cec5SDimitry Andric   if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3195fe6060f1SDimitry Andric       CC == CallingConv::HiPE || CC == CallingConv::Tail ||
3196fe6060f1SDimitry Andric       CC == CallingConv::SwiftTail)
31970b57cec5SDimitry Andric     return 0;
31980b57cec5SDimitry Andric 
31995ffd83dbSDimitry Andric   if (CB)
32005ffd83dbSDimitry Andric     if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
32015ffd83dbSDimitry Andric         CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
32020b57cec5SDimitry Andric       return 0;
32030b57cec5SDimitry Andric 
32040b57cec5SDimitry Andric   return 4;
32050b57cec5SDimitry Andric }
32060b57cec5SDimitry Andric 
32070b57cec5SDimitry Andric bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
32080b57cec5SDimitry Andric   auto &OutVals       = CLI.OutVals;
32090b57cec5SDimitry Andric   auto &OutFlags      = CLI.OutFlags;
32100b57cec5SDimitry Andric   auto &OutRegs       = CLI.OutRegs;
32110b57cec5SDimitry Andric   auto &Ins           = CLI.Ins;
32120b57cec5SDimitry Andric   auto &InRegs        = CLI.InRegs;
32130b57cec5SDimitry Andric   CallingConv::ID CC  = CLI.CallConv;
32140b57cec5SDimitry Andric   bool &IsTailCall    = CLI.IsTailCall;
32150b57cec5SDimitry Andric   bool IsVarArg       = CLI.IsVarArg;
32160b57cec5SDimitry Andric   const Value *Callee = CLI.Callee;
32170b57cec5SDimitry Andric   MCSymbol *Symbol    = CLI.Symbol;
3218fe6060f1SDimitry Andric   const auto *CB      = CLI.CB;
32190b57cec5SDimitry Andric 
32200b57cec5SDimitry Andric   bool Is64Bit        = Subtarget->is64Bit();
32210b57cec5SDimitry Andric   bool IsWin64        = Subtarget->isCallingConvWin64(CC);
32220b57cec5SDimitry Andric 
32230b57cec5SDimitry Andric   // Call / invoke instructions with NoCfCheck attribute require special
32240b57cec5SDimitry Andric   // handling.
3225fe6060f1SDimitry Andric   if (CB && CB->doesNoCfCheck())
32260b57cec5SDimitry Andric     return false;
32270b57cec5SDimitry Andric 
32280b57cec5SDimitry Andric   // Functions with no_caller_saved_registers that need special handling.
3229fe6060f1SDimitry Andric   if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3230fe6060f1SDimitry Andric     return false;
3231fe6060f1SDimitry Andric 
3232fe6060f1SDimitry Andric   // Functions with no_callee_saved_registers that need special handling.
3233fe6060f1SDimitry Andric   if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
32340b57cec5SDimitry Andric     return false;
32350b57cec5SDimitry Andric 
3236bdd1243dSDimitry Andric   // Indirect calls with CFI checks need special handling.
3237bdd1243dSDimitry Andric   if (CB && CB->isIndirectCall() && CB->getOperandBundle(LLVMContext::OB_kcfi))
3238bdd1243dSDimitry Andric     return false;
3239bdd1243dSDimitry Andric 
32400946e70aSDimitry Andric   // Functions using thunks for indirect calls need to use SDISel.
32410946e70aSDimitry Andric   if (Subtarget->useIndirectThunkCalls())
32420b57cec5SDimitry Andric     return false;
32430b57cec5SDimitry Andric 
32445f757f3fSDimitry Andric   // Handle only C and fastcc calling conventions for now.
32450b57cec5SDimitry Andric   switch (CC) {
32460b57cec5SDimitry Andric   default: return false;
32470b57cec5SDimitry Andric   case CallingConv::C:
32480b57cec5SDimitry Andric   case CallingConv::Fast:
32498bcb0991SDimitry Andric   case CallingConv::Tail:
32500b57cec5SDimitry Andric   case CallingConv::Swift:
3251fe6060f1SDimitry Andric   case CallingConv::SwiftTail:
32520b57cec5SDimitry Andric   case CallingConv::X86_FastCall:
32530b57cec5SDimitry Andric   case CallingConv::X86_StdCall:
32540b57cec5SDimitry Andric   case CallingConv::X86_ThisCall:
32550b57cec5SDimitry Andric   case CallingConv::Win64:
32560b57cec5SDimitry Andric   case CallingConv::X86_64_SysV:
3257480093f4SDimitry Andric   case CallingConv::CFGuard_Check:
32580b57cec5SDimitry Andric     break;
32590b57cec5SDimitry Andric   }
32600b57cec5SDimitry Andric 
32610b57cec5SDimitry Andric   // Allow SelectionDAG isel to handle tail calls.
32620b57cec5SDimitry Andric   if (IsTailCall)
32630b57cec5SDimitry Andric     return false;
32640b57cec5SDimitry Andric 
32650b57cec5SDimitry Andric   // fastcc with -tailcallopt is intended to provide a guaranteed
32660b57cec5SDimitry Andric   // tail call optimization. Fastisel doesn't know how to do that.
32678bcb0991SDimitry Andric   if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3268fe6060f1SDimitry Andric       CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
32690b57cec5SDimitry Andric     return false;
32700b57cec5SDimitry Andric 
32710b57cec5SDimitry Andric   // Don't know how to handle Win64 varargs yet.  Nothing special needed for
32720b57cec5SDimitry Andric   // x86-32. Special handling for x86-64 is implemented.
32730b57cec5SDimitry Andric   if (IsVarArg && IsWin64)
32740b57cec5SDimitry Andric     return false;
32750b57cec5SDimitry Andric 
32760b57cec5SDimitry Andric   // Don't know about inalloca yet.
32775ffd83dbSDimitry Andric   if (CLI.CB && CLI.CB->hasInAllocaArgument())
32780b57cec5SDimitry Andric     return false;
32790b57cec5SDimitry Andric 
32800b57cec5SDimitry Andric   for (auto Flag : CLI.OutFlags)
32815ffd83dbSDimitry Andric     if (Flag.isSwiftError() || Flag.isPreallocated())
32820b57cec5SDimitry Andric       return false;
32830b57cec5SDimitry Andric 
32840b57cec5SDimitry Andric   SmallVector<MVT, 16> OutVTs;
32850b57cec5SDimitry Andric   SmallVector<unsigned, 16> ArgRegs;
32860b57cec5SDimitry Andric 
32870b57cec5SDimitry Andric   // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
32880b57cec5SDimitry Andric   // instruction. This is safe because it is common to all FastISel supported
32890b57cec5SDimitry Andric   // calling conventions on x86.
32900b57cec5SDimitry Andric   for (int i = 0, e = OutVals.size(); i != e; ++i) {
32910b57cec5SDimitry Andric     Value *&Val = OutVals[i];
32920b57cec5SDimitry Andric     ISD::ArgFlagsTy Flags = OutFlags[i];
32930b57cec5SDimitry Andric     if (auto *CI = dyn_cast<ConstantInt>(Val)) {
32940b57cec5SDimitry Andric       if (CI->getBitWidth() < 32) {
32950b57cec5SDimitry Andric         if (Flags.isSExt())
32965f757f3fSDimitry Andric           Val = ConstantInt::get(CI->getContext(), CI->getValue().sext(32));
32970b57cec5SDimitry Andric         else
32985f757f3fSDimitry Andric           Val = ConstantInt::get(CI->getContext(), CI->getValue().zext(32));
32990b57cec5SDimitry Andric       }
33000b57cec5SDimitry Andric     }
33010b57cec5SDimitry Andric 
33020b57cec5SDimitry Andric     // Passing bools around ends up doing a trunc to i1 and passing it.
33030b57cec5SDimitry Andric     // Codegen this as an argument + "and 1".
33040b57cec5SDimitry Andric     MVT VT;
33050b57cec5SDimitry Andric     auto *TI = dyn_cast<TruncInst>(Val);
33060b57cec5SDimitry Andric     unsigned ResultReg;
33075ffd83dbSDimitry Andric     if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
33085ffd83dbSDimitry Andric         (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
33090b57cec5SDimitry Andric       Value *PrevVal = TI->getOperand(0);
33100b57cec5SDimitry Andric       ResultReg = getRegForValue(PrevVal);
33110b57cec5SDimitry Andric 
33120b57cec5SDimitry Andric       if (!ResultReg)
33130b57cec5SDimitry Andric         return false;
33140b57cec5SDimitry Andric 
33150b57cec5SDimitry Andric       if (!isTypeLegal(PrevVal->getType(), VT))
33160b57cec5SDimitry Andric         return false;
33170b57cec5SDimitry Andric 
3318fe6060f1SDimitry Andric       ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
33190b57cec5SDimitry Andric     } else {
33205ffd83dbSDimitry Andric       if (!isTypeLegal(Val->getType(), VT) ||
33215ffd83dbSDimitry Andric           (VT.isVector() && VT.getVectorElementType() == MVT::i1))
33220b57cec5SDimitry Andric         return false;
33230b57cec5SDimitry Andric       ResultReg = getRegForValue(Val);
33240b57cec5SDimitry Andric     }
33250b57cec5SDimitry Andric 
33260b57cec5SDimitry Andric     if (!ResultReg)
33270b57cec5SDimitry Andric       return false;
33280b57cec5SDimitry Andric 
33290b57cec5SDimitry Andric     ArgRegs.push_back(ResultReg);
33300b57cec5SDimitry Andric     OutVTs.push_back(VT);
33310b57cec5SDimitry Andric   }
33320b57cec5SDimitry Andric 
33330b57cec5SDimitry Andric   // Analyze operands of the call, assigning locations to each operand.
33340b57cec5SDimitry Andric   SmallVector<CCValAssign, 16> ArgLocs;
33350b57cec5SDimitry Andric   CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
33360b57cec5SDimitry Andric 
33370b57cec5SDimitry Andric   // Allocate shadow area for Win64
33380b57cec5SDimitry Andric   if (IsWin64)
33395ffd83dbSDimitry Andric     CCInfo.AllocateStack(32, Align(8));
33400b57cec5SDimitry Andric 
33410b57cec5SDimitry Andric   CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
33420b57cec5SDimitry Andric 
33430b57cec5SDimitry Andric   // Get a count of how many bytes are to be pushed on the stack.
33440b57cec5SDimitry Andric   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
33450b57cec5SDimitry Andric 
33460b57cec5SDimitry Andric   // Issue CALLSEQ_START
33470b57cec5SDimitry Andric   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3348bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
33490b57cec5SDimitry Andric     .addImm(NumBytes).addImm(0).addImm(0);
33500b57cec5SDimitry Andric 
33510b57cec5SDimitry Andric   // Walk the register/memloc assignments, inserting copies/loads.
33520b57cec5SDimitry Andric   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3353cb14a3feSDimitry Andric   for (const CCValAssign &VA : ArgLocs) {
33540b57cec5SDimitry Andric     const Value *ArgVal = OutVals[VA.getValNo()];
33550b57cec5SDimitry Andric     MVT ArgVT = OutVTs[VA.getValNo()];
33560b57cec5SDimitry Andric 
33570b57cec5SDimitry Andric     if (ArgVT == MVT::x86mmx)
33580b57cec5SDimitry Andric       return false;
33590b57cec5SDimitry Andric 
33600b57cec5SDimitry Andric     unsigned ArgReg = ArgRegs[VA.getValNo()];
33610b57cec5SDimitry Andric 
33620b57cec5SDimitry Andric     // Promote the value if needed.
33630b57cec5SDimitry Andric     switch (VA.getLocInfo()) {
33640b57cec5SDimitry Andric     case CCValAssign::Full: break;
33650b57cec5SDimitry Andric     case CCValAssign::SExt: {
33660b57cec5SDimitry Andric       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
33670b57cec5SDimitry Andric              "Unexpected extend");
33680b57cec5SDimitry Andric 
33690b57cec5SDimitry Andric       if (ArgVT == MVT::i1)
33700b57cec5SDimitry Andric         return false;
33710b57cec5SDimitry Andric 
33720b57cec5SDimitry Andric       bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
33730b57cec5SDimitry Andric                                        ArgVT, ArgReg);
33740b57cec5SDimitry Andric       assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
33750b57cec5SDimitry Andric       ArgVT = VA.getLocVT();
33760b57cec5SDimitry Andric       break;
33770b57cec5SDimitry Andric     }
33780b57cec5SDimitry Andric     case CCValAssign::ZExt: {
33790b57cec5SDimitry Andric       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
33800b57cec5SDimitry Andric              "Unexpected extend");
33810b57cec5SDimitry Andric 
33820b57cec5SDimitry Andric       // Handle zero-extension from i1 to i8, which is common.
33830b57cec5SDimitry Andric       if (ArgVT == MVT::i1) {
33840b57cec5SDimitry Andric         // Set the high bits to zero.
3385fe6060f1SDimitry Andric         ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
33860b57cec5SDimitry Andric         ArgVT = MVT::i8;
33870b57cec5SDimitry Andric 
33880b57cec5SDimitry Andric         if (ArgReg == 0)
33890b57cec5SDimitry Andric           return false;
33900b57cec5SDimitry Andric       }
33910b57cec5SDimitry Andric 
33920b57cec5SDimitry Andric       bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
33930b57cec5SDimitry Andric                                        ArgVT, ArgReg);
33940b57cec5SDimitry Andric       assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
33950b57cec5SDimitry Andric       ArgVT = VA.getLocVT();
33960b57cec5SDimitry Andric       break;
33970b57cec5SDimitry Andric     }
33980b57cec5SDimitry Andric     case CCValAssign::AExt: {
33990b57cec5SDimitry Andric       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
34000b57cec5SDimitry Andric              "Unexpected extend");
34010b57cec5SDimitry Andric       bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
34020b57cec5SDimitry Andric                                        ArgVT, ArgReg);
34030b57cec5SDimitry Andric       if (!Emitted)
34040b57cec5SDimitry Andric         Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
34050b57cec5SDimitry Andric                                     ArgVT, ArgReg);
34060b57cec5SDimitry Andric       if (!Emitted)
34070b57cec5SDimitry Andric         Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
34080b57cec5SDimitry Andric                                     ArgVT, ArgReg);
34090b57cec5SDimitry Andric 
34100b57cec5SDimitry Andric       assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
34110b57cec5SDimitry Andric       ArgVT = VA.getLocVT();
34120b57cec5SDimitry Andric       break;
34130b57cec5SDimitry Andric     }
34140b57cec5SDimitry Andric     case CCValAssign::BCvt: {
3415fe6060f1SDimitry Andric       ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
34160b57cec5SDimitry Andric       assert(ArgReg && "Failed to emit a bitcast!");
34170b57cec5SDimitry Andric       ArgVT = VA.getLocVT();
34180b57cec5SDimitry Andric       break;
34190b57cec5SDimitry Andric     }
34200b57cec5SDimitry Andric     case CCValAssign::VExt:
34210b57cec5SDimitry Andric       // VExt has not been implemented, so this should be impossible to reach
34220b57cec5SDimitry Andric       // for now.  However, fallback to Selection DAG isel once implemented.
34230b57cec5SDimitry Andric       return false;
34240b57cec5SDimitry Andric     case CCValAssign::AExtUpper:
34250b57cec5SDimitry Andric     case CCValAssign::SExtUpper:
34260b57cec5SDimitry Andric     case CCValAssign::ZExtUpper:
34270b57cec5SDimitry Andric     case CCValAssign::FPExt:
34288bcb0991SDimitry Andric     case CCValAssign::Trunc:
34290b57cec5SDimitry Andric       llvm_unreachable("Unexpected loc info!");
34300b57cec5SDimitry Andric     case CCValAssign::Indirect:
34310b57cec5SDimitry Andric       // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
34320b57cec5SDimitry Andric       // support this.
34330b57cec5SDimitry Andric       return false;
34340b57cec5SDimitry Andric     }
34350b57cec5SDimitry Andric 
34360b57cec5SDimitry Andric     if (VA.isRegLoc()) {
3437bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
34380b57cec5SDimitry Andric               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
34390b57cec5SDimitry Andric       OutRegs.push_back(VA.getLocReg());
34400b57cec5SDimitry Andric     } else {
34415ffd83dbSDimitry Andric       assert(VA.isMemLoc() && "Unknown value location!");
34420b57cec5SDimitry Andric 
34430b57cec5SDimitry Andric       // Don't emit stores for undef values.
34440b57cec5SDimitry Andric       if (isa<UndefValue>(ArgVal))
34450b57cec5SDimitry Andric         continue;
34460b57cec5SDimitry Andric 
34470b57cec5SDimitry Andric       unsigned LocMemOffset = VA.getLocMemOffset();
34480b57cec5SDimitry Andric       X86AddressMode AM;
34490b57cec5SDimitry Andric       AM.Base.Reg = RegInfo->getStackRegister();
34500b57cec5SDimitry Andric       AM.Disp = LocMemOffset;
34510b57cec5SDimitry Andric       ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
34525ffd83dbSDimitry Andric       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
34530b57cec5SDimitry Andric       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
34540b57cec5SDimitry Andric           MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
34550b57cec5SDimitry Andric           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
34560b57cec5SDimitry Andric       if (Flags.isByVal()) {
34570b57cec5SDimitry Andric         X86AddressMode SrcAM;
34580b57cec5SDimitry Andric         SrcAM.Base.Reg = ArgReg;
34590b57cec5SDimitry Andric         if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
34600b57cec5SDimitry Andric           return false;
34610b57cec5SDimitry Andric       } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
34620b57cec5SDimitry Andric         // If this is a really simple value, emit this with the Value* version
34630b57cec5SDimitry Andric         // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
34640b57cec5SDimitry Andric         // as it can cause us to reevaluate the argument.
34650b57cec5SDimitry Andric         if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
34660b57cec5SDimitry Andric           return false;
34670b57cec5SDimitry Andric       } else {
3468fe6060f1SDimitry Andric         if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
34690b57cec5SDimitry Andric           return false;
34700b57cec5SDimitry Andric       }
34710b57cec5SDimitry Andric     }
34720b57cec5SDimitry Andric   }
34730b57cec5SDimitry Andric 
34740b57cec5SDimitry Andric   // ELF / PIC requires GOT in the EBX register before function calls via PLT
34750b57cec5SDimitry Andric   // GOT pointer.
34760b57cec5SDimitry Andric   if (Subtarget->isPICStyleGOT()) {
34770b57cec5SDimitry Andric     unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3478bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
34790b57cec5SDimitry Andric             TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
34800b57cec5SDimitry Andric   }
34810b57cec5SDimitry Andric 
34820b57cec5SDimitry Andric   if (Is64Bit && IsVarArg && !IsWin64) {
34830b57cec5SDimitry Andric     // From AMD64 ABI document:
34840b57cec5SDimitry Andric     // For calls that may call functions that use varargs or stdargs
34850b57cec5SDimitry Andric     // (prototype-less calls or calls to functions containing ellipsis (...) in
34860b57cec5SDimitry Andric     // the declaration) %al is used as hidden argument to specify the number
34870b57cec5SDimitry Andric     // of SSE registers used. The contents of %al do not need to match exactly
34880b57cec5SDimitry Andric     // the number of registers, but must be an ubound on the number of SSE
34890b57cec5SDimitry Andric     // registers used and is in the range 0 - 8 inclusive.
34900b57cec5SDimitry Andric 
34910b57cec5SDimitry Andric     // Count the number of XMM registers allocated.
34920b57cec5SDimitry Andric     static const MCPhysReg XMMArgRegs[] = {
34930b57cec5SDimitry Andric       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
34940b57cec5SDimitry Andric       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
34950b57cec5SDimitry Andric     };
34960b57cec5SDimitry Andric     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
34970b57cec5SDimitry Andric     assert((Subtarget->hasSSE1() || !NumXMMRegs)
34980b57cec5SDimitry Andric            && "SSE registers cannot be used when SSE is disabled");
3499bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
35000b57cec5SDimitry Andric             X86::AL).addImm(NumXMMRegs);
35010b57cec5SDimitry Andric   }
35020b57cec5SDimitry Andric 
35030b57cec5SDimitry Andric   // Materialize callee address in a register. FIXME: GV address can be
35040b57cec5SDimitry Andric   // handled with a CALLpcrel32 instead.
35050b57cec5SDimitry Andric   X86AddressMode CalleeAM;
35060b57cec5SDimitry Andric   if (!X86SelectCallAddress(Callee, CalleeAM))
35070b57cec5SDimitry Andric     return false;
35080b57cec5SDimitry Andric 
35090b57cec5SDimitry Andric   unsigned CalleeOp = 0;
35100b57cec5SDimitry Andric   const GlobalValue *GV = nullptr;
35110b57cec5SDimitry Andric   if (CalleeAM.GV != nullptr) {
35120b57cec5SDimitry Andric     GV = CalleeAM.GV;
35130b57cec5SDimitry Andric   } else if (CalleeAM.Base.Reg != 0) {
35140b57cec5SDimitry Andric     CalleeOp = CalleeAM.Base.Reg;
35150b57cec5SDimitry Andric   } else
35160b57cec5SDimitry Andric     return false;
35170b57cec5SDimitry Andric 
35180b57cec5SDimitry Andric   // Issue the call.
35190b57cec5SDimitry Andric   MachineInstrBuilder MIB;
35200b57cec5SDimitry Andric   if (CalleeOp) {
35210b57cec5SDimitry Andric     // Register-indirect call.
35220b57cec5SDimitry Andric     unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3523bdd1243dSDimitry Andric     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
35240b57cec5SDimitry Andric       .addReg(CalleeOp);
35250b57cec5SDimitry Andric   } else {
35260b57cec5SDimitry Andric     // Direct call.
35270b57cec5SDimitry Andric     assert(GV && "Not a direct call");
35280b57cec5SDimitry Andric     // See if we need any target-specific flags on the GV operand.
35290b57cec5SDimitry Andric     unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
35305f757f3fSDimitry Andric     if (OpFlags == X86II::MO_PLT && !Is64Bit &&
35315f757f3fSDimitry Andric         TM.getRelocationModel() == Reloc::Static && isa<Function>(GV) &&
35325f757f3fSDimitry Andric         cast<Function>(GV)->isIntrinsic())
35335f757f3fSDimitry Andric       OpFlags = X86II::MO_NO_FLAG;
35340b57cec5SDimitry Andric 
35350b57cec5SDimitry Andric     // This will be a direct call, or an indirect call through memory for
35360b57cec5SDimitry Andric     // NonLazyBind calls or dllimport calls.
35370b57cec5SDimitry Andric     bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
35380b57cec5SDimitry Andric                     OpFlags == X86II::MO_GOTPCREL ||
3539349cc55cSDimitry Andric                     OpFlags == X86II::MO_GOTPCREL_NORELAX ||
35400b57cec5SDimitry Andric                     OpFlags == X86II::MO_COFFSTUB;
35410b57cec5SDimitry Andric     unsigned CallOpc = NeedLoad
35420b57cec5SDimitry Andric                            ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
35430b57cec5SDimitry Andric                            : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
35440b57cec5SDimitry Andric 
3545bdd1243dSDimitry Andric     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc));
35460b57cec5SDimitry Andric     if (NeedLoad)
35470b57cec5SDimitry Andric       MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
35480b57cec5SDimitry Andric     if (Symbol)
35490b57cec5SDimitry Andric       MIB.addSym(Symbol, OpFlags);
35500b57cec5SDimitry Andric     else
35510b57cec5SDimitry Andric       MIB.addGlobalAddress(GV, 0, OpFlags);
35520b57cec5SDimitry Andric     if (NeedLoad)
35530b57cec5SDimitry Andric       MIB.addReg(0);
35540b57cec5SDimitry Andric   }
35550b57cec5SDimitry Andric 
35560b57cec5SDimitry Andric   // Add a register mask operand representing the call-preserved registers.
35570b57cec5SDimitry Andric   // Proper defs for return values will be added by setPhysRegsDeadExcept().
35580b57cec5SDimitry Andric   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
35590b57cec5SDimitry Andric 
35600b57cec5SDimitry Andric   // Add an implicit use GOT pointer in EBX.
35610b57cec5SDimitry Andric   if (Subtarget->isPICStyleGOT())
35620b57cec5SDimitry Andric     MIB.addReg(X86::EBX, RegState::Implicit);
35630b57cec5SDimitry Andric 
35640b57cec5SDimitry Andric   if (Is64Bit && IsVarArg && !IsWin64)
35650b57cec5SDimitry Andric     MIB.addReg(X86::AL, RegState::Implicit);
35660b57cec5SDimitry Andric 
35670b57cec5SDimitry Andric   // Add implicit physical register uses to the call.
35680b57cec5SDimitry Andric   for (auto Reg : OutRegs)
35690b57cec5SDimitry Andric     MIB.addReg(Reg, RegState::Implicit);
35700b57cec5SDimitry Andric 
35710b57cec5SDimitry Andric   // Issue CALLSEQ_END
35720b57cec5SDimitry Andric   unsigned NumBytesForCalleeToPop =
35730b57cec5SDimitry Andric       X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
35740b57cec5SDimitry Andric                        TM.Options.GuaranteedTailCallOpt)
35750b57cec5SDimitry Andric           ? NumBytes // Callee pops everything.
35765ffd83dbSDimitry Andric           : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
35770b57cec5SDimitry Andric   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3578bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
35790b57cec5SDimitry Andric     .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
35800b57cec5SDimitry Andric 
35810b57cec5SDimitry Andric   // Now handle call return values.
35820b57cec5SDimitry Andric   SmallVector<CCValAssign, 16> RVLocs;
35830b57cec5SDimitry Andric   CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
35840b57cec5SDimitry Andric                     CLI.RetTy->getContext());
35850b57cec5SDimitry Andric   CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
35860b57cec5SDimitry Andric 
35870b57cec5SDimitry Andric   // Copy all of the result registers out of their specified physreg.
35885ffd83dbSDimitry Andric   Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
35890b57cec5SDimitry Andric   for (unsigned i = 0; i != RVLocs.size(); ++i) {
35900b57cec5SDimitry Andric     CCValAssign &VA = RVLocs[i];
35910b57cec5SDimitry Andric     EVT CopyVT = VA.getValVT();
35920b57cec5SDimitry Andric     unsigned CopyReg = ResultReg + i;
35938bcb0991SDimitry Andric     Register SrcReg = VA.getLocReg();
35940b57cec5SDimitry Andric 
35950b57cec5SDimitry Andric     // If this is x86-64, and we disabled SSE, we can't return FP values
35960b57cec5SDimitry Andric     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
35970b57cec5SDimitry Andric         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
35980b57cec5SDimitry Andric       report_fatal_error("SSE register return with SSE disabled");
35990b57cec5SDimitry Andric     }
36000b57cec5SDimitry Andric 
36010b57cec5SDimitry Andric     // If we prefer to use the value in xmm registers, copy it out as f80 and
36020b57cec5SDimitry Andric     // use a truncate to move it from fp stack reg to xmm reg.
36030b57cec5SDimitry Andric     if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
36040b57cec5SDimitry Andric         isScalarFPTypeInSSEReg(VA.getValVT())) {
36050b57cec5SDimitry Andric       CopyVT = MVT::f80;
36060b57cec5SDimitry Andric       CopyReg = createResultReg(&X86::RFP80RegClass);
36070b57cec5SDimitry Andric     }
36080b57cec5SDimitry Andric 
36090b57cec5SDimitry Andric     // Copy out the result.
3610bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
36110b57cec5SDimitry Andric             TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
36120b57cec5SDimitry Andric     InRegs.push_back(VA.getLocReg());
36130b57cec5SDimitry Andric 
36140b57cec5SDimitry Andric     // Round the f80 to the right size, which also moves it to the appropriate
36150b57cec5SDimitry Andric     // xmm register. This is accomplished by storing the f80 value in memory
36160b57cec5SDimitry Andric     // and then loading it back.
36170b57cec5SDimitry Andric     if (CopyVT != VA.getValVT()) {
36180b57cec5SDimitry Andric       EVT ResVT = VA.getValVT();
36190b57cec5SDimitry Andric       unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
36200b57cec5SDimitry Andric       unsigned MemSize = ResVT.getSizeInBits()/8;
36215ffd83dbSDimitry Andric       int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
3622bdd1243dSDimitry Andric       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
36230b57cec5SDimitry Andric                                 TII.get(Opc)), FI)
36240b57cec5SDimitry Andric         .addReg(CopyReg);
36250b57cec5SDimitry Andric       Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3626bdd1243dSDimitry Andric       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
36270b57cec5SDimitry Andric                                 TII.get(Opc), ResultReg + i), FI);
36280b57cec5SDimitry Andric     }
36290b57cec5SDimitry Andric   }
36300b57cec5SDimitry Andric 
36310b57cec5SDimitry Andric   CLI.ResultReg = ResultReg;
36320b57cec5SDimitry Andric   CLI.NumResultRegs = RVLocs.size();
36330b57cec5SDimitry Andric   CLI.Call = MIB;
36340b57cec5SDimitry Andric 
36350b57cec5SDimitry Andric   return true;
36360b57cec5SDimitry Andric }
36370b57cec5SDimitry Andric 
36380b57cec5SDimitry Andric bool
36390b57cec5SDimitry Andric X86FastISel::fastSelectInstruction(const Instruction *I)  {
36400b57cec5SDimitry Andric   switch (I->getOpcode()) {
36410b57cec5SDimitry Andric   default: break;
36420b57cec5SDimitry Andric   case Instruction::Load:
36430b57cec5SDimitry Andric     return X86SelectLoad(I);
36440b57cec5SDimitry Andric   case Instruction::Store:
36450b57cec5SDimitry Andric     return X86SelectStore(I);
36460b57cec5SDimitry Andric   case Instruction::Ret:
36470b57cec5SDimitry Andric     return X86SelectRet(I);
36480b57cec5SDimitry Andric   case Instruction::ICmp:
36490b57cec5SDimitry Andric   case Instruction::FCmp:
36500b57cec5SDimitry Andric     return X86SelectCmp(I);
36510b57cec5SDimitry Andric   case Instruction::ZExt:
36520b57cec5SDimitry Andric     return X86SelectZExt(I);
36530b57cec5SDimitry Andric   case Instruction::SExt:
36540b57cec5SDimitry Andric     return X86SelectSExt(I);
36550b57cec5SDimitry Andric   case Instruction::Br:
36560b57cec5SDimitry Andric     return X86SelectBranch(I);
36570b57cec5SDimitry Andric   case Instruction::LShr:
36580b57cec5SDimitry Andric   case Instruction::AShr:
36590b57cec5SDimitry Andric   case Instruction::Shl:
36600b57cec5SDimitry Andric     return X86SelectShift(I);
36610b57cec5SDimitry Andric   case Instruction::SDiv:
36620b57cec5SDimitry Andric   case Instruction::UDiv:
36630b57cec5SDimitry Andric   case Instruction::SRem:
36640b57cec5SDimitry Andric   case Instruction::URem:
36650b57cec5SDimitry Andric     return X86SelectDivRem(I);
36660b57cec5SDimitry Andric   case Instruction::Select:
36670b57cec5SDimitry Andric     return X86SelectSelect(I);
36680b57cec5SDimitry Andric   case Instruction::Trunc:
36690b57cec5SDimitry Andric     return X86SelectTrunc(I);
36700b57cec5SDimitry Andric   case Instruction::FPExt:
36710b57cec5SDimitry Andric     return X86SelectFPExt(I);
36720b57cec5SDimitry Andric   case Instruction::FPTrunc:
36730b57cec5SDimitry Andric     return X86SelectFPTrunc(I);
36740b57cec5SDimitry Andric   case Instruction::SIToFP:
36750b57cec5SDimitry Andric     return X86SelectSIToFP(I);
36760b57cec5SDimitry Andric   case Instruction::UIToFP:
36770b57cec5SDimitry Andric     return X86SelectUIToFP(I);
36780b57cec5SDimitry Andric   case Instruction::IntToPtr: // Deliberate fall-through.
36790b57cec5SDimitry Andric   case Instruction::PtrToInt: {
36800b57cec5SDimitry Andric     EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
36810b57cec5SDimitry Andric     EVT DstVT = TLI.getValueType(DL, I->getType());
36820b57cec5SDimitry Andric     if (DstVT.bitsGT(SrcVT))
36830b57cec5SDimitry Andric       return X86SelectZExt(I);
36840b57cec5SDimitry Andric     if (DstVT.bitsLT(SrcVT))
36850b57cec5SDimitry Andric       return X86SelectTrunc(I);
36865ffd83dbSDimitry Andric     Register Reg = getRegForValue(I->getOperand(0));
36870b57cec5SDimitry Andric     if (Reg == 0) return false;
36880b57cec5SDimitry Andric     updateValueMap(I, Reg);
36890b57cec5SDimitry Andric     return true;
36900b57cec5SDimitry Andric   }
36910b57cec5SDimitry Andric   case Instruction::BitCast: {
36920b57cec5SDimitry Andric     // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
36930b57cec5SDimitry Andric     if (!Subtarget->hasSSE2())
36940b57cec5SDimitry Andric       return false;
36950b57cec5SDimitry Andric 
36960b57cec5SDimitry Andric     MVT SrcVT, DstVT;
36970b57cec5SDimitry Andric     if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
36980b57cec5SDimitry Andric         !isTypeLegal(I->getType(), DstVT))
36990b57cec5SDimitry Andric       return false;
37000b57cec5SDimitry Andric 
37010b57cec5SDimitry Andric     // Only allow vectors that use xmm/ymm/zmm.
37020b57cec5SDimitry Andric     if (!SrcVT.isVector() || !DstVT.isVector() ||
37030b57cec5SDimitry Andric         SrcVT.getVectorElementType() == MVT::i1 ||
37040b57cec5SDimitry Andric         DstVT.getVectorElementType() == MVT::i1)
37050b57cec5SDimitry Andric       return false;
37060b57cec5SDimitry Andric 
37075ffd83dbSDimitry Andric     Register Reg = getRegForValue(I->getOperand(0));
37085ffd83dbSDimitry Andric     if (!Reg)
37090b57cec5SDimitry Andric       return false;
37100b57cec5SDimitry Andric 
37115ffd83dbSDimitry Andric     // Emit a reg-reg copy so we don't propagate cached known bits information
37125ffd83dbSDimitry Andric     // with the wrong VT if we fall out of fast isel after selecting this.
37135ffd83dbSDimitry Andric     const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
37145ffd83dbSDimitry Andric     Register ResultReg = createResultReg(DstClass);
3715bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
37165ffd83dbSDimitry Andric               TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
37175ffd83dbSDimitry Andric 
37185ffd83dbSDimitry Andric     updateValueMap(I, ResultReg);
37190b57cec5SDimitry Andric     return true;
37200b57cec5SDimitry Andric   }
37210b57cec5SDimitry Andric   }
37220b57cec5SDimitry Andric 
37230b57cec5SDimitry Andric   return false;
37240b57cec5SDimitry Andric }
37250b57cec5SDimitry Andric 
37260b57cec5SDimitry Andric unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
37270b57cec5SDimitry Andric   if (VT > MVT::i64)
37280b57cec5SDimitry Andric     return 0;
37290b57cec5SDimitry Andric 
37300b57cec5SDimitry Andric   uint64_t Imm = CI->getZExtValue();
37310b57cec5SDimitry Andric   if (Imm == 0) {
37325ffd83dbSDimitry Andric     Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
37330b57cec5SDimitry Andric     switch (VT.SimpleTy) {
37340b57cec5SDimitry Andric     default: llvm_unreachable("Unexpected value type");
37350b57cec5SDimitry Andric     case MVT::i1:
37360b57cec5SDimitry Andric     case MVT::i8:
3737fe6060f1SDimitry Andric       return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
37380b57cec5SDimitry Andric     case MVT::i16:
3739fe6060f1SDimitry Andric       return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
37400b57cec5SDimitry Andric     case MVT::i32:
37410b57cec5SDimitry Andric       return SrcReg;
37420b57cec5SDimitry Andric     case MVT::i64: {
37435ffd83dbSDimitry Andric       Register ResultReg = createResultReg(&X86::GR64RegClass);
3744bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
37450b57cec5SDimitry Andric               TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
37460b57cec5SDimitry Andric         .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
37470b57cec5SDimitry Andric       return ResultReg;
37480b57cec5SDimitry Andric     }
37490b57cec5SDimitry Andric     }
37500b57cec5SDimitry Andric   }
37510b57cec5SDimitry Andric 
37520b57cec5SDimitry Andric   unsigned Opc = 0;
37530b57cec5SDimitry Andric   switch (VT.SimpleTy) {
37540b57cec5SDimitry Andric   default: llvm_unreachable("Unexpected value type");
37550b57cec5SDimitry Andric   case MVT::i1:
37560b57cec5SDimitry Andric     VT = MVT::i8;
3757bdd1243dSDimitry Andric     [[fallthrough]];
37580b57cec5SDimitry Andric   case MVT::i8:  Opc = X86::MOV8ri;  break;
37590b57cec5SDimitry Andric   case MVT::i16: Opc = X86::MOV16ri; break;
37600b57cec5SDimitry Andric   case MVT::i32: Opc = X86::MOV32ri; break;
37610b57cec5SDimitry Andric   case MVT::i64: {
37620b57cec5SDimitry Andric     if (isUInt<32>(Imm))
37630b57cec5SDimitry Andric       Opc = X86::MOV32ri64;
37640b57cec5SDimitry Andric     else if (isInt<32>(Imm))
37650b57cec5SDimitry Andric       Opc = X86::MOV64ri32;
37660b57cec5SDimitry Andric     else
37670b57cec5SDimitry Andric       Opc = X86::MOV64ri;
37680b57cec5SDimitry Andric     break;
37690b57cec5SDimitry Andric   }
37700b57cec5SDimitry Andric   }
37710b57cec5SDimitry Andric   return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
37720b57cec5SDimitry Andric }
37730b57cec5SDimitry Andric 
37740b57cec5SDimitry Andric unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
37750b57cec5SDimitry Andric   if (CFP->isNullValue())
37760b57cec5SDimitry Andric     return fastMaterializeFloatZero(CFP);
37770b57cec5SDimitry Andric 
37780b57cec5SDimitry Andric   // Can't handle alternate code models yet.
37790b57cec5SDimitry Andric   CodeModel::Model CM = TM.getCodeModel();
37805f757f3fSDimitry Andric   if (CM != CodeModel::Small && CM != CodeModel::Medium &&
37815f757f3fSDimitry Andric       CM != CodeModel::Large)
37820b57cec5SDimitry Andric     return 0;
37830b57cec5SDimitry Andric 
37840b57cec5SDimitry Andric   // Get opcode and regclass of the output for the given load instruction.
37850b57cec5SDimitry Andric   unsigned Opc = 0;
378681ad6265SDimitry Andric   bool HasSSE1 = Subtarget->hasSSE1();
378781ad6265SDimitry Andric   bool HasSSE2 = Subtarget->hasSSE2();
37880b57cec5SDimitry Andric   bool HasAVX = Subtarget->hasAVX();
37890b57cec5SDimitry Andric   bool HasAVX512 = Subtarget->hasAVX512();
37900b57cec5SDimitry Andric   switch (VT.SimpleTy) {
37910b57cec5SDimitry Andric   default: return 0;
37920b57cec5SDimitry Andric   case MVT::f32:
379381ad6265SDimitry Andric     Opc = HasAVX512 ? X86::VMOVSSZrm_alt
379481ad6265SDimitry Andric           : HasAVX  ? X86::VMOVSSrm_alt
379581ad6265SDimitry Andric           : HasSSE1 ? X86::MOVSSrm_alt
379681ad6265SDimitry Andric                     : X86::LD_Fp32m;
37970b57cec5SDimitry Andric     break;
37980b57cec5SDimitry Andric   case MVT::f64:
379981ad6265SDimitry Andric     Opc = HasAVX512 ? X86::VMOVSDZrm_alt
380081ad6265SDimitry Andric           : HasAVX  ? X86::VMOVSDrm_alt
380181ad6265SDimitry Andric           : HasSSE2 ? X86::MOVSDrm_alt
380281ad6265SDimitry Andric                     : X86::LD_Fp64m;
38030b57cec5SDimitry Andric     break;
38040b57cec5SDimitry Andric   case MVT::f80:
38050b57cec5SDimitry Andric     // No f80 support yet.
38060b57cec5SDimitry Andric     return 0;
38070b57cec5SDimitry Andric   }
38080b57cec5SDimitry Andric 
38090b57cec5SDimitry Andric   // MachineConstantPool wants an explicit alignment.
38105ffd83dbSDimitry Andric   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
38110b57cec5SDimitry Andric 
38120b57cec5SDimitry Andric   // x86-32 PIC requires a PIC base register for constant pools.
38130b57cec5SDimitry Andric   unsigned PICBase = 0;
38140b57cec5SDimitry Andric   unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
38150b57cec5SDimitry Andric   if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
38160b57cec5SDimitry Andric     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
38170b57cec5SDimitry Andric   else if (OpFlag == X86II::MO_GOTOFF)
38180b57cec5SDimitry Andric     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
38195f757f3fSDimitry Andric   else if (Subtarget->is64Bit() && TM.getCodeModel() != CodeModel::Large)
38200b57cec5SDimitry Andric     PICBase = X86::RIP;
38210b57cec5SDimitry Andric 
38220b57cec5SDimitry Andric   // Create the load from the constant pool.
38235ffd83dbSDimitry Andric   unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
38245ffd83dbSDimitry Andric   Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
38250b57cec5SDimitry Andric 
38265ffd83dbSDimitry Andric   // Large code model only applies to 64-bit mode.
38275ffd83dbSDimitry Andric   if (Subtarget->is64Bit() && CM == CodeModel::Large) {
38285ffd83dbSDimitry Andric     Register AddrReg = createResultReg(&X86::GR64RegClass);
3829bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
38300b57cec5SDimitry Andric             AddrReg)
38310b57cec5SDimitry Andric       .addConstantPoolIndex(CPI, 0, OpFlag);
3832bdd1243dSDimitry Andric     MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
38330b57cec5SDimitry Andric                                       TII.get(Opc), ResultReg);
3834e8d8bef9SDimitry Andric     addRegReg(MIB, AddrReg, false, PICBase, false);
38350b57cec5SDimitry Andric     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
38360b57cec5SDimitry Andric         MachinePointerInfo::getConstantPool(*FuncInfo.MF),
38375ffd83dbSDimitry Andric         MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
38380b57cec5SDimitry Andric     MIB->addMemOperand(*FuncInfo.MF, MMO);
38390b57cec5SDimitry Andric     return ResultReg;
38400b57cec5SDimitry Andric   }
38410b57cec5SDimitry Andric 
3842bdd1243dSDimitry Andric   addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
38430b57cec5SDimitry Andric                                    TII.get(Opc), ResultReg),
38440b57cec5SDimitry Andric                            CPI, PICBase, OpFlag);
38450b57cec5SDimitry Andric   return ResultReg;
38460b57cec5SDimitry Andric }
38470b57cec5SDimitry Andric 
38480b57cec5SDimitry Andric unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
38495f757f3fSDimitry Andric   // Can't handle large GlobalValues yet.
38505f757f3fSDimitry Andric   if (TM.getCodeModel() != CodeModel::Small &&
38515f757f3fSDimitry Andric       TM.getCodeModel() != CodeModel::Medium)
38525f757f3fSDimitry Andric     return 0;
38535f757f3fSDimitry Andric   if (TM.isLargeGlobalValue(GV))
38540b57cec5SDimitry Andric     return 0;
38550b57cec5SDimitry Andric 
38560b57cec5SDimitry Andric   // Materialize addresses with LEA/MOV instructions.
38570b57cec5SDimitry Andric   X86AddressMode AM;
38580b57cec5SDimitry Andric   if (X86SelectAddress(GV, AM)) {
38590b57cec5SDimitry Andric     // If the expression is just a basereg, then we're done, otherwise we need
38600b57cec5SDimitry Andric     // to emit an LEA.
38610b57cec5SDimitry Andric     if (AM.BaseType == X86AddressMode::RegBase &&
38620b57cec5SDimitry Andric         AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
38630b57cec5SDimitry Andric       return AM.Base.Reg;
38640b57cec5SDimitry Andric 
38655ffd83dbSDimitry Andric     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
38660b57cec5SDimitry Andric     if (TM.getRelocationModel() == Reloc::Static &&
38670b57cec5SDimitry Andric         TLI.getPointerTy(DL) == MVT::i64) {
38680b57cec5SDimitry Andric       // The displacement code could be more than 32 bits away so we need to use
38690b57cec5SDimitry Andric       // an instruction with a 64 bit immediate
3870bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
38710b57cec5SDimitry Andric               ResultReg)
38720b57cec5SDimitry Andric         .addGlobalAddress(GV);
38730b57cec5SDimitry Andric     } else {
38740b57cec5SDimitry Andric       unsigned Opc =
38750b57cec5SDimitry Andric           TLI.getPointerTy(DL) == MVT::i32
38760b57cec5SDimitry Andric               ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
38770b57cec5SDimitry Andric               : X86::LEA64r;
3878bdd1243dSDimitry Andric       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
38790b57cec5SDimitry Andric                              TII.get(Opc), ResultReg), AM);
38800b57cec5SDimitry Andric     }
38810b57cec5SDimitry Andric     return ResultReg;
38820b57cec5SDimitry Andric   }
38830b57cec5SDimitry Andric   return 0;
38840b57cec5SDimitry Andric }
38850b57cec5SDimitry Andric 
38860b57cec5SDimitry Andric unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
38870b57cec5SDimitry Andric   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
38880b57cec5SDimitry Andric 
38890b57cec5SDimitry Andric   // Only handle simple types.
38900b57cec5SDimitry Andric   if (!CEVT.isSimple())
38910b57cec5SDimitry Andric     return 0;
38920b57cec5SDimitry Andric   MVT VT = CEVT.getSimpleVT();
38930b57cec5SDimitry Andric 
38940b57cec5SDimitry Andric   if (const auto *CI = dyn_cast<ConstantInt>(C))
38950b57cec5SDimitry Andric     return X86MaterializeInt(CI, VT);
3896349cc55cSDimitry Andric   if (const auto *CFP = dyn_cast<ConstantFP>(C))
38970b57cec5SDimitry Andric     return X86MaterializeFP(CFP, VT);
3898349cc55cSDimitry Andric   if (const auto *GV = dyn_cast<GlobalValue>(C))
38990b57cec5SDimitry Andric     return X86MaterializeGV(GV, VT);
3900349cc55cSDimitry Andric   if (isa<UndefValue>(C)) {
3901fe6060f1SDimitry Andric     unsigned Opc = 0;
3902fe6060f1SDimitry Andric     switch (VT.SimpleTy) {
3903fe6060f1SDimitry Andric     default:
3904fe6060f1SDimitry Andric       break;
3905fe6060f1SDimitry Andric     case MVT::f32:
390681ad6265SDimitry Andric       if (!Subtarget->hasSSE1())
3907fe6060f1SDimitry Andric         Opc = X86::LD_Fp032;
3908fe6060f1SDimitry Andric       break;
3909fe6060f1SDimitry Andric     case MVT::f64:
391081ad6265SDimitry Andric       if (!Subtarget->hasSSE2())
3911fe6060f1SDimitry Andric         Opc = X86::LD_Fp064;
3912fe6060f1SDimitry Andric       break;
3913fe6060f1SDimitry Andric     case MVT::f80:
3914fe6060f1SDimitry Andric       Opc = X86::LD_Fp080;
3915fe6060f1SDimitry Andric       break;
3916fe6060f1SDimitry Andric     }
3917fe6060f1SDimitry Andric 
3918fe6060f1SDimitry Andric     if (Opc) {
3919fe6060f1SDimitry Andric       Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3920bdd1243dSDimitry Andric       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
3921fe6060f1SDimitry Andric               ResultReg);
3922fe6060f1SDimitry Andric       return ResultReg;
3923fe6060f1SDimitry Andric     }
3924fe6060f1SDimitry Andric   }
39250b57cec5SDimitry Andric 
39260b57cec5SDimitry Andric   return 0;
39270b57cec5SDimitry Andric }
39280b57cec5SDimitry Andric 
39290b57cec5SDimitry Andric unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
39300b57cec5SDimitry Andric   // Fail on dynamic allocas. At this point, getRegForValue has already
39310b57cec5SDimitry Andric   // checked its CSE maps, so if we're here trying to handle a dynamic
39320b57cec5SDimitry Andric   // alloca, we're not going to succeed. X86SelectAddress has a
39330b57cec5SDimitry Andric   // check for dynamic allocas, because it's called directly from
39340b57cec5SDimitry Andric   // various places, but targetMaterializeAlloca also needs a check
39350b57cec5SDimitry Andric   // in order to avoid recursion between getRegForValue,
39360b57cec5SDimitry Andric   // X86SelectAddrss, and targetMaterializeAlloca.
39370b57cec5SDimitry Andric   if (!FuncInfo.StaticAllocaMap.count(C))
39380b57cec5SDimitry Andric     return 0;
39390b57cec5SDimitry Andric   assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
39400b57cec5SDimitry Andric 
39410b57cec5SDimitry Andric   X86AddressMode AM;
39420b57cec5SDimitry Andric   if (!X86SelectAddress(C, AM))
39430b57cec5SDimitry Andric     return 0;
39440b57cec5SDimitry Andric   unsigned Opc =
39450b57cec5SDimitry Andric       TLI.getPointerTy(DL) == MVT::i32
39460b57cec5SDimitry Andric           ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
39470b57cec5SDimitry Andric           : X86::LEA64r;
39480b57cec5SDimitry Andric   const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
39495ffd83dbSDimitry Andric   Register ResultReg = createResultReg(RC);
3950bdd1243dSDimitry Andric   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
39510b57cec5SDimitry Andric                          TII.get(Opc), ResultReg), AM);
39520b57cec5SDimitry Andric   return ResultReg;
39530b57cec5SDimitry Andric }
39540b57cec5SDimitry Andric 
39550b57cec5SDimitry Andric unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
39560b57cec5SDimitry Andric   MVT VT;
39570b57cec5SDimitry Andric   if (!isTypeLegal(CF->getType(), VT))
39580b57cec5SDimitry Andric     return 0;
39590b57cec5SDimitry Andric 
39600b57cec5SDimitry Andric   // Get opcode and regclass for the given zero.
396181ad6265SDimitry Andric   bool HasSSE1 = Subtarget->hasSSE1();
396281ad6265SDimitry Andric   bool HasSSE2 = Subtarget->hasSSE2();
39630b57cec5SDimitry Andric   bool HasAVX512 = Subtarget->hasAVX512();
39640b57cec5SDimitry Andric   unsigned Opc = 0;
39650b57cec5SDimitry Andric   switch (VT.SimpleTy) {
39660b57cec5SDimitry Andric   default: return 0;
396781ad6265SDimitry Andric   case MVT::f16:
396881ad6265SDimitry Andric     Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
396981ad6265SDimitry Andric     break;
39700b57cec5SDimitry Andric   case MVT::f32:
397181ad6265SDimitry Andric     Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
397281ad6265SDimitry Andric           : HasSSE1 ? X86::FsFLD0SS
397381ad6265SDimitry Andric                     : X86::LD_Fp032;
39740b57cec5SDimitry Andric     break;
39750b57cec5SDimitry Andric   case MVT::f64:
397681ad6265SDimitry Andric     Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
397781ad6265SDimitry Andric           : HasSSE2 ? X86::FsFLD0SD
397881ad6265SDimitry Andric                     : X86::LD_Fp064;
39790b57cec5SDimitry Andric     break;
39800b57cec5SDimitry Andric   case MVT::f80:
39810b57cec5SDimitry Andric     // No f80 support yet.
39820b57cec5SDimitry Andric     return 0;
39830b57cec5SDimitry Andric   }
39840b57cec5SDimitry Andric 
39855ffd83dbSDimitry Andric   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3986bdd1243dSDimitry Andric   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
39870b57cec5SDimitry Andric   return ResultReg;
39880b57cec5SDimitry Andric }
39890b57cec5SDimitry Andric 
39900b57cec5SDimitry Andric 
39910b57cec5SDimitry Andric bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
39920b57cec5SDimitry Andric                                       const LoadInst *LI) {
39930b57cec5SDimitry Andric   const Value *Ptr = LI->getPointerOperand();
39940b57cec5SDimitry Andric   X86AddressMode AM;
39950b57cec5SDimitry Andric   if (!X86SelectAddress(Ptr, AM))
39960b57cec5SDimitry Andric     return false;
39970b57cec5SDimitry Andric 
39980b57cec5SDimitry Andric   const X86InstrInfo &XII = (const X86InstrInfo &)TII;
39990b57cec5SDimitry Andric 
40000b57cec5SDimitry Andric   unsigned Size = DL.getTypeAllocSize(LI->getType());
40010b57cec5SDimitry Andric 
40020b57cec5SDimitry Andric   SmallVector<MachineOperand, 8> AddrOps;
40030b57cec5SDimitry Andric   AM.getFullAddress(AddrOps);
40040b57cec5SDimitry Andric 
40050b57cec5SDimitry Andric   MachineInstr *Result = XII.foldMemoryOperandImpl(
40065ffd83dbSDimitry Andric       *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
40070b57cec5SDimitry Andric       /*AllowCommute=*/true);
40080b57cec5SDimitry Andric   if (!Result)
40090b57cec5SDimitry Andric     return false;
40100b57cec5SDimitry Andric 
40110b57cec5SDimitry Andric   // The index register could be in the wrong register class.  Unfortunately,
40120b57cec5SDimitry Andric   // foldMemoryOperandImpl could have commuted the instruction so its not enough
40130b57cec5SDimitry Andric   // to just look at OpNo + the offset to the index reg.  We actually need to
40140b57cec5SDimitry Andric   // scan the instruction to find the index reg and see if its the correct reg
40150b57cec5SDimitry Andric   // class.
40160b57cec5SDimitry Andric   unsigned OperandNo = 0;
40170b57cec5SDimitry Andric   for (MachineInstr::mop_iterator I = Result->operands_begin(),
40180b57cec5SDimitry Andric        E = Result->operands_end(); I != E; ++I, ++OperandNo) {
40190b57cec5SDimitry Andric     MachineOperand &MO = *I;
40200b57cec5SDimitry Andric     if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
40210b57cec5SDimitry Andric       continue;
40220b57cec5SDimitry Andric     // Found the index reg, now try to rewrite it.
40235ffd83dbSDimitry Andric     Register IndexReg = constrainOperandRegClass(Result->getDesc(),
40240b57cec5SDimitry Andric                                                  MO.getReg(), OperandNo);
40250b57cec5SDimitry Andric     if (IndexReg == MO.getReg())
40260b57cec5SDimitry Andric       continue;
40270b57cec5SDimitry Andric     MO.setReg(IndexReg);
40280b57cec5SDimitry Andric   }
40290b57cec5SDimitry Andric 
40300b57cec5SDimitry Andric   Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
40310b57cec5SDimitry Andric   Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
40320b57cec5SDimitry Andric   MachineBasicBlock::iterator I(MI);
40330b57cec5SDimitry Andric   removeDeadCode(I, std::next(I));
40340b57cec5SDimitry Andric   return true;
40350b57cec5SDimitry Andric }
40360b57cec5SDimitry Andric 
40370b57cec5SDimitry Andric unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
40380b57cec5SDimitry Andric                                         const TargetRegisterClass *RC,
4039fe6060f1SDimitry Andric                                         unsigned Op0, unsigned Op1,
4040fe6060f1SDimitry Andric                                         unsigned Op2, unsigned Op3) {
40410b57cec5SDimitry Andric   const MCInstrDesc &II = TII.get(MachineInstOpcode);
40420b57cec5SDimitry Andric 
40435ffd83dbSDimitry Andric   Register ResultReg = createResultReg(RC);
40440b57cec5SDimitry Andric   Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
40450b57cec5SDimitry Andric   Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
40460b57cec5SDimitry Andric   Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
40470b57cec5SDimitry Andric   Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
40480b57cec5SDimitry Andric 
40490b57cec5SDimitry Andric   if (II.getNumDefs() >= 1)
4050bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
4051fe6060f1SDimitry Andric         .addReg(Op0)
4052fe6060f1SDimitry Andric         .addReg(Op1)
4053fe6060f1SDimitry Andric         .addReg(Op2)
4054fe6060f1SDimitry Andric         .addReg(Op3);
40550b57cec5SDimitry Andric   else {
4056bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
4057fe6060f1SDimitry Andric         .addReg(Op0)
4058fe6060f1SDimitry Andric         .addReg(Op1)
4059fe6060f1SDimitry Andric         .addReg(Op2)
4060fe6060f1SDimitry Andric         .addReg(Op3);
4061bdd1243dSDimitry Andric     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
4062bdd1243dSDimitry Andric             ResultReg)
4063bdd1243dSDimitry Andric         .addReg(II.implicit_defs()[0]);
40640b57cec5SDimitry Andric   }
40650b57cec5SDimitry Andric   return ResultReg;
40660b57cec5SDimitry Andric }
40670b57cec5SDimitry Andric 
40680b57cec5SDimitry Andric 
40690b57cec5SDimitry Andric namespace llvm {
40700b57cec5SDimitry Andric   FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
40710b57cec5SDimitry Andric                                 const TargetLibraryInfo *libInfo) {
40720b57cec5SDimitry Andric     return new X86FastISel(funcInfo, libInfo);
40730b57cec5SDimitry Andric   }
40740b57cec5SDimitry Andric }
4075