15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 105ffd83dbSDimitry Andric // before the legalizer. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14e8d8bef9SDimitry Andric #include "AMDGPU.h" 15349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h" 16fe6060f1SDimitry Andric #include "AMDGPULegalizerInfo.h" 17fe6060f1SDimitry Andric #include "GCNSubtarget.h" 18fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 1981ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/CSEInfo.h" 205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 255ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 265ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 27e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric using namespace llvm; 325ffd83dbSDimitry Andric using namespace MIPatternMatch; 335ffd83dbSDimitry Andric 34fe6060f1SDimitry Andric class AMDGPUPreLegalizerCombinerHelper { 35fe6060f1SDimitry Andric protected: 36fe6060f1SDimitry Andric MachineIRBuilder &B; 37fe6060f1SDimitry Andric MachineFunction &MF; 38fe6060f1SDimitry Andric MachineRegisterInfo &MRI; 39349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper; 40fe6060f1SDimitry Andric 41fe6060f1SDimitry Andric public: 42349cc55cSDimitry Andric AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, 43349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper) 44fe6060f1SDimitry Andric : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 45fe6060f1SDimitry Andric 46fe6060f1SDimitry Andric struct ClampI64ToI16MatchInfo { 47fe6060f1SDimitry Andric int64_t Cmp1 = 0; 48fe6060f1SDimitry Andric int64_t Cmp2 = 0; 49fe6060f1SDimitry Andric Register Origin; 50fe6060f1SDimitry Andric }; 51fe6060f1SDimitry Andric 52fe6060f1SDimitry Andric bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, 53fe6060f1SDimitry Andric MachineFunction &MF, 54fe6060f1SDimitry Andric ClampI64ToI16MatchInfo &MatchInfo); 55fe6060f1SDimitry Andric 56fe6060f1SDimitry Andric void applyClampI64ToI16(MachineInstr &MI, 57fe6060f1SDimitry Andric const ClampI64ToI16MatchInfo &MatchInfo); 58fe6060f1SDimitry Andric }; 59fe6060f1SDimitry Andric 60fe6060f1SDimitry Andric bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( 61fe6060f1SDimitry Andric MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, 62fe6060f1SDimitry Andric ClampI64ToI16MatchInfo &MatchInfo) { 63fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 64fe6060f1SDimitry Andric 65fe6060f1SDimitry Andric // Try to find a pattern where an i64 value should get clamped to short. 66fe6060f1SDimitry Andric const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 67fe6060f1SDimitry Andric if (SrcType != LLT::scalar(64)) 68fe6060f1SDimitry Andric return false; 69fe6060f1SDimitry Andric 70fe6060f1SDimitry Andric const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 71fe6060f1SDimitry Andric if (DstType != LLT::scalar(16)) 72fe6060f1SDimitry Andric return false; 73fe6060f1SDimitry Andric 74fe6060f1SDimitry Andric Register Base; 75fe6060f1SDimitry Andric 76fe6060f1SDimitry Andric auto IsApplicableForCombine = [&MatchInfo]() -> bool { 77fe6060f1SDimitry Andric const auto Cmp1 = MatchInfo.Cmp1; 78fe6060f1SDimitry Andric const auto Cmp2 = MatchInfo.Cmp2; 79fe6060f1SDimitry Andric const auto Diff = std::abs(Cmp2 - Cmp1); 80fe6060f1SDimitry Andric 81fe6060f1SDimitry Andric // If the difference between both comparison values is 0 or 1, there is no 82fe6060f1SDimitry Andric // need to clamp. 83fe6060f1SDimitry Andric if (Diff == 0 || Diff == 1) 84fe6060f1SDimitry Andric return false; 85fe6060f1SDimitry Andric 86fe6060f1SDimitry Andric const int64_t Min = std::numeric_limits<int16_t>::min(); 87fe6060f1SDimitry Andric const int64_t Max = std::numeric_limits<int16_t>::max(); 88fe6060f1SDimitry Andric 89fe6060f1SDimitry Andric // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 90fe6060f1SDimitry Andric return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 91fe6060f1SDimitry Andric (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 92fe6060f1SDimitry Andric }; 93fe6060f1SDimitry Andric 94fe6060f1SDimitry Andric // Try to match a combination of min / max MIR opcodes. 95fe6060f1SDimitry Andric if (mi_match(MI.getOperand(1).getReg(), MRI, 96fe6060f1SDimitry Andric m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 97fe6060f1SDimitry Andric if (mi_match(Base, MRI, 98fe6060f1SDimitry Andric m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 99fe6060f1SDimitry Andric return IsApplicableForCombine(); 100fe6060f1SDimitry Andric } 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric 103fe6060f1SDimitry Andric if (mi_match(MI.getOperand(1).getReg(), MRI, 104fe6060f1SDimitry Andric m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 105fe6060f1SDimitry Andric if (mi_match(Base, MRI, 106fe6060f1SDimitry Andric m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 107fe6060f1SDimitry Andric return IsApplicableForCombine(); 108fe6060f1SDimitry Andric } 109fe6060f1SDimitry Andric } 110fe6060f1SDimitry Andric 111fe6060f1SDimitry Andric return false; 112fe6060f1SDimitry Andric } 113fe6060f1SDimitry Andric 114fe6060f1SDimitry Andric // We want to find a combination of instructions that 115fe6060f1SDimitry Andric // gets generated when an i64 gets clamped to i16. 116fe6060f1SDimitry Andric // The corresponding pattern is: 117fe6060f1SDimitry Andric // G_MAX / G_MAX for i16 <= G_TRUNC i64. 118fe6060f1SDimitry Andric // This can be efficiently written as following: 119fe6060f1SDimitry Andric // v_cvt_pk_i16_i32 v0, v0, v1 120fe6060f1SDimitry Andric // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 121fe6060f1SDimitry Andric void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( 122fe6060f1SDimitry Andric MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { 123fe6060f1SDimitry Andric 124fe6060f1SDimitry Andric Register Src = MatchInfo.Origin; 125fe6060f1SDimitry Andric assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == 126fe6060f1SDimitry Andric LLT::scalar(64)); 127fe6060f1SDimitry Andric const LLT S32 = LLT::scalar(32); 128fe6060f1SDimitry Andric 129fe6060f1SDimitry Andric B.setInstrAndDebugLoc(MI); 130fe6060f1SDimitry Andric 131fe6060f1SDimitry Andric auto Unmerge = B.buildUnmerge(S32, Src); 132fe6060f1SDimitry Andric 133fe6060f1SDimitry Andric assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 134fe6060f1SDimitry Andric 135fe6060f1SDimitry Andric const LLT V2S16 = LLT::fixed_vector(2, 16); 136fe6060f1SDimitry Andric auto CvtPk = 137fe6060f1SDimitry Andric B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16}, 138fe6060f1SDimitry Andric {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); 139fe6060f1SDimitry Andric 140fe6060f1SDimitry Andric auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 141fe6060f1SDimitry Andric auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 142fe6060f1SDimitry Andric auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 143fe6060f1SDimitry Andric auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 144fe6060f1SDimitry Andric 145fe6060f1SDimitry Andric auto Bitcast = B.buildBitcast({S32}, CvtPk); 146fe6060f1SDimitry Andric 147fe6060f1SDimitry Andric auto Med3 = B.buildInstr( 148fe6060f1SDimitry Andric AMDGPU::G_AMDGPU_SMED3, {S32}, 149fe6060f1SDimitry Andric {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, 150fe6060f1SDimitry Andric MI.getFlags()); 151fe6060f1SDimitry Andric 152fe6060f1SDimitry Andric B.buildTrunc(MI.getOperand(0).getReg(), Med3); 153fe6060f1SDimitry Andric 154fe6060f1SDimitry Andric MI.eraseFromParent(); 155fe6060f1SDimitry Andric } 156fe6060f1SDimitry Andric 157fe6060f1SDimitry Andric class AMDGPUPreLegalizerCombinerHelperState { 158fe6060f1SDimitry Andric protected: 159349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper; 160fe6060f1SDimitry Andric AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; 161fe6060f1SDimitry Andric 162fe6060f1SDimitry Andric public: 163fe6060f1SDimitry Andric AMDGPUPreLegalizerCombinerHelperState( 164349cc55cSDimitry Andric AMDGPUCombinerHelper &Helper, 165fe6060f1SDimitry Andric AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) 166fe6060f1SDimitry Andric : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} 167fe6060f1SDimitry Andric }; 168fe6060f1SDimitry Andric 1695ffd83dbSDimitry Andric #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 1705ffd83dbSDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 1715ffd83dbSDimitry Andric #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 1725ffd83dbSDimitry Andric 1735ffd83dbSDimitry Andric namespace { 1745ffd83dbSDimitry Andric #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 1755ffd83dbSDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 1765ffd83dbSDimitry Andric #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 1775ffd83dbSDimitry Andric 178e8d8bef9SDimitry Andric class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { 1795ffd83dbSDimitry Andric GISelKnownBits *KB; 1805ffd83dbSDimitry Andric MachineDominatorTree *MDT; 1815ffd83dbSDimitry Andric 1825ffd83dbSDimitry Andric public: 1835ffd83dbSDimitry Andric AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 1845ffd83dbSDimitry Andric 1855ffd83dbSDimitry Andric AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 1865ffd83dbSDimitry Andric GISelKnownBits *KB, MachineDominatorTree *MDT) 1875ffd83dbSDimitry Andric : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 1885ffd83dbSDimitry Andric /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), 1895ffd83dbSDimitry Andric KB(KB), MDT(MDT) { 1905ffd83dbSDimitry Andric if (!GeneratedRuleCfg.parseCommandLineOption()) 1915ffd83dbSDimitry Andric report_fatal_error("Invalid rule identifier"); 1925ffd83dbSDimitry Andric } 1935ffd83dbSDimitry Andric 194*972a253aSDimitry Andric bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 1955ffd83dbSDimitry Andric MachineIRBuilder &B) const override; 1965ffd83dbSDimitry Andric }; 1975ffd83dbSDimitry Andric 1985ffd83dbSDimitry Andric bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 1995ffd83dbSDimitry Andric MachineInstr &MI, 2005ffd83dbSDimitry Andric MachineIRBuilder &B) const { 201349cc55cSDimitry Andric AMDGPUCombinerHelper Helper(Observer, B, KB, MDT); 202fe6060f1SDimitry Andric AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); 203fe6060f1SDimitry Andric AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 204fe6060f1SDimitry Andric PreLegalizerHelper); 2055ffd83dbSDimitry Andric 206349cc55cSDimitry Andric if (Generated.tryCombineAll(Observer, MI, B)) 2075ffd83dbSDimitry Andric return true; 2085ffd83dbSDimitry Andric 2095ffd83dbSDimitry Andric switch (MI.getOpcode()) { 2105ffd83dbSDimitry Andric case TargetOpcode::G_CONCAT_VECTORS: 2115ffd83dbSDimitry Andric return Helper.tryCombineConcatVectors(MI); 2125ffd83dbSDimitry Andric case TargetOpcode::G_SHUFFLE_VECTOR: 2135ffd83dbSDimitry Andric return Helper.tryCombineShuffleVector(MI); 2145ffd83dbSDimitry Andric } 2155ffd83dbSDimitry Andric 2165ffd83dbSDimitry Andric return false; 2175ffd83dbSDimitry Andric } 2185ffd83dbSDimitry Andric 2195ffd83dbSDimitry Andric #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 2205ffd83dbSDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 2215ffd83dbSDimitry Andric #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 2225ffd83dbSDimitry Andric 2235ffd83dbSDimitry Andric // Pass boilerplate 2245ffd83dbSDimitry Andric // ================ 2255ffd83dbSDimitry Andric 2265ffd83dbSDimitry Andric class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 2275ffd83dbSDimitry Andric public: 2285ffd83dbSDimitry Andric static char ID; 2295ffd83dbSDimitry Andric 2305ffd83dbSDimitry Andric AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 2315ffd83dbSDimitry Andric 2325ffd83dbSDimitry Andric StringRef getPassName() const override { 2335ffd83dbSDimitry Andric return "AMDGPUPreLegalizerCombiner"; 2345ffd83dbSDimitry Andric } 2355ffd83dbSDimitry Andric 2365ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 2375ffd83dbSDimitry Andric 2385ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 2395ffd83dbSDimitry Andric private: 2405ffd83dbSDimitry Andric bool IsOptNone; 2415ffd83dbSDimitry Andric }; 2425ffd83dbSDimitry Andric } // end anonymous namespace 2435ffd83dbSDimitry Andric 2445ffd83dbSDimitry Andric void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 2455ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 2465ffd83dbSDimitry Andric AU.setPreservesCFG(); 2475ffd83dbSDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 2485ffd83dbSDimitry Andric AU.addRequired<GISelKnownBitsAnalysis>(); 2495ffd83dbSDimitry Andric AU.addPreserved<GISelKnownBitsAnalysis>(); 2505ffd83dbSDimitry Andric if (!IsOptNone) { 2515ffd83dbSDimitry Andric AU.addRequired<MachineDominatorTree>(); 2525ffd83dbSDimitry Andric AU.addPreserved<MachineDominatorTree>(); 2535ffd83dbSDimitry Andric } 254fe6060f1SDimitry Andric 255fe6060f1SDimitry Andric AU.addRequired<GISelCSEAnalysisWrapperPass>(); 256fe6060f1SDimitry Andric AU.addPreserved<GISelCSEAnalysisWrapperPass>(); 2575ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 2585ffd83dbSDimitry Andric } 2595ffd83dbSDimitry Andric 2605ffd83dbSDimitry Andric AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 2615ffd83dbSDimitry Andric : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 2625ffd83dbSDimitry Andric initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 2635ffd83dbSDimitry Andric } 2645ffd83dbSDimitry Andric 2655ffd83dbSDimitry Andric bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 2665ffd83dbSDimitry Andric if (MF.getProperties().hasProperty( 2675ffd83dbSDimitry Andric MachineFunctionProperties::Property::FailedISel)) 2685ffd83dbSDimitry Andric return false; 2695ffd83dbSDimitry Andric auto *TPC = &getAnalysis<TargetPassConfig>(); 2705ffd83dbSDimitry Andric const Function &F = MF.getFunction(); 2715ffd83dbSDimitry Andric bool EnableOpt = 2725ffd83dbSDimitry Andric MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 2735ffd83dbSDimitry Andric GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 2745ffd83dbSDimitry Andric MachineDominatorTree *MDT = 2755ffd83dbSDimitry Andric IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 2765ffd83dbSDimitry Andric AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 2775ffd83dbSDimitry Andric F.hasMinSize(), KB, MDT); 278fe6060f1SDimitry Andric // Enable CSE. 279fe6060f1SDimitry Andric GISelCSEAnalysisWrapper &Wrapper = 280fe6060f1SDimitry Andric getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); 281fe6060f1SDimitry Andric auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); 282fe6060f1SDimitry Andric 2835ffd83dbSDimitry Andric Combiner C(PCInfo, TPC); 284fe6060f1SDimitry Andric return C.combineMachineInstrs(MF, CSEInfo); 2855ffd83dbSDimitry Andric } 2865ffd83dbSDimitry Andric 2875ffd83dbSDimitry Andric char AMDGPUPreLegalizerCombiner::ID = 0; 2885ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 2895ffd83dbSDimitry Andric "Combine AMDGPU machine instrs before legalization", 2905ffd83dbSDimitry Andric false, false) 2915ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 2925ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 2935ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 2945ffd83dbSDimitry Andric "Combine AMDGPU machine instrs before legalization", false, 2955ffd83dbSDimitry Andric false) 2965ffd83dbSDimitry Andric 2975ffd83dbSDimitry Andric namespace llvm { 2985ffd83dbSDimitry Andric FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 2995ffd83dbSDimitry Andric return new AMDGPUPreLegalizerCombiner(IsOptNone); 3005ffd83dbSDimitry Andric } 3015ffd83dbSDimitry Andric } // end namespace llvm 302