15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 105ffd83dbSDimitry Andric // before the legalizer. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14e8d8bef9SDimitry Andric #include "AMDGPU.h" 15349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h" 16fe6060f1SDimitry Andric #include "AMDGPULegalizerInfo.h" 17fe6060f1SDimitry Andric #include "GCNSubtarget.h" 18fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 1981ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/CSEInfo.h" 205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 215ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 2306c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 255ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 265ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 275ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 28e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 295ffd83dbSDimitry Andric 3006c3fb27SDimitry Andric #define GET_GICOMBINER_DEPS 3106c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 3206c3fb27SDimitry Andric #undef GET_GICOMBINER_DEPS 3306c3fb27SDimitry Andric 345ffd83dbSDimitry Andric #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 355ffd83dbSDimitry Andric 365ffd83dbSDimitry Andric using namespace llvm; 375ffd83dbSDimitry Andric using namespace MIPatternMatch; 3806c3fb27SDimitry Andric namespace { 395ffd83dbSDimitry Andric 4006c3fb27SDimitry Andric #define GET_GICOMBINER_TYPES 4106c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 4206c3fb27SDimitry Andric #undef GET_GICOMBINER_TYPES 4306c3fb27SDimitry Andric 445f757f3fSDimitry Andric class AMDGPUPreLegalizerCombinerImpl : public Combiner { 45fe6060f1SDimitry Andric protected: 4606c3fb27SDimitry Andric const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig; 4706c3fb27SDimitry Andric const GCNSubtarget &STI; 485f757f3fSDimitry Andric // TODO: Make CombinerHelper methods const. 495f757f3fSDimitry Andric mutable AMDGPUCombinerHelper Helper; 50fe6060f1SDimitry Andric 51fe6060f1SDimitry Andric public: 5206c3fb27SDimitry Andric AMDGPUPreLegalizerCombinerImpl( 535f757f3fSDimitry Andric MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, 545f757f3fSDimitry Andric GISelKnownBits &KB, GISelCSEInfo *CSEInfo, 5506c3fb27SDimitry Andric const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, 565f757f3fSDimitry Andric const GCNSubtarget &STI, MachineDominatorTree *MDT, 575f757f3fSDimitry Andric const LegalizerInfo *LI); 5806c3fb27SDimitry Andric 5906c3fb27SDimitry Andric static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; } 6006c3fb27SDimitry Andric 615f757f3fSDimitry Andric bool tryCombineAllImpl(MachineInstr &MI) const; 625f757f3fSDimitry Andric bool tryCombineAll(MachineInstr &I) const override; 63fe6060f1SDimitry Andric 64fe6060f1SDimitry Andric struct ClampI64ToI16MatchInfo { 65fe6060f1SDimitry Andric int64_t Cmp1 = 0; 66fe6060f1SDimitry Andric int64_t Cmp2 = 0; 67fe6060f1SDimitry Andric Register Origin; 68fe6060f1SDimitry Andric }; 69fe6060f1SDimitry Andric 7006c3fb27SDimitry Andric bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI, 7106c3fb27SDimitry Andric const MachineFunction &MF, 7206c3fb27SDimitry Andric ClampI64ToI16MatchInfo &MatchInfo) const; 73fe6060f1SDimitry Andric 74fe6060f1SDimitry Andric void applyClampI64ToI16(MachineInstr &MI, 7506c3fb27SDimitry Andric const ClampI64ToI16MatchInfo &MatchInfo) const; 7606c3fb27SDimitry Andric 7706c3fb27SDimitry Andric private: 7806c3fb27SDimitry Andric #define GET_GICOMBINER_CLASS_MEMBERS 7906c3fb27SDimitry Andric #define AMDGPUSubtarget GCNSubtarget 8006c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 8106c3fb27SDimitry Andric #undef GET_GICOMBINER_CLASS_MEMBERS 8206c3fb27SDimitry Andric #undef AMDGPUSubtarget 83fe6060f1SDimitry Andric }; 84fe6060f1SDimitry Andric 8506c3fb27SDimitry Andric #define GET_GICOMBINER_IMPL 8606c3fb27SDimitry Andric #define AMDGPUSubtarget GCNSubtarget 8706c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 8806c3fb27SDimitry Andric #undef AMDGPUSubtarget 8906c3fb27SDimitry Andric #undef GET_GICOMBINER_IMPL 9006c3fb27SDimitry Andric 9106c3fb27SDimitry Andric AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl( 925f757f3fSDimitry Andric MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, 935f757f3fSDimitry Andric GISelKnownBits &KB, GISelCSEInfo *CSEInfo, 9406c3fb27SDimitry Andric const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, 955f757f3fSDimitry Andric const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) 965f757f3fSDimitry Andric : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI), 975f757f3fSDimitry Andric Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI), 9806c3fb27SDimitry Andric #define GET_GICOMBINER_CONSTRUCTOR_INITS 9906c3fb27SDimitry Andric #include "AMDGPUGenPreLegalizeGICombiner.inc" 10006c3fb27SDimitry Andric #undef GET_GICOMBINER_CONSTRUCTOR_INITS 10106c3fb27SDimitry Andric { 10206c3fb27SDimitry Andric } 10306c3fb27SDimitry Andric 1045f757f3fSDimitry Andric bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { 1055f757f3fSDimitry Andric if (tryCombineAllImpl(MI)) 1065f757f3fSDimitry Andric return true; 1075f757f3fSDimitry Andric 1085f757f3fSDimitry Andric switch (MI.getOpcode()) { 1095f757f3fSDimitry Andric case TargetOpcode::G_SHUFFLE_VECTOR: 1105f757f3fSDimitry Andric return Helper.tryCombineShuffleVector(MI); 1115f757f3fSDimitry Andric } 1125f757f3fSDimitry Andric 1135f757f3fSDimitry Andric return false; 1145f757f3fSDimitry Andric } 1155f757f3fSDimitry Andric 11606c3fb27SDimitry Andric bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16( 11706c3fb27SDimitry Andric MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF, 11806c3fb27SDimitry Andric ClampI64ToI16MatchInfo &MatchInfo) const { 119fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 120fe6060f1SDimitry Andric 121fe6060f1SDimitry Andric // Try to find a pattern where an i64 value should get clamped to short. 122fe6060f1SDimitry Andric const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 123fe6060f1SDimitry Andric if (SrcType != LLT::scalar(64)) 124fe6060f1SDimitry Andric return false; 125fe6060f1SDimitry Andric 126fe6060f1SDimitry Andric const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 127fe6060f1SDimitry Andric if (DstType != LLT::scalar(16)) 128fe6060f1SDimitry Andric return false; 129fe6060f1SDimitry Andric 130fe6060f1SDimitry Andric Register Base; 131fe6060f1SDimitry Andric 132fe6060f1SDimitry Andric auto IsApplicableForCombine = [&MatchInfo]() -> bool { 133fe6060f1SDimitry Andric const auto Cmp1 = MatchInfo.Cmp1; 134fe6060f1SDimitry Andric const auto Cmp2 = MatchInfo.Cmp2; 135fe6060f1SDimitry Andric const auto Diff = std::abs(Cmp2 - Cmp1); 136fe6060f1SDimitry Andric 137fe6060f1SDimitry Andric // If the difference between both comparison values is 0 or 1, there is no 138fe6060f1SDimitry Andric // need to clamp. 139fe6060f1SDimitry Andric if (Diff == 0 || Diff == 1) 140fe6060f1SDimitry Andric return false; 141fe6060f1SDimitry Andric 142fe6060f1SDimitry Andric const int64_t Min = std::numeric_limits<int16_t>::min(); 143fe6060f1SDimitry Andric const int64_t Max = std::numeric_limits<int16_t>::max(); 144fe6060f1SDimitry Andric 145fe6060f1SDimitry Andric // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 146fe6060f1SDimitry Andric return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 147fe6060f1SDimitry Andric (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 148fe6060f1SDimitry Andric }; 149fe6060f1SDimitry Andric 150fe6060f1SDimitry Andric // Try to match a combination of min / max MIR opcodes. 151fe6060f1SDimitry Andric if (mi_match(MI.getOperand(1).getReg(), MRI, 152fe6060f1SDimitry Andric m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 153fe6060f1SDimitry Andric if (mi_match(Base, MRI, 154fe6060f1SDimitry Andric m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 155fe6060f1SDimitry Andric return IsApplicableForCombine(); 156fe6060f1SDimitry Andric } 157fe6060f1SDimitry Andric } 158fe6060f1SDimitry Andric 159fe6060f1SDimitry Andric if (mi_match(MI.getOperand(1).getReg(), MRI, 160fe6060f1SDimitry Andric m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 161fe6060f1SDimitry Andric if (mi_match(Base, MRI, 162fe6060f1SDimitry Andric m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 163fe6060f1SDimitry Andric return IsApplicableForCombine(); 164fe6060f1SDimitry Andric } 165fe6060f1SDimitry Andric } 166fe6060f1SDimitry Andric 167fe6060f1SDimitry Andric return false; 168fe6060f1SDimitry Andric } 169fe6060f1SDimitry Andric 170fe6060f1SDimitry Andric // We want to find a combination of instructions that 171fe6060f1SDimitry Andric // gets generated when an i64 gets clamped to i16. 172fe6060f1SDimitry Andric // The corresponding pattern is: 173fe6060f1SDimitry Andric // G_MAX / G_MAX for i16 <= G_TRUNC i64. 174fe6060f1SDimitry Andric // This can be efficiently written as following: 175fe6060f1SDimitry Andric // v_cvt_pk_i16_i32 v0, v0, v1 176fe6060f1SDimitry Andric // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 17706c3fb27SDimitry Andric void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( 17806c3fb27SDimitry Andric MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { 179fe6060f1SDimitry Andric 180fe6060f1SDimitry Andric Register Src = MatchInfo.Origin; 181fe6060f1SDimitry Andric assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == 182fe6060f1SDimitry Andric LLT::scalar(64)); 183fe6060f1SDimitry Andric const LLT S32 = LLT::scalar(32); 184fe6060f1SDimitry Andric 185fe6060f1SDimitry Andric auto Unmerge = B.buildUnmerge(S32, Src); 186fe6060f1SDimitry Andric 187fe6060f1SDimitry Andric assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 188fe6060f1SDimitry Andric 189fe6060f1SDimitry Andric const LLT V2S16 = LLT::fixed_vector(2, 16); 190fe6060f1SDimitry Andric auto CvtPk = 191fe6060f1SDimitry Andric B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16}, 192fe6060f1SDimitry Andric {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); 193fe6060f1SDimitry Andric 194fe6060f1SDimitry Andric auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 195fe6060f1SDimitry Andric auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 196fe6060f1SDimitry Andric auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 197fe6060f1SDimitry Andric auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 198fe6060f1SDimitry Andric 199fe6060f1SDimitry Andric auto Bitcast = B.buildBitcast({S32}, CvtPk); 200fe6060f1SDimitry Andric 201fe6060f1SDimitry Andric auto Med3 = B.buildInstr( 202fe6060f1SDimitry Andric AMDGPU::G_AMDGPU_SMED3, {S32}, 203fe6060f1SDimitry Andric {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, 204fe6060f1SDimitry Andric MI.getFlags()); 205fe6060f1SDimitry Andric 206fe6060f1SDimitry Andric B.buildTrunc(MI.getOperand(0).getReg(), Med3); 207fe6060f1SDimitry Andric 208fe6060f1SDimitry Andric MI.eraseFromParent(); 209fe6060f1SDimitry Andric } 210fe6060f1SDimitry Andric 2115ffd83dbSDimitry Andric // Pass boilerplate 2125ffd83dbSDimitry Andric // ================ 2135ffd83dbSDimitry Andric 2145ffd83dbSDimitry Andric class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 2155ffd83dbSDimitry Andric public: 2165ffd83dbSDimitry Andric static char ID; 2175ffd83dbSDimitry Andric 2185ffd83dbSDimitry Andric AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 2195ffd83dbSDimitry Andric 2205ffd83dbSDimitry Andric StringRef getPassName() const override { 2215ffd83dbSDimitry Andric return "AMDGPUPreLegalizerCombiner"; 2225ffd83dbSDimitry Andric } 2235ffd83dbSDimitry Andric 2245ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 2255ffd83dbSDimitry Andric 2265ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 22706c3fb27SDimitry Andric 2285ffd83dbSDimitry Andric private: 2295ffd83dbSDimitry Andric bool IsOptNone; 2305f757f3fSDimitry Andric AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig; 2315ffd83dbSDimitry Andric }; 2325ffd83dbSDimitry Andric } // end anonymous namespace 2335ffd83dbSDimitry Andric 2345ffd83dbSDimitry Andric void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 2355ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 2365ffd83dbSDimitry Andric AU.setPreservesCFG(); 2375ffd83dbSDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 2385ffd83dbSDimitry Andric AU.addRequired<GISelKnownBitsAnalysis>(); 2395ffd83dbSDimitry Andric AU.addPreserved<GISelKnownBitsAnalysis>(); 2405ffd83dbSDimitry Andric if (!IsOptNone) { 241*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>(); 242*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>(); 2435ffd83dbSDimitry Andric } 244fe6060f1SDimitry Andric 245fe6060f1SDimitry Andric AU.addRequired<GISelCSEAnalysisWrapperPass>(); 246fe6060f1SDimitry Andric AU.addPreserved<GISelCSEAnalysisWrapperPass>(); 2475ffd83dbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 2485ffd83dbSDimitry Andric } 2495ffd83dbSDimitry Andric 2505ffd83dbSDimitry Andric AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 2515ffd83dbSDimitry Andric : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 2525ffd83dbSDimitry Andric initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 2535f757f3fSDimitry Andric 2545f757f3fSDimitry Andric if (!RuleConfig.parseCommandLineOption()) 2555f757f3fSDimitry Andric report_fatal_error("Invalid rule identifier"); 2565ffd83dbSDimitry Andric } 2575ffd83dbSDimitry Andric 2585ffd83dbSDimitry Andric bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 2595ffd83dbSDimitry Andric if (MF.getProperties().hasProperty( 2605ffd83dbSDimitry Andric MachineFunctionProperties::Property::FailedISel)) 2615ffd83dbSDimitry Andric return false; 2625ffd83dbSDimitry Andric auto *TPC = &getAnalysis<TargetPassConfig>(); 2635ffd83dbSDimitry Andric const Function &F = MF.getFunction(); 2645ffd83dbSDimitry Andric bool EnableOpt = 2655f757f3fSDimitry Andric MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); 2665ffd83dbSDimitry Andric GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 2675f757f3fSDimitry Andric 268fe6060f1SDimitry Andric // Enable CSE. 269fe6060f1SDimitry Andric GISelCSEAnalysisWrapper &Wrapper = 270fe6060f1SDimitry Andric getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); 271fe6060f1SDimitry Andric auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); 272fe6060f1SDimitry Andric 2735f757f3fSDimitry Andric const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>(); 2745f757f3fSDimitry Andric MachineDominatorTree *MDT = 275*0fca6ea1SDimitry Andric IsOptNone ? nullptr 276*0fca6ea1SDimitry Andric : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 2775f757f3fSDimitry Andric CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 2785f757f3fSDimitry Andric nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize()); 2795f757f3fSDimitry Andric AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig, 2805f757f3fSDimitry Andric STI, MDT, STI.getLegalizerInfo()); 2815f757f3fSDimitry Andric return Impl.combineMachineInstrs(); 2825ffd83dbSDimitry Andric } 2835ffd83dbSDimitry Andric 2845ffd83dbSDimitry Andric char AMDGPUPreLegalizerCombiner::ID = 0; 2855ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 2865ffd83dbSDimitry Andric "Combine AMDGPU machine instrs before legalization", 2875ffd83dbSDimitry Andric false, false) 2885ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 2895ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 2905ffd83dbSDimitry Andric INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 2915ffd83dbSDimitry Andric "Combine AMDGPU machine instrs before legalization", false, 2925ffd83dbSDimitry Andric false) 2935ffd83dbSDimitry Andric 2945ffd83dbSDimitry Andric namespace llvm { 2955ffd83dbSDimitry Andric FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 2965ffd83dbSDimitry Andric return new AMDGPUPreLegalizerCombiner(IsOptNone); 2975ffd83dbSDimitry Andric } 2985ffd83dbSDimitry Andric } // end namespace llvm 299