//=== lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;
namespace {

#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPreLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const AMDGPUCombinerHelper Helper;

public:
  AMDGPUPreLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &MI) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1 = 0;
    int64_t Cmp2 = 0;
    Register Origin;
  };

  bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
                          const MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo) const;

  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  }

  return false;
}

bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
    MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
  // Try to find a pattern where an i64 value gets clamped to i16.
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
  if (SrcType != LLT::scalar(64))
    return false;

  const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  if (DstType != LLT::scalar(16))
    return false;

  Register Base;

  auto IsApplicableForCombine = [&MatchInfo]() -> bool {
    const auto Cmp1 = MatchInfo.Cmp1;
    const auto Cmp2 = MatchInfo.Cmp2;
    const auto Diff = std::abs(Cmp2 - Cmp1);

    // If the difference between both comparison values is 0 or 1, there is no
    // need to clamp.
    if (Diff == 0 || Diff == 1)
      return false;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();
    // Check that both comparison values lie within [INT16_MIN, INT16_MAX].
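    // For example, bounds of -32768 and 32767 (the full i16 range) are
    // accepted, whereas a bound such as 40000 lies outside i16 and rejects
    // the combine.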
    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
  };

  // Try to match a combination of min / max MIR opcodes.
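  // As a rough illustration (register names are made up), the first case
  // matches MIR of the shape:
  //   %max:_(s64) = G_SMAX %origin:_(s64), %cst2
  //   %min:_(s64) = G_SMIN %max, %cst1
  //   %trunc:_(s16) = G_TRUNC %min
  // where %cst1 / %cst2 are G_CONSTANT values; the second case below is the
  // same shape with G_SMIN and G_SMAX swapped.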
  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
    if (mi_match(Base, MRI,
                 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
      return IsApplicableForCombine();
    }
  }

  return false;
}

// We want to find the combination of instructions that gets generated when an
// i64 value is clamped to i16, i.e. a G_SMIN / G_SMAX pair on the i64 value
// followed by a G_TRUNC to i16.
// This can be lowered efficiently as:
// v_cvt_pk_i16_i32 v0, v0, v1
// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
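// As a rough sketch (register names below are illustrative, not taken from the
// original source), the generic MIR built here looks like:
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %origin:_(s64)
//   %pk:_(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 %lo, %hi
//   %cast:_(s32) = G_BITCAST %pk
//   %med:_(s32) = G_AMDGPU_SMED3 %min_bound, %cast, %max_bound
//   %res:_(s16) = G_TRUNC %med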
void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {

  Register Src = MatchInfo.Origin;
  assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
         LLT::scalar(64));
  const LLT S32 = LLT::scalar(32);

  auto Unmerge = B.buildUnmerge(S32, Src);

  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);

  const LLT V2S16 = LLT::fixed_vector(2, 16);
  auto CvtPk =
      B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
                   {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());

  auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
  auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
  auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);

  auto Bitcast = B.buildBitcast({S32}, CvtPk);

  auto Med3 = B.buildInstr(
      AMDGPU::G_AMDGPU_SMED3, {S32},
      {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
      MI.getFlags());

  B.buildTrunc(MI.getOperand(0).getReg(), Med3);

  MI.eraseFromParent();
}

// Pass boilerplate
// ================

class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }

  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());

  const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile-time
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // This is the first Combiner, so the input IR might contain dead
  // instructions.
  CInfo.EnableFullDCE = true;
  AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
                                      STI, MDT, STI.getLegalizerInfo());
  return Impl.combineMachineInstrs();
}

char AMDGPUPreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs before legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm