//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after register banks are known.
//
//===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/Combiner.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/Target/TargetMachine.h"
27 #define DEBUG_TYPE "amdgpu-regbank-combiner"
28
29 using namespace llvm;
30 using namespace MIPatternMatch;
31
32 class AMDGPURegBankCombinerHelper {
33 protected:
34 MachineIRBuilder &B;
35 MachineFunction &MF;
36 MachineRegisterInfo &MRI;
37 const RegisterBankInfo &RBI;
38 const TargetRegisterInfo &TRI;
39 CombinerHelper &Helper;
40
41 public:
AMDGPURegBankCombinerHelper(MachineIRBuilder & B,CombinerHelper & Helper)42 AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
43 : B(B), MF(B.getMF()), MRI(*B.getMRI()),
44 RBI(*MF.getSubtarget().getRegBankInfo()),
45 TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
46
47 bool isVgprRegBank(Register Reg);
48
49 struct MinMaxMedOpc {
50 unsigned Min, Max, Med;
51 };
52
53 struct Med3MatchInfo {
54 unsigned Opc;
55 Register Val0, Val1, Val2;
56 };
57
58 MinMaxMedOpc getMinMaxPair(unsigned Opc);
59
60 template <class m_Cst>
61 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
62 Register &Val, Register &K0, Register &K1);
63
64 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
65 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
66 };
67
isVgprRegBank(Register Reg)68 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
69 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
70 }
71
72 AMDGPURegBankCombinerHelper::MinMaxMedOpc
getMinMaxPair(unsigned Opc)73 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
74 switch (Opc) {
75 default:
76 llvm_unreachable("Unsupported opcode");
77 case AMDGPU::G_SMAX:
78 case AMDGPU::G_SMIN:
79 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
80 case AMDGPU::G_UMAX:
81 case AMDGPU::G_UMIN:
82 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
83 }
84 }
85
86 template <class m_Cst>
matchMed(MachineInstr & MI,MachineRegisterInfo & MRI,MinMaxMedOpc MMMOpc,Register & Val,Register & K0,Register & K1)87 bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
88 MachineRegisterInfo &MRI,
89 MinMaxMedOpc MMMOpc, Register &Val,
90 Register &K0, Register &K1) {
91 // 4 operand commutes of: min(max(Val, K0), K1).
92 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
93 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
94 // 4 operand commutes of: max(min(Val, K1), K0).
95 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
96 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
97 return mi_match(
98 MI, MRI,
99 m_any_of(
100 m_CommutativeBinOp(
101 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
102 m_Cst(K1)),
103 m_CommutativeBinOp(
104 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
105 m_Cst(K0))));
106 }
107
matchIntMinMaxToMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo)108 bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
109 MachineInstr &MI, Med3MatchInfo &MatchInfo) {
110 Register Dst = MI.getOperand(0).getReg();
111 if (!isVgprRegBank(Dst))
112 return false;
113
114 if (MRI.getType(Dst).isVector())
115 return false;
116
117 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
118 Register Val, K0, K1;
119 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
120 if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
121 return false;
122
123 const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
124 const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
125 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
126 return false;
127 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
128 return false;
129
130 MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
131 return true;
132 }
133
applyMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo)134 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
135 Med3MatchInfo &MatchInfo) {
136 B.setInstrAndDebugLoc(MI);
137 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
138 {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
139 MI.eraseFromParent();
140 }
141
142 class AMDGPURegBankCombinerHelperState {
143 protected:
144 CombinerHelper &Helper;
145 AMDGPURegBankCombinerHelper &RegBankHelper;
146
147 public:
AMDGPURegBankCombinerHelperState(CombinerHelper & Helper,AMDGPURegBankCombinerHelper & RegBankHelper)148 AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
149 AMDGPURegBankCombinerHelper &RegBankHelper)
150 : Helper(Helper), RegBankHelper(RegBankHelper) {}
151 };
152
153 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
154 #include "AMDGPUGenRegBankGICombiner.inc"
155 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
156
157 namespace {
158 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
159 #include "AMDGPUGenRegBankGICombiner.inc"
160 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
161
162 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
163 GISelKnownBits *KB;
164 MachineDominatorTree *MDT;
165
166 public:
167 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
168
AMDGPURegBankCombinerInfo(bool EnableOpt,bool OptSize,bool MinSize,const AMDGPULegalizerInfo * LI,GISelKnownBits * KB,MachineDominatorTree * MDT)169 AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
170 const AMDGPULegalizerInfo *LI,
171 GISelKnownBits *KB, MachineDominatorTree *MDT)
172 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
173 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
174 KB(KB), MDT(MDT) {
175 if (!GeneratedRuleCfg.parseCommandLineOption())
176 report_fatal_error("Invalid rule identifier");
177 }
178
179 bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
180 MachineIRBuilder &B) const override;
181 };
182
combine(GISelChangeObserver & Observer,MachineInstr & MI,MachineIRBuilder & B) const183 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
184 MachineInstr &MI,
185 MachineIRBuilder &B) const {
186 CombinerHelper Helper(Observer, B, KB, MDT);
187 AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
188 AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
189 RegBankHelper);
190
191 if (Generated.tryCombineAll(Observer, MI, B))
192 return true;
193
194 return false;
195 }
196
197 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
198 #include "AMDGPUGenRegBankGICombiner.inc"
199 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
200
201 // Pass boilerplate
202 // ================
203
204 class AMDGPURegBankCombiner : public MachineFunctionPass {
205 public:
206 static char ID;
207
208 AMDGPURegBankCombiner(bool IsOptNone = false);
209
getPassName() const210 StringRef getPassName() const override {
211 return "AMDGPURegBankCombiner";
212 }
213
214 bool runOnMachineFunction(MachineFunction &MF) override;
215
216 void getAnalysisUsage(AnalysisUsage &AU) const override;
217 private:
218 bool IsOptNone;
219 };
220 } // end anonymous namespace
221
getAnalysisUsage(AnalysisUsage & AU) const222 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
223 AU.addRequired<TargetPassConfig>();
224 AU.setPreservesCFG();
225 getSelectionDAGFallbackAnalysisUsage(AU);
226 AU.addRequired<GISelKnownBitsAnalysis>();
227 AU.addPreserved<GISelKnownBitsAnalysis>();
228 if (!IsOptNone) {
229 AU.addRequired<MachineDominatorTree>();
230 AU.addPreserved<MachineDominatorTree>();
231 }
232 MachineFunctionPass::getAnalysisUsage(AU);
233 }
234
AMDGPURegBankCombiner(bool IsOptNone)235 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
236 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
237 initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
238 }
239
runOnMachineFunction(MachineFunction & MF)240 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
241 if (MF.getProperties().hasProperty(
242 MachineFunctionProperties::Property::FailedISel))
243 return false;
244 auto *TPC = &getAnalysis<TargetPassConfig>();
245 const Function &F = MF.getFunction();
246 bool EnableOpt =
247 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
248
249 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
250 const AMDGPULegalizerInfo *LI
251 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
252
253 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
254 MachineDominatorTree *MDT =
255 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
256 AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
257 F.hasMinSize(), LI, KB, MDT);
258 Combiner C(PCInfo, TPC);
259 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
260 }
261
262 char AMDGPURegBankCombiner::ID = 0;
263 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
264 "Combine AMDGPU machine instrs after regbankselect",
265 false, false)
266 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
267 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
268 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
269 "Combine AMDGPU machine instrs after regbankselect", false,
270 false)
271
272 namespace llvm {
createAMDGPURegBankCombiner(bool IsOptNone)273 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
274 return new AMDGPURegBankCombiner(IsOptNone);
275 }
276 } // end namespace llvm
277