1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Combiner.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
24 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/TargetPassConfig.h"
27 #include "llvm/IR/IntrinsicsAMDGPU.h"
28 #include "llvm/Target/TargetMachine.h"
29 #define DEBUG_TYPE "amdgpu-regbank-combiner"
30
31 using namespace llvm;
32 using namespace MIPatternMatch;
33
34 class AMDGPURegBankCombinerHelper {
35 protected:
36 MachineIRBuilder &B;
37 MachineFunction &MF;
38 MachineRegisterInfo &MRI;
39 const GCNSubtarget &Subtarget;
40 const RegisterBankInfo &RBI;
41 const TargetRegisterInfo &TRI;
42 const SIInstrInfo &TII;
43 CombinerHelper &Helper;
44
45 public:
AMDGPURegBankCombinerHelper(MachineIRBuilder & B,CombinerHelper & Helper)46 AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
47 : B(B), MF(B.getMF()), MRI(*B.getMRI()),
48 Subtarget(MF.getSubtarget<GCNSubtarget>()),
49 RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
50 TII(*Subtarget.getInstrInfo()), Helper(Helper){};
51
52 bool isVgprRegBank(Register Reg);
53 Register getAsVgpr(Register Reg);
54
55 struct MinMaxMedOpc {
56 unsigned Min, Max, Med;
57 };
58
59 struct Med3MatchInfo {
60 unsigned Opc;
61 Register Val0, Val1, Val2;
62 };
63
64 MinMaxMedOpc getMinMaxPair(unsigned Opc);
65
66 template <class m_Cst, typename CstTy>
67 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
68 Register &Val, CstTy &K0, CstTy &K1);
69
70 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
71 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
72 bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
73 bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
74 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
75 void applyClamp(MachineInstr &MI, Register &Reg);
76
77 private:
78 AMDGPU::SIModeRegisterDefaults getMode();
79 bool getIEEE();
80 bool getDX10Clamp();
81 bool isFminnumIeee(const MachineInstr &MI);
82 bool isFCst(MachineInstr *MI);
83 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
84 };
85
isVgprRegBank(Register Reg)86 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
87 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
88 }
89
getAsVgpr(Register Reg)90 Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
91 if (isVgprRegBank(Reg))
92 return Reg;
93
94 // Search for existing copy of Reg to vgpr.
95 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
96 Register Def = Use.getOperand(0).getReg();
97 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
98 return Def;
99 }
100
101 // Copy Reg to vgpr.
102 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
103 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
104 return VgprReg;
105 }
106
107 AMDGPURegBankCombinerHelper::MinMaxMedOpc
getMinMaxPair(unsigned Opc)108 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
109 switch (Opc) {
110 default:
111 llvm_unreachable("Unsupported opcode");
112 case AMDGPU::G_SMAX:
113 case AMDGPU::G_SMIN:
114 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
115 case AMDGPU::G_UMAX:
116 case AMDGPU::G_UMIN:
117 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
118 case AMDGPU::G_FMAXNUM:
119 case AMDGPU::G_FMINNUM:
120 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
121 case AMDGPU::G_FMAXNUM_IEEE:
122 case AMDGPU::G_FMINNUM_IEEE:
123 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
124 AMDGPU::G_AMDGPU_FMED3};
125 }
126 }
127
128 template <class m_Cst, typename CstTy>
matchMed(MachineInstr & MI,MachineRegisterInfo & MRI,MinMaxMedOpc MMMOpc,Register & Val,CstTy & K0,CstTy & K1)129 bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
130 MachineRegisterInfo &MRI,
131 MinMaxMedOpc MMMOpc, Register &Val,
132 CstTy &K0, CstTy &K1) {
133 // 4 operand commutes of: min(max(Val, K0), K1).
134 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
135 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
136 // 4 operand commutes of: max(min(Val, K1), K0).
137 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
138 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
139 return mi_match(
140 MI, MRI,
141 m_any_of(
142 m_CommutativeBinOp(
143 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
144 m_Cst(K1)),
145 m_CommutativeBinOp(
146 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
147 m_Cst(K0))));
148 }
149
matchIntMinMaxToMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo)150 bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
151 MachineInstr &MI, Med3MatchInfo &MatchInfo) {
152 Register Dst = MI.getOperand(0).getReg();
153 if (!isVgprRegBank(Dst))
154 return false;
155
156 // med3 for i16 is only available on gfx9+, and not available for v2i16.
157 LLT Ty = MRI.getType(Dst);
158 if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
159 Ty != LLT::scalar(32))
160 return false;
161
162 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
163 Register Val;
164 std::optional<ValueAndVReg> K0, K1;
165 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
166 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
167 return false;
168
169 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
170 return false;
171 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
172 return false;
173
174 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
175 return true;
176 }
177
178 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
179 // ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
180 // ieee = false : min/max(NaN, K) = K
181 // clamp(NaN) = dx10_clamp ? 0.0 : NaN
182 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
183 // Other operand commutes (see matchMed) give same result since min and max are
184 // commutative.
185
186 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
187 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
188 // Val = SNaN only for ieee = true
189 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
190 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
191 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
192 // Val = NaN,ieee = false or Val = QNaN,ieee = true
193 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
194 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
195 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
matchFPMinMaxToMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo)196 bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
197 MachineInstr &MI, Med3MatchInfo &MatchInfo) {
198 Register Dst = MI.getOperand(0).getReg();
199 LLT Ty = MRI.getType(Dst);
200
201 // med3 for f16 is only available on gfx9+, and not available for v2f16.
202 if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
203 Ty != LLT::scalar(32))
204 return false;
205
206 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
207
208 Register Val;
209 std::optional<FPValueAndVReg> K0, K1;
210 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
211 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
212 return false;
213
214 if (K0->Value > K1->Value)
215 return false;
216
217 // For IEEE=false perform combine only when it's safe to assume that there are
218 // no NaN inputs. Most often MI is marked with nnan fast math flag.
219 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
220 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
221 // nodes(max/min) have same behavior when one input is NaN and other isn't.
222 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
223 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
224 if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
225 // Don't fold single use constant that can't be inlined.
226 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
227 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
228 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
229 return true;
230 }
231 }
232
233 return false;
234 }
235
matchFPMinMaxToClamp(MachineInstr & MI,Register & Reg)236 bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
237 Register &Reg) {
238 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
239 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
240 Register Val;
241 std::optional<FPValueAndVReg> K0, K1;
242 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
243 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
244 return false;
245
246 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
247 return false;
248
249 // For IEEE=false perform combine only when it's safe to assume that there are
250 // no NaN inputs. Most often MI is marked with nnan fast math flag.
251 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
252 // to 0.0 requires dx10_clamp = true.
253 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
254 isKnownNeverSNaN(Val, MRI)) ||
255 isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
256 Reg = Val;
257 return true;
258 }
259
260 return false;
261 }
262
263 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
264 // Val = SNaN only for ieee = true. It is important which operand is NaN.
265 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
266 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
267 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
268 // Val = NaN,ieee = false or Val = QNaN,ieee = true
269 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
270 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
271 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
matchFPMed3ToClamp(MachineInstr & MI,Register & Reg)272 bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
273 Register &Reg) {
274 if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
275 return false;
276
277 // In llvm-ir, clamp is often represented as an intrinsic call to
278 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
279 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
280 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
281 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
282
283 if (isFCst(Src0) && !isFCst(Src1))
284 std::swap(Src0, Src1);
285 if (isFCst(Src1) && !isFCst(Src2))
286 std::swap(Src1, Src2);
287 if (isFCst(Src0) && !isFCst(Src1))
288 std::swap(Src0, Src1);
289 if (!isClampZeroToOne(Src1, Src2))
290 return false;
291
292 Register Val = Src0->getOperand(0).getReg();
293
294 auto isOp3Zero = [&]() {
295 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
296 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
297 return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
298 return false;
299 };
300 // For IEEE=false perform combine only when it's safe to assume that there are
301 // no NaN inputs. Most often MI is marked with nnan fast math flag.
302 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
303 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
304 if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
305 (getIEEE() && getDX10Clamp() &&
306 (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
307 Reg = Val;
308 return true;
309 }
310
311 return false;
312 }
313
applyClamp(MachineInstr & MI,Register & Reg)314 void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) {
315 B.setInstrAndDebugLoc(MI);
316 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
317 MI.getFlags());
318 MI.eraseFromParent();
319 }
320
applyMed3(MachineInstr & MI,Med3MatchInfo & MatchInfo)321 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
322 Med3MatchInfo &MatchInfo) {
323 B.setInstrAndDebugLoc(MI);
324 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
325 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
326 getAsVgpr(MatchInfo.Val2)},
327 MI.getFlags());
328 MI.eraseFromParent();
329 }
330
getMode()331 AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
332 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
333 }
334
getIEEE()335 bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
336
getDX10Clamp()337 bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
338
isFminnumIeee(const MachineInstr & MI)339 bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
340 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
341 }
342
isFCst(MachineInstr * MI)343 bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
344 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
345 }
346
isClampZeroToOne(MachineInstr * K0,MachineInstr * K1)347 bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
348 MachineInstr *K1) {
349 if (isFCst(K0) && isFCst(K1)) {
350 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
351 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
352 return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
353 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
354 }
355 return false;
356 }
357
358 class AMDGPURegBankCombinerHelperState {
359 protected:
360 CombinerHelper &Helper;
361 AMDGPURegBankCombinerHelper &RegBankHelper;
362
363 public:
AMDGPURegBankCombinerHelperState(CombinerHelper & Helper,AMDGPURegBankCombinerHelper & RegBankHelper)364 AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
365 AMDGPURegBankCombinerHelper &RegBankHelper)
366 : Helper(Helper), RegBankHelper(RegBankHelper) {}
367 };
368
369 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
370 #include "AMDGPUGenRegBankGICombiner.inc"
371 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
372
373 namespace {
374 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
375 #include "AMDGPUGenRegBankGICombiner.inc"
376 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
377
378 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
379 GISelKnownBits *KB;
380 MachineDominatorTree *MDT;
381
382 public:
383 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
384
AMDGPURegBankCombinerInfo(bool EnableOpt,bool OptSize,bool MinSize,const AMDGPULegalizerInfo * LI,GISelKnownBits * KB,MachineDominatorTree * MDT)385 AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
386 const AMDGPULegalizerInfo *LI,
387 GISelKnownBits *KB, MachineDominatorTree *MDT)
388 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
389 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
390 KB(KB), MDT(MDT) {
391 if (!GeneratedRuleCfg.parseCommandLineOption())
392 report_fatal_error("Invalid rule identifier");
393 }
394
395 bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
396 MachineIRBuilder &B) const override;
397 };
398
combine(GISelChangeObserver & Observer,MachineInstr & MI,MachineIRBuilder & B) const399 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
400 MachineInstr &MI,
401 MachineIRBuilder &B) const {
402 CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
403 AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
404 AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
405 RegBankHelper);
406
407 if (Generated.tryCombineAll(Observer, MI, B))
408 return true;
409
410 return false;
411 }
412
413 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
414 #include "AMDGPUGenRegBankGICombiner.inc"
415 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
416
417 // Pass boilerplate
418 // ================
419
420 class AMDGPURegBankCombiner : public MachineFunctionPass {
421 public:
422 static char ID;
423
424 AMDGPURegBankCombiner(bool IsOptNone = false);
425
getPassName() const426 StringRef getPassName() const override {
427 return "AMDGPURegBankCombiner";
428 }
429
430 bool runOnMachineFunction(MachineFunction &MF) override;
431
432 void getAnalysisUsage(AnalysisUsage &AU) const override;
433 private:
434 bool IsOptNone;
435 };
436 } // end anonymous namespace
437
getAnalysisUsage(AnalysisUsage & AU) const438 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
439 AU.addRequired<TargetPassConfig>();
440 AU.setPreservesCFG();
441 getSelectionDAGFallbackAnalysisUsage(AU);
442 AU.addRequired<GISelKnownBitsAnalysis>();
443 AU.addPreserved<GISelKnownBitsAnalysis>();
444 if (!IsOptNone) {
445 AU.addRequired<MachineDominatorTree>();
446 AU.addPreserved<MachineDominatorTree>();
447 }
448 MachineFunctionPass::getAnalysisUsage(AU);
449 }
450
AMDGPURegBankCombiner(bool IsOptNone)451 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
452 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
453 initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
454 }
455
runOnMachineFunction(MachineFunction & MF)456 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
457 if (MF.getProperties().hasProperty(
458 MachineFunctionProperties::Property::FailedISel))
459 return false;
460 auto *TPC = &getAnalysis<TargetPassConfig>();
461 const Function &F = MF.getFunction();
462 bool EnableOpt =
463 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
464
465 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
466 const AMDGPULegalizerInfo *LI
467 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
468
469 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
470 MachineDominatorTree *MDT =
471 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
472 AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
473 F.hasMinSize(), LI, KB, MDT);
474 Combiner C(PCInfo, TPC);
475 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
476 }
477
478 char AMDGPURegBankCombiner::ID = 0;
479 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
480 "Combine AMDGPU machine instrs after regbankselect",
481 false, false)
482 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
483 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
484 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
485 "Combine AMDGPU machine instrs after regbankselect", false,
486 false)
487
488 namespace llvm {
createAMDGPURegBankCombiner(bool IsOptNone)489 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
490 return new AMDGPURegBankCombiner(IsOptNone);
491 }
492 } // end namespace llvm
493