//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the GCN machine scheduler strategies, the scheduling stages they
/// run in, and the DAG drivers that apply them.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4,
  MemoryClauseInitialSchedule = 5
};
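
// Each stage ID is realized by a corresponding GCNSchedStage subclass declared
// later in this file (e.g. OccInitialSchedule runs as OccInitialScheduleStage,
// PreRARematerialize as PreRARematStage);
// GCNScheduleDAGMILive::createSchedStage maps an ID to its stage object.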

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Iterator pointing to the current stage in SchedStages.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

  // GCN RP tracker for top-down scheduling.
  mutable GCNDownwardRPTracker DownwardTracker;

  // GCN RP tracker for bottom-up scheduling.
  mutable GCNUpwardRPTracker UpwardTracker;

public:
  // Set when schedule() has seen register pressure over the critical limits
  // and had to track register pressure for the actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because the generic RP tracker is inaccurate;
  // it can be adjusted upward when tuning heuristics to reduce register
  // pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
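
// Illustrative sketch (not part of the interface): the scheduling DAG drives
// the strategy's stages through the accessors above, roughly as
// GCNScheduleDAGMILive::runSchedStages does:
//
//   GCNSchedStrategy &S = ...;
//   while (S.advanceStage()) {
//     // Create and run the stage object for S.getCurrentStage() over all
//     // scheduling regions in the function.
//   }
//
// CurrentStage starts as nullptr ("before the first stage"), so the first
// advanceStage() call moves it to SchedStages.begin().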

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// the maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize memory clauses for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() {}
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric rounds down to zero when bubbles account for less than 1% of
    // the schedule, which is too coarse; return 1 in that case.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
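
// Worked example for getMetric(), assuming ScaleFactor is 100 (i.e. the
// metric is a percentage of the schedule length): 5 bubble cycles in a
// 200-cycle schedule give (5 * 100) / 200 = 2, i.e. 2% stalls. A single
// bubble cycle in the same schedule computes to 0 and is reported as the
// floor value 1.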

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}

class GCNScheduleDAGMILive;
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The region live-in or live-out register sets, keyed by the first or last
  // MI of the region before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to key instruction.
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns.
  bool IsLiveOut;

public:
  RegionPressureMap() {}
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps.
  void buildLiveRegMap();

  // Retrieve the LiveReg for a given RegionIdx.
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.find(RegionIdx) != IdxToInstruction.end());
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};
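
// Illustrative usage (assumes GCNDAG points to a constructed
// GCNScheduleDAGMILive whose regions are set up):
//
//   RegionPressureMap LiveOuts(GCNDAG, /*LiveOut=*/true);
//   LiveOuts.buildLiveRegMap();
//   GCNRPTracker::LiveRegSet &Regs = LiveOuts.getLiveRegsForRegionIdx(0);
//
// buildLiveRegMap() must run before any getLiveRegsForRegionIdx() query;
// the query asserts that the region index is present in the map.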

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records whether a region is not yet scheduled, its schedule has been
  // reverted, or we otherwise want to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live-in
  // registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region
  // live-in registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region
  // live-out registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live-out registers per region. These are internally stored as a map of
  // the initial last region instruction to region live-out registers, but can
  // be retrieved by region index through calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in a block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
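
// Construction sketch: a target pairs this DAG with one of the strategies
// above when creating its machine scheduler, along the lines of (DAG
// mutations and target checks omitted; see AMDGPUTargetMachine.cpp):
//
//   static ScheduleDAGInstrs *
//   createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
//     return new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//   }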

// A GCNSchedStrategy is applied over multiple scheduling stages;
// GCNSchedStage encapsulates the state of one such stage.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Compute the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
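
// Stage lifecycle sketch: GCNScheduleDAGMILive::runSchedStages drives each
// stage through the hooks above, roughly as follows:
//
//   if (!Stage->initGCNSchedStage())
//     continue; // Skip the entire stage.
//   for (/* each scheduling region */) {
//     if (!Stage->initGCNRegion()) {
//       Stage->advanceRegion(); // Skip just this region.
//       continue;
//     }
//     // ... run the scheduler on the region ...
//     Stage->finalizeGCNRegion(); // Checks RP; may revert the schedule.
//   }
//   Stage->finalizeGCNSchedStage();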

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if the resulting occupancy is lower than the one
// assumed by earlier scheduling passes. This gives more freedom when
// scheduling blocks with low register pressure.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy has its own list of trivially
  // rematerializable instructions we can rematerialize to reduce RP. The list
  // maps an instruction to the position we should rematerialize before,
  // usually the MI using the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs.
  // Attempt to reduce VGPR RP by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H