//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Scheduler strategies and scheduling stages for GCN subtargets.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4,
  MemoryClauseInitialSchedule = 5
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

  // GCN RP tracker for top-down scheduling.
  mutable GCNDownwardRPTracker DownwardTracker;

  // GCN RP tracker for bottom-up scheduling.
  mutable GCNUpwardRPTracker UpwardTracker;

public:
  // Set when schedule() has seen register pressure over the critical limits
  // and had to track register pressure for the actual scheduling heuristics.
  bool HasHighPressure;

  // The schedule is known to have excess register pressure. Be more
  // conservative when increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of the poor performance of the
  // generic RP tracker; it can be raised when tuning heuristics to reduce
  // register pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under high register pressure.
  const unsigned HighRPVGPRBias = 7;
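
  // Note (illustrative; the exact computation lives in this strategy's
  // initialize()): the critical limits below start from the subtarget limits
  // at the target occupancy and are then tightened by roughly
  //   SGPRCriticalLimit -= SGPRLimitBias + ErrorMargin;
  // so a stage that sets SGPRLimitBias = HighRPSGPRBias shrinks the effective
  // SGPR budget by 7 + 3 = 10 registers with the defaults above.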

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances to the next stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// the maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize memory clauses for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric is zero when bubbles make up less than 1% of the schedule,
    // which is too small to act on, so return 1 instead.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
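
// Worked example: ScaleFactor is defined as 100 in GCNSchedStrategy.cpp,
// matching the 1% granularity noted in getMetric(). A 400-cycle schedule with
// 25 bubble cycles scores (25 * 100) / 400 == 6, while a bubble-free schedule
// of any length gets the floor value 1. When two schedules of the same region
// are compared, the one with the smaller metric wastes proportionally fewer
// cycles in stalls.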

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << ") is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}

class GCNScheduleDAGMILive;
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The live in/out register sets, indexed by the first or last MI of the
  // region before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to the key instruction.
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns.
  bool IsLiveOut;

public:
  RegionPressureMap() = default;
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps.
  void buildLiveRegMap();

  // Retrieve the LiveReg set for a given RegionIdx.
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.find(RegionIdx) != IdxToInstruction.end());
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or its schedule has been
  // reverted, or we generally desire to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions will usually result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // Map of each region's initial first instruction to the region's live-in
  // registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of each region's initial first instruction to the
  // region's live-in registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of each region's initial last instruction to the
  // region's live-out registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live-out registers per region. These are internally stored as a map
  // of each region's initial last instruction to the region's live-out
  // registers, but can be retrieved by region index via
  // getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;
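
  // Usage sketch: once RegionLiveOuts.buildLiveRegMap() has populated the
  // maps, a stage can query the cached live-outs of the region it is working
  // on with
  //   GCNRPTracker::LiveRegSet &LiveOuts =
  //       RegionLiveOuts.getLiveRegsForRegionIdx(RegionIdx);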

  // Return the current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in a block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};

// GCNScheduleDAGMILive applies multiple scheduling stages to a function;
// GCNSchedStage is the base class for a single such stage.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Set up for scheduling a region. Returns false if the current region
  // should be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Compute the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);
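
  // Per-region flow (illustrative; see the implementations of
  // finalizeGCNRegion() and checkScheduling() for the exact logic): after a
  // region is scheduled, checkScheduling() compares PressureBefore against
  // PressureAfter, and if shouldRevertScheduling() decides the new schedule
  // is worse, for example because mayCauseSpilling() predicts spills at the
  // new occupancy, the original order recorded in Unsched is restored by
  // revertScheduling().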

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if the resulting occupancy is lower than the one
// assumed by the other scheduling passes. This gives more freedom to schedule
// low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy has its own list of trivially
  // rematerializable instructions we can rematerialize to reduce RP. The list
  // maps an instruction to the position we should rematerialize it before,
  // usually the MI using the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single
  // def and a single use outside the defining block into
  // RematerializableInsts.
  void collectRematerializableInstructions();

  bool isTriviallyReMaterializable(const MachineInstr &MI);
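
  // Note on the predicate above (sketch; see its implementation for the
  // exact checks): an instruction qualifies only if
  // TargetInstrInfo::isTriviallyReMaterializable() accepts it, e.g. a
  // V_MOV_B32 of an immediate, and it additionally reads no virtual
  // registers, so re-executing it at the use point cannot extend another
  // live range.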

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs.
  // Attempt to reduce RP of VGPRs by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H