1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// SI Machine Scheduler interface 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 16 17 #include "SIInstrInfo.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineScheduler.h" 20 #include "llvm/CodeGen/RegisterPressure.h" 21 #include "llvm/CodeGen/ScheduleDAG.h" 22 #include <cassert> 23 #include <cstdint> 24 #include <map> 25 #include <memory> 26 #include <set> 27 #include <vector> 28 29 namespace llvm { 30 31 enum SIScheduleCandReason { 32 NoCand, 33 RegUsage, 34 Latency, 35 Successor, 36 Depth, 37 NodeOrder 38 }; 39 40 struct SISchedulerCandidate { 41 // The reason for this candidate. 42 SIScheduleCandReason Reason = NoCand; 43 44 // Set of reasons that apply to multiple candidates. 45 uint32_t RepeatReasonSet = 0; 46 47 SISchedulerCandidate() = default; 48 49 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } 50 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } 51 }; 52 53 class SIScheduleDAGMI; 54 class SIScheduleBlockCreator; 55 56 enum SIScheduleBlockLinkKind { 57 NoData, 58 Data 59 }; 60 61 class SIScheduleBlock { 62 SIScheduleDAGMI *DAG; 63 SIScheduleBlockCreator *BC; 64 65 std::vector<SUnit*> SUnits; 66 std::map<unsigned, unsigned> NodeNum2Index; 67 std::vector<SUnit*> TopReadySUs; 68 std::vector<SUnit*> ScheduledSUnits; 69 70 /// The top of the unscheduled zone. 71 IntervalPressure TopPressure; 72 RegPressureTracker TopRPTracker; 73 74 // Pressure: number of said class of registers needed to 75 // store the live virtual and real registers. 76 // We do care only of SGPR32 and VGPR32 and do track only virtual registers. 77 // Pressure of additional registers required inside the block. 78 std::vector<unsigned> InternalAdditionnalPressure; 79 // Pressure of input and output registers 80 std::vector<unsigned> LiveInPressure; 81 std::vector<unsigned> LiveOutPressure; 82 // Registers required by the block, and outputs. 83 // We do track only virtual registers. 84 // Note that some registers are not 32 bits, 85 // and thus the pressure is not equal 86 // to the number of live registers. 87 std::set<unsigned> LiveInRegs; 88 std::set<unsigned> LiveOutRegs; 89 90 bool Scheduled = false; 91 bool HighLatencyBlock = false; 92 93 std::vector<unsigned> HasLowLatencyNonWaitedParent; 94 95 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table. 96 unsigned ID; 97 98 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors. 99 // All blocks successors, and the kind of link 100 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs; 101 unsigned NumHighLatencySuccessors = 0; 102 103 public: 104 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, 105 unsigned ID): 106 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {} 107 108 ~SIScheduleBlock() = default; 109 110 unsigned getID() const { return ID; } 111 112 /// Functions for Block construction. 113 void addUnit(SUnit *SU); 114 115 // When all SUs have been added. 116 void finalizeUnits(); 117 118 // Add block pred, which has instruction predecessor of SU. 119 void addPred(SIScheduleBlock *Pred); 120 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind); 121 122 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; } 123 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> 124 getSuccs() const { return Succs; } 125 126 unsigned Height; // Maximum topdown path length to block without outputs 127 unsigned Depth; // Maximum bottomup path length to block without inputs 128 129 unsigned getNumHighLatencySuccessors() const { 130 return NumHighLatencySuccessors; 131 } 132 133 bool isHighLatencyBlock() { return HighLatencyBlock; } 134 135 // This is approximative. 136 // Ideally should take into accounts some instructions (rcp, etc) 137 // are 4 times slower. 138 int getCost() { return SUnits.size(); } 139 140 // The block Predecessors and Successors must be all registered 141 // before fastSchedule(). 142 // Fast schedule with no particular requirement. 143 void fastSchedule(); 144 145 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; } 146 147 // Complete schedule that will try to minimize reg pressure and 148 // low latencies, and will fill liveins and liveouts. 149 // Needs all MIs to be grouped between BeginBlock and EndBlock. 150 // The MIs can be moved after the scheduling, 151 // it is just used to allow correct track of live registers. 152 void schedule(MachineBasicBlock::iterator BeginBlock, 153 MachineBasicBlock::iterator EndBlock); 154 155 bool isScheduled() { return Scheduled; } 156 157 // Needs the block to be scheduled inside 158 // TODO: find a way to compute it. 159 std::vector<unsigned> &getInternalAdditionnalRegUsage() { 160 return InternalAdditionnalPressure; 161 } 162 163 std::set<unsigned> &getInRegs() { return LiveInRegs; } 164 std::set<unsigned> &getOutRegs() { return LiveOutRegs; } 165 166 void printDebug(bool Full); 167 168 private: 169 struct SISchedCandidate : SISchedulerCandidate { 170 // The best SUnit candidate. 171 SUnit *SU = nullptr; 172 173 unsigned SGPRUsage; 174 unsigned VGPRUsage; 175 bool IsLowLatency; 176 unsigned LowLatencyOffset; 177 bool HasLowLatencyNonWaitedParent; 178 179 SISchedCandidate() = default; 180 181 bool isValid() const { return SU; } 182 183 // Copy the status of another candidate without changing policy. 184 void setBest(SISchedCandidate &Best) { 185 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 186 SU = Best.SU; 187 Reason = Best.Reason; 188 SGPRUsage = Best.SGPRUsage; 189 VGPRUsage = Best.VGPRUsage; 190 IsLowLatency = Best.IsLowLatency; 191 LowLatencyOffset = Best.LowLatencyOffset; 192 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; 193 } 194 }; 195 196 void undoSchedule(); 197 198 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); 199 void releaseSucc(SUnit *SU, SDep *SuccEdge); 200 // InOrOutBlock: restrict to links pointing inside the block (true), 201 // or restrict to links pointing outside the block (false). 202 void releaseSuccessors(SUnit *SU, bool InOrOutBlock); 203 204 void nodeScheduled(SUnit *SU); 205 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); 206 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); 207 SUnit* pickNode(); 208 void traceCandidate(const SISchedCandidate &Cand); 209 void initRegPressure(MachineBasicBlock::iterator BeginBlock, 210 MachineBasicBlock::iterator EndBlock); 211 }; 212 213 struct SIScheduleBlocks { 214 std::vector<SIScheduleBlock*> Blocks; 215 std::vector<int> TopDownIndex2Block; 216 std::vector<int> TopDownBlock2Index; 217 }; 218 219 enum SISchedulerBlockCreatorVariant { 220 LatenciesAlone, 221 LatenciesGrouped, 222 LatenciesAlonePlusConsecutive 223 }; 224 225 class SIScheduleBlockCreator { 226 SIScheduleDAGMI *DAG; 227 // unique_ptr handles freeing memory for us. 228 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; 229 std::map<SISchedulerBlockCreatorVariant, 230 SIScheduleBlocks> Blocks; 231 std::vector<SIScheduleBlock*> CurrentBlocks; 232 std::vector<int> Node2CurrentBlock; 233 234 // Topological sort 235 // Maps topological index to the node number. 236 std::vector<int> TopDownIndex2Block; 237 std::vector<int> TopDownBlock2Index; 238 std::vector<int> BottomUpIndex2Block; 239 240 // 0 -> Color not given. 241 // 1 to SUnits.size() -> Reserved group (you should only add elements to them). 242 // Above -> Other groups. 243 int NextReservedID; 244 int NextNonReservedID; 245 std::vector<int> CurrentColoring; 246 std::vector<int> CurrentTopDownReservedDependencyColoring; 247 std::vector<int> CurrentBottomUpReservedDependencyColoring; 248 249 public: 250 SIScheduleBlockCreator(SIScheduleDAGMI *DAG); 251 ~SIScheduleBlockCreator(); 252 253 SIScheduleBlocks 254 getBlocks(SISchedulerBlockCreatorVariant BlockVariant); 255 256 bool isSUInBlock(SUnit *SU, unsigned ID); 257 258 private: 259 // Give a Reserved color to every high latency. 260 void colorHighLatenciesAlone(); 261 262 // Create groups of high latencies with a Reserved color. 263 void colorHighLatenciesGroups(); 264 265 // Compute coloring for topdown and bottom traversals with 266 // different colors depending on dependencies on Reserved colors. 267 void colorComputeReservedDependencies(); 268 269 // Give color to all non-colored SUs according to Reserved groups dependencies. 270 void colorAccordingToReservedDependencies(); 271 272 // Divides Blocks having no bottom up or top down dependencies on Reserved groups. 273 // The new colors are computed according to the dependencies on the other blocks 274 // formed with colorAccordingToReservedDependencies. 275 void colorEndsAccordingToDependencies(); 276 277 // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 278 void colorForceConsecutiveOrderInGroup(); 279 280 // Merge Constant loads that have all their users into another group to the group. 281 // (TODO: else if all their users depend on the same group, put them there) 282 void colorMergeConstantLoadsNextGroup(); 283 284 // Merge SUs that have all their users into another group to the group 285 void colorMergeIfPossibleNextGroup(); 286 287 // Merge SUs that have all their users into another group to the group, 288 // but only for Reserved groups. 289 void colorMergeIfPossibleNextGroupOnlyForReserved(); 290 291 // Merge SUs that have all their users into another group to the group, 292 // but only if the group is no more than a few SUs. 293 void colorMergeIfPossibleSmallGroupsToNextGroup(); 294 295 // Divides Blocks with important size. 296 // Idea of implementation: attribute new colors depending on topdown and 297 // bottom up links to other blocks. 298 void cutHugeBlocks(); 299 300 // Put in one group all instructions with no users in this scheduling region 301 // (we'd want these groups be at the end). 302 void regroupNoUserInstructions(); 303 304 // Give Reserved color to export instructions 305 void colorExports(); 306 307 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant); 308 309 void topologicalSort(); 310 311 void scheduleInsideBlocks(); 312 313 void fillStats(); 314 }; 315 316 enum SISchedulerBlockSchedulerVariant { 317 BlockLatencyRegUsage, 318 BlockRegUsageLatency, 319 BlockRegUsage 320 }; 321 322 class SIScheduleBlockScheduler { 323 SIScheduleDAGMI *DAG; 324 SISchedulerBlockSchedulerVariant Variant; 325 std::vector<SIScheduleBlock*> Blocks; 326 327 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages; 328 std::set<unsigned> LiveRegs; 329 // Num of schedulable unscheduled blocks reading the register. 330 std::map<unsigned, unsigned> LiveRegsConsumers; 331 332 std::vector<unsigned> LastPosHighLatencyParentScheduled; 333 int LastPosWaitedHighLatency; 334 335 std::vector<SIScheduleBlock*> BlocksScheduled; 336 unsigned NumBlockScheduled; 337 std::vector<SIScheduleBlock*> ReadyBlocks; 338 339 unsigned VregCurrentUsage; 340 unsigned SregCurrentUsage; 341 342 // Currently is only approximation. 343 unsigned maxVregUsage; 344 unsigned maxSregUsage; 345 346 std::vector<unsigned> BlockNumPredsLeft; 347 std::vector<unsigned> BlockNumSuccsLeft; 348 349 public: 350 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, 351 SISchedulerBlockSchedulerVariant Variant, 352 SIScheduleBlocks BlocksStruct); 353 ~SIScheduleBlockScheduler() = default; 354 355 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; } 356 357 unsigned getVGPRUsage() { return maxVregUsage; } 358 unsigned getSGPRUsage() { return maxSregUsage; } 359 360 private: 361 struct SIBlockSchedCandidate : SISchedulerCandidate { 362 // The best Block candidate. 363 SIScheduleBlock *Block = nullptr; 364 365 bool IsHighLatency; 366 int VGPRUsageDiff; 367 unsigned NumSuccessors; 368 unsigned NumHighLatencySuccessors; 369 unsigned LastPosHighLatParentScheduled; 370 unsigned Height; 371 372 SIBlockSchedCandidate() = default; 373 374 bool isValid() const { return Block; } 375 376 // Copy the status of another candidate without changing policy. 377 void setBest(SIBlockSchedCandidate &Best) { 378 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 379 Block = Best.Block; 380 Reason = Best.Reason; 381 IsHighLatency = Best.IsHighLatency; 382 VGPRUsageDiff = Best.VGPRUsageDiff; 383 NumSuccessors = Best.NumSuccessors; 384 NumHighLatencySuccessors = Best.NumHighLatencySuccessors; 385 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; 386 Height = Best.Height; 387 } 388 }; 389 390 bool tryCandidateLatency(SIBlockSchedCandidate &Cand, 391 SIBlockSchedCandidate &TryCand); 392 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, 393 SIBlockSchedCandidate &TryCand); 394 SIScheduleBlock *pickBlock(); 395 396 void addLiveRegs(std::set<unsigned> &Regs); 397 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); 398 void releaseBlockSuccs(SIScheduleBlock *Parent); 399 void blockScheduled(SIScheduleBlock *Block); 400 401 // Check register pressure change 402 // by scheduling a block with these LiveIn and LiveOut. 403 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, 404 std::set<unsigned> &OutRegs); 405 406 void schedule(); 407 }; 408 409 struct SIScheduleBlockResult { 410 std::vector<unsigned> SUs; 411 unsigned MaxSGPRUsage; 412 unsigned MaxVGPRUsage; 413 }; 414 415 class SIScheduler { 416 SIScheduleDAGMI *DAG; 417 SIScheduleBlockCreator BlockCreator; 418 419 public: 420 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {} 421 422 ~SIScheduler() = default; 423 424 struct SIScheduleBlockResult 425 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, 426 SISchedulerBlockSchedulerVariant ScheduleVariant); 427 }; 428 429 class SIScheduleDAGMI final : public ScheduleDAGMILive { 430 const SIInstrInfo *SITII; 431 const SIRegisterInfo *SITRI; 432 433 std::vector<SUnit> SUnitsLinksBackup; 434 435 // For moveLowLatencies. After all Scheduling variants are tested. 436 std::vector<unsigned> ScheduledSUnits; 437 std::vector<unsigned> ScheduledSUnitsInv; 438 439 unsigned VGPRSetID; 440 unsigned SGPRSetID; 441 442 public: 443 SIScheduleDAGMI(MachineSchedContext *C); 444 445 ~SIScheduleDAGMI() override; 446 447 // Entry point for the schedule. 448 void schedule() override; 449 450 // To init Block's RPTracker. 451 void initRPTracker(RegPressureTracker &RPTracker) { 452 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); 453 } 454 455 MachineBasicBlock *getBB() { return BB; } 456 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; } 457 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; } 458 LiveIntervals *getLIS() { return LIS; } 459 MachineRegisterInfo *getMRI() { return &MRI; } 460 const TargetRegisterInfo *getTRI() { return TRI; } 461 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; } 462 SUnit& getEntrySU() { return EntrySU; } 463 SUnit& getExitSU() { return ExitSU; } 464 465 void restoreSULinksLeft(); 466 467 template<typename _Iterator> void fillVgprSgprCost(_Iterator First, 468 _Iterator End, 469 unsigned &VgprUsage, 470 unsigned &SgprUsage); 471 472 std::set<unsigned> getInRegs() { 473 std::set<unsigned> InRegs; 474 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { 475 InRegs.insert(RegMaskPair.RegUnit); 476 } 477 return InRegs; 478 } 479 480 std::set<unsigned> getOutRegs() { 481 std::set<unsigned> OutRegs; 482 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { 483 OutRegs.insert(RegMaskPair.RegUnit); 484 } 485 return OutRegs; 486 }; 487 488 unsigned getVGPRSetID() const { return VGPRSetID; } 489 unsigned getSGPRSetID() const { return SGPRSetID; } 490 491 private: 492 void topologicalSort(); 493 // After scheduling is done, improve low latency placements. 494 void moveLowLatencies(); 495 496 public: 497 // Some stats for scheduling inside blocks. 498 std::vector<unsigned> IsLowLatencySU; 499 std::vector<unsigned> LowLatencyOffset; 500 std::vector<unsigned> IsHighLatencySU; 501 // Topological sort 502 // Maps topological index to the node number. 503 std::vector<int> TopDownIndex2SU; 504 std::vector<int> BottomUpIndex2SU; 505 }; 506 507 } // end namespace llvm 508 509 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 510