//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "misched"

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
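// Defining these otherwise-empty anchor methods out of line gives each class a
// single translation unit in which the compiler emits its vtable.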
void MachineSchedStrategy::anchor() {}
void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
    MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr),
    LIS(nullptr) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
    : MachineSchedulerBase(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineScheduler::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler()
    : MachineSchedulerBase(ID) {
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return const_cast<MachineInstr*>(
    &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  // Cast the return value to nonconst MachineInstr, then cast to an
  // instr_iterator, which does not check for null, finally return a
  // bundle_iterator.
  return MachineBasicBlock::instr_iterator(
    const_cast<MachineInstr*>(
      &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
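/// A target opts in by overriding the TargetPassConfig hook queried below and
/// returning its own ScheduleDAGInstrs. A minimal sketch (the target class and
/// DAG type are illustrative only, not defined in this file):
///
///   ScheduleDAGInstrs *
///   MyTargetPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
///     return new MyPostRASchedDAG(C);
///   }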
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler);

  DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipOptnoneFunction(*mf.getFunction()))
    return false;

  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  PassConfig = &getAnalysis<TargetPassConfig>();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls.
/// In PostRA scheduling, we need the isCall to enforce the boundary, but there
/// would be no benefit to postRA scheduling across calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII,
                            bool IsPostRA) {
  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  bool IsPostRA = Scheduler.isPostRA();

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd), and schedule
    // each region as soon as it is discovered. RegionEnd points to the
    // scheduling boundary at the bottom of the region. The DAG does not
    // include RegionEnd, but the region does (i.e. the next RegionEnd is above
    // the previous RegionBegin). If the current block has no terminator then
    // RegionEnd == MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    //
    // MBB::size() uses instr_iterator to count. Here we need a bundle to count
    // as a single instruction.
    unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
    for (MachineBasicBlock::iterator RegionEnd = MBB->end();
         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end() ||
          isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      unsigned NumRegionInstrs = 0;
      MachineBasicBlock::iterator I = RegionEnd;
      for (; I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
        if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ?
            "PostRA " : "") << "MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
            << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " RegionInstrs: " << NumRegionInstrs
            << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler.begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler.finishBlock();
    if (Scheduler.isPostRA()) {
      // FIXME: Ideally, no further passes should rely on kill flags. However,
      // thumb2 size reduction is currently an exception.
      Scheduler.fixupKills(MBB);
    }
  }
  Scheduler.finalizeSchedule();
}

void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping
// for virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() {
}

bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be
/// invoked by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  if (LIS)
    LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  // Build the DAG.
  buildSchedGraph(AA);

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    }
    else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    updateQueues(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU->biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft)
      TopRoots.push_back(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft)
      BotRoots.push_back(SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = nullptr;
  NextClusterPred = nullptr;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
    SchedImpl->releaseTopNode(*I);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == std::prev(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
}

// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    DEBUG(dbgs() << "Live Thru: ";
          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  }

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<unsigned, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also
  // track the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
    RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      DEBUG(dbgs() << TRI->getRegPressureSetName(i)
            << " Limit " << Limit
            << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].getPSet()) << " ";
        dbgs() << "\n");
}

void ScheduleDAGMILive::
updateScheduledPressure(const SUnit *SU,
                        const std::vector<unsigned> &NewMaxPressure) {
  const PressureDiff &PDiff = getPressureDiff(SU);
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
       I != E; ++I) {
    if (!I->isValid())
      break;
    unsigned ID = I->getPSet();
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
      ++CritIdx;
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
          && NewMaxPressure[ID] <= INT16_MAX)
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    }
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
            << NewMaxPressure[ID] << " > " << Limit << "(+ "
            << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
    }
  }
}

/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
  for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
    /// FIXME: Currently assuming single-use physregs.
    unsigned Reg = LiveUses[LUIdx];
    DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
    if (!TRI->isVirtualRegister(Reg))
      continue;

    // This may be called before CurrentBottom has been initialized. However,
    // BotRPTracker must have a valid position. We want the value live into the
    // instruction or live out of the block, so ask for the previous
    // instruction's live-out.
    const LiveInterval &LI = LIS->getInterval(Reg);
    VNInfo *VNI;
    MachineBasicBlock::const_iterator I =
      nextIfDebug(BotRPTracker.getPos(), BB->end());
    if (I == BB->end())
      VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    else {
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
      VNI = LRQ.valueIn();
    }
    // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    assert(VNI && "No live value at use.");
    for (VReg2UseMap::iterator
           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
      SUnit *SU = UI->SU;
      DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
            << *SU->getInstr());
      // If this use comes before the reaching def, it cannot be a last use, so
      // decrease its pressure change.
      if (!SU->isScheduled && SU != &ExitSU) {
        LiveQueryResult LRQ
          = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
        if (LRQ.valueIn() == VNI)
          getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
      }
    }
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in
/// order to update any specialized state.
void ScheduleDAGMILive::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  if (ShouldTrackPressure) {
    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    TopRPTracker.setPos(CurrentTop);
  }

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);

    if (DFSResult) {
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
      if (!ScheduledTrees.test(SubtreeID)) {
        ScheduledTrees.set(SubtreeID);
        DFSResult->scheduleTree(SubtreeID);
        SchedImpl->scheduleTree(SubtreeID);
      }
    }

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and set up three register pressure trackers.
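/// The three trackers are the region-wide RPTracker used while building the
/// DAG, plus TopRPTracker and BotRPTracker, which initRegPressure() seeds from
/// it to follow the top and bottom scheduling boundaries.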
void ScheduleDAGMILive::buildDAGWithRegPressure() {
  if (!ShouldTrackPressure) {
    RPTracker.reset();
    RegionCriticalPSets.clear();
    buildSchedGraph(AA);
    return;
  }

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                 /*TrackUntiedDefs=*/true);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

void ScheduleDAGMILive::computeDFSResult() {
  if (!DFSResult)
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
}

/// Compute the max cyclic critical path through the DAG. The scheduling DAG
/// only provides the critical path for single block loops. To handle loops
/// that span blocks, we could use the vreg path latencies provided by
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
/// available for use in the scheduler.
///
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
///
///   a->b(a,c)->c(b)->d(c)->exit
///
/// The cyclic critical path is two cycles:   b->c->b
/// The acyclic critical path is four cycles: a->b->c->d->exit
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
/// LiveInDepth = depth(b) = len(a->b) = 1
///
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
/// CyclicCriticalPath = min(2, 2) = 2
///
/// This could be relevant to PostRA scheduling, but is currently implemented
/// assuming LiveIntervals.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single block loops.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
  for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
       RI != RE; ++RI) {
    unsigned Reg = *RI;
    if (!TRI->isVirtualRegister(Reg))
      continue;
    const LiveInterval &LI = LIS->getInterval(Reg);
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (VReg2UseMap::iterator
           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
      if (UI->SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ =
        LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > UI->SU->getDepth())
        CyclicLatency = LiveOutDepth - UI->SU->getDepth();

      unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      }
      else
        CyclicLatency = 0;

      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
            << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    if (ShouldTrackPressure) {
      // Update top scheduled pressure.
      TopRPTracker.advance();
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    }
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    if (ShouldTrackPressure) {
      // Update bottom scheduled pressure.
      SmallVector<unsigned, 8> LiveUses;
      BotRPTracker.recede(&LiveUses);
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
      updatePressureDiffs(LiveUses);
    }
  }
}

//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
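/// Loads sharing a base register are sorted by offset; the target's
/// shouldClusterLoads() hook then decides which neighbors receive
/// SDep::Cluster edges (see clusterNeighboringLoads() below).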
class LoadClusterMutation : public ScheduleDAGMutation {
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}

    bool operator<(const LoadInfo &RHS) const {
      return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
    }
  };

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  void apply(ScheduleDAGMI *DAG) override;
protected:
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous

void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                  ScheduleDAGMI *DAG) {
  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    SUnit *SU = Loads[Idx];
    unsigned BaseReg;
    unsigned Offset;
    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  }
  if (LoadRecords.size() < 2)
    return;
  std::sort(LoadRecords.begin(), LoadRecords.end());
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
      ClusterLength = 1;
      continue;
    }

    SUnit *SUa = LoadRecords[Idx].SU;
    SUnit *SUb = LoadRecords[Idx+1].SU;
    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since
      // nearby loads should have effectively the same inputs.
      for (SUnit::const_succ_iterator
             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
        if (SI->getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    }
    else
      ClusterLength = 1;
  }
}

/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent loads.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->mayLoad())
      continue;
    unsigned ChainPredID = DAG->SUnits.size();
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->isCtrl()) {
        ChainPredID = PI->getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen
    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(SU);
  }
  // Iterate over the store chains.
  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}

//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  void apply(ScheduleDAGMI *DAG) override;
};
} // anonymous

/// \brief Callback from DAG postProcessing to create cluster edges to
/// encourage fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
  // For now, assume targets can only fuse with the branch.
  MachineInstr *Branch = DAG->ExitSU.getInstr();
  if (!Branch)
    return;

  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    SUnit *SU = &DAG->SUnits[--Idx];
    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
      continue;

    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU. There is no
    // need to copy predecessor edges from ExitSU to SU, since top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    // of SU, we could create an artificial edge from the deepest root, but it
    // hasn't been needed yet.
    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    (void)Success;
    assert(Success && "No DAG nodes should be reachable from ExitSU");

    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    break;
  }
}

//===----------------------------------------------------------------------===//
// CopyConstrain - DAG post-processing to encourage copy elimination.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
  // Transient state.
  SlotIndex RegionBeginIdx;
  // RegionEndIdx is the slot index of the last non-debug instruction in the
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  SlotIndex RegionEndIdx;
public:
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}

  void apply(ScheduleDAGMI *DAG) override;

protected:
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
} // anonymous

/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
///   I0: = dst
///   I1: src = ...
///   I2: = dst
///   I3: dst = src (copy)
/// (create pred->succ edges I0->I1, I2->I1)
///
/// 2) Local copy:
///   I0: dst = src (copy)
///   I1: = dst
///   I2: src = ...
///   I3: = dst
/// (create pred->succ edges I1->I2, I3->I2)
///
/// Although the MachineScheduler is currently constrained to single blocks,
/// this algorithm should handle extended blocks. An EBB is a set of
/// contiguously numbered blocks such that the previous block in the EBB is
/// always the single predecessor.
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  LiveIntervals *LIS = DAG->getLIS();
  MachineInstr *Copy = CopySU->getInstr();

  // Check for pure vreg copies.
  unsigned SrcReg = Copy->getOperand(1).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    return;

  unsigned DstReg = Copy->getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    return;

  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
  unsigned LocalReg = DstReg;
  unsigned GlobalReg = SrcReg;
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    LocalReg = SrcReg;
    GlobalReg = DstReg;
    LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
      return;
  }
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

  // Find the global segment after the start of the local LI.
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  // local live range. We could create edges from other global uses to the
  // local start, but the coalescer should have already eliminated these
  // cases, so don't bother dealing with it.
  if (GlobalSegment == GlobalLI->end())
    return;

  // If GlobalSegment is killed at the LocalLI->start, the call to find()
  // returned the next global segment. But if GlobalSegment overlaps with
  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  if (GlobalSegment->contains(LocalLI->beginIndex()))
    ++GlobalSegment;

  if (GlobalSegment == GlobalLI->end())
    return;

  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  if (GlobalSegment != GlobalLI->begin()) {
    // Two address defs have no hole.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                               GlobalSegment->start)) {
      return;
    }
    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then we cannot make a hole here.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                               LocalLI->beginIndex())) {
      return;
    }
    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    // it would be a disconnected component in the live range.
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");
  }
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  if (!GlobalDef)
    return;

  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  if (!GlobalSU)
    return;

  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  // constraining the uses of the last local def to precede GlobalDef.
  SmallVector<SUnit*,8> LocalUses;
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  for (SUnit::const_succ_iterator
         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
       I != E; ++I) {
    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
      continue;
    if (I->getSUnit() == GlobalSU)
      continue;
    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
      return;
    LocalUses.push_back(I->getSUnit());
  }
  // Open the top of the GlobalLI hole by constraining any earlier global uses
  // to precede the start of LocalLI.
  SmallVector<SUnit*,8> GlobalUses;
  MachineInstr *FirstLocalDef =
    LIS->getInstructionFromIndex(LocalLI->beginIndex());
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  for (SUnit::const_pred_iterator
         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
      continue;
    if (I->getSUnit() == FirstLocalSU)
      continue;
    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
      return;
    GlobalUses.push_back(I->getSUnit());
  }
  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  // Add the weak edges.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
          << GlobalSU->NodeNum << ")\n");
    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  }
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
          << FirstLocalSU->NodeNum << ")\n");
    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  }
}

/// \brief Callback from DAG postProcessing to create weak edges to encourage
/// copy elimination.
void CopyConstrain::apply(ScheduleDAGMI *DAG) {
  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");

  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
  if (FirstPos == DAG->end())
    return;
  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
    &*priorNonDebug(DAG->end(), DAG->begin()));

  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->isCopy())
      continue;

    constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
  }
}

//===----------------------------------------------------------------------===//
// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1539 //===----------------------------------------------------------------------===// 1540 1541 static const unsigned InvalidCycle = ~0U; 1542 1543 SchedBoundary::~SchedBoundary() { delete HazardRec; } 1544 1545 void SchedBoundary::reset() { 1546 // A new HazardRec is created for each DAG and owned by SchedBoundary. 1547 // Destroying and reconstructing it is very expensive though. So keep 1548 // invalid, placeholder HazardRecs. 1549 if (HazardRec && HazardRec->isEnabled()) { 1550 delete HazardRec; 1551 HazardRec = nullptr; 1552 } 1553 Available.clear(); 1554 Pending.clear(); 1555 CheckPending = false; 1556 NextSUs.clear(); 1557 CurrCycle = 0; 1558 CurrMOps = 0; 1559 MinReadyCycle = UINT_MAX; 1560 ExpectedLatency = 0; 1561 DependentLatency = 0; 1562 RetiredMOps = 0; 1563 MaxExecutedResCount = 0; 1564 ZoneCritResIdx = 0; 1565 IsResourceLimited = false; 1566 ReservedCycles.clear(); 1567 #ifndef NDEBUG 1568 // Track the maximum number of stall cycles that could arise either from the 1569 // latency of a DAG edge or the number of cycles that a processor resource is 1570 // reserved (SchedBoundary::ReservedCycles). 1571 MaxObservedLatency = 0; 1572 #endif 1573 // Reserve a zero-count for invalid CritResIdx. 1574 ExecutedResCounts.resize(1); 1575 assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); 1576 } 1577 1578 void SchedRemainder:: 1579 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 1580 reset(); 1581 if (!SchedModel->hasInstrSchedModel()) 1582 return; 1583 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 1584 for (std::vector<SUnit>::iterator 1585 I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { 1586 const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); 1587 RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) 1588 * SchedModel->getMicroOpFactor(); 1589 for (TargetSchedModel::ProcResIter 1590 PI = SchedModel->getWriteProcResBegin(SC), 1591 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1592 unsigned PIdx = PI->ProcResourceIdx; 1593 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1594 RemainingCounts[PIdx] += (Factor * PI->Cycles); 1595 } 1596 } 1597 } 1598 1599 void SchedBoundary:: 1600 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 1601 reset(); 1602 DAG = dag; 1603 SchedModel = smodel; 1604 Rem = rem; 1605 if (SchedModel->hasInstrSchedModel()) { 1606 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); 1607 ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); 1608 } 1609 } 1610 1611 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat 1612 /// these "soft stalls" differently than the hard stall cycles based on CPU 1613 /// resources and computed by checkHazard(). A fully in-order model 1614 /// (MicroOpBufferSize==0) will not make use of this since instructions are not 1615 /// available for scheduling until they are ready. However, a weaker in-order 1616 /// model may use this for heuristics. For example, if a processor has in-order 1617 /// behavior when reading certain resources, this may come into play. 1618 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { 1619 if (!SU->isUnbuffered) 1620 return 0; 1621 1622 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 1623 if (ReadyCycle > CurrCycle) 1624 return ReadyCycle - CurrCycle; 1625 return 0; 1626 } 1627 1628 /// Compute the next cycle at which the given processor resource can be 1629 /// scheduled. 
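/// A small worked example (illustrative values only, derived from the code
/// below): if ReservedCycles[PIdx] is still InvalidCycle, the resource has not
/// been used in this zone and the result is cycle 0. If ReservedCycles[PIdx]
/// == 7 and the current operation needs Cycles == 2, a top-down zone reports
/// cycle 7, while a bottom-up zone adds the operation's own cycles and reports
/// cycle 9.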
1630 unsigned SchedBoundary::
1631 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1632   unsigned NextUnreserved = ReservedCycles[PIdx];
1633   // If this resource has never been used, always return cycle zero.
1634   if (NextUnreserved == InvalidCycle)
1635     return 0;
1636   // For bottom-up scheduling, add the cycles needed for the current operation.
1637   if (!isTop())
1638     NextUnreserved += Cycles;
1639   return NextUnreserved;
1640 }
1641
1642 /// Does this SU have a hazard within the current instruction group?
1643 ///
1644 /// The scheduler supports two modes of hazard recognition. The first is the
1645 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1646 /// supports highly complicated in-order reservation tables
1647 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1648 ///
1649 /// The second is a streamlined mechanism that checks for hazards based on
1650 /// simple counters that the scheduler itself maintains. It explicitly checks
1651 /// for instruction dispatch limitations, including the number of micro-ops that
1652 /// can dispatch per cycle.
1653 ///
1654 /// TODO: Also check whether the SU must start a new group.
1655 bool SchedBoundary::checkHazard(SUnit *SU) {
1656   if (HazardRec->isEnabled()
1657       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1658     return true;
1659   }
1660   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1661   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1662     DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1663           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1664     return true;
1665   }
1666   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1667     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1668     for (TargetSchedModel::ProcResIter
1669            PI = SchedModel->getWriteProcResBegin(SC),
1670            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1671       if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
1672         return true;
1673     }
1674   }
1675   return false;
1676 }
1677
1678 // Find the unscheduled node in ReadySUs with the highest latency.
1679 unsigned SchedBoundary::
1680 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1681   SUnit *LateSU = nullptr;
1682   unsigned RemLatency = 0;
1683   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1684        I != E; ++I) {
1685     unsigned L = getUnscheduledLatency(*I);
1686     if (L > RemLatency) {
1687       RemLatency = L;
1688       LateSU = *I;
1689     }
1690   }
1691   if (LateSU) {
1692     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1693           << LateSU->NodeNum << ") " << RemLatency << "c\n");
1694   }
1695   return RemLatency;
1696 }
1697
1698 // Count resources in this zone and the remaining unscheduled
1699 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
1700 // resource index, or zero if the zone is issue limited.
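// For example (illustrative numbers, not taken from any real subtarget): with
// a micro-op factor of 2, Rem->RemIssueCount == 40 and RetiredMOps == 10, the
// issue baseline is 40 + 10*2 = 60 scaled units. A resource whose scaled count
// inside the zone plus its RemainingCounts entry totals 70 would then become
// OtherCritIdx; if no resource exceeds 60, OtherCritIdx stays 0 and the zone
// is considered issue limited.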
1701 unsigned SchedBoundary:: 1702 getOtherResourceCount(unsigned &OtherCritIdx) { 1703 OtherCritIdx = 0; 1704 if (!SchedModel->hasInstrSchedModel()) 1705 return 0; 1706 1707 unsigned OtherCritCount = Rem->RemIssueCount 1708 + (RetiredMOps * SchedModel->getMicroOpFactor()); 1709 DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " 1710 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); 1711 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); 1712 PIdx != PEnd; ++PIdx) { 1713 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; 1714 if (OtherCount > OtherCritCount) { 1715 OtherCritCount = OtherCount; 1716 OtherCritIdx = PIdx; 1717 } 1718 } 1719 if (OtherCritIdx) { 1720 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " 1721 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) 1722 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); 1723 } 1724 return OtherCritCount; 1725 } 1726 1727 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { 1728 if (ReadyCycle < MinReadyCycle) 1729 MinReadyCycle = ReadyCycle; 1730 1731 // Check for interlocks first. For the purpose of other heuristics, an 1732 // instruction that cannot issue appears as if it's not in the ReadyQueue. 1733 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 1734 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) 1735 Pending.push(SU); 1736 else 1737 Available.push(SU); 1738 1739 // Record this node as an immediate dependent of the scheduled node. 1740 NextSUs.insert(SU); 1741 } 1742 1743 void SchedBoundary::releaseTopNode(SUnit *SU) { 1744 if (SU->isScheduled) 1745 return; 1746 1747 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1748 I != E; ++I) { 1749 if (I->isWeak()) 1750 continue; 1751 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; 1752 unsigned Latency = I->getLatency(); 1753 #ifndef NDEBUG 1754 MaxObservedLatency = std::max(Latency, MaxObservedLatency); 1755 #endif 1756 if (SU->TopReadyCycle < PredReadyCycle + Latency) 1757 SU->TopReadyCycle = PredReadyCycle + Latency; 1758 } 1759 releaseNode(SU, SU->TopReadyCycle); 1760 } 1761 1762 void SchedBoundary::releaseBottomNode(SUnit *SU) { 1763 if (SU->isScheduled) 1764 return; 1765 1766 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 1767 1768 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1769 I != E; ++I) { 1770 if (I->isWeak()) 1771 continue; 1772 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 1773 unsigned Latency = I->getLatency(); 1774 #ifndef NDEBUG 1775 MaxObservedLatency = std::max(Latency, MaxObservedLatency); 1776 #endif 1777 if (SU->BotReadyCycle < SuccReadyCycle + Latency) 1778 SU->BotReadyCycle = SuccReadyCycle + Latency; 1779 } 1780 releaseNode(SU, SU->BotReadyCycle); 1781 } 1782 1783 /// Move the boundary of scheduled code by one cycle. 1784 void SchedBoundary::bumpCycle(unsigned NextCycle) { 1785 if (SchedModel->getMicroOpBufferSize() == 0) { 1786 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); 1787 if (MinReadyCycle > NextCycle) 1788 NextCycle = MinReadyCycle; 1789 } 1790 // Update the current micro-ops, which will issue in the next cycle. 1791 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); 1792 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; 1793 1794 // Decrement DependentLatency based on the next cycle. 
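  // (Illustrative: advancing from cycle 10 to 13 with DependentLatency == 5
  // leaves 2 cycles of dependent latency; advancing past the remaining amount
  // clamps it to 0.)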
1795 if ((NextCycle - CurrCycle) > DependentLatency) 1796 DependentLatency = 0; 1797 else 1798 DependentLatency -= (NextCycle - CurrCycle); 1799 1800 if (!HazardRec->isEnabled()) { 1801 // Bypass HazardRec virtual calls. 1802 CurrCycle = NextCycle; 1803 } 1804 else { 1805 // Bypass getHazardType calls in case of long latency. 1806 for (; CurrCycle != NextCycle; ++CurrCycle) { 1807 if (isTop()) 1808 HazardRec->AdvanceCycle(); 1809 else 1810 HazardRec->RecedeCycle(); 1811 } 1812 } 1813 CheckPending = true; 1814 unsigned LFactor = SchedModel->getLatencyFactor(); 1815 IsResourceLimited = 1816 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) 1817 > (int)LFactor; 1818 1819 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); 1820 } 1821 1822 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { 1823 ExecutedResCounts[PIdx] += Count; 1824 if (ExecutedResCounts[PIdx] > MaxExecutedResCount) 1825 MaxExecutedResCount = ExecutedResCounts[PIdx]; 1826 } 1827 1828 /// Add the given processor resource to this scheduled zone. 1829 /// 1830 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles 1831 /// during which this resource is consumed. 1832 /// 1833 /// \return the next cycle at which the instruction may execute without 1834 /// oversubscribing resources. 1835 unsigned SchedBoundary:: 1836 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { 1837 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1838 unsigned Count = Factor * Cycles; 1839 DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) 1840 << " +" << Cycles << "x" << Factor << "u\n"); 1841 1842 // Update Executed resources counts. 1843 incExecutedResources(PIdx, Count); 1844 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 1845 Rem->RemainingCounts[PIdx] -= Count; 1846 1847 // Check if this resource exceeds the current critical resource. If so, it 1848 // becomes the critical resource. 1849 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { 1850 ZoneCritResIdx = PIdx; 1851 DEBUG(dbgs() << " *** Critical resource " 1852 << SchedModel->getResourceName(PIdx) << ": " 1853 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); 1854 } 1855 // For reserved resources, record the highest cycle using the resource. 1856 unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); 1857 if (NextAvailable > CurrCycle) { 1858 DEBUG(dbgs() << " Resource conflict: " 1859 << SchedModel->getProcResource(PIdx)->Name << " reserved until @" 1860 << NextAvailable << "\n"); 1861 } 1862 return NextAvailable; 1863 } 1864 1865 /// Move the boundary of scheduled code by one SUnit. 1866 void SchedBoundary::bumpNode(SUnit *SU) { 1867 // Update the reservation table. 1868 if (HazardRec->isEnabled()) { 1869 if (!isTop() && SU->isCall) { 1870 // Calls are scheduled with their preceding instructions. For bottom-up 1871 // scheduling, clear the pipeline state before emitting. 1872 HazardRec->Reset(); 1873 } 1874 HazardRec->EmitInstruction(SU); 1875 } 1876 // checkHazard should prevent scheduling multiple instructions per cycle that 1877 // exceed the issue width. 1878 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 1879 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); 1880 assert( 1881 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && 1882 "Cannot schedule this instruction's MicroOps in the current cycle."); 1883 1884 unsigned ReadyCycle = (isTop() ? 
SU->TopReadyCycle : SU->BotReadyCycle);
1885   DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
1886
1887   unsigned NextCycle = CurrCycle;
1888   switch (SchedModel->getMicroOpBufferSize()) {
1889   case 0:
1890     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
1891     break;
1892   case 1:
1893     if (ReadyCycle > NextCycle) {
1894       NextCycle = ReadyCycle;
1895       DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
1896     }
1897     break;
1898   default:
1899     // We don't currently model the OOO reorder buffer, so consider all
1900     // scheduled MOps to be "retired". We do loosely model in-order resource
1901     // latency. If this instruction uses an in-order resource, account for any
1902     // likely stall cycles.
1903     if (SU->isUnbuffered && ReadyCycle > NextCycle)
1904       NextCycle = ReadyCycle;
1905     break;
1906   }
1907   RetiredMOps += IncMOps;
1908
1909   // Update resource counts and critical resource.
1910   if (SchedModel->hasInstrSchedModel()) {
1911     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
1912     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
1913     Rem->RemIssueCount -= DecRemIssue;
1914     if (ZoneCritResIdx) {
1915       // Scale scheduled micro-ops for comparing with the critical resource.
1916       unsigned ScaledMOps =
1917         RetiredMOps * SchedModel->getMicroOpFactor();
1918
1919       // If scaled micro-ops are now more than the previous critical resource by
1920       // a full cycle, then micro-ops issue becomes critical.
1921       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
1922           >= (int)SchedModel->getLatencyFactor()) {
1923         ZoneCritResIdx = 0;
1924         DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
1925               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
1926       }
1927     }
1928     for (TargetSchedModel::ProcResIter
1929            PI = SchedModel->getWriteProcResBegin(SC),
1930            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1931       unsigned RCycle =
1932         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
1933       if (RCycle > NextCycle)
1934         NextCycle = RCycle;
1935     }
1936     if (SU->hasReservedResource) {
1937       // For reserved resources, record the highest cycle using the resource.
1938       // For top-down scheduling, this is the cycle in which we schedule this
1939       // instruction plus the number of cycles the operation reserves the
1940       // resource. For bottom-up, it is simply the instruction's cycle.
1941       for (TargetSchedModel::ProcResIter
1942              PI = SchedModel->getWriteProcResBegin(SC),
1943              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1944         unsigned PIdx = PI->ProcResourceIdx;
1945         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
1946           ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
1947 #ifndef NDEBUG
1948           MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
1949 #endif
1950         }
1951       }
1952     }
1953   }
1954   // Update ExpectedLatency and DependentLatency.
1955   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1956   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1957   if (SU->getDepth() > TopLatency) {
1958     TopLatency = SU->getDepth();
1959     DEBUG(dbgs() << " " << Available.getName()
1960           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
1961   }
1962   if (SU->getHeight() > BotLatency) {
1963     BotLatency = SU->getHeight();
1964     DEBUG(dbgs() << " " << Available.getName()
1965           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
1966   }
1967   // If we stall for any reason, bump the cycle.
1968 if (NextCycle > CurrCycle) { 1969 bumpCycle(NextCycle); 1970 } 1971 else { 1972 // After updating ZoneCritResIdx and ExpectedLatency, check if we're 1973 // resource limited. If a stall occurred, bumpCycle does this. 1974 unsigned LFactor = SchedModel->getLatencyFactor(); 1975 IsResourceLimited = 1976 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) 1977 > (int)LFactor; 1978 } 1979 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle 1980 // resets CurrMOps. Loop to handle instructions with more MOps than issue in 1981 // one cycle. Since we commonly reach the max MOps here, opportunistically 1982 // bump the cycle to avoid uselessly checking everything in the readyQ. 1983 CurrMOps += IncMOps; 1984 while (CurrMOps >= SchedModel->getIssueWidth()) { 1985 DEBUG(dbgs() << " *** Max MOps " << CurrMOps 1986 << " at cycle " << CurrCycle << '\n'); 1987 bumpCycle(++NextCycle); 1988 } 1989 DEBUG(dumpScheduledState()); 1990 } 1991 1992 /// Release pending ready nodes in to the available queue. This makes them 1993 /// visible to heuristics. 1994 void SchedBoundary::releasePending() { 1995 // If the available queue is empty, it is safe to reset MinReadyCycle. 1996 if (Available.empty()) 1997 MinReadyCycle = UINT_MAX; 1998 1999 // Check to see if any of the pending instructions are ready to issue. If 2000 // so, add them to the available queue. 2001 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 2002 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 2003 SUnit *SU = *(Pending.begin()+i); 2004 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 2005 2006 if (ReadyCycle < MinReadyCycle) 2007 MinReadyCycle = ReadyCycle; 2008 2009 if (!IsBuffered && ReadyCycle > CurrCycle) 2010 continue; 2011 2012 if (checkHazard(SU)) 2013 continue; 2014 2015 Available.push(SU); 2016 Pending.remove(Pending.begin()+i); 2017 --i; --e; 2018 } 2019 DEBUG(if (!Pending.empty()) Pending.dump()); 2020 CheckPending = false; 2021 } 2022 2023 /// Remove SU from the ready set for this boundary. 2024 void SchedBoundary::removeReady(SUnit *SU) { 2025 if (Available.isInQueue(SU)) 2026 Available.remove(Available.find(SU)); 2027 else { 2028 assert(Pending.isInQueue(SU) && "bad ready count"); 2029 Pending.remove(Pending.find(SU)); 2030 } 2031 } 2032 2033 /// If this queue only has one ready candidate, return it. As a side effect, 2034 /// defer any nodes that now hit a hazard, and advance the cycle until at least 2035 /// one node is ready. If multiple instructions are ready, return NULL. 2036 SUnit *SchedBoundary::pickOnlyChoice() { 2037 if (CheckPending) 2038 releasePending(); 2039 2040 if (CurrMOps > 0) { 2041 // Defer any ready instrs that now have a hazard. 2042 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 2043 if (checkHazard(*I)) { 2044 Pending.push(*I); 2045 I = Available.remove(I); 2046 continue; 2047 } 2048 ++I; 2049 } 2050 } 2051 for (unsigned i = 0; Available.empty(); ++i) { 2052 assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && 2053 "permanent hazard"); (void)i; 2054 bumpCycle(CurrCycle + 1); 2055 releasePending(); 2056 } 2057 if (Available.size() == 1) 2058 return *Available.begin(); 2059 return nullptr; 2060 } 2061 2062 #ifndef NDEBUG 2063 // This is useful information to dump after bumpNode. 2064 // Note that the Queue contents are more useful before pickNodeFromQueue. 
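// The dump below produces output of roughly this shape (the queue name, the
// resource name, and all numbers here are hypothetical, shown only to
// illustrate the format):
//   BotQ.A @12c
//     Retired: 24
//     Executed: 10c
//     Critical: 8c, 16 SomeUnit
//     ExpectedLatency: 9c
//     - Resource limited.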
2065 void SchedBoundary::dumpScheduledState() { 2066 unsigned ResFactor; 2067 unsigned ResCount; 2068 if (ZoneCritResIdx) { 2069 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); 2070 ResCount = getResourceCount(ZoneCritResIdx); 2071 } 2072 else { 2073 ResFactor = SchedModel->getMicroOpFactor(); 2074 ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); 2075 } 2076 unsigned LFactor = SchedModel->getLatencyFactor(); 2077 dbgs() << Available.getName() << " @" << CurrCycle << "c\n" 2078 << " Retired: " << RetiredMOps; 2079 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; 2080 dbgs() << "\n Critical: " << ResCount / LFactor << "c, " 2081 << ResCount / ResFactor << " " 2082 << SchedModel->getResourceName(ZoneCritResIdx) 2083 << "\n ExpectedLatency: " << ExpectedLatency << "c\n" 2084 << (IsResourceLimited ? " - Resource" : " - Latency") 2085 << " limited.\n"; 2086 } 2087 #endif 2088 2089 //===----------------------------------------------------------------------===// 2090 // GenericScheduler - Generic implementation of MachineSchedStrategy. 2091 //===----------------------------------------------------------------------===// 2092 2093 namespace { 2094 /// Base class for GenericScheduler. This class maintains information about 2095 /// scheduling candidates based on TargetSchedModel making it easy to implement 2096 /// heuristics for either preRA or postRA scheduling. 2097 class GenericSchedulerBase : public MachineSchedStrategy { 2098 public: 2099 /// Represent the type of SchedCandidate found within a single queue. 2100 /// pickNodeBidirectional depends on these listed by decreasing priority. 2101 enum CandReason { 2102 NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, 2103 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 2104 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; 2105 2106 #ifndef NDEBUG 2107 static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); 2108 #endif 2109 2110 /// Policy for scheduling the next instruction in the candidate's zone. 2111 struct CandPolicy { 2112 bool ReduceLatency; 2113 unsigned ReduceResIdx; 2114 unsigned DemandResIdx; 2115 2116 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} 2117 }; 2118 2119 /// Status of an instruction's critical resource consumption. 2120 struct SchedResourceDelta { 2121 // Count critical resources in the scheduled region required by SU. 2122 unsigned CritResources; 2123 2124 // Count critical resources from another region consumed by SU. 2125 unsigned DemandedResources; 2126 2127 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} 2128 2129 bool operator==(const SchedResourceDelta &RHS) const { 2130 return CritResources == RHS.CritResources 2131 && DemandedResources == RHS.DemandedResources; 2132 } 2133 bool operator!=(const SchedResourceDelta &RHS) const { 2134 return !operator==(RHS); 2135 } 2136 }; 2137 2138 /// Store the state used by GenericScheduler heuristics, required for the 2139 /// lifetime of one invocation of pickNode(). 2140 struct SchedCandidate { 2141 CandPolicy Policy; 2142 2143 // The best SUnit candidate. 2144 SUnit *SU; 2145 2146 // The reason for this candidate. 2147 CandReason Reason; 2148 2149 // Set of reasons that apply to multiple candidates. 2150 uint32_t RepeatReasonSet; 2151 2152 // Register pressure values for the best candidate. 2153 RegPressureDelta RPDelta; 2154 2155 // Critical resource consumption of the best candidate. 
2156     SchedResourceDelta ResDelta;
2157
2158     SchedCandidate(const CandPolicy &policy)
2159       : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
2160
2161     bool isValid() const { return SU; }
2162
2163     // Copy the status of another candidate without changing policy.
2164     void setBest(SchedCandidate &Best) {
2165       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
2166       SU = Best.SU;
2167       Reason = Best.Reason;
2168       RPDelta = Best.RPDelta;
2169       ResDelta = Best.ResDelta;
2170     }
2171
2172     bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
2173     void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
2174
2175     void initResourceDelta(const ScheduleDAGMI *DAG,
2176                            const TargetSchedModel *SchedModel);
2177   };
2178
2179 protected:
2180   const MachineSchedContext *Context;
2181   const TargetSchedModel *SchedModel;
2182   const TargetRegisterInfo *TRI;
2183
2184   SchedRemainder Rem;
2185 protected:
2186   GenericSchedulerBase(const MachineSchedContext *C):
2187     Context(C), SchedModel(nullptr), TRI(nullptr) {}
2188
2189   void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
2190                  SchedBoundary *OtherZone);
2191
2192 #ifndef NDEBUG
2193   void traceCandidate(const SchedCandidate &Cand);
2194 #endif
2195 };
2196 } // namespace
2197
2198 void GenericSchedulerBase::SchedCandidate::
2199 initResourceDelta(const ScheduleDAGMI *DAG,
2200                   const TargetSchedModel *SchedModel) {
2201   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2202     return;
2203
2204   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2205   for (TargetSchedModel::ProcResIter
2206          PI = SchedModel->getWriteProcResBegin(SC),
2207          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2208     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2209       ResDelta.CritResources += PI->Cycles;
2210     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2211       ResDelta.DemandedResources += PI->Cycles;
2212   }
2213 }
2214
2215 /// Set the CandPolicy for a scheduling zone given the current resources and
2216 /// latencies inside and outside the zone.
2217 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
2218                                      bool IsPostRA,
2219                                      SchedBoundary &CurrZone,
2220                                      SchedBoundary *OtherZone) {
2221   // Apply preemptive heuristics based on the total latency and resources
2222   // inside and outside this zone. Potential stalls should be considered before
2223   // following this policy.
2224
2225   // Compute remaining latency. We need this both to determine whether the
2226   // overall schedule has become latency-limited and whether the instructions
2227   // outside this zone are resource or latency limited.
2228   //
2229   // The "dependent" latency is updated incrementally during scheduling as the
2230   // max height/depth of scheduled nodes minus the cycles since it was
2231   // scheduled:
2232   //   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2233   //
2234   // The "independent" latency is the max ready queue depth:
2235   //   ILat = max N.depth for N in Available|Pending
2236   //
2237   // RemainingLatency is the greater of independent and dependent latency.
2238   unsigned RemLatency = CurrZone.getDependentLatency();
2239   RemLatency = std::max(RemLatency,
2240                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2241   RemLatency = std::max(RemLatency,
2242                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2243
2244   // Compute the critical resource outside the zone.
2245   unsigned OtherCritIdx = 0;
2246   unsigned OtherCount =
2247     OtherZone ?
OtherZone->getOtherResourceCount(OtherCritIdx) : 0; 2248 2249 bool OtherResLimited = false; 2250 if (SchedModel->hasInstrSchedModel()) { 2251 unsigned LFactor = SchedModel->getLatencyFactor(); 2252 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; 2253 } 2254 // Schedule aggressively for latency in PostRA mode. We don't check for 2255 // acyclic latency during PostRA, and highly out-of-order processors will 2256 // skip PostRA scheduling. 2257 if (!OtherResLimited) { 2258 if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { 2259 Policy.ReduceLatency |= true; 2260 DEBUG(dbgs() << " " << CurrZone.Available.getName() 2261 << " RemainingLatency " << RemLatency << " + " 2262 << CurrZone.getCurrCycle() << "c > CritPath " 2263 << Rem.CriticalPath << "\n"); 2264 } 2265 } 2266 // If the same resource is limiting inside and outside the zone, do nothing. 2267 if (CurrZone.getZoneCritResIdx() == OtherCritIdx) 2268 return; 2269 2270 DEBUG( 2271 if (CurrZone.isResourceLimited()) { 2272 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " 2273 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) 2274 << "\n"; 2275 } 2276 if (OtherResLimited) 2277 dbgs() << " RemainingLimit: " 2278 << SchedModel->getResourceName(OtherCritIdx) << "\n"; 2279 if (!CurrZone.isResourceLimited() && !OtherResLimited) 2280 dbgs() << " Latency limited both directions.\n"); 2281 2282 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) 2283 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); 2284 2285 if (OtherResLimited) 2286 Policy.DemandResIdx = OtherCritIdx; 2287 } 2288 2289 #ifndef NDEBUG 2290 const char *GenericSchedulerBase::getReasonStr( 2291 GenericSchedulerBase::CandReason Reason) { 2292 switch (Reason) { 2293 case NoCand: return "NOCAND "; 2294 case PhysRegCopy: return "PREG-COPY"; 2295 case RegExcess: return "REG-EXCESS"; 2296 case RegCritical: return "REG-CRIT "; 2297 case Stall: return "STALL "; 2298 case Cluster: return "CLUSTER "; 2299 case Weak: return "WEAK "; 2300 case RegMax: return "REG-MAX "; 2301 case ResourceReduce: return "RES-REDUCE"; 2302 case ResourceDemand: return "RES-DEMAND"; 2303 case TopDepthReduce: return "TOP-DEPTH "; 2304 case TopPathReduce: return "TOP-PATH "; 2305 case BotHeightReduce:return "BOT-HEIGHT"; 2306 case BotPathReduce: return "BOT-PATH "; 2307 case NextDefUse: return "DEF-USE "; 2308 case NodeOrder: return "ORDER "; 2309 }; 2310 llvm_unreachable("Unknown reason!"); 2311 } 2312 2313 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { 2314 PressureChange P; 2315 unsigned ResIdx = 0; 2316 unsigned Latency = 0; 2317 switch (Cand.Reason) { 2318 default: 2319 break; 2320 case RegExcess: 2321 P = Cand.RPDelta.Excess; 2322 break; 2323 case RegCritical: 2324 P = Cand.RPDelta.CriticalMax; 2325 break; 2326 case RegMax: 2327 P = Cand.RPDelta.CurrentMax; 2328 break; 2329 case ResourceReduce: 2330 ResIdx = Cand.Policy.ReduceResIdx; 2331 break; 2332 case ResourceDemand: 2333 ResIdx = Cand.Policy.DemandResIdx; 2334 break; 2335 case TopDepthReduce: 2336 Latency = Cand.SU->getDepth(); 2337 break; 2338 case TopPathReduce: 2339 Latency = Cand.SU->getHeight(); 2340 break; 2341 case BotHeightReduce: 2342 Latency = Cand.SU->getHeight(); 2343 break; 2344 case BotPathReduce: 2345 Latency = Cand.SU->getDepth(); 2346 break; 2347 } 2348 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); 2349 if (P.isValid()) 2350 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) 2351 << ":" << 
P.getUnitInc() << " "; 2352 else 2353 dbgs() << " "; 2354 if (ResIdx) 2355 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; 2356 else 2357 dbgs() << " "; 2358 if (Latency) 2359 dbgs() << " " << Latency << " cycles "; 2360 else 2361 dbgs() << " "; 2362 dbgs() << '\n'; 2363 } 2364 #endif 2365 2366 /// Return true if this heuristic determines order. 2367 static bool tryLess(int TryVal, int CandVal, 2368 GenericSchedulerBase::SchedCandidate &TryCand, 2369 GenericSchedulerBase::SchedCandidate &Cand, 2370 GenericSchedulerBase::CandReason Reason) { 2371 if (TryVal < CandVal) { 2372 TryCand.Reason = Reason; 2373 return true; 2374 } 2375 if (TryVal > CandVal) { 2376 if (Cand.Reason > Reason) 2377 Cand.Reason = Reason; 2378 return true; 2379 } 2380 Cand.setRepeat(Reason); 2381 return false; 2382 } 2383 2384 static bool tryGreater(int TryVal, int CandVal, 2385 GenericSchedulerBase::SchedCandidate &TryCand, 2386 GenericSchedulerBase::SchedCandidate &Cand, 2387 GenericSchedulerBase::CandReason Reason) { 2388 if (TryVal > CandVal) { 2389 TryCand.Reason = Reason; 2390 return true; 2391 } 2392 if (TryVal < CandVal) { 2393 if (Cand.Reason > Reason) 2394 Cand.Reason = Reason; 2395 return true; 2396 } 2397 Cand.setRepeat(Reason); 2398 return false; 2399 } 2400 2401 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 2402 GenericSchedulerBase::SchedCandidate &Cand, 2403 SchedBoundary &Zone) { 2404 if (Zone.isTop()) { 2405 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { 2406 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2407 TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) 2408 return true; 2409 } 2410 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), 2411 TryCand, Cand, GenericSchedulerBase::TopPathReduce)) 2412 return true; 2413 } 2414 else { 2415 if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { 2416 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), 2417 TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) 2418 return true; 2419 } 2420 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2421 TryCand, Cand, GenericSchedulerBase::BotPathReduce)) 2422 return true; 2423 } 2424 return false; 2425 } 2426 2427 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, 2428 bool IsTop) { 2429 DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") 2430 << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); 2431 } 2432 2433 namespace { 2434 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance 2435 /// the schedule. 2436 class GenericScheduler : public GenericSchedulerBase { 2437 ScheduleDAGMILive *DAG; 2438 2439 // State of the top and bottom scheduled instruction boundaries. 
2440 SchedBoundary Top; 2441 SchedBoundary Bot; 2442 2443 MachineSchedPolicy RegionPolicy; 2444 public: 2445 GenericScheduler(const MachineSchedContext *C): 2446 GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"), 2447 Bot(SchedBoundary::BotQID, "BotQ") {} 2448 2449 void initPolicy(MachineBasicBlock::iterator Begin, 2450 MachineBasicBlock::iterator End, 2451 unsigned NumRegionInstrs) override; 2452 2453 bool shouldTrackPressure() const override { 2454 return RegionPolicy.ShouldTrackPressure; 2455 } 2456 2457 void initialize(ScheduleDAGMI *dag) override; 2458 2459 SUnit *pickNode(bool &IsTopNode) override; 2460 2461 void schedNode(SUnit *SU, bool IsTopNode) override; 2462 2463 void releaseTopNode(SUnit *SU) override { 2464 Top.releaseTopNode(SU); 2465 } 2466 2467 void releaseBottomNode(SUnit *SU) override { 2468 Bot.releaseBottomNode(SU); 2469 } 2470 2471 void registerRoots() override; 2472 2473 protected: 2474 void checkAcyclicLatency(); 2475 2476 void tryCandidate(SchedCandidate &Cand, 2477 SchedCandidate &TryCand, 2478 SchedBoundary &Zone, 2479 const RegPressureTracker &RPTracker, 2480 RegPressureTracker &TempTracker); 2481 2482 SUnit *pickNodeBidirectional(bool &IsTopNode); 2483 2484 void pickNodeFromQueue(SchedBoundary &Zone, 2485 const RegPressureTracker &RPTracker, 2486 SchedCandidate &Candidate); 2487 2488 void reschedulePhysRegCopies(SUnit *SU, bool isTop); 2489 }; 2490 } // namespace 2491 2492 void GenericScheduler::initialize(ScheduleDAGMI *dag) { 2493 assert(dag->hasVRegLiveness() && 2494 "(PreRA)GenericScheduler needs vreg liveness"); 2495 DAG = static_cast<ScheduleDAGMILive*>(dag); 2496 SchedModel = DAG->getSchedModel(); 2497 TRI = DAG->TRI; 2498 2499 Rem.init(DAG, SchedModel); 2500 Top.init(DAG, SchedModel, &Rem); 2501 Bot.init(DAG, SchedModel, &Rem); 2502 2503 // Initialize resource counts. 2504 2505 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 2506 // are disabled, then these HazardRecs will be disabled. 2507 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 2508 const TargetMachine &TM = DAG->MF.getTarget(); 2509 if (!Top.HazardRec) { 2510 Top.HazardRec = 2511 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 2512 } 2513 if (!Bot.HazardRec) { 2514 Bot.HazardRec = 2515 TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 2516 } 2517 } 2518 2519 /// Initialize the per-region scheduling policy. 2520 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, 2521 MachineBasicBlock::iterator End, 2522 unsigned NumRegionInstrs) { 2523 const TargetMachine &TM = Context->MF->getTarget(); 2524 const TargetLowering *TLI = TM.getTargetLowering(); 2525 2526 // Avoid setting up the register pressure tracker for small regions to save 2527 // compile time. As a rough heuristic, only track pressure when the number of 2528 // schedulable instructions exceeds half the integer register file. 2529 RegionPolicy.ShouldTrackPressure = true; 2530 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { 2531 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; 2532 if (TLI->isTypeLegal(LegalIntVT)) { 2533 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( 2534 TLI->getRegClassFor(LegalIntVT)); 2535 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); 2536 } 2537 } 2538 2539 // For generic targets, we default to bottom-up, because it's simpler and more 2540 // compile-time optimizations have been implemented in that direction. 
2541   RegionPolicy.OnlyBottomUp = true;
2542
2543   // Allow the subtarget to override default policy.
2544   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
2545   ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
2546
2547   // After subtarget overrides, apply command line options.
2548   if (!EnableRegPressure)
2549     RegionPolicy.ShouldTrackPressure = false;
2550
2551   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2552   // e.g. -misched-bottomup=false allows scheduling in both directions.
2553   assert((!ForceTopDown || !ForceBottomUp) &&
2554          "-misched-topdown incompatible with -misched-bottomup");
2555   if (ForceBottomUp.getNumOccurrences() > 0) {
2556     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2557     if (RegionPolicy.OnlyBottomUp)
2558       RegionPolicy.OnlyTopDown = false;
2559   }
2560   if (ForceTopDown.getNumOccurrences() > 0) {
2561     RegionPolicy.OnlyTopDown = ForceTopDown;
2562     if (RegionPolicy.OnlyTopDown)
2563       RegionPolicy.OnlyBottomUp = false;
2564   }
2565 }
2566
2567 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2568 /// critical path by more cycles than it takes to drain the instruction buffer.
2569 /// We estimate an upper bound on in-flight instructions as:
2570 ///
2571 ///   CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2572 ///   InFlightIterations = AcyclicPath / CyclesPerIteration
2573 ///   InFlightResources = InFlightIterations * LoopResources
2574 ///
2575 /// TODO: Check execution resources in addition to IssueCount.
2576 void GenericScheduler::checkAcyclicLatency() {
2577   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2578     return;
2579
2580   // Scaled number of cycles per loop iteration.
2581   unsigned IterCount =
2582     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2583              Rem.RemIssueCount);
2584   // Scaled acyclic critical path.
2585   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2586   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2587   unsigned InFlightCount =
2588     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2589   unsigned BufferLimit =
2590     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2591
2592   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2593
2594   DEBUG(dbgs() << "IssueCycles="
2595         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2596         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2597         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2598         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2599         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2600         if (Rem.IsAcyclicLatencyLimited)
2601           dbgs() << " ACYCLIC LATENCY LIMIT\n");
2602 }
2603
2604 void GenericScheduler::registerRoots() {
2605   Rem.CriticalPath = DAG->ExitSU.getDepth();
2606
2607   // Some roots may not feed into ExitSU. Check all of them in case.
2608 for (std::vector<SUnit*>::const_iterator 2609 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { 2610 if ((*I)->getDepth() > Rem.CriticalPath) 2611 Rem.CriticalPath = (*I)->getDepth(); 2612 } 2613 DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); 2614 2615 if (EnableCyclicPath) { 2616 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); 2617 checkAcyclicLatency(); 2618 } 2619 } 2620 2621 static bool tryPressure(const PressureChange &TryP, 2622 const PressureChange &CandP, 2623 GenericSchedulerBase::SchedCandidate &TryCand, 2624 GenericSchedulerBase::SchedCandidate &Cand, 2625 GenericSchedulerBase::CandReason Reason) { 2626 int TryRank = TryP.getPSetOrMax(); 2627 int CandRank = CandP.getPSetOrMax(); 2628 // If both candidates affect the same set, go with the smallest increase. 2629 if (TryRank == CandRank) { 2630 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, 2631 Reason); 2632 } 2633 // If one candidate decreases and the other increases, go with it. 2634 // Invalid candidates have UnitInc==0. 2635 if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, 2636 Reason)) { 2637 return true; 2638 } 2639 // If the candidates are decreasing pressure, reverse priority. 2640 if (TryP.getUnitInc() < 0) 2641 std::swap(TryRank, CandRank); 2642 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); 2643 } 2644 2645 static unsigned getWeakLeft(const SUnit *SU, bool isTop) { 2646 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; 2647 } 2648 2649 /// Minimize physical register live ranges. Regalloc wants them adjacent to 2650 /// their physreg def/use. 2651 /// 2652 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf 2653 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled 2654 /// with the operation that produces or consumes the physreg. We'll do this when 2655 /// regalloc has support for parallel copies. 2656 static int biasPhysRegCopy(const SUnit *SU, bool isTop) { 2657 const MachineInstr *MI = SU->getInstr(); 2658 if (!MI->isCopy()) 2659 return 0; 2660 2661 unsigned ScheduledOper = isTop ? 1 : 0; 2662 unsigned UnscheduledOper = isTop ? 0 : 1; 2663 // If we have already scheduled the physreg produce/consumer, immediately 2664 // schedule the copy. 2665 if (TargetRegisterInfo::isPhysicalRegister( 2666 MI->getOperand(ScheduledOper).getReg())) 2667 return 1; 2668 // If the physreg is at the boundary, defer it. Otherwise schedule it 2669 // immediately to free the dependent. We can hoist the copy later. 2670 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; 2671 if (TargetRegisterInfo::isPhysicalRegister( 2672 MI->getOperand(UnscheduledOper).getReg())) 2673 return AtBoundary ? -1 : 1; 2674 return 0; 2675 } 2676 2677 /// Apply a set of heursitics to a new candidate. Heuristics are currently 2678 /// hierarchical. This may be more efficient than a graduated cost model because 2679 /// we don't need to evaluate all aspects of the model for each node in the 2680 /// queue. But it's really done to make the heuristics easier to debug and 2681 /// statistically analyze. 2682 /// 2683 /// \param Cand provides the policy and current best candidate. 2684 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 2685 /// \param Zone describes the scheduled zone that we are extending. 2686 /// \param RPTracker describes reg pressure within the scheduled zone. 2687 /// \param TempTracker is a scratch pressure tracker to reuse in queries. 
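/// A sketch of the resulting priority cascade, in the order the checks appear
/// below (highest priority first): physreg-copy bias, excess pressure,
/// critical-set pressure, latency when the loop is acyclic-path limited, stall
/// cycles, clustering, weak edges, max pressure, resource reduction, resource
/// demand, latency, next def/use, and finally the original node order. The
/// first check that separates the two candidates decides; later checks are
/// skipped.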
2688 void GenericScheduler::tryCandidate(SchedCandidate &Cand, 2689 SchedCandidate &TryCand, 2690 SchedBoundary &Zone, 2691 const RegPressureTracker &RPTracker, 2692 RegPressureTracker &TempTracker) { 2693 2694 if (DAG->isTrackingPressure()) { 2695 // Always initialize TryCand's RPDelta. 2696 if (Zone.isTop()) { 2697 TempTracker.getMaxDownwardPressureDelta( 2698 TryCand.SU->getInstr(), 2699 TryCand.RPDelta, 2700 DAG->getRegionCriticalPSets(), 2701 DAG->getRegPressure().MaxSetPressure); 2702 } 2703 else { 2704 if (VerifyScheduling) { 2705 TempTracker.getMaxUpwardPressureDelta( 2706 TryCand.SU->getInstr(), 2707 &DAG->getPressureDiff(TryCand.SU), 2708 TryCand.RPDelta, 2709 DAG->getRegionCriticalPSets(), 2710 DAG->getRegPressure().MaxSetPressure); 2711 } 2712 else { 2713 RPTracker.getUpwardPressureDelta( 2714 TryCand.SU->getInstr(), 2715 DAG->getPressureDiff(TryCand.SU), 2716 TryCand.RPDelta, 2717 DAG->getRegionCriticalPSets(), 2718 DAG->getRegPressure().MaxSetPressure); 2719 } 2720 } 2721 } 2722 DEBUG(if (TryCand.RPDelta.Excess.isValid()) 2723 dbgs() << " SU(" << TryCand.SU->NodeNum << ") " 2724 << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) 2725 << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); 2726 2727 // Initialize the candidate if needed. 2728 if (!Cand.isValid()) { 2729 TryCand.Reason = NodeOrder; 2730 return; 2731 } 2732 2733 if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()), 2734 biasPhysRegCopy(Cand.SU, Zone.isTop()), 2735 TryCand, Cand, PhysRegCopy)) 2736 return; 2737 2738 // Avoid exceeding the target's limit. If signed PSetID is negative, it is 2739 // invalid; convert it to INT_MAX to give it lowest priority. 2740 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, 2741 Cand.RPDelta.Excess, 2742 TryCand, Cand, RegExcess)) 2743 return; 2744 2745 // Avoid increasing the max critical pressure in the scheduled region. 2746 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, 2747 Cand.RPDelta.CriticalMax, 2748 TryCand, Cand, RegCritical)) 2749 return; 2750 2751 // For loops that are acyclic path limited, aggressively schedule for latency. 2752 // This can result in very long dependence chains scheduled in sequence, so 2753 // once every cycle (when CurrMOps == 0), switch to normal heuristics. 2754 if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() 2755 && tryLatency(TryCand, Cand, Zone)) 2756 return; 2757 2758 // Prioritize instructions that read unbuffered resources by stall cycles. 2759 if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), 2760 Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) 2761 return; 2762 2763 // Keep clustered nodes together to encourage downstream peephole 2764 // optimizations which may reduce resource requirements. 2765 // 2766 // This is a best effort to set things up for a post-RA pass. Optimizations 2767 // like generating loads of multiple registers should ideally be done within 2768 // the scheduler pass by combining the loads during DAG postprocessing. 2769 const SUnit *NextClusterSU = 2770 Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 2771 if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, 2772 TryCand, Cand, Cluster)) 2773 return; 2774 2775 // Weak edges are for clustering and other constraints. 2776 if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), 2777 getWeakLeft(Cand.SU, Zone.isTop()), 2778 TryCand, Cand, Weak)) { 2779 return; 2780 } 2781 // Avoid increasing the max pressure of the entire region. 
2782 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, 2783 Cand.RPDelta.CurrentMax, 2784 TryCand, Cand, RegMax)) 2785 return; 2786 2787 // Avoid critical resource consumption and balance the schedule. 2788 TryCand.initResourceDelta(DAG, SchedModel); 2789 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 2790 TryCand, Cand, ResourceReduce)) 2791 return; 2792 if (tryGreater(TryCand.ResDelta.DemandedResources, 2793 Cand.ResDelta.DemandedResources, 2794 TryCand, Cand, ResourceDemand)) 2795 return; 2796 2797 // Avoid serializing long latency dependence chains. 2798 // For acyclic path limited loops, latency was already checked above. 2799 if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited 2800 && tryLatency(TryCand, Cand, Zone)) { 2801 return; 2802 } 2803 2804 // Prefer immediate defs/users of the last scheduled instruction. This is a 2805 // local pressure avoidance strategy that also makes the machine code 2806 // readable. 2807 if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), 2808 TryCand, Cand, NextDefUse)) 2809 return; 2810 2811 // Fall through to original instruction order. 2812 if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) 2813 || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { 2814 TryCand.Reason = NodeOrder; 2815 } 2816 } 2817 2818 /// Pick the best candidate from the queue. 2819 /// 2820 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 2821 /// DAG building. To adjust for the current scheduling location we need to 2822 /// maintain the number of vreg uses remaining to be top-scheduled. 2823 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, 2824 const RegPressureTracker &RPTracker, 2825 SchedCandidate &Cand) { 2826 ReadyQueue &Q = Zone.Available; 2827 2828 DEBUG(Q.dump()); 2829 2830 // getMaxPressureDelta temporarily modifies the tracker. 2831 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 2832 2833 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 2834 2835 SchedCandidate TryCand(Cand.Policy); 2836 TryCand.SU = *I; 2837 tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); 2838 if (TryCand.Reason != NoCand) { 2839 // Initialize resource delta if needed in case future heuristics query it. 2840 if (TryCand.ResDelta == SchedResourceDelta()) 2841 TryCand.initResourceDelta(DAG, SchedModel); 2842 Cand.setBest(TryCand); 2843 DEBUG(traceCandidate(Cand)); 2844 } 2845 } 2846 } 2847 2848 /// Pick the best candidate node from either the top or bottom queue. 2849 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { 2850 // Schedule as far as possible in the direction of no choice. This is most 2851 // efficient, but also provides the best heuristics for CriticalPSets. 2852 if (SUnit *SU = Bot.pickOnlyChoice()) { 2853 IsTopNode = false; 2854 DEBUG(dbgs() << "Pick Bot NOCAND\n"); 2855 return SU; 2856 } 2857 if (SUnit *SU = Top.pickOnlyChoice()) { 2858 IsTopNode = true; 2859 DEBUG(dbgs() << "Pick Top NOCAND\n"); 2860 return SU; 2861 } 2862 CandPolicy NoPolicy; 2863 SchedCandidate BotCand(NoPolicy); 2864 SchedCandidate TopCand(NoPolicy); 2865 // Set the bottom-up policy based on the state of the current bottom zone and 2866 // the instructions outside the zone, including the top zone. 
2867 setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top); 2868 // Set the top-down policy based on the state of the current top zone and 2869 // the instructions outside the zone, including the bottom zone. 2870 setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot); 2871 2872 // Prefer bottom scheduling when heuristics are silent. 2873 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 2874 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 2875 2876 // If either Q has a single candidate that provides the least increase in 2877 // Excess pressure, we can immediately schedule from that Q. 2878 // 2879 // RegionCriticalPSets summarizes the pressure within the scheduled region and 2880 // affects picking from either Q. If scheduling in one direction must 2881 // increase pressure for one of the excess PSets, then schedule in that 2882 // direction first to provide more freedom in the other direction. 2883 if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess)) 2884 || (BotCand.Reason == RegCritical 2885 && !BotCand.isRepeat(RegCritical))) 2886 { 2887 IsTopNode = false; 2888 tracePick(BotCand, IsTopNode); 2889 return BotCand.SU; 2890 } 2891 // Check if the top Q has a better candidate. 2892 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 2893 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 2894 2895 // Choose the queue with the most important (lowest enum) reason. 2896 if (TopCand.Reason < BotCand.Reason) { 2897 IsTopNode = true; 2898 tracePick(TopCand, IsTopNode); 2899 return TopCand.SU; 2900 } 2901 // Otherwise prefer the bottom candidate, in node order if all else failed. 2902 IsTopNode = false; 2903 tracePick(BotCand, IsTopNode); 2904 return BotCand.SU; 2905 } 2906 2907 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 2908 SUnit *GenericScheduler::pickNode(bool &IsTopNode) { 2909 if (DAG->top() == DAG->bottom()) { 2910 assert(Top.Available.empty() && Top.Pending.empty() && 2911 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 2912 return nullptr; 2913 } 2914 SUnit *SU; 2915 do { 2916 if (RegionPolicy.OnlyTopDown) { 2917 SU = Top.pickOnlyChoice(); 2918 if (!SU) { 2919 CandPolicy NoPolicy; 2920 SchedCandidate TopCand(NoPolicy); 2921 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 2922 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 2923 tracePick(TopCand, true); 2924 SU = TopCand.SU; 2925 } 2926 IsTopNode = true; 2927 } 2928 else if (RegionPolicy.OnlyBottomUp) { 2929 SU = Bot.pickOnlyChoice(); 2930 if (!SU) { 2931 CandPolicy NoPolicy; 2932 SchedCandidate BotCand(NoPolicy); 2933 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 2934 assert(BotCand.Reason != NoCand && "failed to find a candidate"); 2935 tracePick(BotCand, false); 2936 SU = BotCand.SU; 2937 } 2938 IsTopNode = false; 2939 } 2940 else { 2941 SU = pickNodeBidirectional(IsTopNode); 2942 } 2943 } while (SU->isScheduled); 2944 2945 if (SU->isTopReady()) 2946 Top.removeReady(SU); 2947 if (SU->isBottomReady()) 2948 Bot.removeReady(SU); 2949 2950 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); 2951 return SU; 2952 } 2953 2954 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { 2955 2956 MachineBasicBlock::iterator InsertPos = SU->getInstr(); 2957 if (!isTop) 2958 ++InsertPos; 2959 SmallVectorImpl<SDep> &Deps = isTop ? 
SU->Preds : SU->Succs; 2960 2961 // Find already scheduled copies with a single physreg dependence and move 2962 // them just above the scheduled instruction. 2963 for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); 2964 I != E; ++I) { 2965 if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) 2966 continue; 2967 SUnit *DepSU = I->getSUnit(); 2968 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) 2969 continue; 2970 MachineInstr *Copy = DepSU->getInstr(); 2971 if (!Copy->isCopy()) 2972 continue; 2973 DEBUG(dbgs() << " Rescheduling physreg copy "; 2974 I->getSUnit()->dump(DAG)); 2975 DAG->moveInstruction(Copy, InsertPos); 2976 } 2977 } 2978 2979 /// Update the scheduler's state after scheduling a node. This is the same node 2980 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to 2981 /// update it's state based on the current cycle before MachineSchedStrategy 2982 /// does. 2983 /// 2984 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling 2985 /// them here. See comments in biasPhysRegCopy. 2986 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 2987 if (IsTopNode) { 2988 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); 2989 Top.bumpNode(SU); 2990 if (SU->hasPhysRegUses) 2991 reschedulePhysRegCopies(SU, true); 2992 } 2993 else { 2994 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); 2995 Bot.bumpNode(SU); 2996 if (SU->hasPhysRegDefs) 2997 reschedulePhysRegCopies(SU, false); 2998 } 2999 } 3000 3001 /// Create the standard converging machine scheduler. This will be used as the 3002 /// default scheduler if the target does not set a default. 3003 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { 3004 ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)); 3005 // Register DAG post-processors. 3006 // 3007 // FIXME: extend the mutation API to allow earlier mutations to instantiate 3008 // data and pass it to later mutations. Have a single mutation that gathers 3009 // the interesting nodes in one pass. 3010 DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI)); 3011 if (EnableLoadCluster && DAG->TII->enableClusterLoads()) 3012 DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI)); 3013 if (EnableMacroFusion) 3014 DAG->addMutation(make_unique<MacroFusion>(DAG->TII)); 3015 return DAG; 3016 } 3017 3018 static MachineSchedRegistry 3019 GenericSchedRegistry("converge", "Standard converging scheduler.", 3020 createGenericSchedLive); 3021 3022 //===----------------------------------------------------------------------===// 3023 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. 3024 //===----------------------------------------------------------------------===// 3025 3026 namespace { 3027 /// PostGenericScheduler - Interface to the scheduling algorithm used by 3028 /// ScheduleDAGMI. 3029 /// 3030 /// Callbacks from ScheduleDAGMI: 3031 /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 
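/// As a rough sketch (not the exact driver code), ScheduleDAGMI uses a
/// strategy like this for each region:
///
///   Strategy.initialize(DAG);     // initPolicy() already ran for the region
///   Strategy.registerRoots();
///   bool IsTop;
///   while (SUnit *SU = Strategy.pickNode(IsTop)) {
///     // ...the DAG moves the instruction and updates queues here...
///     Strategy.schedNode(SU, IsTop);
///   }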
3032 class PostGenericScheduler : public GenericSchedulerBase {
3033   ScheduleDAGMI *DAG;
3034   SchedBoundary Top;
3035   SmallVector<SUnit*, 8> BotRoots;
3036 public:
3037   PostGenericScheduler(const MachineSchedContext *C):
3038     GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
3039
3040   virtual ~PostGenericScheduler() {}
3041
3042   void initPolicy(MachineBasicBlock::iterator Begin,
3043                   MachineBasicBlock::iterator End,
3044                   unsigned NumRegionInstrs) override {
3045     /* no configurable policy */
3046   }
3047
3048   /// PostRA scheduling does not track pressure.
3049   bool shouldTrackPressure() const override { return false; }
3050
3051   void initialize(ScheduleDAGMI *Dag) override {
3052     DAG = Dag;
3053     SchedModel = DAG->getSchedModel();
3054     TRI = DAG->TRI;
3055
3056     Rem.init(DAG, SchedModel);
3057     Top.init(DAG, SchedModel, &Rem);
3058     BotRoots.clear();
3059
3060     // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3061     // or are disabled, then these HazardRecs will be disabled.
3062     const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3063     const TargetMachine &TM = DAG->MF.getTarget();
3064     if (!Top.HazardRec) {
3065       Top.HazardRec =
3066         TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
3067     }
3068   }
3069
3070   void registerRoots() override;
3071
3072   SUnit *pickNode(bool &IsTopNode) override;
3073
3074   void scheduleTree(unsigned SubtreeID) override {
3075     llvm_unreachable("PostRA scheduler does not support subtree analysis.");
3076   }
3077
3078   void schedNode(SUnit *SU, bool IsTopNode) override;
3079
3080   void releaseTopNode(SUnit *SU) override {
3081     Top.releaseTopNode(SU);
3082   }
3083
3084   // Only called for roots.
3085   void releaseBottomNode(SUnit *SU) override {
3086     BotRoots.push_back(SU);
3087   }
3088
3089 protected:
3090   void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
3091
3092   void pickNodeFromQueue(SchedCandidate &Cand);
3093 };
3094 } // namespace
3095
3096 void PostGenericScheduler::registerRoots() {
3097   Rem.CriticalPath = DAG->ExitSU.getDepth();
3098
3099   // Some roots may not feed into ExitSU. Check all of them in case.
3100   for (SmallVectorImpl<SUnit*>::const_iterator
3101          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3102     if ((*I)->getDepth() > Rem.CriticalPath)
3103       Rem.CriticalPath = (*I)->getDepth();
3104   }
3105   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
3106 }
3107
3108 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3109 ///
3110 /// \param Cand provides the policy and current best candidate.
3111 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3112 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3113                                         SchedCandidate &TryCand) {
3114
3115   // Initialize the candidate if needed.
3116   if (!Cand.isValid()) {
3117     TryCand.Reason = NodeOrder;
3118     return;
3119   }
3120
3121   // Prioritize instructions that read unbuffered resources by stall cycles.
3122   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3123               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3124     return;
3125
3126   // Avoid critical resource consumption and balance the schedule.
3127   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3128               TryCand, Cand, ResourceReduce))
3129     return;
3130   if (tryGreater(TryCand.ResDelta.DemandedResources,
3131                  Cand.ResDelta.DemandedResources,
3132                  TryCand, Cand, ResourceDemand))
3133     return;
3134
3135   // Avoid serializing long latency dependence chains.
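  // This reuses the generic latency heuristic; it only applies when
  // setPolicy() (called from pickNode below) has set the ReduceLatency
  // policy bit for the top zone.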
3136   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3137     return;
3138   }
3139
3140   // Fall through to original instruction order.
3141   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3142     TryCand.Reason = NodeOrder;
3143 }
3144
3145 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3146   ReadyQueue &Q = Top.Available;
3147
3148   DEBUG(Q.dump());
3149
3150   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3151     SchedCandidate TryCand(Cand.Policy);
3152     TryCand.SU = *I;
3153     TryCand.initResourceDelta(DAG, SchedModel);
3154     tryCandidate(Cand, TryCand);
3155     if (TryCand.Reason != NoCand) {
3156       Cand.setBest(TryCand);
3157       DEBUG(traceCandidate(Cand));
3158     }
3159   }
3160 }
3161
3162 /// Pick the next node to schedule.
3163 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3164   if (DAG->top() == DAG->bottom()) {
3165     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3166     return nullptr;
3167   }
3168   SUnit *SU;
3169   do {
3170     SU = Top.pickOnlyChoice();
3171     if (!SU) {
3172       CandPolicy NoPolicy;
3173       SchedCandidate TopCand(NoPolicy);
3174       // Set the top-down policy based on the state of the current top zone and
3175       // the instructions outside the zone, including the bottom zone.
3176       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3177       pickNodeFromQueue(TopCand);
3178       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3179       tracePick(TopCand, true);
3180       SU = TopCand.SU;
3181     }
3182   } while (SU->isScheduled);
3183
3184   IsTopNode = true;
3185   Top.removeReady(SU);
3186
3187   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3188   return SU;
3189 }
3190
3191 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3192 /// scheduled/remaining flags in the DAG nodes.
3193 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3194   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3195   Top.bumpNode(SU);
3196 }
3197
3198 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
3199 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
3200   return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
3201 }
3202
3203 //===----------------------------------------------------------------------===//
3204 // ILP Scheduler. Currently for experimental analysis of heuristics.
3205 //===----------------------------------------------------------------------===//
3206
3207 namespace {
3208 /// \brief Order nodes by the ILP metric.
3209 struct ILPOrder {
3210   const SchedDFSResult *DFSResult;
3211   const BitVector *ScheduledTrees;
3212   bool MaximizeILP;
3213
3214   ILPOrder(bool MaxILP)
3215     : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
3216
3217   /// \brief Apply a less-than relation on node priority.
3218   ///
3219   /// (Return true if A comes after B in the Q.)
3220   bool operator()(const SUnit *A, const SUnit *B) const {
3221     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3222     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3223     if (SchedTreeA != SchedTreeB) {
3224       // Unscheduled trees have lower priority.
3225       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3226         return ScheduledTrees->test(SchedTreeB);
3227
3228       // Trees with shallower connections have lower priority.
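      // (A smaller subtree connection level sorts the node later in the
      // queue, so subtrees that connect more deeply into the DAG are
      // drained first.)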
3229 if (DFSResult->getSubtreeLevel(SchedTreeA) 3230 != DFSResult->getSubtreeLevel(SchedTreeB)) { 3231 return DFSResult->getSubtreeLevel(SchedTreeA) 3232 < DFSResult->getSubtreeLevel(SchedTreeB); 3233 } 3234 } 3235 if (MaximizeILP) 3236 return DFSResult->getILP(A) < DFSResult->getILP(B); 3237 else 3238 return DFSResult->getILP(A) > DFSResult->getILP(B); 3239 } 3240 }; 3241 3242 /// \brief Schedule based on the ILP metric. 3243 class ILPScheduler : public MachineSchedStrategy { 3244 ScheduleDAGMILive *DAG; 3245 ILPOrder Cmp; 3246 3247 std::vector<SUnit*> ReadyQ; 3248 public: 3249 ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {} 3250 3251 void initialize(ScheduleDAGMI *dag) override { 3252 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); 3253 DAG = static_cast<ScheduleDAGMILive*>(dag); 3254 DAG->computeDFSResult(); 3255 Cmp.DFSResult = DAG->getDFSResult(); 3256 Cmp.ScheduledTrees = &DAG->getScheduledTrees(); 3257 ReadyQ.clear(); 3258 } 3259 3260 void registerRoots() override { 3261 // Restore the heap in ReadyQ with the updated DFS results. 3262 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3263 } 3264 3265 /// Implement MachineSchedStrategy interface. 3266 /// ----------------------------------------- 3267 3268 /// Callback to select the highest priority node from the ready Q. 3269 SUnit *pickNode(bool &IsTopNode) override { 3270 if (ReadyQ.empty()) return nullptr; 3271 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3272 SUnit *SU = ReadyQ.back(); 3273 ReadyQ.pop_back(); 3274 IsTopNode = false; 3275 DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " 3276 << " ILP: " << DAG->getDFSResult()->getILP(SU) 3277 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" 3278 << DAG->getDFSResult()->getSubtreeLevel( 3279 DAG->getDFSResult()->getSubtreeID(SU)) << '\n' 3280 << "Scheduling " << *SU->getInstr()); 3281 return SU; 3282 } 3283 3284 /// \brief Scheduler callback to notify that a new subtree is scheduled. 3285 void scheduleTree(unsigned SubtreeID) override { 3286 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3287 } 3288 3289 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify 3290 /// DFSResults, and resort the priority Q. 
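  /// The resort itself happens in scheduleTree() above, when the DAG notifies
  /// the strategy that a new subtree has been scheduled; schedNode() below
  /// only asserts the bottom-up invariant.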
3291 void schedNode(SUnit *SU, bool IsTopNode) override { 3292 assert(!IsTopNode && "SchedDFSResult needs bottom-up"); 3293 } 3294 3295 void releaseTopNode(SUnit *) override { /*only called for top roots*/ } 3296 3297 void releaseBottomNode(SUnit *SU) override { 3298 ReadyQ.push_back(SU); 3299 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3300 } 3301 }; 3302 } // namespace 3303 3304 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { 3305 return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true)); 3306 } 3307 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { 3308 return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false)); 3309 } 3310 static MachineSchedRegistry ILPMaxRegistry( 3311 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); 3312 static MachineSchedRegistry ILPMinRegistry( 3313 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); 3314 3315 //===----------------------------------------------------------------------===// 3316 // Machine Instruction Shuffler for Correctness Testing 3317 //===----------------------------------------------------------------------===// 3318 3319 #ifndef NDEBUG 3320 namespace { 3321 /// Apply a less-than relation on the node order, which corresponds to the 3322 /// instruction order prior to scheduling. IsReverse implements greater-than. 3323 template<bool IsReverse> 3324 struct SUnitOrder { 3325 bool operator()(SUnit *A, SUnit *B) const { 3326 if (IsReverse) 3327 return A->NodeNum > B->NodeNum; 3328 else 3329 return A->NodeNum < B->NodeNum; 3330 } 3331 }; 3332 3333 /// Reorder instructions as much as possible. 3334 class InstructionShuffler : public MachineSchedStrategy { 3335 bool IsAlternating; 3336 bool IsTopDown; 3337 3338 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority 3339 // gives nodes with a higher number higher priority causing the latest 3340 // instructions to be scheduled first. 3341 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > 3342 TopQ; 3343 // When scheduling bottom-up, use greater-than as the queue priority. 3344 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > 3345 BottomQ; 3346 public: 3347 InstructionShuffler(bool alternate, bool topdown) 3348 : IsAlternating(alternate), IsTopDown(topdown) {} 3349 3350 void initialize(ScheduleDAGMI*) override { 3351 TopQ.clear(); 3352 BottomQ.clear(); 3353 } 3354 3355 /// Implement MachineSchedStrategy interface. 
3356 /// ----------------------------------------- 3357 3358 SUnit *pickNode(bool &IsTopNode) override { 3359 SUnit *SU; 3360 if (IsTopDown) { 3361 do { 3362 if (TopQ.empty()) return nullptr; 3363 SU = TopQ.top(); 3364 TopQ.pop(); 3365 } while (SU->isScheduled); 3366 IsTopNode = true; 3367 } 3368 else { 3369 do { 3370 if (BottomQ.empty()) return nullptr; 3371 SU = BottomQ.top(); 3372 BottomQ.pop(); 3373 } while (SU->isScheduled); 3374 IsTopNode = false; 3375 } 3376 if (IsAlternating) 3377 IsTopDown = !IsTopDown; 3378 return SU; 3379 } 3380 3381 void schedNode(SUnit *SU, bool IsTopNode) override {} 3382 3383 void releaseTopNode(SUnit *SU) override { 3384 TopQ.push(SU); 3385 } 3386 void releaseBottomNode(SUnit *SU) override { 3387 BottomQ.push(SU); 3388 } 3389 }; 3390 } // namespace 3391 3392 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { 3393 bool Alternate = !ForceTopDown && !ForceBottomUp; 3394 bool TopDown = !ForceBottomUp; 3395 assert((TopDown || !ForceTopDown) && 3396 "-misched-topdown incompatible with -misched-bottomup"); 3397 return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown)); 3398 } 3399 static MachineSchedRegistry ShufflerRegistry( 3400 "shuffle", "Shuffle machine instructions alternating directions", 3401 createInstructionShuffler); 3402 #endif // !NDEBUG 3403 3404 //===----------------------------------------------------------------------===// 3405 // GraphWriter support for ScheduleDAGMILive. 3406 //===----------------------------------------------------------------------===// 3407 3408 #ifndef NDEBUG 3409 namespace llvm { 3410 3411 template<> struct GraphTraits< 3412 ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; 3413 3414 template<> 3415 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { 3416 3417 DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} 3418 3419 static std::string getGraphName(const ScheduleDAG *G) { 3420 return G->MF.getName(); 3421 } 3422 3423 static bool renderGraphFromBottomUp() { 3424 return true; 3425 } 3426 3427 static bool isNodeHidden(const SUnit *Node) { 3428 return (Node->Preds.size() > 10 || Node->Succs.size() > 10); 3429 } 3430 3431 static bool hasNodeAddressLabel(const SUnit *Node, 3432 const ScheduleDAG *Graph) { 3433 return false; 3434 } 3435 3436 /// If you want to override the dot attributes printed for a particular 3437 /// edge, override this method. 3438 static std::string getEdgeAttributes(const SUnit *Node, 3439 SUnitIterator EI, 3440 const ScheduleDAG *Graph) { 3441 if (EI.isArtificialDep()) 3442 return "color=cyan,style=dashed"; 3443 if (EI.isCtrlDep()) 3444 return "color=blue,style=dashed"; 3445 return ""; 3446 } 3447 3448 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { 3449 std::string Str; 3450 raw_string_ostream SS(Str); 3451 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 3452 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
3453 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 3454 SS << "SU:" << SU->NodeNum; 3455 if (DFS) 3456 SS << " I:" << DFS->getNumInstrs(SU); 3457 return SS.str(); 3458 } 3459 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { 3460 return G->getGraphNodeLabel(SU); 3461 } 3462 3463 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { 3464 std::string Str("shape=Mrecord"); 3465 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 3466 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 3467 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 3468 if (DFS) { 3469 Str += ",style=filled,fillcolor=\"#"; 3470 Str += DOT::getColorString(DFS->getSubtreeID(N)); 3471 Str += '"'; 3472 } 3473 return Str; 3474 } 3475 }; 3476 } // namespace llvm 3477 #endif // NDEBUG 3478 3479 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG 3480 /// rendered using 'dot'. 3481 /// 3482 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { 3483 #ifndef NDEBUG 3484 ViewGraph(this, Name, false, Title); 3485 #else 3486 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " 3487 << "systems with Graphviz or gv!\n"; 3488 #endif // NDEBUG 3489 } 3490 3491 /// Out-of-line implementation with no arguments is handy for gdb. 3492 void ScheduleDAGMI::viewGraph() { 3493 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); 3494 } 3495
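//===----------------------------------------------------------------------===//
// Example: registering a custom strategy (illustrative sketch only).
//===----------------------------------------------------------------------===//
//
// The registries above ("converge", "ilpmax", "ilpmin", and, in debug builds,
// "shuffle") show the pattern for plugging a MachineSchedStrategy into this
// pass. A hypothetical variant could follow the same shape; the names below
// (createMyILPSched, MyILPRegistry, "my-ilpmax") are invented for illustration
// and do not exist in LLVM:
//
//   static ScheduleDAGInstrs *createMyILPSched(MachineSchedContext *C) {
//     // Reuse the bottom-up ILP strategy defined above, maximizing ILP.
//     return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
//   }
//   static MachineSchedRegistry
//   MyILPRegistry("my-ilpmax", "Bottom-up max ILP (example)", createMyILPSched);
//
// Once registered, the variant is selected like any other entry through the
// scheduler selection option handled by the registry machinery
// (e.g. -misched=my-ilpmax).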