1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // MachineScheduler schedules machine instructions after phi elimination. It 11 // preserves LiveIntervals so it can be invoked before register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/iterator_range.h" 19 #include "llvm/ADT/PriorityQueue.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/CodeGen/LiveInterval.h" 24 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineDominators.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstr.h" 30 #include "llvm/CodeGen/MachineLoopInfo.h" 31 #include "llvm/CodeGen/MachineOperand.h" 32 #include "llvm/CodeGen/MachinePassRegistry.h" 33 #include "llvm/CodeGen/RegisterPressure.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineScheduler.h" 36 #include "llvm/CodeGen/MachineValueType.h" 37 #include "llvm/CodeGen/Passes.h" 38 #include "llvm/CodeGen/RegisterClassInfo.h" 39 #include "llvm/CodeGen/ScheduleDAG.h" 40 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 41 #include "llvm/CodeGen/ScheduleDAGMutation.h" 42 #include "llvm/CodeGen/ScheduleDFS.h" 43 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 44 #include "llvm/CodeGen/SlotIndexes.h" 45 #include "llvm/CodeGen/TargetPassConfig.h" 46 #include "llvm/CodeGen/TargetSchedule.h" 47 #include "llvm/MC/LaneBitmask.h" 48 #include "llvm/Pass.h" 49 #include "llvm/Support/CommandLine.h" 50 #include "llvm/Support/Compiler.h" 51 #include "llvm/Support/Debug.h" 52 #include "llvm/Support/ErrorHandling.h" 53 #include "llvm/Support/GraphWriter.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include "llvm/Target/TargetInstrInfo.h" 56 #include "llvm/Target/TargetLowering.h" 57 #include "llvm/Target/TargetRegisterInfo.h" 58 #include "llvm/Target/TargetSubtargetInfo.h" 59 #include <algorithm> 60 #include <cassert> 61 #include <cstdint> 62 #include <iterator> 63 #include <limits> 64 #include <memory> 65 #include <string> 66 #include <tuple> 67 #include <utility> 68 #include <vector> 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "machine-scheduler" 73 74 namespace llvm { 75 76 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, 77 cl::desc("Force top-down list scheduling")); 78 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, 79 cl::desc("Force bottom-up list scheduling")); 80 cl::opt<bool> 81 DumpCriticalPathLength("misched-dcpl", cl::Hidden, 82 cl::desc("Print critical path length to stdout")); 83 84 } // end namespace llvm 85 86 #ifndef NDEBUG 87 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, 88 cl::desc("Pop up a window to show MISched dags after they are processed")); 89 90 /// In some situations a few uninteresting nodes depend on nearly all other 91 /// nodes in the graph, provide a cutoff to hide them. 
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {

/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
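/// Unlike the pre-RA MachineScheduler it does not require or preserve
/// LiveIntervals (see getAnalysisUsage below), and it only runs when the
/// subtarget opts in via enablePostRAScheduler() or when the
/// -enable-post-misched flag is given explicitly.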
174 class PostMachineScheduler : public MachineSchedulerBase { 175 public: 176 PostMachineScheduler(); 177 178 void getAnalysisUsage(AnalysisUsage &AU) const override; 179 180 bool runOnMachineFunction(MachineFunction&) override; 181 182 static char ID; // Class identification, replacement for typeinfo 183 184 protected: 185 ScheduleDAGInstrs *createPostMachineScheduler(); 186 }; 187 188 } // end anonymous namespace 189 190 char MachineScheduler::ID = 0; 191 192 char &llvm::MachineSchedulerID = MachineScheduler::ID; 193 194 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, 195 "Machine Instruction Scheduler", false, false) 196 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 197 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 198 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 199 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 200 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, 201 "Machine Instruction Scheduler", false, false) 202 203 MachineScheduler::MachineScheduler() 204 : MachineSchedulerBase(ID) { 205 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 206 } 207 208 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 209 AU.setPreservesCFG(); 210 AU.addRequiredID(MachineDominatorsID); 211 AU.addRequired<MachineLoopInfo>(); 212 AU.addRequired<AAResultsWrapperPass>(); 213 AU.addRequired<TargetPassConfig>(); 214 AU.addRequired<SlotIndexes>(); 215 AU.addPreserved<SlotIndexes>(); 216 AU.addRequired<LiveIntervals>(); 217 AU.addPreserved<LiveIntervals>(); 218 MachineFunctionPass::getAnalysisUsage(AU); 219 } 220 221 char PostMachineScheduler::ID = 0; 222 223 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; 224 225 INITIALIZE_PASS(PostMachineScheduler, "postmisched", 226 "PostRA Machine Instruction Scheduler", false, false) 227 228 PostMachineScheduler::PostMachineScheduler() 229 : MachineSchedulerBase(ID) { 230 initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); 231 } 232 233 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 234 AU.setPreservesCFG(); 235 AU.addRequiredID(MachineDominatorsID); 236 AU.addRequired<MachineLoopInfo>(); 237 AU.addRequired<TargetPassConfig>(); 238 MachineFunctionPass::getAnalysisUsage(AU); 239 } 240 241 MachinePassRegistry MachineSchedRegistry::Registry; 242 243 /// A dummy default scheduler factory indicates whether the scheduler 244 /// is overridden on the command line. 245 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 246 return nullptr; 247 } 248 249 /// MachineSchedOpt allows command line selection of the scheduler. 250 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 251 RegisterPassParser<MachineSchedRegistry>> 252 MachineSchedOpt("misched", 253 cl::init(&useDefaultMachineSched), cl::Hidden, 254 cl::desc("Machine instruction scheduler to use")); 255 256 static MachineSchedRegistry 257 DefaultSchedRegistry("default", "Use the target's default scheduler choice.", 258 useDefaultMachineSched); 259 260 static cl::opt<bool> EnableMachineSched( 261 "enable-misched", 262 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), 263 cl::Hidden); 264 265 static cl::opt<bool> EnablePostRAMachineSched( 266 "enable-post-misched", 267 cl::desc("Enable the post-ra machine instruction scheduling pass."), 268 cl::init(true), cl::Hidden); 269 270 /// Decrement this iterator until reaching the top or a non-debug instr. 
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
      .getNonConstIterator();
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
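///
/// For example (an illustrative sketch, not drawn from any particular
/// target), a block containing a call is carved into two regions because
/// isSchedBoundary() treats calls as boundaries, and the lower region is
/// discovered and scheduled first:
///
///   %1 = LOAD ...       } upper region (scheduled second)
///   %2 = ADD %1, ...    }
///   CALL @f             <- scheduling boundary, excluded from both DAGs
///   %3 = MUL ...        } lower region (scheduled first)
///   %4 = STORE %3, ...  }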
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(*mf.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!mf.getSubtarget().enableMachineScheduler())
    return false;

  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(*mf.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  PassConfig = &getAnalysis<TargetPassConfig>();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need isCall to enforce
/// the boundary, but there would be no benefit to postRA scheduling across
/// calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
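  //
  // For each block the flow is: startBlock(), then walk the block bottom-up,
  // carving it into regions at scheduling boundaries and calling
  // enterRegion(), schedule(), and exitRegion() on each one, and finally
  // finishBlock() (plus fixupKills() when FixKillFlags is set).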
449 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); 450 MBB != MBBEnd; ++MBB) { 451 452 Scheduler.startBlock(&*MBB); 453 454 #ifndef NDEBUG 455 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) 456 continue; 457 if (SchedOnlyBlock.getNumOccurrences() 458 && (int)SchedOnlyBlock != MBB->getNumber()) 459 continue; 460 #endif 461 462 // Break the block into scheduling regions [I, RegionEnd), and schedule each 463 // region as soon as it is discovered. RegionEnd points the scheduling 464 // boundary at the bottom of the region. The DAG does not include RegionEnd, 465 // but the region does (i.e. the next RegionEnd is above the previous 466 // RegionBegin). If the current block has no terminator then RegionEnd == 467 // MBB->end() for the bottom region. 468 // 469 // The Scheduler may insert instructions during either schedule() or 470 // exitRegion(), even for empty regions. So the local iterators 'I' and 471 // 'RegionEnd' are invalid across these calls. 472 // 473 // MBB::size() uses instr_iterator to count. Here we need a bundle to count 474 // as a single instruction. 475 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 476 RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { 477 478 // Avoid decrementing RegionEnd for blocks with no terminator. 479 if (RegionEnd != MBB->end() || 480 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { 481 --RegionEnd; 482 } 483 484 // The next region starts above the previous region. Look backward in the 485 // instruction stream until we find the nearest boundary. 486 unsigned NumRegionInstrs = 0; 487 MachineBasicBlock::iterator I = RegionEnd; 488 for (; I != MBB->begin(); --I) { 489 MachineInstr &MI = *std::prev(I); 490 if (isSchedBoundary(&MI, &*MBB, MF, TII)) 491 break; 492 if (!MI.isDebugValue()) 493 ++NumRegionInstrs; 494 } 495 // Notify the scheduler of the region, even if we may skip scheduling 496 // it. Perhaps it still needs to be bundled. 497 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); 498 499 // Skip empty scheduling regions (0 or 1 schedulable instructions). 500 if (I == RegionEnd || I == std::prev(RegionEnd)) { 501 // Close the current region. Bundle the terminator if needed. 502 // This invalidates 'RegionEnd' and 'I'. 503 Scheduler.exitRegion(); 504 continue; 505 } 506 DEBUG(dbgs() << "********** MI Scheduling **********\n"); 507 DEBUG(dbgs() << MF->getName() 508 << ":BB#" << MBB->getNumber() << " " << MBB->getName() 509 << "\n From: " << *I << " To: "; 510 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 511 else dbgs() << "End"; 512 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); 513 if (DumpCriticalPathLength) { 514 errs() << MF->getName(); 515 errs() << ":BB# " << MBB->getNumber(); 516 errs() << " " << MBB->getName() << " \n"; 517 } 518 519 // Schedule a region: possibly reorder instructions. 520 // This invalidates 'RegionEnd' and 'I'. 521 Scheduler.schedule(); 522 523 // Close the current region. 524 Scheduler.exitRegion(); 525 526 // Scheduling has invalidated the current iterator 'I'. Ask the 527 // scheduler for the top of it's scheduled region. 528 RegionEnd = Scheduler.begin(); 529 } 530 Scheduler.finishBlock(); 531 // FIXME: Ideally, no further passes should rely on kill flags. However, 532 // thumb2 size reduction is currently an exception, so the PostMIScheduler 533 // needs to do this. 
534 if (FixKillFlags) 535 Scheduler.fixupKills(&*MBB); 536 } 537 Scheduler.finalizeSchedule(); 538 } 539 540 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { 541 // unimplemented 542 } 543 544 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 545 LLVM_DUMP_METHOD void ReadyQueue::dump() { 546 dbgs() << "Queue " << Name << ": "; 547 for (unsigned i = 0, e = Queue.size(); i < e; ++i) 548 dbgs() << Queue[i]->NodeNum << " "; 549 dbgs() << "\n"; 550 } 551 #endif 552 553 //===----------------------------------------------------------------------===// 554 // ScheduleDAGMI - Basic machine instruction scheduling. This is 555 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for 556 // virtual registers. 557 // ===----------------------------------------------------------------------===/ 558 559 // Provide a vtable anchor. 560 ScheduleDAGMI::~ScheduleDAGMI() = default; 561 562 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { 563 return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); 564 } 565 566 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { 567 if (SuccSU != &ExitSU) { 568 // Do not use WillCreateCycle, it assumes SD scheduling. 569 // If Pred is reachable from Succ, then the edge creates a cycle. 570 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) 571 return false; 572 Topo.AddPred(SuccSU, PredDep.getSUnit()); 573 } 574 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); 575 // Return true regardless of whether a new edge needed to be inserted. 576 return true; 577 } 578 579 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When 580 /// NumPredsLeft reaches zero, release the successor node. 581 /// 582 /// FIXME: Adjust SuccSU height based on MinLatency. 583 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { 584 SUnit *SuccSU = SuccEdge->getSUnit(); 585 586 if (SuccEdge->isWeak()) { 587 --SuccSU->WeakPredsLeft; 588 if (SuccEdge->isCluster()) 589 NextClusterSucc = SuccSU; 590 return; 591 } 592 #ifndef NDEBUG 593 if (SuccSU->NumPredsLeft == 0) { 594 dbgs() << "*** Scheduling failed! ***\n"; 595 SuccSU->dump(this); 596 dbgs() << " has been released too many times!\n"; 597 llvm_unreachable(nullptr); 598 } 599 #endif 600 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, 601 // CurrCycle may have advanced since then. 602 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) 603 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); 604 605 --SuccSU->NumPredsLeft; 606 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) 607 SchedImpl->releaseTopNode(SuccSU); 608 } 609 610 /// releaseSuccessors - Call releaseSucc on each of SU's successors. 611 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { 612 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 613 I != E; ++I) { 614 releaseSucc(SU, &*I); 615 } 616 } 617 618 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When 619 /// NumSuccsLeft reaches zero, release the predecessor node. 620 /// 621 /// FIXME: Adjust PredSU height based on MinLatency. 622 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { 623 SUnit *PredSU = PredEdge->getSUnit(); 624 625 if (PredEdge->isWeak()) { 626 --PredSU->WeakSuccsLeft; 627 if (PredEdge->isCluster()) 628 NextClusterPred = PredSU; 629 return; 630 } 631 #ifndef NDEBUG 632 if (PredSU->NumSuccsLeft == 0) { 633 dbgs() << "*** Scheduling failed! 
***\n"; 634 PredSU->dump(this); 635 dbgs() << " has been released too many times!\n"; 636 llvm_unreachable(nullptr); 637 } 638 #endif 639 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, 640 // CurrCycle may have advanced since then. 641 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) 642 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); 643 644 --PredSU->NumSuccsLeft; 645 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 646 SchedImpl->releaseBottomNode(PredSU); 647 } 648 649 /// releasePredecessors - Call releasePred on each of SU's predecessors. 650 void ScheduleDAGMI::releasePredecessors(SUnit *SU) { 651 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 652 I != E; ++I) { 653 releasePred(SU, &*I); 654 } 655 } 656 657 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 658 /// crossing a scheduling boundary. [begin, end) includes all instructions in 659 /// the region, including the boundary itself and single-instruction regions 660 /// that don't get scheduled. 661 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 662 MachineBasicBlock::iterator begin, 663 MachineBasicBlock::iterator end, 664 unsigned regioninstrs) 665 { 666 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); 667 668 SchedImpl->initPolicy(begin, end, regioninstrs); 669 } 670 671 /// This is normally called from the main scheduler loop but may also be invoked 672 /// by the scheduling strategy to perform additional code motion. 673 void ScheduleDAGMI::moveInstruction( 674 MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { 675 // Advance RegionBegin if the first instruction moves down. 676 if (&*RegionBegin == MI) 677 ++RegionBegin; 678 679 // Update the instruction stream. 680 BB->splice(InsertPos, BB, MI); 681 682 // Update LiveIntervals 683 if (LIS) 684 LIS->handleMove(*MI, /*UpdateFlags=*/true); 685 686 // Recede RegionBegin if an instruction moves above the first. 687 if (RegionBegin == InsertPos) 688 RegionBegin = MI; 689 } 690 691 bool ScheduleDAGMI::checkSchedLimit() { 692 #ifndef NDEBUG 693 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 694 CurrentTop = CurrentBottom; 695 return false; 696 } 697 ++NumInstrsScheduled; 698 #endif 699 return true; 700 } 701 702 /// Per-region scheduling driver, called back from 703 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that 704 /// does not consider liveness or register pressure. It is useful for PostRA 705 /// scheduling and potentially other custom schedulers. 706 void ScheduleDAGMI::schedule() { 707 DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); 708 DEBUG(SchedImpl->dumpPolicy()); 709 710 // Build the DAG. 711 buildSchedGraph(AA); 712 713 Topo.InitDAGTopologicalSorting(); 714 715 postprocessDAG(); 716 717 SmallVector<SUnit*, 8> TopRoots, BotRoots; 718 findRootsAndBiasEdges(TopRoots, BotRoots); 719 720 // Initialize the strategy before modifying the DAG. 721 // This may initialize a DFSResult to be used for queue priority. 722 SchedImpl->initialize(this); 723 724 DEBUG( 725 if (EntrySU.getInstr() != nullptr) 726 EntrySU.dumpAll(this); 727 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 728 SUnits[su].dumpAll(this); 729 if (ExitSU.getInstr() != nullptr) 730 ExitSU.dumpAll(this); 731 ); 732 if (ViewMISchedDAGs) viewGraph(); 733 734 // Initialize ready queues now that the DAG and priority data are finalized. 
735 initQueues(TopRoots, BotRoots); 736 737 bool IsTopNode = false; 738 while (true) { 739 DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); 740 SUnit *SU = SchedImpl->pickNode(IsTopNode); 741 if (!SU) break; 742 743 assert(!SU->isScheduled && "Node already scheduled"); 744 if (!checkSchedLimit()) 745 break; 746 747 MachineInstr *MI = SU->getInstr(); 748 if (IsTopNode) { 749 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 750 if (&*CurrentTop == MI) 751 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 752 else 753 moveInstruction(MI, CurrentTop); 754 } else { 755 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 756 MachineBasicBlock::iterator priorII = 757 priorNonDebug(CurrentBottom, CurrentTop); 758 if (&*priorII == MI) 759 CurrentBottom = priorII; 760 else { 761 if (&*CurrentTop == MI) 762 CurrentTop = nextIfDebug(++CurrentTop, priorII); 763 moveInstruction(MI, CurrentBottom); 764 CurrentBottom = MI; 765 } 766 } 767 // Notify the scheduling strategy before updating the DAG. 768 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues 769 // runs, it can then use the accurate ReadyCycle time to determine whether 770 // newly released nodes can move to the readyQ. 771 SchedImpl->schedNode(SU, IsTopNode); 772 773 updateQueues(SU, IsTopNode); 774 } 775 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 776 777 placeDebugValues(); 778 779 DEBUG({ 780 unsigned BBNum = begin()->getParent()->getNumber(); 781 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; 782 dumpSchedule(); 783 dbgs() << '\n'; 784 }); 785 } 786 787 /// Apply each ScheduleDAGMutation step in order. 788 void ScheduleDAGMI::postprocessDAG() { 789 for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { 790 Mutations[i]->apply(this); 791 } 792 } 793 794 void ScheduleDAGMI:: 795 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, 796 SmallVectorImpl<SUnit*> &BotRoots) { 797 for (std::vector<SUnit>::iterator 798 I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { 799 SUnit *SU = &(*I); 800 assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); 801 802 // Order predecessors so DFSResult follows the critical path. 803 SU->biasCriticalPath(); 804 805 // A SUnit is ready to top schedule if it has no predecessors. 806 if (!I->NumPredsLeft) 807 TopRoots.push_back(SU); 808 // A SUnit is ready to bottom schedule if it has no successors. 809 if (!I->NumSuccsLeft) 810 BotRoots.push_back(SU); 811 } 812 ExitSU.biasCriticalPath(); 813 } 814 815 /// Identify DAG roots and setup scheduler queues. 816 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, 817 ArrayRef<SUnit*> BotRoots) { 818 NextClusterSucc = nullptr; 819 NextClusterPred = nullptr; 820 821 // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 822 // 823 // Nodes with unreleased weak edges can still be roots. 824 // Release top roots in forward order. 825 for (SmallVectorImpl<SUnit*>::const_iterator 826 I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { 827 SchedImpl->releaseTopNode(*I); 828 } 829 // Release bottom roots in reverse order so the higher priority nodes appear 830 // first. This is more natural and slightly more efficient. 
831 for (SmallVectorImpl<SUnit*>::const_reverse_iterator 832 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { 833 SchedImpl->releaseBottomNode(*I); 834 } 835 836 releaseSuccessors(&EntrySU); 837 releasePredecessors(&ExitSU); 838 839 SchedImpl->registerRoots(); 840 841 // Advance past initial DebugValues. 842 CurrentTop = nextIfDebug(RegionBegin, RegionEnd); 843 CurrentBottom = RegionEnd; 844 } 845 846 /// Update scheduler queues after scheduling an instruction. 847 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { 848 // Release dependent instructions for scheduling. 849 if (IsTopNode) 850 releaseSuccessors(SU); 851 else 852 releasePredecessors(SU); 853 854 SU->isScheduled = true; 855 } 856 857 /// Reinsert any remaining debug_values, just like the PostRA scheduler. 858 void ScheduleDAGMI::placeDebugValues() { 859 // If first instruction was a DBG_VALUE then put it back. 860 if (FirstDbgValue) { 861 BB->splice(RegionBegin, BB, FirstDbgValue); 862 RegionBegin = FirstDbgValue; 863 } 864 865 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator 866 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 867 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); 868 MachineInstr *DbgValue = P.first; 869 MachineBasicBlock::iterator OrigPrevMI = P.second; 870 if (&*RegionBegin == DbgValue) 871 ++RegionBegin; 872 BB->splice(++OrigPrevMI, BB, DbgValue); 873 if (OrigPrevMI == std::prev(RegionEnd)) 874 RegionEnd = DbgValue; 875 } 876 DbgValues.clear(); 877 FirstDbgValue = nullptr; 878 } 879 880 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 881 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { 882 for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { 883 if (SUnit *SU = getSUnit(&(*MI))) 884 SU->dump(this); 885 else 886 dbgs() << "Missing SUnit\n"; 887 } 888 } 889 #endif 890 891 //===----------------------------------------------------------------------===// 892 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals 893 // preservation. 894 //===----------------------------------------------------------------------===// 895 896 ScheduleDAGMILive::~ScheduleDAGMILive() { 897 delete DFSResult; 898 } 899 900 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { 901 const MachineInstr &MI = *SU.getInstr(); 902 for (const MachineOperand &MO : MI.operands()) { 903 if (!MO.isReg()) 904 continue; 905 if (!MO.readsReg()) 906 continue; 907 if (TrackLaneMasks && !MO.isUse()) 908 continue; 909 910 unsigned Reg = MO.getReg(); 911 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 912 continue; 913 914 // Ignore re-defs. 915 if (TrackLaneMasks) { 916 bool FoundDef = false; 917 for (const MachineOperand &MO2 : MI.operands()) { 918 if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { 919 FoundDef = true; 920 break; 921 } 922 } 923 if (FoundDef) 924 continue; 925 } 926 927 // Record this local VReg use. 928 VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); 929 for (; UI != VRegUses.end(); ++UI) { 930 if (UI->SU == &SU) 931 break; 932 } 933 if (UI == VRegUses.end()) 934 VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU)); 935 } 936 } 937 938 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 939 /// crossing a scheduling boundary. [begin, end) includes all instructions in 940 /// the region, including the boundary itself and single-instruction regions 941 /// that don't get scheduled. 
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
}

// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
  VRegUses.clear();
  VRegUses.setUniverse(MRI.getNumVirtRegs());
  for (SUnit &SU : SUnits)
    collectVRegUses(SU);

  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
                    ShouldTrackLaneMasks, false);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                    ShouldTrackLaneMasks, false);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    DEBUG(dbgs() << "Live Thru: ";
          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  }

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<RegisterMaskPair, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  DEBUG(
    dbgs() << "Top Pressure:\n";
    dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    dbgs() << "Bottom Pressure:\n";
    dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
  );

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
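  // A set is "excess" when the region's max pressure exceeds the limit
  // returned by RegClassInfo->getRegPressureSetLimit(); updateScheduledPressure()
  // keeps this list up to date as instructions are scheduled.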
1019 RegionCriticalPSets.clear(); 1020 const std::vector<unsigned> &RegionPressure = 1021 RPTracker.getPressure().MaxSetPressure; 1022 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { 1023 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); 1024 if (RegionPressure[i] > Limit) { 1025 DEBUG(dbgs() << TRI->getRegPressureSetName(i) 1026 << " Limit " << Limit 1027 << " Actual " << RegionPressure[i] << "\n"); 1028 RegionCriticalPSets.push_back(PressureChange(i)); 1029 } 1030 } 1031 DEBUG(dbgs() << "Excess PSets: "; 1032 for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) 1033 dbgs() << TRI->getRegPressureSetName( 1034 RegionCriticalPSets[i].getPSet()) << " "; 1035 dbgs() << "\n"); 1036 } 1037 1038 void ScheduleDAGMILive:: 1039 updateScheduledPressure(const SUnit *SU, 1040 const std::vector<unsigned> &NewMaxPressure) { 1041 const PressureDiff &PDiff = getPressureDiff(SU); 1042 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); 1043 for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); 1044 I != E; ++I) { 1045 if (!I->isValid()) 1046 break; 1047 unsigned ID = I->getPSet(); 1048 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) 1049 ++CritIdx; 1050 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { 1051 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() 1052 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max()) 1053 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); 1054 } 1055 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); 1056 if (NewMaxPressure[ID] >= Limit - 2) { 1057 DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " 1058 << NewMaxPressure[ID] 1059 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit 1060 << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); 1061 } 1062 } 1063 } 1064 1065 /// Update the PressureDiff array for liveness after scheduling this 1066 /// instruction. 1067 void ScheduleDAGMILive::updatePressureDiffs( 1068 ArrayRef<RegisterMaskPair> LiveUses) { 1069 for (const RegisterMaskPair &P : LiveUses) { 1070 unsigned Reg = P.RegUnit; 1071 /// FIXME: Currently assuming single-use physregs. 1072 if (!TRI->isVirtualRegister(Reg)) 1073 continue; 1074 1075 if (ShouldTrackLaneMasks) { 1076 // If the register has just become live then other uses won't change 1077 // this fact anymore => decrement pressure. 1078 // If the register has just become dead then other uses make it come 1079 // back to life => increment pressure. 1080 bool Decrement = P.LaneMask.any(); 1081 1082 for (const VReg2SUnit &V2SU 1083 : make_range(VRegUses.find(Reg), VRegUses.end())) { 1084 SUnit &SU = *V2SU.SU; 1085 if (SU.isScheduled || &SU == &ExitSU) 1086 continue; 1087 1088 PressureDiff &PDiff = getPressureDiff(&SU); 1089 PDiff.addPressureChange(Reg, Decrement, &MRI); 1090 DEBUG( 1091 dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " 1092 << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) 1093 << ' ' << *SU.getInstr(); 1094 dbgs() << " to "; 1095 PDiff.dump(*TRI); 1096 ); 1097 } 1098 } else { 1099 assert(P.LaneMask.any()); 1100 DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); 1101 // This may be called before CurrentBottom has been initialized. However, 1102 // BotRPTracker must have a valid position. We want the value live into the 1103 // instruction or live out of the block, so ask for the previous 1104 // instruction's live-out. 
1105 const LiveInterval &LI = LIS->getInterval(Reg); 1106 VNInfo *VNI; 1107 MachineBasicBlock::const_iterator I = 1108 nextIfDebug(BotRPTracker.getPos(), BB->end()); 1109 if (I == BB->end()) 1110 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); 1111 else { 1112 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I)); 1113 VNI = LRQ.valueIn(); 1114 } 1115 // RegisterPressureTracker guarantees that readsReg is true for LiveUses. 1116 assert(VNI && "No live value at use."); 1117 for (const VReg2SUnit &V2SU 1118 : make_range(VRegUses.find(Reg), VRegUses.end())) { 1119 SUnit *SU = V2SU.SU; 1120 // If this use comes before the reaching def, it cannot be a last use, 1121 // so decrease its pressure change. 1122 if (!SU->isScheduled && SU != &ExitSU) { 1123 LiveQueryResult LRQ = 1124 LI.Query(LIS->getInstructionIndex(*SU->getInstr())); 1125 if (LRQ.valueIn() == VNI) { 1126 PressureDiff &PDiff = getPressureDiff(SU); 1127 PDiff.addPressureChange(Reg, true, &MRI); 1128 DEBUG( 1129 dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " 1130 << *SU->getInstr(); 1131 dbgs() << " to "; 1132 PDiff.dump(*TRI); 1133 ); 1134 } 1135 } 1136 } 1137 } 1138 } 1139 } 1140 1141 /// schedule - Called back from MachineScheduler::runOnMachineFunction 1142 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 1143 /// only includes instructions that have DAG nodes, not scheduling boundaries. 1144 /// 1145 /// This is a skeletal driver, with all the functionality pushed into helpers, 1146 /// so that it can be easily extended by experimental schedulers. Generally, 1147 /// implementing MachineSchedStrategy should be sufficient to implement a new 1148 /// scheduling algorithm. However, if a scheduler further subclasses 1149 /// ScheduleDAGMILive then it will want to override this virtual method in order 1150 /// to update any specialized state. 1151 void ScheduleDAGMILive::schedule() { 1152 DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); 1153 DEBUG(SchedImpl->dumpPolicy()); 1154 buildDAGWithRegPressure(); 1155 1156 Topo.InitDAGTopologicalSorting(); 1157 1158 postprocessDAG(); 1159 1160 SmallVector<SUnit*, 8> TopRoots, BotRoots; 1161 findRootsAndBiasEdges(TopRoots, BotRoots); 1162 1163 // Initialize the strategy before modifying the DAG. 1164 // This may initialize a DFSResult to be used for queue priority. 1165 SchedImpl->initialize(this); 1166 1167 DEBUG( 1168 if (EntrySU.getInstr() != nullptr) 1169 EntrySU.dumpAll(this); 1170 for (const SUnit &SU : SUnits) { 1171 SU.dumpAll(this); 1172 if (ShouldTrackPressure) { 1173 dbgs() << " Pressure Diff : "; 1174 getPressureDiff(&SU).dump(*TRI); 1175 } 1176 dbgs() << " Single Issue : "; 1177 if (SchedModel.mustBeginGroup(SU.getInstr()) && 1178 SchedModel.mustEndGroup(SU.getInstr())) 1179 dbgs() << "true;"; 1180 else 1181 dbgs() << "false;"; 1182 dbgs() << '\n'; 1183 } 1184 if (ExitSU.getInstr() != nullptr) 1185 ExitSU.dumpAll(this); 1186 ); 1187 if (ViewMISchedDAGs) viewGraph(); 1188 1189 // Initialize ready queues now that the DAG and priority data are finalized. 
1190 initQueues(TopRoots, BotRoots); 1191 1192 bool IsTopNode = false; 1193 while (true) { 1194 DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); 1195 SUnit *SU = SchedImpl->pickNode(IsTopNode); 1196 if (!SU) break; 1197 1198 assert(!SU->isScheduled && "Node already scheduled"); 1199 if (!checkSchedLimit()) 1200 break; 1201 1202 scheduleMI(SU, IsTopNode); 1203 1204 if (DFSResult) { 1205 unsigned SubtreeID = DFSResult->getSubtreeID(SU); 1206 if (!ScheduledTrees.test(SubtreeID)) { 1207 ScheduledTrees.set(SubtreeID); 1208 DFSResult->scheduleTree(SubtreeID); 1209 SchedImpl->scheduleTree(SubtreeID); 1210 } 1211 } 1212 1213 // Notify the scheduling strategy after updating the DAG. 1214 SchedImpl->schedNode(SU, IsTopNode); 1215 1216 updateQueues(SU, IsTopNode); 1217 } 1218 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 1219 1220 placeDebugValues(); 1221 1222 DEBUG({ 1223 unsigned BBNum = begin()->getParent()->getNumber(); 1224 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; 1225 dumpSchedule(); 1226 dbgs() << '\n'; 1227 }); 1228 } 1229 1230 /// Build the DAG and setup three register pressure trackers. 1231 void ScheduleDAGMILive::buildDAGWithRegPressure() { 1232 if (!ShouldTrackPressure) { 1233 RPTracker.reset(); 1234 RegionCriticalPSets.clear(); 1235 buildSchedGraph(AA); 1236 return; 1237 } 1238 1239 // Initialize the register pressure tracker used by buildSchedGraph. 1240 RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, 1241 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true); 1242 1243 // Account for liveness generate by the region boundary. 1244 if (LiveRegionEnd != RegionEnd) 1245 RPTracker.recede(); 1246 1247 // Build the DAG, and compute current register pressure. 1248 buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks); 1249 1250 // Initialize top/bottom trackers after computing region pressure. 1251 initRegPressure(); 1252 } 1253 1254 void ScheduleDAGMILive::computeDFSResult() { 1255 if (!DFSResult) 1256 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); 1257 DFSResult->clear(); 1258 ScheduledTrees.clear(); 1259 DFSResult->resize(SUnits.size()); 1260 DFSResult->compute(SUnits); 1261 ScheduledTrees.resize(DFSResult->getNumSubtrees()); 1262 } 1263 1264 /// Compute the max cyclic critical path through the DAG. The scheduling DAG 1265 /// only provides the critical path for single block loops. To handle loops that 1266 /// span blocks, we could use the vreg path latencies provided by 1267 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently 1268 /// available for use in the scheduler. 1269 /// 1270 /// The cyclic path estimation identifies a def-use pair that crosses the back 1271 /// edge and considers the depth and height of the nodes. 
For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
///
///   a->b(a,c)->c(b)->d(c)->exit
///
/// The cyclic critical path is two cycles: b->c->b
/// The acyclic critical path is four cycles: a->b->c->d->exit
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
/// LiveInDepth = depth(b) = len(a->b) = 1
///
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
/// CyclicCriticalPath = min(2, 2) = 2
///
/// This could be relevant to PostRA scheduling, but is currently implemented
/// assuming LiveIntervals.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single-block loops.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    unsigned Reg = P.RegUnit;
    if (!TRI->isVirtualRegister(Reg))
      continue;
    const LiveInterval &LI = LIS->getInterval(Reg);
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (const VReg2SUnit &V2SU
         : make_range(VRegUses.find(Reg), VRegUses.end())) {
      SUnit *SU = V2SU.SU;
      if (SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > SU->getDepth())
        CyclicLatency = LiveOutDepth - SU->getDepth();

      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      } else
        CyclicLatency = 0;

      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
                   << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}

/// Release ExitSU predecessors and setup scheduler queues. Re-position
/// the Top RP tracker in case the region beginning has changed.
1351 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots, 1352 ArrayRef<SUnit*> BotRoots) { 1353 ScheduleDAGMI::initQueues(TopRoots, BotRoots); 1354 if (ShouldTrackPressure) { 1355 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); 1356 TopRPTracker.setPos(CurrentTop); 1357 } 1358 } 1359 1360 /// Move an instruction and update register pressure. 1361 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { 1362 // Move the instruction to its new location in the instruction stream. 1363 MachineInstr *MI = SU->getInstr(); 1364 1365 if (IsTopNode) { 1366 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 1367 if (&*CurrentTop == MI) 1368 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 1369 else { 1370 moveInstruction(MI, CurrentTop); 1371 TopRPTracker.setPos(MI); 1372 } 1373 1374 if (ShouldTrackPressure) { 1375 // Update top scheduled pressure. 1376 RegisterOperands RegOpers; 1377 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 1378 if (ShouldTrackLaneMasks) { 1379 // Adjust liveness and add missing dead+read-undef flags. 1380 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 1381 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 1382 } else { 1383 // Adjust for missing dead-def flags. 1384 RegOpers.detectDeadDefs(*MI, *LIS); 1385 } 1386 1387 TopRPTracker.advance(RegOpers); 1388 assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 1389 DEBUG( 1390 dbgs() << "Top Pressure:\n"; 1391 dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); 1392 ); 1393 1394 updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); 1395 } 1396 } else { 1397 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 1398 MachineBasicBlock::iterator priorII = 1399 priorNonDebug(CurrentBottom, CurrentTop); 1400 if (&*priorII == MI) 1401 CurrentBottom = priorII; 1402 else { 1403 if (&*CurrentTop == MI) { 1404 CurrentTop = nextIfDebug(++CurrentTop, priorII); 1405 TopRPTracker.setPos(CurrentTop); 1406 } 1407 moveInstruction(MI, CurrentBottom); 1408 CurrentBottom = MI; 1409 } 1410 if (ShouldTrackPressure) { 1411 RegisterOperands RegOpers; 1412 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 1413 if (ShouldTrackLaneMasks) { 1414 // Adjust liveness and add missing dead+read-undef flags. 1415 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 1416 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 1417 } else { 1418 // Adjust for missing dead-def flags. 1419 RegOpers.detectDeadDefs(*MI, *LIS); 1420 } 1421 1422 BotRPTracker.recedeSkipDebugValues(); 1423 SmallVector<RegisterMaskPair, 8> LiveUses; 1424 BotRPTracker.recede(RegOpers, &LiveUses); 1425 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 1426 DEBUG( 1427 dbgs() << "Bottom Pressure:\n"; 1428 dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); 1429 ); 1430 1431 updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); 1432 updatePressureDiffs(LiveUses); 1433 } 1434 } 1435 } 1436 1437 //===----------------------------------------------------------------------===// 1438 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores. 1439 //===----------------------------------------------------------------------===// 1440 1441 namespace { 1442 1443 /// \brief Post-process the DAG to create cluster edges between neighboring 1444 /// loads or between neighboring stores. 
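/// For example (illustrative), loads from [%base], [%base + 8] and
/// [%base + 16] share a base register, so the mutation queries
/// TII->shouldClusterMemOps() and, when it agrees, adds weak SDep::Cluster
/// edges that keep them adjacent in the final schedule, helping targets form
/// paired or combined memory accesses.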
1445 class BaseMemOpClusterMutation : public ScheduleDAGMutation { 1446 struct MemOpInfo { 1447 SUnit *SU; 1448 unsigned BaseReg; 1449 int64_t Offset; 1450 1451 MemOpInfo(SUnit *su, unsigned reg, int64_t ofs) 1452 : SU(su), BaseReg(reg), Offset(ofs) {} 1453 1454 bool operator<(const MemOpInfo&RHS) const { 1455 return std::tie(BaseReg, Offset, SU->NodeNum) < 1456 std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum); 1457 } 1458 }; 1459 1460 const TargetInstrInfo *TII; 1461 const TargetRegisterInfo *TRI; 1462 bool IsLoad; 1463 1464 public: 1465 BaseMemOpClusterMutation(const TargetInstrInfo *tii, 1466 const TargetRegisterInfo *tri, bool IsLoad) 1467 : TII(tii), TRI(tri), IsLoad(IsLoad) {} 1468 1469 void apply(ScheduleDAGInstrs *DAGInstrs) override; 1470 1471 protected: 1472 void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG); 1473 }; 1474 1475 class StoreClusterMutation : public BaseMemOpClusterMutation { 1476 public: 1477 StoreClusterMutation(const TargetInstrInfo *tii, 1478 const TargetRegisterInfo *tri) 1479 : BaseMemOpClusterMutation(tii, tri, false) {} 1480 }; 1481 1482 class LoadClusterMutation : public BaseMemOpClusterMutation { 1483 public: 1484 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri) 1485 : BaseMemOpClusterMutation(tii, tri, true) {} 1486 }; 1487 1488 } // end anonymous namespace 1489 1490 namespace llvm { 1491 1492 std::unique_ptr<ScheduleDAGMutation> 1493 createLoadClusterDAGMutation(const TargetInstrInfo *TII, 1494 const TargetRegisterInfo *TRI) { 1495 return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI) 1496 : nullptr; 1497 } 1498 1499 std::unique_ptr<ScheduleDAGMutation> 1500 createStoreClusterDAGMutation(const TargetInstrInfo *TII, 1501 const TargetRegisterInfo *TRI) { 1502 return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI) 1503 : nullptr; 1504 } 1505 1506 } // end namespace llvm 1507 1508 void BaseMemOpClusterMutation::clusterNeighboringMemOps( 1509 ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { 1510 SmallVector<MemOpInfo, 32> MemOpRecords; 1511 for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) { 1512 SUnit *SU = MemOps[Idx]; 1513 unsigned BaseReg; 1514 int64_t Offset; 1515 if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI)) 1516 MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset)); 1517 } 1518 if (MemOpRecords.size() < 2) 1519 return; 1520 1521 std::sort(MemOpRecords.begin(), MemOpRecords.end()); 1522 unsigned ClusterLength = 1; 1523 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { 1524 if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) { 1525 ClusterLength = 1; 1526 continue; 1527 } 1528 1529 SUnit *SUa = MemOpRecords[Idx].SU; 1530 SUnit *SUb = MemOpRecords[Idx+1].SU; 1531 if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(), 1532 ClusterLength) && 1533 DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { 1534 DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" 1535 << SUb->NodeNum << ")\n"); 1536 // Copy successor edges from SUa to SUb. Interleaving computation 1537 // dependent on SUa can prevent load combining due to register reuse. 1538 // Predecessor edges do not need to be copied from SUb to SUa since nearby 1539 // loads should have effectively the same inputs. 
1540 for (SUnit::const_succ_iterator 1541 SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { 1542 if (SI->getSUnit() == SUb) 1543 continue; 1544 DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); 1545 DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); 1546 } 1547 ++ClusterLength; 1548 } else 1549 ClusterLength = 1; 1550 } 1551 } 1552 1553 /// \brief Callback from DAG postProcessing to create cluster edges for loads. 1554 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { 1555 1556 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); 1557 1558 // Map DAG NodeNum to store chain ID. 1559 DenseMap<unsigned, unsigned> StoreChainIDs; 1560 // Map each store chain to a set of dependent MemOps. 1561 SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; 1562 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 1563 SUnit *SU = &DAG->SUnits[Idx]; 1564 if ((IsLoad && !SU->getInstr()->mayLoad()) || 1565 (!IsLoad && !SU->getInstr()->mayStore())) 1566 continue; 1567 1568 unsigned ChainPredID = DAG->SUnits.size(); 1569 for (SUnit::const_pred_iterator 1570 PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 1571 if (PI->isCtrl()) { 1572 ChainPredID = PI->getSUnit()->NodeNum; 1573 break; 1574 } 1575 } 1576 // Check if this chain-like pred has been seen 1577 // before. ChainPredID==MaxNodeID at the top of the schedule. 1578 unsigned NumChains = StoreChainDependents.size(); 1579 std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = 1580 StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); 1581 if (Result.second) 1582 StoreChainDependents.resize(NumChains + 1); 1583 StoreChainDependents[Result.first->second].push_back(SU); 1584 } 1585 1586 // Iterate over the store chains. 1587 for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) 1588 clusterNeighboringMemOps(StoreChainDependents[Idx], DAG); 1589 } 1590 1591 //===----------------------------------------------------------------------===// 1592 // CopyConstrain - DAG post-processing to encourage copy elimination. 1593 //===----------------------------------------------------------------------===// 1594 1595 namespace { 1596 1597 /// \brief Post-process the DAG to create weak edges from all uses of a copy to 1598 /// the one use that defines the copy's source vreg, most likely an induction 1599 /// variable increment. 1600 class CopyConstrain : public ScheduleDAGMutation { 1601 // Transient state. 1602 SlotIndex RegionBeginIdx; 1603 // RegionEndIdx is the slot index of the last non-debug instruction in the 1604 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. 1605 SlotIndex RegionEndIdx; 1606 1607 public: 1608 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} 1609 1610 void apply(ScheduleDAGInstrs *DAGInstrs) override; 1611 1612 protected: 1613 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); 1614 }; 1615 1616 } // end anonymous namespace 1617 1618 namespace llvm { 1619 1620 std::unique_ptr<ScheduleDAGMutation> 1621 createCopyConstrainDAGMutation(const TargetInstrInfo *TII, 1622 const TargetRegisterInfo *TRI) { 1623 return llvm::make_unique<CopyConstrain>(TII, TRI); 1624 } 1625 1626 } // end namespace llvm 1627 1628 /// constrainLocalCopy handles two possibilities: 1629 /// 1) Local src: 1630 /// I0: = dst 1631 /// I1: src = ... 
1632 /// I2: = dst 1633 /// I3: dst = src (copy) 1634 /// (create pred->succ edges I0->I1, I2->I1) 1635 /// 1636 /// 2) Local copy: 1637 /// I0: dst = src (copy) 1638 /// I1: = dst 1639 /// I2: src = ... 1640 /// I3: = dst 1641 /// (create pred->succ edges I1->I2, I3->I2) 1642 /// 1643 /// Although the MachineScheduler is currently constrained to single blocks, 1644 /// this algorithm should handle extended blocks. An EBB is a set of 1645 /// contiguously numbered blocks such that the previous block in the EBB is 1646 /// always the single predecessor. 1647 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { 1648 LiveIntervals *LIS = DAG->getLIS(); 1649 MachineInstr *Copy = CopySU->getInstr(); 1650 1651 // Check for pure vreg copies. 1652 const MachineOperand &SrcOp = Copy->getOperand(1); 1653 unsigned SrcReg = SrcOp.getReg(); 1654 if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) 1655 return; 1656 1657 const MachineOperand &DstOp = Copy->getOperand(0); 1658 unsigned DstReg = DstOp.getReg(); 1659 if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead()) 1660 return; 1661 1662 // Check if either the dest or source is local. If it's live across a back 1663 // edge, it's not local. Note that if both vregs are live across the back 1664 // edge, we cannot successfully constrain the copy without cyclic scheduling. 1665 // If both the copy's source and dest are local live intervals, then we 1666 // should treat the dest as the global for the purpose of adding 1667 // constraints. This adds edges from source's other uses to the copy. 1668 unsigned LocalReg = SrcReg; 1669 unsigned GlobalReg = DstReg; 1670 LiveInterval *LocalLI = &LIS->getInterval(LocalReg); 1671 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { 1672 LocalReg = DstReg; 1673 GlobalReg = SrcReg; 1674 LocalLI = &LIS->getInterval(LocalReg); 1675 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) 1676 return; 1677 } 1678 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); 1679 1680 // Find the global segment after the start of the local LI. 1681 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); 1682 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a 1683 // local live range. We could create edges from other global uses to the local 1684 // start, but the coalescer should have already eliminated these cases, so 1685 // don't bother dealing with it. 1686 if (GlobalSegment == GlobalLI->end()) 1687 return; 1688 1689 // If GlobalSegment is killed at the LocalLI->start, the call to find() 1690 // returned the next global segment. But if GlobalSegment overlaps with 1691 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI 1692 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. 1693 if (GlobalSegment->contains(LocalLI->beginIndex())) 1694 ++GlobalSegment; 1695 1696 if (GlobalSegment == GlobalLI->end()) 1697 return; 1698 1699 // Check if GlobalLI contains a hole in the vicinity of LocalLI. 1700 if (GlobalSegment != GlobalLI->begin()) { 1701 // Two address defs have no hole. 1702 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, 1703 GlobalSegment->start)) { 1704 return; 1705 } 1706 // If the prior global segment may be defined by the same two-address 1707 // instruction that also defines LocalLI, then can't make a hole here.
1708 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, 1709 LocalLI->beginIndex())) { 1710 return; 1711 } 1712 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise 1713 // it would be a disconnected component in the live range. 1714 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && 1715 "Disconnected LRG within the scheduling region."); 1716 } 1717 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); 1718 if (!GlobalDef) 1719 return; 1720 1721 SUnit *GlobalSU = DAG->getSUnit(GlobalDef); 1722 if (!GlobalSU) 1723 return; 1724 1725 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by 1726 // constraining the uses of the last local def to precede GlobalDef. 1727 SmallVector<SUnit*,8> LocalUses; 1728 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); 1729 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); 1730 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); 1731 for (SUnit::const_succ_iterator 1732 I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); 1733 I != E; ++I) { 1734 if (I->getKind() != SDep::Data || I->getReg() != LocalReg) 1735 continue; 1736 if (I->getSUnit() == GlobalSU) 1737 continue; 1738 if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) 1739 return; 1740 LocalUses.push_back(I->getSUnit()); 1741 } 1742 // Open the top of the GlobalLI hole by constraining any earlier global uses 1743 // to precede the start of LocalLI. 1744 SmallVector<SUnit*,8> GlobalUses; 1745 MachineInstr *FirstLocalDef = 1746 LIS->getInstructionFromIndex(LocalLI->beginIndex()); 1747 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); 1748 for (SUnit::const_pred_iterator 1749 I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { 1750 if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) 1751 continue; 1752 if (I->getSUnit() == FirstLocalSU) 1753 continue; 1754 if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) 1755 return; 1756 GlobalUses.push_back(I->getSUnit()); 1757 } 1758 DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); 1759 // Add the weak edges. 1760 for (SmallVectorImpl<SUnit*>::const_iterator 1761 I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { 1762 DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" 1763 << GlobalSU->NodeNum << ")\n"); 1764 DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); 1765 } 1766 for (SmallVectorImpl<SUnit*>::const_iterator 1767 I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { 1768 DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" 1769 << FirstLocalSU->NodeNum << ")\n"); 1770 DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); 1771 } 1772 } 1773 1774 /// \brief Callback from DAG postProcessing to create weak edges to encourage 1775 /// copy elimination. 
1776 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { 1777 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); 1778 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); 1779 1780 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); 1781 if (FirstPos == DAG->end()) 1782 return; 1783 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos); 1784 RegionEndIdx = DAG->getLIS()->getInstructionIndex( 1785 *priorNonDebug(DAG->end(), DAG->begin())); 1786 1787 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 1788 SUnit *SU = &DAG->SUnits[Idx]; 1789 if (!SU->getInstr()->isCopy()) 1790 continue; 1791 1792 constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); 1793 } 1794 } 1795 1796 //===----------------------------------------------------------------------===// 1797 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler 1798 // and possibly other custom schedulers. 1799 //===----------------------------------------------------------------------===// 1800 1801 static const unsigned InvalidCycle = ~0U; 1802 1803 SchedBoundary::~SchedBoundary() { delete HazardRec; } 1804 1805 void SchedBoundary::reset() { 1806 // A new HazardRec is created for each DAG and owned by SchedBoundary. 1807 // Destroying and reconstructing it is very expensive though. So keep 1808 // invalid, placeholder HazardRecs. 1809 if (HazardRec && HazardRec->isEnabled()) { 1810 delete HazardRec; 1811 HazardRec = nullptr; 1812 } 1813 Available.clear(); 1814 Pending.clear(); 1815 CheckPending = false; 1816 CurrCycle = 0; 1817 CurrMOps = 0; 1818 MinReadyCycle = std::numeric_limits<unsigned>::max(); 1819 ExpectedLatency = 0; 1820 DependentLatency = 0; 1821 RetiredMOps = 0; 1822 MaxExecutedResCount = 0; 1823 ZoneCritResIdx = 0; 1824 IsResourceLimited = false; 1825 ReservedCycles.clear(); 1826 #ifndef NDEBUG 1827 // Track the maximum number of stall cycles that could arise either from the 1828 // latency of a DAG edge or the number of cycles that a processor resource is 1829 // reserved (SchedBoundary::ReservedCycles). 1830 MaxObservedStall = 0; 1831 #endif 1832 // Reserve a zero-count for invalid CritResIdx. 
1833 ExecutedResCounts.resize(1); 1834 assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); 1835 } 1836 1837 void SchedRemainder:: 1838 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 1839 reset(); 1840 if (!SchedModel->hasInstrSchedModel()) 1841 return; 1842 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 1843 for (std::vector<SUnit>::iterator 1844 I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { 1845 const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); 1846 RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) 1847 * SchedModel->getMicroOpFactor(); 1848 for (TargetSchedModel::ProcResIter 1849 PI = SchedModel->getWriteProcResBegin(SC), 1850 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1851 unsigned PIdx = PI->ProcResourceIdx; 1852 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1853 RemainingCounts[PIdx] += (Factor * PI->Cycles); 1854 } 1855 } 1856 } 1857 1858 void SchedBoundary:: 1859 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 1860 reset(); 1861 DAG = dag; 1862 SchedModel = smodel; 1863 Rem = rem; 1864 if (SchedModel->hasInstrSchedModel()) { 1865 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); 1866 ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); 1867 } 1868 } 1869 1870 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat 1871 /// these "soft stalls" differently than the hard stall cycles based on CPU 1872 /// resources and computed by checkHazard(). A fully in-order model 1873 /// (MicroOpBufferSize==0) will not make use of this since instructions are not 1874 /// available for scheduling until they are ready. However, a weaker in-order 1875 /// model may use this for heuristics. For example, if a processor has in-order 1876 /// behavior when reading certain resources, this may come into play. 1877 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { 1878 if (!SU->isUnbuffered) 1879 return 0; 1880 1881 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 1882 if (ReadyCycle > CurrCycle) 1883 return ReadyCycle - CurrCycle; 1884 return 0; 1885 } 1886 1887 /// Compute the next cycle at which the given processor resource can be 1888 /// scheduled. 1889 unsigned SchedBoundary:: 1890 getNextResourceCycle(unsigned PIdx, unsigned Cycles) { 1891 unsigned NextUnreserved = ReservedCycles[PIdx]; 1892 // If this resource has never been used, always return cycle zero. 1893 if (NextUnreserved == InvalidCycle) 1894 return 0; 1895 // For bottom-up scheduling add the cycles needed for the current operation. 1896 if (!isTop()) 1897 NextUnreserved += Cycles; 1898 return NextUnreserved; 1899 } 1900 1901 /// Does this SU have a hazard within the current instruction group. 1902 /// 1903 /// The scheduler supports two modes of hazard recognition. The first is the 1904 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 1905 /// supports highly complicated in-order reservation tables 1906 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 1907 /// 1908 /// The second is a streamlined mechanism that checks for hazards based on 1909 /// simple counters that the scheduler itself maintains. It explicitly checks 1910 /// for instruction dispatch limitations, including the number of micro-ops that 1911 /// can dispatch per cycle. 1912 /// 1913 /// TODO: Also check whether the SU must start a new group.
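///
/// A small worked example with made-up numbers (not taken from any target):
/// if IssueWidth == 4, CurrMOps == 3, and the candidate SU expands to 2
/// micro-ops, then CurrMOps + uops == 5 exceeds the issue width, so the
/// counter-based check below reports a hazard and the node stays in Pending
/// until the zone advances to the next cycle.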
1914 bool SchedBoundary::checkHazard(SUnit *SU) { 1915 if (HazardRec->isEnabled() 1916 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { 1917 return true; 1918 } 1919 1920 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 1921 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { 1922 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" 1923 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); 1924 return true; 1925 } 1926 1927 if (CurrMOps > 0 && 1928 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) || 1929 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { 1930 DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must " 1931 << (isTop()? "begin" : "end") << " group\n"); 1932 return true; 1933 } 1934 1935 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { 1936 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 1937 for (TargetSchedModel::ProcResIter 1938 PI = SchedModel->getWriteProcResBegin(SC), 1939 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1940 unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); 1941 if (NRCycle > CurrCycle) { 1942 #ifndef NDEBUG 1943 MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); 1944 #endif 1945 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " 1946 << SchedModel->getResourceName(PI->ProcResourceIdx) 1947 << "=" << NRCycle << "c\n"); 1948 return true; 1949 } 1950 } 1951 } 1952 return false; 1953 } 1954 1955 // Find the unscheduled node in ReadySUs with the highest latency. 1956 unsigned SchedBoundary:: 1957 findMaxLatency(ArrayRef<SUnit*> ReadySUs) { 1958 SUnit *LateSU = nullptr; 1959 unsigned RemLatency = 0; 1960 for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); 1961 I != E; ++I) { 1962 unsigned L = getUnscheduledLatency(*I); 1963 if (L > RemLatency) { 1964 RemLatency = L; 1965 LateSU = *I; 1966 } 1967 } 1968 if (LateSU) { 1969 DEBUG(dbgs() << Available.getName() << " RemLatency SU(" 1970 << LateSU->NodeNum << ") " << RemLatency << "c\n"); 1971 } 1972 return RemLatency; 1973 } 1974 1975 // Count resources in this zone and the remaining unscheduled 1976 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical 1977 // resource index, or zero if the zone is issue limited. 
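//
// Worked example with made-up numbers: if getResourceFactor(P) == 3 for some
// resource P, then 4 unscheduled cycles of P contribute 4 * 3 == 12 scaled
// units. Scaling puts micro-op issue and heterogeneous resources in the same
// units, so the largest scaled count computed below identifies the other
// zone's critical resource.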
1978 unsigned SchedBoundary:: 1979 getOtherResourceCount(unsigned &OtherCritIdx) { 1980 OtherCritIdx = 0; 1981 if (!SchedModel->hasInstrSchedModel()) 1982 return 0; 1983 1984 unsigned OtherCritCount = Rem->RemIssueCount 1985 + (RetiredMOps * SchedModel->getMicroOpFactor()); 1986 DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " 1987 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); 1988 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); 1989 PIdx != PEnd; ++PIdx) { 1990 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; 1991 if (OtherCount > OtherCritCount) { 1992 OtherCritCount = OtherCount; 1993 OtherCritIdx = PIdx; 1994 } 1995 } 1996 if (OtherCritIdx) { 1997 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " 1998 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) 1999 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); 2000 } 2001 return OtherCritCount; 2002 } 2003 2004 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { 2005 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 2006 2007 #ifndef NDEBUG 2008 // ReadyCycle has been bumped up to the CurrCycle when this node was 2009 // scheduled, but CurrCycle may have been eagerly advanced immediately after 2010 // scheduling, so may now be greater than ReadyCycle. 2011 if (ReadyCycle > CurrCycle) 2012 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); 2013 #endif 2014 2015 if (ReadyCycle < MinReadyCycle) 2016 MinReadyCycle = ReadyCycle; 2017 2018 // Check for interlocks first. For the purpose of other heuristics, an 2019 // instruction that cannot issue appears as if it's not in the ReadyQueue. 2020 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 2021 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) || 2022 Available.size() >= ReadyListLimit) 2023 Pending.push(SU); 2024 else 2025 Available.push(SU); 2026 } 2027 2028 /// Move the boundary of scheduled code by one cycle. 2029 void SchedBoundary::bumpCycle(unsigned NextCycle) { 2030 if (SchedModel->getMicroOpBufferSize() == 0) { 2031 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && 2032 "MinReadyCycle uninitialized"); 2033 if (MinReadyCycle > NextCycle) 2034 NextCycle = MinReadyCycle; 2035 } 2036 // Update the current micro-ops, which will issue in the next cycle. 2037 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); 2038 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; 2039 2040 // Decrement DependentLatency based on the next cycle. 2041 if ((NextCycle - CurrCycle) > DependentLatency) 2042 DependentLatency = 0; 2043 else 2044 DependentLatency -= (NextCycle - CurrCycle); 2045 2046 if (!HazardRec->isEnabled()) { 2047 // Bypass HazardRec virtual calls. 2048 CurrCycle = NextCycle; 2049 } else { 2050 // Bypass getHazardType calls in case of long latency.
2051 for (; CurrCycle != NextCycle; ++CurrCycle) { 2052 if (isTop()) 2053 HazardRec->AdvanceCycle(); 2054 else 2055 HazardRec->RecedeCycle(); 2056 } 2057 } 2058 CheckPending = true; 2059 unsigned LFactor = SchedModel->getLatencyFactor(); 2060 IsResourceLimited = 2061 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) 2062 > (int)LFactor; 2063 2064 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); 2065 } 2066 2067 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { 2068 ExecutedResCounts[PIdx] += Count; 2069 if (ExecutedResCounts[PIdx] > MaxExecutedResCount) 2070 MaxExecutedResCount = ExecutedResCounts[PIdx]; 2071 } 2072 2073 /// Add the given processor resource to this scheduled zone. 2074 /// 2075 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles 2076 /// during which this resource is consumed. 2077 /// 2078 /// \return the next cycle at which the instruction may execute without 2079 /// oversubscribing resources. 2080 unsigned SchedBoundary:: 2081 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { 2082 unsigned Factor = SchedModel->getResourceFactor(PIdx); 2083 unsigned Count = Factor * Cycles; 2084 DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) 2085 << " +" << Cycles << "x" << Factor << "u\n"); 2086 2087 // Update Executed resources counts. 2088 incExecutedResources(PIdx, Count); 2089 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 2090 Rem->RemainingCounts[PIdx] -= Count; 2091 2092 // Check if this resource exceeds the current critical resource. If so, it 2093 // becomes the critical resource. 2094 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { 2095 ZoneCritResIdx = PIdx; 2096 DEBUG(dbgs() << " *** Critical resource " 2097 << SchedModel->getResourceName(PIdx) << ": " 2098 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); 2099 } 2100 // For reserved resources, record the highest cycle using the resource. 2101 unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); 2102 if (NextAvailable > CurrCycle) { 2103 DEBUG(dbgs() << " Resource conflict: " 2104 << SchedModel->getProcResource(PIdx)->Name << " reserved until @" 2105 << NextAvailable << "\n"); 2106 } 2107 return NextAvailable; 2108 } 2109 2110 /// Move the boundary of scheduled code by one SUnit. 2111 void SchedBoundary::bumpNode(SUnit *SU) { 2112 // Update the reservation table. 2113 if (HazardRec->isEnabled()) { 2114 if (!isTop() && SU->isCall) { 2115 // Calls are scheduled with their preceding instructions. For bottom-up 2116 // scheduling, clear the pipeline state before emitting. 2117 HazardRec->Reset(); 2118 } 2119 HazardRec->EmitInstruction(SU); 2120 } 2121 // checkHazard should prevent scheduling multiple instructions per cycle that 2122 // exceed the issue width. 2123 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2124 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); 2125 assert( 2126 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && 2127 "Cannot schedule this instruction's MicroOps in the current cycle."); 2128 2129 unsigned ReadyCycle = (isTop() ? 
SU->TopReadyCycle : SU->BotReadyCycle); 2130 DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); 2131 2132 unsigned NextCycle = CurrCycle; 2133 switch (SchedModel->getMicroOpBufferSize()) { 2134 case 0: 2135 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); 2136 break; 2137 case 1: 2138 if (ReadyCycle > NextCycle) { 2139 NextCycle = ReadyCycle; 2140 DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); 2141 } 2142 break; 2143 default: 2144 // We don't currently model the OOO reorder buffer, so consider all 2145 // scheduled MOps to be "retired". We do loosely model in-order resource 2146 // latency. If this instruction uses an in-order resource, account for any 2147 // likely stall cycles. 2148 if (SU->isUnbuffered && ReadyCycle > NextCycle) 2149 NextCycle = ReadyCycle; 2150 break; 2151 } 2152 RetiredMOps += IncMOps; 2153 2154 // Update resource counts and critical resource. 2155 if (SchedModel->hasInstrSchedModel()) { 2156 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); 2157 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); 2158 Rem->RemIssueCount -= DecRemIssue; 2159 if (ZoneCritResIdx) { 2160 // Scale scheduled micro-ops for comparing with the critical resource. 2161 unsigned ScaledMOps = 2162 RetiredMOps * SchedModel->getMicroOpFactor(); 2163 2164 // If scaled micro-ops are now more than the previous critical resource by 2165 // a full cycle, then micro-ops issue becomes critical. 2166 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) 2167 >= (int)SchedModel->getLatencyFactor()) { 2168 ZoneCritResIdx = 0; 2169 DEBUG(dbgs() << " *** Critical resource NumMicroOps: " 2170 << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); 2171 } 2172 } 2173 for (TargetSchedModel::ProcResIter 2174 PI = SchedModel->getWriteProcResBegin(SC), 2175 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2176 unsigned RCycle = 2177 countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); 2178 if (RCycle > NextCycle) 2179 NextCycle = RCycle; 2180 } 2181 if (SU->hasReservedResource) { 2182 // For reserved resources, record the highest cycle using the resource. 2183 // For top-down scheduling, this is the cycle in which we schedule this 2184 // instruction plus the number of cycles the operation reserves the 2185 // resource. For bottom-up it is simply the instruction's cycle. 2186 for (TargetSchedModel::ProcResIter 2187 PI = SchedModel->getWriteProcResBegin(SC), 2188 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2189 unsigned PIdx = PI->ProcResourceIdx; 2190 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { 2191 if (isTop()) { 2192 ReservedCycles[PIdx] = 2193 std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); 2194 } 2195 else 2196 ReservedCycles[PIdx] = NextCycle; 2197 } 2198 } 2199 } 2200 } 2201 // Update ExpectedLatency and DependentLatency. 2202 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; 2203 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; 2204 if (SU->getDepth() > TopLatency) { 2205 TopLatency = SU->getDepth(); 2206 DEBUG(dbgs() << " " << Available.getName() 2207 << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); 2208 } 2209 if (SU->getHeight() > BotLatency) { 2210 BotLatency = SU->getHeight(); 2211 DEBUG(dbgs() << " " << Available.getName() 2212 << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); 2213 } 2214 // If we stall for any reason, bump the cycle.
2215 if (NextCycle > CurrCycle) { 2216 bumpCycle(NextCycle); 2217 } else { 2218 // After updating ZoneCritResIdx and ExpectedLatency, check if we're 2219 // resource limited. If a stall occurred, bumpCycle does this. 2220 unsigned LFactor = SchedModel->getLatencyFactor(); 2221 IsResourceLimited = 2222 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) 2223 > (int)LFactor; 2224 } 2225 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle 2226 // resets CurrMOps. Loop to handle instructions with more MOps than issue in 2227 // one cycle. Since we commonly reach the max MOps here, opportunistically 2228 // bump the cycle to avoid uselessly checking everything in the readyQ. 2229 CurrMOps += IncMOps; 2230 2231 // Bump the cycle count for issue group constraints. 2232 // This must be done after NextCycle has been adjusted for all other stalls. 2233 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set 2234 // CurrCycle to X. 2235 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) || 2236 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { 2237 DEBUG(dbgs() << " Bump cycle to " 2238 << (isTop() ? "end" : "begin") << " group\n"); 2239 bumpCycle(++NextCycle); 2240 } 2241 2242 while (CurrMOps >= SchedModel->getIssueWidth()) { 2243 DEBUG(dbgs() << " *** Max MOps " << CurrMOps 2244 << " at cycle " << CurrCycle << '\n'); 2245 bumpCycle(++NextCycle); 2246 } 2247 DEBUG(dumpScheduledState()); 2248 } 2249 2250 /// Release pending ready nodes into the available queue. This makes them 2251 /// visible to heuristics. 2252 void SchedBoundary::releasePending() { 2253 // If the available queue is empty, it is safe to reset MinReadyCycle. 2254 if (Available.empty()) 2255 MinReadyCycle = std::numeric_limits<unsigned>::max(); 2256 2257 // Check to see if any of the pending instructions are ready to issue. If 2258 // so, add them to the available queue. 2259 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 2260 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 2261 SUnit *SU = *(Pending.begin()+i); 2262 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 2263 2264 if (ReadyCycle < MinReadyCycle) 2265 MinReadyCycle = ReadyCycle; 2266 2267 if (!IsBuffered && ReadyCycle > CurrCycle) 2268 continue; 2269 2270 if (checkHazard(SU)) 2271 continue; 2272 2273 if (Available.size() >= ReadyListLimit) 2274 break; 2275 2276 Available.push(SU); 2277 Pending.remove(Pending.begin()+i); 2278 --i; --e; 2279 } 2280 CheckPending = false; 2281 } 2282 2283 /// Remove SU from the ready set for this boundary. 2284 void SchedBoundary::removeReady(SUnit *SU) { 2285 if (Available.isInQueue(SU)) 2286 Available.remove(Available.find(SU)); 2287 else { 2288 assert(Pending.isInQueue(SU) && "bad ready count"); 2289 Pending.remove(Pending.find(SU)); 2290 } 2291 } 2292 2293 /// If this queue only has one ready candidate, return it. As a side effect, 2294 /// defer any nodes that now hit a hazard, and advance the cycle until at least 2295 /// one node is ready. If multiple instructions are ready, return NULL. 2296 SUnit *SchedBoundary::pickOnlyChoice() { 2297 if (CheckPending) 2298 releasePending(); 2299 2300 if (CurrMOps > 0) { 2301 // Defer any ready instrs that now have a hazard.
2302 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 2303 if (checkHazard(*I)) { 2304 Pending.push(*I); 2305 I = Available.remove(I); 2306 continue; 2307 } 2308 ++I; 2309 } 2310 } 2311 for (unsigned i = 0; Available.empty(); ++i) { 2312 // FIXME: Re-enable assert once PR20057 is resolved. 2313 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && 2314 // "permanent hazard"); 2315 (void)i; 2316 bumpCycle(CurrCycle + 1); 2317 releasePending(); 2318 } 2319 2320 DEBUG(Pending.dump()); 2321 DEBUG(Available.dump()); 2322 2323 if (Available.size() == 1) 2324 return *Available.begin(); 2325 return nullptr; 2326 } 2327 2328 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2329 // This is useful information to dump after bumpNode. 2330 // Note that the Queue contents are more useful before pickNodeFromQueue. 2331 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() { 2332 unsigned ResFactor; 2333 unsigned ResCount; 2334 if (ZoneCritResIdx) { 2335 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); 2336 ResCount = getResourceCount(ZoneCritResIdx); 2337 } else { 2338 ResFactor = SchedModel->getMicroOpFactor(); 2339 ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); 2340 } 2341 unsigned LFactor = SchedModel->getLatencyFactor(); 2342 dbgs() << Available.getName() << " @" << CurrCycle << "c\n" 2343 << " Retired: " << RetiredMOps; 2344 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; 2345 dbgs() << "\n Critical: " << ResCount / LFactor << "c, " 2346 << ResCount / ResFactor << " " 2347 << SchedModel->getResourceName(ZoneCritResIdx) 2348 << "\n ExpectedLatency: " << ExpectedLatency << "c\n" 2349 << (IsResourceLimited ? " - Resource" : " - Latency") 2350 << " limited.\n"; 2351 } 2352 #endif 2353 2354 //===----------------------------------------------------------------------===// 2355 // GenericScheduler - Generic implementation of MachineSchedStrategy. 2356 //===----------------------------------------------------------------------===// 2357 2358 void GenericSchedulerBase::SchedCandidate:: 2359 initResourceDelta(const ScheduleDAGMI *DAG, 2360 const TargetSchedModel *SchedModel) { 2361 if (!Policy.ReduceResIdx && !Policy.DemandResIdx) 2362 return; 2363 2364 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2365 for (TargetSchedModel::ProcResIter 2366 PI = SchedModel->getWriteProcResBegin(SC), 2367 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2368 if (PI->ProcResourceIdx == Policy.ReduceResIdx) 2369 ResDelta.CritResources += PI->Cycles; 2370 if (PI->ProcResourceIdx == Policy.DemandResIdx) 2371 ResDelta.DemandedResources += PI->Cycles; 2372 } 2373 } 2374 2375 /// Set the CandPolicy given a scheduling zone given the current resources and 2376 /// latencies inside and outside the zone. 2377 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, 2378 SchedBoundary &CurrZone, 2379 SchedBoundary *OtherZone) { 2380 // Apply preemptive heuristics based on the total latency and resources 2381 // inside and outside this zone. Potential stalls should be considered before 2382 // following this policy. 2383 2384 // Compute remaining latency. We need this both to determine whether the 2385 // overall schedule has become latency-limited and whether the instructions 2386 // outside this zone are resource or latency limited. 
2387 // 2388 // The "dependent" latency is updated incrementally during scheduling as the 2389 // max height/depth of scheduled nodes minus the cycles since it was 2390 // scheduled: 2391 // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone 2392 // 2393 // The "independent" latency is the max ready queue depth: 2394 // ILat = max N.depth for N in Available|Pending 2395 // 2396 // RemainingLatency is the greater of independent and dependent latency. 2397 unsigned RemLatency = CurrZone.getDependentLatency(); 2398 RemLatency = std::max(RemLatency, 2399 CurrZone.findMaxLatency(CurrZone.Available.elements())); 2400 RemLatency = std::max(RemLatency, 2401 CurrZone.findMaxLatency(CurrZone.Pending.elements())); 2402 2403 // Compute the critical resource outside the zone. 2404 unsigned OtherCritIdx = 0; 2405 unsigned OtherCount = 2406 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; 2407 2408 bool OtherResLimited = false; 2409 if (SchedModel->hasInstrSchedModel()) { 2410 unsigned LFactor = SchedModel->getLatencyFactor(); 2411 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; 2412 } 2413 // Schedule aggressively for latency in PostRA mode. We don't check for 2414 // acyclic latency during PostRA, and highly out-of-order processors will 2415 // skip PostRA scheduling. 2416 if (!OtherResLimited) { 2417 if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { 2418 Policy.ReduceLatency |= true; 2419 DEBUG(dbgs() << " " << CurrZone.Available.getName() 2420 << " RemainingLatency " << RemLatency << " + " 2421 << CurrZone.getCurrCycle() << "c > CritPath " 2422 << Rem.CriticalPath << "\n"); 2423 } 2424 } 2425 // If the same resource is limiting inside and outside the zone, do nothing. 2426 if (CurrZone.getZoneCritResIdx() == OtherCritIdx) 2427 return; 2428 2429 DEBUG( 2430 if (CurrZone.isResourceLimited()) { 2431 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " 2432 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) 2433 << "\n"; 2434 } 2435 if (OtherResLimited) 2436 dbgs() << " RemainingLimit: " 2437 << SchedModel->getResourceName(OtherCritIdx) << "\n"; 2438 if (!CurrZone.isResourceLimited() && !OtherResLimited) 2439 dbgs() << " Latency limited both directions.\n"); 2440 2441 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) 2442 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); 2443 2444 if (OtherResLimited) 2445 Policy.DemandResIdx = OtherCritIdx; 2446 } 2447 2448 #ifndef NDEBUG 2449 const char *GenericSchedulerBase::getReasonStr( 2450 GenericSchedulerBase::CandReason Reason) { 2451 switch (Reason) { 2452 case NoCand: return "NOCAND "; 2453 case Only1: return "ONLY1 "; 2454 case PhysRegCopy: return "PREG-COPY "; 2455 case RegExcess: return "REG-EXCESS"; 2456 case RegCritical: return "REG-CRIT "; 2457 case Stall: return "STALL "; 2458 case Cluster: return "CLUSTER "; 2459 case Weak: return "WEAK "; 2460 case RegMax: return "REG-MAX "; 2461 case ResourceReduce: return "RES-REDUCE"; 2462 case ResourceDemand: return "RES-DEMAND"; 2463 case TopDepthReduce: return "TOP-DEPTH "; 2464 case TopPathReduce: return "TOP-PATH "; 2465 case BotHeightReduce:return "BOT-HEIGHT"; 2466 case BotPathReduce: return "BOT-PATH "; 2467 case NextDefUse: return "DEF-USE "; 2468 case NodeOrder: return "ORDER "; 2469 }; 2470 llvm_unreachable("Unknown reason!"); 2471 } 2472 2473 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { 2474 PressureChange P; 2475 unsigned ResIdx = 0; 2476 unsigned Latency = 
0; 2477 switch (Cand.Reason) { 2478 default: 2479 break; 2480 case RegExcess: 2481 P = Cand.RPDelta.Excess; 2482 break; 2483 case RegCritical: 2484 P = Cand.RPDelta.CriticalMax; 2485 break; 2486 case RegMax: 2487 P = Cand.RPDelta.CurrentMax; 2488 break; 2489 case ResourceReduce: 2490 ResIdx = Cand.Policy.ReduceResIdx; 2491 break; 2492 case ResourceDemand: 2493 ResIdx = Cand.Policy.DemandResIdx; 2494 break; 2495 case TopDepthReduce: 2496 Latency = Cand.SU->getDepth(); 2497 break; 2498 case TopPathReduce: 2499 Latency = Cand.SU->getHeight(); 2500 break; 2501 case BotHeightReduce: 2502 Latency = Cand.SU->getHeight(); 2503 break; 2504 case BotPathReduce: 2505 Latency = Cand.SU->getDepth(); 2506 break; 2507 } 2508 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); 2509 if (P.isValid()) 2510 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) 2511 << ":" << P.getUnitInc() << " "; 2512 else 2513 dbgs() << " "; 2514 if (ResIdx) 2515 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; 2516 else 2517 dbgs() << " "; 2518 if (Latency) 2519 dbgs() << " " << Latency << " cycles "; 2520 else 2521 dbgs() << " "; 2522 dbgs() << '\n'; 2523 } 2524 #endif 2525 2526 /// Return true if this heuristic determines order. 2527 static bool tryLess(int TryVal, int CandVal, 2528 GenericSchedulerBase::SchedCandidate &TryCand, 2529 GenericSchedulerBase::SchedCandidate &Cand, 2530 GenericSchedulerBase::CandReason Reason) { 2531 if (TryVal < CandVal) { 2532 TryCand.Reason = Reason; 2533 return true; 2534 } 2535 if (TryVal > CandVal) { 2536 if (Cand.Reason > Reason) 2537 Cand.Reason = Reason; 2538 return true; 2539 } 2540 return false; 2541 } 2542 2543 static bool tryGreater(int TryVal, int CandVal, 2544 GenericSchedulerBase::SchedCandidate &TryCand, 2545 GenericSchedulerBase::SchedCandidate &Cand, 2546 GenericSchedulerBase::CandReason Reason) { 2547 if (TryVal > CandVal) { 2548 TryCand.Reason = Reason; 2549 return true; 2550 } 2551 if (TryVal < CandVal) { 2552 if (Cand.Reason > Reason) 2553 Cand.Reason = Reason; 2554 return true; 2555 } 2556 return false; 2557 } 2558 2559 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 2560 GenericSchedulerBase::SchedCandidate &Cand, 2561 SchedBoundary &Zone) { 2562 if (Zone.isTop()) { 2563 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { 2564 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2565 TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) 2566 return true; 2567 } 2568 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), 2569 TryCand, Cand, GenericSchedulerBase::TopPathReduce)) 2570 return true; 2571 } else { 2572 if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { 2573 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), 2574 TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) 2575 return true; 2576 } 2577 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), 2578 TryCand, Cand, GenericSchedulerBase::BotPathReduce)) 2579 return true; 2580 } 2581 return false; 2582 } 2583 2584 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { 2585 DEBUG(dbgs() << "Pick " << (IsTop ? 
"Top " : "Bot ") 2586 << GenericSchedulerBase::getReasonStr(Reason) << '\n'); 2587 } 2588 2589 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { 2590 tracePick(Cand.Reason, Cand.AtTop); 2591 } 2592 2593 void GenericScheduler::initialize(ScheduleDAGMI *dag) { 2594 assert(dag->hasVRegLiveness() && 2595 "(PreRA)GenericScheduler needs vreg liveness"); 2596 DAG = static_cast<ScheduleDAGMILive*>(dag); 2597 SchedModel = DAG->getSchedModel(); 2598 TRI = DAG->TRI; 2599 2600 Rem.init(DAG, SchedModel); 2601 Top.init(DAG, SchedModel, &Rem); 2602 Bot.init(DAG, SchedModel, &Rem); 2603 2604 // Initialize resource counts. 2605 2606 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 2607 // are disabled, then these HazardRecs will be disabled. 2608 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 2609 if (!Top.HazardRec) { 2610 Top.HazardRec = 2611 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 2612 Itin, DAG); 2613 } 2614 if (!Bot.HazardRec) { 2615 Bot.HazardRec = 2616 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 2617 Itin, DAG); 2618 } 2619 TopCand.SU = nullptr; 2620 BotCand.SU = nullptr; 2621 } 2622 2623 /// Initialize the per-region scheduling policy. 2624 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, 2625 MachineBasicBlock::iterator End, 2626 unsigned NumRegionInstrs) { 2627 const MachineFunction &MF = *Begin->getParent()->getParent(); 2628 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); 2629 2630 // Avoid setting up the register pressure tracker for small regions to save 2631 // compile time. As a rough heuristic, only track pressure when the number of 2632 // schedulable instructions exceeds half the integer register file. 2633 RegionPolicy.ShouldTrackPressure = true; 2634 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { 2635 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; 2636 if (TLI->isTypeLegal(LegalIntVT)) { 2637 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( 2638 TLI->getRegClassFor(LegalIntVT)); 2639 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); 2640 } 2641 } 2642 2643 // For generic targets, we default to bottom-up, because it's simpler and more 2644 // compile-time optimizations have been implemented in that direction. 2645 RegionPolicy.OnlyBottomUp = true; 2646 2647 // Allow the subtarget to override default policy. 2648 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); 2649 2650 // After subtarget overrides, apply command line options. 2651 if (!EnableRegPressure) 2652 RegionPolicy.ShouldTrackPressure = false; 2653 2654 // Check -misched-topdown/bottomup can force or unforce scheduling direction. 2655 // e.g. -misched-bottomup=false allows scheduling in both directions. 2656 assert((!ForceTopDown || !ForceBottomUp) && 2657 "-misched-topdown incompatible with -misched-bottomup"); 2658 if (ForceBottomUp.getNumOccurrences() > 0) { 2659 RegionPolicy.OnlyBottomUp = ForceBottomUp; 2660 if (RegionPolicy.OnlyBottomUp) 2661 RegionPolicy.OnlyTopDown = false; 2662 } 2663 if (ForceTopDown.getNumOccurrences() > 0) { 2664 RegionPolicy.OnlyTopDown = ForceTopDown; 2665 if (RegionPolicy.OnlyTopDown) 2666 RegionPolicy.OnlyBottomUp = false; 2667 } 2668 } 2669 2670 void GenericScheduler::dumpPolicy() { 2671 // Cannot completely remove virtual function even in release mode. 
2672 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2673 dbgs() << "GenericScheduler RegionPolicy: " 2674 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure 2675 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown 2676 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp 2677 << "\n"; 2678 #endif 2679 } 2680 2681 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic 2682 /// critical path by more cycles than it takes to drain the instruction buffer. 2683 /// We estimate an upper bounds on in-flight instructions as: 2684 /// 2685 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) 2686 /// InFlightIterations = AcyclicPath / CyclesPerIteration 2687 /// InFlightResources = InFlightIterations * LoopResources 2688 /// 2689 /// TODO: Check execution resources in addition to IssueCount. 2690 void GenericScheduler::checkAcyclicLatency() { 2691 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) 2692 return; 2693 2694 // Scaled number of cycles per loop iteration. 2695 unsigned IterCount = 2696 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), 2697 Rem.RemIssueCount); 2698 // Scaled acyclic critical path. 2699 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); 2700 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop 2701 unsigned InFlightCount = 2702 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; 2703 unsigned BufferLimit = 2704 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); 2705 2706 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; 2707 2708 DEBUG(dbgs() << "IssueCycles=" 2709 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " 2710 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() 2711 << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount 2712 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() 2713 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; 2714 if (Rem.IsAcyclicLatencyLimited) 2715 dbgs() << " ACYCLIC LATENCY LIMIT\n"); 2716 } 2717 2718 void GenericScheduler::registerRoots() { 2719 Rem.CriticalPath = DAG->ExitSU.getDepth(); 2720 2721 // Some roots may not feed into ExitSU. Check all of them in case. 2722 for (std::vector<SUnit*>::const_iterator 2723 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { 2724 if ((*I)->getDepth() > Rem.CriticalPath) 2725 Rem.CriticalPath = (*I)->getDepth(); 2726 } 2727 DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); 2728 if (DumpCriticalPathLength) { 2729 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; 2730 } 2731 2732 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { 2733 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); 2734 checkAcyclicLatency(); 2735 } 2736 } 2737 2738 static bool tryPressure(const PressureChange &TryP, 2739 const PressureChange &CandP, 2740 GenericSchedulerBase::SchedCandidate &TryCand, 2741 GenericSchedulerBase::SchedCandidate &Cand, 2742 GenericSchedulerBase::CandReason Reason, 2743 const TargetRegisterInfo *TRI, 2744 const MachineFunction &MF) { 2745 // If one candidate decreases and the other increases, go with it. 2746 // Invalid candidates have UnitInc==0. 2747 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, 2748 Reason)) { 2749 return true; 2750 } 2751 // Do not compare the magnitude of pressure changes between top and bottom 2752 // boundary. 
2753 if (Cand.AtTop != TryCand.AtTop) 2754 return false; 2755 2756 // If both candidates affect the same set in the same boundary, go with the 2757 // smallest increase. 2758 unsigned TryPSet = TryP.getPSetOrMax(); 2759 unsigned CandPSet = CandP.getPSetOrMax(); 2760 if (TryPSet == CandPSet) { 2761 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, 2762 Reason); 2763 } 2764 2765 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : 2766 std::numeric_limits<int>::max(); 2767 2768 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : 2769 std::numeric_limits<int>::max(); 2770 2771 // If the candidates are decreasing pressure, reverse priority. 2772 if (TryP.getUnitInc() < 0) 2773 std::swap(TryRank, CandRank); 2774 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); 2775 } 2776 2777 static unsigned getWeakLeft(const SUnit *SU, bool isTop) { 2778 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; 2779 } 2780 2781 /// Minimize physical register live ranges. Regalloc wants them adjacent to 2782 /// their physreg def/use. 2783 /// 2784 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf 2785 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled 2786 /// with the operation that produces or consumes the physreg. We'll do this when 2787 /// regalloc has support for parallel copies. 2788 static int biasPhysRegCopy(const SUnit *SU, bool isTop) { 2789 const MachineInstr *MI = SU->getInstr(); 2790 if (!MI->isCopy()) 2791 return 0; 2792 2793 unsigned ScheduledOper = isTop ? 1 : 0; 2794 unsigned UnscheduledOper = isTop ? 0 : 1; 2795 // If we have already scheduled the physreg produce/consumer, immediately 2796 // schedule the copy. 2797 if (TargetRegisterInfo::isPhysicalRegister( 2798 MI->getOperand(ScheduledOper).getReg())) 2799 return 1; 2800 // If the physreg is at the boundary, defer it. Otherwise schedule it 2801 // immediately to free the dependent. We can hoist the copy later. 2802 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; 2803 if (TargetRegisterInfo::isPhysicalRegister( 2804 MI->getOperand(UnscheduledOper).getReg())) 2805 return AtBoundary ? -1 : 1; 2806 return 0; 2807 } 2808 2809 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, 2810 bool AtTop, 2811 const RegPressureTracker &RPTracker, 2812 RegPressureTracker &TempTracker) { 2813 Cand.SU = SU; 2814 Cand.AtTop = AtTop; 2815 if (DAG->isTrackingPressure()) { 2816 if (AtTop) { 2817 TempTracker.getMaxDownwardPressureDelta( 2818 Cand.SU->getInstr(), 2819 Cand.RPDelta, 2820 DAG->getRegionCriticalPSets(), 2821 DAG->getRegPressure().MaxSetPressure); 2822 } else { 2823 if (VerifyScheduling) { 2824 TempTracker.getMaxUpwardPressureDelta( 2825 Cand.SU->getInstr(), 2826 &DAG->getPressureDiff(Cand.SU), 2827 Cand.RPDelta, 2828 DAG->getRegionCriticalPSets(), 2829 DAG->getRegPressure().MaxSetPressure); 2830 } else { 2831 RPTracker.getUpwardPressureDelta( 2832 Cand.SU->getInstr(), 2833 DAG->getPressureDiff(Cand.SU), 2834 Cand.RPDelta, 2835 DAG->getRegionCriticalPSets(), 2836 DAG->getRegPressure().MaxSetPressure); 2837 } 2838 } 2839 } 2840 DEBUG(if (Cand.RPDelta.Excess.isValid()) 2841 dbgs() << " Try SU(" << Cand.SU->NodeNum << ") " 2842 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) 2843 << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n"); 2844 } 2845 2846 /// Apply a set of heursitics to a new candidate. Heuristics are currently 2847 /// hierarchical. 
This may be more efficient than a graduated cost model because 2848 /// we don't need to evaluate all aspects of the model for each node in the 2849 /// queue. But it's really done to make the heuristics easier to debug and 2850 /// statistically analyze. 2851 /// 2852 /// \param Cand provides the policy and current best candidate. 2853 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 2854 /// \param Zone describes the scheduled zone that we are extending, or nullptr 2855 /// if Cand is from a different zone than TryCand. 2856 void GenericScheduler::tryCandidate(SchedCandidate &Cand, 2857 SchedCandidate &TryCand, 2858 SchedBoundary *Zone) { 2859 // Initialize the candidate if needed. 2860 if (!Cand.isValid()) { 2861 TryCand.Reason = NodeOrder; 2862 return; 2863 } 2864 2865 if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop), 2866 biasPhysRegCopy(Cand.SU, Cand.AtTop), 2867 TryCand, Cand, PhysRegCopy)) 2868 return; 2869 2870 // Avoid exceeding the target's limit. 2871 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, 2872 Cand.RPDelta.Excess, 2873 TryCand, Cand, RegExcess, TRI, 2874 DAG->MF)) 2875 return; 2876 2877 // Avoid increasing the max critical pressure in the scheduled region. 2878 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, 2879 Cand.RPDelta.CriticalMax, 2880 TryCand, Cand, RegCritical, TRI, 2881 DAG->MF)) 2882 return; 2883 2884 // We only compare a subset of features when comparing nodes between 2885 // Top and Bottom boundary. Some properties are simply incomparable; in many 2886 // other instances we should only override the other boundary if something 2887 // is a clear good pick on one boundary. Skip heuristics that are more 2888 // "tie-breaking" in nature. 2889 bool SameBoundary = Zone != nullptr; 2890 if (SameBoundary) { 2891 // For loops that are acyclic path limited, aggressively schedule for 2892 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal 2893 // heuristics to take precedence. 2894 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && 2895 tryLatency(TryCand, Cand, *Zone)) 2896 return; 2897 2898 // Prioritize instructions that read unbuffered resources by stall cycles. 2899 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), 2900 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) 2901 return; 2902 } 2903 2904 // Keep clustered nodes together to encourage downstream peephole 2905 // optimizations which may reduce resource requirements. 2906 // 2907 // This is a best effort to set things up for a post-RA pass. Optimizations 2908 // like generating loads of multiple registers should ideally be done within 2909 // the scheduler pass by combining the loads during DAG postprocessing. 2910 const SUnit *CandNextClusterSU = 2911 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 2912 const SUnit *TryCandNextClusterSU = 2913 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 2914 if (tryGreater(TryCand.SU == TryCandNextClusterSU, 2915 Cand.SU == CandNextClusterSU, 2916 TryCand, Cand, Cluster)) 2917 return; 2918 2919 if (SameBoundary) { 2920 // Weak edges are for clustering and other constraints. 2921 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), 2922 getWeakLeft(Cand.SU, Cand.AtTop), 2923 TryCand, Cand, Weak)) 2924 return; 2925 } 2926 2927 // Avoid increasing the max pressure of the entire region.
2928 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, 2929 Cand.RPDelta.CurrentMax, 2930 TryCand, Cand, RegMax, TRI, 2931 DAG->MF)) 2932 return; 2933 2934 if (SameBoundary) { 2935 // Avoid critical resource consumption and balance the schedule. 2936 TryCand.initResourceDelta(DAG, SchedModel); 2937 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 2938 TryCand, Cand, ResourceReduce)) 2939 return; 2940 if (tryGreater(TryCand.ResDelta.DemandedResources, 2941 Cand.ResDelta.DemandedResources, 2942 TryCand, Cand, ResourceDemand)) 2943 return; 2944 2945 // Avoid serializing long latency dependence chains. 2946 // For acyclic path limited loops, latency was already checked above. 2947 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && 2948 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) 2949 return; 2950 2951 // Fall through to original instruction order. 2952 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) 2953 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { 2954 TryCand.Reason = NodeOrder; 2955 } 2956 } 2957 } 2958 2959 /// Pick the best candidate from the queue. 2960 /// 2961 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 2962 /// DAG building. To adjust for the current scheduling location we need to 2963 /// maintain the number of vreg uses remaining to be top-scheduled. 2964 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, 2965 const CandPolicy &ZonePolicy, 2966 const RegPressureTracker &RPTracker, 2967 SchedCandidate &Cand) { 2968 // getMaxPressureDelta temporarily modifies the tracker. 2969 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 2970 2971 ReadyQueue &Q = Zone.Available; 2972 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 2973 2974 SchedCandidate TryCand(ZonePolicy); 2975 initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker); 2976 // Pass SchedBoundary only when comparing nodes from the same boundary. 2977 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 2978 tryCandidate(Cand, TryCand, ZoneArg); 2979 if (TryCand.Reason != NoCand) { 2980 // Initialize resource delta if needed in case future heuristics query it. 2981 if (TryCand.ResDelta == SchedResourceDelta()) 2982 TryCand.initResourceDelta(DAG, SchedModel); 2983 Cand.setBest(TryCand); 2984 DEBUG(traceCandidate(Cand)); 2985 } 2986 } 2987 } 2988 2989 /// Pick the best candidate node from either the top or bottom queue. 2990 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { 2991 // Schedule as far as possible in the direction of no choice. This is most 2992 // efficient, but also provides the best heuristics for CriticalPSets. 2993 if (SUnit *SU = Bot.pickOnlyChoice()) { 2994 IsTopNode = false; 2995 tracePick(Only1, false); 2996 return SU; 2997 } 2998 if (SUnit *SU = Top.pickOnlyChoice()) { 2999 IsTopNode = true; 3000 tracePick(Only1, true); 3001 return SU; 3002 } 3003 // Set the bottom-up policy based on the state of the current bottom zone and 3004 // the instructions outside the zone, including the top zone. 3005 CandPolicy BotPolicy; 3006 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 3007 // Set the top-down policy based on the state of the current top zone and 3008 // the instructions outside the zone, including the bottom zone. 
3009 CandPolicy TopPolicy; 3010 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 3011 3012 // See if BotCand is still valid (because we previously scheduled from Top). 3013 DEBUG(dbgs() << "Picking from Bot:\n"); 3014 if (!BotCand.isValid() || BotCand.SU->isScheduled || 3015 BotCand.Policy != BotPolicy) { 3016 BotCand.reset(CandPolicy()); 3017 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 3018 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 3019 } else { 3020 DEBUG(traceCandidate(BotCand)); 3021 #ifndef NDEBUG 3022 if (VerifyScheduling) { 3023 SchedCandidate TCand; 3024 TCand.reset(CandPolicy()); 3025 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); 3026 assert(TCand.SU == BotCand.SU && 3027 "Last pick result should correspond to re-picking right now"); 3028 } 3029 #endif 3030 } 3031 3032 // Check if the top Q has a better candidate. 3033 DEBUG(dbgs() << "Picking from Top:\n"); 3034 if (!TopCand.isValid() || TopCand.SU->isScheduled || 3035 TopCand.Policy != TopPolicy) { 3036 TopCand.reset(CandPolicy()); 3037 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 3038 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 3039 } else { 3040 DEBUG(traceCandidate(TopCand)); 3041 #ifndef NDEBUG 3042 if (VerifyScheduling) { 3043 SchedCandidate TCand; 3044 TCand.reset(CandPolicy()); 3045 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); 3046 assert(TCand.SU == TopCand.SU && 3047 "Last pick result should correspond to re-picking right now"); 3048 } 3049 #endif 3050 } 3051 3052 // Pick best from BotCand and TopCand. 3053 assert(BotCand.isValid()); 3054 assert(TopCand.isValid()); 3055 SchedCandidate Cand = BotCand; 3056 TopCand.Reason = NoCand; 3057 tryCandidate(Cand, TopCand, nullptr); 3058 if (TopCand.Reason != NoCand) { 3059 Cand.setBest(TopCand); 3060 DEBUG(traceCandidate(Cand)); 3061 } 3062 3063 IsTopNode = Cand.AtTop; 3064 tracePick(Cand); 3065 return Cand.SU; 3066 } 3067 3068 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 
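///
/// Rough flow, as a sketch rather than a contract: regions forced to a single
/// direction consult only that zone's queue; otherwise pickNodeBidirectional()
/// compares the best top and bottom candidates. The do/while loop below skips
/// candidates that were already scheduled, since a node can be ready in both
/// zones at once.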
3069 SUnit *GenericScheduler::pickNode(bool &IsTopNode) { 3070 if (DAG->top() == DAG->bottom()) { 3071 assert(Top.Available.empty() && Top.Pending.empty() && 3072 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 3073 return nullptr; 3074 } 3075 SUnit *SU; 3076 do { 3077 if (RegionPolicy.OnlyTopDown) { 3078 SU = Top.pickOnlyChoice(); 3079 if (!SU) { 3080 CandPolicy NoPolicy; 3081 TopCand.reset(NoPolicy); 3082 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 3083 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 3084 tracePick(TopCand); 3085 SU = TopCand.SU; 3086 } 3087 IsTopNode = true; 3088 } else if (RegionPolicy.OnlyBottomUp) { 3089 SU = Bot.pickOnlyChoice(); 3090 if (!SU) { 3091 CandPolicy NoPolicy; 3092 BotCand.reset(NoPolicy); 3093 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 3094 assert(BotCand.Reason != NoCand && "failed to find a candidate"); 3095 tracePick(BotCand); 3096 SU = BotCand.SU; 3097 } 3098 IsTopNode = false; 3099 } else { 3100 SU = pickNodeBidirectional(IsTopNode); 3101 } 3102 } while (SU->isScheduled); 3103 3104 if (SU->isTopReady()) 3105 Top.removeReady(SU); 3106 if (SU->isBottomReady()) 3107 Bot.removeReady(SU); 3108 3109 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); 3110 return SU; 3111 } 3112 3113 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { 3114 MachineBasicBlock::iterator InsertPos = SU->getInstr(); 3115 if (!isTop) 3116 ++InsertPos; 3117 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; 3118 3119 // Find already scheduled copies with a single physreg dependence and move 3120 // them just above the scheduled instruction. 3121 for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); 3122 I != E; ++I) { 3123 if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) 3124 continue; 3125 SUnit *DepSU = I->getSUnit(); 3126 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) 3127 continue; 3128 MachineInstr *Copy = DepSU->getInstr(); 3129 if (!Copy->isCopy()) 3130 continue; 3131 DEBUG(dbgs() << " Rescheduling physreg copy "; 3132 I->getSUnit()->dump(DAG)); 3133 DAG->moveInstruction(Copy, InsertPos); 3134 } 3135 } 3136 3137 /// Update the scheduler's state after scheduling a node. This is the same node 3138 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to 3139 /// update its state based on the current cycle before MachineSchedStrategy 3140 /// does. 3141 /// 3142 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling 3143 /// them here. See comments in biasPhysRegCopy. 3144 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 3145 if (IsTopNode) { 3146 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); 3147 Top.bumpNode(SU); 3148 if (SU->hasPhysRegUses) 3149 reschedulePhysRegCopies(SU, true); 3150 } else { 3151 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); 3152 Bot.bumpNode(SU); 3153 if (SU->hasPhysRegDefs) 3154 reschedulePhysRegCopies(SU, false); 3155 } 3156 } 3157 3158 /// Create the standard converging machine scheduler. This will be used as the 3159 /// default scheduler if the target does not set a default. 3160 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { 3161 ScheduleDAGMILive *DAG = 3162 new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)); 3163 // Register DAG post-processors.
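  //
  // For illustration only: a hypothetical target-specific scheduler hook could
  // attach the memop clustering mutations defined earlier in this file in the
  // same way, e.g.
  //   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  //   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  //   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  // This generic constructor only registers the copy-constraining mutation
  // below.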

static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
  return createGenericSchedLive(C);
}

static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
                     createConveringSched);

//===----------------------------------------------------------------------===//
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
//===----------------------------------------------------------------------===//

void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
  DAG = Dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;

  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  BotRoots.clear();

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  if (!Top.HazardRec) {
    Top.HazardRec =
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
            Itin, DAG);
  }
}

void PostGenericScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();

  // Some roots may not feed into ExitSU. Check all of them in case.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
  if (DumpCriticalPathLength) {
    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
  }
}

/// Apply a set of heuristics to a new candidate for PostRA scheduling.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
                                        SchedCandidate &TryCand) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(),
                 TryCand, Cand, Cluster))
    return;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    return;
  }

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;
}

void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
  ReadyQueue &Q = Top.Available;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    TryCand.AtTop = true;
    TryCand.initResourceDelta(DAG, SchedModel);
    tryCandidate(Cand, TryCand);
    if (TryCand.Reason != NoCand) {
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand));
    }
  }
}

/// Pick the next node to schedule.
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    SU = Top.pickOnlyChoice();
    if (SU) {
      tracePick(Only1, true);
    } else {
      CandPolicy NoPolicy;
      SchedCandidate TopCand(NoPolicy);
      // Set the top-down policy based on the state of the current top zone and
      // the instructions outside the zone, including the bottom zone.
      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
      pickNodeFromQueue(TopCand);
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
      tracePick(TopCand);
      SU = TopCand.SU;
    }
  } while (SU->isScheduled);

  IsTopNode = true;
  Top.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}

/// Called after ScheduleDAGMI has scheduled an instruction and updated
/// scheduled/remaining flags in the DAG nodes.
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  Top.bumpNode(SU);
}

ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
                           /*RemoveKillFlags=*/true);
}
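
// Illustrative sketch only: a target that wants this post-RA strategy would
// typically return it from TargetPassConfig::createPostMachineScheduler(),
// optionally adding its own mutations first. The XYZ name is hypothetical.
//
//   ScheduleDAGInstrs *
//   XYZPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     return createGenericSchedPostRA(C);
//   }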

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {

/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult = nullptr;
  const BitVector *ScheduledTrees = nullptr;
  bool MaximizeILP;

  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
            < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGMILive *DAG = nullptr;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;

public:
  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}

  void initialize(ScheduleDAGMI *dag) override {
    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
    DAG = static_cast<ScheduleDAGMILive*>(dag);
    DAG->computeDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  void registerRoots() override {
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  SUnit *pickNode(bool &IsTopNode) override {
    if (ReadyQ.empty()) return nullptr;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
              DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
          << "Scheduling " << *SU->getInstr());
    return SU;
  }

  /// \brief Scheduler callback to notify that a new subtree is scheduled.
  void scheduleTree(unsigned SubtreeID) override {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  void schedNode(SUnit *SU, bool IsTopNode) override {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }

  void releaseBottomNode(SUnit *SU) override {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};

} // end anonymous namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
}

static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
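
// Usage note (assuming the registry-driven -misched= option defined earlier in
// this file): these experimental strategies can be requested explicitly, e.g.
//
//   llc -misched=ilpmax foo.ll   // schedule bottom-up for maximum ILP
//   llc -misched=ilpmin foo.ll   // schedule bottom-up for minimum ILP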

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {

/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
    TopQ;

  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
    BottomQ;

public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  void initialize(ScheduleDAGMI*) override {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  SUnit *pickNode(bool &IsTopNode) override {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return nullptr;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    } else {
      do {
        if (BottomQ.empty()) return nullptr;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  void schedNode(SUnit *SU, bool IsTopNode) override {}

  void releaseTopNode(SUnit *SU) override {
    TopQ.push(SU);
  }
  void releaseBottomNode(SUnit *SU) override {
    BottomQ.push(SU);
  }
};

} // end anonymous namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMILive(
      C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
}

static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMILive.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return G->MF.getName();
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node) {
    if (ViewMISchedCutoff == 0)
      return false;
    return (Node->Preds.size() > ViewMISchedCutoff
         || Node->Succs.size() > ViewMISchedCutoff);
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    SS << "SU:" << SU->NodeNum;
    if (DFS)
      SS << " I:" << DFS->getNumInstrs(SU);
    return SS.str();
  }

  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    std::string Str("shape=Mrecord");
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};

} // end namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
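
// Debugger sketch (illustrative): in a debug build on a system with Graphviz
// or gv, the region currently being scheduled can be rendered from gdb via the
// no-argument overload, e.g.
//
//   (gdb) call DAG->viewGraph()
//
// where DAG is whatever ScheduleDAGMI pointer is live in the current frame.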