//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"

#include <queue>

using namespace llvm;

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice, then
// it can be specified by the machine model. For now, it's an experimental
// tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
  cl::desc("Allow expected latency to exceed the critical path by N cycles "
           "before attempting to balance ILP"),
  cl::init(10U));

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
  MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
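/// It derives from MachineSchedContext, so the analyses initialized in
/// runOnMachineFunction are directly visible to the ScheduleDAGInstrs
/// instance constructed through the MachineSchedRegistry factory.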
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
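/// (Scheduling regions are delimited by the instructions for which
/// TargetInstrInfo::isSchedulingBoundary returns true; see the region loop
/// below.)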
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for (MachineBasicBlock::iterator RegionEnd = MBB->end();
         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for (; I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
***\n"; 344 PredSU->dump(this); 345 dbgs() << " has been released too many times!\n"; 346 llvm_unreachable(0); 347 } 348 #endif 349 --PredSU->NumSuccsLeft; 350 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 351 SchedImpl->releaseBottomNode(PredSU); 352 } 353 354 /// releasePredecessors - Call releasePred on each of SU's predecessors. 355 void ScheduleDAGMI::releasePredecessors(SUnit *SU) { 356 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 357 I != E; ++I) { 358 releasePred(SU, &*I); 359 } 360 } 361 362 void ScheduleDAGMI::moveInstruction(MachineInstr *MI, 363 MachineBasicBlock::iterator InsertPos) { 364 // Advance RegionBegin if the first instruction moves down. 365 if (&*RegionBegin == MI) 366 ++RegionBegin; 367 368 // Update the instruction stream. 369 BB->splice(InsertPos, BB, MI); 370 371 // Update LiveIntervals 372 LIS->handleMove(MI, /*UpdateFlags=*/true); 373 374 // Recede RegionBegin if an instruction moves above the first. 375 if (RegionBegin == InsertPos) 376 RegionBegin = MI; 377 } 378 379 bool ScheduleDAGMI::checkSchedLimit() { 380 #ifndef NDEBUG 381 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 382 CurrentTop = CurrentBottom; 383 return false; 384 } 385 ++NumInstrsScheduled; 386 #endif 387 return true; 388 } 389 390 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 391 /// crossing a scheduling boundary. [begin, end) includes all instructions in 392 /// the region, including the boundary itself and single-instruction regions 393 /// that don't get scheduled. 394 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 395 MachineBasicBlock::iterator begin, 396 MachineBasicBlock::iterator end, 397 unsigned endcount) 398 { 399 ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); 400 401 // For convenience remember the end of the liveness region. 402 LiveRegionEnd = 403 (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 404 } 405 406 // Setup the register pressure trackers for the top scheduled top and bottom 407 // scheduled regions. 408 void ScheduleDAGMI::initRegPressure() { 409 TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 410 BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 411 412 // Close the RPTracker to finalize live ins. 413 RPTracker.closeRegion(); 414 415 DEBUG(RPTracker.getPressure().dump(TRI)); 416 417 // Initialize the live ins and live outs. 418 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 419 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 420 421 // Close one end of the tracker so we can call 422 // getMaxUpward/DownwardPressureDelta before advancing across any 423 // instructions. This converts currently live regs into live ins/outs. 424 TopRPTracker.closeTop(); 425 BotRPTracker.closeBottom(); 426 427 // Account for liveness generated by the region boundary. 428 if (LiveRegionEnd != RegionEnd) 429 BotRPTracker.recede(); 430 431 assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); 432 433 // Cache the list of excess pressure sets in this region. This will also track 434 // the max pressure in the scheduled code for these sets. 
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << " Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = top()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and set up three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);
  if (ViewMISchedDAGs) viewGraph();

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

/// Apply each ScheduleDAGMutation step in order.
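/// Mutations may add or modify DAG edges, so they run after DAG building but
/// before the ready queues are initialized in initQueues().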
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

// Release all DAG roots for scheduling.
void ScheduleDAGMI::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    // A SUnit is ready to top schedule if it has no predecessors.
    if (I->Preds.empty())
      SchedImpl->releaseTopNode(&(*I));
    // A SUnit is ready to bottom schedule if it has no successors.
    if (I->Succs.empty())
      BotRoots.push_back(&(*I));
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// Identify DAG roots and set up scheduler queues.
void ScheduleDAGMI::initQueues() {

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Release edges from the special Entry node or to the special Exit node.
  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  // Release all DAG roots for scheduling.
  releaseRoots();

  SchedImpl->registerRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    // Update top scheduled pressure.
    TopRPTracker.advance();
    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    // Update bottom scheduled pressure.
    BotRPTracker.recede();
    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  }
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;

  // Notify the scheduling strategy after updating the DAG.
  SchedImpl->schedNode(SU, IsTopNode);
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
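  // DBG_VALUEs were pulled out of the region and recorded by the DAG builder
  // in FirstDbgValue/DbgValues; splice each one back in immediately after the
  // instruction it originally followed.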
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//

namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
    BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
    SingleMax, MultiPressure, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by ConvergingScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
    : Policy(policy), SU(NULL), Reason(NoCand) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;

    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
    // Critical resource for the unscheduled zone.
    unsigned CritResIdx;
    // Number of micro-ops left to schedule.
    unsigned RemainingMicroOps;
    // Is the unscheduled zone resource limited.
    bool IsResourceLimited;

    unsigned MaxRemainingCount;

    void reset() {
      CriticalPath = 0;
      RemainingCounts.clear();
      CritResIdx = 0;
      RemainingMicroOps = 0;
      IsResourceLimited = false;
      MaxRemainingCount = 0;
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  };

  /// Each Scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    unsigned CurrCycle;
    unsigned IssueCount;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // Resources used in the scheduled zone beyond this boundary.
    SmallVector<unsigned, 16> ResourceCounts;

    // Cache the critical resources ID in this scheduled zone.
    unsigned CritResIdx;

    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

    unsigned ExpectedCount;

    // Policy flag: attempt to find ILP until expected latency is covered.
    bool ShouldIncreaseILP;

#ifndef NDEBUG
    // Remember the greatest min operand latency.
    unsigned MaxMinLatency;
#endif

    void reset() {
      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      HazardRec = 0;
      CurrCycle = 0;
      IssueCount = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      // Reserve a zero-count for invalid CritResIdx.
      ResourceCounts.resize(1);
      assert(!ResourceCounts[0] && "nonzero count for bad resource");
      CritResIdx = 0;
      IsResourceLimited = false;
      ExpectedCount = 0;
      ShouldIncreaseILP = false;
#ifndef NDEBUG
      MaxMinLatency = 0;
#endif
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
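    /// For example, with TopQID == 1 and LogMaxQID == 2, the top zone's
    /// Available queue has ID 1 while its Pending queue has ID 1 << 2 == 4.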
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == ConvergingScheduler::TopQID;
    }

    unsigned getUnscheduledLatency(SUnit *SU) const {
      if (isTop())
        return SU->getHeight();
      return SU->getDepth();
    }

    unsigned getCriticalCount() const {
      return ResourceCounts[CritResIdx];
    }

    bool checkHazard(SUnit *SU);

    void checkILPPolicy();

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle();

    void countResource(unsigned PIdx, unsigned Cycles);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();
  };

private:
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}
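
// Like SchedRemainder::init, size ResourceCounts to the number of processor
// resource kinds. Index 0 is reserved as the invalid CritResIdx sentinel and
// always holds a zero count (see SchedBoundary::reset).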
void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

/// If expected latency is covered, disable ILP policy.
void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
  if (ShouldIncreaseILP
      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
    ShouldIncreaseILP = false;
    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
  }
}

void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);

  // If CriticalPath has been computed, then check if the unscheduled nodes
  // exceed the ILP window. Before registerRoots, CriticalPath==0.
  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
                            > Rem->CriticalPath + ILPWindow)) {
    ShouldIncreaseILP = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
  }
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Add the given processor resource to this scheduled zone.
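///
/// \param Cycles is the resource's occupancy in cycles; it is scaled below by
/// the resource's factor so counts are comparable across resource kinds
/// (e.g. 3 cycles on a resource with factor 2 contributes 6 units).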
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Reset MaxRemainingCount for sanity.
  Rem->MaxRemainingCount = 0;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << "  *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i; --e;
  }
  DEBUG(if (!Pending.empty()) Pending.dump());
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here. Instead, when releasing each candidate, releaseNode
/// compares the region's critical path to the candidate's height or depth and
/// the scheduled zone's expected latency, then sets ShouldIncreaseILP.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  if (!CriticalZone.IsResourceLimited)
    return;

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or critical path and
/// set each candidate's ReduceHeight policy accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  Bot.checkILPPolicy();
  Top.checkILPPolicy();
  if (Bot.ShouldIncreaseILP)
    BotCand.Policy.ReduceLatency = true;
  if (Top.ShouldIncreaseILP)
    TopCand.Policy.ReduceLatency = true;

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
    // to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
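  // Costs are normalized to the machine model's latency factor: cycle counts
  // are scaled by getLatencyFactor so they can be compared directly against
  // factor-scaled resource counts such as getCriticalCount().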
  Rem.MaxRemainingCount = std::max(
    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
    Rem.RemainingCounts[Rem.CritResIdx]);
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
    TryCand.Reason = NextDefUse;
    return;
  }
  if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
    if (Cand.Reason > NextDefUse)
      Cand.Reason = NextDefUse;
    return;
  }
  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  };
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << "         ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "        ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << "         ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
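///
/// Each node in the zone's Available queue is run through tryCandidate; the
/// winning candidate's Reason records which heuristic decided the pick.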

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << "          ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "          ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << "          ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}
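
// The loop in pickNodeFromQueue is a max-reduction: Cand carries the best
// candidate seen so far, and tryCandidate either leaves TryCand.Reason at
// NoCand (current best wins) or records why TryCand is better, in which case
// setBest adopts it. The same shape over a plain container, as a minimal
// sketch (betterThan is a hypothetical scoring predicate, not part of this
// file):
//
//   SUnit *Best = 0;
//   for (std::vector<SUnit*>::iterator I = Q.begin(), E = Q.end(); I != E; ++I)
//     if (!Best || betterThan(*I, Best))
//       Best = *I;
//
// Keeping the whole comparison in one place (tryCandidate) is what lets the
// heuristics stay strictly prioritized.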

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the candidate with the stronger reason, falling back to
  // the bottom candidate in node order when all else ties.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new ConvergingScheduler());
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);
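
// Registering an alternative strategy follows the same pattern as
// ConvergingSchedRegistry above: define a factory that wraps the strategy in
// a ScheduleDAGMI, then create a file-static registry entry. A minimal
// sketch (MyStrategy and the "mysched" name are hypothetical):
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MyStrategy());
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("mysched", "Experimental strategy.", createMySched);
//
// The entry then becomes selectable on the command line via -misched=mysched.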

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  ScheduleDAGILP *ILP;
  bool MaximizeILP;

  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  bool operator()(const SUnit *A, const SUnit *B) const {
    // Return true if A comes after B in the Q.
    if (MaximizeILP)
      return ILP->getILP(A) < ILP->getILP(B);
    else
      return ILP->getILP(A) > ILP->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGILP ILP;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP)
    : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *DAG) {
    ReadyQ.clear();
    ILP.resize(DAG->SUnits.size());
  }

  virtual void registerRoots() {
    for (std::vector<SUnit*>::const_iterator
           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
      ILP.computeILP(*I);
    }
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
          << " ILP: " << ILP.getILP(SU) << '\n');
    return SU;
  }

  virtual void schedNode(SUnit *, bool) {}

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
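
// ILPScheduler maintains ReadyQ as a standard-library max-heap: std::push_heap
// after push_back sifts the new node up, and std::pop_heap moves the
// highest-priority node (per Cmp) to the back where pop_back removes it. The
// same idiom in isolation (illustrative values only):
//
//   std::vector<int> Q;
//   Q.push_back(3); std::push_heap(Q.begin(), Q.end());
//   Q.push_back(7); std::push_heap(Q.begin(), Q.end());
//   std::pop_heap(Q.begin(), Q.end());  // 7 is now Q.back()
//   Q.pop_back();
//
// Because Cmp returns true when A should come after B in the queue,
// MaximizeILP surfaces the node with the highest ILP value first.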

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *, bool) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG
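
// Note on the queue direction above: like std::priority_queue,
// llvm::PriorityQueue treats its comparator as less-than and surfaces the
// maximum element. With SUnitOrder<false>, TopQ.top() is therefore the
// highest-numbered (latest) unscheduled instruction; with SUnitOrder<true>,
// BottomQ.top() is the lowest-numbered (earliest) one. Each direction thus
// picks the instruction farthest from its boundary, maximizing reordering.
// Illustrative values:
//
//   PriorityQueue<int, std::vector<int>, std::less<int> > Q;
//   Q.push(3); Q.push(7); Q.push(5);
//   // Q.top() == 7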