//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"

#include <queue>

using namespace llvm;

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice,
// then it can be specified by the machine model. For now, it's an
// experimental tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
  cl::desc("Allow expected latency to exceed the critical path by N cycles "
           "before attempting to balance ILP"),
  cl::init(10U));

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}
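// Example (illustrative): the options above are normally exercised from the
// llc command line, assuming a build and target where this pass is enabled:
//
//   llc -enable-misched -misched=converge -misched-topdown foo.ll
//   llc -enable-misched -misched-cutoff=20 -debug-only=misched foo.ll
//
// The names accepted by -misched are those registered through
// MachineSchedRegistry below; -misched-cutoff and -view-misched-dags are
// only available in builds without NDEBUG.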
namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}
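// Example (illustrative): besides selecting among built-in schedulers, target
// or plugin code can register its own factory so that it becomes selectable
// via -misched=<name>. The factory and strategy names here are hypothetical;
// the converging scheduler registers itself the same way near the end of
// this file:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MySchedStrategy());
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("mysched", "Run my experimental scheduler.",
//                   createMySched);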
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but
/// is consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule
    // each region as soon as it is discovered. RegionEnd points to the
    // scheduling boundary at the bottom of the region. The DAG does not
    // include RegionEnd, but the region does (i.e. the next RegionEnd is
    // above the previous RegionBegin). If the current block has no
    // terminator, then RegionEnd == MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in
      // the instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
***\n"; 352 PredSU->dump(this); 353 dbgs() << " has been released too many times!\n"; 354 llvm_unreachable(0); 355 } 356 #endif 357 --PredSU->NumSuccsLeft; 358 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 359 SchedImpl->releaseBottomNode(PredSU); 360 } 361 362 /// releasePredecessors - Call releasePred on each of SU's predecessors. 363 void ScheduleDAGMI::releasePredecessors(SUnit *SU) { 364 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 365 I != E; ++I) { 366 releasePred(SU, &*I); 367 } 368 } 369 370 void ScheduleDAGMI::moveInstruction(MachineInstr *MI, 371 MachineBasicBlock::iterator InsertPos) { 372 // Advance RegionBegin if the first instruction moves down. 373 if (&*RegionBegin == MI) 374 ++RegionBegin; 375 376 // Update the instruction stream. 377 BB->splice(InsertPos, BB, MI); 378 379 // Update LiveIntervals 380 LIS->handleMove(MI, /*UpdateFlags=*/true); 381 382 // Recede RegionBegin if an instruction moves above the first. 383 if (RegionBegin == InsertPos) 384 RegionBegin = MI; 385 } 386 387 bool ScheduleDAGMI::checkSchedLimit() { 388 #ifndef NDEBUG 389 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 390 CurrentTop = CurrentBottom; 391 return false; 392 } 393 ++NumInstrsScheduled; 394 #endif 395 return true; 396 } 397 398 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 399 /// crossing a scheduling boundary. [begin, end) includes all instructions in 400 /// the region, including the boundary itself and single-instruction regions 401 /// that don't get scheduled. 402 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 403 MachineBasicBlock::iterator begin, 404 MachineBasicBlock::iterator end, 405 unsigned endcount) 406 { 407 ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); 408 409 // For convenience remember the end of the liveness region. 410 LiveRegionEnd = 411 (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 412 } 413 414 // Setup the register pressure trackers for the top scheduled top and bottom 415 // scheduled regions. 416 void ScheduleDAGMI::initRegPressure() { 417 TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 418 BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 419 420 // Close the RPTracker to finalize live ins. 421 RPTracker.closeRegion(); 422 423 DEBUG(RPTracker.getPressure().dump(TRI)); 424 425 // Initialize the live ins and live outs. 426 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 427 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 428 429 // Close one end of the tracker so we can call 430 // getMaxUpward/DownwardPressureDelta before advancing across any 431 // instructions. This converts currently live regs into live ins/outs. 432 TopRPTracker.closeTop(); 433 BotRPTracker.closeBottom(); 434 435 // Account for liveness generated by the region boundary. 436 if (LiveRegionEnd != RegionEnd) 437 BotRPTracker.recede(); 438 439 assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); 440 441 // Cache the list of excess pressure sets in this region. This will also track 442 // the max pressure in the scheduled code for these sets. 
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << " Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm (see the sketch below). However, if a scheduler
/// further subclasses ScheduleDAGMI then it will want to override this
/// virtual method in order to update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = top()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and set up three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);
  if (ViewMISchedDAGs) viewGraph();

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}
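// Example (illustrative): a minimal MachineSchedStrategy sketch, as promised
// above. The class name is hypothetical and every heuristic is omitted; it
// simply drains nodes bottom-up in the order ScheduleDAGMI releases them,
// assuming the MachineSchedStrategy interface used throughout this file:
//
//   struct TrivialBottomUpStrategy : MachineSchedStrategy {
//     std::vector<SUnit*> ReadyQ;
//     virtual void initialize(ScheduleDAGMI *DAG) { ReadyQ.clear(); }
//     virtual void registerRoots() {}
//     virtual SUnit *pickNode(bool &IsTopNode) {
//       if (ReadyQ.empty()) return NULL;   // done: ends the schedule() loop
//       IsTopNode = false;                 // always extend the bottom zone
//       SUnit *SU = ReadyQ.back();
//       ReadyQ.pop_back();
//       return SU;
//     }
//     virtual void schedNode(SUnit *SU, bool IsTopNode) {}
//     virtual void releaseTopNode(SUnit *SU) {}
//     virtual void releaseBottomNode(SUnit *SU) { ReadyQ.push_back(SU); }
//   };
//
// Plugged in via 'new ScheduleDAGMI(C, new TrivialBottomUpStrategy())', the
// skeletal schedule() driver above does the rest.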
/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

// Release all DAG roots for scheduling.
//
// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft && SU != &EntrySU)
      SchedImpl->releaseTopNode(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft && SU != &ExitSU)
      BotRoots.push_back(SU);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues() {

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  releaseRoots();

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    // Update top scheduled pressure.
    TopRPTracker.advance();
    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    // Update bottom scheduled pressure.
    BotRPTracker.recede();
    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  }
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;

  // Notify the scheduling strategy after updating the DAG.
  SchedImpl->schedNode(SU, IsTopNode);
}
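// Example (illustrative): postprocessDAG above applies target-registered
// ScheduleDAGMutations after DAG construction but before any node is
// scheduled. A hypothetical mutation subclass might look like:
//
//   struct MyClusterMutation : ScheduleDAGMutation {
//     virtual void apply(ScheduleDAGMI *DAG) {
//       // Walk DAG->SUnits and add weak edges between instructions that
//       // should stay adjacent, without constraining correctness.
//     }
//   };
//
// An instance would typically be installed with DAG->addMutation(...) when
// the ScheduleDAGMI is constructed.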
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//

namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to
/// balance the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
    BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
    SingleMax, MultiPressure, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by ConvergingScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
      : Policy(policy), SU(NULL), Reason(NoCand) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;

    // Unscheduled resources.
    SmallVector<unsigned, 16> RemainingCounts;
    // Critical resource for the unscheduled zone.
    unsigned CritResIdx;
    // Number of micro-ops left to schedule.
    unsigned RemainingMicroOps;
    // Is the unscheduled zone resource limited?
    bool IsResourceLimited;

    unsigned MaxRemainingCount;

    void reset() {
      CriticalPath = 0;
      RemainingCounts.clear();
      CritResIdx = 0;
      RemainingMicroOps = 0;
      IsResourceLimited = false;
      MaxRemainingCount = 0;
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  };

  /// Each scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    unsigned CurrCycle;
    unsigned IssueCount;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // Resources used in the scheduled zone beyond this boundary.
    SmallVector<unsigned, 16> ResourceCounts;

    // Cache the critical resource ID in this scheduled zone.
    unsigned CritResIdx;

    // Is the scheduled region resource limited vs. latency limited?
    bool IsResourceLimited;

    unsigned ExpectedCount;

    // Policy flag: attempt to find ILP until expected latency is covered.
    bool ShouldIncreaseILP;

#ifndef NDEBUG
    // Remember the greatest min operand latency.
    unsigned MaxMinLatency;
#endif

    void reset() {
      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      HazardRec = 0;
      CurrCycle = 0;
      IssueCount = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      // Reserve a zero-count for the invalid CritResIdx.
      ResourceCounts.resize(1);
      assert(!ResourceCounts[0] && "nonzero count for bad resource");
      CritResIdx = 0;
      IsResourceLimited = false;
      ExpectedCount = 0;
      ShouldIncreaseILP = false;
#ifndef NDEBUG
      MaxMinLatency = 0;
#endif
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == ConvergingScheduler::TopQID;
    }

    unsigned getUnscheduledLatency(SUnit *SU) const {
      if (isTop())
        return SU->getHeight();
      return SU->getDepth();
    }

    unsigned getCriticalCount() const {
      return ResourceCounts[CritResIdx];
    }

    bool checkHazard(SUnit *SU);

    void checkILPPolicy();

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle();

    void countResource(unsigned PIdx, unsigned Cycles);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();
  };

private:
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}
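// Worked example (illustrative) of the resource math above: getResourceFactor
// normalizes resources with different unit counts onto one scale, roughly
// Factor(P) = LCM(issue width, all unit counts) / units(P). Assuming a model
// with 2 ALU units and 1 load/store unit (and issue width 2):
//
//   Factor(ALU) = 1, Factor(LSU) = 2
//   RemainingCounts[LSU] += 2 * Cycles   // one LSU cycle weighs as much as
//                                        // two ALU cycles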
void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}
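// Worked example (illustrative) for the ready-cycle updates above: if a
// predecessor of SU became ready at cycle 3 and the connecting edge has
// MinLatency 2, then
//
//   SU->TopReadyCycle = max(SU->TopReadyCycle, 3 + 2) = 5
//
// so SU cannot be issued from the top boundary before cycle 5. The bottom-up
// computation is symmetric over successors.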
/// Does this SU have a hazard within the current instruction group?
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops
/// that can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

/// If expected latency is covered, disable ILP policy.
void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
  if (ShouldIncreaseILP
      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
    ShouldIncreaseILP = false;
    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
  }
}

void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);

  // If CriticalPath has been computed, then check if the unscheduled nodes
  // exceed the ILP window. Before registerRoots, CriticalPath==0.
  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
                            > Rem->CriticalPath + ILPWindow)) {
    ShouldIncreaseILP = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
  }
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}
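// Worked example (illustrative) for bumpCycle above: with an issue width of 4
// and IssueCount == 6 when the cycle is bumped (a single instruction may
// carry more than one cycle's worth of micro-ops), the excess carries over:
//
//   IssueCount = 6 - 4 = 2   // micro-ops already committed to the new cycle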
/// Add the given processor resource to this scheduled zone.
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Reset MaxRemainingCount for sanity.
  Rem->MaxRemainingCount = 0;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << "  *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that
  // exceed the issue width. However, we commonly reach the maximum. In this
  // case opportunistically bump the cycle to avoid uselessly checking
  // everything in the readyQ. Furthermore, a single instruction may produce
  // more than one cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i; --e;
  }
  DEBUG(if (!Pending.empty()) Pending.dump());
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at
/// least one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here. Instead, when releasing each candidate, releaseNode
/// compares the region's critical path to the candidate's height or depth
/// and the scheduled zone's expected latency, and then sets
/// ShouldIncreaseILP.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  if (!CriticalZone.IsResourceLimited)
    return;

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName()
          << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName()
          << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or critical path
/// and set each candidate's ReduceHeight policy accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  Bot.checkILPPolicy();
  Top.checkILPPolicy();
  if (Bot.ShouldIncreaseILP)
    BotCand.Policy.ReduceLatency = true;
  if (Top.ShouldIncreaseILP)
    TopCand.Policy.ReduceLatency = true;

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both
    // ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause
    // CurrCycle to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Rem.MaxRemainingCount = std::max(
    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
    Rem.RemainingCounts[Rem.CritResIdx]);
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model
/// because we don't need to evaluate all aspects of the model for each node
/// in the queue. But it's really done to make the heuristics easier to debug
/// and statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
    TryCand.Reason = NextDefUse;
    return;
  }
  if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
    if (Cand.Reason > NextDefUse)
      Cand.Reason = NextDefUse;
    return;
  }
  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}
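// Example (illustrative) of the cascade in tryCandidate: suppose TryCand
// would raise excess pressure by 0 units while the current best Cand would
// raise it by 2. Then
//
//   tryLess(0, 2, TryCand, Cand, SingleExcess)
//
// sets TryCand.Reason = SingleExcess and returns true, so tryCandidate
// returns before any lower-priority heuristic runs and the caller adopts
// TryCand via setBest().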
/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect
/// is more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed
  // to have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  };
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << "     ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "        ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << "         ";
  Cand.SU->dump(DAG);
}
#endif
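// Note (illustrative): CandReason enumerators are declared in decreasing
// priority order, so a numerically smaller Reason is a stronger one. The
// bidirectional picker below relies on this directly, e.g.:
//
//   if (TopCand.Reason < BotCand.Reason) { /* the top candidate wins */ }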
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region
  // and affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
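  // compareRPDelta() performs a lexicographic comparison of the two
  // candidates' pressure deltas (Excess, then CriticalMax, then CurrentMax);
  // a strict win on any component decides the direction before falling back
  // to the reason-based tie break below.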
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new ConvergingScheduler());
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  ScheduleDAGILP *ILP;
  bool MaximizeILP;

  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
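  ///
  /// ReadyQ below is managed with std::push_heap/std::pop_heap, which
  /// maintain a max-heap with respect to this comparator. Ordering by
  /// less-than on the ILP metric therefore surfaces the highest-ILP node
  /// first when MaximizeILP is set; the reversed relation surfaces the
  /// lowest.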
  bool operator()(const SUnit *A, const SUnit *B) const {
    // Return true if A comes after B in the Q.
    if (MaximizeILP)
      return ILP->getILP(A) < ILP->getILP(B);
    else
      return ILP->getILP(A) > ILP->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGILP ILP;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP)
    : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *DAG) {
    ReadyQ.clear();
    ILP.resize(DAG->SUnits.size());
  }

  virtual void registerRoots() {
    for (std::vector<SUnit*>::const_iterator
           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
      ILP.computeILP(*I);
    }
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
          << " ILP: " << ILP.getILP(SU) << '\n');
    return SU;
  }

  virtual void schedNode(SUnit *, bool) {}

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
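  // A greater-than relation (SUnitOrder<true>) makes the lowest-numbered,
  // i.e. earliest, instructions come off BottomQ first. Since bottom-up picks
  // are placed at the end of the region, this again yields a schedule as far
  // from the original order as the DAG allows.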
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG
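
//===----------------------------------------------------------------------===//
// Usage note (a sketch, not part of the pass itself): the schedulers
// registered above are selected through the -misched= option declared near
// the top of this file. Assuming the standard llc driver and a target
// configuration that runs the machine scheduler, invocations would look
// roughly like:
//
//   llc -misched=converge foo.ll   ; standard converging scheduler
//   llc -misched=ilpmax foo.ll     ; bottom-up, maximize the ILP metric
//   llc -misched=shuffle foo.ll    ; debug-only correctness shuffler
//
// The exact flags needed to enable the pass in the first place (e.g.
// -enable-misched) depend on the target's pass configuration.
//===----------------------------------------------------------------------===//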