1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // MachineScheduler schedules machine instructions after phi elimination. It 11 // preserves LiveIntervals so it can be invoked before register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "misched" 16 17 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 18 #include "llvm/CodeGen/MachineScheduler.h" 19 #include "llvm/CodeGen/Passes.h" 20 #include "llvm/CodeGen/RegisterClassInfo.h" 21 #include "llvm/CodeGen/ScheduleDAGILP.h" 22 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include "llvm/ADT/OwningPtr.h" 29 #include "llvm/ADT/PriorityQueue.h" 30 31 #include <queue> 32 33 using namespace llvm; 34 35 namespace llvm { 36 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, 37 cl::desc("Force top-down list scheduling")); 38 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, 39 cl::desc("Force bottom-up list scheduling")); 40 } 41 42 #ifndef NDEBUG 43 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, 44 cl::desc("Pop up a window to show MISched dags after they are processed")); 45 46 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, 47 cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); 48 #else 49 static bool ViewMISchedDAGs = false; 50 #endif // NDEBUG 51 52 // Threshold to very roughly model an out-of-order processor's instruction 53 // buffers. If the actual value of this threshold matters much in practice, then 54 // it can be specified by the machine model. For now, it's an experimental 55 // tuning knob to determine when and if it matters. 56 static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden, 57 cl::desc("Allow expected latency to exceed the critical path by N cycles " 58 "before attempting to balance ILP"), 59 cl::init(10U)); 60 61 // Experimental heuristics 62 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, 63 cl::desc("Enable load clustering.")); 64 65 //===----------------------------------------------------------------------===// 66 // Machine Instruction Scheduling Pass and Registry 67 //===----------------------------------------------------------------------===// 68 69 MachineSchedContext::MachineSchedContext(): 70 MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { 71 RegClassInfo = new RegisterClassInfo(); 72 } 73 74 MachineSchedContext::~MachineSchedContext() { 75 delete RegClassInfo; 76 } 77 78 namespace { 79 /// MachineScheduler runs after coalescing and before register allocation. 
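/// The options above (-misched-topdown, -misched-bottomup, -misched-cutoff,
/// -ilp-window, -misched-cluster) are debugging and tuning knobs. As an
/// illustrative sketch only (assuming the standard llc driver and that
/// machine scheduling is enabled for the target), a debug run might look
/// like:
///   llc -misched-topdown -misched-cutoff=20 -ilp-window=5 foo.ll
/// which forces top-down list scheduling, stops after 20 scheduled
/// instructions, and narrows the ILP window to 5 cycles.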
80 class MachineScheduler : public MachineSchedContext, 81 public MachineFunctionPass { 82 public: 83 MachineScheduler(); 84 85 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 86 87 virtual void releaseMemory() {} 88 89 virtual bool runOnMachineFunction(MachineFunction&); 90 91 virtual void print(raw_ostream &O, const Module* = 0) const; 92 93 static char ID; // Class identification, replacement for typeinfo 94 }; 95 } // namespace 96 97 char MachineScheduler::ID = 0; 98 99 char &llvm::MachineSchedulerID = MachineScheduler::ID; 100 101 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", 102 "Machine Instruction Scheduler", false, false) 103 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 104 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 105 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 106 INITIALIZE_PASS_END(MachineScheduler, "misched", 107 "Machine Instruction Scheduler", false, false) 108 109 MachineScheduler::MachineScheduler() 110 : MachineFunctionPass(ID) { 111 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 112 } 113 114 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 115 AU.setPreservesCFG(); 116 AU.addRequiredID(MachineDominatorsID); 117 AU.addRequired<MachineLoopInfo>(); 118 AU.addRequired<AliasAnalysis>(); 119 AU.addRequired<TargetPassConfig>(); 120 AU.addRequired<SlotIndexes>(); 121 AU.addPreserved<SlotIndexes>(); 122 AU.addRequired<LiveIntervals>(); 123 AU.addPreserved<LiveIntervals>(); 124 MachineFunctionPass::getAnalysisUsage(AU); 125 } 126 127 MachinePassRegistry MachineSchedRegistry::Registry; 128 129 /// A dummy default scheduler factory indicates whether the scheduler 130 /// is overridden on the command line. 131 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 132 return 0; 133 } 134 135 /// MachineSchedOpt allows command line selection of the scheduler. 136 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 137 RegisterPassParser<MachineSchedRegistry> > 138 MachineSchedOpt("misched", 139 cl::init(&useDefaultMachineSched), cl::Hidden, 140 cl::desc("Machine instruction scheduler to use")); 141 142 static MachineSchedRegistry 143 DefaultSchedRegistry("default", "Use the target's default scheduler choice.", 144 useDefaultMachineSched); 145 146 /// Forward declare the standard machine scheduler. This will be used as the 147 /// default scheduler if the target does not set a default. 148 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); 149 150 151 /// Decrement this iterator until reaching the top or a non-debug instr. 152 static MachineBasicBlock::iterator 153 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { 154 assert(I != Beg && "reached the top of the region, cannot decrement"); 155 while (--I != Beg) { 156 if (!I->isDebugValue()) 157 break; 158 } 159 return I; 160 } 161 162 /// If this iterator is a debug value, increment until reaching the End or a 163 /// non-debug instruction. 164 static MachineBasicBlock::iterator 165 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { 166 for(; I != End; ++I) { 167 if (!I->isDebugValue()) 168 break; 169 } 170 return I; 171 } 172 173 /// Top-level MachineScheduler pass driver. 174 /// 175 /// Visit blocks in function order. Divide each block into scheduling regions 176 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is 177 /// consistent with the DAG builder, which traverses the interior of the 178 /// scheduling regions bottom-up. 
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule
    // each region as soon as it is discovered. RegionEnd points to the
    // scheduling boundary at the bottom of the region. The DAG does not
    // include RegionEnd, but the region does (i.e. the next RegionEnd is
    // above the previous RegionBegin). If the current block has no terminator
    // then RegionEnd == MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in
      // the instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  // Do not use WillCreateCycle, it assumes SD scheduling.
  // If Pred is reachable from Succ, then the edge creates a cycle.
  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
    return false;
  Topo.AddPred(SuccSU, PredDep.getSUnit());
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
When 356 /// NumSuccsLeft reaches zero, release the predecessor node. 357 /// 358 /// FIXME: Adjust PredSU height based on MinLatency. 359 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { 360 SUnit *PredSU = PredEdge->getSUnit(); 361 362 if (PredEdge->isWeak()) { 363 --PredSU->WeakSuccsLeft; 364 if (PredEdge->isCluster()) 365 NextClusterPred = PredSU; 366 return; 367 } 368 #ifndef NDEBUG 369 if (PredSU->NumSuccsLeft == 0) { 370 dbgs() << "*** Scheduling failed! ***\n"; 371 PredSU->dump(this); 372 dbgs() << " has been released too many times!\n"; 373 llvm_unreachable(0); 374 } 375 #endif 376 --PredSU->NumSuccsLeft; 377 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 378 SchedImpl->releaseBottomNode(PredSU); 379 } 380 381 /// releasePredecessors - Call releasePred on each of SU's predecessors. 382 void ScheduleDAGMI::releasePredecessors(SUnit *SU) { 383 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 384 I != E; ++I) { 385 releasePred(SU, &*I); 386 } 387 } 388 389 void ScheduleDAGMI::moveInstruction(MachineInstr *MI, 390 MachineBasicBlock::iterator InsertPos) { 391 // Advance RegionBegin if the first instruction moves down. 392 if (&*RegionBegin == MI) 393 ++RegionBegin; 394 395 // Update the instruction stream. 396 BB->splice(InsertPos, BB, MI); 397 398 // Update LiveIntervals 399 LIS->handleMove(MI, /*UpdateFlags=*/true); 400 401 // Recede RegionBegin if an instruction moves above the first. 402 if (RegionBegin == InsertPos) 403 RegionBegin = MI; 404 } 405 406 bool ScheduleDAGMI::checkSchedLimit() { 407 #ifndef NDEBUG 408 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 409 CurrentTop = CurrentBottom; 410 return false; 411 } 412 ++NumInstrsScheduled; 413 #endif 414 return true; 415 } 416 417 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 418 /// crossing a scheduling boundary. [begin, end) includes all instructions in 419 /// the region, including the boundary itself and single-instruction regions 420 /// that don't get scheduled. 421 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 422 MachineBasicBlock::iterator begin, 423 MachineBasicBlock::iterator end, 424 unsigned endcount) 425 { 426 ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); 427 428 // For convenience remember the end of the liveness region. 429 LiveRegionEnd = 430 (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 431 } 432 433 // Setup the register pressure trackers for the top scheduled top and bottom 434 // scheduled regions. 435 void ScheduleDAGMI::initRegPressure() { 436 TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 437 BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 438 439 // Close the RPTracker to finalize live ins. 440 RPTracker.closeRegion(); 441 442 DEBUG(RPTracker.getPressure().dump(TRI)); 443 444 // Initialize the live ins and live outs. 445 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 446 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 447 448 // Close one end of the tracker so we can call 449 // getMaxUpward/DownwardPressureDelta before advancing across any 450 // instructions. This converts currently live regs into live ins/outs. 451 TopRPTracker.closeTop(); 452 BotRPTracker.closeBottom(); 453 454 // Account for liveness generated by the region boundary. 
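  // If the region ends before the block's terminator, LiveRegionEnd was set
  // one instruction past RegionEnd in enterRegion(); receding once here folds
  // that boundary instruction's liveness into the bottom tracker before any
  // scheduling queries are made.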
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also
  // track the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << "Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = top()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
547 buildSchedGraph(AA, &RPTracker); 548 if (ViewMISchedDAGs) viewGraph(); 549 550 // Initialize top/bottom trackers after computing region pressure. 551 initRegPressure(); 552 } 553 554 /// Apply each ScheduleDAGMutation step in order. 555 void ScheduleDAGMI::postprocessDAG() { 556 for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { 557 Mutations[i]->apply(this); 558 } 559 } 560 561 // Release all DAG roots for scheduling. 562 // 563 // Nodes with unreleased weak edges can still be roots. 564 void ScheduleDAGMI::releaseRoots() { 565 SmallVector<SUnit*, 16> BotRoots; 566 567 for (std::vector<SUnit>::iterator 568 I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { 569 SUnit *SU = &(*I); 570 // A SUnit is ready to top schedule if it has no predecessors. 571 if (!I->NumPredsLeft && SU != &EntrySU) 572 SchedImpl->releaseTopNode(SU); 573 // A SUnit is ready to bottom schedule if it has no successors. 574 if (!I->NumSuccsLeft && SU != &ExitSU) 575 BotRoots.push_back(SU); 576 } 577 // Release bottom roots in reverse order so the higher priority nodes appear 578 // first. This is more natural and slightly more efficient. 579 for (SmallVectorImpl<SUnit*>::const_reverse_iterator 580 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) 581 SchedImpl->releaseBottomNode(*I); 582 } 583 584 /// Identify DAG roots and setup scheduler queues. 585 void ScheduleDAGMI::initQueues() { 586 NextClusterSucc = NULL; 587 NextClusterPred = NULL; 588 589 // Initialize the strategy before modifying the DAG. 590 SchedImpl->initialize(this); 591 592 // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 593 releaseRoots(); 594 595 releaseSuccessors(&EntrySU); 596 releasePredecessors(&ExitSU); 597 598 SchedImpl->registerRoots(); 599 600 CurrentTop = nextIfDebug(RegionBegin, RegionEnd); 601 CurrentBottom = RegionEnd; 602 } 603 604 /// Move an instruction and update register pressure. 605 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { 606 // Move the instruction to its new location in the instruction stream. 607 MachineInstr *MI = SU->getInstr(); 608 609 if (IsTopNode) { 610 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 611 if (&*CurrentTop == MI) 612 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 613 else { 614 moveInstruction(MI, CurrentTop); 615 TopRPTracker.setPos(MI); 616 } 617 618 // Update top scheduled pressure. 619 TopRPTracker.advance(); 620 assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 621 updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); 622 } 623 else { 624 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 625 MachineBasicBlock::iterator priorII = 626 priorNonDebug(CurrentBottom, CurrentTop); 627 if (&*priorII == MI) 628 CurrentBottom = priorII; 629 else { 630 if (&*CurrentTop == MI) { 631 CurrentTop = nextIfDebug(++CurrentTop, priorII); 632 TopRPTracker.setPos(CurrentTop); 633 } 634 moveInstruction(MI, CurrentBottom); 635 CurrentBottom = MI; 636 } 637 // Update bottom scheduled pressure. 638 BotRPTracker.recede(); 639 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 640 updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); 641 } 642 } 643 644 /// Update scheduler queues after scheduling an instruction. 645 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { 646 // Release dependent instructions for scheduling. 
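  // When scheduling top-down, the newly scheduled node may make its
  // successors ready; when scheduling bottom-up, its predecessors.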
647 if (IsTopNode) 648 releaseSuccessors(SU); 649 else 650 releasePredecessors(SU); 651 652 SU->isScheduled = true; 653 654 // Notify the scheduling strategy after updating the DAG. 655 SchedImpl->schedNode(SU, IsTopNode); 656 } 657 658 /// Reinsert any remaining debug_values, just like the PostRA scheduler. 659 void ScheduleDAGMI::placeDebugValues() { 660 // If first instruction was a DBG_VALUE then put it back. 661 if (FirstDbgValue) { 662 BB->splice(RegionBegin, BB, FirstDbgValue); 663 RegionBegin = FirstDbgValue; 664 } 665 666 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator 667 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 668 std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); 669 MachineInstr *DbgValue = P.first; 670 MachineBasicBlock::iterator OrigPrevMI = P.second; 671 BB->splice(++OrigPrevMI, BB, DbgValue); 672 if (OrigPrevMI == llvm::prior(RegionEnd)) 673 RegionEnd = DbgValue; 674 } 675 DbgValues.clear(); 676 FirstDbgValue = NULL; 677 } 678 679 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 680 void ScheduleDAGMI::dumpSchedule() const { 681 for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { 682 if (SUnit *SU = getSUnit(&(*MI))) 683 SU->dump(this); 684 else 685 dbgs() << "Missing SUnit\n"; 686 } 687 } 688 #endif 689 690 namespace { 691 /// \brief Post-process the DAG to create cluster edges between neighboring 692 /// loads. 693 class LoadClusterMutation : public ScheduleDAGMutation { 694 struct LoadInfo { 695 SUnit *SU; 696 unsigned BaseReg; 697 unsigned Offset; 698 LoadInfo(SUnit *su, unsigned reg, unsigned ofs) 699 : SU(su), BaseReg(reg), Offset(ofs) {} 700 }; 701 static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, 702 const LoadClusterMutation::LoadInfo &RHS); 703 704 const TargetInstrInfo *TII; 705 const TargetRegisterInfo *TRI; 706 public: 707 LoadClusterMutation(const TargetInstrInfo *tii, 708 const TargetRegisterInfo *tri) 709 : TII(tii), TRI(tri) {} 710 711 virtual void apply(ScheduleDAGMI *DAG); 712 protected: 713 void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); 714 }; 715 } // anonymous 716 717 bool LoadClusterMutation::LoadInfoLess( 718 const LoadClusterMutation::LoadInfo &LHS, 719 const LoadClusterMutation::LoadInfo &RHS) { 720 if (LHS.BaseReg != RHS.BaseReg) 721 return LHS.BaseReg < RHS.BaseReg; 722 return LHS.Offset < RHS.Offset; 723 } 724 725 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, 726 ScheduleDAGMI *DAG) { 727 SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; 728 for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { 729 SUnit *SU = Loads[Idx]; 730 unsigned BaseReg; 731 unsigned Offset; 732 if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) 733 LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); 734 } 735 if (LoadRecords.size() < 2) 736 return; 737 std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); 738 unsigned ClusterLength = 1; 739 for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { 740 if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { 741 ClusterLength = 1; 742 continue; 743 } 744 745 SUnit *SUa = LoadRecords[Idx].SU; 746 SUnit *SUb = LoadRecords[Idx+1].SU; 747 if (TII->shouldScheduleLoadsNear(SUa->getInstr(), SUb->getInstr(), 748 ClusterLength) 749 && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { 750 751 DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" 752 << SUb->NodeNum << ")\n"); 753 // Copy successor edges 
from SUa to SUb. Interleaving computation 754 // dependent on SUa can prevent load combining due to register reuse. 755 // Predecessor edges do not need to be copied from SUb to SUa since nearby 756 // loads should have effectively the same inputs. 757 for (SUnit::const_succ_iterator 758 SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { 759 if (SI->getSUnit() == SUb) 760 continue; 761 DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); 762 DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); 763 } 764 ++ClusterLength; 765 } 766 else 767 ClusterLength = 1; 768 } 769 } 770 771 /// \brief Callback from DAG postProcessing to create cluster edges for loads. 772 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) { 773 // Map DAG NodeNum to store chain ID. 774 DenseMap<unsigned, unsigned> StoreChainIDs; 775 // Map each store chain to a set of dependent loads. 776 SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; 777 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 778 SUnit *SU = &DAG->SUnits[Idx]; 779 if (!SU->getInstr()->mayLoad()) 780 continue; 781 unsigned ChainPredID = DAG->SUnits.size(); 782 for (SUnit::const_pred_iterator 783 PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 784 if (PI->isCtrl()) { 785 ChainPredID = PI->getSUnit()->NodeNum; 786 break; 787 } 788 } 789 // Check if this chain-like pred has been seen 790 // before. ChainPredID==MaxNodeID for loads at the top of the schedule. 791 unsigned NumChains = StoreChainDependents.size(); 792 std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = 793 StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); 794 if (Result.second) 795 StoreChainDependents.resize(NumChains + 1); 796 StoreChainDependents[Result.first->second].push_back(SU); 797 } 798 // Iterate over the store chains. 799 for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) 800 clusterNeighboringLoads(StoreChainDependents[Idx], DAG); 801 } 802 803 //===----------------------------------------------------------------------===// 804 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. 805 //===----------------------------------------------------------------------===// 806 807 namespace { 808 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance 809 /// the schedule. 810 class ConvergingScheduler : public MachineSchedStrategy { 811 public: 812 /// Represent the type of SchedCandidate found within a single queue. 813 /// pickNodeBidirectional depends on these listed by decreasing priority. 814 enum CandReason { 815 NoCand, SingleExcess, SingleCritical, Cluster, 816 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 817 TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, 818 NodeOrder}; 819 820 #ifndef NDEBUG 821 static const char *getReasonStr(ConvergingScheduler::CandReason Reason); 822 #endif 823 824 /// Policy for scheduling the next instruction in the candidate's zone. 825 struct CandPolicy { 826 bool ReduceLatency; 827 unsigned ReduceResIdx; 828 unsigned DemandResIdx; 829 830 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} 831 }; 832 833 /// Status of an instruction's critical resource consumption. 834 struct SchedResourceDelta { 835 // Count critical resources in the scheduled region required by SU. 836 unsigned CritResources; 837 838 // Count critical resources from another region consumed by SU. 
839 unsigned DemandedResources; 840 841 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} 842 843 bool operator==(const SchedResourceDelta &RHS) const { 844 return CritResources == RHS.CritResources 845 && DemandedResources == RHS.DemandedResources; 846 } 847 bool operator!=(const SchedResourceDelta &RHS) const { 848 return !operator==(RHS); 849 } 850 }; 851 852 /// Store the state used by ConvergingScheduler heuristics, required for the 853 /// lifetime of one invocation of pickNode(). 854 struct SchedCandidate { 855 CandPolicy Policy; 856 857 // The best SUnit candidate. 858 SUnit *SU; 859 860 // The reason for this candidate. 861 CandReason Reason; 862 863 // Register pressure values for the best candidate. 864 RegPressureDelta RPDelta; 865 866 // Critical resource consumption of the best candidate. 867 SchedResourceDelta ResDelta; 868 869 SchedCandidate(const CandPolicy &policy) 870 : Policy(policy), SU(NULL), Reason(NoCand) {} 871 872 bool isValid() const { return SU; } 873 874 // Copy the status of another candidate without changing policy. 875 void setBest(SchedCandidate &Best) { 876 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 877 SU = Best.SU; 878 Reason = Best.Reason; 879 RPDelta = Best.RPDelta; 880 ResDelta = Best.ResDelta; 881 } 882 883 void initResourceDelta(const ScheduleDAGMI *DAG, 884 const TargetSchedModel *SchedModel); 885 }; 886 887 /// Summarize the unscheduled region. 888 struct SchedRemainder { 889 // Critical path through the DAG in expected latency. 890 unsigned CriticalPath; 891 892 // Unscheduled resources 893 SmallVector<unsigned, 16> RemainingCounts; 894 // Critical resource for the unscheduled zone. 895 unsigned CritResIdx; 896 // Number of micro-ops left to schedule. 897 unsigned RemainingMicroOps; 898 // Is the unscheduled zone resource limited. 899 bool IsResourceLimited; 900 901 unsigned MaxRemainingCount; 902 903 void reset() { 904 CriticalPath = 0; 905 RemainingCounts.clear(); 906 CritResIdx = 0; 907 RemainingMicroOps = 0; 908 IsResourceLimited = false; 909 MaxRemainingCount = 0; 910 } 911 912 SchedRemainder() { reset(); } 913 914 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 915 }; 916 917 /// Each Scheduling boundary is associated with ready queues. It tracks the 918 /// current cycle in the direction of movement, and maintains the state 919 /// of "hazards" and other interlocks at the current cycle. 920 struct SchedBoundary { 921 ScheduleDAGMI *DAG; 922 const TargetSchedModel *SchedModel; 923 SchedRemainder *Rem; 924 925 ReadyQueue Available; 926 ReadyQueue Pending; 927 bool CheckPending; 928 929 // For heuristics, keep a list of the nodes that immediately depend on the 930 // most recently scheduled node. 931 SmallPtrSet<const SUnit*, 8> NextSUs; 932 933 ScheduleHazardRecognizer *HazardRec; 934 935 unsigned CurrCycle; 936 unsigned IssueCount; 937 938 /// MinReadyCycle - Cycle of the soonest available instruction. 939 unsigned MinReadyCycle; 940 941 // The expected latency of the critical path in this scheduled zone. 942 unsigned ExpectedLatency; 943 944 // Resources used in the scheduled zone beyond this boundary. 945 SmallVector<unsigned, 16> ResourceCounts; 946 947 // Cache the critical resources ID in this scheduled zone. 948 unsigned CritResIdx; 949 950 // Is the scheduled region resource limited vs. latency limited. 951 bool IsResourceLimited; 952 953 unsigned ExpectedCount; 954 955 // Policy flag: attempt to find ILP until expected latency is covered. 
956 bool ShouldIncreaseILP; 957 958 #ifndef NDEBUG 959 // Remember the greatest min operand latency. 960 unsigned MaxMinLatency; 961 #endif 962 963 void reset() { 964 Available.clear(); 965 Pending.clear(); 966 CheckPending = false; 967 NextSUs.clear(); 968 HazardRec = 0; 969 CurrCycle = 0; 970 IssueCount = 0; 971 MinReadyCycle = UINT_MAX; 972 ExpectedLatency = 0; 973 ResourceCounts.resize(1); 974 assert(!ResourceCounts[0] && "nonzero count for bad resource"); 975 CritResIdx = 0; 976 IsResourceLimited = false; 977 ExpectedCount = 0; 978 ShouldIncreaseILP = false; 979 #ifndef NDEBUG 980 MaxMinLatency = 0; 981 #endif 982 // Reserve a zero-count for invalid CritResIdx. 983 ResourceCounts.resize(1); 984 } 985 986 /// Pending queues extend the ready queues with the same ID and the 987 /// PendingFlag set. 988 SchedBoundary(unsigned ID, const Twine &Name): 989 DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), 990 Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { 991 reset(); 992 } 993 994 ~SchedBoundary() { delete HazardRec; } 995 996 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 997 SchedRemainder *rem); 998 999 bool isTop() const { 1000 return Available.getID() == ConvergingScheduler::TopQID; 1001 } 1002 1003 unsigned getUnscheduledLatency(SUnit *SU) const { 1004 if (isTop()) 1005 return SU->getHeight(); 1006 return SU->getDepth(); 1007 } 1008 1009 unsigned getCriticalCount() const { 1010 return ResourceCounts[CritResIdx]; 1011 } 1012 1013 bool checkHazard(SUnit *SU); 1014 1015 void checkILPPolicy(); 1016 1017 void releaseNode(SUnit *SU, unsigned ReadyCycle); 1018 1019 void bumpCycle(); 1020 1021 void countResource(unsigned PIdx, unsigned Cycles); 1022 1023 void bumpNode(SUnit *SU); 1024 1025 void releasePending(); 1026 1027 void removeReady(SUnit *SU); 1028 1029 SUnit *pickOnlyChoice(); 1030 }; 1031 1032 private: 1033 ScheduleDAGMI *DAG; 1034 const TargetSchedModel *SchedModel; 1035 const TargetRegisterInfo *TRI; 1036 1037 // State of the top and bottom scheduled instruction boundaries. 
1038 SchedRemainder Rem; 1039 SchedBoundary Top; 1040 SchedBoundary Bot; 1041 1042 public: 1043 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) 1044 enum { 1045 TopQID = 1, 1046 BotQID = 2, 1047 LogMaxQID = 2 1048 }; 1049 1050 ConvergingScheduler(): 1051 DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} 1052 1053 virtual void initialize(ScheduleDAGMI *dag); 1054 1055 virtual SUnit *pickNode(bool &IsTopNode); 1056 1057 virtual void schedNode(SUnit *SU, bool IsTopNode); 1058 1059 virtual void releaseTopNode(SUnit *SU); 1060 1061 virtual void releaseBottomNode(SUnit *SU); 1062 1063 virtual void registerRoots(); 1064 1065 protected: 1066 void balanceZones( 1067 ConvergingScheduler::SchedBoundary &CriticalZone, 1068 ConvergingScheduler::SchedCandidate &CriticalCand, 1069 ConvergingScheduler::SchedBoundary &OppositeZone, 1070 ConvergingScheduler::SchedCandidate &OppositeCand); 1071 1072 void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, 1073 ConvergingScheduler::SchedCandidate &BotCand); 1074 1075 void tryCandidate(SchedCandidate &Cand, 1076 SchedCandidate &TryCand, 1077 SchedBoundary &Zone, 1078 const RegPressureTracker &RPTracker, 1079 RegPressureTracker &TempTracker); 1080 1081 SUnit *pickNodeBidirectional(bool &IsTopNode); 1082 1083 void pickNodeFromQueue(SchedBoundary &Zone, 1084 const RegPressureTracker &RPTracker, 1085 SchedCandidate &Candidate); 1086 1087 #ifndef NDEBUG 1088 void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); 1089 #endif 1090 }; 1091 } // namespace 1092 1093 void ConvergingScheduler::SchedRemainder:: 1094 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 1095 reset(); 1096 if (!SchedModel->hasInstrSchedModel()) 1097 return; 1098 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 1099 for (std::vector<SUnit>::iterator 1100 I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { 1101 const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); 1102 RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); 1103 for (TargetSchedModel::ProcResIter 1104 PI = SchedModel->getWriteProcResBegin(SC), 1105 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1106 unsigned PIdx = PI->ProcResourceIdx; 1107 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1108 RemainingCounts[PIdx] += (Factor * PI->Cycles); 1109 } 1110 } 1111 } 1112 1113 void ConvergingScheduler::SchedBoundary:: 1114 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 1115 reset(); 1116 DAG = dag; 1117 SchedModel = smodel; 1118 Rem = rem; 1119 if (SchedModel->hasInstrSchedModel()) 1120 ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); 1121 } 1122 1123 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { 1124 DAG = dag; 1125 SchedModel = DAG->getSchedModel(); 1126 TRI = DAG->TRI; 1127 Rem.init(DAG, SchedModel); 1128 Top.init(DAG, SchedModel, &Rem); 1129 Bot.init(DAG, SchedModel, &Rem); 1130 1131 // Initialize resource counts. 1132 1133 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 1134 // are disabled, then these HazardRecs will be disabled. 
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isWeak())
      continue;
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops
/// that can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

/// If expected latency is covered, disable ILP policy.
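///
/// As a rough worked example (numbers are illustrative only): with the default
/// -ilp-window=10, a critical path of 40 cycles, and a zone whose
/// ExpectedLatency is 30, releasing a node with 25 cycles of unscheduled
/// latency gives 30 + 25 = 55 > 40 + 10, so releaseNode() sets
/// ShouldIncreaseILP. Once the zone becomes resource limited or CurrCycle
/// catches up to ExpectedLatency, this hook clears the flag again.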
1220 void ConvergingScheduler::SchedBoundary::checkILPPolicy() { 1221 if (ShouldIncreaseILP 1222 && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { 1223 ShouldIncreaseILP = false; 1224 DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); 1225 } 1226 } 1227 1228 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, 1229 unsigned ReadyCycle) { 1230 1231 if (ReadyCycle < MinReadyCycle) 1232 MinReadyCycle = ReadyCycle; 1233 1234 // Check for interlocks first. For the purpose of other heuristics, an 1235 // instruction that cannot issue appears as if it's not in the ReadyQueue. 1236 if (ReadyCycle > CurrCycle || checkHazard(SU)) 1237 Pending.push(SU); 1238 else 1239 Available.push(SU); 1240 1241 // Record this node as an immediate dependent of the scheduled node. 1242 NextSUs.insert(SU); 1243 1244 // If CriticalPath has been computed, then check if the unscheduled nodes 1245 // exceed the ILP window. Before registerRoots, CriticalPath==0. 1246 if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) 1247 > Rem->CriticalPath + ILPWindow)) { 1248 ShouldIncreaseILP = true; 1249 DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " 1250 << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); 1251 } 1252 } 1253 1254 /// Move the boundary of scheduled code by one cycle. 1255 void ConvergingScheduler::SchedBoundary::bumpCycle() { 1256 unsigned Width = SchedModel->getIssueWidth(); 1257 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 1258 1259 unsigned NextCycle = CurrCycle + 1; 1260 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); 1261 if (MinReadyCycle > NextCycle) { 1262 IssueCount = 0; 1263 NextCycle = MinReadyCycle; 1264 } 1265 1266 if (!HazardRec->isEnabled()) { 1267 // Bypass HazardRec virtual calls. 1268 CurrCycle = NextCycle; 1269 } 1270 else { 1271 // Bypass getHazardType calls in case of long latency. 1272 for (; CurrCycle != NextCycle; ++CurrCycle) { 1273 if (isTop()) 1274 HazardRec->AdvanceCycle(); 1275 else 1276 HazardRec->RecedeCycle(); 1277 } 1278 } 1279 CheckPending = true; 1280 IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); 1281 1282 DEBUG(dbgs() << " *** " << Available.getName() << " cycle " 1283 << CurrCycle << '\n'); 1284 } 1285 1286 /// Add the given processor resource to this scheduled zone. 1287 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, 1288 unsigned Cycles) { 1289 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1290 DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name 1291 << " +(" << Cycles << "x" << Factor 1292 << ") / " << SchedModel->getLatencyFactor() << '\n'); 1293 1294 unsigned Count = Factor * Cycles; 1295 ResourceCounts[PIdx] += Count; 1296 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 1297 Rem->RemainingCounts[PIdx] -= Count; 1298 1299 // Reset MaxRemainingCount for sanity. 1300 Rem->MaxRemainingCount = 0; 1301 1302 // Check if this resource exceeds the current critical resource by a full 1303 // cycle. If so, it becomes the critical resource. 1304 if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) 1305 >= (int)SchedModel->getLatencyFactor()) { 1306 CritResIdx = PIdx; 1307 DEBUG(dbgs() << " *** Critical resource " 1308 << SchedModel->getProcResource(PIdx)->Name << " x" 1309 << ResourceCounts[PIdx] << '\n'); 1310 } 1311 } 1312 1313 /// Move the boundary of scheduled code by one SUnit. 
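///
/// Micro-op accounting sketch (illustrative numbers): with an issue width of
/// 4 and IssueCount already at 3, scheduling a single micro-op instruction
/// brings IssueCount to 4 >= 4, so bumpNode() calls bumpCycle() right away
/// instead of leaving every remaining candidate to fail checkHazard().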
1314 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { 1315 // Update the reservation table. 1316 if (HazardRec->isEnabled()) { 1317 if (!isTop() && SU->isCall) { 1318 // Calls are scheduled with their preceding instructions. For bottom-up 1319 // scheduling, clear the pipeline state before emitting. 1320 HazardRec->Reset(); 1321 } 1322 HazardRec->EmitInstruction(SU); 1323 } 1324 // Update resource counts and critical resource. 1325 if (SchedModel->hasInstrSchedModel()) { 1326 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 1327 Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); 1328 for (TargetSchedModel::ProcResIter 1329 PI = SchedModel->getWriteProcResBegin(SC), 1330 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1331 countResource(PI->ProcResourceIdx, PI->Cycles); 1332 } 1333 } 1334 if (isTop()) { 1335 if (SU->getDepth() > ExpectedLatency) 1336 ExpectedLatency = SU->getDepth(); 1337 } 1338 else { 1339 if (SU->getHeight() > ExpectedLatency) 1340 ExpectedLatency = SU->getHeight(); 1341 } 1342 1343 IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); 1344 1345 // Check the instruction group dispatch limit. 1346 // TODO: Check if this SU must end a dispatch group. 1347 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 1348 1349 // checkHazard prevents scheduling multiple instructions per cycle that exceed 1350 // issue width. However, we commonly reach the maximum. In this case 1351 // opportunistically bump the cycle to avoid uselessly checking everything in 1352 // the readyQ. Furthermore, a single instruction may produce more than one 1353 // cycle's worth of micro-ops. 1354 if (IssueCount >= SchedModel->getIssueWidth()) { 1355 DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); 1356 bumpCycle(); 1357 } 1358 } 1359 1360 /// Release pending ready nodes in to the available queue. This makes them 1361 /// visible to heuristics. 1362 void ConvergingScheduler::SchedBoundary::releasePending() { 1363 // If the available queue is empty, it is safe to reset MinReadyCycle. 1364 if (Available.empty()) 1365 MinReadyCycle = UINT_MAX; 1366 1367 // Check to see if any of the pending instructions are ready to issue. If 1368 // so, add them to the available queue. 1369 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 1370 SUnit *SU = *(Pending.begin()+i); 1371 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 1372 1373 if (ReadyCycle < MinReadyCycle) 1374 MinReadyCycle = ReadyCycle; 1375 1376 if (ReadyCycle > CurrCycle) 1377 continue; 1378 1379 if (checkHazard(SU)) 1380 continue; 1381 1382 Available.push(SU); 1383 Pending.remove(Pending.begin()+i); 1384 --i; --e; 1385 } 1386 DEBUG(if (!Pending.empty()) Pending.dump()); 1387 CheckPending = false; 1388 } 1389 1390 /// Remove SU from the ready set for this boundary. 1391 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { 1392 if (Available.isInQueue(SU)) 1393 Available.remove(Available.find(SU)); 1394 else { 1395 assert(Pending.isInQueue(SU) && "bad ready count"); 1396 Pending.remove(Pending.find(SU)); 1397 } 1398 } 1399 1400 /// If this queue only has one ready candidate, return it. As a side effect, 1401 /// defer any nodes that now hit a hazard, and advance the cycle until at least 1402 /// one node is ready. If multiple instructions are ready, return NULL. 
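///
/// A scheduling strategy can use this as a fast path before running the full
/// candidate comparison, along the lines of this sketch (illustrative, not a
/// quote of the actual pickNode implementation):
///   if (SUnit *SU = Top.pickOnlyChoice()) {
///     IsTopNode = true;
///     return SU;
///   }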
1403 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { 1404 if (CheckPending) 1405 releasePending(); 1406 1407 if (IssueCount > 0) { 1408 // Defer any ready instrs that now have a hazard. 1409 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 1410 if (checkHazard(*I)) { 1411 Pending.push(*I); 1412 I = Available.remove(I); 1413 continue; 1414 } 1415 ++I; 1416 } 1417 } 1418 for (unsigned i = 0; Available.empty(); ++i) { 1419 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 1420 "permanent hazard"); (void)i; 1421 bumpCycle(); 1422 releasePending(); 1423 } 1424 if (Available.size() == 1) 1425 return *Available.begin(); 1426 return NULL; 1427 } 1428 1429 /// Record the candidate policy for opposite zones with different critical 1430 /// resources. 1431 /// 1432 /// If the CriticalZone is latency limited, don't force a policy for the 1433 /// candidates here. Instead, When releasing each candidate, releaseNode 1434 /// compares the region's critical path to the candidate's height or depth and 1435 /// the scheduled zone's expected latency then sets ShouldIncreaseILP. 1436 void ConvergingScheduler::balanceZones( 1437 ConvergingScheduler::SchedBoundary &CriticalZone, 1438 ConvergingScheduler::SchedCandidate &CriticalCand, 1439 ConvergingScheduler::SchedBoundary &OppositeZone, 1440 ConvergingScheduler::SchedCandidate &OppositeCand) { 1441 1442 if (!CriticalZone.IsResourceLimited) 1443 return; 1444 1445 SchedRemainder *Rem = CriticalZone.Rem; 1446 1447 // If the critical zone is overconsuming a resource relative to the 1448 // remainder, try to reduce it. 1449 unsigned RemainingCritCount = 1450 Rem->RemainingCounts[CriticalZone.CritResIdx]; 1451 if ((int)(Rem->MaxRemainingCount - RemainingCritCount) 1452 > (int)SchedModel->getLatencyFactor()) { 1453 CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; 1454 DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " 1455 << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name 1456 << '\n'); 1457 } 1458 // If the other zone is underconsuming a resource relative to the full zone, 1459 // try to increase it. 1460 unsigned OppositeCount = 1461 OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; 1462 if ((int)(OppositeZone.ExpectedCount - OppositeCount) 1463 > (int)SchedModel->getLatencyFactor()) { 1464 OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; 1465 DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand " 1466 << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name 1467 << '\n'); 1468 } 1469 } 1470 1471 /// Determine if the scheduled zones exceed resource limits or critical path and 1472 /// set each candidate's ReduceHeight policy accordingly. 1473 void ConvergingScheduler::checkResourceLimits( 1474 ConvergingScheduler::SchedCandidate &TopCand, 1475 ConvergingScheduler::SchedCandidate &BotCand) { 1476 1477 Bot.checkILPPolicy(); 1478 Top.checkILPPolicy(); 1479 if (Bot.ShouldIncreaseILP) 1480 BotCand.Policy.ReduceLatency = true; 1481 if (Top.ShouldIncreaseILP) 1482 TopCand.Policy.ReduceLatency = true; 1483 1484 // Handle resource-limited regions. 1485 if (Top.IsResourceLimited && Bot.IsResourceLimited 1486 && Top.CritResIdx == Bot.CritResIdx) { 1487 // If the scheduled critical resource in both zones is no longer the 1488 // critical remaining resource, attempt to reduce resource height both ways. 
1489 if (Top.CritResIdx != Rem.CritResIdx) { 1490 TopCand.Policy.ReduceResIdx = Top.CritResIdx; 1491 BotCand.Policy.ReduceResIdx = Bot.CritResIdx; 1492 DEBUG(dbgs() << "Reduce scheduled " 1493 << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); 1494 } 1495 return; 1496 } 1497 // Handle latency-limited regions. 1498 if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { 1499 // If the total scheduled expected latency exceeds the region's critical 1500 // path then reduce latency both ways. 1501 // 1502 // Just because a zone is not resource limited does not mean it is latency 1503 // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle 1504 // to exceed expected latency. 1505 if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) 1506 && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { 1507 TopCand.Policy.ReduceLatency = true; 1508 BotCand.Policy.ReduceLatency = true; 1509 DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency 1510 << " + " << Bot.ExpectedLatency << '\n'); 1511 } 1512 return; 1513 } 1514 // The critical resource is different in each zone, so request balancing. 1515 1516 // Compute the cost of each zone. 1517 Rem.MaxRemainingCount = std::max( 1518 Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(), 1519 Rem.RemainingCounts[Rem.CritResIdx]); 1520 Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); 1521 Top.ExpectedCount = std::max( 1522 Top.getCriticalCount(), 1523 Top.ExpectedCount * SchedModel->getLatencyFactor()); 1524 Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); 1525 Bot.ExpectedCount = std::max( 1526 Bot.getCriticalCount(), 1527 Bot.ExpectedCount * SchedModel->getLatencyFactor()); 1528 1529 balanceZones(Top, TopCand, Bot, BotCand); 1530 balanceZones(Bot, BotCand, Top, TopCand); 1531 } 1532 1533 void ConvergingScheduler::SchedCandidate:: 1534 initResourceDelta(const ScheduleDAGMI *DAG, 1535 const TargetSchedModel *SchedModel) { 1536 if (!Policy.ReduceResIdx && !Policy.DemandResIdx) 1537 return; 1538 1539 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 1540 for (TargetSchedModel::ProcResIter 1541 PI = SchedModel->getWriteProcResBegin(SC), 1542 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1543 if (PI->ProcResourceIdx == Policy.ReduceResIdx) 1544 ResDelta.CritResources += PI->Cycles; 1545 if (PI->ProcResourceIdx == Policy.DemandResIdx) 1546 ResDelta.DemandedResources += PI->Cycles; 1547 } 1548 } 1549 1550 /// Return true if this heuristic determines order. 1551 static bool tryLess(unsigned TryVal, unsigned CandVal, 1552 ConvergingScheduler::SchedCandidate &TryCand, 1553 ConvergingScheduler::SchedCandidate &Cand, 1554 ConvergingScheduler::CandReason Reason) { 1555 if (TryVal < CandVal) { 1556 TryCand.Reason = Reason; 1557 return true; 1558 } 1559 if (TryVal > CandVal) { 1560 if (Cand.Reason > Reason) 1561 Cand.Reason = Reason; 1562 return true; 1563 } 1564 return false; 1565 } 1566 1567 static bool tryGreater(unsigned TryVal, unsigned CandVal, 1568 ConvergingScheduler::SchedCandidate &TryCand, 1569 ConvergingScheduler::SchedCandidate &Cand, 1570 ConvergingScheduler::CandReason Reason) { 1571 if (TryVal > CandVal) { 1572 TryCand.Reason = Reason; 1573 return true; 1574 } 1575 if (TryVal < CandVal) { 1576 if (Cand.Reason > Reason) 1577 Cand.Reason = Reason; 1578 return true; 1579 } 1580 return false; 1581 } 1582 1583 static unsigned getWeakLeft(const SUnit *SU, bool isTop) { 1584 return (isTop) ? 
/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model
/// because we don't need to evaluate all aspects of the model for each node
/// in the queue. But it's really done to make the heuristics easier to debug
/// and statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
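  // This only applies when the zone's policy asks for latency reduction. The
  // top zone compares candidate depths (latency already accrued above the
  // node) and falls back to preferring the greater height; the bottom zone
  // does the mirror-image comparison with heights and depths swapped.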
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect
/// is more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed
  // to have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
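  // CurrentMax is the weakest of the three signals, so it is consulted only
  // when the excess and critical-set deltas are tied.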
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand: return "NOCAND ";
  case SingleExcess: return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT ";
  case Cluster: return "CLUSTER ";
  case SingleMax: return "REG-MAX ";
  case MultiPressure: return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce: return "TOP-PATH ";
  case BotHeightReduce: return "BOT-HEIGHT";
  case BotPathReduce: return "BOT-PATH ";
  case NextDefUse: return "DEF-USE ";
  case NodeOrder: return "ORDER ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << " ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << " ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the given zone's ready queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit
/// during DAG building. To adjust for the current scheduling location we need
/// to maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query
      // it.
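      // (tryCandidate computes ResDelta only when a resource reduce/demand
      // policy is active, so it may still be default-constructed here.)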
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region
  // and affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either
  // side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
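/// When -misched-topdown or -misched-bottomup forces a direction, only that
/// zone's queue is consulted; otherwise pickNodeBidirectional chooses the
/// better of the two boundaries.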
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same
/// node that was just returned by pickNode(). However, ScheduleDAGMI needs to
/// update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  ScheduleDAGILP *ILP;
  bool MaximizeILP;

  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  bool operator()(const SUnit *A, const SUnit *B) const {
    // Return true if A comes after B in the Q.
    if (MaximizeILP)
      return ILP->getILP(A) < ILP->getILP(B);
    else
      return ILP->getILP(A) > ILP->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
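///
/// Nodes are released bottom-up into ReadyQ, which is maintained as a heap
/// ordered by ILPOrder; pickNode pops the highest-priority remaining node.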
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGILP ILP;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP)
    : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *DAG) {
    ReadyQ.clear();
    ILP.resize(DAG->SUnits.size());
  }

  virtual void registerRoots() {
    for (std::vector<SUnit*>::const_iterator
           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
      ILP.computeILP(*I);
    }
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
          << " ILP: " << ILP.getILP(SU) << '\n');
    return SU;
  }

  virtual void schedNode(SUnit *, bool) {}

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG