1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // MachineScheduler schedules machine instructions after phi elimination. It 11 // preserves LiveIntervals so it can be invoked before register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "misched" 16 17 #include "llvm/CodeGen/MachineScheduler.h" 18 #include "llvm/ADT/OwningPtr.h" 19 #include "llvm/ADT/PriorityQueue.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 22 #include "llvm/CodeGen/Passes.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/ScheduleDFS.h" 25 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <queue> 31 32 using namespace llvm; 33 34 namespace llvm { 35 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, 36 cl::desc("Force top-down list scheduling")); 37 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, 38 cl::desc("Force bottom-up list scheduling")); 39 } 40 41 #ifndef NDEBUG 42 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, 43 cl::desc("Pop up a window to show MISched dags after they are processed")); 44 45 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, 46 cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); 47 #else 48 static bool ViewMISchedDAGs = false; 49 #endif // NDEBUG 50 51 // Experimental heuristics 52 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, 53 cl::desc("Enable load clustering."), cl::init(true)); 54 55 // Experimental heuristics 56 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, 57 cl::desc("Enable scheduling for macro fusion."), cl::init(true)); 58 59 // DAG subtrees must have at least this many nodes. 60 static const unsigned MinSubtreeSize = 8; 61 62 //===----------------------------------------------------------------------===// 63 // Machine Instruction Scheduling Pass and Registry 64 //===----------------------------------------------------------------------===// 65 66 MachineSchedContext::MachineSchedContext(): 67 MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { 68 RegClassInfo = new RegisterClassInfo(); 69 } 70 71 MachineSchedContext::~MachineSchedContext() { 72 delete RegClassInfo; 73 } 74 75 namespace { 76 /// MachineScheduler runs after coalescing and before register allocation. 
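/// It extends MachineSchedContext (the shared per-function analysis context)
/// with a MachineFunctionPass interface, and drives the selected
/// ScheduleDAGInstrs implementation over every scheduling region in the
/// function.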
77 class MachineScheduler : public MachineSchedContext, 78 public MachineFunctionPass { 79 public: 80 MachineScheduler(); 81 82 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 83 84 virtual void releaseMemory() {} 85 86 virtual bool runOnMachineFunction(MachineFunction&); 87 88 virtual void print(raw_ostream &O, const Module* = 0) const; 89 90 static char ID; // Class identification, replacement for typeinfo 91 }; 92 } // namespace 93 94 char MachineScheduler::ID = 0; 95 96 char &llvm::MachineSchedulerID = MachineScheduler::ID; 97 98 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", 99 "Machine Instruction Scheduler", false, false) 100 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 101 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 102 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 103 INITIALIZE_PASS_END(MachineScheduler, "misched", 104 "Machine Instruction Scheduler", false, false) 105 106 MachineScheduler::MachineScheduler() 107 : MachineFunctionPass(ID) { 108 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 109 } 110 111 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 112 AU.setPreservesCFG(); 113 AU.addRequiredID(MachineDominatorsID); 114 AU.addRequired<MachineLoopInfo>(); 115 AU.addRequired<AliasAnalysis>(); 116 AU.addRequired<TargetPassConfig>(); 117 AU.addRequired<SlotIndexes>(); 118 AU.addPreserved<SlotIndexes>(); 119 AU.addRequired<LiveIntervals>(); 120 AU.addPreserved<LiveIntervals>(); 121 MachineFunctionPass::getAnalysisUsage(AU); 122 } 123 124 MachinePassRegistry MachineSchedRegistry::Registry; 125 126 /// A dummy default scheduler factory indicates whether the scheduler 127 /// is overridden on the command line. 128 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 129 return 0; 130 } 131 132 /// MachineSchedOpt allows command line selection of the scheduler. 133 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 134 RegisterPassParser<MachineSchedRegistry> > 135 MachineSchedOpt("misched", 136 cl::init(&useDefaultMachineSched), cl::Hidden, 137 cl::desc("Machine instruction scheduler to use")); 138 139 static MachineSchedRegistry 140 DefaultSchedRegistry("default", "Use the target's default scheduler choice.", 141 useDefaultMachineSched); 142 143 /// Forward declare the standard machine scheduler. This will be used as the 144 /// default scheduler if the target does not set a default. 145 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); 146 147 148 /// Decrement this iterator until reaching the top or a non-debug instr. 149 static MachineBasicBlock::iterator 150 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { 151 assert(I != Beg && "reached the top of the region, cannot decrement"); 152 while (--I != Beg) { 153 if (!I->isDebugValue()) 154 break; 155 } 156 return I; 157 } 158 159 /// If this iterator is a debug value, increment until reaching the End or a 160 /// non-debug instruction. 161 static MachineBasicBlock::iterator 162 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { 163 for(; I != End; ++I) { 164 if (!I->isDebugValue()) 165 break; 166 } 167 return I; 168 } 169 170 /// Top-level MachineScheduler pass driver. 171 /// 172 /// Visit blocks in function order. Divide each block into scheduling regions 173 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is 174 /// consistent with the DAG builder, which traverses the interior of the 175 /// scheduling regions bottom-up. 
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMI::~ScheduleDAGMI() {
  delete DFSResult;
  DeleteContainerPointers(Mutations);
  delete SchedImpl;
}

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
353 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { 354 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 355 I != E; ++I) { 356 releaseSucc(SU, &*I); 357 } 358 } 359 360 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When 361 /// NumSuccsLeft reaches zero, release the predecessor node. 362 /// 363 /// FIXME: Adjust PredSU height based on MinLatency. 364 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { 365 SUnit *PredSU = PredEdge->getSUnit(); 366 367 if (PredEdge->isWeak()) { 368 --PredSU->WeakSuccsLeft; 369 if (PredEdge->isCluster()) 370 NextClusterPred = PredSU; 371 return; 372 } 373 #ifndef NDEBUG 374 if (PredSU->NumSuccsLeft == 0) { 375 dbgs() << "*** Scheduling failed! ***\n"; 376 PredSU->dump(this); 377 dbgs() << " has been released too many times!\n"; 378 llvm_unreachable(0); 379 } 380 #endif 381 --PredSU->NumSuccsLeft; 382 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 383 SchedImpl->releaseBottomNode(PredSU); 384 } 385 386 /// releasePredecessors - Call releasePred on each of SU's predecessors. 387 void ScheduleDAGMI::releasePredecessors(SUnit *SU) { 388 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 389 I != E; ++I) { 390 releasePred(SU, &*I); 391 } 392 } 393 394 void ScheduleDAGMI::moveInstruction(MachineInstr *MI, 395 MachineBasicBlock::iterator InsertPos) { 396 // Advance RegionBegin if the first instruction moves down. 397 if (&*RegionBegin == MI) 398 ++RegionBegin; 399 400 // Update the instruction stream. 401 BB->splice(InsertPos, BB, MI); 402 403 // Update LiveIntervals 404 LIS->handleMove(MI, /*UpdateFlags=*/true); 405 406 // Recede RegionBegin if an instruction moves above the first. 407 if (RegionBegin == InsertPos) 408 RegionBegin = MI; 409 } 410 411 bool ScheduleDAGMI::checkSchedLimit() { 412 #ifndef NDEBUG 413 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 414 CurrentTop = CurrentBottom; 415 return false; 416 } 417 ++NumInstrsScheduled; 418 #endif 419 return true; 420 } 421 422 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 423 /// crossing a scheduling boundary. [begin, end) includes all instructions in 424 /// the region, including the boundary itself and single-instruction regions 425 /// that don't get scheduled. 426 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 427 MachineBasicBlock::iterator begin, 428 MachineBasicBlock::iterator end, 429 unsigned endcount) 430 { 431 ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); 432 433 // For convenience remember the end of the liveness region. 434 LiveRegionEnd = 435 (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 436 } 437 438 // Setup the register pressure trackers for the top scheduled top and bottom 439 // scheduled regions. 440 void ScheduleDAGMI::initRegPressure() { 441 TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 442 BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 443 444 // Close the RPTracker to finalize live ins. 445 RPTracker.closeRegion(); 446 447 DEBUG(RPTracker.getPressure().dump(TRI)); 448 449 // Initialize the live ins and live outs. 450 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 451 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 452 453 // Close one end of the tracker so we can call 454 // getMaxUpward/DownwardPressureDelta before advancing across any 455 // instructions. This converts currently live regs into live ins/outs. 
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << "Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
546 if (LiveRegionEnd != RegionEnd) 547 RPTracker.recede(); 548 549 // Build the DAG, and compute current register pressure. 550 buildSchedGraph(AA, &RPTracker); 551 if (ViewMISchedDAGs) viewGraph(); 552 553 // Initialize top/bottom trackers after computing region pressure. 554 initRegPressure(); 555 } 556 557 /// Apply each ScheduleDAGMutation step in order. 558 void ScheduleDAGMI::postprocessDAG() { 559 for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { 560 Mutations[i]->apply(this); 561 } 562 } 563 564 void ScheduleDAGMI::initDFSResult() { 565 if (!DFSResult) 566 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); 567 DFSResult->clear(); 568 DFSResult->resize(SUnits.size()); 569 ScheduledTrees.clear(); 570 } 571 572 void ScheduleDAGMI::computeDFSResult(ArrayRef<SUnit*> Roots) { 573 DFSResult->compute(Roots); 574 ScheduledTrees.resize(DFSResult->getNumSubtrees()); 575 } 576 577 // Release all DAG roots for scheduling. 578 // 579 // Nodes with unreleased weak edges can still be roots. 580 void ScheduleDAGMI::releaseRoots() { 581 SmallVector<SUnit*, 16> BotRoots; 582 583 for (std::vector<SUnit>::iterator 584 I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { 585 SUnit *SU = &(*I); 586 587 // Order predecessors so DFSResult follows the critical path. 588 SU->biasCriticalPath(); 589 590 // A SUnit is ready to top schedule if it has no predecessors. 591 if (!I->NumPredsLeft && SU != &EntrySU) 592 SchedImpl->releaseTopNode(SU); 593 // A SUnit is ready to bottom schedule if it has no successors. 594 if (!I->NumSuccsLeft && SU != &ExitSU) 595 BotRoots.push_back(SU); 596 } 597 // Release bottom roots in reverse order so the higher priority nodes appear 598 // first. This is more natural and slightly more efficient. 599 for (SmallVectorImpl<SUnit*>::const_reverse_iterator 600 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) 601 SchedImpl->releaseBottomNode(*I); 602 } 603 604 /// Identify DAG roots and setup scheduler queues. 605 void ScheduleDAGMI::initQueues() { 606 NextClusterSucc = NULL; 607 NextClusterPred = NULL; 608 609 // Initialize the strategy before modifying the DAG. 610 SchedImpl->initialize(this); 611 612 // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 613 releaseRoots(); 614 615 releaseSuccessors(&EntrySU); 616 releasePredecessors(&ExitSU); 617 618 SchedImpl->registerRoots(); 619 620 // Advance past initial DebugValues. 621 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); 622 CurrentTop = nextIfDebug(RegionBegin, RegionEnd); 623 TopRPTracker.setPos(CurrentTop); 624 625 CurrentBottom = RegionEnd; 626 } 627 628 /// Move an instruction and update register pressure. 629 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { 630 // Move the instruction to its new location in the instruction stream. 631 MachineInstr *MI = SU->getInstr(); 632 633 if (IsTopNode) { 634 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 635 if (&*CurrentTop == MI) 636 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 637 else { 638 moveInstruction(MI, CurrentTop); 639 TopRPTracker.setPos(MI); 640 } 641 642 // Update top scheduled pressure. 
643 TopRPTracker.advance(); 644 assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 645 updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); 646 } 647 else { 648 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 649 MachineBasicBlock::iterator priorII = 650 priorNonDebug(CurrentBottom, CurrentTop); 651 if (&*priorII == MI) 652 CurrentBottom = priorII; 653 else { 654 if (&*CurrentTop == MI) { 655 CurrentTop = nextIfDebug(++CurrentTop, priorII); 656 TopRPTracker.setPos(CurrentTop); 657 } 658 moveInstruction(MI, CurrentBottom); 659 CurrentBottom = MI; 660 } 661 // Update bottom scheduled pressure. 662 BotRPTracker.recede(); 663 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 664 updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); 665 } 666 } 667 668 /// Update scheduler queues after scheduling an instruction. 669 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { 670 // Release dependent instructions for scheduling. 671 if (IsTopNode) 672 releaseSuccessors(SU); 673 else 674 releasePredecessors(SU); 675 676 SU->isScheduled = true; 677 678 if (DFSResult) { 679 unsigned SubtreeID = DFSResult->getSubtreeID(SU); 680 if (!ScheduledTrees.test(SubtreeID)) { 681 ScheduledTrees.set(SubtreeID); 682 DFSResult->scheduleTree(SubtreeID); 683 SchedImpl->scheduleTree(SubtreeID); 684 } 685 } 686 687 // Notify the scheduling strategy after updating the DAG. 688 SchedImpl->schedNode(SU, IsTopNode); 689 } 690 691 /// Reinsert any remaining debug_values, just like the PostRA scheduler. 692 void ScheduleDAGMI::placeDebugValues() { 693 // If first instruction was a DBG_VALUE then put it back. 694 if (FirstDbgValue) { 695 BB->splice(RegionBegin, BB, FirstDbgValue); 696 RegionBegin = FirstDbgValue; 697 } 698 699 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator 700 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 701 std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); 702 MachineInstr *DbgValue = P.first; 703 MachineBasicBlock::iterator OrigPrevMI = P.second; 704 if (&*RegionBegin == DbgValue) 705 ++RegionBegin; 706 BB->splice(++OrigPrevMI, BB, DbgValue); 707 if (OrigPrevMI == llvm::prior(RegionEnd)) 708 RegionEnd = DbgValue; 709 } 710 DbgValues.clear(); 711 FirstDbgValue = NULL; 712 } 713 714 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 715 void ScheduleDAGMI::dumpSchedule() const { 716 for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { 717 if (SUnit *SU = getSUnit(&(*MI))) 718 SU->dump(this); 719 else 720 dbgs() << "Missing SUnit\n"; 721 } 722 } 723 #endif 724 725 //===----------------------------------------------------------------------===// 726 // LoadClusterMutation - DAG post-processing to cluster loads. 727 //===----------------------------------------------------------------------===// 728 729 namespace { 730 /// \brief Post-process the DAG to create cluster edges between neighboring 731 /// loads. 
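///
/// Loads are grouped by the chain predecessor they depend on; within a group,
/// loads that share a base register are linked with weak cluster edges so the
/// scheduler tends to keep them adjacent.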
732 class LoadClusterMutation : public ScheduleDAGMutation { 733 struct LoadInfo { 734 SUnit *SU; 735 unsigned BaseReg; 736 unsigned Offset; 737 LoadInfo(SUnit *su, unsigned reg, unsigned ofs) 738 : SU(su), BaseReg(reg), Offset(ofs) {} 739 }; 740 static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, 741 const LoadClusterMutation::LoadInfo &RHS); 742 743 const TargetInstrInfo *TII; 744 const TargetRegisterInfo *TRI; 745 public: 746 LoadClusterMutation(const TargetInstrInfo *tii, 747 const TargetRegisterInfo *tri) 748 : TII(tii), TRI(tri) {} 749 750 virtual void apply(ScheduleDAGMI *DAG); 751 protected: 752 void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); 753 }; 754 } // anonymous 755 756 bool LoadClusterMutation::LoadInfoLess( 757 const LoadClusterMutation::LoadInfo &LHS, 758 const LoadClusterMutation::LoadInfo &RHS) { 759 if (LHS.BaseReg != RHS.BaseReg) 760 return LHS.BaseReg < RHS.BaseReg; 761 return LHS.Offset < RHS.Offset; 762 } 763 764 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, 765 ScheduleDAGMI *DAG) { 766 SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; 767 for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { 768 SUnit *SU = Loads[Idx]; 769 unsigned BaseReg; 770 unsigned Offset; 771 if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) 772 LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); 773 } 774 if (LoadRecords.size() < 2) 775 return; 776 std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); 777 unsigned ClusterLength = 1; 778 for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { 779 if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { 780 ClusterLength = 1; 781 continue; 782 } 783 784 SUnit *SUa = LoadRecords[Idx].SU; 785 SUnit *SUb = LoadRecords[Idx+1].SU; 786 if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength) 787 && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { 788 789 DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" 790 << SUb->NodeNum << ")\n"); 791 // Copy successor edges from SUa to SUb. Interleaving computation 792 // dependent on SUa can prevent load combining due to register reuse. 793 // Predecessor edges do not need to be copied from SUb to SUa since nearby 794 // loads should have effectively the same inputs. 795 for (SUnit::const_succ_iterator 796 SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { 797 if (SI->getSUnit() == SUb) 798 continue; 799 DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); 800 DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); 801 } 802 ++ClusterLength; 803 } 804 else 805 ClusterLength = 1; 806 } 807 } 808 809 /// \brief Callback from DAG postProcessing to create cluster edges for loads. 810 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) { 811 // Map DAG NodeNum to store chain ID. 812 DenseMap<unsigned, unsigned> StoreChainIDs; 813 // Map each store chain to a set of dependent loads. 814 SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; 815 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 816 SUnit *SU = &DAG->SUnits[Idx]; 817 if (!SU->getInstr()->mayLoad()) 818 continue; 819 unsigned ChainPredID = DAG->SUnits.size(); 820 for (SUnit::const_pred_iterator 821 PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 822 if (PI->isCtrl()) { 823 ChainPredID = PI->getSUnit()->NodeNum; 824 break; 825 } 826 } 827 // Check if this chain-like pred has been seen 828 // before. 
ChainPredID==MaxNodeID for loads at the top of the schedule. 829 unsigned NumChains = StoreChainDependents.size(); 830 std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = 831 StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); 832 if (Result.second) 833 StoreChainDependents.resize(NumChains + 1); 834 StoreChainDependents[Result.first->second].push_back(SU); 835 } 836 // Iterate over the store chains. 837 for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) 838 clusterNeighboringLoads(StoreChainDependents[Idx], DAG); 839 } 840 841 //===----------------------------------------------------------------------===// 842 // MacroFusion - DAG post-processing to encourage fusion of macro ops. 843 //===----------------------------------------------------------------------===// 844 845 namespace { 846 /// \brief Post-process the DAG to create cluster edges between instructions 847 /// that may be fused by the processor into a single operation. 848 class MacroFusion : public ScheduleDAGMutation { 849 const TargetInstrInfo *TII; 850 public: 851 MacroFusion(const TargetInstrInfo *tii): TII(tii) {} 852 853 virtual void apply(ScheduleDAGMI *DAG); 854 }; 855 } // anonymous 856 857 /// \brief Callback from DAG postProcessing to create cluster edges to encourage 858 /// fused operations. 859 void MacroFusion::apply(ScheduleDAGMI *DAG) { 860 // For now, assume targets can only fuse with the branch. 861 MachineInstr *Branch = DAG->ExitSU.getInstr(); 862 if (!Branch) 863 return; 864 865 for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { 866 SUnit *SU = &DAG->SUnits[--Idx]; 867 if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) 868 continue; 869 870 // Create a single weak edge from SU to ExitSU. The only effect is to cause 871 // bottom-up scheduling to heavily prioritize the clustered SU. There is no 872 // need to copy predecessor edges from ExitSU to SU, since top-down 873 // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling 874 // of SU, we could create an artificial edge from the deepest root, but it 875 // hasn't been needed yet. 876 bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); 877 (void)Success; 878 assert(Success && "No DAG nodes should be reachable from ExitSU"); 879 880 DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); 881 break; 882 } 883 } 884 885 //===----------------------------------------------------------------------===// 886 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. 887 //===----------------------------------------------------------------------===// 888 889 namespace { 890 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance 891 /// the schedule. 892 class ConvergingScheduler : public MachineSchedStrategy { 893 public: 894 /// Represent the type of SchedCandidate found within a single queue. 895 /// pickNodeBidirectional depends on these listed by decreasing priority. 896 enum CandReason { 897 NoCand, SingleExcess, SingleCritical, Cluster, 898 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 899 TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, 900 NodeOrder}; 901 902 #ifndef NDEBUG 903 static const char *getReasonStr(ConvergingScheduler::CandReason Reason); 904 #endif 905 906 /// Policy for scheduling the next instruction in the candidate's zone. 
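  ///
  /// ReduceLatency asks the heuristics to shorten the critical path, while
  /// ReduceResIdx / DemandResIdx steer candidates away from, or toward, a
  /// particular processor resource.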
907 struct CandPolicy { 908 bool ReduceLatency; 909 unsigned ReduceResIdx; 910 unsigned DemandResIdx; 911 912 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} 913 }; 914 915 /// Status of an instruction's critical resource consumption. 916 struct SchedResourceDelta { 917 // Count critical resources in the scheduled region required by SU. 918 unsigned CritResources; 919 920 // Count critical resources from another region consumed by SU. 921 unsigned DemandedResources; 922 923 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} 924 925 bool operator==(const SchedResourceDelta &RHS) const { 926 return CritResources == RHS.CritResources 927 && DemandedResources == RHS.DemandedResources; 928 } 929 bool operator!=(const SchedResourceDelta &RHS) const { 930 return !operator==(RHS); 931 } 932 }; 933 934 /// Store the state used by ConvergingScheduler heuristics, required for the 935 /// lifetime of one invocation of pickNode(). 936 struct SchedCandidate { 937 CandPolicy Policy; 938 939 // The best SUnit candidate. 940 SUnit *SU; 941 942 // The reason for this candidate. 943 CandReason Reason; 944 945 // Register pressure values for the best candidate. 946 RegPressureDelta RPDelta; 947 948 // Critical resource consumption of the best candidate. 949 SchedResourceDelta ResDelta; 950 951 SchedCandidate(const CandPolicy &policy) 952 : Policy(policy), SU(NULL), Reason(NoCand) {} 953 954 bool isValid() const { return SU; } 955 956 // Copy the status of another candidate without changing policy. 957 void setBest(SchedCandidate &Best) { 958 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 959 SU = Best.SU; 960 Reason = Best.Reason; 961 RPDelta = Best.RPDelta; 962 ResDelta = Best.ResDelta; 963 } 964 965 void initResourceDelta(const ScheduleDAGMI *DAG, 966 const TargetSchedModel *SchedModel); 967 }; 968 969 /// Summarize the unscheduled region. 970 struct SchedRemainder { 971 // Critical path through the DAG in expected latency. 972 unsigned CriticalPath; 973 974 // Unscheduled resources 975 SmallVector<unsigned, 16> RemainingCounts; 976 // Critical resource for the unscheduled zone. 977 unsigned CritResIdx; 978 // Number of micro-ops left to schedule. 979 unsigned RemainingMicroOps; 980 981 void reset() { 982 CriticalPath = 0; 983 RemainingCounts.clear(); 984 CritResIdx = 0; 985 RemainingMicroOps = 0; 986 } 987 988 SchedRemainder() { reset(); } 989 990 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 991 992 unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { 993 if (!SchedModel->hasInstrSchedModel()) 994 return 0; 995 996 return std::max( 997 RemainingMicroOps * SchedModel->getMicroOpFactor(), 998 RemainingCounts[CritResIdx]); 999 } 1000 }; 1001 1002 /// Each Scheduling boundary is associated with ready queues. It tracks the 1003 /// current cycle in the direction of movement, and maintains the state 1004 /// of "hazards" and other interlocks at the current cycle. 1005 struct SchedBoundary { 1006 ScheduleDAGMI *DAG; 1007 const TargetSchedModel *SchedModel; 1008 SchedRemainder *Rem; 1009 1010 ReadyQueue Available; 1011 ReadyQueue Pending; 1012 bool CheckPending; 1013 1014 // For heuristics, keep a list of the nodes that immediately depend on the 1015 // most recently scheduled node. 1016 SmallPtrSet<const SUnit*, 8> NextSUs; 1017 1018 ScheduleHazardRecognizer *HazardRec; 1019 1020 unsigned CurrCycle; 1021 unsigned IssueCount; 1022 1023 /// MinReadyCycle - Cycle of the soonest available instruction. 
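    /// Reset to UINT_MAX in releasePending() when the Available queue is
    /// empty.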
1024 unsigned MinReadyCycle; 1025 1026 // The expected latency of the critical path in this scheduled zone. 1027 unsigned ExpectedLatency; 1028 1029 // Resources used in the scheduled zone beyond this boundary. 1030 SmallVector<unsigned, 16> ResourceCounts; 1031 1032 // Cache the critical resources ID in this scheduled zone. 1033 unsigned CritResIdx; 1034 1035 // Is the scheduled region resource limited vs. latency limited. 1036 bool IsResourceLimited; 1037 1038 unsigned ExpectedCount; 1039 1040 #ifndef NDEBUG 1041 // Remember the greatest min operand latency. 1042 unsigned MaxMinLatency; 1043 #endif 1044 1045 void reset() { 1046 Available.clear(); 1047 Pending.clear(); 1048 CheckPending = false; 1049 NextSUs.clear(); 1050 HazardRec = 0; 1051 CurrCycle = 0; 1052 IssueCount = 0; 1053 MinReadyCycle = UINT_MAX; 1054 ExpectedLatency = 0; 1055 ResourceCounts.resize(1); 1056 assert(!ResourceCounts[0] && "nonzero count for bad resource"); 1057 CritResIdx = 0; 1058 IsResourceLimited = false; 1059 ExpectedCount = 0; 1060 #ifndef NDEBUG 1061 MaxMinLatency = 0; 1062 #endif 1063 // Reserve a zero-count for invalid CritResIdx. 1064 ResourceCounts.resize(1); 1065 } 1066 1067 /// Pending queues extend the ready queues with the same ID and the 1068 /// PendingFlag set. 1069 SchedBoundary(unsigned ID, const Twine &Name): 1070 DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), 1071 Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { 1072 reset(); 1073 } 1074 1075 ~SchedBoundary() { delete HazardRec; } 1076 1077 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 1078 SchedRemainder *rem); 1079 1080 bool isTop() const { 1081 return Available.getID() == ConvergingScheduler::TopQID; 1082 } 1083 1084 unsigned getUnscheduledLatency(SUnit *SU) const { 1085 if (isTop()) 1086 return SU->getHeight(); 1087 return SU->getDepth() + SU->Latency; 1088 } 1089 1090 unsigned getCriticalCount() const { 1091 return ResourceCounts[CritResIdx]; 1092 } 1093 1094 bool checkHazard(SUnit *SU); 1095 1096 void setLatencyPolicy(CandPolicy &Policy); 1097 1098 void releaseNode(SUnit *SU, unsigned ReadyCycle); 1099 1100 void bumpCycle(); 1101 1102 void countResource(unsigned PIdx, unsigned Cycles); 1103 1104 void bumpNode(SUnit *SU); 1105 1106 void releasePending(); 1107 1108 void removeReady(SUnit *SU); 1109 1110 SUnit *pickOnlyChoice(); 1111 }; 1112 1113 private: 1114 ScheduleDAGMI *DAG; 1115 const TargetSchedModel *SchedModel; 1116 const TargetRegisterInfo *TRI; 1117 1118 // State of the top and bottom scheduled instruction boundaries. 
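  // Rem summarizes the still-unscheduled middle of the region; Top and Bot
  // converge toward each other from the region's ends.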
1119 SchedRemainder Rem; 1120 SchedBoundary Top; 1121 SchedBoundary Bot; 1122 1123 public: 1124 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) 1125 enum { 1126 TopQID = 1, 1127 BotQID = 2, 1128 LogMaxQID = 2 1129 }; 1130 1131 ConvergingScheduler(): 1132 DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} 1133 1134 virtual void initialize(ScheduleDAGMI *dag); 1135 1136 virtual SUnit *pickNode(bool &IsTopNode); 1137 1138 virtual void schedNode(SUnit *SU, bool IsTopNode); 1139 1140 virtual void releaseTopNode(SUnit *SU); 1141 1142 virtual void releaseBottomNode(SUnit *SU); 1143 1144 virtual void registerRoots(); 1145 1146 protected: 1147 void balanceZones( 1148 ConvergingScheduler::SchedBoundary &CriticalZone, 1149 ConvergingScheduler::SchedCandidate &CriticalCand, 1150 ConvergingScheduler::SchedBoundary &OppositeZone, 1151 ConvergingScheduler::SchedCandidate &OppositeCand); 1152 1153 void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, 1154 ConvergingScheduler::SchedCandidate &BotCand); 1155 1156 void tryCandidate(SchedCandidate &Cand, 1157 SchedCandidate &TryCand, 1158 SchedBoundary &Zone, 1159 const RegPressureTracker &RPTracker, 1160 RegPressureTracker &TempTracker); 1161 1162 SUnit *pickNodeBidirectional(bool &IsTopNode); 1163 1164 void pickNodeFromQueue(SchedBoundary &Zone, 1165 const RegPressureTracker &RPTracker, 1166 SchedCandidate &Candidate); 1167 1168 #ifndef NDEBUG 1169 void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); 1170 #endif 1171 }; 1172 } // namespace 1173 1174 void ConvergingScheduler::SchedRemainder:: 1175 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 1176 reset(); 1177 if (!SchedModel->hasInstrSchedModel()) 1178 return; 1179 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 1180 for (std::vector<SUnit>::iterator 1181 I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { 1182 const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); 1183 RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); 1184 for (TargetSchedModel::ProcResIter 1185 PI = SchedModel->getWriteProcResBegin(SC), 1186 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1187 unsigned PIdx = PI->ProcResourceIdx; 1188 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1189 RemainingCounts[PIdx] += (Factor * PI->Cycles); 1190 } 1191 } 1192 for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); 1193 PIdx != PEnd; ++PIdx) { 1194 if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) 1195 >= (int)SchedModel->getLatencyFactor()) { 1196 CritResIdx = PIdx; 1197 } 1198 } 1199 } 1200 1201 void ConvergingScheduler::SchedBoundary:: 1202 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 1203 reset(); 1204 DAG = dag; 1205 SchedModel = smodel; 1206 Rem = rem; 1207 if (SchedModel->hasInstrSchedModel()) 1208 ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); 1209 } 1210 1211 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { 1212 DAG = dag; 1213 SchedModel = DAG->getSchedModel(); 1214 TRI = DAG->TRI; 1215 Rem.init(DAG, SchedModel); 1216 Top.init(DAG, SchedModel, &Rem); 1217 Bot.init(DAG, SchedModel, &Rem); 1218 1219 DAG->initDFSResult(); 1220 1221 // Initialize resource counts. 1222 1223 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 1224 // are disabled, then these HazardRecs will be disabled. 
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isWeak())
      continue;
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');

  DAG->computeDFSResult(Bot.Available.elements());
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
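///
/// For example, with IssueWidth == 4 and IssueCount == 3, one more single-uop
/// instruction still fits in the current group, but a two-uop instruction is
/// reported as a hazard and deferred to the Pending queue by the callers.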
1298 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { 1299 if (HazardRec->isEnabled()) 1300 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 1301 1302 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 1303 if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { 1304 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" 1305 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); 1306 return true; 1307 } 1308 return false; 1309 } 1310 1311 /// Compute the remaining latency to determine whether ILP should be increased. 1312 void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { 1313 // FIXME: compile time. In all, we visit four queues here one we should only 1314 // need to visit the one that was last popped if we cache the result. 1315 unsigned RemLatency = 0; 1316 for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); 1317 I != E; ++I) { 1318 unsigned L = getUnscheduledLatency(*I); 1319 if (L > RemLatency) 1320 RemLatency = L; 1321 } 1322 for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); 1323 I != E; ++I) { 1324 unsigned L = getUnscheduledLatency(*I); 1325 if (L > RemLatency) 1326 RemLatency = L; 1327 } 1328 unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); 1329 if (RemLatency + ExpectedLatency >= CriticalPathLimit 1330 && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { 1331 Policy.ReduceLatency = true; 1332 DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); 1333 } 1334 } 1335 1336 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, 1337 unsigned ReadyCycle) { 1338 1339 if (ReadyCycle < MinReadyCycle) 1340 MinReadyCycle = ReadyCycle; 1341 1342 // Check for interlocks first. For the purpose of other heuristics, an 1343 // instruction that cannot issue appears as if it's not in the ReadyQueue. 1344 if (ReadyCycle > CurrCycle || checkHazard(SU)) 1345 Pending.push(SU); 1346 else 1347 Available.push(SU); 1348 1349 // Record this node as an immediate dependent of the scheduled node. 1350 NextSUs.insert(SU); 1351 } 1352 1353 /// Move the boundary of scheduled code by one cycle. 1354 void ConvergingScheduler::SchedBoundary::bumpCycle() { 1355 unsigned Width = SchedModel->getIssueWidth(); 1356 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 1357 1358 unsigned NextCycle = CurrCycle + 1; 1359 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); 1360 if (MinReadyCycle > NextCycle) { 1361 IssueCount = 0; 1362 NextCycle = MinReadyCycle; 1363 } 1364 1365 if (!HazardRec->isEnabled()) { 1366 // Bypass HazardRec virtual calls. 1367 CurrCycle = NextCycle; 1368 } 1369 else { 1370 // Bypass getHazardType calls in case of long latency. 1371 for (; CurrCycle != NextCycle; ++CurrCycle) { 1372 if (isTop()) 1373 HazardRec->AdvanceCycle(); 1374 else 1375 HazardRec->RecedeCycle(); 1376 } 1377 } 1378 CheckPending = true; 1379 IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); 1380 1381 DEBUG(dbgs() << " *** " << Available.getName() << " cycle " 1382 << CurrCycle << '\n'); 1383 } 1384 1385 /// Add the given processor resource to this scheduled zone. 
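/// Counts are kept in units of the resource factor, so usage is comparable
/// across resource kinds and against the latency factor.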
1386 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, 1387 unsigned Cycles) { 1388 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1389 DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name 1390 << " +(" << Cycles << "x" << Factor 1391 << ") / " << SchedModel->getLatencyFactor() << '\n'); 1392 1393 unsigned Count = Factor * Cycles; 1394 ResourceCounts[PIdx] += Count; 1395 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 1396 Rem->RemainingCounts[PIdx] -= Count; 1397 1398 // Check if this resource exceeds the current critical resource by a full 1399 // cycle. If so, it becomes the critical resource. 1400 if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) 1401 >= (int)SchedModel->getLatencyFactor()) { 1402 CritResIdx = PIdx; 1403 DEBUG(dbgs() << " *** Critical resource " 1404 << SchedModel->getProcResource(PIdx)->Name << " x" 1405 << ResourceCounts[PIdx] << '\n'); 1406 } 1407 } 1408 1409 /// Move the boundary of scheduled code by one SUnit. 1410 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { 1411 // Update the reservation table. 1412 if (HazardRec->isEnabled()) { 1413 if (!isTop() && SU->isCall) { 1414 // Calls are scheduled with their preceding instructions. For bottom-up 1415 // scheduling, clear the pipeline state before emitting. 1416 HazardRec->Reset(); 1417 } 1418 HazardRec->EmitInstruction(SU); 1419 } 1420 // Update resource counts and critical resource. 1421 if (SchedModel->hasInstrSchedModel()) { 1422 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 1423 Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); 1424 for (TargetSchedModel::ProcResIter 1425 PI = SchedModel->getWriteProcResBegin(SC), 1426 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1427 countResource(PI->ProcResourceIdx, PI->Cycles); 1428 } 1429 } 1430 if (isTop()) { 1431 if (SU->getDepth() > ExpectedLatency) 1432 ExpectedLatency = SU->getDepth(); 1433 } 1434 else { 1435 if (SU->getHeight() > ExpectedLatency) 1436 ExpectedLatency = SU->getHeight(); 1437 } 1438 1439 IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); 1440 1441 // Check the instruction group dispatch limit. 1442 // TODO: Check if this SU must end a dispatch group. 1443 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 1444 1445 // checkHazard prevents scheduling multiple instructions per cycle that exceed 1446 // issue width. However, we commonly reach the maximum. In this case 1447 // opportunistically bump the cycle to avoid uselessly checking everything in 1448 // the readyQ. Furthermore, a single instruction may produce more than one 1449 // cycle's worth of micro-ops. 1450 if (IssueCount >= SchedModel->getIssueWidth()) { 1451 DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); 1452 bumpCycle(); 1453 } 1454 } 1455 1456 /// Release pending ready nodes in to the available queue. This makes them 1457 /// visible to heuristics. 1458 void ConvergingScheduler::SchedBoundary::releasePending() { 1459 // If the available queue is empty, it is safe to reset MinReadyCycle. 1460 if (Available.empty()) 1461 MinReadyCycle = UINT_MAX; 1462 1463 // Check to see if any of the pending instructions are ready to issue. If 1464 // so, add them to the available queue. 1465 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 1466 SUnit *SU = *(Pending.begin()+i); 1467 unsigned ReadyCycle = isTop() ? 
SU->TopReadyCycle : SU->BotReadyCycle; 1468 1469 if (ReadyCycle < MinReadyCycle) 1470 MinReadyCycle = ReadyCycle; 1471 1472 if (ReadyCycle > CurrCycle) 1473 continue; 1474 1475 if (checkHazard(SU)) 1476 continue; 1477 1478 Available.push(SU); 1479 Pending.remove(Pending.begin()+i); 1480 --i; --e; 1481 } 1482 DEBUG(if (!Pending.empty()) Pending.dump()); 1483 CheckPending = false; 1484 } 1485 1486 /// Remove SU from the ready set for this boundary. 1487 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { 1488 if (Available.isInQueue(SU)) 1489 Available.remove(Available.find(SU)); 1490 else { 1491 assert(Pending.isInQueue(SU) && "bad ready count"); 1492 Pending.remove(Pending.find(SU)); 1493 } 1494 } 1495 1496 /// If this queue only has one ready candidate, return it. As a side effect, 1497 /// defer any nodes that now hit a hazard, and advance the cycle until at least 1498 /// one node is ready. If multiple instructions are ready, return NULL. 1499 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { 1500 if (CheckPending) 1501 releasePending(); 1502 1503 if (IssueCount > 0) { 1504 // Defer any ready instrs that now have a hazard. 1505 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 1506 if (checkHazard(*I)) { 1507 Pending.push(*I); 1508 I = Available.remove(I); 1509 continue; 1510 } 1511 ++I; 1512 } 1513 } 1514 for (unsigned i = 0; Available.empty(); ++i) { 1515 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 1516 "permanent hazard"); (void)i; 1517 bumpCycle(); 1518 releasePending(); 1519 } 1520 if (Available.size() == 1) 1521 return *Available.begin(); 1522 return NULL; 1523 } 1524 1525 /// Record the candidate policy for opposite zones with different critical 1526 /// resources. 1527 /// 1528 /// If the CriticalZone is latency limited, don't force a policy for the 1529 /// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. 1530 void ConvergingScheduler::balanceZones( 1531 ConvergingScheduler::SchedBoundary &CriticalZone, 1532 ConvergingScheduler::SchedCandidate &CriticalCand, 1533 ConvergingScheduler::SchedBoundary &OppositeZone, 1534 ConvergingScheduler::SchedCandidate &OppositeCand) { 1535 1536 if (!CriticalZone.IsResourceLimited) 1537 return; 1538 assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); 1539 1540 SchedRemainder *Rem = CriticalZone.Rem; 1541 1542 // If the critical zone is overconsuming a resource relative to the 1543 // remainder, try to reduce it. 1544 unsigned RemainingCritCount = 1545 Rem->RemainingCounts[CriticalZone.CritResIdx]; 1546 if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) 1547 > (int)SchedModel->getLatencyFactor()) { 1548 CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; 1549 DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " 1550 << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name 1551 << '\n'); 1552 } 1553 // If the other zone is underconsuming a resource relative to the full zone, 1554 // try to increase it. 
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or critical path and
/// set each candidate's ReduceLatency and ReduceResIdx policies accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  // Set ReduceLatency to true if needed.
  Bot.setLatencyPolicy(BotCand.Policy);
  Top.setLatencyPolicy(TopCand.Policy);

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
    // to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
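  // A zone's cost is the larger of its latency-based cycle count (scaled to
  // resource units) and its critical resource count.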
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
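  // Each check below either decides the comparison (tryLess/tryGreater return
  // true, with TryCand.Reason set if TryCand won) or is a tie and falls
  // through to the next, weaker heuristic. CandReason values are ordered from
  // strongest to weakest, which is also how traceCandidate labels the winner.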
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
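  // If we reach this point no heuristic preferred either candidate. Keep the
  // incumbent unless TryCand precedes it in the original instruction order
  // (or succeeds it when scheduling bottom-up); in that case mark TryCand
  // with the weakest reason, NodeOrder, so pickNodeFromQueue still records
  // the pick.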
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case Cluster:        return "CLUSTER   ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
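  // The remaining columns are optional: pressure-set delta, processor
  // resource, and latency; each is replaced by padding when it does not apply
  // so successive trace lines stay roughly aligned.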
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << " ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << " ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the given ready queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
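  // The bottom pick did not single itself out on excess or critical pressure,
  // so evaluate the top queue as well and compare the two winners by the
  // strength of the reason that selected each of them (lower CandReason
  // values are stronger).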
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
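  // Mutations run after the DAG is built and before scheduling begins, adding
  // edges that bias the strategy: load clustering encourages loads to nearby
  // addresses to issue back-to-back, and macro fusion keeps a fusable
  // instruction adjacent to the branch it can fuse with. (See the mutation
  // classes earlier in this file.)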
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult;
  const BitVector *ScheduledTrees;
  bool MaximizeILP;

  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  /// In case all subtrees are eventually connected to a common root through
  /// data dependence (e.g. reduction), place an upper limit on their size.
  ///
  /// FIXME: A subtree limit is generally good, but in the situation commented
  /// above, where multiple similar subtrees feed a common root, we should
  /// only split at a point where the resulting subtrees will be balanced.
  /// (a motivating test case must be found).
  static const unsigned SubtreeLimit = 16;

  ScheduleDAGMI *DAG;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *dag) {
    DAG = dag;
    DAG->initDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  virtual void registerRoots() {
    DAG->computeDFSResult(ReadyQ);
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------
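  /// ReadyQ is maintained as a heap ordered by Cmp via std::make_heap,
  /// std::push_heap, and std::pop_heap, so the best candidate under the ILP
  /// metric is always at the front; whenever the priorities change (new DFS
  /// results or a newly scheduled subtree), the heap is simply rebuilt.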
  /// Callback to select the highest priority node from the ready Q.
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
          << *SU->getInstr()
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
    return SU;
  }

  /// \brief Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG