//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"

#include <queue>

using namespace llvm;

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice, then
// it can be specified by the machine model. For now, it's an experimental
// tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
  cl::desc("Allow expected latency to exceed the critical path by N cycles "
           "before attempting to balance ILP"),
  cl::init(10U));

// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
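// Illustrative usage only (not exhaustive): the options above are consumed
// from the llc/clang -mllvm command line, for example
//   llc foo.ll -misched=default        (pick a scheduler registered below)
//   llc foo.ll -misched-topdown        (force one scheduling direction)
//   llc foo.ll -misched-cutoff=100     (debug builds: stop after 100 instrs)
// Whether the pass runs at all is decided by the target's pass configuration,
// not by these flags.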
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);


/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Setup the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMI::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << "Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}
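// For illustration only (a sketch, not part of this file's logic): the
// simplest way to plug in a new algorithm is to implement MachineSchedStrategy
// and hand an instance to ScheduleDAGMI, roughly:
//
//   struct FifoSchedStrategy : public MachineSchedStrategy { // hypothetical
//     std::vector<SUnit*> Q;
//     virtual void initialize(ScheduleDAGMI *DAG) { Q.clear(); }
//     virtual SUnit *pickNode(bool &IsTopNode) {
//       if (Q.empty()) return NULL;
//       IsTopNode = true;                  // always extend the top zone
//       SUnit *SU = Q.back(); Q.pop_back();
//       return SU;
//     }
//     virtual void schedNode(SUnit *SU, bool IsTopNode) {}
//     virtual void releaseTopNode(SUnit *SU) { Q.push_back(SU); }
//     virtual void releaseBottomNode(SUnit *SU) {}
//   };
//
// e.g. new ScheduleDAGMI(C, new FifoSchedStrategy()). ConvergingScheduler
// below is the real, full-featured implementation of this interface.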
/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);
  if (ViewMISchedDAGs) viewGraph();

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

// Release all DAG roots for scheduling.
//
// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft && SU != &EntrySU)
      SchedImpl->releaseTopNode(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft && SU != &ExitSU)
      BotRoots.push_back(SU);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues() {
  NextClusterSucc = NULL;
  NextClusterPred = NULL;

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  releaseRoots();

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  TopRPTracker.setPos(CurrentTop);

  CurrentBottom = RegionEnd;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    // Update top scheduled pressure.
    TopRPTracker.advance();
    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    // Update bottom scheduled pressure.
    BotRPTracker.recede();
    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  }
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;

  // Notify the scheduling strategy after updating the DAG.
  SchedImpl->schedNode(SU, IsTopNode);
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
class LoadClusterMutation : public ScheduleDAGMutation {
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}
  };
  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
                           const LoadClusterMutation::LoadInfo &RHS);

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  virtual void apply(ScheduleDAGMI *DAG);
protected:
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous

bool LoadClusterMutation::LoadInfoLess(
  const LoadClusterMutation::LoadInfo &LHS,
  const LoadClusterMutation::LoadInfo &RHS) {
  if (LHS.BaseReg != RHS.BaseReg)
    return LHS.BaseReg < RHS.BaseReg;
  return LHS.Offset < RHS.Offset;
}

void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                  ScheduleDAGMI *DAG) {
  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    SUnit *SU = Loads[Idx];
    unsigned BaseReg;
    unsigned Offset;
    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  }
  if (LoadRecords.size() < 2)
    return;
  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
      ClusterLength = 1;
      continue;
    }

    SUnit *SUa = LoadRecords[Idx].SU;
    SUnit *SUb = LoadRecords[Idx+1].SU;
    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (SUnit::const_succ_iterator
             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
        if (SI->getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    }
    else
      ClusterLength = 1;
  }
}
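// Worked example (illustrative; register names hypothetical): given two loads
//   SU(a): ldr r1, [r0, #0]
//   SU(b): ldr r2, [r0, #8]
// that share base register r0, sorting by (BaseReg, Offset) makes them
// neighbors. If the target's shouldClusterLoads hook agrees, a weak
// SDep::Cluster edge from SU(a) to SU(b) is added so the strategy tries to
// issue them back to back, and the copied Artificial edges keep users of
// SU(a) from being scheduled between the two loads.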
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent loads.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->mayLoad())
      continue;
    unsigned ChainPredID = DAG->SUnits.size();
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->isCtrl()) {
        ChainPredID = PI->getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen before. ChainPredID ==
    // MaxNodeID for loads at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(SU);
  }
  // Iterate over the store chains.
  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}

//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  virtual void apply(ScheduleDAGMI *DAG);
};
} // anonymous

/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
  // For now, assume targets can only fuse with the branch.
  MachineInstr *Branch = DAG->ExitSU.getInstr();
  if (!Branch)
    return;

  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    SUnit *SU = &DAG->SUnits[--Idx];
    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
      continue;

    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU. There is no
    // need to copy predecessor edges from ExitSU to SU, since top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    // of SU, we could create an artificial edge from the deepest root, but it
    // hasn't been needed yet.
    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    (void)Success;
    assert(Success && "No DAG nodes should be reachable from ExitSU");

    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    break;
  }
}
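// Illustrative example (x86-flavored; the instructions are hypothetical for
// this file): if the target's shouldScheduleAdjacent hook reports that
//   SU(n): cmp eax, 42
// may fuse with the region's exiting branch
//   ExitSU: jne .LBB0_2
// then the weak Cluster edge from SU(n) to ExitSU makes the bottom-up strategy
// pick the compare immediately before the branch, keeping the pair adjacent so
// the processor can fuse them into one macro-op.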
//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//

namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, SingleExcess, SingleCritical, Cluster,
    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
    NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by ConvergingScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
      : Policy(policy), SU(NULL), Reason(NoCand) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;

    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
    // Critical resource for the unscheduled zone.
    unsigned CritResIdx;
    // Number of micro-ops left to schedule.
    unsigned RemainingMicroOps;
    // Is the unscheduled zone resource limited.
    bool IsResourceLimited;

    unsigned MaxRemainingCount;

    void reset() {
      CriticalPath = 0;
      RemainingCounts.clear();
      CritResIdx = 0;
      RemainingMicroOps = 0;
      IsResourceLimited = false;
      MaxRemainingCount = 0;
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  };

  /// Each Scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    unsigned CurrCycle;
    unsigned IssueCount;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // Resources used in the scheduled zone beyond this boundary.
    SmallVector<unsigned, 16> ResourceCounts;

    // Cache the critical resources ID in this scheduled zone.
    unsigned CritResIdx;

    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

    unsigned ExpectedCount;

    // Policy flag: attempt to find ILP until expected latency is covered.
    bool ShouldIncreaseILP;

#ifndef NDEBUG
    // Remember the greatest min operand latency.
    unsigned MaxMinLatency;
#endif

    void reset() {
      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      HazardRec = 0;
      CurrCycle = 0;
      IssueCount = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      ResourceCounts.resize(1);
      assert(!ResourceCounts[0] && "nonzero count for bad resource");
      CritResIdx = 0;
      IsResourceLimited = false;
      ExpectedCount = 0;
      ShouldIncreaseILP = false;
#ifndef NDEBUG
      MaxMinLatency = 0;
#endif
      // Reserve a zero-count for invalid CritResIdx.
      ResourceCounts.resize(1);
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == ConvergingScheduler::TopQID;
    }

    unsigned getUnscheduledLatency(SUnit *SU) const {
      if (isTop())
        return SU->getHeight();
      return SU->getDepth();
    }

    unsigned getCriticalCount() const {
      return ResourceCounts[CritResIdx];
    }

    bool checkHazard(SUnit *SU);

    void checkILPPolicy();

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle();

    void countResource(unsigned PIdx, unsigned Cycles);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();
  };

private:
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}

void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize resource counts.

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isWeak())
      continue;
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}
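// Worked example for the counter-based path above (numbers hypothetical): with
// a machine model whose IssueWidth is 4, if 3 micro-ops have already issued in
// the current cycle (IssueCount == 3), an SU that expands to 2 micro-ops is
// reported as a hazard because 3 + 2 > 4; it then sits in the Pending queue
// until bumpCycle() starts a new instruction group.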
/// If expected latency is covered, disable ILP policy.
void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
  if (ShouldIncreaseILP
      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
    ShouldIncreaseILP = false;
    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
  }
}

void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);

  // If CriticalPath has been computed, then check if the unscheduled nodes
  // exceed the ILP window. Before registerRoots, CriticalPath==0.
  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
                            > Rem->CriticalPath + ILPWindow)) {
    ShouldIncreaseILP = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
  }
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Add the given processor resource to this scheduled zone.
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Reset MaxRemainingCount for sanity.
  Rem->MaxRemainingCount = 0;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << " *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}
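// Worked example of the normalized counts above (numbers hypothetical): on a
// machine model with 2 load/store units and 1 divider, the resource factors
// might make one divider-cycle worth 2 units while one load/store-cycle is
// worth 1 unit, so a single 4-cycle divide adds 4 x 2 = 8 to the divider's
// ResourceCounts entry. Once that running count pulls ahead of the previous
// critical resource by at least a full cycle's worth of units
// (getLatencyFactor()), the divider becomes CritResIdx for this zone.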
/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i; --e;
  }
  DEBUG(if (!Pending.empty()) Pending.dump());
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here. Instead, when releasing each candidate, releaseNode
/// compares the region's critical path to the candidate's height or depth and
/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  if (!CriticalZone.IsResourceLimited)
    return;

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or critical path and
/// set each candidate's ReduceLatency policy accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  Bot.checkILPPolicy();
  Top.checkILPPolicy();
  if (Bot.ShouldIncreaseILP)
    BotCand.Policy.ReduceLatency = true;
  if (Top.ShouldIncreaseILP)
    TopCand.Policy.ReduceLatency = true;

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
    // to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Rem.MaxRemainingCount = std::max(
    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
    Rem.RemainingCounts[Rem.CritResIdx]);
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}
    SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
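  // The ReduceLatency checks below prefer, in the top zone, a candidate with a
  // smaller depth once the current best's depth (scaled by the latency factor)
  // already exceeds the zone's expected count, and otherwise a candidate with
  // a greater height, so the longer remaining dependence chain is started
  // earlier. The bottom zone applies the same logic with height and depth
  // swapped.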
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect
/// is more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case Cluster:        return "CLUSTER   ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << " ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << " ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region
  // and affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
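///
/// When -misched-topdown or -misched-bottomup is forced, only the
/// corresponding zone is consulted; otherwise pickNodeBidirectional chooses
/// the direction. The loop below retries until it finds a node that has not
/// already been scheduled from the other end.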
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  SchedDFSResult *DFSResult;
  BitVector *ScheduledTrees;
  bool MaximizeILP;

  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
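  ///
  /// Highest priority goes to nodes in subtrees that have already started
  /// scheduling, then to nodes in subtrees connected at a greater depth, and
  /// finally to the node with the better ILP value (higher when MaximizeILP,
  /// lower otherwise).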
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  /// In case all subtrees are eventually connected to a common root through
  /// data dependence (e.g. reduction), place an upper limit on their size.
  ///
  /// FIXME: A subtree limit is generally good, but in the situation commented
  /// above, where multiple similar subtrees feed a common root, we should
  /// only split at a point where the resulting subtrees will be balanced.
  /// (a motivating test case must be found).
  static const unsigned SubtreeLimit = 16;

  SchedDFSResult DFSResult;
  BitVector ScheduledTrees;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP)
    : DFSResult(/*BottomUp=*/true, SubtreeLimit),
      Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *DAG) {
    ReadyQ.clear();
    DFSResult.clear();
    DFSResult.resize(DAG->SUnits.size());
    ScheduledTrees.clear();
  }

  virtual void registerRoots() {
    DFSResult.compute(ReadyQ);
    ScheduledTrees.resize(DFSResult.getNumSubtrees());
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
          << *SU->getInstr()
          << " ILP: " << DFSResult.getILP(SU)
          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU)) << '\n');
    return SU;
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    }
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG